Package java.text

Examples of java.text.BreakIterator


    return Math.round(characters / CHINESE_RATIO_WORD_CHARACTER);
  }
 
  private int countWords(String text, Locale locale) {
    int count = 0;
    BreakIterator wordIterator = BreakIterator.getWordInstance(locale);
   
    wordIterator.setText(text);
    int start = wordIterator.first();
    int end = wordIterator.next();
    while (end != BreakIterator.DONE) {
      char ch = text.charAt(start);
      if (Character.isLetterOrDigit(ch)) {
        count++;
      }
      start = end;
      end = wordIterator.next();
    }
   
    return count;
  }
View Full Code Here


    if(locale == null) {
      locale = I18nModule.getDefaultLocale();
    }
   
    int count = 0;
    BreakIterator characterIterator = BreakIterator.getCharacterInstance(locale);
   
    characterIterator.setText(text);
    int start = characterIterator.first();
    int end = characterIterator.next();
    while (end != BreakIterator.DONE) {
      char ch = text.charAt(start);
      if (Character.isLetterOrDigit(ch)) {
        count++;
      }
      start = end;
      end = characterIterator.next();
    }
   
    return count;
  }
View Full Code Here

   * @return list of lines
   */
  private List<AttributedCharacterIterator> splitFormatted(final AttributedString text, final int width) {
    // Walks the line-break opportunities of the text and greedily collects the
    // longest candidate whose getPixelWidth() does not exceed width. Each finished
    // line is appended to "lines" as an iterator over the corresponding range of text.
    final List<AttributedCharacterIterator> lines = new LinkedList<AttributedCharacterIterator>();

    final BreakIterator iter = BreakIterator.getLineInstance();
    iter.setText(text.getIterator());

    // start index (within text) of the line currently being assembled
    int previous = iter.first();

    // longest candidate seen so far that still fits; null if none has fit yet
    AttributedCharacterIterator best = null;

    while (iter.next() != BreakIterator.DONE) {
      // everything from the start of the current line up to this break opportunity
      final AttributedCharacterIterator candidate = text.getIterator(null, previous, iter.current());

      if (getPixelWidth(candidate) <= width) {
        // check for line breaks within the provided text
        // unfortunately, the BreakIterators are too dumb to tell *why* they consider the
        // location a break, so the check needs to be implemented here
        final CharacterIterator cit = iter.getText();
        if (isHardLineBreak(cit)) {
          // forced break: emit the candidate as a line and start a new one here
          lines.add(candidate);
          previous = iter.current();
          best = null;
        } else {
          // still fits; remember it and try to extend to the next break opportunity
          best = candidate;
        }
      } else {
        if (best == null) {
          // could not break the line - the word's simply too long. Use more force
          // to fit it to the width
          best = splitAggressively(candidate, width);
          // splitAggressively returns an iterator with its own indexing,
          // so instead of using it directly we need to adjust the old one
          previous += best.getEndIndex() - best.getBeginIndex();
        } else {
          // the next line starts where the last fitting candidate ended
          previous = best.getEndIndex();
          // Trim the trailing white space
          char endChar = best.last();

          // walk backwards from the last character; endIndex ends up at the index of
          // the first character of the trailing whitespace run (or stays at previous
          // when the line does not end in whitespace)
          int endIndex = previous;
          while (Character.isWhitespace(endChar) && endChar != CharacterIterator.DONE) {
            endIndex = best.getIndex();
            endChar = best.previous();
          }

          best = text.getIterator(null, best.getBeginIndex(), endIndex);
        }

        lines.add(best);

        // a special check for a hard line break just after the word
        // that got moved to the next line
        final CharacterIterator cit = iter.getText();
        if (isHardLineBreak(cit)) {
          lines.add(text.getIterator(null, previous, iter.current()));
          previous = iter.current();
        }

        // Pick the shortest candidate possible (backtrack a bit, if needed)
        if (iter.current() > previous + 1) {
          iter.previous();
        }

        best = null;

        if (lines.size() > MAX_LINES) {
          /*
           * Limit the height of the text boxes. Append ellipsis
           * to tell the user to take a look at the chat log.
           * The last line is removed twice to avoid the situation
           * where the last text line would fit on the space the
           * ellipsis occupies.
           */
          lines.remove(lines.size() - 1);
          lines.remove(lines.size() - 1);
          lines.add(new AttributedString("...").getIterator());
          // the remaining text is intentionally dropped
          return lines;
        }
      }
    }

    // add the rest of the text, if there's any
    if (previous < iter.last()) {
      lines.add(text.getIterator(null, previous, iter.last()));
    }

    return lines;
  }
View Full Code Here

   *
   * @return iterator to the part of the line that fits in width
   */
  private AttributedCharacterIterator splitAggressively(final AttributedCharacterIterator text, final int width) {
    final int offset = text.getBeginIndex();
    final BreakIterator wordIterator = BreakIterator.getWordInstance();
   
    final AttributedString tmpText = new AttributedString(text);
    // return the original iterator if there are no suitable break points
    AttributedCharacterIterator best = text;
    wordIterator.setText(text);
   
    while (wordIterator.next() != BreakIterator.DONE) {
      final AttributedCharacterIterator candidate = tmpText.getIterator(null, tmpText.getIterator().getBeginIndex(), wordIterator.current() - offset);
     
      if (getPixelWidth(candidate) <= width) {
        best = candidate;
      } else {
        return best;
View Full Code Here

    doTests(CharArrayIterator.newWordInstance());
  }
 
  public void testConsumeWordInstance() {
    // we use the default locale, as it's randomized by LuceneTestCase
    BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
    CharArrayIterator ci = CharArrayIterator.newWordInstance();
    for (int i = 0; i < 10000; i++) {
      char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
      ci.setText(text, 0, text.length);
      consume(bi, ci);
View Full Code Here

    doTests(CharArrayIterator.newSentenceInstance());
  }
 
  public void testConsumeSentenceInstance() {
    // we use the default locale, as it's randomized by LuceneTestCase
    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
    CharArrayIterator ci = CharArrayIterator.newSentenceInstance();
    for (int i = 0; i < 10000; i++) {
      char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
      ci.setText(text, 0, text.length);
      consume(bi, ci);
View Full Code Here

    }

    public static String wrap(TextWidthCounter textWidthCounter, String s,
            int width, int initialOffset, String initialIndent,
            String subsequentIndent) {
        BreakIterator iter = BreakIterator.getLineInstance();
        iter.setText(s);
        StringBuilder res = new StringBuilder(initialIndent);
        StringBuilder sb = new StringBuilder();
        int currentWidth = initialOffset + initialIndent.length();
        for (int start = iter.first(), end = iter.next(); end != BreakIterator.DONE; start = end, end = iter
                .next()) {
            String sub = s.substring(start, end);
            int subwidth = textWidthCounter.width(sub);
            currentWidth += subwidth;
            if (currentWidth > width) {
View Full Code Here

      String text) {
        if("".equals(text)) {
          return 0;
        }
        // utility that helps us to break the lines
        final BreakIterator bi = BreakIterator.getLineInstance();
        bi.setText(text);
       
        int lineCount = 0;
        final int lineHeight = fm.getHeight();
       
        // offsets for String.substring(start, end);
        int startOffset = bi.first();
        int endOffset = bi.next();
        // we go over each possible line break that BreakIterator suggests.
        do {
          if(endOffset == text.length()) {
            // we are at the end. this would cause IllegalArgumentException
            // so we just subtract 1
            endOffset--;
          }
          // get the width of the current substring
          // and check if we are over the maximum width
          final String substring = text.substring(startOffset, endOffset);
          final int stringWidth = fm.stringWidth(substring);
          if(stringWidth > maxWidth) {
            // calculate how many lines we have to add.
            // If there is a very long string with no spaces
            // it could be that we have to add more than 1 line.
            int toAdd = (int) (Math.ceil((double) stringWidth / (double) maxWidth) - 1);
            lineCount+= toAdd;
            // we need to advance the startOffset so
            // we can start to search for a new line
            startOffset = bi.preceding(endOffset);
            bi.next();
          }
        } while((endOffset = bi.next()) != BreakIterator.DONE);
        // ensure that the rest of a line also gets a line
        lineCount++;
        return lineHeight * lineCount;
  }
 
View Full Code Here

public class SentenceDetectionTest extends TamingTextTestJ4 {

  @Test
  public void testBreakIterator() {
    //<start id="sentDetect"/>
    BreakIterator sentIterator = BreakIterator.getSentenceInstance(Locale.US);
    String testString = "This is a sentence.  It has fruits, vegetables," +
            " etc. but does not have meat.  Mr. Smith went to Washington.";
    sentIterator.setText(testString);
    int start = sentIterator.first();
    int end = -1;
    List<String> sentences = new ArrayList<String>();
    while ((end = sentIterator.next()) != BreakIterator.DONE) {
      String sentence = testString.substring(start, end);
      start = end;
      sentences.add(sentence);
      System.out.println("Sentence: " + sentence);
    }
View Full Code Here

      locale = country == null ? new Locale(language) : new Locale(language, country);
    }

    // construct BreakIterator
    String type = params.getFieldParam(fieldName, HighlightParams.BS_TYPE, "WORD").toLowerCase();
    BreakIterator bi = null;
    if(type.equals("character")){
      bi = locale == null ? BreakIterator.getCharacterInstance() : BreakIterator.getCharacterInstance(locale);
    }
    else if(type.equals("word")){
      bi = locale == null ? BreakIterator.getWordInstance() : BreakIterator.getWordInstance(locale);
View Full Code Here

TOP

Related Classes of java.text.BreakIterator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.