Examples of java.text.BreakIterator

java.text.BreakIterator

each word in order BreakIterator boundary = BreakIterator.getWordInstance(); boundary.setText(stringToExamine); printEachForward(boundary, stringToExamine); //print each sentence in reverse order boundary = BreakIterator.getSentenceInstance(Locale.US); boundary.setText(stringToExamine); printEachBackward(boundary, stringToExamine); printFirst(boundary, stringToExamine); printLast(boundary, stringToExamine); } } Print each element in order:

 /**
  * Prints every element delimited by {@code boundary}, from the first to the last,
  * one element per line on standard output.
  *
  * @param boundary iterator whose text has already been set to {@code source}
  * @param source   the text the boundaries refer to
  */
 public static void printEachForward(BreakIterator boundary, String source) {
     int from = boundary.first();
     int to = boundary.next();
     // Each pair of adjacent boundaries delimits exactly one element.
     while (to != BreakIterator.DONE) {
         System.out.println(source.substring(from, to));
         from = to;
         to = boundary.next();
     }
 }

Print each element in reverse order:

 /**
  * Prints every element delimited by {@code boundary}, from the last to the first,
  * one element per line on standard output.
  *
  * @param boundary iterator whose text has already been set to {@code source}
  * @param source   the text the boundaries refer to
  */
 public static void printEachBackward(BreakIterator boundary, String source) {
     int to = boundary.last();
     int from = boundary.previous();
     // Walk towards the beginning; previous() yields DONE once the first boundary is passed.
     while (from != BreakIterator.DONE) {
         System.out.println(source.substring(from, to));
         to = from;
         from = boundary.previous();
     }
 }

Print first element:

 /**
  * Prints the first element delimited by {@code boundary} on standard output.
  *
  * @param boundary iterator whose text has already been set to {@code source}
  * @param source   the text the boundaries refer to
  */
 public static void printFirst(BreakIterator boundary, String source) {
     final int begin = boundary.first();
     // next() is evaluated after first(), so it yields the end of the first element.
     final String firstElement = source.substring(begin, boundary.next());
     System.out.println(firstElement);
 }

Print last element:

 /**
  * Prints the last element delimited by {@code boundary} on standard output.
  *
  * @param boundary iterator whose text has already been set to {@code source}
  * @param source   the text the boundaries refer to
  */
 public static void printLast(BreakIterator boundary, String source) {
     final int finish = boundary.last();
     // previous() is evaluated after last(), so it yields the start of the last element.
     final String lastElement = source.substring(boundary.previous(), finish);
     System.out.println(lastElement);
 }

Print the element at a specified position:

 /**
  * Prints the element that contains the character at {@code pos} on standard output.
  *
  * @param boundary iterator whose text has already been set to {@code source}
  * @param pos      offset into {@code source} of a character inside the wanted element
  * @param source   the text the boundaries refer to
  */
 public static void printAt(BreakIterator boundary, int pos, String source) {
     // The first boundary after pos closes the element; the boundary before that one opens it.
     final int finish = boundary.following(pos);
     final int begin = boundary.previous();
     final String element = source.substring(begin, finish);
     System.out.println(element);
 }

Find the next word:

 /**
  * Finds the start of the next word located after {@code pos}.
  *
  * <p>A span between two adjacent word boundaries counts as a word when at least one
  * position in it holds a letter; spans of punctuation or whitespace are skipped.
  *
  * @param pos  offset in {@code text} after which to search
  * @param text the text to examine
  * @return the offset where the next word starts, or {@link BreakIterator#DONE} if none is found
  */
 public static int nextWordStartAfter(int pos, String text) {
     BreakIterator words = BreakIterator.getWordInstance();
     words.setText(text);
     int spanStart = words.following(pos);
     for (int spanEnd = words.next(); spanEnd != BreakIterator.DONE; spanEnd = words.next()) {
         if (hasLetterBetween(text, spanStart, spanEnd)) {
             return spanStart;
         }
         spanStart = spanEnd;
     }
     return BreakIterator.DONE;
 }

 /** Tells whether any index in {@code [from, to)} of {@code text} holds a letter code point. */
 private static boolean hasLetterBetween(String text, int from, int to) {
     int index = from;
     while (index < to) {
         if (Character.isLetter(text.codePointAt(index))) {
             return true;
         }
         index++;
     }
     return false;
 }
(The iterator returned by BreakIterator.getWordInstance() is unique in that the break positions it returns don't represent both the start and end of the thing being iterated over. That is, a sentence-break iterator returns breaks that each represent the end of one sentence and the beginning of the next. With the word-break iterator, the characters between two boundaries might be a word, or they might be the punctuation or whitespace between two words. The above code uses a simple heuristic to determine which boundary is the beginning of a word: If the characters between this boundary and the next boundary include at least one letter (this can be an alphabetical letter, a CJK ideograph, a Hangul syllable, a Kana character, etc.), then the text between this boundary and the next is a word; otherwise, it's the material between words.)

@see CharacterIterator

    return Math.round(characters / CHINESE_RATIO_WORD_CHARACTER);
  }
  
  private int countWords(String text, Locale locale) {
    int count = 0;
    BreakIterator wordIterator = BreakIterator.getWordInstance(locale);
    
    wordIterator.setText(text);
    int start = wordIterator.first();
    int end = wordIterator.next();
    while (end != BreakIterator.DONE) {
      char ch = text.charAt(start);
      if (Character.isLetterOrDigit(ch)) {
        count++;
      }
      start = end;
      end = wordIterator.next();
    }
    
    return count;
  }

View Full Code Here

    if(locale == null) {
      locale = I18nModule.getDefaultLocale();
    }
    
    int count = 0;
    BreakIterator characterIterator = BreakIterator.getCharacterInstance(locale);
    
    characterIterator.setText(text);
    int start = characterIterator.first();
    int end = characterIterator.next();
    while (end != BreakIterator.DONE) {
      char ch = text.charAt(start);
      if (Character.isLetterOrDigit(ch)) {
        count++;
      }
      start = end;
      end = characterIterator.next();
    }
    
    return count;
  }

View Full Code Here

   * @return list of lines
   */
  private List<AttributedCharacterIterator> splitFormatted(final AttributedString text, final int width) {
    final List<AttributedCharacterIterator> lines = new LinkedList<AttributedCharacterIterator>();


    // Candidate break positions come from the default-locale line break iterator.
    final BreakIterator iter = BreakIterator.getLineInstance();
    iter.setText(text.getIterator());
    
    // Index in text where the line currently being assembled starts.
    int previous = iter.first();
    
    // Most recent candidate that fit within width; null when there is none
    // since the last line was emitted.
    AttributedCharacterIterator best = null;
    
    while (iter.next() != BreakIterator.DONE) {
      // Text from the current line start up to the break position just reached.
      final AttributedCharacterIterator candidate = text.getIterator(null, previous, iter.current());


      if (getPixelWidth(candidate) <= width) {
        // check for line breaks within the provided text
        // unfortunately, the BreakIterators are too dumb to tell *why* they consider the 
        // location a break, so the check needs to be implemented here
        final CharacterIterator cit = iter.getText();
        // NOTE(review): isHardLineBreak is defined elsewhere; presumably it inspects the
        // character at the iterator's current position - confirm against its implementation.
        if (isHardLineBreak(cit)) {
          // The candidate fits and ends in a forced break: emit it and start a new line.
          lines.add(candidate);
          previous = iter.current();
          best = null;
        } else {
          // Fits so far; remember it and try to extend with the next break position.
          best = candidate;
        }
      } else {
        // The candidate is too wide, so a line has to be emitted now.
        if (best == null) {
          // could not break the line - the word's simply too long. Use more force to
          // to fit it to the width
          best = splitAggressively(candidate, width);
          // splitAggressively returns an iterator with its own indexing,
          // so instead of using it directly we need to adjust the old one
          previous += best.getEndIndex() - best.getBeginIndex();
        } else {
          // The next line starts where the last fitting candidate ended.
          previous = best.getEndIndex();
          // Trim the trailing white space
          char endChar = best.last();


          // Step backwards from the last character, lowering endIndex to the position
          // of each whitespace character seen, until a non-whitespace one is reached.
          int endIndex = previous;
          while (Character.isWhitespace(endChar) && endChar != CharacterIterator.DONE) {
            endIndex = best.getIndex();
            endChar = best.previous();
          }


          // Re-slice the original text so the emitted line excludes the trimmed whitespace.
          best = text.getIterator(null, best.getBeginIndex(), endIndex);
        }


        lines.add(best);


        // a special check for a hard line break just after the word 
        // that got moved to the next line
        final CharacterIterator cit = iter.getText();
        if (isHardLineBreak(cit)) {
          lines.add(text.getIterator(null, previous, iter.current()));
          previous = iter.current();
        }
        
        // Pick the shortest candidate possible (backtrack a bit, if needed)
        if (iter.current() > previous + 1) {
          iter.previous();
        }


        best = null;


        if (lines.size() > MAX_LINES) {
          /*
           * Limit the height of the text boxes. Append ellipsis
           * to tell the user to take a look at the chat log.
           * The last line is removed twice to avoid the situation
           * where the last text line would fit on the space the
           * ellipsis occupies.
           */
          lines.remove(lines.size() - 1);
          lines.remove(lines.size() - 1);
          lines.add(new AttributedString("...").getIterator());
          return lines;
        }
      }
    }


    // add the rest of the text, if there's any
    if (previous < iter.last()) {
      lines.add(text.getIterator(null, previous, iter.last()));
    }


    return lines;
  }

View Full Code Here

   * 
   * @return iterator to the part of the line that fits in width
   */
  private AttributedCharacterIterator splitAggressively(final AttributedCharacterIterator text, final int width) {
    final int offset = text.getBeginIndex();
    final BreakIterator wordIterator = BreakIterator.getWordInstance();
    
    final AttributedString tmpText = new AttributedString(text);
    // return the original iterator if there are no suitable break points
    AttributedCharacterIterator best = text;
    wordIterator.setText(text);
    
    while (wordIterator.next() != BreakIterator.DONE) {
      final AttributedCharacterIterator candidate = tmpText.getIterator(null, tmpText.getIterator().getBeginIndex(), wordIterator.current() - offset);
      
      if (getPixelWidth(candidate) <= width) {
        best = candidate;
      } else {
        return best;

View Full Code Here

    doTests(CharArrayIterator.newWordInstance());
  }
  
  public void testConsumeWordInstance() {
    // we use the default locale, as its randomized by LuceneTestCase
    BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
    CharArrayIterator ci = CharArrayIterator.newWordInstance();
    for (int i = 0; i < 10000; i++) {
      char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
      ci.setText(text, 0, text.length);
      consume(bi, ci);

View Full Code Here

    doTests(CharArrayIterator.newSentenceInstance());
  }
  
  public void testConsumeSentenceInstance() {
    // we use the default locale, as its randomized by LuceneTestCase
    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
    CharArrayIterator ci = CharArrayIterator.newSentenceInstance();
    for (int i = 0; i < 10000; i++) {
      char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
      ci.setText(text, 0, text.length);
      consume(bi, ci);

View Full Code Here

    }


    public static String wrap(TextWidthCounter textWidthCounter, String s,
            int width, int initialOffset, String initialIndent,
            String subsequentIndent) {
        BreakIterator iter = BreakIterator.getLineInstance();
        iter.setText(s);
        StringBuilder res = new StringBuilder(initialIndent);
        StringBuilder sb = new StringBuilder();
        int currentWidth = initialOffset + initialIndent.length();
        for (int start = iter.first(), end = iter.next(); end != BreakIterator.DONE; start = end, end = iter
                .next()) {
            String sub = s.substring(start, end);
            int subwidth = textWidthCounter.width(sub);
            currentWidth += subwidth;
            if (currentWidth > width) {

View Full Code Here

      String text) {
        if("".equals(text)) {
          return 0;
        }
        // utility that helps us to break the lines
        final BreakIterator bi = BreakIterator.getLineInstance();
        bi.setText(text);
        
        int lineCount = 0;
        final int lineHeight = fm.getHeight();
        
        // offsets for String.substring(start, end);
        int startOffset = bi.first();
        int endOffset = bi.next();
        // we go over each possible line break that BreakIterator suggests.
        do {
          if(endOffset == text.length()) {
            // we are at the end. this would cause IllegalArgumentException
            // so we just subtract 1
            endOffset--;
          }
          // get the width of the current substring
          // and check if we are over the maximum width
          final String substring = text.substring(startOffset, endOffset);
          final int stringWidth = fm.stringWidth(substring);
          if(stringWidth > maxWidth) {
            // calculate how many lines we have to add.
            // If there is a very long string with no spaces
            // it could be that we have to add more than 1 line.
            int toAdd = (int) (Math.ceil((double) stringWidth / (double) maxWidth) - 1);
            lineCount+= toAdd;
            // we need to advance the startOffset so
            // we can start to search for a new line
            startOffset = bi.preceding(endOffset);
            bi.next();
          }
        } while((endOffset = bi.next()) != BreakIterator.DONE);
        // ensure that the rest of a line also gets a line 
        lineCount++;
        return lineHeight * lineCount;
  }

View Full Code Here

public class SentenceDetectionTest extends TamingTextTestJ4 {


  @Test
  public void testBreakIterator() {
    //<start id="sentDetect"/>
    BreakIterator sentIterator = BreakIterator.getSentenceInstance(Locale.US);
    String testString = "This is a sentence.  It has fruits, vegetables," +
            " etc. but does not have meat.  Mr. Smith went to Washington.";
    sentIterator.setText(testString);
    int start = sentIterator.first();
    int end = -1;
    List<String> sentences = new ArrayList<String>();
    while ((end = sentIterator.next()) != BreakIterator.DONE) {
      String sentence = testString.substring(start, end);
      start = end;
      sentences.add(sentence);
      System.out.println("Sentence: " + sentence);
    }

View Full Code Here

      locale = country == null ? new Locale(language) : new Locale(language, country);
    }


    // construct BreakIterator
    String type = params.getFieldParam(fieldName, HighlightParams.BS_TYPE, "WORD").toLowerCase();
    BreakIterator bi = null;
    if(type.equals("character")){
      bi = locale == null ? BreakIterator.getCharacterInstance() : BreakIterator.getCharacterInstance(locale);
    }
    else if(type.equals("word")){
      bi = locale == null ? BreakIterator.getWordInstance() : BreakIterator.getWordInstance(locale);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of java.text.BreakIterator

ae.java.awt.TextComponent$AccessibleAWTTextComponent

ariba.util.core.StringUtil

ch.swingfx.text.TextUtil

com.alibaba.antx.config.wizard.text.ConfigWizard

com.alibaba.maven.plugin.docbook.WordBreaker

com.google.gwt.benchmarks.BenchmarkReport

com.google.gwt.junit.benchmarks.BenchmarkReport

com.ibm.icu.dev.demo.impl.DemoTextBox

com.ibm.icu.dev.demo.rbnf.DemoTextField

com.ibm.icu.dev.tool.docs.ICUTaglet$ICUObsoleteTaglet

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.