Package com.ibm.icu.text

Examples of com.ibm.icu.text.BreakIterator


    // expression (see the Javadoc for class Pattern).
    // Need to avoid both String.split and regular expressions, in order to
    // compile against JCL Foundation (bug 80053).
    // Also need to do this in an NL-sensitive way. The use of BreakIterator
    // was suggested in bug 90579.
    BreakIterator iter = BreakIterator.getWordInstance();
    iter.setText(text);
    int i = iter.first();
    while (i != java.text.BreakIterator.DONE && i < text.length()) {
      int j = iter.following(i);
      if (j == java.text.BreakIterator.DONE) {
        j = text.length();
      }
      // match the word
      if (Character.isLetterOrDigit(text.charAt(i))) {
View Full Code Here


  public void inform(ResourceLoader loader) throws IOException {
    assert tailored != null : "init must be called first!";
    if (tailored.isEmpty()) {
      config = new DefaultICUTokenizerConfig();
    } else {
      final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
      for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
        int code = entry.getKey();
        String resourcePath = entry.getValue();
        breakers[code] = parseRules(resourcePath, loader);
      }
View Full Code Here

  public void inform(ResourceLoader loader) throws IOException {
    assert tailored != null : "init must be called first!";
    if (tailored.isEmpty()) {
      config = new DefaultICUTokenizerConfig(cjkAsWords);
    } else {
      final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
      for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
        int code = entry.getKey();
        String resourcePath = entry.getValue();
        breakers[code] = parseRules(resourcePath, loader);
      }
View Full Code Here

  public void inform(ResourceLoader loader) throws IOException {
    assert tailored != null : "init must be called first!";
    if (tailored.isEmpty()) {
      config = new DefaultICUTokenizerConfig();
    } else {
      final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
      for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
        int code = entry.getKey();
        String resourcePath = entry.getValue();
        breakers[code] = parseRules(resourcePath, loader);
      }
View Full Code Here

  public void inform(ResourceLoader loader) throws IOException {
    assert tailored != null : "init must be called first!";
    if (tailored.isEmpty()) {
      config = new DefaultICUTokenizerConfig(cjkAsWords);
    } else {
      final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
      for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
        int code = entry.getKey();
        String resourcePath = entry.getValue();
        breakers[code] = parseRules(resourcePath, loader);
      }
View Full Code Here

      if (position == line.getOffset() + line.getLength())
        return;

      fDocIter.setDocument(document, line);

      BreakIterator breakIter= BreakIterator.getWordInstance();
      breakIter.setText(fDocIter);

      int start= breakIter.preceding(position);
      if (start == BreakIterator.DONE)
        start= line.getOffset();

      int end= breakIter.following(position);
      if (end == BreakIterator.DONE)
        end= line.getOffset() + line.getLength();

      if (breakIter.isBoundary(position)) {
        if (end - position > position- start)
          start= position;
        else
          end= position;
      }
View Full Code Here

                        if(!csp.addStringCaseClosure(str, foldSet)) {
                            foldSet.add(str); // does not map to code points: add the folded string itself
                        }
                    }
                } else {
                    BreakIterator bi = BreakIterator.getWordInstance(root);
                    Iterator it = strings.iterator();
                    while (it.hasNext()) {
                        str = (String)it.next();
                        foldSet.add(UCharacter.toLowerCase(root, str));
                        foldSet.add(UCharacter.toTitleCase(root, str, bi));
View Full Code Here

     * @param type
     * @draft ICU 3.6
     * @provisional This API might change or be removed in a future release.
     */
    protected BreakIterator guessBreakIterator(int type) {
        BreakIterator bitr = null;
        ULocale brkLocale = getAvailableLocale(TYPE_BREAKITERATOR);
        if (brkLocale == null) {
            brkLocale = ULocale.ROOT;
        }
        switch (type) {
View Full Code Here

                String test = TITLE_DATA_[i++];
                String expected = TITLE_DATA_[i++];
                ULocale locale = new ULocale(TITLE_DATA_[i++]);
                int breakType = Integer.parseInt(TITLE_DATA_[i++]);
                String optionsString = TITLE_DATA_[i++];
                BreakIterator iter =
                    breakType >= 0 ?
                        BreakIterator.getBreakInstance(locale, breakType) :
                        breakType == -2 ?
                            // Open a trivial break iterator that only delivers { 0, length }
                            // or even just { 0 } as boundaries.
View Full Code Here

    public void TestDutchTitle() {
        ULocale LOC_DUTCH = new ULocale("nl");
        int options = 0;
        options |= UCharacter.TITLECASE_NO_LOWERCASE;
        BreakIterator iter = BreakIterator.getWordInstance(LOC_DUTCH);

        assertEquals("Dutch titlecase check in English",
                "Ijssel Igloo Ijmuiden",
                UCharacter.toTitleCase(ULocale.ENGLISH, "ijssel igloo IJMUIDEN", null));

        assertEquals("Dutch titlecase check in Dutch",
                "IJssel Igloo IJmuiden",
                UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null));

        iter.setText("ijssel igloo IjMUIdEN iPoD ijenough");
        assertEquals("Dutch titlecase check in Dutch with nolowercase option",
                "IJssel Igloo IJMUIdEN IPoD IJenough",
                UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IjMUIdEN iPoD ijenough", iter, options));
    }
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.BreakIterator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.