Examples of com.ibm.icu.text.UCharacterIterator

com.ibm.icu.text.UCharacterIterator
Abstract class that defines an API for iteration on text objects. This is an interface for forward and backward iteration and random access into a text object. Forward iteration is done with post-increment semantics and backward iteration is done with pre-decrement semantics, whereas the java.text.CharacterIterator interface methods provide forward iteration with pre-increment semantics and backward iteration with pre-decrement semantics. This API is more efficient for forward iteration over code points. The other major difference is that this API can do both code unit and code point iteration, while java.text.CharacterIterator can only iterate over code units and is limited to the BMP (0 - 0xFFFF). @author Ram @stable ICU 2.4

    /**
    * Testing cloning
    */
    public void TestClone() throws CloneNotSupportedException
    {
         UCharacterIterator iterator = UCharacterIterator.getInstance("testing");
         UCharacterIterator cloned = (UCharacterIterator)iterator.clone();
         int completed = 0;
         while (completed != UCharacterIterator.DONE) {
            completed = iterator.next();
            if (completed != cloned.next()) {
                errln("Cloned operation failed");
            }
         }
    }

View Full Code Here

    /**
     * Testing iteration
     */
    public void TestIteration()
    {
        UCharacterIterator iterator  = UCharacterIterator.getInstance(
                                                       ITERATION_STRING_);
        UCharacterIterator iterator2 = UCharacterIterator.getInstance(
                                                       ITERATION_STRING_);
        iterator.setToStart();                                               
        if (iterator.current() != ITERATION_STRING_.charAt(0)) {
            errln("Iterator failed retrieving first character");
        }
        iterator.setToLimit(); 
        if (iterator.previous() != ITERATION_STRING_.charAt(
                                       ITERATION_STRING_.length() - 1)) {
            errln("Iterator failed retrieving last character");
        }                                               
        if (iterator.getLength() != ITERATION_STRING_.length()) {
            errln("Iterator failed determining begin and end index");
        }  
        iterator2.setIndex(0);
        iterator.setIndex(0);
        int ch = 0;
        while (ch != UCharacterIterator.DONE) {
            int index = iterator2.getIndex();
            ch = iterator2.nextCodePoint();
            if (index != ITERATION_SUPPLEMENTARY_INDEX) {
                if (ch != (int)iterator.next() && 
                    ch != UCharacterIterator.DONE) {
                    errln("Error mismatch in next() and nextCodePoint()"); 
                }
            }
            else {
                if (UTF16.getLeadSurrogate(ch) != iterator.next() ||
                    UTF16.getTrailSurrogate(ch) != iterator.next()) {
                    errln("Error mismatch in next and nextCodePoint for " +
                          "supplementary characters");
                }
            }
        }
        iterator.setIndex(ITERATION_STRING_.length());
        iterator2.setIndex(ITERATION_STRING_.length());
        while (ch != UCharacterIterator.DONE) {
            int index = iterator2.getIndex();
            ch = iterator2.previousCodePoint();
            if (index != ITERATION_SUPPLEMENTARY_INDEX) {
                if (ch != (int)iterator.previous() && 
                    ch != UCharacterIterator.DONE) {
                    errln("Error mismatch in previous() and " +
                          "previousCodePoint()");

View Full Code Here

     *            too small for the output.
     */
    public static int compress(String source, byte buffer[], int offset) 
    {
        int prev = 0;
        UCharacterIterator iterator = UCharacterIterator.getInstance(source);
        int codepoint = iterator.nextCodePoint();
        while (codepoint != UCharacterIterator.DONE) {
            if (prev < 0x4e00 || prev >= 0xa000) {
                prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
            } 
            else {
                // Unihan U+4e00..U+9fa5:
                // double-bytes down from the upper end
                prev = 0x9fff - SLOPE_REACH_POS_2_;
            }
        
            offset = writeDiff(codepoint - prev, buffer, offset);
            prev = codepoint;
            codepoint = iterator.nextCodePoint();
        }
        return offset;
    }

View Full Code Here

     */
    public static int getCompressionLength(String source) 
    {
        int prev = 0;
        int result = 0;
        UCharacterIterator iterator =  UCharacterIterator.getInstance(source);
        int codepoint = iterator.nextCodePoint();
        while (codepoint != UCharacterIterator.DONE) {
            if (prev < 0x4e00 || prev >= 0xa000) {
                prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
            } 
            else {
                // Unihan U+4e00..U+9fa5:
                // double-bytes down from the upper end
                prev = 0x9fff - SLOPE_REACH_POS_2_;
            }
        
            codepoint = iterator.nextCodePoint();
            result += lengthOfDiff(codepoint - prev);
            prev = codepoint;
        }
        return result;
    }

View Full Code Here

     *            too small for the output.
     */
    public static int compress(String source, byte buffer[], int offset) 
    {
        int prev = 0;
        UCharacterIterator iterator = UCharacterIterator.getInstance(source);
        int codepoint = iterator.nextCodePoint();
        while (codepoint != UCharacterIterator.DONE) {
            if (prev < 0x4e00 || prev >= 0xa000) {
                prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
            } 
            else {
                // Unihan U+4e00..U+9fa5:
                // double-bytes down from the upper end
                prev = 0x9fff - SLOPE_REACH_POS_2_;
            }
        
            offset = writeDiff(codepoint - prev, buffer, offset);
            prev = codepoint;
            codepoint = iterator.nextCodePoint();
        }
        return offset;
    }

View Full Code Here

     */
    public static int getCompressionLength(String source) 
    {
        int prev = 0;
        int result = 0;
        UCharacterIterator iterator =  UCharacterIterator.getInstance(source);
        int codepoint = iterator.nextCodePoint();
        while (codepoint != UCharacterIterator.DONE) {
            if (prev < 0x4e00 || prev >= 0xa000) {
                prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
            } 
            else {
                // Unihan U+4e00..U+9fa5:
                // double-bytes down from the upper end
                prev = 0x9fff - SLOPE_REACH_POS_2_;
            }
        
            codepoint = iterator.nextCodePoint();
            result += lengthOfDiff(codepoint - prev);
            prev = codepoint;
        }
        return result;
    }

View Full Code Here

    for (;;) {
      sepIndex = getSeparatorIndex(srcArr, sepIndex, srcArr.length);
      String label = new String(srcArr, oldSepIndex, sepIndex - oldSepIndex);
      //make sure this is not a root label separator.
      if (!(label.length() == 0 && sepIndex == srcArr.length)) {
        UCharacterIterator iter = UCharacterIterator.getInstance(label);
        result.append(convertToASCII(iter, options));
      }
      if (sepIndex == srcArr.length) {
        break;
      }

View Full Code Here

      sepIndex = getSeparatorIndex(srcArr, sepIndex, srcArr.length);
      String label = new String(srcArr, oldSepIndex, sepIndex - oldSepIndex);
      if (label.length() == 0 && sepIndex != srcArr.length) {
        throw new StringPrepParseException("Found zero length lable after NamePrep.", StringPrepParseException.ZERO_LENGTH_LABEL);
      }
      UCharacterIterator iter = UCharacterIterator.getInstance(label);
      result.append(convertToUnicode(iter, options));
      if (sepIndex == srcArr.length) {
        break;
      }
      // Unlike the ToASCII operation we don't normalize the label separators

View Full Code Here

     *            too small for the output.
     */
    public static int compress(String source, byte buffer[], int offset) 
    {
        int prev = 0;
        UCharacterIterator iterator = UCharacterIterator.getInstance(source);
        int codepoint = iterator.nextCodePoint();
        while (codepoint != UCharacterIterator.DONE) {
            if (prev < 0x4e00 || prev >= 0xa000) {
                prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
            } 
            else {
                // Unihan U+4e00..U+9fa5:
                // double-bytes down from the upper end
                prev = 0x9fff - SLOPE_REACH_POS_2_;
            }
        
            offset = writeDiff(codepoint - prev, buffer, offset);
            prev = codepoint;
            codepoint = iterator.nextCodePoint();
        }
        return offset;
    }

View Full Code Here

     */
    public static int getCompressionLength(String source) 
    {
        int prev = 0;
        int result = 0;
        UCharacterIterator iterator =  UCharacterIterator.getInstance(source);
        int codepoint = iterator.nextCodePoint();
        while (codepoint != UCharacterIterator.DONE) {
            if (prev < 0x4e00 || prev >= 0xa000) {
                prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
            } 
            else {
                // Unihan U+4e00..U+9fa5:
                // double-bytes down from the upper end
                prev = 0x9fff - SLOPE_REACH_POS_2_;
            }
        
            codepoint = iterator.nextCodePoint();
            result += lengthOfDiff(codepoint - prev);
            prev = codepoint;
        }
        return result;
    }

View Full Code Here

0 1 2 3

TOP

Related Classes of com.ibm.icu.text.UCharacterIterator

com.ibm.icu.dev.test.iterator.TestUCharacterIterator

com.ibm.icu.dev.test.normalizer.BasicTest

com.ibm.icu.dev.test.stringprep.IDNAReference

com.ibm.icu.dev.test.stringprep.NamePrepTransform

com.ibm.icu.dev.test.stringprep.NFS4StringPrep

com.ibm.icu.dev.test.stringprep.PunycodeReference

com.ibm.icu.dev.test.stringprep.TestIDNA

com.ibm.icu.dev.test.stringprep.TestIDNARef

com.ibm.icu.impl.BOCU

com.ibm.icu.impl.CharacterIteratorWrapper

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.