Package com.ibm.icu.text

Examples of com.ibm.icu.text.UnicodeSet


                             "StringTokenizer", "constructors!"};
        StringTokenizer defaultst = new StringTokenizer(str);
        StringTokenizer stdelimiter = new StringTokenizer(str, delimiter);
        StringTokenizer stdelimiterreturn = new StringTokenizer(str, delimiter,
                                                                false);
        UnicodeSet delimiterset = new UnicodeSet("[" + delimiter + "]", false);
        StringTokenizer stdelimiterset = new StringTokenizer(str, delimiterset);
        StringTokenizer stdelimitersetreturn = new StringTokenizer(str,
                                                                delimiterset,
                                                                false);
        for (int i = 0; i < expected.length; i ++) {
            if (!(defaultst.nextElement().equals(expected[i])
                  && stdelimiter.nextElement().equals(expected[i])
                  && stdelimiterreturn.nextElement().equals(expected[i])
                  && stdelimiterset.nextElement().equals(expected[i])
                  && stdelimitersetreturn.nextElement().equals(expected[i]))) {
                errln("Constructor with default delimiter gives wrong results");
            }
        }
       
        String expected1[] = {"this", "\t", "is", "\n", "a", "\r", "string", "\f",
                            "testing", "\t", "StringTokenizer", "\n",
                            "constructors!"};
        stdelimiterreturn = new StringTokenizer(str, delimiter, true);
        stdelimitersetreturn = new StringTokenizer(str, delimiterset, true);
        for (int i = 0; i < expected1.length; i ++) {
            if (!(stdelimiterreturn.nextElement().equals(expected1[i])
                  && stdelimitersetreturn.nextElement().equals(expected1[i]))) {
                errln("Constructor with default delimiter and delimiter tokens gives wrong results");
            }
        }
                           
        stdelimiter = new StringTokenizer(str, (String)null);
        stdelimiterreturn = new StringTokenizer(str, (String)null, false);
        delimiterset = null;
        stdelimiterset = new StringTokenizer(str, delimiterset);
        stdelimitersetreturn = new StringTokenizer(str, delimiterset, false);
       
        if (!(stdelimiter.nextElement().equals(str)
              && stdelimiterreturn.nextElement().equals(str)
              && stdelimiterset.nextElement().equals(str)
              && stdelimitersetreturn.nextElement().equals(str))) {
            errln("Constructor with null delimiter gives wrong results");
        }
       
        delimiter = "";
        stdelimiter = new StringTokenizer(str, delimiter);
        stdelimiterreturn = new StringTokenizer(str, delimiter, false);
        delimiterset = new UnicodeSet();
        stdelimiterset = new StringTokenizer(str, delimiterset);
        stdelimitersetreturn = new StringTokenizer(str, delimiterset, false);
       
        if (!(stdelimiter.nextElement().equals(str)
              && stdelimiterreturn.nextElement().equals(str)
View Full Code Here


     */
    private static final synchronized UnicodeSet internalGetNXHangul() {
        /* internal function, does not check for incoming U_FAILURE */
   
        if(nxCache[NX_HANGUL]==null) {
             nxCache[NX_HANGUL]=new UnicodeSet(0xac00, 0xd7a3);
        }
        return nxCache[NX_HANGUL];
    }
View Full Code Here

//printTable();
    }

    public void buildColumnMap(InputStreamReader in) throws IOException {
System.out.println("Building column map...");
        UnicodeSet charsInFile = new UnicodeSet();
        int c = in.read();
int totalChars = 0;
        while (c >= 0) {
++totalChars; if (totalChars > 0 && totalChars % 5000 == 0) System.out.println("Read " + totalChars + " characters...");
            if (c > ' ')
                charsInFile.add((char)c);
            c = in.read();
        }
//        Test.debugPrintln(charsInFile.toString());

        StringBuffer tempReverseMap = new StringBuffer();
        tempReverseMap.append(' ');

        columnMap = new CompactByteArray();
        int n = charsInFile.getRangeCount();
        byte p = 1;
        for (int i=0; i<n; ++i) {
            char start = (char) charsInFile.getRangeStart(i);
            char end = (char) charsInFile.getRangeEnd(i);
            for (char ch = start; ch <= end; ch++) {
                if (columnMap.elementAt(Character.toLowerCase(ch)) == 0) {
                    columnMap.setElementAt(Character.toUpperCase(ch), Character.toUpperCase(ch),
                                        p);
                    columnMap.setElementAt(Character.toLowerCase(ch), Character.toLowerCase(ch),
View Full Code Here

        /* internal function, does not check for incoming U_FAILURE */
   
        if(nxCache[NX_CJK_COMPAT]==null) {

            /* build a set from [CJK Ideographs]&[has canonical decomposition] */
            UnicodeSet set, hasDecomp;
   
            set=new UnicodeSet("[:Ideographic:]");
   
            /* start with an empty set for [has canonical decomposition] */
            hasDecomp=new UnicodeSet();
   
            /* iterate over all ideographs and remember which canonically decompose */
            UnicodeSetIterator it = new UnicodeSetIterator(set);
            int start, end;
            long norm32;
View Full Code Here

            return null;
        }
   
        if(nxCache[options]==null) {
            /* build a set with all code points that were not designated by the specified Unicode version */
            UnicodeSet set = new UnicodeSet();

            switch(options) {
            case Normalizer.UNICODE_3_2:
                set.applyPattern("[:^Age=3.2:]");
                break;
            default:
                return null;
            }
           
View Full Code Here

            if((options & OPTIONS_UNICODE_MASK)!=0 && (options & OPTIONS_NX_MASK)==0) {
                return internalGetNXUnicode(options);
            }
   
            /* build a set from multiple subsets */
            UnicodeSet set;
            UnicodeSet other;
   
            set=new UnicodeSet();

   
            if((options & NX_HANGUL)!=0 && null!=(other=internalGetNXHangul())) {
                set.addAll(other);
            }
View Full Code Here

        for (int script = fMinScript; script <= fMaxScript; script += 1) {
            fScriptNames[script - fMinScript] = UScript.getName(script).toUpperCase();
            fScriptTags[script - fMinScript= UScript.getShortName(script).toLowerCase();
           
            if (script != commonScript) {
                UnicodeSet scriptSet  = new UnicodeSet("\\p{" + fScriptTags[script - fMinScript] + "}");
                UnicodeSetIterator it = new UnicodeSetIterator(scriptSet);
           
                while (it.nextRange()) {
                    Record record = new Record(it.codepoint, it.codepointEnd, script);
                   
View Full Code Here

    // TODO: The UnicodeSet is constrained to the BMP because the ClassTable data structure can
    // only handle 16-bit entries. This is probably OK as long as there aren't any joining scripts
    // outside of the BMP...
    public void buildShapingTypes(String filename)
    {
        UnicodeSet shapingTypes = new UnicodeSet("[[\\P{Joining_Type=Non_Joining}] & [\\u0000-\\uFFFF]]");
        int count = shapingTypes.size();
       
        System.out.println("There are " + count + " characters with a joining type.");
       
        for(int i = 0; i < count; i += 1) {
            int ch = shapingTypes.charAt(i);
           
            classTable.addMapping(ch, UCharacter.getIntPropertyValue(ch, UProperty.JOINING_TYPE));
        }
       
        LigatureModuleWriter writer = new LigatureModuleWriter();
View Full Code Here

                + ", NameChoice: " + nameChoice + ", "
                + e1.getClass().getName());
            continue;
          }
          logln("Value (" + valueNum + "): " + valueName);
          UnicodeSet testSet;
          try {
            testSet = new UnicodeSet("[:" + propName + "=" + valueName + ":]");
          } catch (RuntimeException e) {
            errln("Can't create UnicodeSet for: "
                + "Property (" + propNum + "): " + propName + ", "
                + "Value (" + valueNum + "): " + valueName + ", "
                + e.getClass().getName());
            continue;
          }
          UnicodeSet collectedErrors = new UnicodeSet();
          for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.next();) {
            int value = UCharacter.getIntPropertyValue(it.codepoint, propNum);
            if (value != valueNum) {
              collectedErrors.add(it.codepoint);
            }
          }
          if (collectedErrors.size() != 0) {
            errln("Property Value Differs: "
                + "Property (" + propNum + "): " + propName + ", "
                + "Value (" + valueNum + "): " + valueName + ", "
                + "Differing values: " + collectedErrors.toPattern(true));
          }
        }
      }
    }
  }
View Full Code Here

   */
  public void TestToPattern() throws Exception {
    // Test that toPattern() round trips with syntax characters
    // and whitespace.
    for (int i = 0; i < OTHER_TOPATTERN_TESTS.length; ++i) {
      checkPat(OTHER_TOPATTERN_TESTS[i], new UnicodeSet(OTHER_TOPATTERN_TESTS[i]));
    }
    for (int i = 0; i <= 0x10FFFF; ++i) {
      if ((i <= 0xFF && !UCharacter.isLetter(i)) || UCharacter.isWhitespace(i)) {
        // check various combinations to make sure they all work.
        if (i != 0 && !toPatternAux(i, i)) continue;
        if (!toPatternAux(0, i)) continue;
        if (!toPatternAux(i, 0xFFFF)) continue;
      }
    }
   
    // Test pattern behavior of multicharacter strings.
    UnicodeSet s = new UnicodeSet("[a-z {aa} {ab}]");
    expectToPattern(s, "[a-z{aa}{ab}]",
        new String[] {"aa", "ab", NOT, "ac"});
    s.add("ac");
    expectToPattern(s, "[a-z{aa}{ab}{ac}]",
        new String[] {"aa", "ab", "ac", NOT, "xy"});
   
    s.applyPattern("[a-z {\\{l} {r\\}}]");
    expectToPattern(s, "[a-z{r\\}}{\\{l}]",
        new String[] {"{l", "r}", NOT, "xy"});
    s.add("[]");
    expectToPattern(s, "[a-z{\\[\\]}{r\\}}{\\{l}]",
        new String[] {"{l", "r}", "[]", NOT, "xy"});
   
    s.applyPattern("[a-z {\u4E01\u4E02}{\\n\\r}]");
    expectToPattern(s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]",
        new String[] {"\u4E01\u4E02", "\n\r"});
   
    s.clear();
    s.add("abc");
    s.add("abc");
    expectToPattern(s, "[{abc}]",
        new String[] {"abc", NOT, "ab"});
   
    // JB#3400: For 2 character ranges prefer [ab] to [a-b]
    s.clear();
    s.add('a', 'b');
    expectToPattern(s, "[ab]", null);
   
    // Cover applyPattern, applyPropertyAlias
    s.clear();
    s.applyPattern("[ab ]", true);
    expectToPattern(s, "[ab]", new String[] {"a", NOT, "ab", " "});
    s.clear();
    s.applyPattern("[ab ]", false);
    expectToPattern(s, "[\\ ab]", new String[] {"a", "\u0020", NOT, "ab"});
   
    s.clear();
    s.applyPropertyAlias("nv", "0.5");
    expectToPattern(s, "[\\u00BD\\u0D74\\u0F2A\\u2CFD\\U00010141\\U00010175\\U00010176]", null);
    // Unicode 5.1 adds Malayalam 1/2 (\u0D74)
   
    s.clear();
    s.applyPropertyAlias("gc", "Lu");
    // TODO expectToPattern(s, what?)

    // RemoveAllStrings()
    s.clear();
    s.applyPattern("[a-z{abc}{def}]");
    expectToPattern(s, "[a-z{abc}{def}]", null);
    s.removeAllStrings();
    expectToPattern(s, "[a-z]", null);
  }
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.UnicodeSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.