Package com.ibm.icu.text

Examples of com.ibm.icu.text.UnicodeSet


 
  public boolean toPatternAux(int start, int end) {
    // use Integer.toString because Utility.hex doesn't handle ints
    String source = "0x" + Integer.toString(start,16).toUpperCase();
    if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase();
    UnicodeSet testSet = new UnicodeSet();
    testSet.add(start, end);
    return checkPat(source, testSet);
  }
View Full Code Here


    }
    return true;
  }
 
  boolean checkPat (String source, UnicodeSet testSet, String pat) {
    UnicodeSet testSet2 = new UnicodeSet(pat);
    if (!testSet2.equals(testSet)) {
      errln("Fail toPattern: " + source + "; " + pat + " => " +
          testSet2.toPattern(false) + ", expected " +
          testSet.toPattern(false));
      return false;
    }
    return true;
  }
View Full Code Here

    }
    return buf.toString();
  }
 
  public void TestPatterns() {
    UnicodeSet set = new UnicodeSet();
    expectPattern(set, "[[a-m]&[d-z]&[k-y]]""km");
    expectPattern(set, "[[a-z]-[m-y]-[d-r]]""aczz");
    expectPattern(set, "[a\\-z]""--aazz");
    expectPattern(set, "[-az]""--aazz");
    expectPattern(set, "[az-]""--aazz");
    expectPattern(set, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
   
    // Throw in a test of complement
    set.complement();
    String exp = '\u0000' + "aeeoouu" + (char)('z'+1) + '\uFFFF';
    expectPairs(set, exp);
  }
View Full Code Here

    expectPairs(set, exp);
  }
 
  public void TestCategories() {
    int failures = 0;
    UnicodeSet set = new UnicodeSet("[:Lu:]");
    expectContainment(set, "ABC", "abc");
   
    // Make sure generation of L doesn't pollute cached Lu set
    // First generate L, then Lu
    // not used int TOP = 0x200; // Don't need to go over the whole range:
    set = new UnicodeSet("[:L:]");
    for (int i=0; i<0x200; ++i) {
      boolean l = UCharacter.isLetter(i);
      if (l != set.contains((char)i)) {
        errln("FAIL: L contains " + (char)i + " = " +
            set.contains((char)i));
        if (++failures == 10) break;
      }
    }
   
    set = new UnicodeSet("[:Lu:]");
    for (int i=0; i<0x200; ++i) {
      boolean lu = (UCharacter.getType(i) == ECharacterCategory.UPPERCASE_LETTER);
      if (lu != set.contains((char)i)) {
        errln("FAIL: Lu contains " + (char)i + " = " +
            set.contains((char)i));
        if (++failures == 20) break;
      }
    }
  }
View Full Code Here

      }
    }
  }
 
  public void TestAddRemove() {
    UnicodeSet set = new UnicodeSet();
    set.add('a', 'z');
    expectPairs(set, "az");
    set.remove('m', 'p');
    expectPairs(set, "alqz");
    set.remove('e', 'g');
    expectPairs(set, "adhlqz");
    set.remove('d', 'i');
    expectPairs(set, "acjlqz");
    set.remove('c', 'r');
    expectPairs(set, "absz");
    set.add('f', 'q');
    expectPairs(set, "abfqsz");
    set.remove('a', 'g');
    expectPairs(set, "hqsz");
    set.remove('a', 'z');
    expectPairs(set, "");
   
    // Try removing an entire set from another set
    expectPattern(set, "[c-x]", "cx");
    UnicodeSet set2 = new UnicodeSet();
    expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
    set.removeAll(set2);
    expectPairs(set, "deluxx");
   
    // Try adding an entire set to another set
View Full Code Here

           
            generatePropertyAliases(true);
           
            BagFormatter bf = new BagFormatter();

            UnicodeSet us = new UnicodeSet("[:gc=nd:]")
            BagFormatter.CONSOLE.println("[:gc=nd:]");
            bf.showSetNames(BagFormatter.CONSOLE,us);

            us = new UnicodeSet("[:numeric_value=2:]")
            BagFormatter.CONSOLE.println("[:numeric_value=2:]");
            bf.showSetNames(BagFormatter.CONSOLE,us);
           
            us = new UnicodeSet("[:numeric_type=numeric:]");  
            BagFormatter.CONSOLE.println("[:numeric_type=numeric:]");
            bf.showSetNames(BagFormatter.CONSOLE,us);
           
            UnicodeProperty.Factory ups = ICUPropertyFactory.make();
            us = ups.getSet("gc=mn", null, null);
View Full Code Here

    /**
     * Set built from the pattern below: letters ([:L:]) intersected with the
     * characters whose bidi class is R or AL. Used by isRTL as the
     * right-to-left reference set.
     */
    static final UnicodeSet RTL = new UnicodeSet("[[:L:]&[[:bidi class=R:][:bidi class=AL:]]]");
   
    static boolean isRTL(Locale loc) {       
        // in 2.8 we can use the exemplar characters, but for 2.6 we have to work around it
        int[] scripts = UScript.getCode(loc);
        return new UnicodeSet()
            .applyIntPropertyValue(UProperty.SCRIPT, scripts == null ? UScript.LATIN : scripts[0])
            .retainAll(RTL).size() != 0;
    }
View Full Code Here

         * unintentionally incomplete last-minute change.
         */
        String digitsPattern = "[:Nd:]";
        String decimalValuesPattern = "[:Numeric_Type=Decimal:]";

        UnicodeSet digits, decimalValues;

        digits= new UnicodeSet(digitsPattern);
        decimalValues=new UnicodeSet(decimalValuesPattern);


        compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", true);


View Full Code Here

              UCharacter.getMirror(0x301d) == 0x301d)) {
            errln("getMirror() does not work correctly");
        }

        /* verify that Bidi_Mirroring_Glyph roundtrips */
        UnicodeSet set=new UnicodeSet("[:Bidi_Mirrored:]");
        UnicodeSetIterator iter=new UnicodeSetIterator(set);
        int start, end, c2, c3;
        while(iter.nextRange() && (start=iter.codepoint)>=0) {
            end=iter.codepointEnd;
            do {
View Full Code Here

            // Test getCharNameCharacters
            if (getInclusion() >= 10) {
                boolean map[] = new boolean[256];

                UnicodeSet set = new UnicodeSet(1, 0); // empty set
                UnicodeSet dumb = new UnicodeSet(1, 0); // empty set

                // uprv_getCharNameCharacters() will likely return more lowercase
                // letters than actual character names contain because
                // it includes all the characters in lowercased names of
                // general categories, for the full possible set of extended names.
                UCharacterName.getInstance().getCharNameCharacters(set);

                // build set the dumb (but sure-fire) way
                Arrays.fill(map, false);

                int maxLength = 0;
                for (int cp = 0; cp < 0x110000; ++ cp) {
                    String n = UCharacter.getExtendedName(cp);
                    int len = n.length();
                    if (len > maxLength) {
                        maxLength = len;
                    }

                    for (int i = 0; i < len; ++ i) {
                        char ch = n.charAt(i);
                        if (!map[ch & 0xff]) {
                            dumb.add(ch);
                            map[ch & 0xff] = true;
                        }
                    }
                }

                length = UCharacterName.getInstance().getMaxCharNameLength();
                if (length != maxLength) {
                    errln("getMaxCharNameLength()=" + length
                          + " differs from the maximum length " + maxLength
                          + " of all extended names");
                }

                // compare the sets.  Where is my uset_equals?!!
                boolean ok = true;
                for (int i = 0; i < 256; ++ i) {
                    if (set.contains(i) != dumb.contains(i)) {
                        if (0x61 <= i && i <= 0x7a // a-z
                            && set.contains(i) && !dumb.contains(i)) {
                            // ignore lowercase a-z that are in set but not in dumb
                            ok = true;
                        }
                        else {
                            ok = false;
                            break;
                        }
                    }
                }

                String pattern1 = set.toPattern(true);
                String pattern2 = dumb.toPattern(true);

                if (!ok) {
                    errln("FAIL: getCharNameCharacters() returned " + pattern1
                          + " expected " + pattern2
                          + " (too many lowercase a-z are ok)");
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.UnicodeSet

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.