Package com.ibm.icu.text

Examples of com.ibm.icu.text.UnicodeSet


            }
        }
    }
   
    public void TestVariableCharacters() {
        UnicodeSet valid = new UnicodeSet("[G   y   Y   u   Q   q   M   L   w   W   d   D   F   g   E   e   c   a   h   H   K   k   m   s   S   A   z   Z   v   V]");
        for (char c = 0; c < 0xFF; ++c) {
            boolean works = false;
            try {
                VariableField vf = new VariableField(String.valueOf(c), true);
                logln("VariableField " + vf.toString());
                works = true;
            } catch (Exception e) {}
            if (works != valid.contains(c)) {
                if (works) {
                    errln("VariableField can be created with illegal character: " + c);
                } else {
                    errln("VariableField can't be created with legal character: " + c);
                }
View Full Code Here


    public static final String UNUSED = "??";

    public final UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
        if (result == null)
            result = new UnicodeSet();
        if (isType(STRING_OR_MISC_MASK)) {
            for (int i = 0; i <= 0x10FFFF; ++i) {
                String value = getValue(i);
                if (value != null && matcher.matches(value)) {
                    result.add(i);
View Full Code Here

                    System.out.println(propertyName + "=" + propertyValue);
                UnicodeProperty prop = getProperty(propertyName);
                if (prop == null)
                    return false;
                result.clear();
                UnicodeSet x = prop.getSet(propertyValue, result);
                return x.size() != 0;
            }
View Full Code Here

                if (prop == null) {
                    throw new IllegalArgumentException("Invalid Property in: "
                            + s + "\r\nUse " + showSet(getAvailableNames()));
                }
                String value = s.substring(pos + 1);
                UnicodeSet set;
                if (value.startsWith("\u00AB")) { // regex!
                    set = prop.getSet(regexMatcher.set(value.substring(1, value
                            .length() - 1)));
                } else {
                    set = prop.getSet(value);
                }
                if (set.size() == 0) {
                    throw new IllegalArgumentException(
                            "Empty Property-Value in: " + s + "\r\nUse "
                                    + showSet(prop.getAvailableValues()));
                }
                if (DEBUG)
                    System.out.println("\t(" + prefix + ")Returning "
                            + set.toPattern(true));
                return set.toPattern(true).toCharArray(); // really ugly
            }
View Full Code Here

        int flags)
    {
        if (pw == null) pw = CONSOLE;
        String[] names = { name1, name2 };

        UnicodeSet temp;
       
        if ((flags&1) != 0) {
            temp = new UnicodeSet(set1).removeAll(set2);
            pw.print(lineSeparator);
            pw.print(inOut.format(names));
            pw.print(lineSeparator);
            showSetNames(pw, temp);
        }

        if ((flags&2) != 0) {
            temp = new UnicodeSet(set2).removeAll(set1);
            pw.print(lineSeparator);
            pw.print(outIn.format(names));
            pw.print(lineSeparator);
            showSetNames(pw, temp);
        }

        if ((flags&4) != 0) {
            temp = new UnicodeSet(set2).retainAll(set1);
            pw.print(lineSeparator);
            pw.print(inIn.format(names));
            pw.print(lineSeparator);
            showSetNames(pw, temp);
        }
View Full Code Here

        }

        int numOfEncodings = encodings.size();
        for (int i = 0; i < numOfEncodings; i++) {
            resultsManually[findIndex((String) encodings.get(i))] = true;
            UnicodeSet unicodePointSet = new UnicodeSet();
            Charset testCharset = CharsetICU.forNameICU((String) encodings
                    .get(i));
            ((CharsetICU) testCharset).getUnicodeSet(unicodePointSet,
                    mappingTypes);
            int ch;
            int index = 0;
            while (index < s.length()) {
                ch = UTF16.charAt(s, index);
                if (!excludedEncodings.contains(ch)
                        && !unicodePointSet.contains(ch)) {
                    resultsManually[findIndex((String) encodings.get(i))] = false;
                    break;
                }
                index += UTF16.getCharCount(ch);
            }
View Full Code Here

                171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182,
                183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
                195, 196, 197, 198, 199, 200, -1, 1, -1 };

        UnicodeSet[] excludedSets = new UnicodeSet[3];
        excludedSets[0] = new UnicodeSet();
        for (int i = 1; i < 3; i++) {
            excludedSets[i] = new UnicodeSet(i * 30, i * 30 + 500);
        }
       
        for (int excludedSetId = 0; excludedSetId < 3; excludedSetId++) {
            for (int testCaseIdx = 0, prev = 0; testCaseIdx < encodingsTestCases.length; testCaseIdx++) {
                if (encodingsTestCases[testCaseIdx] != -1)
View Full Code Here

        ConversionCase cc = new ConversionCase();
        CharsetProviderICU provider = new CharsetProviderICU();
        CharsetICU charset  ;
      
            
        UnicodeSet mapset = new UnicodeSet();
        UnicodeSet mapnotset = new UnicodeSet();
        UnicodeSet unicodeset = new UnicodeSet();
        String ellipsis = "0x2e";
        cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
                .getString();
        cc.map = ((ICUResourceBundle) testcase.getObject("map")).getString();
        cc.mapnot = ((ICUResourceBundle) testcase.getObject("mapnot"))
                .getString();
       
    
        int which = ((ICUResourceBundle) testcase.getObject("which")).getInt(); // only checking for ROUNDTRIP_SET
       
        // ----for debugging only
        logln("");
        logln("TestGetUnicodeSet[" + cc.charset + "] ");
        logln("...............................................");
       
        try{
           // if cc.charset starts with '*', obtain it from com/ibm/icu/dev/data/testdata
           charset = (cc.charset != null && cc.charset.length() > 0 && cc.charset.charAt(0) == '*')
                    ? (CharsetICU) provider.charsetForName(cc.charset.substring(1),
                        "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
                    : (CharsetICU) provider.charsetForName(cc.charset);
          
           //checking for converter that are not supported at this point       
           try{
               if(charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" ||
                      charset.name()== "lmbcs3" || charset.name()== "lmbcs4" || charset.name()=="lmbcs5" || charset.name()=="lmbcs6" ||
                      charset.name()== "lmbcs8" || charset.name()=="lmbcs11" || charset.name()=="lmbcs16" || charset.name()=="lmbcs17" ||
                      charset.name()=="lmbcs18"|| charset.name()=="lmbcs19"){
                  
                   logln("Converter not supported at this point :" +charset.displayName());
                   return;
               }
                            
               if(which==1){
                   logln("Fallback set not supported at this point for converter : "+charset.displayName());
                  return;
               }
              
           }catch(Exception e){
               return;
           }
          
           mapset.clear();
           mapnotset.clear();
                  
           mapset.applyPattern(cc.map,false);
           mapnotset.applyPattern(cc.mapnot,false);
          
           charset.getUnicodeSet(unicodeset, which);
           UnicodeSet diffset = new UnicodeSet();
          
           //are there items that must be in unicodeset but are not?          
           (diffset = mapset).removeAll(unicodeset);
           if(!diffset.isEmpty()){
               StringBuffer s = new StringBuffer(diffset.toPattern(true));
               if(s.length()>100){
                   s.replace(0, 0x7fffffff, ellipsis);
               }
               errln("error in missing items - conversion/getUnicodeSet test case "+cc.charset + "\n" + s.toString());
           }
          
          //are there items that must not be in unicodeset but are?
           (diffset=mapnotset).retainAll(unicodeset);
           if(!diffset.isEmpty()){
               StringBuffer s = new StringBuffer(diffset.toPattern(true));
               if(s.length()>100){
                   s.replace(0, 0x7fffffff, ellipsis);
               }
               errln("contains unexpected items - conversion/getUnicodeSet test case "+cc.charset + "\n" + s.toString());
           }
View Full Code Here

  public void testOptimizer() throws Exception {
    String rules = "a > b; b > c;"; // convert a's to b's and b's to c's
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[ab]")));
  }
View Full Code Here

  public void testOptimizerSurrogate() throws Exception {
    String rules = "\\U00020087 > x;"; // convert CJK UNIFIED IDEOGRAPH-20087 to an x
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
  }
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.UnicodeSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.