Package com.ibm.icu.text

Examples of com.ibm.icu.text.UnicodeSet


  }

  // we have to carefully output the possibilities as compact utf-16
  // range expressions, or jflex will OOM!
  static void outputMacro(String name, String pattern) {
    UnicodeSet set = new UnicodeSet(pattern);
    set.removeAll(BMP);
    System.out.println(name + " = (");
    // if the set is empty, we have to do this or jflex will barf
    if (set.isEmpty()) {
      System.out.println("\t  []");
    }

    HashMap<Character,UnicodeSet> utf16ByLead = new HashMap<Character,UnicodeSet>();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
      char utf16[] = Character.toChars(it.codepoint);
      UnicodeSet trails = utf16ByLead.get(utf16[0]);
      if (trails == null) {
        trails = new UnicodeSet();
        utf16ByLead.put(utf16[0], trails);
      }
      trails.add(utf16[1]);
    }
   
    Map<String,UnicodeSet> utf16ByTrail = new HashMap<String,UnicodeSet>();
    for (Map.Entry<Character,UnicodeSet> entry : utf16ByLead.entrySet()) {
      String trail = entry.getValue().getRegexEquivalent();
      UnicodeSet leads = utf16ByTrail.get(trail);
      if (leads == null) {
        leads = new UnicodeSet();
        utf16ByTrail.put(trail, leads);
      }
      leads.add(entry.getKey());
    }

    boolean isFirst = true;
    for (Map.Entry<String,UnicodeSet> entry : utf16ByTrail.entrySet()) {
      System.out.print( isFirst ? "\t  " : "\t| ");
View Full Code Here


  }
 
  // we have to carefully output the possibilities as compact utf-16
  // range expressions, or jflex will OOM!
  static void outputMacro(String name, String pattern) {
    UnicodeSet set = new UnicodeSet(pattern);
    set.removeAll(BMP);
    System.out.println(name + " = (");
    // if the set is empty, we have to do this or jflex will barf
    if (set.isEmpty()) {
      System.out.println("\t  []");
    }
   
    HashMap<Character,UnicodeSet> utf16ByLead = new HashMap<Character,UnicodeSet>();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {   
      char utf16[] = Character.toChars(it.codepoint);
      UnicodeSet trails = utf16ByLead.get(utf16[0]);
      if (trails == null) {
        trails = new UnicodeSet();
        utf16ByLead.put(utf16[0], trails);
      }
      trails.add(utf16[1]);
    }
   
    boolean isFirst = true;
    for (Character c : utf16ByLead.keySet()) {
      UnicodeSet trail = utf16ByLead.get(c);
      System.out.print( isFirst ? "\t  " : "\t| ");
      isFirst = false;
      System.out.println("([\\u" + Integer.toHexString(c) + "]" + trail.getRegexEquivalent() + ")");
    }
    System.out.println(")");
  }
View Full Code Here

    /* Initialize the two UnicodeSets used for proper Gurmukhi conversion if they have not already been created. */
    private void initializePNJSets() {
        if (PNJ_BINDI_TIPPI_SET != null && PNJ_CONSONANT_SET != null) {
            return;
        }
        PNJ_BINDI_TIPPI_SET = new UnicodeSet();
        PNJ_CONSONANT_SET = new UnicodeSet();
       
        PNJ_CONSONANT_SET.add(0x0a15, 0x0a28);
        PNJ_CONSONANT_SET.add(0x0a2a, 0x0a30);
        PNJ_CONSONANT_SET.add(0x0a35, 0x0a36);
        PNJ_CONSONANT_SET.add(0x0a38, 0x0a39);
View Full Code Here

                    PropsVectors.ERROR_VALUE_CP, col, ~0, ~0);
        }

        for (int i = 0; i < encodings.length; ++i) {
            Charset testCharset = CharsetICU.forNameICU(encodings[i]);
            UnicodeSet unicodePointSet = new UnicodeSet(); // empty set
            ((CharsetICU) testCharset).getUnicodeSet(unicodePointSet,
                    mappingTypes);
            int column = i / 32;
            int mask = 1 << (i % 32);
            // now iterate over intervals on set i
            int itemCount = unicodePointSet.getRangeCount();
            for (int j = 0; j < itemCount; ++j) {
                int startChar = unicodePointSet.getRangeStart(j);
                int endChar = unicodePointSet.getRangeEnd(j);
                pvec.setValue(startChar, endChar, column, ~0, mask);
            }
        }

        // handle excluded encodings
View Full Code Here

        }

        // generate a list of all caseless characters -- characters whose
        // case closure is themselves.

        UnicodeSet caseless = new UnicodeSet();

        for (int i = 0; i <= 0x10FFFF; ++i) {
            String cp = UTF16.valueOf(i);
            ci.reset(cp);
            int count = 0;
            String fold = null;
            for (String temp = ci.next(); temp != null; temp = ci.next()) {
                fold = temp;
                if (++count > 1) break;
            }
            if (count==1 && fold.equals(cp)) {
                caseless.add(i);
            }
        }

        System.out.println("caseless = " + caseless.toPattern(true));

        UnicodeSet not_lc = new UnicodeSet("[:^lc:]");
       
        UnicodeSet a = new UnicodeSet();
        a.set(not_lc);
        a.removeAll(caseless);
        System.out.println("[:^lc:] - caseless = " + a.toPattern(true));

        a.set(caseless);
        a.removeAll(not_lc);
        System.out.println("caseless - [:^lc:] = " + a.toPattern(true));
    }
View Full Code Here

        String[] array = new String[]{"a", "b", "c", "{de}"};
        List list = Arrays.asList(array);
        Set aset = new HashSet(list);
        logln(" *** The source set's size is: " + aset.size());
    //The size reads 4
        UnicodeSet set = new UnicodeSet();
        set.clear();
        set.addAll(aset);
        logln(" *** After addAll, the UnicodeSet size is: " + set.size());
    //The size should also read 4, but 0 is seen instead

    }
View Full Code Here

     * @param value
     * @param result
     * @return result
     */
    public UnicodeSet getSet(Object value, UnicodeSet result) {
        if (result == null) result = new UnicodeSet();
        for (int i = 0; i < length - 1; ++i) {
            if (areEqual(value, values[i])) {
                result.add(transitions[i], transitions[i+1]-1);
            }
        }
View Full Code Here

            }
        } else {
            Set set = (Set) getAvailableValues(new TreeSet(collected));
            for (Iterator it = set.iterator(); it.hasNext();) {
                Object value = it.next();
                UnicodeSet s = getSet(value);
                result.append(value)
                .append("\t=> ")
                .append(s.toPattern(true))
                .append("\r\n");
            }
        }
        return result.toString();
    }
View Full Code Here

    public void TestTitleRegression() throws java.io.IOException {
        UCaseProps props = new UCaseProps();
        int type = props.getTypeOrIgnorable('\'');
        assertEquals("Case Ignorable check", -1, type); // should be case-ignorable (-1)
        UnicodeSet allCaseIgnorables = new UnicodeSet();
        for (int cp = 0; cp <= 0x10FFFF; ++cp) {
            if (props.getTypeOrIgnorable(cp) < 0) {
                allCaseIgnorables.add(cp);
            }
        }
        logln(allCaseIgnorables.toString());
        assertEquals("Titlecase check",
                "The Quick Brown Fox Can't Jump Over The Lazy Dogs.",
                UCharacter.toTitleCase(ULocale.ENGLISH, "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.", null));
    }
View Full Code Here

                if(locale.toString().indexOf(("in"))<0){
                    errln("UScript.getCode returned null for locale: " + locale);
                }
                continue;
            }
            UnicodeSet exemplarSets[] = new UnicodeSet[2];
            for (int k=0; k<2; ++k) {   // for casing option in (normal, caseInsensitive)
                int option = (k==0) ? 0 : UnicodeSet.CASE;
                UnicodeSet exemplarSet = LocaleData.getExemplarSet(locale, option);
                exemplarSets[k] = exemplarSet;
                ExemplarGroup exGrp = new ExemplarGroup(exemplarSet, scriptCodes);
                if (!testedExemplars.contains(exGrp)) {
                    testedExemplars.add(exGrp);
                    UnicodeSet[] sets = new UnicodeSet[scriptCodes.length];
                    // create the UnicodeSets for the script
                    for(int j=0; j < scriptCodes.length; j++){
                        sets[j] = new UnicodeSet("[:" + UScript.getShortName(scriptCodes[j]) + ":]");
                    }
                    boolean existsInScript = false;
                    UnicodeSetIterator iter = new UnicodeSetIterator(exemplarSet);
                    // iterate over the ranges of the exemplar set
                    while (!existsInScript && iter.nextRange()) {
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.UnicodeSet

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.