Package com.ibm.icu.text

Examples of com.ibm.icu.text.UnicodeSetIterator


            for (int j = 0; j < indicScripts.length; ++j) {
                if (i == j) continue;
                String target = names[j];
                Transliterator forward = Transliterator.getInstance(source + '-' + target);
                Transliterator backward = forward.getInverse();
                UnicodeSetIterator it = new UnicodeSetIterator(sourceChars);
                while (it.next()) {
                    if (lengthMarks.contains(it.codepoint)) continue;
                    String s = Normalizer.normalize(it.codepoint,Normalizer.NFC,0);
                    //if (!Normalizer.isNormalized(s,Normalizer.NFC,0)) continue;
                    if (!s.equals(Normalizer.normalize(s,Normalizer.NFD,0))) {
                        failNorm.add(it.codepoint);
                    }
                    String t = fix(forward.transliterate(s));
                    if (t.equals(testString)) {
                        System.out.println("debug");
                    }

                    String r = fix(backward.transliterate(t));
                    if (Normalizer.compare(s,r,0) == 0) {
                        if (indicScripts[j] != UScript.LATIN) eq.add(s,t);
                    } else {
                        if (indicScripts[j] == UScript.LATIN) {
                            latinFail.add(s + " - " + t + " - " + r);
                        }
                    }
                }
            }
        }
        // collect equivalents
        pw.println("<table border='1' cellspacing='0'><tr>");
        for (int i = 0; i < indicScripts.length; ++i) {
            pw.print("<th width='10%'>" + names[i].substring(0,3) + "</th>");
        }
        pw.println("</tr>");

        Iterator rit = eq.getSetIterator(new MyComparator());
        while(rit.hasNext()) {
            Set equivs = (Set)rit.next();
            pw.print("<tr>");
            Iterator sit = equivs.iterator();
            String source = (String)sit.next();
            String item = anyToLatin.transliterate(source);
            if (item.equals("") || source.equals(item)) item = "&nbsp;";
            pw.print("<td>" + item + "</td>");
            for (int i = 1; i < indicScripts.length; ++i) {
                sit = equivs.iterator();
                item = "";
                while (sit.hasNext()) {
                    String trial = (String)sit.next();
                    if (!sets[i].containsAll(trial)) continue;
                    item = trial;
                    break;
                }
                String classString = "";
                if (item.equals("")) {
                    classString = " class='miss'";
                    String temp = fallbacks[i].transliterate(source);
                    if (!temp.equals("") && !temp.equals(source)) item = temp;
                }
                String backup = item.equals("") ? "&nbsp;" : item;
                pw.print("<td" + classString + " title='" + getName(item, "; ") + "'>"
                    + backup + "<br><tt>" + Utility.hex(item) + "</tt></td>");
            }
            /*
            Iterator sit = equivs.iterator();
            while (sit.hasNext()) {
                String item = (String)sit.next();
                pw.print("<td>" + item + "</td>");
            }
            */
            pw.println("</tr>");
        }
        pw.println("</table>");
        if (true) {
            pw.println("<h2>Failed Normalization</h2>");
   
            UnicodeSetIterator it = new UnicodeSetIterator(failNorm);
            UnicodeSet pieces = new UnicodeSet();
            while (it.next()) {
                String s = UTF16.valueOf(it.codepoint);
                String d = Normalizer.normalize(s,Normalizer.NFD,0);
                pw.println("Norm:" + s + ", " + Utility.hex(s) + " " + UCharacter.getName(it.codepoint)
                     + "; " + d + ", " + Utility.hex(d) + ", ");
                pw.println(UCharacter.getName(d.charAt(1)) + "<br>");
View Full Code Here


                + "Value (" + valueNum + "): " + valueName + ", "
                + e.getClass().getName());
            continue;
          }
          UnicodeSet collectedErrors = new UnicodeSet();
          for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.next();) {
            int value = UCharacter.getIntPropertyValue(it.codepoint, propNum);
            if (value != valueNum) {
              collectedErrors.add(it.codepoint);
            }
          }
View Full Code Here

    set.clear();
    set.complement("ab");
    exp.applyPattern("[{ab}]");
    if (!set.equals(exp)) { errln("FAIL: complement(\"ab\")"); return; }
   
    UnicodeSetIterator iset = new UnicodeSetIterator(set);
    if (!iset.next() || iset.codepoint != UnicodeSetIterator.IS_STRING) {
      errln("FAIL: UnicodeSetIterator.next/IS_STRING");
    } else if (!iset.string.equals("ab")) {
      errln("FAIL: UnicodeSetIterator.string");
    }
   
View Full Code Here

    checkEqual(s, t, "toPattern(true)");
  }
 
  UnicodeSet copyWithIterator(UnicodeSet s, boolean withRange) {
    UnicodeSet t = new UnicodeSet();
    UnicodeSetIterator it = new UnicodeSetIterator(s);
    if (withRange) {
      while (it.nextRange()) {
        if (it.codepoint == UnicodeSetIterator.IS_STRING) {
          t.add(it.string);
        } else {
          t.add(it.codepoint, it.codepointEnd);
        }
      }
    } else {
      while (it.next()) {
        if (it.codepoint == UnicodeSetIterator.IS_STRING) {
          t.add(it.string);
        } else {
          t.add(it.codepoint);
        }
View Full Code Here

    }

    // might want to add to UnicodeSet
    private String getList(UnicodeSet set) {
        StringBuffer result = new StringBuffer();
        for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
            result.append(it.getString());
        }
        return result.toString();
    }
View Full Code Here

  private static void expandSingleRule
      (StringBuilder builder, String leftHandSide, String rightHandSide)
      throws IllegalArgumentException {
    UnicodeSet set = new UnicodeSet(leftHandSide, UnicodeSet.IGNORE_SPACE);
    boolean numericValue = NUMERIC_VALUE_PATTERN.matcher(rightHandSide).matches();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set) ; it.nextRange() ; ) {
      if (it.codepoint != UnicodeSetIterator.IS_STRING) {
        if (numericValue) {
          for (int cp = it.codepoint ; cp <= it.codepointEnd ; ++cp) {
            builder.append(String.format(Locale.ROOT, "%04X", cp)).append('>');
            builder.append(String.format(Locale.ROOT, "%04X", 0x30 + UCharacter.getNumericValue(cp)));
            builder.append("   # ").append(UCharacter.getName(cp));
            builder.append("\n");
          }
        } else {
          builder.append(String.format(Locale.ROOT, "%04X", it.codepoint));
          if (it.codepointEnd > it.codepoint) {
            builder.append("..").append(String.format(Locale.ROOT, "%04X", it.codepointEnd));
          }
          builder.append('>').append(rightHandSide).append("\n");
        }
      } else {
        System.err.println("ERROR: String '" + it.getString() + "' found in UnicodeSet");
        System.exit(1);
      }
    }
  }
View Full Code Here

    if (set.isEmpty()) {
      System.out.println("\t  []");
    }

    HashMap<Character,UnicodeSet> utf16ByLead = new HashMap<>();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
      char utf16[] = Character.toChars(it.codepoint);
      UnicodeSet trails = utf16ByLead.get(utf16[0]);
      if (trails == null) {
        trails = new UnicodeSet();
        utf16ByLead.put(utf16[0], trails);
View Full Code Here

    if (set.isEmpty()) {
      System.out.println("\t  []");
    }
   
    HashMap<Character,UnicodeSet> utf16ByLead = new HashMap<>();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {   
      char utf16[] = Character.toChars(it.codepoint);
      UnicodeSet trails = utf16ByLead.get(utf16[0]);
      if (trails == null) {
        trails = new UnicodeSet();
        utf16ByLead.put(utf16[0], trails);
View Full Code Here

    if (set.isEmpty()) {
      System.out.println("\t  []");
    }

    HashMap<Character,UnicodeSet> utf16ByLead = new HashMap<Character,UnicodeSet>();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
      char utf16[] = Character.toChars(it.codepoint);
      UnicodeSet trails = utf16ByLead.get(utf16[0]);
      if (trails == null) {
        trails = new UnicodeSet();
        utf16ByLead.put(utf16[0], trails);
View Full Code Here

   
            /* start with an empty set for [has canonical decomposition] */
            hasDecomp=new UnicodeSet();
   
            /* iterate over all ideographs and remember which canonically decompose */
            UnicodeSetIterator it = new UnicodeSetIterator(set);
            int start, end;
            long norm32;
   
            while(it.nextRange() && (it.codepoint != UnicodeSetIterator.IS_STRING)) {
                start=it.codepoint;
                end=it.codepointEnd;
                while(start<=end) {
                    norm32 = getNorm32(start);
                    if((norm32 & QC_NFD)>0) {
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.UnicodeSetIterator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.