Package com.ibm.icu.text

Examples of com.ibm.icu.text.UnicodeSet


  }

  private static void expandSingleRule
      (StringBuilder builder, String leftHandSide, String rightHandSide)
      throws IllegalArgumentException {
    UnicodeSet set = new UnicodeSet(leftHandSide, UnicodeSet.IGNORE_SPACE);
    boolean numericValue = NUMERIC_VALUE_PATTERN.matcher(rightHandSide).matches();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set) ; it.nextRange() ; ) {
      if (it.codepoint != UnicodeSetIterator.IS_STRING) {
        if (numericValue) {
          for (int cp = it.codepoint ; cp <= it.codepointEnd ; ++cp) {
View Full Code Here


  public void testOptimizer() throws Exception {
    String rules = "a > b; b > c;"; // convert a's to b's and b's to c's
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[ab]")));
  }
View Full Code Here

  public void testOptimizerSurrogate() throws Exception {
    String rules = "\\U00020087 > x;"; // convert CJK UNIFIED IDEOGRAPH-20087 to an x
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
  }
View Full Code Here

  }

  // we have to carefully output the possibilities as compact utf-16
  // range expressions, or jflex will OOM!
  static void outputMacro(String name, String pattern) {
    UnicodeSet set = new UnicodeSet(pattern);
    set.removeAll(BMP);
    System.out.println(name + " = (");
    // if the set is empty, we have to do this or jflex will barf
    if (set.isEmpty()) {
      System.out.println("\t  []");
    }

    HashMap<Character,UnicodeSet> utf16ByLead = new HashMap<Character,UnicodeSet>();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
      char utf16[] = Character.toChars(it.codepoint);
      UnicodeSet trails = utf16ByLead.get(utf16[0]);
      if (trails == null) {
        trails = new UnicodeSet();
        utf16ByLead.put(utf16[0], trails);
      }
      trails.add(utf16[1]);
    }
   
    Map<String,UnicodeSet> utf16ByTrail = new HashMap<String,UnicodeSet>();
    for (Map.Entry<Character,UnicodeSet> entry : utf16ByLead.entrySet()) {
      String trail = entry.getValue().getRegexEquivalent();
      UnicodeSet leads = utf16ByTrail.get(trail);
      if (leads == null) {
        leads = new UnicodeSet();
        utf16ByTrail.put(trail, leads);
      }
      leads.add(entry.getKey());
    }

    boolean isFirst = true;
    for (Map.Entry<String,UnicodeSet> entry : utf16ByTrail.entrySet()) {
      System.out.print( isFirst ? "\t  " : "\t| ");
View Full Code Here

  }
 
  // we have to carefully output the possibilities as compact utf-16
  // range expressions, or jflex will OOM!
  static void outputMacro(String name, String pattern) {
    UnicodeSet set = new UnicodeSet(pattern);
    set.removeAll(BMP);
    System.out.println(name + " = (");
    // if the set is empty, we have to do this or jflex will barf
    if (set.isEmpty()) {
      System.out.println("\t  []");
    }
   
    HashMap<Character,UnicodeSet> utf16ByLead = new HashMap<Character,UnicodeSet>();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {   
      char utf16[] = Character.toChars(it.codepoint);
      UnicodeSet trails = utf16ByLead.get(utf16[0]);
      if (trails == null) {
        trails = new UnicodeSet();
        utf16ByLead.put(utf16[0], trails);
      }
      trails.add(utf16[1]);
    }
   
    boolean isFirst = true;
    for (Character c : utf16ByLead.keySet()) {
      UnicodeSet trail = utf16ByLead.get(c);
      System.out.print( isFirst ? "\t  " : "\t| ");
      isFirst = false;
      System.out.println("([\\u" + Integer.toHexString(c) + "]" + trail.getRegexEquivalent() + ")");
    }
    System.out.println(")");
  }
View Full Code Here

  }
 
  // we have to carefully output the possibilities as compact utf-16
  // range expressions, or jflex will OOM!
  static void outputMacro(String name, String pattern) {
    UnicodeSet set = new UnicodeSet(pattern);
    set.removeAll(BMP);
    System.out.println(name + " = (");
    // if the set is empty, we have to do this or jflex will barf
    if (set.isEmpty()) {
      System.out.println("\t  []");
    }
   
    HashMap<Character,UnicodeSet> utf16ByLead = new HashMap<Character,UnicodeSet>();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {   
      char utf16[] = Character.toChars(it.codepoint);
      UnicodeSet trails = utf16ByLead.get(utf16[0]);
      if (trails == null) {
        trails = new UnicodeSet();
        utf16ByLead.put(utf16[0], trails);
      }
      trails.add(utf16[1]);
    }
   
    boolean isFirst = true;
    for (Character c : utf16ByLead.keySet()) {
      UnicodeSet trail = utf16ByLead.get(c);
      System.out.print( isFirst ? "\t  " : "\t| ");
      isFirst = false;
      System.out.println("([\\u" + Integer.toHexString(c) + "]" + trail.getRegexEquivalent() + ")");
    }
    System.out.println(")");
  }
View Full Code Here

  }

  private static void expandSingleRule
      (StringBuilder builder, String leftHandSide, String rightHandSide)
      throws IllegalArgumentException {
    UnicodeSet set = new UnicodeSet(leftHandSide, UnicodeSet.IGNORE_SPACE);
    boolean numericValue = NUMERIC_VALUE_PATTERN.matcher(rightHandSide).matches();
    for (UnicodeSetIterator it = new UnicodeSetIterator(set) ; it.nextRange() ; ) {
      if (it.codepoint != UnicodeSetIterator.IS_STRING) {
        if (numericValue) {
          for (int cp = it.codepoint ; cp <= it.codepointEnd ; ++cp) {
View Full Code Here

    else
      throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid mode: " + mode);
   
    String filter = args.get("filter");
    if (filter != null) {
      UnicodeSet set = new UnicodeSet(filter);
      if (!set.isEmpty()) {
        set.freeze();
        normalizer = new FilteredNormalizer2(normalizer, set);
      }
    }
  }
View Full Code Here

    else
      throw new IllegalArgumentException("Invalid mode: " + mode);
   
    String filter = args.get("filter");
    if (filter != null) {
      UnicodeSet set = new UnicodeSet(filter);
      if (!set.isEmpty()) {
        set.freeze();
        normalizer = new FilteredNormalizer2(normalizer, set);
      }
    }
  }
View Full Code Here

    }

    public void TestPatternParser() {
        StringBuffer buffer = new StringBuffer();
        PatternTokenizer pp = new PatternTokenizer()
        .setIgnorableCharacters(new UnicodeSet("[-]"))
        .setSyntaxCharacters(new UnicodeSet("[a-zA-Z]"))
        .setEscapeCharacters(new UnicodeSet("[b#]"))
        .setUsingQuote(true);
        logln("Using Quote");
        for (int i = 0; i < patternTestData.length; ++i) {
            String patternTest = (String) patternTestData[i];
            CheckPattern(buffer, pp, patternTest);
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.UnicodeSet

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.