Package com.ibm.icu.text

Examples of com.ibm.icu.text.UnicodeSet


                if(locale.toString().indexOf(("in"))<0){
                    errln("UScript.getCode returned null for locale: "+ locale);
                }
                continue;
            }
            UnicodeSet exemplarSets[] = new UnicodeSet[4];

            for (int k=0; k<2; ++k) {  // for casing option in (normal, uncased)
                int option = (k==0) ? 0 : UnicodeSet.CASE;
                for(int h=0; h<2; ++h){ 
                    int type = (h==0) ? LocaleData.ES_STANDARD : LocaleData.ES_AUXILIARY;

                    UnicodeSet exemplarSet = ld.getExemplarSet(option, type);
                    exemplarSets[k*2+h] = exemplarSet;

                    ExemplarGroup exGrp = new ExemplarGroup(exemplarSet, scriptCodes);
                    if (!testedExemplars.contains(exGrp)) {
                        testedExemplars.add(exGrp);
                        UnicodeSet[] sets = new UnicodeSet[scriptCodes.length];
                        // create the UnicodeSets for the script
                        for(int j=0; j < scriptCodes.length; j++){
                            sets[j] = new UnicodeSet("[:" + UScript.getShortName(scriptCodes[j]) + ":]");
                        }
                        boolean existsInScript = false;
                        UnicodeSetIterator iter = new UnicodeSetIterator(exemplarSet);
                        // iterate over the
                        while (!existsInScript && iter.nextRange()) {
View Full Code Here


   
    public RandomCollator() {
       
    }
    protected void init()throws Exception{
        init(1,10, new UnicodeSet("[AZa-z<\\&\\[\\]]"));
    }
View Full Code Here

        }
        return other.equals(UTF16.valueOf(codepoint));
    }
   
    public UnicodeSet getPropertySet(boolean charEqualsValue, UnicodeSet result){
        if (result == null) result = new UnicodeSet();
        matchIterator.reset();
        while (matchIterator.next()) {
            String value = filter.remap(getPropertyValue(matchIterator.codepoint));
            if (equals(matchIterator.codepoint, value) == charEqualsValue) {
                result.add(matchIterator.codepoint);
View Full Code Here

        }
        return result;
    }

    public UnicodeSet getPropertySet(String propertyValue, UnicodeSet result){
        if (result == null) result = new UnicodeSet();
        matchIterator.reset();
        while (matchIterator.next()) {
            String value = filter.remap(getPropertyValue(matchIterator.codepoint));
            if (propertyValue.equals(value)) {
                result.add(matchIterator.codepoint);
View Full Code Here

        }
        return result;
    }

    public UnicodeSet getPropertySet(Matcher matcher, UnicodeSet result) {
        if (result == null) result = new UnicodeSet();
        matchIterator.reset();
        while (matchIterator.next()) {
            String value = filter.remap(getPropertyValue(matchIterator.codepoint));
            if (value == null)
                continue;
View Full Code Here

            }
        }
    }
   
    public UnicodeSet getMatchSet(UnicodeSet result) {
        if (result == null) result = new UnicodeSet();
        addAll(matchIterator, result);
        return result;
    }
View Full Code Here

        output.println("};\n");
    }
   
    public void writeMirroredDataFile(String filename)
    {
        UnicodeSet mirrored = new UnicodeSet("[\\p{Bidi_Mirrored}]");
        int count = mirrored.size();
        int[] chars   = new int[count];
        int[] mirrors = new int[count];
        int total = 0;
       
        System.out.println("There are " + count + " mirrored characters.");
       
        for(int i = 0; i < count; i += 1) {
            int ch = mirrored.charAt(i);
            int m  = UCharacter.getMirror(ch);
           
            if (ch != m) {
                chars[total] = ch & 0xFFFF;
                mirrors[total++] = m & 0xFFFF;
View Full Code Here

   
    private static void buildArabicTables(ScriptList scriptList, FeatureList featureList,
                                                LookupList lookupList, ClassTable classTable) {
        // TODO: Might want to have the ligature table builder explicitly check for ligatures
        // which start with space and tatweel rather than pulling them out here...
        UnicodeSet arabicBlock   = new UnicodeSet("[[\\p{block=Arabic}] & [[:Cf:][:Po:][:So:][:Mn:][:Nd:][:Lm:]]]");
        UnicodeSet oddLigatures  = new UnicodeSet("[\\uFC5E-\\uFC63\\uFCF2-\\uFCF4\\uFE70-\\uFE7F]");
        UnicodeSet arabicLetters = new UnicodeSet("[\\p{Arabic}]");
        ArabicCharacterData arabicData = ArabicCharacterData.factory(arabicLetters.addAll(arabicBlock).removeAll(oddLigatures));

        addArabicGlyphClasses(arabicData, classTable);
       
        ClassTable initClassTable = new ClassTable();
        ClassTable mediClassTable = new ClassTable();
View Full Code Here

     * Hebrew mark order taken from the SBL Hebrew Font manual
     * Arabic mark order per Thomas Milo: hamza < shadda < combining_alef < sukun, vowel_marks < madda < qur'anic_marks
     */
    public static ClassTable buildCombiningClassTable()
    {
        UnicodeSet markSet = new UnicodeSet("[\\P{CanonicalCombiningClass=0}]");
        ClassTable exceptions = new ClassTable();
        ClassTable combiningClasses = new ClassTable();
        int markCount = markSet.size();
       
        exceptions.addMapping(0x05C110); // Point Shin Dot
        exceptions.addMapping(0x05C211); // Point Sin Dot
        exceptions.addMapping(0x05BC21); // Point Dagesh or Mapiq
        exceptions.addMapping(0x05BF23); // Point Rafe
        exceptions.addMapping(0x05B927); // Point Holam
        exceptions.addMapping(0x0323, 220); // Comb. Dot Below (low punctum)
        exceptions.addMapping(0x0591, 220); // Accent Etnahta
        exceptions.addMapping(0x0596, 220); // Accent Tipeha
        exceptions.addMapping(0x059B, 220); // Accent Tevir
        exceptions.addMapping(0x05A3, 220); // Accent Munah
        exceptions.addMapping(0x05A4, 220); // Accent Mahapakh
        exceptions.addMapping(0x05A5, 220); // Accent Merkha
        exceptions.addMapping(0x05A6, 220); // Accent Merkha Kefula
        exceptions.addMapping(0x05A7, 220); // Accent Darga
        exceptions.addMapping(0x05AA, 220); // Accent Yerah Ben Yomo
        exceptions.addMapping(0x05B0, 220); // Point Sheva
        exceptions.addMapping(0x05B1, 220); // Point Hataf Segol
        exceptions.addMapping(0x05B2, 220); // Point Hataf Patah
        exceptions.addMapping(0x05B3, 220); // Point Hataf Qamats
        exceptions.addMapping(0x05B4, 220); // Point Hiriq
        exceptions.addMapping(0x05B5, 220); // Point Tsere
        exceptions.addMapping(0x05B6, 220); // Point Segol
        exceptions.addMapping(0x05B7, 220); // Point Patah
        exceptions.addMapping(0x05B8, 220); // Point Qamats
        exceptions.addMapping(0x05BB, 220); // Point Qubuts
        exceptions.addMapping(0x05BD, 220); // Point Meteg
        exceptions.addMapping(0x059A, 222); // Accent Yetiv
        exceptions.addMapping(0x05AD, 222); // Accent Dehi
        exceptions.addMapping(0x05C4, 230); // Mark Upper Dot (high punctum)
        exceptions.addMapping(0x0593, 230); // Accent Shalshelet
        exceptions.addMapping(0x0594, 230); // Accent Zaqef Qatan
        exceptions.addMapping(0x0595, 230); // Accent Zaqef Gadol
        exceptions.addMapping(0x0597, 230); // Accent Revia
        exceptions.addMapping(0x0598, 230); // Accent Zarqa
        exceptions.addMapping(0x059F, 230); // Accent Qarney Para
        exceptions.addMapping(0x059E, 230); // Accent Gershayim
        exceptions.addMapping(0x059D, 230); // Accent Geresh Muqdam
        exceptions.addMapping(0x059C, 230); // Accent Geresh
        exceptions.addMapping(0x0592, 230); // Accent Segolta
        exceptions.addMapping(0x05A0, 230); // Accent Telisha Gedola
        exceptions.addMapping(0x05AC, 230); // Accent Iluy
        exceptions.addMapping(0x05A8, 230); // Accent Qadma
        exceptions.addMapping(0x05AB, 230); // Accent Ole
        exceptions.addMapping(0x05AF, 230); // Mark Masora Circle
        exceptions.addMapping(0x05A1, 230); // Accent Pazer
      //exceptions.addMapping(0x0307, 230); // Mark Number/Masora Dot
        exceptions.addMapping(0x05AE, 232); // Accent Zinor
        exceptions.addMapping(0x05A9, 232); // Accent Telisha Qetana
        exceptions.addMapping(0x0599, 232); // Accent Pashta
       
        exceptions.addMapping(0x065527); // ARABIC HAMZA BELOW
        exceptions.addMapping(0x065427); // ARABIC HAMZA ABOVE

        exceptions.addMapping(0x065128); // ARABIC SHADDA

        exceptions.addMapping(0x065629); // ARABIC SUBSCRIPT ALEF
        exceptions.addMapping(0x067029); // ARABIC LETTER SUPERSCRIPT ALEF

        exceptions.addMapping(0x064D30); // ARABIC KASRATAN
        exceptions.addMapping(0x065030); // ARABIC KASRA

        exceptions.addMapping(0x065231); // ARABIC SUKUN
        exceptions.addMapping(0x06E131); // ARABIC SMALL HIGH DOTLESS HEAD OF KHAH

        exceptions.addMapping(0x064B31); // ARABIC FATHATAN
        exceptions.addMapping(0x064C31); // ARABIC DAMMATAN
        exceptions.addMapping(0x064E31); // ARABIC FATHA
        exceptions.addMapping(0x064F31); // ARABIC DAMMA
        exceptions.addMapping(0x065731); // ARABIC INVERTED DAMMA
        exceptions.addMapping(0x065831); // ARABIC MARK NOON GHUNNA

        exceptions.addMapping(0x065332); // ARABIC MADDAH ABOVE
       
        exceptions.snapshot();
       
        for (int i = 0; i < markCount; i += 1) {
            int mark = markSet.charAt(i);
            int markClass = exceptions.getGlyphClassID(mark);
           
            if (markClass == 0) {
                markClass = UCharacter.getCombiningClass(mark);
            }
View Full Code Here

   
    public static void buildDecompTables(String fileName)
    {
        // F900 - FAFF are compatibility ideographs. They all decompose to a single other character, and can be ignored.
      //UnicodeSet decompSet = new UnicodeSet("[[[\\P{Hangul}] & [\\p{DecompositionType=Canonical}]] - [\uF900-\uFAFF]]");
        UnicodeSet decompSet = new UnicodeSet("[[\\p{DecompositionType=Canonical}] & [\\P{FullCompositionExclusion}] & [\\P{Hangul}]]");
        CanonicalCharacterData data = CanonicalCharacterData.factory(decompSet);
        ClassTable classTable = new ClassTable();
       
        LookupList  lookupList  = new LookupList();
        FeatureList featureList = new FeatureList();
View Full Code Here

TOP

Related Classes of com.ibm.icu.text.UnicodeSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.