Examples of java.lang.Character.UnicodeBlock

java.lang.Character.UnicodeBlock

              } catch (IllegalArgumentException e) {
                // ok
              }
            
              try {
                UnicodeBlock found = UnicodeBlock.forName(predefined);
                javaPredefName = "In" + predefined;
              } catch (IllegalArgumentException e) {
                // ok
              }

View Full Code Here

      char ch = myBuffer[i];
      if ((int) ch < 10) {
        sb.append("\\u000" + (int) ch);
        continue;
      }
      UnicodeBlock ub = UnicodeBlock.of(ch);
      if (ub == UnicodeBlock.BASIC_LATIN) {
        // 英文及数字等
        sb.append(myBuffer[i]);
      } else if (ub == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
        // 全角半角字符

View Full Code Here

  static boolean isLatinLetter(char letter) {
    // Combining marks are a subset of non-spacing-mark.
    if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
      return false;
    }
    UnicodeBlock block = UnicodeBlock.of(letter);
    return block.equals(UnicodeBlock.BASIC_LATIN) ||
        block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) ||
        block.equals(UnicodeBlock.LATIN_EXTENDED_A) ||
        block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) ||
        block.equals(UnicodeBlock.LATIN_EXTENDED_B) ||
        block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
  }

View Full Code Here

    }
    return false;
  }
  
  private static boolean isChinese(char c) {
    UnicodeBlock ub = UnicodeBlock.of(c);
    if(ub==UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS ||
      ub == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS||
      ub == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A||
      ub == UnicodeBlock.GENERAL_PUNCTUATION||
      ub == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION||

View Full Code Here

  static boolean isLatinLetter(char letter) {
    // Combining marks are a subset of non-spacing-mark.
    if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
      return false;
    }
    UnicodeBlock block = UnicodeBlock.of(letter);
    return block.equals(UnicodeBlock.BASIC_LATIN) ||
        block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) ||
        block.equals(UnicodeBlock.LATIN_EXTENDED_A) ||
        block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) ||
        block.equals(UnicodeBlock.LATIN_EXTENDED_B) ||
        block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
  }

View Full Code Here


    private String checkASCIIString(String string) {
        StringBuffer buf = new StringBuffer();


        for (int i = 0; i < string.length(); i++) {
            UnicodeBlock characterBlock = UnicodeBlock.of(string.charAt(i));
            if (characterBlock == UnicodeBlock.BASIC_LATIN) {
                buf.append(string.charAt(i));
            } else {
                buf.append('_');
            }

View Full Code Here

    public String getUnicodeBlockDistribution(String text)
    {
        HashMap<Character.UnicodeBlock, Integer> map = new HashMap<Character.UnicodeBlock, Integer>();
        for (int i=0; i<text.length(); i++)
        {
            UnicodeBlock ub = Character.UnicodeBlock.of(text.charAt(i));
            if (ub!=null)
            {
                if (map.containsKey(ub))
                    map.put(ub, map.get(ub)+1);
                else

View Full Code Here

      + "\\p{InCombiningDiacriticalMarks}]");


  private static UnicodeBlock getBlock(final String word)
  {
    final int c = word.codePointAt(0);
    final UnicodeBlock block = UnicodeBlock.of(c);
    if (block == UnicodeBlock.BASIC_LATIN && HYPEREXTENDED_LATIN.matcher(word).find())
    {
      return UnicodeBlock.LATIN_EXTENDED_A;
    }
    if (block == UnicodeBlock.BASIC_LATIN && EXTENDED_LATIN.matcher(word).find())

View Full Code Here

    boolean hasExtendedLatin = false;
    boolean hasHyperextendedLatin = false;
    final Counter<UnicodeBlock> counter = new Counter<UnicodeBlock>();
    for (final String word : words)
    {
      final UnicodeBlock block = getBlock(word);
      if (block == UnicodeBlock.LATIN_1_SUPPLEMENT)
      {
        hasExtendedLatin = true;
      }
      if (block == UnicodeBlock.LATIN_EXTENDED_A)
      {
        hasHyperextendedLatin = true;
      }
      counter.note(block);
    }
    final List<UnicodeBlock> mostFrequent = counter.getMostFrequent(1);
    if (mostFrequent.size() == 0)
    {
      return null;
    }
    UnicodeBlock b = mostFrequent.get(0);
    /*
     * If we've seen *any* extended latin, and we're mostly latin, then
     * treat the whole thing as extended.
     */
    if (b == UnicodeBlock.BASIC_LATIN || b == UnicodeBlock.LATIN_1_SUPPLEMENT)

View Full Code Here

            final String u = decode(bytes, "KOI8_U");
            if (! r.equals(u)) {
                differences++;
                final char rc = r.charAt(0);
                final char uc = u.charAt(0);
                final UnicodeBlock rcb = UnicodeBlock.of(rc);
                final UnicodeBlock ucb = UnicodeBlock.of(uc);
                System.out.printf("%02x => %04x %s, %04x %s%n",
                                  i, (int) rc, rcb, (int) uc, ucb);
                check(rcb == UnicodeBlock.BOX_DRAWING &&
                      ucb == UnicodeBlock.CYRILLIC);
            }

View Full Code Here

0 1

TOP

Related Classes of java.lang.Character.UnicodeBlock

com.github.stephenc.javaisotools.iso9660.impl.ISO9660Config

com.google.i18n.phonenumbers.PhoneNumberMatcher

cue.lang.unicode.BlockUtil

erjang.m.re.Native$Options

fr.eolya.extraction.CharsetLanguages

net.loyin.util.UnicodeTool

org.apache.jena.iri.impl.AbsLexer

org.fnlp.nlp.cn.LangDetection

UkrainianIsNotRussian

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.