Package org.fnlp.nlp.cn

Source Code of org.fnlp.nlp.cn.LangDetection

/**
*  This file is part of FNLP (formerly FudanNLP).
*  FNLP is free software: you can redistribute it and/or modify
*  it under the terms of the GNU Lesser General Public License as published by
*  the Free Software Foundation, either version 3 of the License, or
*  (at your option) any later version.
*  FNLP is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU Lesser General Public License for more details.
*  You should have received a copy of the GNU General Public License
*  along with FudanNLP.  If not, see <http://www.gnu.org/licenses/>.
*  Copyright 2009-2014 www.fnlp.org. All rights reserved.
*/

package org.fnlp.nlp.cn;

import java.lang.Character.UnicodeBlock;

/**
 * Coarse two-way language detector: a text is reported as Chinese ("cn") as
 * soon as it contains a single character from one of the CJK-related Unicode
 * blocks listed in {@link #isChineseCodePoint(int)}; otherwise it is reported
 * as English ("en").
 *
 * <p>Text is examined by Unicode code point rather than by UTF-16 unit, so
 * ideographs outside the Basic Multilingual Plane (encoded as surrogate
 * pairs) are recognised as well.
 */
public class LangDetection {

  /** Tag returned when at least one Chinese character is present. */
  private static final String TAG_CHINESE = "cn";

  /** Tag returned when no Chinese character is present. */
  private static final String TAG_ENGLISH = "en";

  /**
   * Detects the language of the given text.
   *
   * @param str text to classify; {@code null} is treated like empty text
   * @return {@code "cn"} if the text contains at least one Chinese character,
   *         {@code "en"} otherwise (including for {@code null} or empty text)
   */
  public static String detect(String str) {
    if (str == null) {
      return TAG_ENGLISH;
    }
    return isChinese(str.toCharArray()) ? TAG_CHINESE : TAG_ENGLISH;
  }

  /**
   * Tells whether the given UTF-16 character sequence contains at least one
   * Chinese character.
   *
   * @param ch UTF-16 code units to scan; may be {@code null}
   * @return {@code true} if any code point in {@code ch} is Chinese,
   *         {@code false} otherwise or if {@code ch} is {@code null} or empty
   */
  public static boolean isChinese(char[] ch) {
    if (ch == null) {
      return false;
    }
    int i = 0;
    while (i < ch.length) {
      // Decode a full code point so that surrogate pairs are classified as
      // the ideograph they encode instead of as two surrogate units.
      int codePoint = Character.codePointAt(ch, i);
      if (isChineseCodePoint(codePoint)) {
        return true;
      }
      i += Character.charCount(codePoint);
    }
    return false;
  }

  /**
   * Tells whether a single code point belongs to one of the Unicode blocks
   * this class treats as Chinese.
   *
   * <p>NOTE(review): {@code GENERAL_PUNCTUATION} is kept for backward
   * compatibility. That block also holds punctuation common in Latin-script
   * text (for example the curly quotes U+2018..U+201D), so such text is
   * reported as Chinese too -- confirm whether this is intended.
   *
   * @param codePoint a Unicode code point
   * @return {@code true} if the code point lies in an accepted block
   */
  private static boolean isChineseCodePoint(int codePoint) {
    UnicodeBlock ub = UnicodeBlock.of(codePoint);
    return ub == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
        || ub == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
        || ub == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
        || ub == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
        || ub == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
        || ub == UnicodeBlock.GENERAL_PUNCTUATION
        || ub == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
        || ub == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
  }

  /**
   * Small demo: prints the detected tag, a colon, a tab and the sample text
   * for a handful of sample strings.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    String[] samples = {
      ".",
      "you and me",
      "()",
      "。",
      "我们",
      "我们and",
      "《and"
    };
    for (String str : samples) {
      System.out.println(LangDetection.detect(str) + ":\t" + str);
    }
  }

}
TOP

Related Classes of org.fnlp.nlp.cn.LangDetection

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.