Package org.fnlp.nlp.cn.tag

Examples of org.fnlp.nlp.cn.tag.POSTagger


  public static void loadTag(String path) throws LoadModelException {
    if(pos==null){

      String file = path+posModel;
      if(seg==null)
        pos = new POSTagger(file);
      else{
        pos = new POSTagger(seg,file);         
      }
    }
  }
View Full Code Here


  /**
   * 只输入句子,不带词性
   * @throws Exception
   */
  private static void test(String word) throws Exception {   
    POSTagger tag = new POSTagger("../models/seg.m","../models/pos.m");
    String[][] s = tag.tag2Array(word);
    try {
      DependencyTree tree = parser.parse2T(s[0],s[1]);
      System.out.println(tree.toString());
      String stree = parser.parse2String(s[0],s[1],true);
      System.out.println(stree);
View Full Code Here

   */
  public static void main(String[] args) throws Exception {

   
    CWSTagger cws = new CWSTagger("../models/seg.m");
    tag = new POSTagger(cws,"../models/pos.m");
   
    System.out.println("得到支持的词性标签集合");
    System.out.println(tag.getSupportedTags());
    System.out.println(tag.getSupportedTags().size());
    System.out.println("\n");
   
    String str = "媒体计算研究所成立了,高级数据挖掘很难。乐phone很好!";
    String s = tag.tag(str);
    System.out.println("处理未分词的句子");
    System.out.println(s);
   
    System.out.println("使用英文标签");
    tag.SetTagType("en");   
    System.out.println(tag.getSupportedTags());
    System.out.println(tag.getSupportedTags().size());
    s = tag.tag(str);
    System.out.println(s);   
    System.out.println();
   
    CWSTagger cws2 = new CWSTagger("../models/seg.m", new Dictionary("../models/dict.txt"));
   
    //bool值指定该dict是否用于cws分词(分词和词性可以使用不同的词典)
    tag = new POSTagger(cws2, "../models/pos.m"
        , new Dictionary("../models/dict.txt"), true);//true就替换了之前的dict.txt
    tag.removeDictionary(false);//不移除分词的词典
    tag.setDictionary(new Dictionary("../models/dict.txt"), false);//设置POS词典,分词使用原来设置
   
    String str2 = "媒体计算研究所成立了,高级数据挖掘很难。乐phone很好!";
    String s2 = tag.tag(str2);
    System.out.println("处理未分词的句子,使用词典");
    System.out.println(s2);
    System.out.println();
   
    Dictionary dict = new Dictionary();
    dict.add("媒体计算","mypos1","mypos2");
    dict.add("乐phone","专有名");
    tag.setDictionary(dict, true);
    String s22 = tag.tag(str2);
    System.out.println(s22);
    System.out.println();
   
    POSTagger tag1 = new POSTagger("../models/pos.m");
    String str1 = "媒体计算 研究所 成立 了 , 高级 数据挖掘 很 难";
    String[] w = str1.split(" ");
    String[] s1 = tag1.tagSeged(w);
    System.out.println("直接处理分好词的句子:++++++++++");
    for(int i=0;i<s1.length;i++){
      System.out.print(w[i]+"/"+s1[i]+" ");
    }
    System.out.println("\n");
   
    POSTagger tag3 = new POSTagger("../models/pos.m", new Dictionary("../models/dict.txt"));
    String str3 = "媒体计算 研究所 成立 了 , 高级 数据挖掘 很 难 ";
    String[] w3 = str3.split(" ");
    String[] s3 = tag3.tagSeged(w3);
    System.out.println("直接处理分好词的句子,使用词典");
    for(int i=0;i<s3.length;i++){
      System.out.print(w3[i]+"/"+s3[i]+" ");
    }
    System.out.println("\n");
   
    //????????????????????????????
   
    System.out.println("重新构造");
    cws = new CWSTagger("../models/seg.m");
    tag = new POSTagger(cws,"../models/pos.m");
    str = "媒体计算研究所成立了, 高级数据挖掘很难";
    System.out.println(tag.tag(str));
    String[][] sa = tag.tag2Array(str);
    for(int i = 0; i < sa.length; i++) {
      for(int j = 0; j < sa[i].length; j++) {
View Full Code Here

    ep = new EntitiesGetter();   
    cl = Linear.loadFrom(armodel);
  }
 
  public Anaphora(String segmodel, String posmodel, String armodel) throws LoadModelException{
    pos = new POSTagger(segmodel,posmodel);
    ep = new EntitiesGetter();   
    cl = Linear.loadFrom(armodel);
  }
View Full Code Here

  public RuleAnaphora() throws Exception
    ep = new EntitiesGetter()
  }
 
  public RuleAnaphora(String segmodel, String posmodel) throws Exception{
    pos = new POSTagger(segmodel,posmodel);
    ep = new EntitiesGetter()
  }
View Full Code Here

  public LinkedList<Instance> getLlist(){
    return this.llist;
   
  }
  public static void main(String args[]) throws Exception{
    FileGroupReader.tag = new POSTagger("../models/seg.m", "../models/pos.m");
   
    MyDocumentWriter dr1 = new MyDocumentWriter("../tmp/ar");
    dr1.writeOut("../tmp/ar-train.txt");
    System.out.print("已经写入文档");
  }
View Full Code Here

    System.out.print("Done");
  }

  public void addEnTag(String file) throws LoadModelException,
      IOException {
    AbstractTagger cl = new POSTagger(file);
     

    addEnTag(cl,file);
    cl.saveTo(file);
  }
View Full Code Here

   * @param args
   * @throws Exception
   */
  public static void main(String[] args) throws Exception {
    CWSTagger seg = new CWSTagger("./models/seg.m")
    POSTagger pos = new POSTagger(seg, "./models/pos.m");

    RLSeg rlseg = new RLSeg(seg,"./tmpdata/FNLPDATA/all.dict");
//    tag.setDictionary(rlseg.tempdict);
    String file = "./tmpdata/20120927-微博分词-5000-test-utf-8.txt";
    BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf8"));
View Full Code Here

   * @param args
   * @throws Exception
   */
  public static void main(String[] args) throws Exception {
    // Load the dependency-parser model into the `parser` field.
    parser = new JointParser("../models/dep.m");
    // Build the tagger from the seg.m and pos.m model files and store it in `tag`.
    tag = new POSTagger("../models/seg.m","../models/pos.m");

    // Run the no-argument test routine (defined outside this excerpt).
    test();

  }
View Full Code Here

   * @throws IOException
   * @throws 
   */
  public static void main(String[] args) throws Exception {
    // TODO Auto-generated method stub
    tag = new POSTagger("models/seg.m","models/pos.m");
   
    ArrayList<String> str = MyCollection.loadList("./testcase/test case pos.txt",null);
    str.add("周杰伦 生 于 台湾\n我们");
    str.add("分析和比较");
   
View Full Code Here

TOP

Related Classes of org.fnlp.nlp.cn.tag.POSTagger

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.