Package org.fnlp.ml.types

Examples of org.fnlp.ml.types.Dictionary


    System.out.println(tag.getSupportedTags().size());
    s = tag.tag(str);
    System.out.println(s);   
    System.out.println();
   
    CWSTagger cws2 = new CWSTagger("../models/seg.m", new Dictionary("../models/dict.txt"));
   
    //bool值指定该dict是否用于cws分词(分词和词性可以使用不同的词典)
    tag = new POSTagger(cws2, "../models/pos.m"
        , new Dictionary("../models/dict.txt"), true);//true就替换了之前的dict.txt
    tag.removeDictionary(false);//不移除分词的词典
    tag.setDictionary(new Dictionary("../models/dict.txt"), false);//设置POS词典,分词使用原来设置
   
    String str2 = "媒体计算研究所成立了,高级数据挖掘很难。乐phone很好!";
    String s2 = tag.tag(str2);
    System.out.println("处理未分词的句子,使用词典");
    System.out.println(s2);
    System.out.println();
   
    Dictionary dict = new Dictionary();
    dict.add("媒体计算","mypos1","mypos2");
    dict.add("乐phone","专有名");
    tag.setDictionary(dict, true);
    String s22 = tag.tag(str2);
    System.out.println(s22);
    System.out.println();
   
    POSTagger tag1 = new POSTagger("../models/pos.m");
    String str1 = "媒体计算 研究所 成立 了 , 高级 数据挖掘 很 难";
    String[] w = str1.split(" ");
    String[] s1 = tag1.tagSeged(w);
    System.out.println("直接处理分好词的句子:++++++++++");
    for(int i=0;i<s1.length;i++){
      System.out.print(w[i]+"/"+s1[i]+" ");
    }
    System.out.println("\n");
   
    POSTagger tag3 = new POSTagger("../models/pos.m", new Dictionary("../models/dict.txt"));
    String str3 = "媒体计算 研究所 成立 了 , 高级 数据挖掘 很 难 ";
    String[] w3 = str3.split(" ");
    String[] s3 = tag3.tagSeged(w3);
    System.out.println("直接处理分好词的句子,使用词典");
    for(int i=0;i<s3.length;i++){
View Full Code Here


    System.out.println("\n设置临时词典:");
    ArrayList<String> al = new ArrayList<String>();
    al.add("数据挖掘");
    al.add("媒体计算研究所");
    al.add("乐phone");
    Dictionary dict = new Dictionary(false);
    dict.addSegDict(al);
    tag.setDictionary(dict);
    s = tag.tag(str);
    System.out.println(s);
   
   
    CWSTagger tag2 = new CWSTagger("../models/seg.m", new Dictionary("../models/dict.txt"));
    System.out.println("\n使用词典的分词:");
    String str2 = "媒体计算研究所成立了, 高级数据挖掘很难。 乐phone热卖!";
    String s2 = tag2.tag(str2);
    System.out.println(s2);
   
    //使用不严格的词典
    CWSTagger tag3 = new CWSTagger("../models/seg.m", new Dictionary("../models/dict_ambiguity.txt",true));
    //尽量满足词典,比如词典中有“成立”“成立了”和“了”, 会使用Viterbi决定更合理的输出
    System.out.println("\n使用不严格的词典的分词:");
    String str3 = "媒体计算研究所成立了, 高级数据挖掘很难";
    String s3 = tag3.tag(str3);
    System.out.println(s3);
View Full Code Here

  public void setDictionary(THashSet<String> newset) {
    if(newset.size()==0)
      return;
    ArrayList<String> al = new ArrayList<String>();
    MyCollection.TSet2List(newset, al);
    Dictionary dict = new Dictionary();
    dict.addSegDict(al);
    setDictionary(dict);

  }
View Full Code Here

   * @throws IOException
   * @throws Exception
   */
  public static void main(String[] args) throws Exception {
    CWSTagger tag = new CWSTagger("./models/seg.m");
    Dictionary dict=new Dictionary();
    dict.addFile("./models/dict.txt");
    tag.setDictionary(dict);
    ArrayList<String> str = MyCollection.loadList("./testcase/test case seg.txt",null);
    for(String s:str){     
      String t = tag.tag(s);
//      t = tag.tag(t);
View Full Code Here

   
    ArrayList<String> al = new ArrayList<String>();
    al.add("兴化市");
    al.add("沈伦镇");
    al.add("樊荣村委会");
    Dictionary dict = new Dictionary(false);
    dict.addSegDict(al);
    tag.setDictionary(dict);
    s = tag.tag(str);
    assertTrue(s.equals("江苏省 兴化市 沈伦镇 樊荣村委会"));
  }
View Full Code Here

TOP

Related Classes of org.fnlp.ml.types.Dictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.