Source Code of org.fnlp.demo.nlp.PartsOfSpeechTag

/**
*  This file is part of FNLP (formerly FudanNLP).
*  
*  FNLP is free software: you can redistribute it and/or modify
*  it under the terms of the GNU Lesser General Public License as published by
*  the Free Software Foundation, either version 3 of the License, or
*  (at your option) any later version.
*  
*  FNLP is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU Lesser General Public License for more details.
*  
*  You should have received a copy of the GNU General Public License
*  along with FudanNLP.  If not, see <http://www.gnu.org/licenses/>.
*  
*  Copyright 2009-2014 www.fnlp.org. All rights reserved. 
*/


package org.fnlp.demo.nlp;


import org.fnlp.ml.types.Dictionary;
import org.fnlp.nlp.cn.tag.CWSTagger;
import org.fnlp.nlp.cn.tag.POSTagger;


/**
 * 词性标注使用示例
 * @author xpqiu
 *
 */
public class PartsOfSpeechTag {
  
  static POSTagger tag;


  /**
   * 主程序
   * @param args
   * @throws Exception
   * @throws  
   */
  public static void main(String[] args) throws Exception {


    
    CWSTagger cws = new CWSTagger("../models/seg.m");
    tag = new POSTagger(cws,"../models/pos.m");
    
    System.out.println("得到支持的词性标签集合");
    System.out.println(tag.getSupportedTags());
    System.out.println(tag.getSupportedTags().size());
    System.out.println("\n");
    
    String str = "媒体计算研究所成立了，高级数据挖掘很难。乐phone很好！";
    String s = tag.tag(str);
    System.out.println("处理未分词的句子");
    System.out.println(s);
    
    System.out.println("使用英文标签");
    tag.SetTagType("en");    
    System.out.println(tag.getSupportedTags());
    System.out.println(tag.getSupportedTags().size());
    s = tag.tag(str);
    System.out.println(s);    
    System.out.println();
    
    CWSTagger cws2 = new CWSTagger("../models/seg.m", new Dictionary("../models/dict.txt"));
    
    //bool值指定该dict是否用于cws分词（分词和词性可以使用不同的词典）
    tag = new POSTagger(cws2, "../models/pos.m"
        , new Dictionary("../models/dict.txt"), true);//true就替换了之前的dict.txt
    tag.removeDictionary(false);//不移除分词的词典
    tag.setDictionary(new Dictionary("../models/dict.txt"), false);//设置POS词典，分词使用原来设置
    
    String str2 = "媒体计算研究所成立了，高级数据挖掘很难。乐phone很好！";
    String s2 = tag.tag(str2);
    System.out.println("处理未分词的句子，使用词典");
    System.out.println(s2);
    System.out.println();
    
    Dictionary dict = new Dictionary();
    dict.add("媒体计算","mypos1","mypos2");
    dict.add("乐phone","专有名");
    tag.setDictionary(dict, true);
    String s22 = tag.tag(str2);
    System.out.println(s22);
    System.out.println();
    
    POSTagger tag1 = new POSTagger("../models/pos.m");
    String str1 = "媒体计算 研究所 成立 了 , 高级 数据挖掘 很 难";
    String[] w = str1.split(" ");
    String[] s1 = tag1.tagSeged(w);
    System.out.println("直接处理分好词的句子:++++++++++");
    for(int i=0;i<s1.length;i++){
      System.out.print(w[i]+"/"+s1[i]+" ");
    }
    System.out.println("\n");
    
    POSTagger tag3 = new POSTagger("../models/pos.m", new Dictionary("../models/dict.txt"));
    String str3 = "媒体计算 研究所 成立 了 , 高级 数据挖掘 很 难 ";
    String[] w3 = str3.split(" ");
    String[] s3 = tag3.tagSeged(w3);
    System.out.println("直接处理分好词的句子，使用词典");
    for(int i=0;i<s3.length;i++){
      System.out.print(w3[i]+"/"+s3[i]+" ");
    }
    System.out.println("\n");
    
    //????????????????????????????
    
    System.out.println("重新构造");
    cws = new CWSTagger("../models/seg.m");
    tag = new POSTagger(cws,"../models/pos.m");
    str = "媒体计算研究所成立了, 高级数据挖掘很难";
    System.out.println(tag.tag(str));
    String[][] sa = tag.tag2Array(str);
    for(int i = 0; i < sa.length; i++) {
      for(int j = 0; j < sa[i].length; j++) {
          System.out.print(sa[i][j] + " ");
      }
      System.out.println();
    }
    
    String s4 = tag.tagFile("../example-data/data-tag.txt");
    System.out.println("\n处理文件：");
    System.out.println(s4);
  }


}
Source Code of org.fnlp.demo.nlp.PartsOfSpeechTag

Related Classes of org.fnlp.demo.nlp.PartsOfSpeechTag