Package org.fnlp.nlp.corpus

Examples of org.fnlp.nlp.corpus.StopWords


public class KeyWordExtraction {
 
  public static void main(String[] args) throws Exception {
   
   
    StopWords sw= new StopWords("../models/stopwords");
    CWSTagger seg = new CWSTagger("../models/seg.m");
    AbstractExtractor key = new WordExtract(seg,sw);
   
    System.out.println(key.extract("甬温线特别重大铁路交通事故车辆经过近24小时的清理工作,26日深夜已经全部移出事故现场,之前埋下的D301次动车车头被挖出运走", 20, true));
   
View Full Code Here


    BufferedReader in = new BufferedReader(new InputStreamReader(
        new FileInputStream(infile ), "utf8"));

    //    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
//        outfile), enc2));
    StopWords sw = new StopWords(stopwordfile);
   
    LabelAlphabet dict = new LabelAlphabet();
    // words in documents
    ArrayList<TIntArrayList> documentsList= new ArrayList<TIntArrayList>();
   
   
    String line = null;
    while ((line = in.readLine()) != null) {
      line = line.trim()
      if(line.length()==0)
        continue;
      String[] toks = line.split("\\s+");
      TIntArrayList wordlist = new TIntArrayList();
      for(int j=0;j<toks.length;j++){
        String tok = toks[j];
        if(sw.isStopWord(tok))
          continue;
        int idx = dict.lookupIndex(tok);
        wordlist.add(idx);
      }
      documentsList.add(wordlist);
View Full Code Here

    dN = 0.85;
  }
 
  public WordExtract(String segPath, String dicPath) throws Exception{
    tag = new CWSTagger(segPath);
    test = new StopWords(dicPath);
  }
View Full Code Here

    test = new StopWords(dicPath);
  }
 
  public WordExtract(CWSTagger tag, String dicPath){
    this.tag = tag;
    test = new StopWords(dicPath);
  }
View Full Code Here

TOP

Related Classes of org.fnlp.nlp.corpus.StopWords

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.