Package gannuWSD.bowmodifiers

Source Code of gannuWSD.bowmodifiers.AddCorpusRelatedLemmas

package gannuWSD.bowmodifiers;

import gannuNLP.corpus.ContainsLemmaFilter;
import gannuNLP.corpus.Corpus;
import gannuNLP.corpus.WSM;
import gannuNLP.data.Input;
import gannuNLP.data.Lemma;
import gannuNLP.data.Sense;
import gannuNLP.keywordextraction.KeywordExtractor;
import gannuNLP.keywordextraction.KeywordsByTFIDF;

import java.util.ArrayList;
/**
* Class for adding all the keywords of the documents related to a concept in a corpus.
* Please specify the corpus through the corpus parameter.
* @author Francisco Viveros-Jiménez. 
*/
public class AddCorpusRelatedLemmas extends BoWModifier {

  /**
   * The base corpus
   */
  static Corpus corpus;

  KeywordExtractor kw;
  private static final long serialVersionUID = 1L;
  /**
   * Instantiate this filter.
   */
  public AddCorpusRelatedLemmas() {
    super("AddCorpusRelatedLemmas");
  }

  @Override
  public void init()throws Exception {
    AddCorpusRelatedLemmas.corpus=new Corpus(this.getValue("corpus"),this.dict,true);
    this.kw=new KeywordsByTFIDF();   
  }

  @Override
  /**
   * This method removes all the duplicated words from all the bag of words of a target lemma.
   * @param lemma The target lemma.
   */
  public void modifyBow(Lemma lemma) throws Exception {
    ContainsLemmaFilter filter=new ContainsLemmaFilter("");   
    Corpus aux=new Corpus(corpus);
    filter.filter(aux, lemma);
    ArrayList<WSM> wsm=new ArrayList<WSM>();
    for(Input document:aux.getDocuments())
    {
      wsm.addAll(this.kw.extractKeywords(document, 10, true));
    }
    for(Sense s:lemma.getSenses())
    {
      for(WSM word:wsm)
      {
        s.getBagOfWords().add(word.getDimension());
      }
    }
  }

}
TOP

Related Classes of gannuWSD.bowmodifiers.AddCorpusRelatedLemmas

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.