Package gannuWSD.bowmodifiers

Source Code of gannuWSD.bowmodifiers.WikiCleaner

package gannuWSD.bowmodifiers;

import java.util.ArrayList;

import gannuNLP.data.Lemma;
import gannuNLP.data.Relation;
import gannuNLP.data.Sense;
import gannuNLP.dictionaries.DataBroker;
import gannuNLP.dictionaries.Wiki;
/**
* Removes language-specific, undesired pages (for example talk pages and category pages)
* that were included as senses. Always use this modifier when Wikipedia is your main
* dictionary. To support languages other than English and Spanish, modify this filter so
* that it also covers such pages in those languages.
* @author Francisco Viveros-Jiménez. 
*/

public class WikiCleaner extends BoWModifier {
  /**
   * Instantiate this filter.
   */
  public WikiCleaner() {
    super("WikiCleaner");
    // TODO Auto-generated constructor stub
  }

  /**
   *
   */
  private static final long serialVersionUID = 1L;

  @Override
  /**
   * This filter does not need to initiate anything.
   */
  public void init() {

  }

  @Override
  /**
   *  This method removes undesired senses from a lemma extracted from Wikipedia.
   *  @param lemma The target lemma.
   */
  public void modifyBow(Lemma lemma) throws Exception {
    ArrayList<Sense> removeList=new ArrayList<Sense>();
    Wiki w=(Wiki)((DataBroker)this.dict).getSource();
    int i=0;
   
    for(Sense s:lemma.getSenses())
    {
      if(w.isNotAnArticle(s.getSid()))
      {
        removeList.add(s);
      }
      i++;
    }
    for(Sense s:removeList)
    {
      lemma.getSenses().remove(s);
    }
    for(Sense s:lemma.getSenses())
    {
      for(ArrayList<Relation> list: s.getRelations().values())
      {
       
        for(int j=0;j<list.size();j++)
        {
          if(w.isNotAnArticle(list.get(j).getSid()))
          {
            list.remove(j);
            j--;
          }
       
        }
      }
    }
  }

}
TOP

Related Classes of gannuWSD.bowmodifiers.WikiCleaner

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.