package gannuWSD.bowmodifiers;
import java.util.ArrayList;
import java.util.Iterator;
import gannuNLP.data.Lemma;
import gannuNLP.data.Relation;
import gannuNLP.data.Sense;
import gannuNLP.dictionaries.DataBroker;
import gannuNLP.dictionaries.Wiki;
/**
* Class for removing some language specific undesired pages included as senses. For example:
* talk pages, category pages, etc. Please always use this when using Wikipedia as your main
* dictionary. Also, please modify this filter for including such pages in languages other than
* English and Spanish.
* @author Francisco Viveros-Jiménez.
*/
public class WikiCleaner extends BoWModifier {
    /** Serialization version identifier. */
    private static final long serialVersionUID = 1L;

    /**
     * Instantiates this filter, passing the name "WikiCleaner" to the superclass constructor.
     */
    public WikiCleaner() {
        super("WikiCleaner");
    }

    /**
     * This filter does not need to initiate anything.
     */
    @Override
    public void init() {
    }

    /**
     * Removes undesired senses from a lemma extracted from Wikipedia.
     * <p>
     * Two passes are made over the lemma, both modifying it in place:
     * <ol>
     * <li>every sense whose sid is flagged by {@code Wiki.isNotAnArticle} is removed from
     * the lemma's sense list;</li>
     * <li>for each remaining sense, every relation whose sid is flagged by
     * {@code Wiki.isNotAnArticle} is removed from each of the sense's relation lists.</li>
     * </ol>
     * The {@code Wiki} instance is obtained by casting this modifier's dictionary to
     * {@code DataBroker} and its source to {@code Wiki}; a {@code ClassCastException} is
     * raised if either object has a different type.
     *
     * @param lemma The target lemma.
     * @throws Exception if any call made while filtering fails (the signature is dictated
     *         by the overridden method).
     */
    @Override
    public void modifyBow(Lemma lemma) throws Exception {
        Wiki w = (Wiki) ((DataBroker) this.dict).getSource();

        // Iterator.remove() drops exactly the element that was just tested, so the
        // result does not depend on how Sense implements equals().
        for (Iterator<Sense> senses = lemma.getSenses().iterator(); senses.hasNext();) {
            Sense candidate = senses.next();
            if (w.isNotAnArticle(candidate.getSid())) {
                senses.remove();
            }
        }

        // Second pass: clean the relation lists of the senses that survived.
        for (Sense s : lemma.getSenses()) {
            for (ArrayList<Relation> list : s.getRelations().values()) {
                for (Iterator<Relation> relations = list.iterator(); relations.hasNext();) {
                    Relation relation = relations.next();
                    if (w.isNotAnArticle(relation.getSid())) {
                        relations.remove();
                    }
                }
            }
        }
    }
}