package gannuWSD.bowmodifiers;
import java.util.ArrayList;
import gannuNLP.data.Lemma;
import gannuNLP.overlapmeasures.Length;
/**
* Class for creating pseudo-clusters of senses of a target lemma.
* First, this filter calculates the Lesk similarity measure with the current bag of words.
* Then, all senses whose similarity is above a user-defined threshold are homologated (i.e. they will share the same bag of words).
* Define this threshold by setting the parameter "threshold:value", where value is a real number in the [0.0,1.0] range.
*
* @author Francisco Viveros-Jiménez.
*/
public class LeskClusters extends BoWModifier {
	/** Minimum normalized overlap two senses must exceed to be merged; parsed from the "threshold" parameter in {@link #init()}. */
	double threshold;
	/** Measure used both to size each sense and to compute the pairwise overlap; created in {@link #init()}. */
	Length scoring;
	private static final long serialVersionUID = 1L;

	/**
	 * Instantiate this filter.
	 */
	public LeskClusters() {
		super("LeskCluster");
	}

	/**
	 * Reads the "threshold" parameter and creates the overlap measure.
	 * When no "threshold" value is present, the default "threshold:0.3;" is registered first.
	 *
	 * @throws NumberFormatException if the "threshold" value cannot be parsed as a double.
	 */
	@Override
	public void init() {
		if (this.getValue("threshold") == null) {
			this.addParameters("threshold:0.3;");
		}
		this.threshold = Double.parseDouble(this.getValue("threshold"));
		scoring = new Length();
	}

	/**
	 * Homologates the bags of words of similar senses of a target lemma.
	 * For every pair of senses (i, j) with i &lt; j, the overlap of their bags is divided by the
	 * larger of the two sense sizes; when that ratio is strictly greater than the threshold,
	 * the bag of sense j is appended to the bag of sense i and both senses end up
	 * referencing that same merged bag.
	 * Lemmas with fewer than two senses are left untouched.
	 *
	 * @param lemma The target lemma.
	 * @throws Exception if the underlying overlap measure fails.
	 */
	@Override
	public void modifyBow(Lemma lemma) throws Exception {
		int senseCount = lemma.getSenses().size();
		// Nothing to cluster with zero or one sense (the zero case used to index position -1).
		if (senseCount < 2) {
			return;
		}
		// All sizes and overlaps are taken BEFORE any merge so that every decision is
		// based on the original bags, not on bags already enlarged by an earlier merge.
		double[][] overlap = new double[senseCount][senseCount];
		double[] lengths = new double[senseCount];
		for (int i = 0; i < senseCount; i++) {
			lengths[i] = this.scoring.getSize(lemma.getSenses().get(i));
			for (int j = i + 1; j < senseCount; j++) {
				overlap[i][j] = this.scoring.calculateOverlap(
						lemma.getSenses().get(i).getBagOfWords(),
						lemma.getSenses().get(j).getBagOfWords(),
						new ArrayList<String>());
			}
		}
		for (int i = 0; i < senseCount - 1; i++) {
			for (int j = i + 1; j < senseCount; j++) {
				// Normalize by the larger of the two sizes.
				double longest = lengths[i] > lengths[j] ? lengths[i] : lengths[j];
				// If both sizes are 0 the ratio is NaN, the comparison is false and nothing is merged.
				if (overlap[i][j] / longest > this.threshold) {
					// A previous merge may already have made both senses share one bag object.
					// Appending a list to itself would duplicate all of its words, so skip it.
					if (lemma.getSenses().get(i).getBagOfWords() != lemma.getSenses().get(j).getBagOfWords()) {
						lemma.getSenses().get(i).getBagOfWords().addAll(lemma.getSenses().get(j).getBagOfWords());
						lemma.getSenses().get(j).setBagOfWords(lemma.getSenses().get(i).getBagOfWords());
					}
				}
			}
		}
	}
}