Package gannuWSD.bowmodifiers

Source Code of gannuWSD.bowmodifiers.LeskClusters

package gannuWSD.bowmodifiers;

import java.util.ArrayList;

import gannuNLP.data.Lemma;
import gannuNLP.overlapmeasures.Length;

/**
* Class for creating pseudo-clusters of senses of a target lemma.
* First, this filter calculates the Lesk similarity measure with the current bag of words.
* Then, all senses having a similarity of above a user-defined threshold will be homologated (i.e. they will have the same bag of words).
* Please defined this theshold by setting the parameter "threshold:value" where value is a real number in [0.0,1.0] range.
* @author Francisco Viveros-Jiménez. 
*/
public class LeskClusters extends BoWModifier {


  double threshold;
  Length scoring;
  private static final long serialVersionUID = 1L;

  /**
   * Instantiate this filter.
   */
  public LeskClusters() {
    super("LeskCluster");
  }

  @Override
  public void init() {

    if(this.getValue("threshold")==null)
    {
      this.addParameters("threshold:0.3;");
    }
    this.threshold=Double.parseDouble(this.getValue("threshold"));
    scoring=new Length();
  }

  @Override
  /**
   * This method extends the bag of words of each sense of a target lemma with its synonyms.
   * @param lemma The target lemma.
   */
  public void modifyBow(Lemma lemma) throws Exception{
    double w[][]=new double[lemma.getSenses().size()][lemma.getSenses().size()];
    double lengths[]=new double[lemma.getSenses().size()];
    boolean checked[]=new boolean[lemma.getSenses().size()];
    for(int i=0;i<(lemma.getSenses().size()-1);i++)
    {
      lengths[i]=this.scoring.getSize(lemma.getSenses().get(i));
      checked[i]=false;
      for(int j=i+1;j<lemma.getSenses().size();j++)
      {
        w[i][j]=this.scoring.calculateOverlap(lemma.getSenses().get(i).getBagOfWords(),lemma.getSenses().get(j).getBagOfWords() , new ArrayList<String>());
       
      }
    }
    lengths[lemma.getSenses().size()-1]=this.scoring.getSize(lemma.getSenses().get(lemma.getSenses().size()-1));
    checked[lemma.getSenses().size()-1]=false;
    double aux;
    for(int i=0;i<(lemma.getSenses().size()-1);i++)
    {
      for(int j=i+1;j<lemma.getSenses().size();j++)
      {
        if(lengths[i]>lengths[j])
        {
          aux=lengths[i];
        }
        else
        {
          aux=lengths[j];
        }
        if(w[i][j]/aux>this.threshold)
        {           
          lemma.getSenses().get(i).getBagOfWords().addAll(lemma.getSenses().get(j).getBagOfWords());
          lemma.getSenses().get(j).setBagOfWords(lemma.getSenses().get(i).getBagOfWords());
        }
       
      }
    }
  }

}
TOP

Related Classes of gannuWSD.bowmodifiers.LeskClusters

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.