Package gannuWSD.algorithms

Source Code of gannuWSD.algorithms.Lesk

package gannuWSD.algorithms;

import gannuNLP.data.AmbiguousWord;
import gannuNLP.data.Input;
import gannuNLP.data.Sense;
import gannuNLP.overlapmeasures.OverlapMeasure;
import gannuNLP.overlapmeasures.Word;
import gannuOP.algorithms.Optimizer;
import gannuOP.testing.Report;
import gannuWSD.skipfilters.SkipFilter;
import gannuWSD.testing.Decision;
import gannuWSD.windowfilters.WindowFilter;

import java.util.ArrayList;



/**
* WSD algorithms as proposed by Lesk in "Automatic sense disambiguation using
* machine readable dictionaries: how to tell a pine cone from an ice cream cone".
* You can specify the OverlapMeasure by setting the parameter "scoring:gannu.overlapmeasures.yourSelectedClassName;".
* You can set the optimizer by setting
* the parameter "optimizer:algoriths.yourEvolutionJOptimizer;"
* and "config:Parameter1=value1[,ParameterN=valueN];" for setting its parameters.
* Finally, you can set the maximum function evaluations for the optimizer by setting the parameter "fes:value;".
* @author Francisco Viveros-Jiménez
*
*/
public class Lesk extends WSDAlgorithm {
  /**
   * Optimizer object part of the evolutionJ package.
   */
    Optimizer optimizer;
    /**
     * Maximum function evaluations for being used in the disambiguation algorithm.
     */
    double fes;
    /**
     * OverlapMeasure for calculating the overlap score between senses.
     */
    OverlapMeasure scoring;
  public Lesk()
  {
    super();
    this.name="Lesk";
  }

  @Override
  public Decision disambiguate(AmbiguousWord target,
      ArrayList<AmbiguousWord> window) throws Exception {
    System.out.print(".");
    ArrayList<AmbiguousWord> sentence=new ArrayList<AmbiguousWord>();
    sentence.add(target);
    sentence.addAll(window);
    LeskFunction f=new LeskFunction(window.size()+1);
    f.init(sentence, this);
    int siz=0;
    for(AmbiguousWord word:sentence)
    {
      if(word.getSenses().size()>1)
        siz++;
    }
    ArrayList<Report> answers=optimizer.solve(this.getValue("config"), f, (double)(siz*fes),(double)(siz*fes) ,-1e+20);
    Report answer=answers.get(answers.size()-1);
    double X[]=answer.getBestIndividual()
    Decision d=new Decision(target, window);
    int xindex=(int)Math.floor(X[0]);
    int x=0;
    for(Sense s:target.getSenses())
    {
      ArrayList<String> dwords=new ArrayList<String>();
      for(AmbiguousWord w:window)
      {
        if(this.overlap(s, w, dwords))
        {
          dwords.add(w.getLemma());
        }
      }
      double w=0.0;
      if(x==xindex)
      {
        if(dwords.size()>0)
          w=-X[X.length-1];       
      }
      else
      {
        w=((double)dwords.size())*0.001;
      }
      dwords.trimToSize();
      d.setSense(x, w, dwords);
      x++;
    }
    d.calculateAnswer();     
    return d;
  }

  @Override
  public void init(Input document)throws Exception {
    if(this.getValue("scoring")==null)
    {
      this.scoring=(OverlapMeasure)new Word();
    }
    else
    {
      this.scoring=(OverlapMeasure)Class.forName(this.getValue("scoring")).newInstance();
    }

    if(document!=null)
      System.out.println("\nDisambiguating "+document.toString());
    if(this.getValue("optimizer")==null)
    {
      this.addParameters("optimizer:gannuOP.algorithms.single_point.PS");
      this.addParameters("config:R=30,B=0.5");
    }
    if(this.getValue("fes")==null)
    {
      this.fes=30.0;
    }
    else
    {
      this.fes=Double.parseDouble(this.getValue("fes"));
    }
    optimizer=(Optimizer)Class.forName(this.getValue("optimizer")).newInstance();
  }
  @Override
  public ArrayList<Decision> disambiguate(ArrayList<AmbiguousWord> Window)
      throws Exception {
    ArrayList<AmbiguousWord> sentence=new ArrayList<AmbiguousWord>(Window.size());
    for(AmbiguousWord possibleWord:Window)
    {
      boolean ban=true;     
      for(WindowFilter filter:this.windowFilters)
      {
        ban=ban&&filter.satisfiesCondition(possibleWord, possibleWord, sentence);
      }
      if(ban)
      {
        sentence.add(possibleWord);
      }
    }
    ArrayList<Decision> ds=new ArrayList<Decision>(sentence.size());
    LeskFunction f=new LeskFunction(sentence.size());
    f.init(sentence, this);
    int siz=0;
    for(AmbiguousWord word:sentence)
    {
      if(word.getSenses().size()>0)
        siz++;
    }
    ArrayList<Report> answers=optimizer.solve(this.getValue("config"), f, (double)(siz*fes),(double)(siz*fes) ,-1e+20);
    Report answer=answers.get(answers.size()-1);
    double X[]=answer.getBestIndividual();
    for(int word=0;word<sentence.size();word++)
    {
      System.out.print(".");
      ArrayList<AmbiguousWord> window=new ArrayList<AmbiguousWord>();
      AmbiguousWord target=sentence.get(word);
      window.addAll(sentence);
      window.remove(target);
      Decision d=new Decision(target, window);
      boolean ban=true;
      for(SkipFilter filter:this.skipFilters)
        ban=ban&&filter.shouldITry(target);
      if(ban)
      {
        int xindex=(int)Math.floor(X[word]);
        int x=0;
        for(Sense s:target.getSenses())
        {
          ArrayList<String> dwords=new ArrayList<String>();
          for(AmbiguousWord w:window)
          {
            this.overlap(s, w, dwords);
          }
          double w=0.0;
          if(x==xindex)
          {
            if(dwords.size()>0)
              w=-X[X.length-1];       
          }
          else
          {
            w=((double)dwords.size())*0.001;
          }
          dwords.trimToSize();
          d.setSense(x, w, dwords);
          x++;
        }
      }
      d.calculateAnswer();     
      ds.add(d);
    }   
    return ds;

  }
 
  @Override
  public boolean IsUseful(AmbiguousWord target, AmbiguousWord windowWord)
      throws Exception {
        for(Sense s:target.getSenses())
          for(Sense s2:windowWord.getSenses())
            for(String word:s.getBagOfWords())
              for(String pword:s2.getBagOfWords())
                if(word.equals(pword))
                  return true;
    return false;
  }
}
TOP

Related Classes of gannuWSD.algorithms.Lesk

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.