Package gannuWSD.algorithms

Source Code of gannuWSD.algorithms.WSDAlgorithm

package gannuWSD.algorithms;

import gannuNLP.data.AmbiguousWord;
import gannuNLP.data.Input;
import gannuNLP.data.ParamHandler;
import gannuNLP.data.Sense;
import gannuNLP.dictionaries.Dictionary;
import gannuWSD.skipfilters.SkipFilter;
import gannuWSD.testing.Decision;
import gannuWSD.windowfilters.WindowFilter;

import java.util.ArrayList;

/**
* Generic template for a bag-of-words disambiguation algorithm.
* @author Francisco Viveros-Jiménez
*
*/
public abstract class WSDAlgorithm extends ParamHandler {
  /**
   * List containing all the WindowFilter objects that are going to be applied
   * when a context window is extracted by the getWindow methods.
   * Not initialized at declaration; assigned by setWindowFilters or noFilters.
   */
  ArrayList<WindowFilter> windowFilters;
  /**
   * List containing all the SkipFilter objects that are going to be applied
   * before a word is disambiguated (every filter must agree to try the word).
   * Not initialized at declaration; assigned by setSkipFilters or noFilters.
   */
  ArrayList<SkipFilter> skipFilters;
  /**
   *
   * @return this.name
   */
  public String getName()
  {
    return this.name;
  }
  /**
   *
   * @return this.skipFilters
   */
  public ArrayList<SkipFilter> getSkipFilters() {
    return skipFilters;
  }
  /**
   * Sets the list of SkipFilter objects that are going to be applied.
   * @param skipFilters New list of SkipFilter objects.
   */
  public void setSkipFilters(ArrayList<SkipFilter> skipFilters) {
    this.skipFilters = skipFilters;
  }
  /**
   * Instantiates a new WSDAlgorithm.
   */
  public WSDAlgorithm()
  {
    super();
  }
  /**
   * Sets the list of WindowFilter objects that are going to be applied.
   * @param filters New list of WindowFilter objects.
   */
  public void setWindowFilters(ArrayList<WindowFilter> filters)
  {
    this.windowFilters=filters;   
  }
  /**
   * Class name of this WSDAlgorithm; returned by getName and used as the prefix of toString.
   * NOTE(review): never assigned inside this class - presumably set by subclasses; confirm.
   */
  String name;
  /**
   * Base dictionary. Replaced by setDict, and by solve with the dictionary it receives.
   */
  Dictionary dict;
  /**
   *
   * @return this.dict
   */
  public Dictionary getDict() {
    return dict;
  }

 
  /**
   * Sets the new base dictionary.
   * @param dict New base dictionary.
   */
  public void setDict(Dictionary dict) {
    this.dict = dict;
  }
 
  /**
   * Initializes all the parameters specified when needed.
   * solve and solveMultiple call this on the main algorithm, and on the tie and
   * back-off algorithms when they are given, before any word is disambiguated.
   * @param document Target document that is going to be disambiguated.
   * @throws Exception When the implementation cannot initialize itself; the exact conditions are defined by each subclass.
   */
  public abstract void init(Input document) throws Exception;
 
  /**
   * Retrieves a window excluding a target word.
   * @param target The target word that is going to be excluded.
   * @param window Base word set for extracting a context window.
   * @return A context window extracted by taking the WindowFilters into consideration.
   * @throws Exception
   */
  public ArrayList<AmbiguousWord> getWindow(AmbiguousWord target,ArrayList<AmbiguousWord> window) throws Exception
  {
    ArrayList<AmbiguousWord> w=new ArrayList<AmbiguousWord>(window.size());
    for(AmbiguousWord possibleWord:window)
    {
      boolean ban=true;
      for(WindowFilter filter:this.windowFilters)
      {
        ban=ban&&filter.satisfiesCondition(target, possibleWord, w);
      }
      if(ban)
      {
        w.add(possibleWord);
      }
    }
    return w;
  }
  /**
   * Retrieves a window excluding a target word.
   * @param target Index of a target word.
   * @param document Target text containing all the words.
   * @return A context window of size="windowSize" extracted by taking the WindowFilters into consideration.
   * @throws Exception
   */
  public ArrayList<AmbiguousWord> getWindow(int target,Input document) throws Exception
  {
    if(this.getValue("windowSize")==null)
    {
      this.addParameters("windowSize:1;");
    }
     
    int windowSize=Integer.parseInt(this.getValue("windowSize"));
    ArrayList<AmbiguousWord> window=new ArrayList<AmbiguousWord>(windowSize);
    AmbiguousWord targetWord=document.getAmbiguousWords().get(target);
    int i=1;
    // Retrieve window
    while((window.size()<windowSize)&&((target-i>=0)||(target+i<document.getAmbiguousWords().size())))
    {
      if(target-i>=0)
      {
        AmbiguousWord word=document.getAmbiguousWords().get(target-i);
        boolean ban=true;
        for(WindowFilter condition:this.windowFilters)
        {
          if(condition.satisfiesCondition(targetWord, word, window)==false)
            {
              ban=false;
              break;
            }
        }
        if(ban)
        {
          window.add(word);
        }
      }
      if(window.size()==windowSize)
        break;
      if(target+i<document.getAmbiguousWords().size())
      {
        AmbiguousWord word=document.getAmbiguousWords().get(target+i);
        boolean ban=true;
        for(WindowFilter condition:this.windowFilters)
        {
          if(!condition.satisfiesCondition(targetWord, word, window))
          {
            ban=false;
            break;
          }
        }
        if(ban)
        {
          window.add(word);
        }
      }
      i++;
    }

    return window;
  }
 
  /**
   * Tells the disambiguation algorithm to solve an input document.
   * @param document The disambiguated document.
   * @param backoff Back-off strategy.
   * @param tie Algorithm to be used for solving ties.
   * @param dict Base dictionary.
   * @return Decisions made by this algorithm.
   */
  public ArrayList<Decision> solve(Input document,  WSDAlgorithm backoff, WSDAlgorithm tie,Dictionary dict)throws Exception
  {
    for(WindowFilter filter:windowFilters)
    {
      filter.initWithDocument(document);
    }
    this.init(document);
    if(tie!=null)
    {
      tie.init(document);
    }
    if(backoff!=null)
    {
      backoff.init(document);
    }
   
    this.dict=dict;
    ArrayList<Decision> decisions=new ArrayList<Decision>(document.getAmbiguousWords().size());
    int target=0;
    for(AmbiguousWord targetWord:document.getAmbiguousWords())
    {
      boolean ban=true;
      for(SkipFilter skip:this.skipFilters)
        ban=ban&&skip.shouldITry(targetWord);
      if(targetWord.getSenses().size()>0)
      {
        ArrayList<AmbiguousWord> window= this.getWindow(target, document);       
        Decision d=new Decision(targetWord,window);
        d.calculateAnswer();
        if(ban)
        {
          d=this.disambiguate(targetWord, window);
          String t="true";
          if(t.equals(this.getValue("asFilter")))
          {
            for(int x=0;x<d.getSenseCount();x++)
            {
              if(d.getWeights()[x]>0.0)
              {
                d.getWeights()[x]=1.0;
              }
            }
            d.calculateAnswer();
          }
         
          if(d.isAttempted())
          {
            if(t.equals(this.getValue("asVoting")))
            {
              Decision d2=tie.disambiguate(targetWord, window);
              //normalize the weigths
              d.normalizeWeigths();
              if(d2.isAttempted())
              {
                d2.normalizeWeigths();
                for(int x=0;x<d.getSenseCount();x++)
                  d.getWeights()[x]+=d2.getWeights()[x];
                d.calculateAnswer();
              }
            } 
            else
            {
              if(d.isTied()&&tie!=null)
              {               
                d.unTie(tie.disambiguate(new AmbiguousWord(targetWord,d), tie.getWindow(target, document)));
              }
            }
                   
          }
          else
          {
            if(backoff!=null)
            {
              d=backoff.disambiguate(targetWord, backoff.getWindow(target, document));
            }
          }

        }
        decisions.add(d);
      }
      target++;
    }
    return decisions;
  }
 
  /**
   *
   * @return this.windowFilters
   */
  public ArrayList<WindowFilter> getWindowFilters() {
    return windowFilters;
  }
  /**
   * Disambiguate a group of words simultaneously.
   * Some algorithms like Lesk or GraphInDegree have its own implementation of this method for creating a great performance boost.
   * @param document Target document.
   * @param backoff Back-off strategy.
   * @param tie Algorithm to be used for solving ties.
   * @param dict Base dictionary.
   * @return Decisions made by this algorithm.
   * @throws Exception
   */
  public ArrayList<Decision> solveMultiple(Input document,  WSDAlgorithm backoff, WSDAlgorithm tie,Dictionary dict)throws Exception
  {
    this.init(document);
    if(tie!=null)
      tie.init(document);
    if(backoff!=null)
      backoff.init(document);
    ArrayList<Decision> decisions=new ArrayList<Decision>();
    ArrayList<ArrayList<AmbiguousWord>> chunks=new ArrayList<ArrayList<AmbiguousWord>>();;
    if(this.getValue("windowSize").equals("text"))
    {
      chunks.add(document.getAmbiguousWords());
    }
    else
    {
      chunks.addAll(document.getSentences());
    }
    for(ArrayList<AmbiguousWord> sentence:chunks)
    {
      ArrayList<Decision> dxs=this.disambiguate(sentence);   
      ArrayList<Decision> ds=new ArrayList<Decision>(dxs.size());
      for(Decision d:dxs)
      {
        String t="true";
        if(t.equals(this.getValue("asFilter")))
        {
          for(int x=0;x<d.getSenseCount();x++)
          {
            if(d.getWeights()[x]>0.0)
            {
              d.getWeights()[x]=1.0;
            }
          }
          d.calculateAnswer();
        }
        ArrayList<AmbiguousWord> window=new ArrayList<AmbiguousWord>();
        Decision dx=d;
        if(d.isAttempted())
        {
          if(d.isTied()&&tie!=null)
          {
            boolean bnx=true;
            for(SkipFilter filter:tie.getSkipFilters())
              bnx=bnx&&filter.shouldITry(d.getTarget());
            if(bnx)
            {
              window.addAll(sentence);
              window.remove(d.getTarget());
              d.unTie(tie.disambiguate(new AmbiguousWord(d.getTarget(),d), tie.getWindow(d.getTarget(), window)));
            }           
          }       
        }
        else
        {
          if(backoff!=null)
          {
            boolean bnx=true;
            for(SkipFilter filter:backoff.getSkipFilters())
              bnx=bnx&&filter.shouldITry(d.getTarget());
            if(bnx)
            {
              window.addAll(sentence);
              window.remove(d.getTarget());
              dx=backoff.disambiguate(d.getTarget(), backoff.getWindow(d.getTarget(), window));
            }                       
          }
        }
        ds.add(dx);
      }
      this.fillAnswerSet(ds, sentence);
      for(int i=0;i<ds.size();i++)
      {
        if(ds.get(i).getTarget().getSenses().size()==0)
        {
          ds.remove(i);
          i--;
        }
      }
      if(ds.size()>0)
        decisions.addAll(ds);
    }
    return decisions;
  }
  /**
   * Internal method for filling an answer set in case of existing duplicate answer.
   * E.G. for example when using the whole document as a window there
   * will be duplicate answers for words having more than a single occurrence.
   * @param ds Target answer set.
   * @param window The words being disambiguated simultaneously.
   * @return Number of answers added.
   */
  int fillAnswerSet(ArrayList<Decision> ds, ArrayList<AmbiguousWord> window)
  {
    int fill=0;
    for(int i=0;ds.size()<window.size();i++)
    {
      if(i>=ds.size()||!ds.get(i).getTarget().getLemma().equals(window.get(i).getLemma()))
      {
        for(Decision d:ds)
        {
          if(d.getTarget().getLemma().equals(window.get(i).getLemma()))
          {
            ds.add(i, d);
            fill++;
            break;
          }
        }
      }
    }
    return fill;
  }
 
  /**
   * Disambiguates a target word using a context window.
   * This is the per-word step used by solve and by the group version of disambiguate.
   * @param target The AmbiguousWord being disambiguated.
   * @param window The context window that helps the disambiguation process.
   * @return The Decision made by the algorithm.
   * @throws Exception When the implementation fails; the exact conditions are defined by each subclass.
   */
  public abstract Decision disambiguate(AmbiguousWord target,ArrayList<AmbiguousWord> window) throws Exception;
  /**
   * Method that tells if a word increases the disambiguation score of a sense of a target word.
   * @param target Word being disambiguated.
   * @param windowWord Word being assessed.
   * @return True if the word being assessed changes the disambiguation score of a sense of the target word.
   * @throws Exception When the implementation fails; the exact conditions are defined by each subclass.
   */
  public abstract boolean IsUseful(AmbiguousWord target, AmbiguousWord windowWord) throws Exception;
  /**
   * Disambiguates a set of words simultaneously.
   * @param window Target set of words.
   * @return Decisions made by this algorithm.
   * @throws Exception
   */
  public ArrayList<Decision> disambiguate(ArrayList<AmbiguousWord> window) throws Exception
  {
    ArrayList<Decision> ds=new ArrayList<Decision>();
    ArrayList<AmbiguousWord> targets=new ArrayList<AmbiguousWord>(window.size());
    for(AmbiguousWord word:window)
    {
      boolean ban=true;
      for(AmbiguousWord target:targets)
      {
        if(target.getLemma().equals(word.getLemma()))
        {
          ban=false;
          break;
        }
      }
      if(ban)
        targets.add(word);
    }
    for(AmbiguousWord target:targets)
    {
      boolean ban=true;
      for(SkipFilter skip:this.skipFilters)
        ban=ban&&skip.shouldITry(target);
      if(ban)
      {
        ArrayList<AmbiguousWord> w=new ArrayList<AmbiguousWord>();
        w.addAll(window);
        w.remove(target);
        ds.add(this.disambiguate(target, this.getWindow(target, w)));
      }
      else
      {
        Decision d=new Decision(target,window);
        d.calculateAnswer();
        ds.add(d);
      }
    }
    return ds;
  }


  /**
   * Returns this.name+":"+super.toString().
   */
  public String toString()
  {
    return this.name+":"+super.toString();
  }
  /**
   * Tells if the overlap between a sense and a lemma exists.
   * @param sense Target sense.
   * @param lemma Target lemma.
   * @return true when there is an overlap.
   */
  public boolean overlap(Sense sense,String lemma)
  {
    for(String word:sense.getBagOfWords())
      if(word.equals(lemma))
          return true;
      return false;
  }
 
  /**
   * Returns the overlap between two senses.
   * @param sense Sense 1.
   * @param sense2 Sense 2.
   * @param dwords ArrayList for storing the overlapping words.
   * @return true when there is an overlap.
   */
  public boolean overlap(Sense sense,Sense sense2,ArrayList<String> dwords)
  {
    boolean ban=false;
      for(String oword:sense2.getBagOfWords())
      {
        if(this.overlap(sense, oword))
        {
          dwords.add(oword);
          ban=true;
        }
      }
      return ban;
  }
  /**
   * Returns the overlap between two senses.
   * @param sense Sense 1.
   * @param sense2 Sense 2.
   * @return true when there is an overlap.
   */
  public int overlap(Sense sense,Sense sense2)
  {
    int overlap=0;
      for(String oword:sense2.getBagOfWords())
      {
        for(String word:sense.getBagOfWords())
        {
          if(word.equals(oword))
            overlap++;
        }
      }
      return overlap;
  }
 
  /**
   * Returns the overlap between a sense and an AmbiguousWord.
   * @param sense Target sense.
   * @param lemma Target AmbiguousWord.
   * @param dwords ArrayList for storing the overlapping words.
   * @return true when there is an overlap.
   * @throws Exception
   */
  public boolean overlap(Sense sense,AmbiguousWord lemma,ArrayList<String> dwords) throws Exception
  {
    boolean ban=false;
      for(Sense nsense:lemma.getSenses())
        if(this.overlap(sense, nsense, dwords))
          ban=true;
      return ban;
  }
  /**
   * Method for setting a no-filter environment (E.G. no-SkipFilters and no-WindowFilters).
   */
  public void noFilters()
  {
    this.windowFilters=new ArrayList<WindowFilter>(1);
    this.skipFilters=new ArrayList<SkipFilter>(1);
  }
 
}
TOP

Related Classes of gannuWSD.algorithms.WSDAlgorithm

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.