package gannuWSD.algorithms;
import gannuNLP.data.AmbiguousWord;
import gannuNLP.data.Input;
import gannuNLP.data.ParamHandler;
import gannuNLP.data.Sense;
import gannuNLP.dictionaries.Dictionary;
import gannuWSD.skipfilters.SkipFilter;
import gannuWSD.testing.Decision;
import gannuWSD.windowfilters.WindowFilter;
import java.util.ArrayList;
/**
* Generic template for a bag-of-words disambiguation algorithm.
* @author Francisco Viveros-Jiménez
*
*/
public abstract class WSDAlgorithm extends ParamHandler {
/**
* List containing all the WindowFilter objects that are going to be applied.
* Both getWindow overloads consult it: a candidate word enters the context window
* only when every filter in this list accepts it.
* The constructor of this class does not initialize it; assign it through
* setWindowFilters or noFilters before calling getWindow or solve, which iterate it directly.
*/
ArrayList<WindowFilter> windowFilters;
/**
* List containing all the SkipFilter objects that are going to be applied.
* solve and disambiguate(ArrayList) run the algorithm on a target word only when
* shouldITry returns true for every filter in this list.
* The constructor of this class does not initialize it; assign it through
* setSkipFilters or noFilters before those methods iterate it.
*/
ArrayList<SkipFilter> skipFilters;
/**
 * Gives the name of this algorithm.
 * @return The current value of the name field.
 */
public String getName() {
    return name;
}
/**
 * Gives the SkipFilter objects currently configured for this algorithm.
 * @return The skipFilters list itself (not a copy).
 */
public ArrayList<SkipFilter> getSkipFilters() {
    return this.skipFilters;
}
/**
 * Replaces the SkipFilter objects that decide whether a target word is attempted.
 * The list is stored by reference, not copied.
 * @param skipFilters New list of SkipFilter objects.
 */
public void setSkipFilters(ArrayList<SkipFilter> skipFilters) {
    this.skipFilters = skipFilters;
}
/**
* Instantiates a new WSDAlgorithm.
*/
public WSDAlgorithm()
{
super();
}
/**
 * Replaces the WindowFilter objects used when building context windows.
 * The list is stored by reference, not copied.
 * @param filters New list of WindowFilter objects.
 */
public void setWindowFilters(ArrayList<WindowFilter> filters) {
    this.windowFilters = filters;
}
/**
* Class name of this WSDAlgorithm.
* Returned by getName and used as the prefix of toString.
* It is never assigned inside this class; presumably subclasses set it - verify.
*/
String name;
/**
* Base dictionary.
* Assigned by setDict, and overwritten by solve with its dict argument.
* solveMultiple does not assign it.
*/
Dictionary dict;
/**
 * Gives the base dictionary currently attached to this algorithm.
 * @return The value of the dict field.
 */
public Dictionary getDict() {
    return this.dict;
}
/**
 * Attaches a new base dictionary to this algorithm.
 * @param dict New base dictionary.
 */
public void setDict(Dictionary dict) {
    this.dict = dict;
}
/**
* Initializes all the parameters specified when needed.
* solve and solveMultiple call this once on this algorithm, and on the tie and
* back-off algorithms when those are not null, before any word is processed.
* @param document Target document that is going to be disambiguated.
* @throws Exception Implementation-defined; solve and solveMultiple let it propagate.
*/
public abstract void init(Input document) throws Exception;
/**
 * Builds a context window for a target word out of a given set of candidate words.
 * Candidates are visited in list order. A candidate is kept only when every configured
 * WindowFilter accepts it; each filter receives the target, the candidate and the
 * window collected so far. This method never compares a candidate with the target
 * itself, so excluding the target is left to the filters or to the caller.
 * @param target The target word the window is built for.
 * @param window Base word set for extracting a context window.
 * @return A new list with the candidates accepted by all WindowFilters.
 * @throws Exception Whatever a WindowFilter throws while checking a candidate.
 */
public ArrayList<AmbiguousWord> getWindow(AmbiguousWord target, ArrayList<AmbiguousWord> window) throws Exception {
    ArrayList<AmbiguousWord> accepted = new ArrayList<AmbiguousWord>(window.size());
    for (AmbiguousWord candidate : window) {
        boolean keep = true;
        for (WindowFilter filter : this.windowFilters) {
            // The first rejection settles the outcome; later filters are not asked.
            if (!filter.satisfiesCondition(target, candidate, accepted)) {
                keep = false;
                break;
            }
        }
        if (keep) {
            accepted.add(candidate);
        }
    }
    return accepted;
}
/**
 * Builds a context window around the word at a given position of a document.
 * Words are examined at growing distance from the target, the left neighbour before
 * the right one at each distance. A word is added when every configured WindowFilter
 * accepts it. The search stops when the window holds "windowSize" words or when both
 * ends of the document have been passed. The target word itself is never examined.
 * When the "windowSize" parameter is missing it is registered with the value 1.
 * A window size of zero or less yields an empty window.
 * @param target Index of a target word.
 * @param document Target text containing all the words.
 * @return A context window of at most "windowSize" words accepted by all WindowFilters.
 * @throws Exception NumberFormatException when "windowSize" is not an integer
 * (for instance the value "text" recognised by solveMultiple), or whatever a
 * WindowFilter throws.
 */
public ArrayList<AmbiguousWord> getWindow(int target, Input document) throws Exception {
    if (this.getValue("windowSize") == null) {
        this.addParameters("windowSize:1;");
    }
    int windowSize = Integer.parseInt(this.getValue("windowSize"));
    // A negative capacity would make the ArrayList constructor throw; clamp it so a
    // non-positive size simply produces an empty window.
    ArrayList<AmbiguousWord> window = new ArrayList<AmbiguousWord>(Math.max(windowSize, 0));
    ArrayList<AmbiguousWord> words = document.getAmbiguousWords();
    AmbiguousWord targetWord = words.get(target);
    int distance = 1;
    while (window.size() < windowSize
            && (target - distance >= 0 || target + distance < words.size())) {
        int left = target - distance;
        if (left >= 0) {
            this.addWhenAllWindowFiltersAccept(targetWord, words.get(left), window);
        }
        if (window.size() == windowSize) {
            // The left neighbour filled the window; the right one must not be added.
            break;
        }
        int right = target + distance;
        if (right < words.size()) {
            this.addWhenAllWindowFiltersAccept(targetWord, words.get(right), window);
        }
        distance++;
    }
    return window;
}

/**
 * Appends a candidate to the window when no WindowFilter rejects it.
 */
private void addWhenAllWindowFiltersAccept(AmbiguousWord targetWord, AmbiguousWord candidate,
        ArrayList<AmbiguousWord> window) throws Exception {
    for (WindowFilter condition : this.windowFilters) {
        if (!condition.satisfiesCondition(targetWord, candidate, window)) {
            return;
        }
    }
    window.add(candidate);
}
/**
 * Tells the disambiguation algorithm to solve an input document, one word at a time.
 * Every WindowFilter is first initialised with the document, then this algorithm and
 * the optional tie and back-off algorithms are initialised, and the dict argument is
 * stored as the base dictionary.
 * Words without senses produce no Decision. For every other word:
 * <ul>
 * <li>if any SkipFilter rejects the word, a Decision built directly from the word and
 * its window is returned without running the algorithm;</li>
 * <li>otherwise the algorithm is run; when the "asFilter" parameter is "true" every
 * positive weight is set to 1.0;</li>
 * <li>an attempted decision is combined with the tie algorithm: when "asVoting" is
 * "true" the normalized weights of both are added, otherwise the tie algorithm is
 * only used to break ties;</li>
 * <li>a decision that was not attempted is replaced by the back-off decision.</li>
 * </ul>
 * Voting needs a second algorithm. When "asVoting" is "true" but tie is null, the
 * decision of this algorithm is kept unchanged.
 * @param document The disambiguated document.
 * @param backoff Back-off strategy; may be null.
 * @param tie Algorithm to be used for solving ties or for voting; may be null.
 * @param dict Base dictionary.
 * @return Decisions made by this algorithm, one per word that has at least one sense.
 * @throws Exception Whatever the filters or the algorithms involved throw.
 */
public ArrayList<Decision> solve(Input document, WSDAlgorithm backoff, WSDAlgorithm tie, Dictionary dict) throws Exception {
    for (WindowFilter filter : windowFilters) {
        filter.initWithDocument(document);
    }
    this.init(document);
    if (tie != null) {
        tie.init(document);
    }
    if (backoff != null) {
        backoff.init(document);
    }
    this.dict = dict;
    ArrayList<Decision> decisions = new ArrayList<Decision>(document.getAmbiguousWords().size());
    int target = 0;
    for (AmbiguousWord targetWord : document.getAmbiguousWords()) {
        boolean attempt = true;
        for (SkipFilter skip : this.skipFilters) {
            attempt = attempt && skip.shouldITry(targetWord);
        }
        if (targetWord.getSenses().size() > 0) {
            ArrayList<AmbiguousWord> window = this.getWindow(target, document);
            // Default decision, kept when a SkipFilter rejects the word.
            Decision d = new Decision(targetWord, window);
            d.calculateAnswer();
            if (attempt) {
                d = this.disambiguate(targetWord, window);
                if ("true".equals(this.getValue("asFilter"))) {
                    // Filter mode: every sense with a positive score becomes equally likely.
                    for (int x = 0; x < d.getSenseCount(); x++) {
                        if (d.getWeights()[x] > 0.0) {
                            d.getWeights()[x] = 1.0;
                        }
                    }
                    d.calculateAnswer();
                }
                if (d.isAttempted()) {
                    // Voting dereferences tie, so it is only possible when tie is not null.
                    if ("true".equals(this.getValue("asVoting")) && tie != null) {
                        Decision d2 = tie.disambiguate(targetWord, window);
                        // Normalize the weights so both voters are on the same scale.
                        d.normalizeWeigths();
                        if (d2.isAttempted()) {
                            d2.normalizeWeigths();
                            for (int x = 0; x < d.getSenseCount(); x++) {
                                d.getWeights()[x] += d2.getWeights()[x];
                            }
                            d.calculateAnswer();
                        }
                    } else if (d.isTied() && tie != null) {
                        d.unTie(tie.disambiguate(new AmbiguousWord(targetWord, d), tie.getWindow(target, document)));
                    }
                } else if (backoff != null) {
                    d = backoff.disambiguate(targetWord, backoff.getWindow(target, document));
                }
            }
            decisions.add(d);
        }
        target++;
    }
    return decisions;
}
/**
 * Gives the WindowFilter objects currently configured for this algorithm.
 * @return The windowFilters list itself (not a copy).
 */
public ArrayList<WindowFilter> getWindowFilters() {
    return this.windowFilters;
}
/**
 * Disambiguate a group of words simultaneously.
 * Some algorithms like Lesk or GraphInDegree have its own implementation of this method
 * for creating a great performance boost.
 * The document is processed in chunks: the whole text when the "windowSize" parameter
 * is "text", otherwise one chunk per sentence (also when "windowSize" is not set).
 * Each chunk is passed to disambiguate(ArrayList). For every resulting Decision:
 * when "asFilter" is "true" every positive weight is set to 1.0; a tied decision is
 * untied with the tie algorithm and a non-attempted one is replaced by the back-off
 * decision, in both cases only if the SkipFilters of that helper algorithm accept the
 * word. The answer set is then completed with fillAnswerSet and decisions whose target
 * has no senses are dropped.
 * NOTE(review): unlike solve, this method neither stores the dict argument nor calls
 * initWithDocument on the WindowFilters - confirm this is intended.
 * @param document Target document.
 * @param backoff Back-off strategy; may be null.
 * @param tie Algorithm to be used for solving ties; may be null.
 * @param dict Base dictionary (not used by this implementation).
 * @return Decisions made by this algorithm.
 * @throws Exception Whatever the filters or the algorithms involved throw.
 */
public ArrayList<Decision> solveMultiple(Input document, WSDAlgorithm backoff, WSDAlgorithm tie, Dictionary dict) throws Exception {
    this.init(document);
    if (tie != null) {
        tie.init(document);
    }
    if (backoff != null) {
        backoff.init(document);
    }
    ArrayList<Decision> decisions = new ArrayList<Decision>();
    ArrayList<ArrayList<AmbiguousWord>> chunks = new ArrayList<ArrayList<AmbiguousWord>>();
    // Constant-first comparison: getValue returns null when "windowSize" was never set,
    // in which case the document is processed sentence by sentence.
    if ("text".equals(this.getValue("windowSize"))) {
        chunks.add(document.getAmbiguousWords());
    } else {
        chunks.addAll(document.getSentences());
    }
    for (ArrayList<AmbiguousWord> sentence : chunks) {
        ArrayList<Decision> dxs = this.disambiguate(sentence);
        ArrayList<Decision> ds = new ArrayList<Decision>(dxs.size());
        for (Decision d : dxs) {
            if ("true".equals(this.getValue("asFilter"))) {
                // Filter mode: every sense with a positive score becomes equally likely.
                for (int x = 0; x < d.getSenseCount(); x++) {
                    if (d.getWeights()[x] > 0.0) {
                        d.getWeights()[x] = 1.0;
                    }
                }
                d.calculateAnswer();
            }
            Decision dx = d;
            if (d.isAttempted()) {
                if (d.isTied() && tie != null && acceptedBySkipFiltersOf(tie, d.getTarget())) {
                    ArrayList<AmbiguousWord> context = chunkWithoutTarget(sentence, d.getTarget());
                    d.unTie(tie.disambiguate(new AmbiguousWord(d.getTarget(), d), tie.getWindow(d.getTarget(), context)));
                }
            } else if (backoff != null && acceptedBySkipFiltersOf(backoff, d.getTarget())) {
                ArrayList<AmbiguousWord> context = chunkWithoutTarget(sentence, d.getTarget());
                dx = backoff.disambiguate(d.getTarget(), backoff.getWindow(d.getTarget(), context));
            }
            ds.add(dx);
        }
        this.fillAnswerSet(ds, sentence);
        // Walk backwards so removing an element does not shift the ones still to visit.
        for (int i = ds.size() - 1; i >= 0; i--) {
            if (ds.get(i).getTarget().getSenses().size() == 0) {
                ds.remove(i);
            }
        }
        decisions.addAll(ds);
    }
    return decisions;
}

/**
 * Tells whether every SkipFilter of the given algorithm allows trying the word.
 */
private static boolean acceptedBySkipFiltersOf(WSDAlgorithm algorithm, AmbiguousWord word) throws Exception {
    for (SkipFilter filter : algorithm.getSkipFilters()) {
        if (!filter.shouldITry(word)) {
            return false;
        }
    }
    return true;
}

/**
 * Copies a chunk and removes the first element equal to the target from the copy.
 */
private static ArrayList<AmbiguousWord> chunkWithoutTarget(ArrayList<AmbiguousWord> chunk, AmbiguousWord target) {
    ArrayList<AmbiguousWord> context = new ArrayList<AmbiguousWord>(chunk);
    context.remove(target);
    return context;
}
/**
 * Internal method for filling an answer set in case of existing duplicate answer.
 * E.G. when using the whole document as a window there will be duplicate answers for
 * words having more than a single occurrence, because only one Decision per lemma
 * is produced.
 * The window is walked from the start. Whenever position i of the answer set is
 * missing or holds a Decision for another lemma, the first Decision in ds with the
 * lemma of window.get(i) is inserted there (the same Decision object, not a copy).
 * The walk ends when ds is as long as the window or the window is exhausted. If some
 * word has no Decision with its lemma, nothing is inserted for it and ds may stay
 * shorter than the window.
 * @param ds Target answer set; modified in place.
 * @param window The words being disambiguated simultaneously.
 * @return Number of answers added.
 */
int fillAnswerSet(ArrayList<Decision> ds, ArrayList<AmbiguousWord> window) {
    int fill = 0;
    // The bound on i is what keeps window.get(i) in range when ds can never reach
    // window.size() because some word has no Decision to copy.
    for (int i = 0; i < window.size() && ds.size() < window.size(); i++) {
        AmbiguousWord word = window.get(i);
        if (i < ds.size() && ds.get(i).getTarget().getLemma().equals(word.getLemma())) {
            continue;
        }
        Decision donor = null;
        for (Decision d : ds) {
            if (d.getTarget().getLemma().equals(word.getLemma())) {
                donor = d;
                break;
            }
        }
        if (donor != null) {
            // After an earlier miss i can be past the end of ds; append in that case.
            ds.add(Math.min(i, ds.size()), donor);
            fill++;
        }
    }
    return fill;
}
/**
* Disambiguates a target word using a context window.
* This is the per-word step that solve, solveMultiple and disambiguate(ArrayList)
* delegate to, on this algorithm as well as on the tie and back-off algorithms.
* @param target The AmbiguousWord being disambiguated.
* @param window The context window that helps disambiguation process.
* @return The Decision made by the algorithm.
* @throws Exception Implementation-defined; the callers in this class let it propagate.
*/
public abstract Decision disambiguate(AmbiguousWord target,ArrayList<AmbiguousWord> window) throws Exception;
/**
* Method that tells if a word increases the disambiguation score of a sense of a target word.
* It is not called from within this class.
* @param target Word being disambiguated.
* @param windowWord Word being assessed.
* @return True if the word being assessed changes the disambiguation score of a sense of the target word.
* @throws Exception Implementation-defined.
*/
public abstract boolean IsUseful(AmbiguousWord target, AmbiguousWord windowWord) throws Exception;
/**
 * Disambiguates a set of words simultaneously.
 * Only the first occurrence of each lemma is treated as a target, so the result holds
 * one Decision per distinct lemma, in order of first appearance.
 * For a target accepted by every SkipFilter, the context is the window without that
 * target, narrowed by the WindowFilters, and the per-word disambiguate is run on it.
 * For a rejected target, a Decision is built directly from the target and the
 * complete window without running the algorithm.
 * @param window Target set of words.
 * @return Decisions made by this algorithm.
 * @throws Exception Whatever the filters or the per-word disambiguation throw.
 */
public ArrayList<Decision> disambiguate(ArrayList<AmbiguousWord> window) throws Exception {
    // Collect the first word seen for every lemma.
    ArrayList<AmbiguousWord> uniqueTargets = new ArrayList<AmbiguousWord>(window.size());
    for (AmbiguousWord candidate : window) {
        boolean alreadyListed = false;
        for (AmbiguousWord listed : uniqueTargets) {
            if (listed.getLemma().equals(candidate.getLemma())) {
                alreadyListed = true;
                break;
            }
        }
        if (!alreadyListed) {
            uniqueTargets.add(candidate);
        }
    }
    ArrayList<Decision> results = new ArrayList<Decision>();
    for (AmbiguousWord target : uniqueTargets) {
        boolean attempt = true;
        for (SkipFilter skip : this.skipFilters) {
            if (!attempt) {
                // Once a filter has rejected the word the remaining ones are not asked.
                break;
            }
            attempt = skip.shouldITry(target);
        }
        if (!attempt) {
            Decision untouched = new Decision(target, window);
            untouched.calculateAnswer();
            results.add(untouched);
            continue;
        }
        ArrayList<AmbiguousWord> context = new ArrayList<AmbiguousWord>();
        context.addAll(window);
        context.remove(target);
        results.add(this.disambiguate(target, this.getWindow(target, context)));
    }
    return results;
}
/**
 * Describes this algorithm as its name followed by the superclass description.
 * @return this.name+":"+super.toString().
 */
@Override
public String toString() {
    return this.name + ":" + super.toString();
}
/**
 * Tells if a lemma occurs in the bag of words of a sense.
 * @param sense Target sense.
 * @param lemma Target lemma.
 * @return true when some word of the bag of words equals the lemma.
 */
public boolean overlap(Sense sense, String lemma) {
    for (String bagWord : sense.getBagOfWords()) {
        if (bagWord.equals(lemma)) {
            return true;
        }
    }
    return false;
}
/**
 * Collects the words of the second sense that also occur in the first sense.
 * Each word of the bag of words of sense2 found in sense is appended to dwords,
 * once per occurrence in sense2.
 * @param sense Sense 1.
 * @param sense2 Sense 2.
 * @param dwords ArrayList for storing the overlapping words; appended to, never cleared.
 * @return true when at least one word was appended.
 */
public boolean overlap(Sense sense, Sense sense2, ArrayList<String> dwords) {
    boolean anyShared = false;
    for (String candidate : sense2.getBagOfWords()) {
        if (!this.overlap(sense, candidate)) {
            continue;
        }
        dwords.add(candidate);
        anyShared = true;
    }
    return anyShared;
}
/**
 * Counts the overlap between two senses.
 * Every pair made of one word from each bag of words is compared, so repeated words
 * are counted once per matching pair.
 * @param sense Sense 1.
 * @param sense2 Sense 2.
 * @return Number of equal word pairs; 0 when there is no overlap.
 */
public int overlap(Sense sense, Sense sense2) {
    int matches = 0;
    for (String other : sense2.getBagOfWords()) {
        for (String own : sense.getBagOfWords()) {
            if (own.equals(other)) {
                matches += 1;
            }
        }
    }
    return matches;
}
/**
 * Collects the overlap between a sense and every sense of an AmbiguousWord.
 * All senses of the word are always examined, so dwords receives the overlapping
 * words of each of them.
 * @param sense Target sense.
 * @param lemma Target AmbiguousWord.
 * @param dwords ArrayList for storing the overlapping words; appended to, never cleared.
 * @return true when there is an overlap with at least one sense of the word.
 * @throws Exception Whatever retrieving the senses of the word throws.
 */
public boolean overlap(Sense sense, AmbiguousWord lemma, ArrayList<String> dwords) throws Exception {
    boolean anyShared = false;
    for (Sense candidate : lemma.getSenses()) {
        // Non-short-circuit OR: the comparison must run for every sense to fill dwords.
        anyShared |= this.overlap(sense, candidate, dwords);
    }
    return anyShared;
}
/**
 * Sets up a no-filter environment: both the SkipFilter list and the WindowFilter list
 * are replaced by new empty lists, so every word is attempted and every candidate
 * word is accepted into a window.
 */
public void noFilters() {
    this.skipFilters = new ArrayList<SkipFilter>(1);
    this.windowFilters = new ArrayList<WindowFilter>(1);
}
}