import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
import edu.smu.tspell.wordnet.NounSynset;
import edu.smu.tspell.wordnet.Synset;
import edu.smu.tspell.wordnet.SynsetType;
import edu.smu.tspell.wordnet.VerbSynset;
import edu.smu.tspell.wordnet.WordNetDatabase;
/**
 * @author Abhijeet and Brian Magerko
 */
/*
 * This class finds the least common subsumer (LCS) of two synsets of the same SynsetType.
 * It runs a bidirectional BFS from the source and the target and returns the first node
 * where the two searches meet. The BFS travels up the hierarchy, so only hypernym
 * relations are followed to reach the parents. The search always terminates: each synset
 * is enqueued at most once per direction, so the frontiers eventually empty.
 */
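/*
 * Usage sketch (hypothetical words and sense indices; assumes a local WordNet
 * install and that both lookups return at least one synset):
 *
 *   Lcs lcs = new Lcs();
 *   Synset[] dog = Lcs.wordnet.getSynsets("dog", SynsetType.NOUN);
 *   Synset[] cat = Lcs.wordnet.getSynsets("cat", SynsetType.NOUN);
 *   double sim = lcs.getPathSim(dog[0], cat[0]); // Wu-Palmer score, or -1 if no LCS exists
 */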
public class Lcs {
ArrayList<Node> s_toBeExpanded;// nodes in BFS of source yet to be expanded
ArrayList<Node> t_toBeExpanded;// nodes in BFS of target yet to be expanded
Map<Integer,Node> created;// nodes already created, keyed by synset hash code
/* nodes wrapping the source and target synsets (re-created for every search) */
Node s_node;
Node t_node;
// used to locate the test input on the classpath (testing only)
ClassLoader cl;
static URL url;
static String path = null;
DistanceToRoot dist_to_root = new DistanceToRoot();
/* Remove this instance of wordnet after testing */
public static WordNetDatabase wordnet;
public Lcs ()
{
/* point JAWS at the local WordNet dictionary; adjust this path for your install */
System.setProperty("wordnet.database.dir", "c:\\WordNet\\2.1\\dict\\");
wordnet = WordNetDatabase.getFileInstance();
cl = getClass().getClassLoader();
url = cl.getResource("Tags");
/* guard against a missing resource so the constructor does not throw an NPE */
if(url != null)
{
path = url.getFile() + "/testWords.txt";
}
}
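/*
 * Note: the dictionary location can also be supplied as a JVM argument instead of
 * being hard-coded in the constructor, e.g. (illustrative path):
 *   java -Dwordnet.database.dir=/usr/share/wordnet/dict Lcs
 */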
/*
 * a path can only be found if source and target have the same SynsetType, i.e.
 * either both are NOUNs or both are VERBs
 */
private Node biDirectionalBFS(Synset source , Synset target)
{
/* only noun and verb synsets carry hypernym links, so anything else has no LCS here */
if(source.getType() != target.getType()
|| (source.getType() != SynsetType.NOUN && source.getType() != SynsetType.VERB))
{
return null;
}
/* initialize the arraylists */
s_toBeExpanded = new ArrayList<Node>();
t_toBeExpanded = new ArrayList<Node>();
/* initialize the map */
created = new HashMap<Integer,Node>();
/*
 * create fresh nodes for the source and target on every search so that marks and
 * predecessor links from a previous call cannot leak into this one
 */
s_node = new Node();
t_node = new Node();
s_node.synset = source;
t_node.synset = target;
/* set their respective previous to themselves */
s_node.s_previous = s_node;
t_node.t_previous = t_node;
/* if source = target return node for source */
if(source.equals(target))
{
return s_node;
}
/* mark them */
s_node.s_marked = true;
t_node.t_marked = true;
/* enqueue source and target on the respective toBeExpanded queues */
s_toBeExpanded.add(s_node);
t_toBeExpanded.add(t_node);
/*
 * index the nodes by synset hash code; this assumes hash codes uniquely identify
 * synsets (a Map<Synset,Node> would be safer if Synset defined equals/hashCode)
 */
created.put(s_node.synset.hashCode() , s_node);
created.put(t_node.synset.hashCode() , t_node);
/*
 * alternate one expansion step from each frontier; run while either frontier is
 * non-empty, since the two searches may exhaust at different depths
 */
while(!s_toBeExpanded.isEmpty() || !t_toBeExpanded.isEmpty())
{
/*
* BFS from source
*/
if(!s_toBeExpanded.isEmpty())
{
Synset [] hypernyms = null;
/* remove the first element from the toBeExpanded queue */
Node u = s_toBeExpanded.remove(0);
/* check if source and target are of type NOUN or VERB */
if(source.getType() == SynsetType.NOUN)
{
/* get all the immediate parents of u */
NounSynset nounsyn = (NounSynset)(u.synset);
hypernyms = nounsyn.getHypernyms();
}
else if(source.getType() == SynsetType.VERB)
{
VerbSynset verbsyn = (VerbSynset)(u.synset);
hypernyms = verbsyn.getHypernyms();
}
/* for each parent of u */
for(Synset v : hypernyms)
{
/* check if the map already contains a node for v */
// if yes
if(created.containsKey(v.hashCode()))
{
/* get the node */
Node v_node = created.get(v.hashCode());
/* check if already marked */
if(v_node.s_marked)
continue;
else
{
/* the two searches meet at v, so v is the LCS */
v_node.s_previous = u;
v_node.s_marked = true;
return v_node;
}
}
// if not, create a node for v
else
{
Node v_node = new Node();
v_node.synset = v;
v_node.s_previous = u;
v_node.s_marked = true;
/* add v_node to the map */
created.put(v_node.synset.hashCode(), v_node);
/* add node to toBeExpanded */
s_toBeExpanded.add(v_node);
}
}
}
/*
* BFS from target
*/
if(!t_toBeExpanded.isEmpty())
{
Synset [] hypernyms = null;
/* remove the first element from the toBeExpanded queue */
Node u = t_toBeExpanded.remove(0);
/* check if source and target are NOUN or VERB */
if(target.getType() == SynsetType.NOUN)
{
/* get all the immediate parents of u */
NounSynset nounsyn = (NounSynset)(u.synset);
hypernyms = nounsyn.getHypernyms();
}
else if(target.getType() == SynsetType.VERB)
{
VerbSynset verbsyn = (VerbSynset)(u.synset);
hypernyms = verbsyn.getHypernyms();
}
/* for each parent of u */
for(Synset v : hypernyms)
{
/* check if the map already contains a node for v */
// if yes
if(created.containsKey(v.hashCode()))
{
/* get the node */
Node v_node = created.get(v.hashCode());
/* check if already marked */
if(v_node.t_marked)
continue;
else
{
/* the two searches meet at v, so v is the LCS */
v_node.t_previous = u;
v_node.t_marked = true;
return v_node;
}
}
// if not, create a node for v
else
{
Node v_node = new Node();
v_node.synset = v;
v_node.t_previous = u;
v_node.t_marked = true;
/* add v_node to the map */
created.put(v_node.synset.hashCode(), v_node);
/* add node to toBeExpanded */
t_toBeExpanded.add(v_node);
}
}
}
}// while ends
/* both frontiers exhausted without meeting: no shared ancestor (possible for verbs) */
return null;
}
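/*
 * Path reconstruction sketch (illustrative synsets): for source "dog" and target
 * "cat" with LCS "carnivore", getPath walks s_previous links back from the LCS to
 * the source, then t_previous links forward to the target, yielding
 *   [dog, canine, carnivore, feline, cat]
 */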
private ArrayList<Node> getPath(Node lcs)
{
ArrayList<Node> path = new ArrayList<Node>();
/* if source and target are the same, the lcs is the source node itself */
if(lcs.equals(s_node) && s_node.synset.equals(t_node.synset))
{
path.add(lcs);
return path;
}
Node n = lcs;
while(n != s_node)
{
path.add(0,n);
n = n.s_previous;
}
// add the source node
path.add(0,n);
// reset n
n = lcs.t_previous;
while(n != t_node)
{
path.add(n);
n = n.t_previous;
}
/*
 * if the lcs is the target itself it is already on the path;
 * otherwise the target node still has to be appended
 */
if(!lcs.synset.equals(t_node.synset))
path.add(n);
return path;
}
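/*
 * Wu-Palmer similarity: sim(s, t) = 2 * depth(lcs) / (depth(s) + depth(t)), where
 * depth is measured here as the number of nodes on the path to the root.
 * Worked example (illustrative counts): depth(lcs) = 5, depth(s) = 8, depth(t) = 7
 * gives sim = (2 * 5) / (8 + 7) = 10 / 15 ~ 0.667.
 */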
public double getPathSim(Synset source , Synset target)
{
// get the least common subsumer of the two synsets
Node lcs = biDirectionalBFS(source , target);
int dist_lcs_root = 0;
int dist_source_root = 0;
int dist_target_root = 0;
if(lcs != null)
{
// get the paths to the root
ArrayList<DistanceToRoot.Node> lcs_to_root = dist_to_root.getDistanceToRoot(lcs.synset);
ArrayList<DistanceToRoot.Node> source_to_root = dist_to_root.getDistanceToRoot(source);
ArrayList<DistanceToRoot.Node> target_to_root = dist_to_root.getDistanceToRoot(target);
// count the nodes in each
dist_lcs_root = lcs_to_root.size();
dist_source_root = source_to_root.size();
dist_target_root = target_to_root.size();
// return the Wu-Palmer semantic similarity measure
return (2.0 * dist_lcs_root) / (dist_source_root + dist_target_root);
}
/* no LCS: mismatched or unsupported synset types, or no shared ancestor */
return -1;
}
/* search node wrapping a synset, with separate bookkeeping for each BFS direction */
private class Node
{
Node s_previous = null;// predecessor on the path from the source
Node t_previous = null;// predecessor on the path from the target
Synset synset = null;
boolean s_marked;// visited by the source-side BFS
boolean t_marked;// visited by the target-side BFS
}
/* TESTING */
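/*
 * Assumed input format for Tags/testWords.txt: one "word1 word2 goldScore" triple
 * per line, e.g. (illustrative values):
 *   car automobile 0.92
 * The result file echoes each triple followed by the best Wu-Palmer score over all
 * noun-sense pairs of the two words.
 */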
public static void main(String [] args)
{
Lcs lcs = new Lcs();
BufferedReader br = null;
BufferedWriter bw = null;
double max_wu_palmer = 0;
double similarity;
/* bail out if the test resource was not found */
if(path == null)
{
System.exit(0);
}
try
{
br = new BufferedReader(new FileReader(path));
bw = new BufferedWriter(new FileWriter(url.getFile() + "/testWordsResult.txt"));
String text;
while((text = br.readLine()) != null)
{
StringTokenizer st = new StringTokenizer(text , " ");
while(st.hasMoreTokens())
{
String w1 = st.nextToken();
String w2 = st.nextToken();
double sim = Double.parseDouble(st.nextToken());
Synset noun1[] = wordnet.getSynsets(w1, SynsetType.NOUN);
Synset noun2[] = wordnet.getSynsets(w2, SynsetType.NOUN);
/* only noun senses are compared below; verb synsets are not used in this test */
for(Synset s1 : noun1)
{
for(Synset s2 : noun2)
{
similarity = lcs.getPathSim(s1,s2);
if (similarity > max_wu_palmer)
max_wu_palmer = similarity;
}
}
bw.write(w1);
bw.write(" ");
bw.write(w2);
bw.write(" ");
bw.write(Double.toString(sim));
bw.write(" ");
bw.write(Double.toString(max_wu_palmer));
bw.newLine();
/* reset the running max before the next word pair */
max_wu_palmer = 0;
}
}
br.close();
bw.flush();
bw.close();
}catch(Exception e)
{
e.printStackTrace();
}
}
}