Source Code of Lcs

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Map;
import java.util.StringTokenizer;

import edu.smu.tspell.wordnet.NounSynset;
import edu.smu.tspell.wordnet.Synset;
import edu.smu.tspell.wordnet.SynsetType;
import edu.smu.tspell.wordnet.VerbSynset;
import edu.smu.tspell.wordnet.WordNetDatabase;

/**
* @author Abhijeet and Brian Magerko
*
*
*/

/*
 * This class finds the least common subsumer (LCS) of two synsets of the same SynsetType.
 * It runs a bi-directional BFS from the source and the target and returns the first node
 * at which the two searches meet. The BFS travels up the tree, so only hypernym relations
 * are followed to get the parents; since every synset eventually reaches the top of its
 * hierarchy, the search always terminates, returning null if the two searches never meet.
 */


public class Lcs {
 
  ArrayList<Node> s_toBeExpanded;// nodes in BFS of source yet to be expanded
  ArrayList<Node> t_toBeExpanded;// nodes in BFS of target yet to be expanded
  Map<Integer,Node> created; // nodes created so far by either search, keyed by synset hashCode
  //create Nodes for source and target
  Node s_node = new Node();
  Node t_node = new Node();
 
  // for testing purposes only
  ClassLoader cl;
  static URL url;
  static String path = null;
   
  DistanceToRoot dist_to_root = new DistanceToRoot();
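  // Note: DistanceToRoot is a helper defined elsewhere in this project; judging by its use
  // in getPathSim below, it is assumed to return the chain of nodes from a synset up to the
  // root of the hypernym tree, whose size serves as the synset's depth.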
 
  /* Remove this instance of wordnet after testing */
  public static WordNetDatabase wordnet;
   
  public Lcs ()
  {
    System.setProperty("wordnet.database.dir", "c:\\WordNet\\2.1\\dict\\");
    wordnet = WordNetDatabase.getFileInstance();
    cl = getClass().getClassLoader();
    url = cl.getResource("Tags");
    path = (url.getFile().toString() + "/testWords.txt");
  }
 
  /*
   * Can only find the path if source and target have the same SynsetType, i.e.
   * either both are NOUNs or both are VERBs.
   */
  private Node biDirectionalBFS(Synset source , Synset target)
  {
    if(source.getType() != target.getType())
    {
      return null;
    }
   
    /* Node for LCS */
    Node LCS = new Node();
   
       
    /* initialize the arraylists */
    s_toBeExpanded = new ArrayList<Node>();
    t_toBeExpanded = new ArrayList<Node>();
   
    /* initialize the hashtable */
    created = new Hashtable<Integer,Node>();
   
    /*
     * initialize the Nodes for source and target
     */
    s_node.synset = source;
    t_node.synset = target;
   
    /* set their respective previous to themselves */
    s_node.s_previous = s_node;
    t_node.t_previous = t_node;
   
    /* if source = target return node for source */
    if(source.equals(target))
    {
      return s_node;
    }
   
    /* mark them */
      s_node.s_marked = true;
      t_node.t_marked = true;
   
    /* enqueue source and target on the respective toBeExpanded queues */
    s_toBeExpanded.add(s_node);
    t_toBeExpanded.add(t_node);
   
    /* add them to hashtable */
    created.put(s_node.synset.hashCode() , s_node);
    created.put(t_node.synset.hashCode() , t_node);
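    /*
     * Meeting detection: each search marks the nodes it reaches with its own flag
     * (s_marked / t_marked). When a search expands to a node that already exists in
     * 'created' but is not yet marked from its own direction, that node was discovered
     * by the other search, so it is the meeting point and is returned as the LCS.
     */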
   
       
    /* while either toBeExpanded queue is non-empty, keep alternating between the two searches */
    while(!s_toBeExpanded.isEmpty() || !t_toBeExpanded.isEmpty())
    {
     
      /*
       * BFS from source
       */
      if(!s_toBeExpanded.isEmpty())
      {
        Synset[] hypernyms = new Synset[0]; // empty by default so unsupported synset types are simply not expanded
        /* remove the first element from the toBeExpanded queue */
        Node u = s_toBeExpanded.remove(0);
       
        /* check if source and target are of type NOUN or VERB */
        if(source.getType() == SynsetType.NOUN)
        {
          /* get all the immediate parents of u */
          NounSynset nounsyn = (NounSynset)(u.synset);
          hypernyms = nounsyn.getHypernyms();
        }
        else if(source.getType() == SynsetType.VERB)
        {
          VerbSynset verbsyn = (VerbSynset)(u.synset);
          hypernyms = verbsyn.getHypernyms();
         
        }
         
          /* for each parent of u */
          for(Synset v : hypernyms)
          {
              /* check if hashtable already contains a node for v */
             
              // if yes
              if(created.containsKey(v.hashCode()))
              {
                /* get the node */
                Node v_node =  created.get(v.hashCode());
               
                /* check if already marked */
                if(v_node.s_marked)
                  continue;
                else
                {
                  /* v was already reached by the target search, so it is the meeting point (LCS) */
                  v_node.s_previous = u;
                  v_node.s_marked = true;
                  LCS = v_node;
                  return LCS;
                }
               
              }
             
              // if no
              if(!created.containsKey(v.hashCode()))
              {
                /* create a node for v */
                Node v_node = new Node();
                v_node.synset = v;
                v_node.s_previous = u;
                v_node.s_marked = true;
                /* add v_node to hashtable */
                created.put(v_node.synset.hashCode(), v_node);
                /* add node to toBeExpanded */
                s_toBeExpanded.add(v_node);
              }
          }
        }
       
             
      /*
       * BFS from target
       */
      if(!t_toBeExpanded.isEmpty())
      {
     
        Synset[] hypernyms = new Synset[0]; // empty by default so unsupported synset types are simply not expanded
        /* remove the first element from the toBeExpanded queue */
        Node u = t_toBeExpanded.remove(0);
       
               
        /* check if source and target are NOUN or VERB */
        if(target.getType() == SynsetType.NOUN)
        {
          /* get all the immediate parents of u */
          NounSynset nounsyn = (NounSynset)(u.synset);
          hypernyms = nounsyn.getHypernyms();
        }
        else if(target.getType() == SynsetType.VERB)
        {
          VerbSynset verbsyn = (VerbSynset)(u.synset);
          hypernyms = verbsyn.getHypernyms();
         
        }
         
          /* for each parent of u */
          for(Synset v : hypernyms)
          {
              /* check if the hashtable already contains a node for v */
             
              // if yes
              if(created.containsKey(v.hashCode()))
              {
                /* get the node */
                Node v_node =  created.get(v.hashCode());
               
                /* check if already marked */
                if(v_node.t_marked)
                  continue;
               
                else
                {
                  /* v was already reached by the source search, so it is the meeting point (LCS) */
                  v_node.t_previous = u;
                  v_node.t_marked = true;
                  LCS = v_node;
                  return LCS;
                }
                             
              }
             
              // if no
              if(!created.containsKey(v.hashCode()))
              {
                /* create a node for v */
                Node v_node = new Node();
                v_node.synset = v;
                v_node.t_previous = u;
                v_node.t_marked = true;
                /* add v_node to hashtable */
                created.put(v_node.synset.hashCode(), v_node);
                /* add node to toBeExpanded */
                t_toBeExpanded.add(v_node);
              }
          }
      }
               
    }// while ends
   
    return null;
   
  }
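  /*
   * getPath (below) reconstructs the full source-to-target path through the LCS:
   * it follows the s_previous links from the LCS back to the source (prepending as it
   * goes), then the t_previous links from the LCS towards the target (appending).
   */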
 
  private ArrayList<Node> getPath(Node lcs)
  {
    ArrayList<Node> path = new ArrayList<Node>();
   
    /* if source and target are the same, the LCS is the source node */
    if(lcs.equals(s_node) && s_node.synset.equals(t_node.synset))
    {
      path.add(lcs);
      return path;
    }
   
    Node n = lcs;
    while(n != s_node)
    {
      path.add(0,n);
      n = n.s_previous;
    }
    // add the source node
    path.add(0,n);
       
    // reset n
    n = lcs.t_previous;
    while(n != t_node)
    {
      path.add(n);
      n = n.t_previous;
    }
   
    /*
     * If the LCS is the target, it was already added to the path above;
     * otherwise the target node itself still needs to be appended.
     */
    if(!lcs.synset.equals(t_node.synset))
        path.add(n);
   
    return path;
  }
 
  public double getPathSim(Synset source , Synset target)
  {
    // get the least common subsumer of the two synsets
    Node lcs = biDirectionalBFS(source , target);
    int dist_lcs_root = 0;
    int dist_source_root = 0;
    int dist_target_root = 0;
     
    if(lcs != null)
    {
     
      // get the paths to the root
      ArrayList<DistanceToRoot.Node> lcs_to_root = dist_to_root.getDistanceToRoot(lcs.synset);
      ArrayList<DistanceToRoot.Node> source_to_root = dist_to_root.getDistanceToRoot(source);
      ArrayList<DistanceToRoot.Node> target_to_root = dist_to_root.getDistanceToRoot(target);
     
      // count the nodes in each
      dist_lcs_root = lcs_to_root.size();
      dist_source_root = source_to_root.size();
      dist_target_root = target_to_root.size();
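      /*
       * Wu-Palmer similarity: wup(s, t) = 2 * depth(lcs) / (depth(s) + depth(t)).
       * For example, if the LCS lies 5 nodes from the root while the source and target
       * lie 7 and 8 nodes from the root, the score is 2 * 5 / (7 + 8) = 0.667.
       */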
     
      // return the Wu-Palmer semantic similarity measure
      return (double)((2.0 * dist_lcs_root)/(dist_source_root + dist_target_root));
    }
    return -1;
  }
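  /*
   * Node (below) wraps a synset and keeps, for each search direction, the predecessor
   * it was reached from and a flag recording whether that direction has visited it.
   */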
 
  private class Node
  {
    Node s_previous = null;
    Node t_previous = null;
    Synset synset = null;
    boolean s_marked;
    boolean t_marked;
   
  }
 
  /* TESTING */
  public static void main(String [] args)
  {
    Lcs lcs = new Lcs();
    BufferedReader br = null;
    BufferedWriter bw = null;
    double max_wu_palmer = 0;
    double similarity;
   
    if(path == null)
    {
      System.exit(0);
    }
   
    try
    {
      br = new BufferedReader(new FileReader(path));
      bw = new BufferedWriter(new FileWriter(url.getFile().toString() + "/testWordsResult.txt"));
      String text;
     
      while((text = br.readLine()) != null)
      {
        StringTokenizer st = new StringTokenizer(text , "   ");
       
        while(st.hasMoreTokens())
        {
          String w1 = st.nextToken().toString();
          String w2 = st.nextToken().toString();
          double sim = Double.parseDouble(st.nextToken().toString());
         
          Synset noun1[] = wordnet.getSynsets(w1, SynsetType.NOUN);
          Synset noun2[] = wordnet.getSynsets(w2, SynsetType.NOUN);
         
          // verb synsets are retrieved here but are not used in this noun-only test
          Synset verb1[] = wordnet.getSynsets(w1, SynsetType.VERB);
          Synset verb2[] = wordnet.getSynsets(w2, SynsetType.VERB);
         
          for(Synset s1 : noun1)
            for(Synset s2 : noun2)
            {
              similarity = lcs.getPathSim(s1,s2);
              if (similarity > max_wu_palmer)
                max_wu_palmer = similarity;
                             
            }
         
          bw.write(w1);
          bw.write("    ");
          bw.write(w2);
          bw.write("    ");
          bw.write(Double.toString(sim));
          bw.write("    ");
          bw.write(Double.toString(max_wu_palmer));
          bw.newLine();
          max_wu_palmer = 0; // reset the running maximum for the next word pair
        }
      }
      br.close();
      bw.flush();
      bw.close();
    }catch(Exception e)
    {
      e.printStackTrace();
    }
     
  }

}
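
Below is a minimal usage sketch, not part of the original file, showing how Lcs might be driven for a single word pair. It assumes the JAWS library is on the classpath, the same hard-coded WordNet directory used in the Lcs constructor, and that the "Tags" resource the constructor resolves is present; the example words and the UsageSketch class name are made up for illustration.

// UsageSketch.java -- illustrative only; the words below and this class name are hypothetical.
import edu.smu.tspell.wordnet.Synset;
import edu.smu.tspell.wordnet.SynsetType;

public class UsageSketch
{
  public static void main(String[] args)
  {
    // The constructor sets wordnet.database.dir, opens the database and resolves the
    // "Tags" test resource, so both must be available for this to run.
    Lcs lcs = new Lcs();

    // Look up the noun synsets for two example words.
    Synset[] first = Lcs.wordnet.getSynsets("car", SynsetType.NOUN);
    Synset[] second = Lcs.wordnet.getSynsets("bus", SynsetType.NOUN);

    // Take the best Wu-Palmer score over all sense pairs, as the test harness above does.
    double best = 0;
    for (Synset s1 : first)
      for (Synset s2 : second)
      {
        double sim = lcs.getPathSim(s1, s2); // returns -1 when no common subsumer is found
        if (sim > best)
          best = sim;
      }

    System.out.println("Best Wu-Palmer similarity: " + best);
  }
}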