Package org.sf.mustru.utils

Source Code of org.sf.mustru.utils.WordnetTools

package org.sf.mustru.utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.sf.mustru.utils.Constants;

/**
* A collection of Wordnet tools. <br>
*
* 1. getSynonyms: Pass a word and fetch the associated synonyms
*
*/
public class WordnetTools
{
  private HashMap<String, String> adjSuffixes,   //*-- hash for adjective suffixes
        nounSuffixes,      //*-- hash for noun suffixes
        verbSuffixes;      //*-- hash for verb suffixes
  private Pattern adjSuffixPattern;      //*-- RE to remove an adjective suffix
  private Pattern nounSuffixPattern;      //*-- RE to remove a noun suffix
  private Pattern verbSuffixPattern;      //*-- RE to remove verb suffix
  private HashMap<String, String> synHyper;    //*-- hash to translate words to hypernyms
  static Logger logger = Logger.getLogger(WordnetTools.class.getName() );

  public WordnetTools()
  {    
   //*-- load the synonym hash
   synHyper = new HashMap<String, String> ();
   String synFile = Constants.DATADIR + File.separator + "wordSynonyms.lst"; String s;
   BufferedReader filein = null;
   try
   { filein = new BufferedReader(new FileReader(synFile));
     while ((s = filein.readLine()) != null)
      {  String[] etypes = s.split("!!!");  etypes[0] = etypes[0].trim(); etypes[2] = etypes[2].trim();
         synHyper.put(etypes[0] + "!!!" + etypes[1], etypes[2]); }
   }
   catch (FileNotFoundException fe) { logger.error("Could not find file wordSynonyms.lst " + fe.getMessage()); }
   catch (IOException ie) { logger.error("IO Error " + ie.getMessage()); }
   finally { try { if (filein != null) filein.close(); }
             catch (IOException ie) { logger.error("Ignored error"); }
   }
   //*-- load the suffix replacement hash with suffixes and replacements to find the stem words of word forms
   //*-- noun suffix removal
   nounSuffixes = new HashMap<String, String> ();
   nounSuffixes.put("ches","ch");
   nounSuffixes.put("shes","sh");
   nounSuffixes.put("ses","s");
   nounSuffixes.put("xes","s");
   nounSuffixes.put("zes","z");
   nounSuffixes.put("men","man");
   nounSuffixes.put("ies","y");
   nounSuffixes.put("s","");
   nounSuffixPattern = getPattern(nounSuffixes);

   //*-- verb suffix removal, replacement strings ending with : represent 2 possiblities
   verbSuffixes = new HashMap<String, String>();
   verbSuffixes.put("ies","y");
   verbSuffixes.put("ing","e:");
   verbSuffixes.put("est","e:");
   verbSuffixes.put("es","e:");
   verbSuffixes.put("ed","e:");
   verbSuffixes.put("er","e:");
   verbSuffixes.put("s","");
   verbSuffixPattern = getPattern(verbSuffixes);

   //*-- adjective suffix removal
   adjSuffixes = new HashMap<String, String>();
   adjSuffixes.put("est","e:");
   adjSuffixes.put("er","e:");
   adjSuffixPattern = getPattern(adjSuffixes);
  }

  /**
   * return the synonyms for an entity type
   * @param word
   * @return String containing synonyms
   */
  public String getSynonyms(String word)
  { return getSynonyms(word, "n"); }

  /**
   * Return the synonyms for a given word
   * @param word The word for which synonyms must be located
   * @param pos The part of speech of the word
   * @return String containing synonyms
   */
  public String getSynonyms(String word, String pos)
  {
   //*-- skip short words
   word = word.trim(); word = word.toLowerCase(Constants.locale); word = word.replace(' ', '_');
   String synonyms = synHyper.get(word + "!!!" + pos);
   if (synonyms != null) return(synonyms);

   //*-- try removing a suffix
   Matcher matcher = null;
   if (pos.equals("n")) matcher = nounSuffixPattern.matcher(word);
   else if (pos.equals("v")) matcher = verbSuffixPattern.matcher(word);
   else if (pos.equals("a")) matcher = adjSuffixPattern.matcher(word);
   if (matcher == null) return("");

   //*-- if we found a matching suffix
   String[] words = {"", ""};
   if (matcher.matches())
   {  String root = matcher.group(1); String suffix = matcher.group(2);
      String replacement = (pos.equals("n")) ? (String) nounSuffixes.get(suffix):
                            (pos.equals("v")) ? (String) verbSuffixes.get(suffix): (String) adjSuffixes.get(suffix);

      //*-- create upto two possible replacements suffixes, a suffix ending with a ':' has two possibilities
      if (replacement.endsWith(":"))
       { String[] replStrings = replacement.split(":");
         words[0] = matcher.replaceAll(root + replStrings[0]);
         words[1] = matcher.replaceAll(root);
       }
      else words[0] = matcher.replaceAll(root + replacement);

      //*-- try the longer suffix first
      synonyms = synHyper.get(words[0]); if (synonyms != null) return(synonyms);
      synonyms = synHyper.get(words[1]); if (synonyms != null) return(synonyms);
     
   } //*-- end of if matcher....

   return("");
  }

  //*-- return a RE pattern based on the hash map passed
  private Pattern getPattern(HashMap<String, String> suffixes)
  {
   String[] suffixKeys = new String[suffixes.size()];
   suffixes.keySet().toArray(suffixKeys);
   StringBuffer suffixREBuffer = new StringBuffer();
   for (int i = 0; i < suffixKeys.length; i++)
     { suffixREBuffer.append(suffixKeys[i]); suffixREBuffer.append("|"); }
   String suffixRE = suffixREBuffer.toString(); suffixRE = suffixRE.trim();
   if (suffixRE.endsWith("|")) suffixRE = suffixRE.substring(0, suffixRE.length() - 1);
  
   return ( Pattern.compile("^(.*?)(" + suffixRE + ")$") );     
  }

}
TOP

Related Classes of org.sf.mustru.utils.WordnetTools

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.