package org.sf.mustru.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.sf.mustru.utils.Constants;
/**
 * A collection of Wordnet tools. <br>
 *
 * 1. getSynonyms: Pass a word and fetch the associated synonyms
 *
 * <p>The lookup table is read once, in the constructor, from the file
 * <code>wordSynonyms.lst</code> in <code>Constants.DATADIR</code>. Every usable line of that
 * file holds three fields separated by <code>!!!</code>; the first two fields together
 * (<code>word!!!pos</code>) form the key and the third field is the value returned by
 * {@link #getSynonyms(String, String)}.
 *
 * <p>When a word is not found as-is, a known suffix for its part of speech is stripped
 * or replaced and the resulting stem(s) are looked up instead.
 */
public class WordnetTools
{
  //*-- separator between the fields of a data line and between word and part of speech in a key
  private static final String KEY_SEPARATOR = "!!!";

  private HashMap<String, String> adjSuffixes,   //*-- hash for adjective suffixes
                                  nounSuffixes,  //*-- hash for noun suffixes
                                  verbSuffixes;  //*-- hash for verb suffixes
  private Pattern adjSuffixPattern;              //*-- RE to remove an adjective suffix
  private Pattern nounSuffixPattern;             //*-- RE to remove a noun suffix
  private Pattern verbSuffixPattern;             //*-- RE to remove verb suffix
  private HashMap<String, String> synHyper;      //*-- hash keyed by word!!!pos, see constructor
  static Logger logger = Logger.getLogger(WordnetTools.class.getName() );

  /**
   * Load the synonym table from <code>wordSynonyms.lst</code> and build the suffix tables.
   * I/O problems are logged and leave the table empty or partially filled; they are not
   * propagated to the caller.
   */
  public WordnetTools()
  {
    //*-- load the synonym hash
    synHyper = new HashMap<String, String>();
    String synFile = Constants.DATADIR + File.separator + "wordSynonyms.lst";
    BufferedReader filein = null;
    try
    {
      filein = new BufferedReader(new FileReader(synFile));
      String s;
      while ((s = filein.readLine()) != null)
      {
        String[] etypes = s.split(KEY_SEPARATOR);
        //*-- a blank or truncated line has fewer than 3 fields, skip it instead of failing
        if (etypes.length < 3) continue;
        synHyper.put(etypes[0].trim() + KEY_SEPARATOR + etypes[1], etypes[2].trim());
      }
    }
    catch (FileNotFoundException fe)
    { logger.error("Could not find file wordSynonyms.lst " + fe.getMessage(), fe); }
    catch (IOException ie)
    { logger.error("IO Error " + ie.getMessage(), ie); }
    finally
    {
      try { if (filein != null) filein.close(); }
      catch (IOException ie) { logger.error("Ignored error", ie); }
    }

    //*-- load the suffix replacement hash with suffixes and replacements to find the stem words of word forms
    //*-- noun suffix removal
    nounSuffixes = new HashMap<String, String>();
    nounSuffixes.put("ches", "ch");
    nounSuffixes.put("shes", "sh");
    nounSuffixes.put("ses", "s");
    nounSuffixes.put("xes", "x");   //*-- e.g. boxes -> box
    nounSuffixes.put("zes", "z");
    nounSuffixes.put("men", "man");
    nounSuffixes.put("ies", "y");
    nounSuffixes.put("s", "");
    nounSuffixPattern = getPattern(nounSuffixes);

    //*-- verb suffix removal, replacement strings ending with : represent 2 possibilities
    verbSuffixes = new HashMap<String, String>();
    verbSuffixes.put("ies", "y");
    verbSuffixes.put("ing", "e:");
    verbSuffixes.put("est", "e:");
    verbSuffixes.put("es", "e:");
    verbSuffixes.put("ed", "e:");
    verbSuffixes.put("er", "e:");
    verbSuffixes.put("s", "");
    verbSuffixPattern = getPattern(verbSuffixes);

    //*-- adjective suffix removal
    adjSuffixes = new HashMap<String, String>();
    adjSuffixes.put("est", "e:");
    adjSuffixes.put("er", "e:");
    adjSuffixPattern = getPattern(adjSuffixes);
  }

  /**
   * Return the synonyms for a word, treating it as a noun (part of speech "n").
   * @param word The word for which synonyms must be located
   * @return String containing synonyms, or an empty string if none were found
   */
  public String getSynonyms(String word)
  { return getSynonyms(word, "n"); }

  /**
   * Return the synonyms for a given word.
   * The word is trimmed, lower-cased and its blanks are replaced by underscores before
   * the lookup. If the exact form is unknown and the part of speech is "n", "v" or "a",
   * a matching suffix is stripped/replaced and the stem(s) are looked up.
   *
   * @param word The word for which synonyms must be located
   * @param pos The part of speech of the word
   * @return String containing synonyms, or an empty string if nothing was found or if
   *         either argument is null
   */
  public String getSynonyms(String word, String pos)
  {
    if (word == null || pos == null) return "";

    //*-- normalise the word to the form used for the keys of the hash
    word = word.trim().toLowerCase(Constants.locale).replace(' ', '_');
    String synonyms = synHyper.get(word + KEY_SEPARATOR + pos);
    if (synonyms != null) return synonyms;

    //*-- try removing a suffix, pick the suffix table for the part of speech
    Pattern suffixPattern;
    HashMap<String, String> suffixes;
    if (pos.equals("n"))      { suffixPattern = nounSuffixPattern; suffixes = nounSuffixes; }
    else if (pos.equals("v")) { suffixPattern = verbSuffixPattern; suffixes = verbSuffixes; }
    else if (pos.equals("a")) { suffixPattern = adjSuffixPattern;  suffixes = adjSuffixes; }
    else return "";

    Matcher matcher = suffixPattern.matcher(word);
    if (!matcher.matches()) return "";

    //*-- group 2 is always one of the keys of the suffix table the pattern was built from
    String root = matcher.group(1);
    String replacement = suffixes.get(matcher.group(2));

    //*-- create upto two possible stems, a replacement ending with a ':' has two possibilities:
    //*-- the root plus the text before the ':' and the bare root
    String longStem;
    String shortStem = "";
    if (replacement.endsWith(":"))
    {
      longStem = root + replacement.substring(0, replacement.length() - 1);
      shortStem = root;
    }
    else longStem = root + replacement;

    //*-- try the longer stem first
    synonyms = lookupStem(longStem, pos);
    if (synonyms != null) return synonyms;
    synonyms = lookupStem(shortStem, pos);
    if (synonyms != null) return synonyms;
    return "";
  }

  //*-- look up a stem with the same word!!!pos key that was used to load the hash, null if absent
  private String lookupStem(String stem, String pos)
  {
    if (stem.length() == 0) return null;
    return synHyper.get(stem + KEY_SEPARATOR + pos);
  }

  //*-- return a RE pattern based on the hash map passed; group 1 is the root, group 2 the suffix.
  //*-- The order of the alternatives does not matter: the root group is reluctant and the suffix
  //*-- must reach the end of the word, so the shortest root (i.e. the longest suffix) is found first.
  private Pattern getPattern(HashMap<String, String> suffixes)
  {
    StringBuilder suffixRE = new StringBuilder();
    for (String suffix : suffixes.keySet())
    {
      if (suffixRE.length() > 0) suffixRE.append('|');
      suffixRE.append(Pattern.quote(suffix));
    }
    return Pattern.compile("^(.*?)(" + suffixRE + ")$");
  }
}