package org.sf.mustru.search;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Properties;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.sf.mustru.docs.IndexableDoc;
import org.sf.mustru.utils.Constants;
import org.sf.mustru.utils.DbTools;
import org.sf.mustru.utils.StandardBgramAnalyzer;
import org.sf.mustru.utils.StringTools;
import com.aliasi.spell.CompiledSpellChecker;
import com.sleepycat.je.DatabaseEntry;
/**
* Run queries against the index and check the results
*/
public class SearchQuery
{
private Logger logger = null;
public String[] results = new String[220];
private Searcher is = null; //*-- Lucene index searcher
private Query query = null; //*-- Lucene query
private static CompiledSpellChecker sc = null; //*-- Lingpipe spell checker
private String alt = ""; //*-- alternative query string
private StandardBgramAnalyzer bgramAnalyzer = null; //*-- Bigram analyzer
public int resultsSize = 0;
public SearchQuery()
{ PropertyConfigurator.configure (Constants.LOG4J_FILE);
logger = Logger.getLogger(SearchQuery.class.getName());
//*-- get the index searcher
try { is = SearchTools.getSearcher(Constants.getINDEXDIR(), false); bgramAnalyzer = new StandardBgramAnalyzer(); }
catch (IOException ie) { logger.error("Problem with opening the Lucene index directory " + ie.getMessage()); }
//*-- read the spell checker model
try { readModel(Constants.SPELL_CHECK_MODEL); }
catch (IOException ie) { logger.error("Could not read the spell checker file " + ie.getMessage()); }
catch (ClassNotFoundException ce) { logger.error("Class error: " + ce.getMessage()); }
}
/**
* Submit a query and return the hits object. If the query cannot be parsed, then remove extraneous
* characters and re-submit the query to the parser.
* @param str String containing the query
* @return Hits object
*/
public Hits getHits(String str)
{ return getHits(str, false); }
public Hits getHits(String str, boolean filter)
{
Hits hits = null; bgramAnalyzer.setExtractEntities(false);
try
{
QueryParser qp = new QueryParser("contents", bgramAnalyzer );
if (filter) str = str.replaceAll("[^a-zA-Z0-9\"']", " ");
query = qp.parse(str);
logger.info("Query " + str + " is parsed to -->" + query + "<---");
hits = is.search(query);
setAlt( (sc != null) ? sc.didYouMean(query.toString("contents")): "");
}
catch (IOException ie) { logger.error("Problem with reading the Lucene index directory " + ie.getMessage()); }
catch (ParseException pe) { logger.error("Could not parse query " + pe.getMessage());
return((filter) ? hits: getHits(str, true)); }
return(hits);
}
public void dumpHits(Hits hits, boolean explain)
{
logger.info(hits.length() + " hits for query "); int j = 0;
try
{
//*-- read the document types file to find the classes for different document types
Properties dprops = null;
try { dprops = new Properties(); dprops.load(new FileInputStream(Constants.DOCTYPES_FILE)); }
catch (IOException e) { System.out.println("Could not read " + Constants.DOCTYPES_FILE + " " + e.getMessage()); }
//*-- Create the Berkeley DB environment and fetch the matching documents
DbTools dbt = Constants.getDbt();
if (hits.length() == 0) results[j++] = " Try " + getAlt();
LOOP: for (int i = 0; i < hits.length(); i++)
{
//*-- use the key to fetch the matching database entry
Document doc = hits.doc(i);
String key = doc.get("key");
String ftype = doc.get("type");
DatabaseEntry data = new DatabaseEntry();
String dbname = (ftype.equalsIgnoreCase("email")) ? Constants.EXT_MESSAGES_DB: Constants.EXT_FILES_DB;
dbt.openDB(dbname, true, false); //*-- no create and no dups
dbt.fetch(key, data);
dbt.closeDB();
//*-- use the type of the document to create a doc instance of the specified type
String docClass = dprops.getProperty(ftype);
if ( (docClass == null) || (docClass.equals("")) ) docClass = "org.sf.mustru.docs.TextDoc";
Class docType = Class.forName(docClass);
results[j++] = " Hit: " + i + ". Type: " + docClass + " Key: " + key + " Score: " + hits.score(i);
IndexableDoc idoc = (IndexableDoc) docType.newInstance();
idoc = (IndexableDoc) idoc.getBdbBinding().entryToObject(data);
String contents = idoc.getContents().toString(); contents = StringTools.filterChars(contents);
contents = StringTools.fillin(contents, 800, '.');
results[j++] = " File Name: " + idoc.getFileName();
results[j++] = " File Location: " + idoc.getFileLocation();
results[j++] = " File Type: " + idoc.getFileType();
Method method = idoc.getClass().getMethod("getTextType", new Class[] {} ); method.setAccessible(true);
results[j++] = " Text type: " + (String) method.invoke(idoc, new Object[] {} );
results[j++] = " File Size: " + idoc.getFileLength();
results[j++] = " File Rank: " + idoc.getFileRank();
results[j++] = " Title: " + idoc.getTitle();
results[j++] = " Author: " + idoc.getAuthor();
results[j++] = " Language: " + idoc.getLanguage();
j = dumpResults(contents, j);
results[j++] = "";
if (explain)
{ Explanation exp = is.explain(query, hits.id(i));
results[j++] = " Explanation: " + exp; }
if (i >= 9) break LOOP;
} //*-- end of for
resultsSize = j;
} //*-- end of try
catch (IOException ie)
{ logger.error("IO Error: " + ie.getMessage()); }
catch (ClassNotFoundException ce)
{ logger.error("Class not found Error: " + ce.getMessage()); }
catch (IllegalAccessException ie)
{ logger.error("Illegal Access Error: " + ie.getMessage()); }
catch (InstantiationException ie)
{ logger.error("Could instantiate object: " + ie.getMessage()); }
catch (NoSuchMethodException ne)
{ logger.error("No such method error: " + ne.getMessage() ); }
catch (InvocationTargetException te)
{ logger.error("Invocation failed: " + te.getMessage()); }
}
private int dumpResults(String line, int j)
{
int ind = j; int LINE_LENGTH = 80;
int lineLen = line.length();
if (lineLen < LINE_LENGTH) results[ind++] = " Contents: " + line;
else
{ int start = 0; int end = start + LINE_LENGTH;
while (end < lineLen)
{ String header = (start == 0) ? " Contents: ": " ";
results[ind++] = header + line.substring(start, end) + "-";
start = end; end = start + LINE_LENGTH; }
results[ind++] = " " + line.substring(start, lineLen);
}
return(ind);
}
private static void readModel(String filename) throws ClassNotFoundException, IOException
{
//*-- create object input stream from file
if (new File(filename).exists())
{
BufferedInputStream bufIn = new BufferedInputStream(new FileInputStream(new File(filename)));
ObjectInputStream objIn = new ObjectInputStream(bufIn);
//*-- read the spell checker
sc = (CompiledSpellChecker) objIn.readObject();
//*-- close the resources and return result
objIn.close(); bufIn.close();
}
}
public String[] getResults()
{ String[] truncResults = new String[resultsSize];
for (int i = 0; i < resultsSize; i++) truncResults[i] = results[i];
return truncResults; }
public int getResultsSize()
{ return resultsSize; }
public void setAlt(String alt)
{ this.alt = alt; }
public String getAlt()
{ return (alt); }
}