Package org.sf.mustru.search

Source Code of org.sf.mustru.search.SearchQuery

package org.sf.mustru.search;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Properties;

import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;

import org.sf.mustru.docs.IndexableDoc;
import org.sf.mustru.utils.Constants;
import org.sf.mustru.utils.DbTools;
import org.sf.mustru.utils.StandardBgramAnalyzer;
import org.sf.mustru.utils.StringTools;

import com.aliasi.spell.CompiledSpellChecker;
import com.sleepycat.je.DatabaseEntry;

/**
* Run queries against the index and check the results
*/

public class SearchQuery
{
private Logger logger = null
public String[] results = new String[220];
private Searcher is = null;        //*-- Lucene index searcher
private Query query = null;        //*-- Lucene query
private static CompiledSpellChecker sc = null//*-- Lingpipe spell checker
private String alt = "";        //*-- alternative query string
private StandardBgramAnalyzer bgramAnalyzer = null//*-- Bigram analyzer
public int resultsSize = 0;

public SearchQuery()
{ PropertyConfigurator.configure (Constants.LOG4J_FILE);
   logger = Logger.getLogger(SearchQuery.class.getName());
  
   //*-- get the index searcher
   try { is = SearchTools.getSearcher(Constants.getINDEXDIR(), false); bgramAnalyzer = new StandardBgramAnalyzer()}
   catch (IOException ie) { logger.error("Problem with opening the Lucene index directory " + ie.getMessage()); }
    
   //*-- read the spell checker model
   try { readModel(Constants.SPELL_CHECK_MODEL); }
   catch (IOException ie) { logger.error("Could not read the spell checker file " + ie.getMessage()); }
   catch (ClassNotFoundException ce) { logger.error("Class error: " + ce.getMessage()); }
}

/**
  * Submit a query and return the hits object. If the query cannot be parsed, then remove extraneous
  * characters and re-submit the query to the parser.
  * @param str String containing the query
  * @return Hits object
  */
public Hits getHits(String str)
{ return getHits(str, false); }
public Hits getHits(String str, boolean filter)
{
  Hits hits = null; bgramAnalyzer.setExtractEntities(false);
  try
  {
   QueryParser qp = new QueryParser("contents", bgramAnalyzer );
   if (filter) str = str.replaceAll("[^a-zA-Z0-9\"']", " ");
   query = qp.parse(str);
   logger.info("Query " + str + " is parsed to -->" + query + "<---");
   hits = is.search(query);
   setAlt( (sc != null) ? sc.didYouMean(query.toString("contents")): "");
  }
  catch (IOException ie) { logger.error("Problem with reading the Lucene index directory " + ie.getMessage()); }
  catch (ParseException pe) { logger.error("Could not parse query " + pe.getMessage());
                               return((filter) ? hits: getHits(str, true)); }
  return(hits);
}
public void dumpHits(Hits hits, boolean explain
{
  logger.info(hits.length() + " hits for query "); int j = 0;
  try
  {
   //*-- read the document types file to find the classes for different document types
   Properties dprops = null;
   try { dprops = new Properties(); dprops.load(new FileInputStream(Constants.DOCTYPES_FILE)); }
   catch (IOException e)  { System.out.println("Could not read " + Constants.DOCTYPES_FILE + " " + e.getMessage()); }

   //*-- Create the Berkeley DB environment and fetch the matching documents
   DbTools dbt = Constants.getDbt();
   if (hits.length() == 0results[j++] = " Try " + getAlt();
   LOOP: for (int i = 0; i < hits.length(); i++)
   {
    //*-- use the key to fetch the matching database entry
    Document doc = hits.doc(i);
    String key = doc.get("key");
    String ftype = doc.get("type");
    DatabaseEntry data = new DatabaseEntry();

    String dbname = (ftype.equalsIgnoreCase("email")) ? Constants.EXT_MESSAGES_DB: Constants.EXT_FILES_DB;
    dbt.openDB(dbname, true, false); //*-- no create and no dups            
    dbt.fetch(key, data);
    dbt.closeDB();

    //*-- use the type of the document to create a doc instance of the specified type
    String docClass = dprops.getProperty(ftype);
    if ( (docClass == null) || (docClass.equals("")) ) docClass = "org.sf.mustru.docs.TextDoc";
    Class docType = Class.forName(docClass);
    results[j++] = " Hit: " + i + ". Type: " + docClass + " Key: " + key + " Score: " + hits.score(i);

    IndexableDoc idoc =  (IndexableDoc) docType.newInstance();
    idoc = (IndexableDoc) idoc.getBdbBinding().entryToObject(data);     
    String contents = idoc.getContents().toString(); contents = StringTools.filterChars(contents);
    contents = StringTools.fillin(contents, 800, '.');
    results[j++] = "  File Name: " + idoc.getFileName();
    results[j++] = "  File Location: " + idoc.getFileLocation();
    results[j++] = "  File Type: " + idoc.getFileType();
    Method method = idoc.getClass().getMethod("getTextType", new Class[] {}  ); method.setAccessible(true);
    results[j++] = "  Text type: " + (String) method.invoke(idoc, new Object[] {} );
    results[j++] = "  File Size: " + idoc.getFileLength();
    results[j++] = "  File Rank: " + idoc.getFileRank();
    results[j++] = "  Title: " + idoc.getTitle();
    results[j++] = "  Author: " + idoc.getAuthor();
    results[j++] = "  Language: " + idoc.getLanguage();
    j = dumpResults(contents, j);
    results[j++] = "";
    if (explain
    { Explanation exp = is.explain(query, hits.id(i));
      results[j++] = "  Explanation: " + exp; }
   
    if (i >= 9) break LOOP;

   } //*-- end of for
  
   resultsSize = j;
  } //*-- end of try

  catch (IOException ie)
  { logger.error("IO Error: " + ie.getMessage()); }
  catch (ClassNotFoundException ce)
  { logger.error("Class not found Error: " + ce.getMessage()); }
  catch (IllegalAccessException ie)
  { logger.error("Illegal Access Error: " + ie.getMessage()); }
  catch (InstantiationException ie)
  { logger.error("Could instantiate object: " + ie.getMessage()); }
  catch (NoSuchMethodException ne)
  { logger.error("No such method error: " + ne.getMessage() ); }
  catch (InvocationTargetException te)
  { logger.error("Invocation failed: " + te.getMessage()); }

}

private int dumpResults(String line, int j)
{
  int ind = j; int LINE_LENGTH = 80;
  int lineLen = line.length();
  if (lineLen < LINE_LENGTH) results[ind++] = "  Contents: " + line;
  else
  { int start = 0; int end = start + LINE_LENGTH;
  while (end < lineLen)
  { String header = (start == 0) ? "  Contents: ": "            ";
    results[ind++] =  header + line.substring(start, end) + "-";
    start = end; end = start + LINE_LENGTH; }
    results[ind++] = "            " + line.substring(start, lineLen);    
  }
  return(ind);
}

private static void readModel(String filename) throws ClassNotFoundException, IOException
  {
   //*-- create object input stream from file
   if (new File(filename).exists())
   {
    BufferedInputStream bufIn = new BufferedInputStream(new FileInputStream(new File(filename)));
    ObjectInputStream objIn = new ObjectInputStream(bufIn);

    //*-- read the spell checker
    sc = (CompiledSpellChecker) objIn.readObject();

    //*-- close the resources and return result
    objIn.close(); bufIn.close();
   }
}

public String[] getResults()
{ String[] truncResults = new String[resultsSize];
for (int i = 0; i < resultsSize; i++) truncResults[i] = results[i];
return truncResults; }

public int getResultsSize()
{ return resultsSize; }

public void setAlt(String alt)
{ this.alt = alt; }
public String getAlt()
{ return (alt); }
}
TOP

Related Classes of org.sf.mustru.search.SearchQuery

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.