package com.knowgate.lucene;
import java.util.Date;
import java.util.Arrays;
import java.util.Comparator;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import com.knowgate.debug.DebugFile;
import com.knowgate.misc.Gadgets;
/**
 * Searches a Lucene full text index for news messages.
 * @author Sergio Montoro Ten
 * @version 4.0
 */
public class NewsMessageSearcher {
/**
 * Default constructor. It performs no initialization.
 */
public NewsMessageSearcher() {
  // intentionally empty
}
* Compose a Lucene query based on given parameters
* @param sLuceneIndexPath String Base path for Lucene indexes excluding WorkArea and table name
* @param sWorkArea String GUID of WorkArea to be searched, cannot be null
* @param sGroup String GUID of NewsGroup to which message belongs
* @param sAuthor String
* @param sTitle String
* @param sText String
* @param iLimit int
* @param oSortBy Comparator
* @return BugRecord[]
* @throws ParseException
* @throws IOException
* @throws NullPointerException
public static NewsMessageRecord[] search (String sLuceneIndexPath,
String sWorkArea, String sNewsGroupCategoryName,
String sAuthor, String sTitle,
Date dtFromDate, Date dtToDate,
String sText, int iLimit,
Comparator oSortBy)
throws ParseException, IOException, NullPointerException {
if (null==sLuceneIndexPath)
throw new NullPointerException("NewsMessageSearcher.search() luceneindex parameter cannot be null");
if (null==sWorkArea)
throw new NullPointerException("NewsMessageSearcher.search() workarea parameter cannot be null");
if (DebugFile.trace) {
DebugFile.writeln("Begin NewsMessageSearcher.search("+sLuceneIndexPath+","+
NewsMessageRecord[] aRetArr;
BooleanQuery oQrx = new BooleanQuery();
oQrx.add(new TermQuery(new Term("workarea",sWorkArea)),BooleanClause.Occur.MUST);
if (null!=sNewsGroupCategoryName)
oQrx.add(new TermQuery(new Term("container",sNewsGroupCategoryName)),BooleanClause.Occur.MUST);
if (dtFromDate!=null && dtToDate!=null)
oQrx.add(new RangeQuery(new Term("created",DateTools.dateToString(dtFromDate, DateTools.Resolution.DAY)),
new Term("created",DateTools.dateToString(dtToDate, DateTools.Resolution.DAY)), true), BooleanClause.Occur.MUST);
else if (dtFromDate!=null)
oQrx.add(new RangeQuery(new Term("created",DateTools.dateToString(dtFromDate, DateTools.Resolution.DAY)),
new Term("created",DateTools.dateToString(new Date(299,11,31), DateTools.Resolution.DAY)), true), BooleanClause.Occur.MUST);
else if (dtToDate!=null)
oQrx.add(new RangeQuery(new Term("created",DateTools.dateToString(new Date(79,11,31), DateTools.Resolution.DAY)),
new Term("created",DateTools.dateToString(dtToDate, DateTools.Resolution.DAY)), true), BooleanClause.Occur.MUST);
BooleanQuery oQry = new BooleanQuery();
if (null!=sAuthor)
oQry.add(new TermQuery(new Term("author",sAuthor)),BooleanClause.Occur.SHOULD);
if (null!=sTitle)
oQry.add(new TermQuery(new Term("title",sTitle)),BooleanClause.Occur.SHOULD);
if (null!=sText)
oQry.add(new TermQuery(new Term("text",sText)),BooleanClause.Occur.SHOULD);
oQrx.add(oQry, BooleanClause.Occur.MUST);
String sSegments = Gadgets.chomp(sLuceneIndexPath,File.separator)+"k_newsmsgs"+File.separator+sWorkArea;
if (DebugFile.trace) DebugFile.writeln("new IndexSearcher("+sSegments+")");
IndexSearcher oSearch = new IndexSearcher(sSegments);
Document oDoc;
if (iLimit>0) {
if (DebugFile.trace) DebugFile.writeln("IndexSearcher.search("+oQrx.toString()+")");
TopDocs oTopSet = oSearch.search(oQrx, null, iLimit);
if (oTopSet.scoreDocs!=null) {
ScoreDoc[] oTopDoc = oTopSet.scoreDocs;
int iDocCount = oTopDoc.length;
aRetArr = new NewsMessageRecord[iDocCount];
for (int d=0; d<iDocCount; d++) {
oDoc = oSearch.doc(oTopDoc[d].doc);
String[] aAbstract = Gadgets.split(oSearch.doc(oTopDoc[d].doc).get("abstract"), '¨');
try {
aRetArr[d] = new NewsMessageRecord(oTopDoc[d].score, oDoc.get("workarea"),
oDoc.get("guid"), oDoc.get("container"), oDoc.get("title"),
oDoc.get("author"), DateTools.stringToDate(oDoc.get("created")), oDoc.get("abstract"));
} catch (java.text.ParseException neverthrown) {
throw new ParseException("NewsMessageSearcher.search() Error parsing date "+oDoc.get("created")+" of document "+oDoc.get("guid"));
} // next
} else {
aRetArr = null;
} else {
Hits oHitSet = oSearch.search(oQrx);
int iHitCount = oHitSet.length();
if (iHitCount>0) {
aRetArr = new NewsMessageRecord[iHitCount];
for (int h=0; h<iHitCount; h++) {
oDoc = oHitSet.doc(h);
try {
aRetArr[h] = new NewsMessageRecord(oHitSet.score(h), oDoc.get("workarea"),
oDoc.get("guid"), oDoc.get("container"), oDoc.get("title"),
oDoc.get("author"), DateTools.stringToDate(oDoc.get("created")), oDoc.get("abstract"));
} catch (java.text.ParseException neverthrown) {
throw new ParseException("NewsMessageSearcher.search() Error parsing date "+oDoc.get("created")+" of document "+oDoc.get("guid"));
} // next
} else {
aRetArr = null;
} // fi (iLimit>0)
if (oSortBy!=null) {
Arrays.sort(aRetArr, oSortBy);
if (DebugFile.trace) {
if (null==aRetArr)
DebugFile.writeln("End NewsMessageSearcher.search() : no records found");
DebugFile.writeln("End NewsMessageSearcher.search() : "+String.valueOf(aRetArr.length));
return aRetArr;
} // search
// ---------------------------------------------------------------------------
* Escape special characters from a Lucene query
* @return The input string with any character of set +-&|!(){}[]^"~*?:\ be preceded by a backslash
public static String escape(String sInput) {
return Gadgets.escapeChars(sInput,"+-&|!(){}[]^\"~*?:\\",'\\');