package com.google.code.ftspc.websearch.Lucene;
import com.google.code.ftspc.websearch.IniAndVars.Vars;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
/**
 * Runs full-text queries against the Lucene index stored at
 * {@code Vars.Lucene_Repo} and renders the top hits as small HTML snippets.
 *
 * <p>Each call to {@link #search(String)} overwrites the instance fields
 * below without any synchronization, so one instance must not be shared
 * between threads that search concurrently.
 *
 * @author Arthur Khusnutdinov
 */
public class SearchFunctions {

    /** Name of the index field that is both queried and highlighted. */
    private static final String CONTENT_FIELD = "fileContent";

    /** Upper bound on the number of highlighted fragments requested per hit. */
    private static final int MAX_FRAGMENTS = 10;

    /** Passed through to {@link #doPagingSearch}; not consulted by the visible code. */
    boolean raw = false;

    /** Maximum number of hits rendered by one search. */
    int hitsPerPage = 10;

    /** Searcher opened by the most recent {@link #search(String)} call; its reader is closed before that call returns. */
    IndexSearcher searcher;

    /** Analyzer used for query parsing and, as a fallback, for re-tokenizing stored text when highlighting. */
    Analyzer analyzer;

    /** Parser created by the most recent {@link #search(String)} call. */
    QueryParser parser;

    /** Never assigned by the visible code; kept because the field is package-visible. */
    Analyzer stdanalyzer;

    /** Query parsed by the most recent {@link #search(String)} call. */
    Query query;

    /** Result list of the most recent {@link #search(String)} call (see that method for the layout). */
    ArrayList outMultiData = new ArrayList();

    /**
     * Parses {@code line} as a query on the {@code fileContent} field and
     * searches the index.
     *
     * <p>The returned list has four entries, in this order:
     * <ol>
     *   <li>the parsed query rendered with {@code Query.toString(field)},</li>
     *   <li>the string {@code "<n> total matching documents"},</li>
     *   <li>an {@code ArrayList} of HTML snippets, one per rendered hit,</li>
     *   <li>the original {@code line}.</li>
     * </ol>
     *
     * <p>The list is intentionally left as a raw type because it mixes
     * {@code String} and {@code ArrayList} elements and narrowing the return
     * type could break existing callers.
     *
     * @param line the query text typed by the user
     * @return the result list described above, or {@code null} if opening the
     *         index, parsing the query, or searching failed (the exception is
     *         printed to standard error)
     */
    public ArrayList search(String line) {
        String field = CONTENT_FIELD;
        // Start from an empty list so that a second search on the same
        // instance does not return the previous search's entries as well.
        outMultiData = new ArrayList();
        IndexSearcher opened = null;
        try {
            opened = new IndexSearcher(FSDirectory.open(new File(Vars.Lucene_Repo)));
            searcher = opened;
            analyzer = new RussianAnalyzer(Version.LUCENE_40);
            parser = new QueryParser(Version.LUCENE_40, field, analyzer);
            query = parser.parse(line);

            outMultiData.add(query.toString(field));
            // doPagingSearch appends the "total matching documents" line to
            // outMultiData itself, so it must run before the snippet list
            // is added to keep the documented entry order.
            ArrayList snippets = doPagingSearch(opened, query, hitsPerPage, raw, true);
            outMultiData.add(snippets);
            outMultiData.add(line);
            return outMultiData;
        } catch (Exception ex) {
            ex.printStackTrace();
            return null;
        } finally {
            closeReaderOf(opened);
        }
    }

    /**
     * Executes {@code query} and renders the first page of hits.
     *
     * <p>Side effect: appends {@code "<n> total matching documents"} to
     * {@link #outMultiData}. Highlighting falls back to the {@link #analyzer}
     * field, which {@link #search(String)} sets before calling this method.
     *
     * <p>NOTE(review): stored field values and highlighted fragments are
     * placed into the HTML output without escaping; confirm whether callers
     * escape them or whether indexed files can contain markup.
     *
     * @param searcher    searcher to run the query against
     * @param query       parsed query; also drives fragment scoring
     * @param hitsPerPage number of hits to render; five times as many are collected
     * @param raw         unused by the current implementation
     * @param interactive unused by the current implementation
     * @return a list of {@code String} snippets, one per rendered hit, each
     *         holding the file name, score, full path and any matching fragments
     * @throws IOException if the index cannot be read
     */
    public ArrayList doPagingSearch(IndexSearcher searcher, Query query,
            int hitsPerPage, boolean raw, boolean interactive) throws IOException {
        ArrayList<String> outData = new ArrayList<String>();

        // Collect enough docs to show 5 pages
        TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        outMultiData.add(collector.getTotalHits() + " total matching documents");

        // Only the first page is rendered.
        int end = Math.min(hits.length, hitsPerPage);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));

        for (int i = 0; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            StringBuilder entry = new StringBuilder();
            entry.append(" fileName: ").append(doc.get("fileName"))
                    .append("\r\n <br /> score=").append(hits[i].score)
                    .append("\r\n <br /> Full path ").append(doc.get("filePath")).append("<br />");
            appendFragments(entry, highlighter, searcher, hits[i].doc, doc.get(CONTENT_FIELD));
            outData.add(entry.toString());
        }
        return outData;
    }

    /**
     * Appends the highlighted fragments of one hit to {@code entry}.
     * Highlighting is best-effort: a failure is printed and the entry keeps
     * whatever was appended up to that point.
     */
    private void appendFragments(StringBuilder entry, Highlighter highlighter,
            IndexSearcher searcher, int docId, String text) throws IOException {
        if (text == null) {
            // No stored content to excerpt from; keep the hit without fragments
            // instead of letting one document abort the whole search.
            return;
        }
        TokenStream tokenStream = TokenSources.getAnyTokenStream(
                searcher.getIndexReader(), docId, CONTENT_FIELD, analyzer);
        try {
            TextFragment[] fragments =
                    highlighter.getBestTextFragments(tokenStream, text, false, MAX_FRAGMENTS);
            for (TextFragment fragment : fragments) {
                if (fragment != null && fragment.getScore() > 0) {
                    entry.append("\r\n <br /> ...").append(fragment.toString()).append("...");
                }
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Closes the index reader behind {@code opened}, if any, so that repeated
     * searches do not accumulate open index files. A close failure is printed
     * and otherwise ignored because the search result is already complete.
     */
    private void closeReaderOf(IndexSearcher opened) {
        if (opened == null) {
            return;
        }
        try {
            opened.getIndexReader().close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}