Package com.google.code.ftspc.websearch.Lucene

Source Code of com.google.code.ftspc.websearch.Lucene.SearchFunctions

package com.google.code.ftspc.websearch.Lucene;

import com.google.code.ftspc.websearch.IniAndVars.Vars;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;

/**
*
* @author Arthur Khusnutdinov
*/
public class SearchFunctions {

    boolean raw = false;
    int hitsPerPage = 10;
    IndexSearcher searcher;
    Analyzer analyzer;
    QueryParser parser;
    Analyzer stdanalyzer;
    Query query;
    ArrayList outMultiData = new ArrayList();

    public ArrayList search(String line) {
        String field = "fileContent";
        try {
            searcher = new IndexSearcher(FSDirectory.open(new File(Vars.Lucene_Repo)));
            // stdanalyzer = new StandardAnalyzer(Version.LUCENE_30);
            analyzer = new RussianAnalyzer(Version.LUCENE_40);
            parser = new QueryParser(Version.LUCENE_40, field, analyzer);

            query = parser.parse(line);
            //System.out.println("Searching for: " + query.toString(field));
            outMultiData.add(query.toString(field));
            outMultiData.add(doPagingSearch(searcher, query, hitsPerPage, raw, null == null));
            outMultiData.add(line);
            return outMultiData;
        } catch (Exception ex) {
            ex.printStackTrace();
            return null;
        }
    }

    public ArrayList doPagingSearch(IndexSearcher searcher, Query query,
            int hitsPerPage, boolean raw, boolean interactive) throws IOException {
        ArrayList outData = new ArrayList();
        String accumulator = "";

        // Collect enough docs to show 5 pages
        TopScoreDocCollector collector = TopScoreDocCollector.create(
                5 * hitsPerPage, false);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        int numTotalHits = collector.getTotalHits();
        outMultiData.add(numTotalHits + " total matching documents");

        int start = 0;
        //int end = Math.min(numTotalHits, hitsPerPage);
        int end = 0;

        end = Math.min(hits.length, start + hitsPerPage);

        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));


        for (int i = start; i < end; i++) {
            /* if (raw) {                              // output raw format
            System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
            continue;
            }*/

            Document doc = searcher.doc(hits[i].doc);
            accumulator = "   fileName: " + doc.get("fileName")
                    + "\r\n <br /> score=" + hits[i].score;
            accumulator += "\r\n <br /> Full path  " + doc.get("filePath")+"<br />";
            String text = doc.get("fileContent");
            TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits[i].doc, "fileContent", analyzer);
            try {
                TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//

                for (int j = 0; j < frag.length; j++) {
                    if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                        accumulator += "\r\n <br />   ..." + frag[j].toString()+"...";
                    }
                }

            } catch (Exception ex) {
                ex.printStackTrace();
            }
            outData.add(accumulator);
            accumulator = "";
        }
        return outData;
    }
}
TOP

Related Classes of com.google.code.ftspc.websearch.Lucene.SearchFunctions

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.