Package edu.stanford.nlp.sempre.fbalignment.index

Source Code of edu.stanford.nlp.sempre.fbalignment.index.FbEntitySearcher

package edu.stanford.nlp.sempre.fbalignment.index;

import fig.basic.LogInfo;
import fig.basic.StopWatch;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

public class FbEntitySearcher {
 
  private final QueryParser queryParser;
  private final IndexSearcher indexSearcher;
  private int numOfDocs = 50;
  private String searchStrategy;

  public FbEntitySearcher(String indexDir, int numOfDocs, String searchingStrategy) throws IOException {

    LogInfo.begin_track("Constructing Searcher");
    if (!searchingStrategy.equals("exact") && !searchingStrategy.equals("inexact"))
      throw new RuntimeException("Bad searching strategy: " + searchingStrategy);
    this.searchStrategy = searchingStrategy;

    queryParser = new QueryParser(
        Version.LUCENE_44,
        FbIndexField.TEXT.fieldName(),
        searchingStrategy.equals("exact") ? new KeywordAnalyzer() : new StandardAnalyzer(Version.LUCENE_44));
    LogInfo.log("Opening index dir: " + indexDir);
    IndexReader indexReader = DirectoryReader.open(SimpleFSDirectory.open(new File(indexDir)));
    indexSearcher = new IndexSearcher(indexReader);
    LogInfo.log("Opened index with " + indexReader.numDocs() + " documents.");

    this.numOfDocs = numOfDocs;
    LogInfo.end_track("Constructing Searcher");
  }

  public synchronized List<Document> searchDocs(String question) throws IOException, ParseException {

    List<Document> res = new LinkedList<Document>();
    if (searchStrategy.equals("exact"))
      question = "\"" + question + "\"";

    ScoreDoc[] hits = getHits(question);

    for (int i = 0; i < hits.length; ++i) {
      int docId = hits[i].doc;
      Document doc = indexSearcher.doc(docId);
      res.add(doc);
    }
    return res;
  }

  private ScoreDoc[] getHits(String question) throws IOException, ParseException {
    Query luceneQuery = queryParser.parse(question);
    ScoreDoc[] hits = indexSearcher.search(luceneQuery, numOfDocs).scoreDocs;
    return hits;
  }

  public static void main(String[] args) throws IOException, ParseException {

    Pattern QUIT =
        Pattern.compile("quit|exit|q|bye", Pattern.CASE_INSENSITIVE);
    FbEntitySearcher searcher = new FbEntitySearcher(args[0], 10000, args[1]);
    BufferedReader is = new BufferedReader(new InputStreamReader(System.in));
    StopWatch watch = new StopWatch();
    for (; ; ) {
      System.out.print("Search> ");
      String question = is.readLine().trim();
      if (QUIT.matcher(question).matches()) {
        System.out.println("Quitting.");
        break;
      }
      if (question.equals(""))
        continue;
     
      watch.reset();
      watch.start();
      List<Document> docs = searcher.searchDocs(question);
      watch.stop();
      for (Document doc : docs) {
        LogInfo.log(
            "Mid: " + doc.get(FbIndexField.MID.fieldName()) + "\t" +
                "id: " + doc.get(FbIndexField.ID.fieldName()) + "\t" +
                "types: " + doc.get(FbIndexField.TYPES.fieldName()) + "\t" +
                "Name: " + doc.get(FbIndexField.TEXT.fieldName()) + "\t" +
                "Popularity: " + doc.get(FbIndexField.POPULARITY.fieldName()));
      }
      LogInfo.logs("Number of docs: %s, Time: %s",docs.size(), watch);
    }
  }
}
TOP

Related Classes of edu.stanford.nlp.sempre.fbalignment.index.FbEntitySearcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.