Package edu.illinois.lis.search

Source Code of edu.illinois.lis.search.RunQueries

package edu.illinois.lis.search;

import java.io.PrintStream;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;






import cc.twittertools.search.api.TrecSearchThriftClient;
import cc.twittertools.thrift.gen.TResult;
import edu.illinois.lis.document.FeatureVector;
import edu.illinois.lis.feedback.FeedbackRelevanceModel;
import edu.illinois.lis.query.GQueries;
import edu.illinois.lis.query.GQueriesJsonImpl;
import edu.illinois.lis.query.GQuery;
import edu.illinois.lis.utils.ParameterBroker;
import edu.illinois.lis.utils.Stopper;

public class RunQueries {
  private static final String DEFAULT_RUNTAG = "lucene4lm";

  private static final String HOST_OPTION = "host";
  private static final String PORT_OPTION = "port";
  private static final String QUERIES_OPTION = "queries";
  private static final String STOPPER_OPTION = "stopper";
  private static final String FB_DOCS_OPTION = "fb_docs";
  private static final String FB_TERMS_OPTION = "fb_terms";
  private static final String NUM_RESULTS_OPTION = "num_results";
  private static final String GROUP_OPTION = "group";
  private static final String TOKEN_OPTION = "token";
  private static final String RUNTAG_OPTION = "runtag";

  private static final double ORIG_QUERY_WEIGHT = 0.5;
 
  private RunQueries() {}

  public static void main(String[] args) throws Exception {
    ParameterBroker params = new ParameterBroker(args[0]);

    PrintStream out = new PrintStream(System.out, true, "UTF-8");
    PrintStream err = new PrintStream(System.err, true, "UTF-8");

    GQueries queries = new GQueriesJsonImpl();
    queries.read(params.getParamValue(QUERIES_OPTION));
   
    Stopper stopper = null;
    if(params.getParamValue(STOPPER_OPTION) != null)
      stopper = new Stopper(params.getParamValue(STOPPER_OPTION));
   
    // max number of docs to send to output
    int numResults = 1000;
    try {
      if (params.getParamValue(NUM_RESULTS_OPTION) != null) {
        numResults = Integer.parseInt(params.getParamValue(NUM_RESULTS_OPTION));
      }
    } catch (NumberFormatException e) {
      err.println("Invalid " + NUM_RESULTS_OPTION + ": " + params.getParamValue(NUM_RESULTS_OPTION));
      System.exit(-1);
    }

    int fbDocs = 0;
    try {
      if (params.getParamValue(FB_DOCS_OPTION) != null) {
        fbDocs = Integer.parseInt(params.getParamValue(FB_DOCS_OPTION));
      }
    } catch (NumberFormatException e) {
      err.println("Invalid " + FB_DOCS_OPTION + ": " + params.getParamValue(FB_DOCS_OPTION));
      System.exit(-1);
    }
   
    int fbTerms = 0;
    try {
      if (params.getParamValue(FB_TERMS_OPTION) != null) {
        fbTerms = Integer.parseInt(params.getParamValue(FB_TERMS_OPTION));
      }
    } catch (NumberFormatException e) {
      err.println("Invalid " + FB_TERMS_OPTION + ": " + params.getParamValue(FB_TERMS_OPTION));
      System.exit(-1);
    }
   
    // authentication credentials
    String group = params.getParamValue(GROUP_OPTION);
    if(group==null) {
      err.println("Invalid " + GROUP_OPTION + ": must set a valid group ID");
      System.exit(-1);
    }
    String token = params.getParamValue(TOKEN_OPTION);
    if(group==null) {
      err.println("Invalid " + TOKEN_OPTION + ": must set a valid authentication token");
      System.exit(-1);
    }

    TrecSearchThriftClient client = new TrecSearchThriftClient(params.getParamValue(HOST_OPTION),
        Integer.parseInt(params.getParamValue(PORT_OPTION)), group, token);

    Iterator<GQuery> queryIterator = queries.iterator();
    while(queryIterator.hasNext()) {
      GQuery query = queryIterator.next();
      System.err.println(query.getTitle());
      String queryText = query.getText();
     
      // stupid hack.  need to lowercase the query vector
      FeatureVector temp = new FeatureVector(null);
      Iterator<String> qTerms = query.getFeatureVector().iterator();
      while(qTerms.hasNext()) {
        String term = qTerms.next();
        temp.addTerm(term.toLowerCase(), query.getFeatureVector().getFeaturetWeight(term));
      }
      temp.normalizeToOne();
      query.setFeatureVector(temp);
     
     
      // if we're doing feedback
      if(fbDocs > 0 && fbTerms > 0) {
        List<TResult> results = client.search(queryText, query.getQuerytweettime(), fbDocs);
        FeedbackRelevanceModel fb = new FeedbackRelevanceModel();
        fb.setOriginalQuery(query);
        fb.setRes(results);
        fb.build(stopper);
       
        FeatureVector fbVector = fb.asFeatureVector();
        fbVector.pruneToSize(fbTerms);
        fbVector.normalizeToOne();
        fbVector = FeatureVector.interpolate(query.getFeatureVector(), fbVector, ORIG_QUERY_WEIGHT);
   
        System.err.println(fbVector);
       
        StringBuilder builder = new StringBuilder();
        Iterator<String> terms = fbVector.iterator();
        while(terms.hasNext()) {
          String term = terms.next();
          if(term.length() < 2)
            continue;
          double prob = fbVector.getFeaturetWeight(term);
          builder.append(term + "^" + prob + " ");
        }
        queryText = builder.toString().trim();
       
      }
     
      List<TResult> results = client.search(queryText, query.getQuerytweettime(), numResults);
      String runTag = params.getParamValue(RUNTAG_OPTION);
      if(runTag==null)
        runTag = DEFAULT_RUNTAG;

      int i = 1;
      Iterator<TResult> hitIterator = results.iterator();
      while(hitIterator.hasNext()) {
        TResult hit = hitIterator.next();
        out.println(String.format("%s Q0 %s %d %f %s", query.getTitle(), hit.getId(), i,
            hit.getRsv(), runTag));

        if(i++ >= numResults)
          break;
      }

    }
    out.close();
  }
}
TOP

Related Classes of edu.illinois.lis.search.RunQueries

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.