package edu.illinois.lis.search;
import java.io.PrintStream;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import cc.twittertools.search.api.TrecSearchThriftClient;
import cc.twittertools.thrift.gen.TResult;
import edu.illinois.lis.document.FeatureVector;
import edu.illinois.lis.feedback.FeedbackRelevanceModel;
import edu.illinois.lis.query.GQueries;
import edu.illinois.lis.query.GQueriesJsonImpl;
import edu.illinois.lis.query.GQuery;
import edu.illinois.lis.utils.ParameterBroker;
import edu.illinois.lis.utils.Stopper;
/**
 * Command-line driver that runs a batch of queries through a
 * {@link TrecSearchThriftClient} and prints one result line per hit to
 * standard output in the form {@code <title> Q0 <id> <rank> <score> <runtag>}.
 *
 * <p>The single command-line argument is handed to {@link ParameterBroker};
 * the following parameters are read from it:
 * <ul>
 *   <li>{@code host}, {@code port} - location of the search service (required)</li>
 *   <li>{@code group}, {@code token} - authentication credentials (required)</li>
 *   <li>{@code queries} - passed to {@link GQueries#read}</li>
 *   <li>{@code stopper} - optional argument for the {@link Stopper} constructor</li>
 *   <li>{@code num_results} - maximum hits printed per query (default 1000)</li>
 *   <li>{@code fb_docs}, {@code fb_terms} - feedback is applied only when both
 *       are greater than zero (default 0)</li>
 *   <li>{@code runtag} - last column of every output line (default
 *       {@code lucene4lm})</li>
 * </ul>
 *
 * <p>Invalid or missing required parameters are reported on standard error
 * and terminate the JVM with exit status -1.
 */
public class RunQueries {
    private static final String DEFAULT_RUNTAG = "lucene4lm";
    private static final String HOST_OPTION = "host";
    private static final String PORT_OPTION = "port";
    private static final String QUERIES_OPTION = "queries";
    private static final String STOPPER_OPTION = "stopper";
    private static final String FB_DOCS_OPTION = "fb_docs";
    private static final String FB_TERMS_OPTION = "fb_terms";
    private static final String NUM_RESULTS_OPTION = "num_results";
    private static final String GROUP_OPTION = "group";
    private static final String TOKEN_OPTION = "token";
    private static final String RUNTAG_OPTION = "runtag";

    /** Weight given to the original query when interpolating with the feedback vector. */
    private static final double ORIG_QUERY_WEIGHT = 0.5;

    /** Maximum number of hits printed per query when {@code num_results} is not set. */
    private static final int DEFAULT_NUM_RESULTS = 1000;

    private RunQueries() {}

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the argument for the {@link ParameterBroker}
     *             constructor
     * @throws Exception if reading the queries, building the feedback model or
     *                   talking to the search service fails
     */
    public static void main(String[] args) throws Exception {
        PrintStream out = new PrintStream(System.out, true, "UTF-8");
        PrintStream err = new PrintStream(System.err, true, "UTF-8");

        if (args.length < 1) {
            err.println("Usage: RunQueries <parameter-file>");
            System.exit(-1);
        }
        ParameterBroker params = new ParameterBroker(args[0]);

        GQueries queries = new GQueriesJsonImpl();
        queries.read(params.getParamValue(QUERIES_OPTION));

        Stopper stopper = null;
        String stopperParam = params.getParamValue(STOPPER_OPTION);
        if (stopperParam != null) {
            stopper = new Stopper(stopperParam);
        }

        // max number of docs to send to output
        int numResults = parseIntParam(params, NUM_RESULTS_OPTION, DEFAULT_NUM_RESULTS, err);
        int fbDocs = parseIntParam(params, FB_DOCS_OPTION, 0, err);
        int fbTerms = parseIntParam(params, FB_TERMS_OPTION, 0, err);

        // authentication credentials
        String group = requireParam(params, GROUP_OPTION, "must set a valid group ID", err);
        String token = requireParam(params, TOKEN_OPTION,
                "must set a valid authentication token", err);

        // service location
        String host = requireParam(params, HOST_OPTION, "must set a valid host name", err);
        requireParam(params, PORT_OPTION, "must set a valid port number", err);
        int port = parseIntParam(params, PORT_OPTION, 0, err);

        // The run tag does not depend on the query, so resolve it once.
        String runTag = params.getParamValue(RUNTAG_OPTION);
        if (runTag == null) {
            runTag = DEFAULT_RUNTAG;
        }

        TrecSearchThriftClient client = new TrecSearchThriftClient(host, port, group, token);

        try {
            Iterator<GQuery> queryIterator = queries.iterator();
            while (queryIterator.hasNext()) {
                GQuery query = queryIterator.next();
                err.println(query.getTitle());
                String queryText = query.getText();

                lowercaseQueryVector(query);

                // if we're doing feedback
                if (fbDocs > 0 && fbTerms > 0) {
                    String expanded = buildFeedbackQuery(client, query, queryText, stopper,
                            fbDocs, fbTerms, err);
                    // Keep the original text if expansion left no usable terms.
                    if (!expanded.isEmpty()) {
                        queryText = expanded;
                    }
                }

                List<TResult> results =
                        client.search(queryText, query.getQuerytweettime(), numResults);

                int rank = 1;
                for (TResult hit : results) {
                    if (rank > numResults) {
                        break;
                    }
                    // Locale.ROOT keeps '.' as the decimal separator regardless of
                    // the platform's default locale.
                    out.println(String.format(Locale.ROOT, "%s Q0 %s %d %f %s",
                            query.getTitle(), hit.getId(), rank, hit.getRsv(), runTag));
                    rank++;
                }
            }
        } finally {
            out.close();
        }
    }

    /**
     * Reads an integer parameter, falling back to a default when it is absent.
     * A value that is not a valid integer is reported on {@code err} and
     * terminates the JVM with status -1.
     */
    private static int parseIntParam(ParameterBroker params, String option, int defaultValue,
            PrintStream err) {
        String raw = params.getParamValue(option);
        if (raw == null) {
            return defaultValue;
        }
        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException e) {
            err.println("Invalid " + option + ": " + raw);
            System.exit(-1);
            return defaultValue; // not reached; satisfies the compiler
        }
    }

    /**
     * Returns the value of a mandatory parameter. If it is missing, prints
     * {@code "Invalid <option>: <requirement>"} on {@code err} and terminates
     * the JVM with status -1.
     */
    private static String requireParam(ParameterBroker params, String option,
            String requirement, PrintStream err) {
        String value = params.getParamValue(option);
        if (value == null) {
            err.println("Invalid " + option + ": " + requirement);
            System.exit(-1);
        }
        return value;
    }

    /**
     * Replaces the query's feature vector with a copy whose terms are
     * lower-cased and whose weights are normalized via
     * {@link FeatureVector#normalizeToOne()}.
     */
    private static void lowercaseQueryVector(GQuery query) {
        FeatureVector original = query.getFeatureVector();
        FeatureVector lowered = new FeatureVector(null);
        Iterator<String> qTerms = original.iterator();
        while (qTerms.hasNext()) {
            String term = qTerms.next();
            lowered.addTerm(term.toLowerCase(Locale.ROOT), original.getFeaturetWeight(term));
        }
        lowered.normalizeToOne();
        query.setFeatureVector(lowered);
    }

    /**
     * Runs an initial search for {@code fbDocs} results, builds a
     * {@link FeedbackRelevanceModel} from them, prunes it to {@code fbTerms}
     * terms, interpolates it with the query's own vector and returns the
     * result as a weighted query string. The interpolated vector is also
     * printed on {@code err}.
     */
    private static String buildFeedbackQuery(TrecSearchThriftClient client, GQuery query,
            String queryText, Stopper stopper, int fbDocs, int fbTerms, PrintStream err)
            throws Exception {
        List<TResult> feedbackResults =
                client.search(queryText, query.getQuerytweettime(), fbDocs);

        FeedbackRelevanceModel fb = new FeedbackRelevanceModel();
        fb.setOriginalQuery(query);
        fb.setRes(feedbackResults);
        fb.build(stopper);

        FeatureVector fbVector = fb.asFeatureVector();
        fbVector.pruneToSize(fbTerms);
        fbVector.normalizeToOne();
        fbVector = FeatureVector.interpolate(query.getFeatureVector(), fbVector,
                ORIG_QUERY_WEIGHT);
        err.println(fbVector);

        return toWeightedQueryString(fbVector);
    }

    /**
     * Serializes a feature vector as space-separated {@code term^weight}
     * pairs, skipping terms shorter than two characters.
     */
    private static String toWeightedQueryString(FeatureVector vector) {
        StringBuilder builder = new StringBuilder();
        Iterator<String> terms = vector.iterator();
        while (terms.hasNext()) {
            String term = terms.next();
            if (term.length() < 2) {
                continue;
            }
            double prob = vector.getFeaturetWeight(term);
            builder.append(term).append('^').append(prob).append(' ');
        }
        return builder.toString().trim();
    }
}