}
return fvalues;
}
public static void main(String[] args) throws Exception {
OptionManager options = new OptionManager(RankAndFeaturesSmallAdaptive.class.getName());
options.addOption(OptionManager.INDEX_ROOT_PATH, "path", "index root", true);
options.addOption(OptionManager.POSTINGS_ROOT_PATH, "path", "Positional postings root", true);
options.addOption(OptionManager.QUERY_PATH, "path", "XML query", true);
options.addOption(OptionManager.JUDGMENT_PATH, "path", "Tab-Delimited documents", true);
options.addOption(OptionManager.FEATURE_PATH, "path", "XML features", true);
options.addOption(OptionManager.HITS, "integer", "number of hits (default: 10,000)", false);
options.addOption(OptionManager.SPAM_PATH, "path", "spam percentile score", false);
options.addOption(OptionManager.OUTPUT_PATH, "", "Print feature values", false);
options.addDependency(OptionManager.OUTPUT_PATH, OptionManager.SPAM_PATH);
try {
options.parse(args);
} catch(Exception exp) {
return;
}
String indexPath = options.getOptionValue(OptionManager.INDEX_ROOT_PATH);
String postingsPath = options.getOptionValue(OptionManager.POSTINGS_ROOT_PATH);
String queryPath = options.getOptionValue(OptionManager.QUERY_PATH);
String qrelPath = options.getOptionValue(OptionManager.JUDGMENT_PATH);
String featurePath = options.getOptionValue(OptionManager.FEATURE_PATH);
boolean writeOutput = options.foundOption(OptionManager.OUTPUT_PATH);
int hits = 10000;
if(options.foundOption(OptionManager.HITS)) {
hits = Integer.parseInt(options.getOptionValue(OptionManager.HITS));
}
FileSystem fs = FileSystem.get(new Configuration());
RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
env.initialize(true);
RankAndFeaturesSmallAdaptive generator = new RankAndFeaturesSmallAdaptive(env, fs);
//Parse queries and find integer codes for the query terms.
HMapIV<String> parsedQueries = QueryUtility.loadQueries(queryPath);
HMapIV<int[]> queries = QueryUtility.queryToIntegerCode(env, parsedQueries);
HMapIF idfs = QueryUtility.loadIdf(env, parsedQueries);
HMapIF cfs = QueryUtility.loadCf(env, parsedQueries);
HMapIV<int[]> qrels = QrelUtility.parseQrelsFromTabDelimited(qrelPath);
Map<String, Feature> featuresMap = FeatureUtility.parseFeatures(featurePath);
Feature[] features = new Feature[featuresMap.size()];
int index = 0;
for(String key: featuresMap.keySet()) {
features[index++] = featuresMap.get(key);
}
generator.prepareStats(idfs, cfs);
generator.preparePostings(postingsPath);
int[] newDocidsLookup = null;
FSDataOutputStream output = null;
if(writeOutput) {
final SpamPercentileScore spamScores = new SpamPercentileScore();
spamScores.initialize(options.getOptionValue(OptionManager.SPAM_PATH), fs);
newDocidsLookup = DocumentUtility.reverseLookupSpamSortedDocids(DocumentUtility.spamSortDocids(spamScores));
output = fs.create(new Path(options.getOptionValue(OptionManager.OUTPUT_PATH)));
}
System.gc();
Thread.currentThread().sleep(20000);
long cnt = 0;