String termidsFile = env.getIndexTermIdsData();
String idToTermFile = env.getIndexTermIdMappingData();
// Take a different code path if we're in standalone mode.
if (conf.get("mapred.job.tracker").equals("local")) {
dictionary = new DefaultCachedFrequencySortedDictionary(new Path(termsFile),
new Path(termidsFile), new Path(idToTermFile), 0.3f, FileSystem.getLocal(conf));
} else {
// We need to figure out which file in the DistributedCache is which...
Map<String, Path> pathMapping = Maps.newHashMap();
Path[] localFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
for (Path p : localFiles) {
LOG.info("In DistributedCache: " + p);
if (p.toString().contains(termsFile)) {
pathMapping.put(termsFile, p);
} else if (p.toString().contains(termidsFile)) {
pathMapping.put(termidsFile, p);
} else if (p.toString().contains(idToTermFile)) {
pathMapping.put(idToTermFile, p);
}
}
LOG.info(" - terms: " + pathMapping.get(termsFile));
LOG.info(" - id: " + pathMapping.get(termidsFile));
LOG.info(" - idToTerms: " + pathMapping.get(idToTermFile));
dictionary = new DefaultCachedFrequencySortedDictionary(pathMapping.get(termsFile),
pathMapping.get(termidsFile), pathMapping.get(idToTermFile),
0.3f, FileSystem.getLocal(context.getConfiguration()));
}
} catch (Exception e) {
e.printStackTrace();