idToTermFile = idToTermFile.substring(idToTermFile.lastIndexOf("/") + 1);
LOG.info("Looking for the following files in dcache: " + termsFile + ", " + termidsFile + ", " + idToTermFile);
// Take a different code path if we're in standalone mode.
if (conf.get("mapred.job.tracker").equals("local")) {
dictionary = new DefaultFrequencySortedDictionary(new Path(termsFile),
new Path(termidsFile), new Path(idToTermFile), FileSystem.getLocal(conf));
} else {
// We need to figure out which file in the DistributedCache is which...
Map<String, Path> pathMapping = Maps.newHashMap();
Path[] localFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
for (Path p : localFiles) {
LOG.info("In DistributedCache: " + p);
if (p.toString().contains(termsFile)) {
pathMapping.put(termsFile, p);
} else if (p.toString().contains(termidsFile)) {
pathMapping.put(termidsFile, p);
} else if (p.toString().contains(idToTermFile)) {
pathMapping.put(idToTermFile, p);
}
}
LOG.info(" - terms: " + pathMapping.get(termsFile));
LOG.info(" - id: " + pathMapping.get(termidsFile));
LOG.info(" - idToTerms: " + pathMapping.get(idToTermFile));
String s = localFiles.length + " " + localFiles[0].toString() + " " + localFiles[1].toString() + " " + localFiles[2].toString();
if (pathMapping.get(termsFile) == null ) {
throw new RuntimeException(s);
}
dictionary = new DefaultFrequencySortedDictionary(pathMapping.get(termsFile),
pathMapping.get(termidsFile), pathMapping.get(idToTermFile), FileSystem.getLocal(conf));
}
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException("Error initializing data!", e);