// Run link inversion over the segments path, writing into linkDb.
// NOTE(review): the three trailing booleans are positional flags whose meaning
// is not visible in this excerpt -- confirm against linkDbTool's invert signature.
linkDbTool.invert(linkDb, segments, true, true, false); // invert links
// index, dedup & merge
// List the entries under the segments path. The filter's name suggests only
// directories are kept (presumably one per segment -- TODO confirm).
FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
if (isSolrIndex) {
// Solr branch: pass solrUrl, the crawl db, the link db and the listed paths
// to SolrIndexer. No dedup or merge call is made in this branch.
SolrIndexer indexer = new SolrIndexer(conf);
indexer.indexSolr(solrUrl, crawlDb, linkDb,
Arrays.asList(HadoopFSUtil.getPaths(fstats)));
}
else {
// Non-Solr branch: clear out previous index output, then index the segments.
// dedup is created here but only invoked after indexing has run (further down).
DeleteDuplicates dedup = new DeleteDuplicates(conf);
// Cleanup of earlier output is attempted only when an indexes path was supplied.
if(indexes != null) {
// Delete old indexes
// (second argument 'true' presumably requests a recursive delete -- assumes fs
// is a Hadoop FileSystem; confirm)
if (fs.exists(indexes)) {
LOG.info("Deleting old indexes: " + indexes);
fs.delete(indexes, true);
}
// Delete old index
// (the previously merged index, per the log message below)
if (fs.exists(index)) {
LOG.info("Deleting old merged index: " + index);
fs.delete(index, true);
}
}
// Index the crawl db, link db and the listed segment paths into 'indexes'.
// NOTE(review): this call sits outside the 'indexes != null' guard above, so
// 'indexes' may be null here -- confirm that Indexer.index accepts a null path.
Indexer indexer = new Indexer(conf);
indexer.index(indexes, crawlDb, linkDb,
Arrays.asList(HadoopFSUtil.getPaths(fstats)));
// The merger is constructed here; it is not used within the statements above.
IndexMerger merger = new IndexMerger(conf);
if(indexes != null) {
dedup.dedup(new Path[] { indexes });