Path segments = new Path(dir + "/segments");
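// overall start timestamp for the run; per-phase durations are recorded via res.addTiming() below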
res.elapsed = System.currentTimeMillis();
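// one tool per crawl phase: inject, generate, fetch, parse, CrawlDb update, and LinkDb inversion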
Injector injector = new Injector(getConf());
Generator generator = new Generator(getConf());
Fetcher fetcher = new Fetcher(getConf());
ParseSegment parseSegment = new ParseSegment(getConf());
CrawlDb crawlDbTool = new CrawlDb(getConf());
LinkDb linkDbTool = new LinkDb(getConf());
// initialize crawlDb
long start = System.currentTimeMillis();
injector.inject(crawlDb, rootUrlDir);
long delta = System.currentTimeMillis() - start;
res.addTiming("inject", "0", delta);
int i;
for (i = 0; i < depth; i++) { // generate new segment
  start = System.currentTimeMillis();
  Path[] segs = generator.generate(crawlDb, segments, -1, topN,
      System.currentTimeMillis());
  delta = System.currentTimeMillis() - start;
  res.addTiming("generate", i + "", delta);
  if (segs == null) {
    LOG.info("Stopping at depth=" + i + " - no more URLs to fetch.");
    break;
  }
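  // only the first generated segment is fetched here; the CrawlDb update below passes the full segs array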
  start = System.currentTimeMillis();
  fetcher.fetch(segs[0], threads, Fetcher.isParsing(getConf())); // fetch it
  delta = System.currentTimeMillis() - start;
  res.addTiming("fetch", i + "", delta);
  if (!Fetcher.isParsing(getConf())) {
    start = System.currentTimeMillis();
    parseSegment.parse(segs[0]); // parse it, if needed
    delta = System.currentTimeMillis() - start;
    res.addTiming("parse", i + "", delta);
  }
  start = System.currentTimeMillis();
  crawlDbTool.update(crawlDb, segs, true, true); // update crawldb