input = new Path(path.toString());
}
// Injection is performed as two consecutive map-only jobs. numJobs and
// currentJobNum are updated alongside the status map as each phase starts.
numJobs = 2;
currentJobNum = 0;

// ---- Phase 1: "convert input" ----
// Reads the input path with UrlMapper and writes <String, WebPage> records
// through GoraOutputFormat into the web store created by StorageUtils.
status.put(Nutch.STAT_PHASE, "convert input");
currentJob = new NutchJob(getConf(), "inject-p1 " + input);
FileInputFormat.addInputPath(currentJob, input);
currentJob.setMapperClass(UrlMapper.class);
currentJob.setMapOutputKeyClass(String.class);
currentJob.setMapOutputValueClass(WebPage.class);
currentJob.setOutputFormatClass(GoraOutputFormat.class);
DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob.getConfiguration(),
    String.class, WebPage.class);
GoraOutputFormat.setOutput(currentJob, store, true);
// The number of reduce tasks is set to 0 right below, so this reducer class
// should never be exercised; it is kept to leave the job configuration as is.
currentJob.setReducerClass(Reducer.class);
currentJob.setNumReduceTasks(0);
// Submit the job and block until it finishes (verbose progress reporting on).
currentJob.waitForCompletion(true);
ToolUtil.recordJobStatus(null, currentJob, results);
currentJob = null;

// ---- Phase 2: "merge input with db" ----
// Runs InjectorMapper over the FIELDS of the web store, again map-only.
status.put(Nutch.STAT_PHASE, "merge input with db");
status.put(Nutch.STAT_PROGRESS, 0.5f);
currentJobNum = 1;
currentJob = new NutchJob(getConf(), "inject-p2 " + input);
StorageUtils.initMapperJob(currentJob, FIELDS, String.class,
    WebPage.class, InjectorMapper.class);
currentJob.setNumReduceTasks(0);
// Fix: the phase-2 job used to be configured and have its status recorded
// without ever being submitted, so the merge step never ran. Submit it and
// wait for completion, exactly as phase 1 does, before recording its status.
currentJob.waitForCompletion(true);
ToolUtil.recordJobStatus(null, currentJob, results);
status.put(Nutch.STAT_PROGRESS, 1.0f);