public void testCrawlDbStatTransitionInject() {
LOG.info("Test CrawlDatum states in Injector after inject");
Configuration conf = CrawlDBTestUtil.createConfiguration();
CrawlDbUpdateUtil<Injector.InjectReducer> inject = new CrawlDbUpdateUtil<Injector.InjectReducer>(
new Injector.InjectReducer(), conf);
ScoringFilters scfilters = new ScoringFilters(conf);
for (String sched : schedules) {
LOG.info("Testing inject with " + sched);
conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl."+sched);
FetchSchedule schedule = FetchScheduleFactory
.getFetchSchedule(new JobConf(conf));
List<CrawlDatum> values = new ArrayList<CrawlDatum>();
for (int i = 0; i < fetchDbStatusPairs.length; i++) {
byte fromDbStatus = fetchDbStatusPairs[i][1];
byte toDbStatus = fromDbStatus;
if (fromDbStatus == -1) {
toDbStatus = STATUS_DB_UNFETCHED;
} else {
CrawlDatum fromDb = new CrawlDatum();
fromDb.setStatus(fromDbStatus);
schedule.initializeSchedule(CrawlDbUpdateUtil.dummyURL, fromDb);
values.add(fromDb);
}
LOG.info("inject "
+ (fromDbStatus == -1 ? "<not in CrawlDb>" : CrawlDatum
.getStatusName(fromDbStatus)) + " + "
+ getStatusName(STATUS_INJECTED) + " => "
+ getStatusName(toDbStatus));
CrawlDatum injected = new CrawlDatum(STATUS_INJECTED,
conf.getInt("db.fetch.interval.default", 2592000), 0.1f);
schedule.initializeSchedule(CrawlDbUpdateUtil.dummyURL, injected);
try {
scfilters.injectedScore(CrawlDbUpdateUtil.dummyURL, injected);
} catch (ScoringFilterException e) {
LOG.error(StringUtils.stringifyException(e));
}
values.add(injected);
List<CrawlDatum> res = inject.update(values);