final String CONTENT_DIGEST_SCHEME = "sha1:";
WbmPersistLoadProcessor t = new WbmPersistLoadProcessor();
t.setHttpClient(client);
t.setContentDigestScheme(CONTENT_DIGEST_SCHEME);
CrawlURI curi = new CrawlURI(UURIFactory.getInstance("http://archive.org/"));
// Put a history entry newer than the one being loaded (i.e. the loaded history entry will
// not be the one used by the FetchHistoryProcessor check below).
long expected_ts = DateUtils.parse14DigitDate(TestNormalHttpResponse.EXPECTED_TS).getTime();
Map<String, Object>[] fetchHistory = (Map[])curi.getData().get(RecrawlAttributeConstants.A_FETCH_HISTORY);
if (fetchHistory == null) {
fetchHistory = new HashMap[2];
curi.getData().put(RecrawlAttributeConstants.A_FETCH_HISTORY, fetchHistory);
}
final byte[] digestValue0 = sha1Digest("0");
final byte[] digestValue1 = sha1Digest("1");
fetchHistory[0] = new HashMap<String, Object>();
fetchHistory[0].put(FetchHistoryHelper.A_TIMESTAMP, expected_ts + 2000);
fetchHistory[0].put(CoreAttributeConstants.A_FETCH_BEGAN_TIME, expected_ts + 2000);
fetchHistory[0].put(RecrawlAttributeConstants.A_CONTENT_DIGEST,
CONTENT_DIGEST_SCHEME + Base32.encode(digestValue0));
fetchHistory[1] = new HashMap<String, Object>();
fetchHistory[1].put(FetchHistoryHelper.A_TIMESTAMP, expected_ts - 2000);
fetchHistory[1].put(RecrawlAttributeConstants.A_CONTENT_DIGEST,
CONTENT_DIGEST_SCHEME + Base32.encode(digestValue1));
ProcessResult result = t.innerProcessResult(curi);
assertEquals("result is PROCEED", ProcessResult.PROCEED, result);
// newly loaded history entry should fall in between two existing entries (index=1)
Map<String, Object> history = getFetchHistory(curi, 1);
assertNotNull("history", history);
String hash = (String)history.get(RecrawlAttributeConstants.A_CONTENT_DIGEST);
assertEquals("CONTENT_DIGEST", CONTENT_DIGEST_SCHEME+TestNormalHttpResponse.EXPECTED_HASH, hash);
Long ts = (Long)history.get(FetchHistoryHelper.A_TIMESTAMP);
assertNotNull("ts is non-null", ts);
assertEquals("'ts' has expected timestamp", expected_ts, ts.longValue());
// Check compatibility with FetchHistoryProcessor.
// TODO: This is not testing WbmPersistLoadProcessor - only testing stub fetchHistory
// setup above (OK as long as it matches WbmPersistLoadProcessor). We need a separate
// test method.
curi.setFetchStatus(200);
curi.setFetchBeginTime(System.currentTimeMillis());
// FetchHistoryProcessor once failed for a revisit case. We'd need to test other cases
// too (TODO).
curi.setContentDigest("sha1", digestValue0);
FetchHistoryProcessor fhp = new FetchHistoryProcessor();
fhp.process(curi);
}