}
}
@Override
public void operate(FlowProcess process, BufferCall<NullContext> bufferCall) {
UrlDatum bestDatum = null;
int ignoredUrls = 0;
long bestFetched = 0;
Iterator<TupleEntry> iter = bufferCall.getArgumentsIterator();
while (iter.hasNext()) {
UrlDatum datum = new UrlDatum(iter.next());
if (bestDatum == null) {
bestDatum = new UrlDatum(datum);
bestFetched = (Long) bestDatum.getPayloadValue(CrawlDbDatum.LAST_FETCHED_FIELD);
} else if ((Long) datum.getPayloadValue(CrawlDbDatum.LAST_FETCHED_FIELD) > bestFetched) {
if (bestFetched != 0) {
_numLater += 1;
// Should never happen that we double-fetch a page
LOGGER.warn("Using URL with later fetch time: " + datum.getUrl());
}
bestDatum.setUrl(datum.getUrl()); // There's really no need to set the url since it should be same
bestDatum.setPayload(datum.getPayload());
bestFetched = (Long) bestDatum.getPayloadValue(CrawlDbDatum.LAST_FETCHED_FIELD);
} else {
ignoredUrls += 1;
}
}