final FetchItemQueue fiq = fetchQueues.getFetchItemQueue(fit.queueID);
fiq.crawlDelay = rules.getCrawlDelay();
}
}
final ProtocolOutput output = protocol.getProtocolOutput(fit.url, fit.page);
final ProtocolStatus status = output.getStatus();
final Content content = output.getContent();
// unblock queue
fetchQueues.finishFetchItem(fit);
context.getCounter("FetcherStatus", ProtocolStatusUtils.getName(status.getCode())).increment(1);
int length = 0;
if (content!=null && content.getContent()!=null) length= content.getContent().length;
updateStatus(length);
switch(status.getCode()) {
case ProtocolStatusCodes.WOULDBLOCK:
// retry ?
fetchQueues.addFetchItem(fit);
break;
case ProtocolStatusCodes.SUCCESS: // got a page
output(fit, content, status, CrawlStatus.STATUS_FETCHED);
break;
case ProtocolStatusCodes.MOVED: // redirect
case ProtocolStatusCodes.TEMP_MOVED:
byte code;
boolean temp;
if (status.getCode() == ProtocolStatusCodes.MOVED) {
code = CrawlStatus.STATUS_REDIR_PERM;
temp = false;
} else {
code = CrawlStatus.STATUS_REDIR_TEMP;
temp = true;
}
final String newUrl = ProtocolStatusUtils.getMessage(status);
handleRedirect(fit.url, newUrl, temp, FetcherJob.PROTOCOL_REDIR, fit.page);
output(fit, content, status, code);
break;
case ProtocolStatusCodes.EXCEPTION:
logFetchFailure(fit.url, ProtocolStatusUtils.getMessage(status));
/* FALLTHROUGH */
case ProtocolStatusCodes.RETRY: // retry
case ProtocolStatusCodes.BLOCKED:
output(fit, null, status, CrawlStatus.STATUS_RETRY);
break;
case ProtocolStatusCodes.GONE: // gone
case ProtocolStatusCodes.NOTFOUND:
case ProtocolStatusCodes.ACCESS_DENIED:
case ProtocolStatusCodes.ROBOTS_DENIED:
output(fit, null, status, CrawlStatus.STATUS_GONE);
break;
case ProtocolStatusCodes.NOTMODIFIED:
output(fit, null, status, CrawlStatus.STATUS_NOTMODIFIED);
break;
default:
if (LOG.isWarnEnabled()) {
LOG.warn("Unknown ProtocolStatus: " + status.getCode());
}
output(fit, null, status, CrawlStatus.STATUS_RETRY);
}
} catch (final Throwable t) { // unexpected exception