if (LOG.isDebugEnabled()) {
LOG.debug("redirectCount=" + redirectCount);
}
redirecting = false;
Protocol protocol = this.protocolFactory.getProtocol(fit.url.toString());
BaseRobotRules rules = protocol.getRobotRules(fit.url, fit.datum);
if (!rules.isAllowed(fit.u.toString())) {
// unblock
fetchQueues.finishFetchItem(fit, true);
if (LOG.isDebugEnabled()) {
LOG.debug("Denied by robots.txt: " + fit.url);
}
output(fit.url, fit.datum, null, ProtocolStatus.STATUS_ROBOTS_DENIED, CrawlDatum.STATUS_FETCH_GONE);
reporter.incrCounter("FetcherStatus", "robots_denied", 1);
continue;
}
if (rules.getCrawlDelay() > 0) {
if (rules.getCrawlDelay() > maxCrawlDelay && maxCrawlDelay >= 0) {
// unblock
fetchQueues.finishFetchItem(fit, true);
LOG.debug("Crawl-Delay for " + fit.url + " too long (" + rules.getCrawlDelay() + "), skipping");
output(fit.url, fit.datum, null, ProtocolStatus.STATUS_ROBOTS_DENIED, CrawlDatum.STATUS_FETCH_GONE);
reporter.incrCounter("FetcherStatus", "robots_denied_maxcrawldelay", 1);
continue;
} else {
FetchItemQueue fiq = fetchQueues.getFetchItemQueue(fit.queueID);
fiq.crawlDelay = rules.getCrawlDelay();
if (LOG.isDebugEnabled()) {
LOG.info("Crawl delay for queue: " + fit.queueID + " is set to " + fiq.crawlDelay + " as per robots.txt. url: " + fit.url);
}
}
}