}
} catch (URIException e) {
// NOTE(review): the exception is logged by message context only -- the
// URIException itself (and its stack trace) is dropped. Consider the
// logger.log(Level.SEVERE, msg, e) overload so the cause is preserved.
logger.severe("Failed get of path for " + curi);
}
// Look up the server (host:port) record for this URI; robots state is
// tracked per-server on the CrawlServer.
CrawlServer cs = serverCache.getServerFor(curi.getUURI());
// require /robots.txt if not present
if (cs.isRobotsExpired(getRobotsValidityDurationSeconds())) {
// Need to get robots
if (logger.isLoggable(Level.FINE)) {
logger.fine( "No valid robots for " + cs +
"; deferring " + curi);
}
// Robots expired - should be refetched even though its already
// crawled.
try {
// Resolve "/robots.txt" relative to the current URI so the
// prerequisite targets the same scheme/host/port.
String prereq = curi.getUURI().resolve("/robots.txt").toString();
curi.markPrerequisite(prereq);
}
catch (URIException e1) {
logger.severe("Failed resolve using " + curi);
throw new RuntimeException(e1); // shouldn't ever happen
}
// NOTE(review): presumably 'true' signals "precondition pending, defer
// this URI" -- confirm against the enclosing method's contract.
return true;
}
// test against robots.txt if available
if (cs.isValidRobots()) {
String ua = metadata.getUserAgent();
RobotsPolicy robots = metadata.getRobotsPolicy();
// Ask the configured policy whether this user-agent may fetch curi
// under the server's cached robots.txt rules.
if(!robots.allows(ua, curi, cs.getRobotstxt())) {
if(getCalculateRobotsOnly()) {
// annotate URI as excluded, but continue to process normally
curi.getAnnotations().add("robotExcluded");
// NOTE(review): 'false' here appears to mean "no precondition
// blocks this URI" (exclusion is recorded but not enforced in
// calculate-only mode) -- verify with the caller.
return false;
}