validKey = GroupingKey.makeGroupingKey(domainInfo.getHostAddress(), robotRules.getCrawlDelay());
_flowProcess.increment(FetchCounters.DOMAINS_FINISHED, 1);
}
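// Accepted URLs from this domain all share validKey; deferred, blocked and
// skipped URLs are assigned their respective fixed grouping keys instead.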
GroupedUrlDatum datum;
while ((datum = _urls.poll()) != null) {
ScoredUrlDatum scoreUrl;
FetchCounters counter;
String url = datum.getUrl();
if (isDeferred) {
counter = FetchCounters.URLS_DEFERRED;
scoreUrl = new ScoredUrlDatum(url, GroupingKey.DEFERRED_GROUPING_KEY, UrlStatus.SKIPPED_DEFERRED, 0.0);
} else if (!robotRules.isAllowed(url)) {
counter = FetchCounters.URLS_BLOCKED;
scoreUrl = new ScoredUrlDatum(url, GroupingKey.BLOCKED_GROUPING_KEY, UrlStatus.SKIPPED_BLOCKED, 0.0);
} else {
double score = _scorer.generateScore(domain, pld, datum);
if (score == BaseScoreGenerator.SKIP_SCORE) {
counter = FetchCounters.URLS_SKIPPED;
scoreUrl = new ScoredUrlDatum(url, GroupingKey.SKIPPED_GROUPING_KEY, UrlStatus.UNFETCHED, score);
} else {
counter = FetchCounters.URLS_ACCEPTED;
scoreUrl = new ScoredUrlDatum(url, validKey, UrlStatus.UNFETCHED, score);
}
}
scoreUrl.setPayload(datum.getPayload());
_flowProcess.increment(counter, 1);
// Collectors aren't thread-safe, so serialize every add on the collector itself.
synchronized (_collector) {
_collector.add(BixoPlatform.clone(scoreUrl.getTuple(), _flowProcess));