/**
 * Divides the analyzed page's score equally among its outlinks and emits one
 * link tuple for every outlink whose normalized URL passes validation.
 * Nothing is emitted when the page has no outlinks.
 *
 * <p>Duplicate outlinks and links back to the source URL are not filtered
 * here; ideally both would be handled before the score is split.
 *
 * @param process  the flow process handle (not used by this method)
 * @param funcCall supplies the argument tuple to analyze and the collector
 *                 that receives the resulting link tuples
 */
public void operate(FlowProcess process, FunctionCall<NullContext> funcCall) {
    AnalyzedDatum analyzed = new AnalyzedDatum(funcCall.getArguments().getTuple());
    Outlink[] links = analyzed.getOutlinks();
    TupleEntryCollector out = funcCall.getOutputCollector();

    // Nothing to distribute when the page has no outlinks.
    if (links.length == 0) {
        return;
    }

    // Each outlink receives 1/N of the page score, N being the raw outlink
    // count (including any that are later rejected by validation).
    float share = analyzed.getPageScore() / links.length;

    for (int i = 0; i < links.length; i++) {
        // Trim surrounding whitespace, then drop any embedded line breaks
        // before handing the URL to the normalizer.
        String cleaned = links[i].getToUrl().trim().replaceAll("[\n\r]", "");
        String normalized = _normalizer.normalize(cleaned);

        if (!_validator.isValid(normalized)) {
            continue;
        }

        // NOTE(review): the second constructor argument is always 0 here; its
        // meaning is defined by LinkDatum, which is not visible in this block.
        out.add(new LinkDatum(normalized, 0, share).getTuple());
    }
}