throws IOException {
String fromUrl = key.toString();
List<LinkDatum> outlinks = new ArrayList<LinkDatum>();
Node node = null;
LoopSet loops = null;
// aggregate outlinks, assign other values
while (values.hasNext()) {
ObjectWritable write = values.next();
Object obj = write.get();
if (obj instanceof Node) {
node = (Node)obj;
}
else if (obj instanceof LinkDatum) {
outlinks.add(WritableUtils.clone((LinkDatum)obj, conf));
}
else if (obj instanceof LoopSet) {
loops = (LoopSet)obj;
}
}
// Guard against a LoopSet arriving without a Node object (and typically without any LinkDatum
// objects either). This can happen when the webgraph has received deletes (e.g. link.delete.gone
// and/or URL filters or normalizers) but the Loops database has not been updated to match.
// See: https://issues.apache.org/jira/browse/NUTCH-1299
if (node == null && loops != null) {
// Nothing to do
LOG.warn("LoopSet without Node object received for " + key.toString() + " . You should either not use Loops as input of the LinkRank program or rerun the Loops program over the WebGraph.");
return;
}
// get the number of outlinks and the current inlink and outlink scores
// from the node of the url
int numOutlinks = node.getNumOutlinks();
float inlinkScore = node.getInlinkScore();
float outlinkScore = node.getOutlinkScore();
LOG.debug(fromUrl + ": num outlinks " + numOutlinks);
// can't invert if no outlinks
if (numOutlinks > 0) {
Set<String> loopSet = (loops != null) ? loops.getLoopSet() : null;
for (int i = 0; i < outlinks.size(); i++) {
LinkDatum outlink = outlinks.get(i);
String toUrl = outlink.getUrl();
// remove any url that is contained in the loopset