// At this point, we don't have the fetched data, but we do
// have the original and the new url, so we store the mapping.
redirects.put(rec.newurl, rec.origurl);
} else {
// This is a non-redirected page.
fetchdata.addDoc(new FetchDocument(page, rec.origurl, rec.content, rec.header, success(rec), recoverable(rec), internalError(rec), true));
}
}
}
// Now go through the redirects.
for (SegmentRecord rec : unknownPages) {
FetchDocument doc = null;
if (redirects.containsKey(rec.newurl)) {
rec.origurl = redirects.get(rec.newurl);
page = fetchlist.getPage(rec.origurl);
if (null != page) {
// Override URL with fetched URL if the fetcher is configured to do so
if (!keepUrl) {
try {
page.setUrl(rec.newurl);
} catch (MalformedURLException e) {
logger.debug("Malformed redirect url. Keeping original url.",e);
}
}
// Finally, the redirect has been reconstructed, so we can now store the page.
doc = new FetchDocument(page, rec.origurl, rec.content, rec.header, success(rec), recoverable(rec), internalError(rec), true);
}
if (null != doc) {
fetchdata.addDoc(doc);
} else {
logger.error("Unknown page fetched. This is a bug in Nutch9Fetcher.");