// Record the current wall-clock time (milliseconds) as the datum's fetch time.
datum.setFetchTime(System.currentTimeMillis());
// Attach the protocol status to the datum's metadata, but only when one was supplied.
if (pstatus != null) datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
// No content was supplied: build a placeholder Content from the URL with a
// zero-length byte array, an empty string and a fresh Metadata, so that the
// code below can always dereference `content`.
if (content == null) {
String url = key.toString();
content = new Content(url, url, new byte[0], "", new Metadata(), this.conf);
}
Metadata metadata = content.getMetadata();
// add segment to metadata
metadata.set(Nutch.SEGMENT_NAME_KEY, segmentName);
// add score to content metadata so that ParseSegment can pick it up.
try {
scfilters.passScoreBeforeParsing(key, datum, content);
} catch (Exception e) {
// Best effort: a failure in the scoring filters is only reported at WARN
// level (stack trace plus a one-line message); processing then continues.
if (LOG.isWarnEnabled()) {
e.printStackTrace(LogUtil.getWarnStream(LOG));
LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
}
}
// Stays null unless the in-line parsing branch below is taken.
Parse parse = null;
// Parse in-line only when parsing is enabled and the fetch status is success.
if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
ParseStatus parseStatus;
try {
parse = this.parseUtil.parse(content);
parseStatus = parse.getData().getStatus();
} catch (Exception e) {
// Any exception thrown while parsing or while reading the status is
// converted into a ParseStatus built from that exception, so it is
// handled by the same failure check below.
parseStatus = new ParseStatus(e);
}
if (!parseStatus.isSuccess()) {
if (LOG.isWarnEnabled()) {
LOG.warn("Error parsing: " + key + ": " + parseStatus);
}
// Replace the parse (still null if the try block threw before assigning
// it) with the empty parse obtained from the status, so the lines below
// can dereference `parse`.
parse = parseStatus.getEmptyParse(getConf());
}
// Calculate page signature. For non-parsing fetchers this will
// be done in ParseSegment
byte[] signature = SignatureFactory.getSignature(getConf()).calculate(content, parse);
// The signature is stored hex-encoded in the content metadata and as raw
// bytes on the datum.
metadata.set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature));
datum.setSignature(signature);
// Ensure segment name and signature are in parseData metadata.
// NOTE(review): this comment used to mention the score rather than the
// signature; the score is presumably handled by the code that follows - confirm.
parse.getData().getContentMeta().set(Nutch.SEGMENT_NAME_KEY, segmentName);
parse.getData().getContentMeta().set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature));
try {