}
}
// get the content metadata carried by the parse data
Metadata metadata = parseData.getContentMeta();
Parse parse = new ParseImpl(parseText, parseData);
// handle redirect urls
Text reprUrlText = (Text)fetchDatum.getMetaData().get(
Nutch.WRITABLE_REPR_URL_KEY);
String reprUrl = reprUrlText != null ? reprUrlText.toString() : null;
String url = key.toString();
String fieldUrl = (reprUrl != null) ? reprUrl : url;
String host = URLUtil.getHost(fieldUrl);
// add segment, used to map from merged index back to segment files
FieldWritable segField = new FieldWritable(Fields.SEGMENT,
metadata.get(Nutch.SEGMENT_NAME_KEY), FieldType.CONTENT, false, true,
false);
fieldsList.add(segField);
// add digest, used by dedup
FieldWritable digestField = new FieldWritable(Fields.DIGEST,
metadata.get(Nutch.SIGNATURE_KEY), FieldType.CONTENT, false, true,
false);
fieldsList.add(digestField);
// url is both stored and indexed, so it's both searchable and returned
fieldsList.add(new FieldWritable(Fields.URL, fieldUrl, FieldType.CONTENT,
true, true, true));
fieldsList.add(new FieldWritable(Fields.SEG_URL, url, FieldType.CONTENT,
false, true, false));
if (reprUrl != null) {
// also add the original url, both stored and indexed
fieldsList.add(new FieldWritable(Fields.ORIG_URL, url,
FieldType.CONTENT, true, true, true));
}
if (host != null) {
// add host as un-stored, indexed and tokenized
FieldWritable hostField = new FieldWritable(Fields.HOST, host,
FieldType.CONTENT, true, false, true);
fieldsList.add(hostField);
// add site as un-stored, indexed and un-tokenized
FieldWritable siteField = new FieldWritable(Fields.SITE, host,
FieldType.CONTENT, true, false, false);
fieldsList.add(siteField);
}
// content is indexed, so that it's searchable, but not stored in index
fieldsList.add(new FieldWritable(Fields.CONTENT, parse.getText(),
FieldType.CONTENT, true, false, true));
// title
String title = parse.getData().getTitle();
if (title.length() > MAX_TITLE_LENGTH) { // truncate title if needed
title = title.substring(0, MAX_TITLE_LENGTH);
}
// add title indexed and stored so that it can be displayed
fieldsList.add(new FieldWritable(Fields.TITLE, title, FieldType.CONTENT,
true, true, true));
// add cached content/summary display policy, if available
String caching = parse.getData().getMeta(Nutch.CACHING_FORBIDDEN_KEY);
if (caching != null && !caching.equals(Nutch.CACHING_FORBIDDEN_NONE)) {
fieldsList.add(new FieldWritable(Fields.CACHE, caching,
FieldType.CONTENT, false, true, false));
}