// This field is not stored with the document; it is indexed, but it is not
// tokenized prior to indexing.
doc.add(new Field("uid", uid(f, htdocsDumpDir), false, true, false));
//HtmlDocument htmlDoc = new HtmlDocument(f);
HTMLParser parser = new HTMLParser(f);
// NOTE: no summary field is added by the statements below. If a summary is added,
// it should be an UnIndexed field, so that it is stored and returned
// with hit documents for display.
// Add the title as a separate Text field, so that it can be searched separately.
/*
String title = htmlDoc.getTitle();
if (title != null) {
doc.add(Field.Text("title", title));
} else {
doc.add(Field.Text("title", ""));
}
*/
doc.add(Field.Text("title", parser.getTitle()));
//System.out.println("HTMLDocument.getLuceneDocument(): title field added: " + title);
// Add the tag-stripped contents as a Reader-valued Text field so it will get tokenized and indexed.
/*
String body = htmlDoc.getBody();
String contents = "";
if ((body != null) && (title != null)) {
contents = title + " " + body;
doc.add(Field.Text("contents", title + body));
}
doc.add(Field.Text("contents", contents));
*/
doc.add(Field.Text("contents", parser.getReader()));
//System.out.println("HTMLDocument.getLuceneDocument(): contents field added: " + contents);
return doc;
}