public Document getDocument(File file, File htdocsDumpDir) throws IOException {
Document document;
try {
document = super.getDocument(file, htdocsDumpDir);
HTMLParser parser = HTMLParserFactory.newInstance(file);
parser.parse(file);
document.add(Field.Text("title", parser.getTitle()));
document.add(Field.Text("keywords", parser.getKeywords()));
document.add(Field.Text("contents", parser.getReader()));
} catch (final IOException e) {
throw new IOException(e.toString());
} catch (final ParseException e) {
throw new IOException(e.toString());
}