doc.setAuthor(author); doc.setMetadata(metadata);
} // end of if
//*-- Populate the contents of the document with the entire text from the web page
logger.info("Extracting text from body of html file " + ifile);
StringExtractor st = new StringExtractor(ifile);
//*-- the string extractor does not extract the values of INPUT form tags -- handle them separately
parser.setInputHTML(htmlcontents); StringBuffer inputVal = new StringBuffer();
NodeList nodelist3 = parser.parse(new TagNameFilter ("INPUT"));
for (int i = 0; i < nodelist3.size(); i++)
{ InputTag itag = (InputTag) nodelist3.elementAt(i);
if ((itag != null) && (itag.getAttribute("value") != null) )
{ inputVal.append(" "); inputVal.append( itag.getAttribute("value") ); }
}
//*-- finally set the contents of the document
doc.setContents( new StringBuffer(cleanHTML( st.extractStrings(false)) + " " + inputVal) );
doc.setFileName(ifile);
} //*-- end of try block
catch (OutOfMemoryError exc)
{ logger.error("Ran out of memory for " + ifile + " or could be corrupt file " + exc.getMessage()); }