Examples of org.htmlparser.parserapplications.StringExtractor

    doc.setAuthor(author); doc.setMetadata(metadata);
   } // end of if


   //*-- Populate the contents of the document with the entire text from the web page
   logger.info("Extracting text from body of html file " + ifile);
   StringExtractor st = new StringExtractor(ifile); 


   //*-- the string extractor does not extract the values of INPUT form tags -- handle them separately
   parser.setInputHTML(htmlcontents); StringBuffer inputVal = new StringBuffer();
   NodeList nodelist3 = parser.parse(new TagNameFilter ("INPUT"));
   for (int i = 0; i < nodelist3.size(); i++)
   { InputTag itag = (InputTag) nodelist3.elementAt(i); 
   if ((itag != null) && (itag.getAttribute("value") != null) )
   { inputVal.append(" "); inputVal.append( itag.getAttribute("value") ); }
   }


   //*-- finally set the contents of the document
   doc.setContents( new StringBuffer(cleanHTML( st.extractStrings(false)) + " " + inputVal) ); 
   doc.setFileName(ifile);


  } //*-- end of try block
  catch (OutOfMemoryError exc) 
  { logger.error("Ran out of memory for " + ifile + " or could be corrupt file " + exc.getMessage()); }

Examples of org.htmlparser.parserapplications.StringExtractor

Related Classes of org.htmlparser.parserapplications.StringExtractor