/**
 * Parses the bundled {@code test2.doc} resource and returns only the part of
 * the XHTML output that sits beneath a {@code div} directly under
 * {@code html/body}.
 *
 * <p>The SAX events produced by the parser are filtered through a
 * {@link MatchingContentHandler}, so only nodes matched by the XPath
 * expression reach the underlying {@link ToXMLContentHandler}.
 *
 * @return the string form of the handler after parsing (presumably the
 *         serialized XML of the matched nodes - confirm against the
 *         handler's {@code toString()} contract)
 * @throws IOException if the {@code test2.doc} resource cannot be found on
 *         the classpath, or if the parser reports an I/O failure
 * @throws SAXException if propagated from the parse call
 * @throws TikaException if propagated from the parse call
 */
public String parseOnePartToHTML() throws IOException, SAXException, TikaException {
    // Select every descendant node of a div that is a direct child of body.
    // NOTE(review): the expression carries no class predicate, so it is not
    // limited to div elements with class="header".
    XPathParser xhtmlParser = new XPathParser("xhtml", XHTMLContentHandler.XHTML);
    Matcher divContentMatcher = xhtmlParser.parse(
            "/xhtml:html/xhtml:body/xhtml:div/descendant::node()");
    ContentHandler handler = new MatchingContentHandler(
            new ToXMLContentHandler(), divContentMatcher);

    AutoDetectParser parser = new AutoDetectParser();
    Metadata metadata = new Metadata();

    // try-with-resources closes the stream on every exit path and keeps any
    // close() failure as a suppressed exception instead of masking the
    // original parse error.
    try (InputStream stream =
            ContentHandlerExample.class.getResourceAsStream("test2.doc")) {
        // getResourceAsStream signals a missing resource with null; fail with
        // a descriptive error rather than an opaque NullPointerException.
        if (stream == null) {
            throw new IOException("Resource not found on classpath: test2.doc");
        }
        parser.parse(stream, handler, metadata);
        return handler.toString();
    }
}