// Simple text extraction: parse the pom.xml at the relative path to plain
// text and check that the result mentions "tika-bundle".
File pomFile = new File("pom.xml");
String pomText = tika.parseToString(pomFile);
assertTrue(pomText.contains("tika-bundle"));

// Package extraction: run the facade's parser over a zip archive and collect
// the output in a body content handler.
Parser parser = tika.getParser();
ContentHandler handler = new BodyContentHandler();
// The parser is registered in the context under Parser.class
// (presumably so that entries inside the archive are parsed too -- confirm).
ParseContext parseContext = new ParseContext();
parseContext.set(Parser.class, parser);
InputStream zipStream =
        new FileInputStream("src/test/resources/test-documents.zip");
try {
    parser.parse(zipStream, handler, new Metadata(), parseContext);
} finally {
    // Release the file handle whether or not parsing succeeded.
    zipStream.close();
}

// The collected text must contain every one of the expected strings,
// checked in the order listed.
String content = handler.toString();
String[] expectedFragments = {
    "testEXCEL.xls",
    "Sample Excel Worksheet",
    "testHTML.html",
    "Test Indexation Html",
    "testOpenOffice2.odt",
};
for (String fragment : expectedFragments) {
    assertTrue(content.contains(fragment));
}