/**
 * Parses the bundled {@code testEXCEL.xlsx} document with an
 * {@link AutoDetectParser} and verifies the reported content type, the
 * title and author metadata, the extracted body text, and the
 * "protected" flag.
 *
 * @throws Exception if the document cannot be read or parsed
 */
public void testExcel() throws Exception {
    final String expectedType =
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

    InputStream stream = OOXMLParserTest.class.getResourceAsStream(
            "/test-documents/testEXCEL.xlsx");

    Metadata meta = new Metadata();
    // TODO: should auto-detect without the resource name
    meta.set(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xlsx");
    Parser autoParser = new AutoDetectParser();
    ContentHandler bodyHandler = new BodyContentHandler();

    try {
        autoParser.parse(stream, bodyHandler, meta);

        // Document-level metadata.
        String detectedType = meta.get(Metadata.CONTENT_TYPE);
        assertEquals(expectedType, detectedType);
        String title = meta.get(Metadata.TITLE);
        assertEquals("Simple Excel document", title);
        String author = meta.get(Metadata.AUTHOR);
        assertEquals("Keith Bennett", author);

        // Extracted body text.
        String text = bodyHandler.toString();
        assertTrue(text.contains("Sample Excel Worksheet"));
        assertTrue(text.contains("Numbers and their Squares"));

        // Whole numbers must appear without a trailing ".0".
        assertTrue(text.contains("9"));
        assertFalse(text.contains("9.0"));
        assertTrue(text.contains("196"));
        assertFalse(text.contains("196.0"));

        String protectedFlag = meta.get(TikaMetadataKeys.PROTECTED);
        assertEquals("false", protectedFlag);
    } finally {
        // Release the resource stream whether or not the assertions passed.
        stream.close();
    }
}