protected void handleEmbededOfficeDoc(
DirectoryEntry dir, XHTMLContentHandler xhtml)
throws IOException, SAXException, TikaException {
// Is it an embedded OLE2 document, or an embedded OOXML document?
try {
Entry ooxml = dir.getEntry("Package");
// It's OOXML
TikaInputStream stream = TikaInputStream.get(
new DocumentInputStream((DocumentEntry) ooxml));
try {
ZipContainerDetector detector = new ZipContainerDetector();
MediaType type = detector.detect(stream, new Metadata());
handleEmbeddedResource(stream, null, type.toString(), xhtml, true);
return;
} finally {
stream.close();
}
} catch(FileNotFoundException e) {
// It's regular OLE2
}
// Need to dump the directory out to a new temp file, so
// that it's standalone
POIFSFileSystem newFS = new POIFSFileSystem();
copy(dir, newFS.getRoot());
File tmpFile = File.createTempFile("tika", ".ole2");
try {
FileOutputStream out = new FileOutputStream(tmpFile);
newFS.writeFilesystem(out);
out.close();
// What kind of document is it?
Metadata metadata = new Metadata();
POIFSDocumentType type = POIFSDocumentType.detectType(dir);
TikaInputStream embedded;
if (type==POIFSDocumentType.OLE10_NATIVE) {
Entry entry = dir.getEntry(Ole10Native.OLE10_NATIVE);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
IOUtils.copy(new DocumentInputStream((DocumentEntry) entry), bos);
byte[] data = bos.toByteArray();
try {