/**
 * Extracts the payload bytes of an embedded object stored in an OLE2 (POIFS) container.
 *
 * <p>The stream is opened as an {@code NPOIFSFileSystem} and handled as follows:
 * <ul>
 *   <li>if the root directory has a {@code "Package"} entry, the bytes of that entry
 *       are returned;</li>
 *   <li>otherwise the type is detected via {@code POIFSDocumentType.detectType}:
 *     <ul>
 *       <li>{@code OLE10_NATIVE}: the data buffer of the unwrapped {@code Ole10Native}
 *           record, or {@code null} if the record is not valid;</li>
 *       <li>{@code COMP_OBJ}: the bytes of the {@code "CONTENTS"} entry (falling back
 *           to {@code "Contents"});</li>
 *       <li>any other type: {@code is} is reset and copied in full, and
 *           {@code metadata} receives a generated resource name
 *           ({@code file_<n>.<ext>}) and the detected content type.</li>
 *     </ul>
 *   </li>
 * </ul>
 *
 * @param is stream holding the POIFS container; it is {@code reset()} and re-read in
 *        the fallback branch
 * @param metadata metadata to update; only written to in the fallback branch
 * @param unknownFilenameCount counter used to build generated file names; incremented
 *        only in the fallback branch
 * @return the extracted bytes; may be {@code null} (root directory missing, or invalid
 *         OLE10Native record)
 * @throws IOException propagated from opening the file system or reading its entries
 */
private byte[] handleEmbeddedPOIFS(InputStream is, Metadata metadata,
        AtomicInteger unknownFilenameCount)
        throws IOException {
    NPOIFSFileSystem fs = null;
    byte[] ret = null;
    try {
        fs = new NPOIFSFileSystem(is);
        DirectoryNode root = fs.getRoot();
        if (root == null) {
            return ret;
        }
        if (root.hasEntry("Package")) {
            Entry ooxml = root.getEntry("Package");
            TikaInputStream stream = null;
            try {
                stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml));
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                IOUtils.copy(stream, out);
                ret = out.toByteArray();
            } finally {
                // Release the entry stream once its bytes are copied, mirroring the
                // explicit close done for the CONTENTS stream below.
                if (stream != null) {
                    stream.close();
                }
            }
        } else {
            //try poifs
            POIFSDocumentType type = POIFSDocumentType.detectType(root);
            if (type == POIFSDocumentType.OLE10_NATIVE) {
                try {
                    // Try to un-wrap the OLE10Native record:
                    Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(root);
                    ret = ole.getDataBuffer();
                } catch (Ole10NativeException ignored) {
                    // Not a valid OLE10Native record, skip it; ret stays null.
                }
            } else if (type == POIFSDocumentType.COMP_OBJ) {
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) root.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    // Entry name casing varies; retry with the mixed-case spelling.
                    contentsEntry = (DocumentEntry) root.getEntry("Contents");
                }
                DocumentInputStream inp = null;
                try {
                    inp = new DocumentInputStream(contentsEntry);
                    ret = new byte[contentsEntry.getSize()];
                    inp.readFully(ret);
                } finally {
                    if (inp != null) {
                        inp.close();
                    }
                }
            } else {
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                // NOTE(review): relies on the caller passing a stream whose reset()
                // rewinds to the start of the container after NPOIFSFileSystem has
                // consumed it -- confirm against the call site.
                is.reset();
                IOUtils.copy(is, out);
                ret = out.toByteArray();
                metadata.set(Metadata.RESOURCE_NAME_KEY,
                        "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
                metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            }
        }
    } finally {
        if (fs != null) {
            fs.close();
        }
    }
    return ret;
}