}
public static POITextExtractor createExtractor(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
// Check each entry name in the directory against the well-known
// stream names to work out which kind of document this is
for(Iterator<Entry> entries = poifsDir.getEntries(); entries.hasNext(); ) {
Entry entry = entries.next();
if(entry.getName().equals("Workbook")) {
if(getPreferEventExtractor()) {
return new EventBasedExcelExtractor(poifsDir, fs);
} else {
return new ExcelExtractor(poifsDir, fs);
}
}
if(entry.getName().equals("WordDocument")) {
// Old or new style word document?
try {
return new WordExtractor(poifsDir, fs);
} catch(OldWordFileFormatException e) {
return new Word6Extractor(poifsDir, fs);
}
}
if(entry.getName().equals("PowerPoint Document")) {
return new PowerPointExtractor(poifsDir, fs);
}
if(entry.getName().equals("VisioDocument")) {
return new VisioTextExtractor(poifsDir, fs);
}
if(entry.getName().equals("Quill")) {
return new PublisherTextExtractor(poifsDir, fs);
}
if(
entry.getName().equals("__substg1.0_1000001E") ||
entry.getName().equals("__substg1.0_1000001F") ||
entry.getName().equals("__substg1.0_0047001E") ||
entry.getName().equals("__substg1.0_0047001F") ||
entry.getName().equals("__substg1.0_0037001E") ||
entry.getName().equals("__substg1.0_0037001F")
) {
return new OutlookTextExtactor(poifsDir, fs);
}
if(entry.getName().equals("Package")) {
OPCPackage pkg = OPCPackage.open(
poifsDir.createDocumentInputStream(entry.getName())
);
return createExtractor(pkg);
}
}
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");