stream = new CloseShieldInputStream(stream);
// Ensure that the stream supports the mark feature
stream = new BufferedInputStream(stream);
ArchiveInputStream ais;
try {
ArchiveStreamFactory factory = new ArchiveStreamFactory();
ais = factory.createArchiveInputStream(stream);
} catch (ArchiveException e) {
throw new TikaException("Unable to unpack document stream", e);
}
MediaType type = getMediaType(ais);
if (!type.equals(MediaType.OCTET_STREAM)) {
metadata.set(CONTENT_TYPE, type.toString());
}
// Use the delegate parser to parse the contained document
EmbeddedDocumentExtractor extractor = context.get(
EmbeddedDocumentExtractor.class,
new ParsingEmbeddedDocumentExtractor(context));
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
try {
ArchiveEntry entry = ais.getNextEntry();
while (entry != null) {
if (!entry.isDirectory()) {
parseEntry(ais, entry, extractor, xhtml);
}
entry = ais.getNextEntry();
}
} finally {
ais.close();
}
xhtml.endDocument();
}