InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
metadata.set(Metadata.CONTENT_TYPE, "application/x-gzip");
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
// At the end we want to close the gzip stream to release any associated
// resources, but the underlying document stream should not be closed
InputStream gzip =
new GZIPInputStream(new CloseShieldInputStream(stream));
try {
Metadata entrydata = new Metadata();
String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
if (name != null && name.length() > 0) {
entrydata.set(
Metadata.RESOURCE_NAME_KEY,
GzipUtils.getUncompressedFilename(name));
}
// Use the delegate parser to parse the compressed document
super.parse(
new CloseShieldInputStream(gzip),
new EmbeddedContentHandler(
new BodyContentHandler(xhtml)),
entrydata, context);
} finally {
gzip.close();
}
xhtml.endDocument();
}