public void parse(
InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws SAXException, IOException, TikaException {
EmbeddedDocumentExtractor extractor =
new EmbeddedDocumentExtractor(context);
try {
File file = TikaInputStream.get(stream).getFile();
Archive archive = new Archive(file);
metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
XHTMLContentHandler xhtml =
new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
for (FileHeader header : archive.getFileHeaders()) {
Metadata entrydata = new Metadata();
entrydata.set(
Metadata.RESOURCE_NAME_KEY,
header.getFileNameString());
if (extractor.shouldParseEmbedded(entrydata)) {
extractor.parseEmbedded(stream, xhtml, entrydata, true);
}
}
xhtml.endDocument();
} catch (RarException e) {
throw new TikaException("Unable to parse a RAR archive", e);