// Do nothing when ExternalParser.check rejects checkCmd
// (presumably the OCR executable is not available -- TODO confirm).
if (!ExternalParser.check(checkCmd)) {
    return;
}
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
TemporaryResources tmp = new TemporaryResources();
File output = null;
try {
    TikaInputStream tikaStream = TikaInputStream.get(stream, tmp);
    File input = tikaStream.getFile();
    long size = tikaStream.getLength();
    // Only run OCR when the input length is within the configured bounds (both inclusive).
    if (size >= config.getMinFileSizeToOcr() && size <= config.getMaxFileSizeToOcr()) {
        output = tmp.createTemporaryFile();
        doOCR(input, output, config);
        // Tesseract appends .txt to output file name
        output = new File(output.getAbsolutePath() + ".txt");
        if (output.exists()) {
            // Close the stream explicitly: leaving it open leaks a file descriptor and
            // can make the delete() in the outer finally fail while the file is still open.
            FileInputStream ocrText = new FileInputStream(output);
            try {
                extractOutput(ocrText, xhtml);
            } finally {
                ocrText.close();
            }
        }
    }
} finally {
    try {
        tmp.dispose();
    } finally {
        // Runs even if dispose() throws, so the OCR result file is not left behind.
        if (output != null) {
            output.delete();
        }
    }
}
}