// Hand each embedded file listed under `names` to an EmbeddedDocumentExtractor.
// Every level is null-guarded, so a missing names object, embedded-files node,
// or name map simply means nothing is extracted.
if (names != null) {
PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
if (embeddedFiles != null) {
// Prefer an extractor supplied through the parse context; otherwise fall
// back to a ParsingEmbeddedDocumentExtractor built from that same context.
EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class);
if (embeddedExtractor == null) {
embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context);
}
Map<String, COSObjectable> embeddedFileNames = embeddedFiles.getNames();
if (embeddedFileNames != null) {
for (Map.Entry<String,COSObjectable> ent : embeddedFileNames.entrySet()) {
// NOTE(review): unchecked downcast - any entry value that is not a
// PDComplexFileSpecification throws ClassCastException here. Confirm
// that the name map can only hold that type.
PDComplexFileSpecification spec = (PDComplexFileSpecification) ent.getValue();
// NOTE(review): `file` is dereferenced below without a null check;
// confirm getEmbeddedFile() cannot return null for a specification
// that has no embedded stream.
PDEmbeddedFile file = spec.getEmbeddedFile();
// Fresh metadata per entry: the map key becomes the resource name, and
// the embedded file's subtype and size become content type and length.
Metadata metadata = new Metadata();
// TODO: other metadata?
metadata.set(Metadata.RESOURCE_NAME_KEY, ent.getKey());
metadata.set(Metadata.CONTENT_TYPE, file.getSubtype());
metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.getSize()));
// The extractor is asked, with only the metadata, whether to parse this
// entry; the input stream is opened only if it says yes.
if (embeddedExtractor.shouldParseEmbedded(metadata)) {
TikaInputStream stream = TikaInputStream.get(file.createInputStream());
try {
// The outer handler is wrapped in an EmbeddedContentHandler before being
// passed on. NOTE(review): the trailing `false` is presumably the
// "output HTML" flag - confirm against EmbeddedDocumentExtractor.
embeddedExtractor.parseEmbedded(
stream,
new EmbeddedContentHandler(handler),
metadata, false);
} finally {
// Close the stream whether or not parseEmbedded threw.
stream.close();