PDXObject object = entry.getValue();
if (object instanceof PDXObjectForm) {
extractImages(((PDXObjectForm) object).getResources());
} else if (object instanceof PDXObjectImage) {
PDXObjectImage image = (PDXObjectImage) object;
Metadata metadata = new Metadata();
String extension = "";
if (image instanceof PDJpeg) {
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
extension = ".jpg";
} else if (image instanceof PDCcitt) {
metadata.set(Metadata.CONTENT_TYPE, "image/tiff");
extension = ".tif";
} else if (image instanceof PDPixelMap) {
metadata.set(Metadata.CONTENT_TYPE, "image/png");
extension = ".png";
}
Integer imageNumber = processedInlineImages.get(entry.getKey());
if (imageNumber == null) {
imageNumber = inlineImageCounter++;
}
String fileName = "image"+imageNumber+extension;
metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
// Output the img tag
AttributesImpl attr = new AttributesImpl();
attr.addAttribute("", "src", "src", "CDATA", "embedded:" + fileName);
attr.addAttribute("", "alt", "alt", "CDATA", fileName);
handler.startElement("img", attr);
handler.endElement("img");
//Do we only want to process unique COSObject ids?
//If so, have we already processed this one?
if (config.getExtractUniqueInlineImagesOnly() == true) {
String cosObjectId = entry.getKey();
if (processedInlineImages.containsKey(cosObjectId)){
continue;
}
processedInlineImages.put(cosObjectId, imageNumber);
}
metadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
TikaCoreProperties.EmbeddedResourceType.INLINE.toString());
EmbeddedDocumentExtractor extractor =
getEmbeddedDocumentExtractor();
if (extractor.shouldParseEmbedded(metadata)) {
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
try {
image.write2OutputStream(buffer);
image.clear();
extractor.parseEmbedded(
new ByteArrayInputStream(buffer.toByteArray()),
new EmbeddedContentHandler(handler),
metadata, false);
} catch (IOException e) {