@Test
public void testInlineConfig() throws Exception {
Parser defaultParser = new AutoDetectParser();
RecursiveParserWrapper p = new RecursiveParserWrapper(defaultParser,
new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE, -1));
ParseContext context = new ParseContext();
context.set(org.apache.tika.parser.Parser.class, p);
Metadata metadata = new Metadata();
ContentHandler handler = new BodyContentHandler(-1);
String path = "/test-documents/testPDF_childAttachments.pdf";
InputStream stream = TikaInputStream.get(this.getClass().getResource(path));
p.parse(stream, handler, metadata, context);
List<Metadata> metadatas = p.getMetadata();
int inline = 0;
int attach = 0;
for (Metadata m : metadatas) {
String v = m.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE);
if (v != null) {
if (v.equals(TikaCoreProperties.EmbeddedResourceType.INLINE.toString())){
inline++;
} else if (v.equals(TikaCoreProperties.EmbeddedResourceType.ATTACHMENT.toString())){
attach++;
}
}
}
assertEquals(0, inline);
assertEquals(2, attach);
stream.close();
p.reset();
//now try turning on inline image extraction (first pass above ran without a PDFParserConfig)
stream = TikaInputStream.get(this.getClass().getResource(path));
PDFParserConfig config = new PDFParserConfig();
config.setExtractInlineImages(true);
config.setExtractUniqueInlineImagesOnly(false);
context.set(org.apache.tika.parser.pdf.PDFParserConfig.class, config);
inline = 0;
attach = 0;
handler = new BodyContentHandler(-1);
metadata = new Metadata();
p.parse(stream, handler, metadata, context);
metadatas = p.getMetadata();
for (Metadata m : metadatas) {
String v = m.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE);
if (v != null) {
if (v.equals(TikaCoreProperties.EmbeddedResourceType.INLINE.toString())){
inline++;