assertContains("<div class=\"embedded\" id=\"test1.txt\" />", xml);
assertContains("<div class=\"embedded\" id=\"test2.txt\" />", xml);
// Also make sure EMBEDDED_RELATIONSHIP_ID was passed
// when parsing the embedded docs:
Parser parser = new AutoDetectParser();
ParseContext context = new ParseContext();
context.set(Parser.class, parser);
GatherRelIDsDocumentExtractor relIDs = new GatherRelIDsDocumentExtractor();
context.set(EmbeddedDocumentExtractor.class, relIDs);
InputStream input = getResourceAsStream("/test-documents/testEmbedded.zip");
try {
parser.parse(input,
new BodyContentHandler(),
new Metadata(),
context);
} finally {
input.close();