Package org.apache.tika.extractor

Examples of org.apache.tika.extractor.ContainerExtractor


       assertTrue(needle > -1);
       assertTrue(needle > pdfHaystack && pdfHaystack > outerHaystack);
      
       TrackingHandler tracker = new TrackingHandler();
       TikaInputStream tis;
       ContainerExtractor ex = new ParserContainerExtractor();
       try{
          tis= TikaInputStream.get(getResourceAsStream("/test-documents/testPDFEmbeddingAndEmbedded.docx"));
          ex.extract(tis, ex, tracker);
       } finally {
          stream.close();
       }
       assertEquals(true, ex.isSupported(tis));
       assertEquals(3, tracker.filenames.size());
       assertEquals(3, tracker.mediaTypes.size());
       assertEquals("image1.emf", tracker.filenames.get(0));
       assertNull(tracker.filenames.get(1));
       assertEquals("Test.docx", tracker.filenames.get(2));
View Full Code Here


     * Check the Rtf and Attachments are returned
     *  as expected
     */
   @Test
    public void testBodyAndAttachments() throws Exception {
       ContainerExtractor extractor = new ParserContainerExtractor();
      
       // Process it with recursing
       // Will have the message body RTF and the attachments
       TrackingHandler handler = process(file, extractor, true);
       assertEquals(6, handler.filenames.size());
View Full Code Here

TOP

Related Classes of org.apache.tika.extractor.ContainerExtractor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.