Package org.apache.tika.fork

Examples of org.apache.tika.fork.ForkParser


    /**
     * If we supply a non-serializable object on the ParseContext,
     * check that we get a helpful exception back.
     */
    @Test
    public void testParserHandlingOfNonSerializable() throws Exception {
       ForkParser parser = new ForkParser(
             ForkParserIntegrationTest.class.getClassLoader(),
             tika.getParser());
      
       ParseContext context = new ParseContext();
       context.set(Detector.class, new Detector() {
          public MediaType detect(InputStream input, Metadata metadata) {
             return MediaType.OCTET_STREAM;
          }
       });

       try {
          ContentHandler output = new BodyContentHandler();
          InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
              "/test-documents/testTXT.txt");
          parser.parse(stream, output, new Metadata(), context);
          fail("Should have blown up with a non serializable ParseContext");
       } catch(TikaException e) {
          // Check the right details
          assertNotNull(e.getCause());
          assertEquals(NotSerializableException.class, e.getCause().getClass());
          assertEquals("Unable to serialize ParseContext to pass to the Forked Parser", e.getMessage());
       } finally {
          parser.close();
       }
    }
View Full Code Here


    public void testAttachingADebuggerOnTheForkedParserShouldWork()
            throws Exception {
        ParseContext context = new ParseContext();
        context.set(Parser.class, tika.getParser());

        ForkParser parser = new ForkParser(
                ForkParserIntegrationTest.class.getClassLoader(),
                tika.getParser());
        parser.setJavaCommand(
                "java -Xmx32m -Xdebug -Xrunjdwp:"
                + "transport=dt_socket,address=54321,server=y,suspend=n");
        try {
            ContentHandler body = new BodyContentHandler();
            InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
                    "/test-documents/testTXT.txt");
            parser.parse(stream, body, new Metadata(), context);
            String content = body.toString();
            assertTrue(content.contains("Test d'indexation"));
            assertTrue(content.contains("http://www.apache.org"));
        } finally {
            parser.close();
        }
    }
View Full Code Here

    /**
     * TIKA-808 - Ensure that parsing of our test PDFs works under
     * the Fork Parser, to ensure that complex parsing behaves correctly.
     */
    @Test
    public void testForkedPDFParsing() throws Exception {
        ForkParser parser = new ForkParser(
                ForkParserIntegrationTest.class.getClassLoader(),
                tika.getParser());
        try {
            ContentHandler output = new BodyContentHandler();
            InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream(
                    "/test-documents/testPDF.pdf");
            ParseContext context = new ParseContext();
            parser.parse(stream, output, new Metadata(), context);

            String content = output.toString();
            assertTrue(content.contains("Apache Tika"));
            assertTrue(content.contains("Tika - Content Analysis Toolkit"));
            assertTrue(content.contains("incubator"));
            assertTrue(content.contains("Apache Software Foundation"));
        } finally {
            parser.close();
        }
    }
View Full Code Here

                Parser.class.getName(),
                parser,
                new Properties());
        forkParserService = context.registerService(
                ForkParser.class.getName(),
                new ForkParser(Activator.class.getClassLoader(), parser),
                new Properties());
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.fork.ForkParser

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.