Package org.apache.tika

Examples of org.apache.tika.Tika


    @Before
    public void setUp() throws Exception{
        TikaConfig config = TikaConfig.getDefaultConfig();
        repo = config.getMimeRepository();
        tika = new Tika(config);
        u = new URL("http://mydomain.com/x.pdf?x=y");
    }
View Full Code Here


        assertTrue("Bundle bundle not found", hasBundle);
    }
   
    @Test
    public void testBundleDetection(BundleContext bc) throws Exception {
        Tika tika = new Tika();

        // Simple type detection
        assertEquals("text/plain", tika.detect("test.txt"));
        assertEquals("application/pdf", tika.detect("test.pdf"));
    }
View Full Code Here

    }

    @Ignore // TODO Fix this test
    @Test
    public void testBundleSimpleText(BundleContext bc) throws Exception {
        Tika tika = new Tika();
       
        // Simple text extraction
        String xml = tika.parseToString(new File("pom.xml"));
        assertTrue(xml.contains("tika-bundle"));
    }
View Full Code Here

    }
   
    @Ignore // TODO Fix this test
    @Test
    public void testTikaBundle(BundleContext bc) throws Exception {
        Tika tika = new Tika();

        // Package extraction
        ContentHandler handler = new BodyContentHandler();

        Parser parser = tika.getParser();
        ParseContext context = new ParseContext();
        context.set(Parser.class, parser);

        InputStream stream =
                new FileInputStream("src/test/resources/test-documents.zip");
View Full Code Here

        out.println("    ports you specify as one or more arguments.");
        out.println();
    }

    private void version() {
        System.out.println(new Tika().toString());
    }
View Full Code Here

    public static String getStringContent(
            InputStream stream, TikaConfig config, String mimeType)
            throws TikaException, IOException {
        Metadata metadata = new Metadata();
        metadata.set(Metadata.CONTENT_TYPE, mimeType);
        return new Tika(config).parseToString(stream, metadata);
    }
View Full Code Here

     * @return the string content parsed from the document
     * @deprecated Use the {@link Tika#parseToString(URL)} method
     */
    public static String getStringContent(URL documentUrl, TikaConfig config)
            throws TikaException, IOException {
        return new Tika(config).parseToString(documentUrl);
    }
View Full Code Here

    public static String getStringContent(
            URL documentUrl, TikaConfig config, String mimeType)
            throws TikaException, IOException {
        Metadata metadata = new Metadata();
        InputStream stream = TikaInputStream.get(documentUrl, metadata);
        return new Tika(config).parseToString(stream, metadata);
    }
View Full Code Here

    public static String getStringContent(
            File documentFile, TikaConfig config, String mimeType)
            throws TikaException, IOException {
        Metadata metadata = new Metadata();
        InputStream stream = TikaInputStream.get(documentFile, metadata);
        return new Tika(config).parseToString(stream, metadata);
    }
View Full Code Here

     * @return the string content parsed from the document
     * @deprecated Use the {@link Tika#parseToString(File)} method
     */
    public static String getStringContent(File documentFile, TikaConfig config)
            throws TikaException, IOException {
        return new Tika(config).parseToString(documentFile);
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.Tika

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.