Package org.apache.tika.metadata

Examples of org.apache.tika.metadata.Metadata


        "application/vnd.ms-excel");
        assertEquals(s1, s2);
        assertTrue("Text does not contain '" + expected + "'", s1
                .contains(expected));
        Parser parser = tc.getParser("application/vnd.ms-excel");
        Metadata metadata = new Metadata();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(stream, new DefaultHandler(), metadata);
        } finally {
            stream.close();
        }
        assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));
    }
View Full Code Here


            throws TikaException, IOException {
        try {
            Parser parser = config.getParser(mimeType);
            StringWriter writer = new StringWriter();
            parser.parse(
                    stream, new WriteOutContentHandler(writer), new Metadata());
            return writer.toString();
        } catch (SAXException e) {
            throw new TikaException("Unexpected SAX error", e);
        }
    }
View Full Code Here

            type = TEXT;
        } else if (arg.equals("-m") || arg.equals("--metadata")) {
            type = METADATA;
        } else {
            pipeMode = false;
            metadata = new Metadata();
            if (arg.equals("-")) {
                parser.parse(
                        System.in, type.getContentHandler(),
                        metadata, context);
            } else {
View Full Code Here

            fail("Could not open stream from specified resource: "
                    + tp.resourceRealName);
        }

        try {
            Metadata metadata = new Metadata();
            metadata.set(Metadata.RESOURCE_NAME_KEY, tp.resourceStatedName);
            metadata.set(Metadata.CONTENT_TYPE, tp.statedType);
            ContentHandler handler = new BodyContentHandler();
            new AutoDetectParser().parse(input, handler, metadata);

            assertEquals("Bad content type: " + tp,
                    tp.realType, metadata.get(Metadata.CONTENT_TYPE));

            assertTrue("Expected content not found: " + tp,
                    handler.toString().contains(tp.expectedContentFragment));
        } finally {
            input.close();
View Full Code Here

     */
    public void testZipBombPrevention() throws Exception {
        InputStream tgz = AutoDetectParserTest.class.getResourceAsStream(
                "/test-documents/TIKA-216.tgz");
        try {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler(-1);
            new AutoDetectParser().parse(tgz, handler, metadata);
            fail("Zip bomb was not detected");
        } catch (TikaException e) {
            // expected
View Full Code Here

        String s1 = ParseUtils.getStringContent(file, tc);
        String s2 = ParseUtils.getStringContent(file, tc,
                "application/vnd.ms-powerpoint");
        assertEquals(s1, s2);
        Parser parser = tc.getParser("application/vnd.ms-powerpoint");
        Metadata metadata = new Metadata();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(stream, new DefaultHandler(), metadata);
        } finally {
            stream.close();
        }
        assertEquals("Sample Powerpoint Slide", metadata.get(Metadata.TITLE));
    }
View Full Code Here

        File file = getResourceAsFile("/test-documents/testWORD.doc");
        String s1 = ParseUtils.getStringContent(file, tc);
        String s2 = ParseUtils.getStringContent(file, tc, "application/msword");
        assertEquals(s1, s2);
        Parser parser = tc.getParser("application/msword");
        Metadata metadata = new Metadata();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(stream, new DefaultHandler(), metadata);
        } finally {
            stream.close();
        }
        assertEquals("Sample Word Document", metadata.get(Metadata.TITLE));
    }
View Full Code Here

                "application/vnd.ms-excel");
        assertEquals(s1, s2);
        assertTrue("Text does not contain '" + expected + "'", s1
                .contains(expected));
        Parser parser = tc.getParser("application/vnd.ms-excel");
        Metadata metadata = new Metadata();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(stream, new DefaultHandler(), metadata);
        } finally {
            stream.close();
        }
        assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));
    }
View Full Code Here

     * Test that with only ID3v1 tags, we get some information out  
     */
    public void testMp3ParsingID3v1() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                "/test-documents/testMP3id3v1.mp3");
        try {
            parser.parse(stream, handler, metadata);
        } finally {
            stream.close();
        }

        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("Test Title", metadata.get(Metadata.TITLE));
        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));

        String content = handler.toString();
        assertTrue(content.contains("Test Title"));
        assertTrue(content.contains("Test Artist"));
        assertTrue(content.contains("Test Album"));
        assertTrue(content.contains("2008"));
        assertTrue(content.contains("Test Comment"));
        assertTrue(content.contains("Rock"));
       
        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
        assertEquals("44100", metadata.get("samplerate"));
        assertEquals("2", metadata.get("channels"));
    }
View Full Code Here

     *  set of information out.
     */
    public void testMp3ParsingID3v2() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                "/test-documents/testMP3id3v2.mp3");
        try {
            parser.parse(stream, handler, metadata);
        } finally {
            stream.close();
        }

        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("Test Title", metadata.get(Metadata.TITLE));
        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));

        String content = handler.toString();
        assertTrue(content.contains("Test Title"));
        assertTrue(content.contains("Test Artist"));
        assertTrue(content.contains("Test Album"));
        assertTrue(content.contains("2008"));
        assertTrue(content.contains("Test Comment"));
        assertTrue(content.contains("Rock"));
       
        assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
        assertEquals("44100", metadata.get("samplerate"));
        assertEquals("2", metadata.get("channels"));
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.metadata.Metadata

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.