Package org.apache.tika.metadata

Examples of org.apache.tika.metadata.Metadata


    }

    public void testUTF8Text() throws Exception {
        String text = "I\u00F1t\u00EBrn\u00E2ti\u00F4n\u00E0liz\u00E6ti\u00F8n";

        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();
        parser.parse(
                new ByteArrayInputStream(text.getBytes("UTF-8")),
                new WriteOutContentHandler(writer),
                metadata);
        String content = writer.toString();

        assertEquals("text/plain", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("UTF-8", metadata.get(Metadata.CONTENT_ENCODING));

        assertTrue(content.contains(text));
    }
View Full Code Here


        assertTrue(content.contains(text));
    }

    public void testEmptyText() throws Exception {
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();
        parser.parse(
                new ByteArrayInputStream(new byte[0]),
                new WriteOutContentHandler(writer),
                metadata);
        String content = writer.toString();
        assertEquals("text/plain", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("", content);
    }
View Full Code Here

    public void testExcelParser() throws Exception {
        InputStream input = ExcelParserTest.class.getResourceAsStream(
                "/test-documents/testEXCEL.xls");
        try {
            Metadata metadata = new Metadata();
            StringWriter writer = new StringWriter();
            ContentHandler handler = new WriteOutContentHandler(writer);
            new ExcelParser().parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.ms-excel",
                    metadata.get(Metadata.CONTENT_TYPE));
            assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));
            assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
            String content = writer.toString();
            assertTrue(content.contains("Sample Excel Worksheet"));
            assertTrue(content.contains("Numbers and their Squares"));
            assertTrue(content.contains("9.0"));
            assertTrue(content.contains("196.0"));
View Full Code Here

    public void testParseAscii() throws IOException, SAXException,
            TikaException {

        StringWriter writer = new StringWriter();
        Metadata metadata = new Metadata();

        parser.parse(getStream("test-documents/testHTML.html"),
                new WriteOutContentHandler(writer), metadata);
        String content = writer.toString();
View Full Code Here

    public void testWordParser() throws Exception {
        InputStream input = WordParserTest.class.getResourceAsStream(
                "/test-documents/testWORD.doc");
        try {
            Metadata metadata = new Metadata();
            StringWriter writer = new StringWriter();
            ContentHandler handler = new WriteOutContentHandler(writer);
            new WordParser().parse(input, handler, metadata);

            assertEquals(
                    "application/msword",
                    metadata.get(Metadata.CONTENT_TYPE));
            assertEquals("Sample Word Document", metadata.get(Metadata.TITLE));
            assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
            String content = writer.toString();
            assertTrue(content.contains("Sample Word Document"));
        } finally {
            input.close();
        }
View Full Code Here

    }

    public void XtestParseUTF8() throws IOException, SAXException, TikaException {

        StringWriter writer = new StringWriter();
        Metadata metadata = new Metadata();

        parser.parse(getStream("test-documents/testHTML_utf8.html"),
                new WriteOutContentHandler(writer), metadata);
        String content = writer.toString();
View Full Code Here

                .contains("åäö"));

    }

    public void testParseEmpty() throws Exception {
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();
        parser.parse(new ByteArrayInputStream(new byte[0]),
                new WriteOutContentHandler(writer), metadata);
        String content = writer.toString();
        assertEquals("", content);
View Full Code Here

    public void testPowerPointParser() throws Exception {
        InputStream input = PowerPointParserTest.class.getResourceAsStream(
                "/test-documents/testPPT.ppt");
        try {
            Metadata metadata = new Metadata();
            StringWriter writer = new StringWriter();
            ContentHandler handler = new WriteOutContentHandler(writer);
            new PowerPointParser().parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.ms-powerpoint",
                    metadata.get(Metadata.CONTENT_TYPE));
            assertEquals("Sample Powerpoint Slide", metadata.get(Metadata.TITLE));
            assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
            String content = writer.toString();
            assertTrue(content.contains("Sample Powerpoint Slide"));
            assertTrue(content.contains("Powerpoint X for Mac"));
        } finally {
            input.close();
View Full Code Here

        String s1 = ParseUtils.getStringContent(file, tc);
        String s2 = ParseUtils.getStringContent(
                file, tc, "application/vnd.ms-powerpoint");
        assertEquals(s1, s2);
        Parser parser = tc.getParser("application/vnd.ms-powerpoint");
        Metadata metadata = new Metadata();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(stream, new DefaultHandler(), metadata);
        } finally {
            stream.close();
        }
        assertEquals("Sample Powerpoint Slide", metadata.get(Metadata.TITLE));
    }
View Full Code Here

        File file = getTestFile("testWORD.doc");
        String s1 = ParseUtils.getStringContent(file, tc);
        String s2 = ParseUtils.getStringContent(file, tc, "application/msword");
        assertEquals(s1, s2);
        Parser parser = tc.getParser("application/msword");
        Metadata metadata = new Metadata();
        InputStream stream = new FileInputStream(file);
        try {
            parser.parse(stream, new DefaultHandler(), metadata);
        } finally {
            stream.close();
        }
        assertEquals("Sample Word Document", metadata.get(Metadata.TITLE));
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.metadata.Metadata

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.