Package org.apache.tika.parser

Examples of org.apache.tika.parser.Parser


    /**
     * Test that with only ID3v1 tags, we get some information out  
     */
    public void testMp3ParsingID3v1() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                "/test-documents/testMP3id3v1.mp3");
        try {
            parser.parse(stream, handler, metadata);
        } finally {
            stream.close();
        }

        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here


                Object instance = parserClass.newInstance();
                if (!(instance instanceof Parser)) {
                    throw new TikaException(
                            "Configured class is not a Tika Parser: " + name);
                }
                Parser parser = (Parser) instance;

                NodeList mimes = node.getElementsByTagName("mime");
                if (mimes.getLength() > 0) {
                    for (int j = 0; j < mimes.getLength(); j++) {
                        parsers.put(getText(mimes.item(j)).trim(), parser);
                    }
                } else {
                    ParseContext context = new ParseContext();
                    for (MediaType type : parser.getSupportedTypes(context)) {
                        parsers.put(type.toString(), parser);
                    }
                }
            } catch (ClassNotFoundException e) {
                throw new TikaException(
View Full Code Here

    public TikaConfig() throws MimeTypeException, IOException {
        ParseContext context = new ParseContext();
        Iterator<Parser> iterator =
            ServiceRegistry.lookupProviders(Parser.class);
        while (iterator.hasNext()) {
            Parser parser = iterator.next();
            for (MediaType type : parser.getSupportedTypes(context)) {
                parsers.put(type.toString(), parser);
            }
        }
        mimeTypes = MimeTypesFactory.create("tika-mimetypes.xml");
    }
View Full Code Here

    /**
     * Test that with only ID3v2 tags, we get the full
     *  set of information out.
     */
    public void testMp3ParsingID3v2() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                "/test-documents/testMP3id3v2.mp3");
        try {
            parser.parse(stream, handler, metadata);
        } finally {
            stream.close();
        }

        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here

    /**
     * Test that with both id3v2 and id3v1, we prefer the
     *  details from id3v2
     */
    public void testMp3ParsingID3v1v2() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                "/test-documents/testMP3id3v1_v2.mp3");
        try {
            parser.parse(stream, handler, metadata);
        } finally {
            stream.close();
        }

        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here

* Test case for parsing Outlook files.
*/
public class OutlookParserTest extends TestCase {

    public void testOutlookParsing() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = OutlookParserTest.class.getResourceAsStream(
                "/test-documents/test-outlook.msg");
        try {
            parser.parse(stream, handler, metadata);
        } finally {
            stream.close();
        }

        assertEquals(
View Full Code Here

     * Test case for TIKA-197
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-197">TIKA-197</a>
     */
    public void testMultipleCopies() throws Exception {
        Parser parser = new AutoDetectParser();
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = OutlookParserTest.class.getResourceAsStream(
                "/test-documents/testMSG.msg");
        try {
            parser.parse(stream, handler, metadata);
        } finally {
            stream.close();
        }

        assertEquals(
View Full Code Here

     * Test case for TIKA-395, to ensure parser works for new Outlook formats.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-395">TIKA-395</a>
     */
    public void testOutlookNew() throws Exception {
        Parser parser = new AutoDetectParser();
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = OutlookParserTest.class.getResourceAsStream(
                "/test-documents/test-outlook2003.msg");
        try {
            parser.parse(stream, handler, metadata);
        } finally {
            stream.close();
        }

        assertEquals(
View Full Code Here

    public void testExcel() throws Exception {
        InputStream input = OOXMLParserTest.class
                .getResourceAsStream("/test-documents/testEXCEL.xlsx");

        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        // TODO: should auto-detect without the resource name
        metadata.set(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xlsx");
        ContentHandler handler = new BodyContentHandler();

        try {
            parser.parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    metadata.get(Metadata.CONTENT_TYPE));
            assertEquals("Simple Excel document", metadata.get(Metadata.TITLE));
View Full Code Here

    public void testExcelFormats() throws Exception {
        InputStream input = OOXMLParserTest.class
                .getResourceAsStream("/test-documents/testEXCEL-formats.xlsx");

        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        // TODO: should auto-detect without the resource name
        metadata.set(Metadata.RESOURCE_NAME_KEY, "testEXCEL-formats.xlsx");
        ContentHandler handler = new BodyContentHandler();

        try {
            parser.parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here

TOP

Related Classes of org.apache.tika.parser.Parser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.