Package org.apache.tika.parser

Examples of org.apache.tika.parser.AutoDetectParser


     * See TIKA-437.
     */
    @Test
    public void testProtectedExcelFile() throws Exception {

        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        ContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();

        InputStream input = getTestDocument("protectedFile.xlsx");
        try {
            parser.parse(input, handler, metadata, context);

            assertEquals(
                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    metadata.get(Metadata.CONTENT_TYPE));

View Full Code Here


    }

    // TIKA-1024
    @Test
    public void testNakedUTF16BOM() throws Exception {
       Parser parser = new AutoDetectParser(); // Should auto-detect!
       ContentHandler handler = new BodyContentHandler();
       Metadata metadata = new Metadata();

       InputStream stream = Mp3ParserTest.class.getResourceAsStream(
               "/test-documents/testNakedUTF16BOM.mp3");
      
       try {
           parser.parse(stream, handler, metadata, new ParseContext());
       } finally {
           stream.close();
       }
       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
       assertEquals("", metadata.get(XMPDM.GENRE));
View Full Code Here

     * Test docx without headers
     * TIKA-633
     */
    @Test
    public void testNullHeaders() throws Exception {
        Parser parser = new AutoDetectParser();
        Metadata metadata = new Metadata();
        ContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();

        InputStream input = getTestDocument("NullHeader.docx");
        try {
            parser.parse(input, handler, metadata, context);
            assertFalse(handler.toString().length()==0);
        } finally {
            input.close();
        }
    }
View Full Code Here

   public void setUp() throws Exception {
      tracker = new EmbeddedTrackingParser();
      trackingContext = new ParseContext();
      trackingContext.set(Parser.class, tracker);
     
      autoDetectParser = new AutoDetectParser();
      recursingContext = new ParseContext();
      recursingContext.set(Parser.class, autoDetectParser);
   }
View Full Code Here

        Metadata metadata = new Metadata();

        InputStream stream = OOXMLParserTest.class.getResourceAsStream(
                "/test-documents/testWORD_various.docx");
        try {
            new AutoDetectParser().parse(stream, handler, metadata, new ParseContext());
        } finally {
            stream.close();
        }

        String content = handler.toString();
View Full Code Here

        Metadata metadata = new Metadata();

        InputStream stream = OOXMLParserTest.class.getResourceAsStream(
                "/test-documents/testPPT_various.pptx");
        try {
            new AutoDetectParser().parse(stream, handler, metadata, new ParseContext());
        } finally {
            stream.close();
        }

        String content = handler.toString();
View Full Code Here

         mediatypes.clear();
      }
     
      public Set<MediaType> getSupportedTypes(ParseContext context) {
         // Cheat!
         return (new AutoDetectParser()).getSupportedTypes(context);
      }
View Full Code Here

        Metadata metadata = new Metadata();

        InputStream stream = OOXMLParserTest.class.getResourceAsStream(
                "/test-documents/testPPT_masterFooter.pptx");
        try {
            new AutoDetectParser().parse(stream, handler, metadata, new ParseContext());
        } finally {
            stream.close();
        }

        String content = handler.toString();
View Full Code Here

        Metadata metadata = new Metadata();

        InputStream stream = OOXMLParserTest.class.getResourceAsStream(
                "/test-documents/testWordArt.pptx");
        try {
            new AutoDetectParser().parse(stream, handler, metadata, new ParseContext());
        } finally {
            stream.close();
        }
        String content = handler.toString();
        assertContains("Here is some red word Art", content);
View Full Code Here

import org.xml.sax.ContentHandler;

public class ArParserTest extends AbstractPkgTest {
    @Test
  public void testArParsing() throws Exception {
    Parser parser = new AutoDetectParser();

    ContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();

    InputStream stream = ArParserTest.class
        .getResourceAsStream("/test-documents/testARofText.ar");
    try {
      parser.parse(stream, handler, metadata, recursingContext);
    } finally {
      stream.close();
    }

    assertEquals("application/x-archive",
        metadata.get(Metadata.CONTENT_TYPE));
    String content = handler.toString();
    assertTrue(content.contains("testTXT.txt"));
    assertTrue(content.contains("Test d'indexation de Txt"));
    assertTrue(content.contains("http://www.apache.org"));

    stream = ArParserTest.class
        .getResourceAsStream("/test-documents/testARofSND.ar");
    try {
      parser.parse(stream, handler, metadata, recursingContext);
    } finally {
      stream.close();
    }

    assertEquals("application/x-archive",
View Full Code Here

TOP

Related Classes of org.apache.tika.parser.AutoDetectParser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.