Package org.apache.tika.sax

Examples of org.apache.tika.sax.BodyContentHandler


        String test =
            "<html><head><title>\u017d</title></head><body></body></html>";
        Metadata metadata = new Metadata();
        new HtmlParser().parse (
                new ByteArrayInputStream(test.getBytes("UTF-8")),
                new BodyContentHandler(),  metadata, new ParseContext());
        assertEquals("\u017d", metadata.get(TikaCoreProperties.TITLE));
    }
View Full Code Here


     *  details from id3v2
     */
    @Test
    public void testMp3ParsingID3v1v2() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                "/test-documents/testMP3id3v1_v2.mp3");
        try {
            parser.parse(stream, handler, metadata, new ParseContext());
        } finally {
            stream.close();
        }

        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));

        String content = handler.toString();
        assertTrue(content.contains("Test Title"));
        assertTrue(content.contains("Test Artist"));
        assertTrue(content.contains("Test Album"));
        assertTrue(content.contains("2008"));
        assertTrue(content.contains("Test Comment"));
View Full Code Here

     *  set of information out.
     */
    @Test
    public void testMp3ParsingID3v24() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                "/test-documents/testMP3id3v24.mp3");
        try {
            parser.parse(stream, handler, metadata, new ParseContext());
        } finally {
            stream.close();
        }

        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));

        String content = handler.toString();
        assertTrue(content.contains("Test Title"));
        assertTrue(content.contains("Test Artist"));
        assertTrue(content.contains("Test Album"));
        assertTrue(content.contains("2008"));
        assertTrue(content.contains("Test Comment"));
View Full Code Here

     *  range is correctly handled
     */
    @Test
    public void testMp3ParsingID3i18n() throws Exception {
       Parser parser = new AutoDetectParser(); // Should auto-detect!
       ContentHandler handler = new BodyContentHandler();
       Metadata metadata = new Metadata();

       InputStream stream = Mp3ParserTest.class.getResourceAsStream(
               "/test-documents/testMP3i18n.mp3");
       try {
View Full Code Here

     *  ID3v2 tags gets both extracted correctly
     */
    @Test
    public void testMp3ParsingLyrics() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        // Note - our test file has a lyrics tag, but lacks any
        //  lyrics in the tags, so we can't test that bit
        // TODO Find a better sample file
       
        InputStream stream = Mp3ParserTest.class.getResourceAsStream(
                "/test-documents/testMP3lyrics.mp3");
        try {
            parser.parse(stream, handler, metadata, new ParseContext());
        } finally {
            stream.close();
        }

        assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
        assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
        assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));

        String content = handler.toString();
        assertTrue(content.contains("Test Title"));
        assertTrue(content.contains("Test Artist"));
        assertTrue(content.contains("Test Album"));
        assertTrue(content.contains("2008"));
        assertTrue(content.contains("Test Comment"));
View Full Code Here

public class ArParserTest extends AbstractPkgTest {
    @Test
  public void testArParsing() throws Exception {
    Parser parser = new AutoDetectParser();

    ContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();

    InputStream stream = ArParserTest.class
        .getResourceAsStream("/test-documents/testARofText.ar");
    try {
      parser.parse(stream, handler, metadata, recursingContext);
    } finally {
      stream.close();
    }

    assertEquals("application/x-archive",
        metadata.get(Metadata.CONTENT_TYPE));
    String content = handler.toString();
    assertTrue(content.contains("testTXT.txt"));
    assertTrue(content.contains("Test d'indexation de Txt"));
    assertTrue(content.contains("http://www.apache.org"));

    stream = ArParserTest.class
        .getResourceAsStream("/test-documents/testARofSND.ar");
    try {
      parser.parse(stream, handler, metadata, recursingContext);
    } finally {
      stream.close();
    }

    assertEquals("application/x-archive",
        metadata.get(Metadata.CONTENT_TYPE));
    content = handler.toString();
    assertTrue(content.contains("testAU.au"));
  }
View Full Code Here

     *  tags.
     */
    @Test
    public void testTIKA424() throws Exception {
       Parser parser = new AutoDetectParser(); // Should auto-detect!
       ContentHandler handler = new BodyContentHandler();
       Metadata metadata = new Metadata();

       InputStream stream = Mp3ParserTest.class.getResourceAsStream(
               "/test-documents/test2.mp3");
       if(stream == null) {
          // You haven't downloaded the file
          // Skip the test
          return;
       }
      
       try {
           parser.parse(stream, handler, metadata, new ParseContext());
       } finally {
           stream.close();
       }

       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
       assertEquals("Plus loin vers l'ouest", metadata.get(TikaCoreProperties.TITLE));
       assertEquals("Merzhin", metadata.get(TikaCoreProperties.CREATOR));
       assertEquals("Merzhin", metadata.get(Metadata.AUTHOR));

       String content = handler.toString();
       assertTrue(content.contains("Plus loin vers l'ouest"));
      
       assertEquals("MPEG 3 Layer III Version 1", metadata.get("version"));
       assertEquals("44100", metadata.get("samplerate"));
       assertEquals("2", metadata.get("channels"));
View Full Code Here

   * embedded entries.
   */
    @Test
  public void testEmbedded() throws Exception {
    Parser parser = new AutoDetectParser(); // Should auto-detect!
    ContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();

    InputStream stream = ArParserTest.class
        .getResourceAsStream("/test-documents/testARofText.ar");
    try {
View Full Code Here

     *  is truncated before the end of the JPEG bit of the ID3 frame.
     */
    @Test
    public void testTIKA474() throws Exception {
       Parser parser = new AutoDetectParser(); // Should auto-detect!
       ContentHandler handler = new BodyContentHandler();
       Metadata metadata = new Metadata();

       InputStream stream = Mp3ParserTest.class.getResourceAsStream(
               "/test-documents/testMP3truncated.mp3");
      
      
       try {
           parser.parse(stream, handler, metadata, new ParseContext());
       } finally {
           stream.close();
       }

       // Check we could get the headers from the start
       assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
       assertEquals("Girl you have no faith in medicine", metadata.get(TikaCoreProperties.TITLE));
       assertEquals("The White Stripes", metadata.get(TikaCoreProperties.CREATOR));
       assertEquals("The White Stripes", metadata.get(Metadata.AUTHOR));

       String content = handler.toString();
       assertTrue(content.contains("Girl you have no faith in medicine"));
       assertTrue(content.contains("The White Stripes"));
       assertTrue(content.contains("Elephant"));
       assertTrue(content.contains("2003"));
      
View Full Code Here

    // TIKA-1024
    @Test
    public void testNakedUTF16BOM() throws Exception {
       Parser parser = new AutoDetectParser(); // Should auto-detect!
       ContentHandler handler = new BodyContentHandler();
       Metadata metadata = new Metadata();

       InputStream stream = Mp3ParserTest.class.getResourceAsStream(
               "/test-documents/testNakedUTF16BOM.mp3");
      
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.BodyContentHandler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.