Package org.apache.tika.sax

Examples of org.apache.tika.sax.ToHTMLContentHandler


  @Test
  public void testParse() throws Exception {
    OutlookPSTParser pstParser = new OutlookPSTParser();
    Metadata metadata = new Metadata();
    ContentHandler handler = new ToHTMLContentHandler();

    ParseContext context = new ParseContext();
    EmbeddedTrackingExtrator trackingExtrator = new EmbeddedTrackingExtrator(context);
    context.set(EmbeddedDocumentExtractor.class, trackingExtrator);
    context.set(Parser.class, new AutoDetectParser());

    pstParser.parse(getResourceAsStream("/test-documents/testPST.pst"), handler, metadata, context);

    String output = handler.toString();

    assertFalse(output.isEmpty());
    assertTrue(output.contains("<meta name=\"Content-Length\" content=\"271360\">"));
    assertTrue(output.contains("<meta name=\"Content-Type\" content=\"application/vnd.ms-outlook-pst\">"));
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.ToHTMLContentHandler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.