Package org.apache.tika.extractor

Examples of org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor


  public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
      throws IOException, TikaException, SAXException {

    EmbeddedDocumentExtractor extractor = context.get(EmbeddedDocumentExtractor.class,
        new ParsingEmbeddedDocumentExtractor(context));

    String charsetName = "windows-1252";

    metadata.set(Metadata.CONTENT_TYPE, MBOX_MIME_TYPE);
    metadata.set(Metadata.CONTENT_ENCODING, charsetName);
View Full Code Here


  public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
      throws IOException, SAXException, TikaException {

    // Use the delegate parser to parse the contained document
    EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class,
        new ParsingEmbeddedDocumentExtractor(context));

    metadata.set(Metadata.CONTENT_TYPE, MS_OUTLOOK_PST_MIMETYPE.toString());

    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
View Full Code Here

        }

        // Use the delegate parser to parse the contained document
        EmbeddedDocumentExtractor extractor = context.get(
                EmbeddedDocumentExtractor.class,
                new ParsingEmbeddedDocumentExtractor(context));

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        try {
View Full Code Here

            }

            // Use the delegate parser to parse the compressed document
            EmbeddedDocumentExtractor extractor = context.get(
                    EmbeddedDocumentExtractor.class,
                    new ParsingEmbeddedDocumentExtractor(context));
            if (extractor.shouldParseEmbedded(entrydata)) {
                extractor.parseEmbedded(cis, xhtml, entrydata, true);
            }
        } finally {
            cis.close();
View Full Code Here

     */
    protected void handleCompletedObject() throws IOException, SAXException, TikaException {
       EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class);
      
       if (embeddedExtractor == null) {
           embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context);
       }
      
       byte[] bytes = os.toByteArray();
       if (state == EMB_STATE.OBJDATA) {
           RTFObjDataParser objParser = new RTFObjDataParser();
View Full Code Here

    protected EmbeddedDocumentExtractor getEmbeddedDocumentExtractor() {
        EmbeddedDocumentExtractor extractor =
                context.get(EmbeddedDocumentExtractor.class);
        if (extractor == null) {
            extractor = new ParsingEmbeddedDocumentExtractor(context);
        }
        return extractor;
    }
View Full Code Here

    protected AbstractPOIFSExtractor(ParseContext context) {
        EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class);

        if (ex==null) {
            this.extractor = new ParsingEmbeddedDocumentExtractor(context);
        } else {
            this.extractor = ex;
        }
       
        tikaConfig = context.get(TikaConfig.class);
View Full Code Here

TOP

Related Classes of org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.