Package org.apache.tika.sax

Examples of org.apache.tika.sax.TeeContentHandler


            StringWriter textBuffer = new StringWriter();
            StringWriter textMainBuffer = new StringWriter();
            StringWriter xmlBuffer = new StringWriter();
            StringBuilder metadataBuffer = new StringBuilder();

            ContentHandler handler = new TeeContentHandler(
                    getHtmlHandler(htmlBuffer),
                    getTextContentHandler(textBuffer),
                    getTextMainContentHandler(textMainBuffer),
                    getXmlContentHandler(xmlBuffer));
           
View Full Code Here


        Matcher matcher = new CompositeMatcher(
                DC_XPATH.parse("//dc:" + element),
                DC_XPATH.parse("//dc:" + element + "//text()"));
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

        Matcher matcher = new CompositeMatcher(
                META_XPATH.parse("//meta:" + element),
                META_XPATH.parse("//meta:" + element + "//text()"));
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, property), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

                META_XPATH.parse("//meta:user-defined//text()"));
        // eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
        ContentHandler branch = new MatchingContentHandler(
              new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
              matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

            ContentHandler ch, Metadata md, String name, String attribute) {
        Matcher matcher =
            META_XPATH.parse("//meta:document-statistic/@meta:"+attribute);
        ContentHandler branch = new MatchingContentHandler(
              new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

          ContentHandler ch, Metadata md, Property property, String attribute) {
      Matcher matcher =
          META_XPATH.parse("//meta:document-statistic/@meta:"+attribute);
      ContentHandler branch = new MatchingContentHandler(
            new AttributeMetadataHandler(META_NS, attribute, md, property), matcher);
      return new TeeContentHandler(ch, branch);
  }
View Full Code Here

  }

    protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context) {
        // We can no longer extend DcXMLParser due to the handling of dc:subject and dc:date
        // Process the Dublin Core Attributes
        ch = new TeeContentHandler(super.getContentHandler(ch, md, context),
                getDublinCoreHandler(md, TikaCoreProperties.TITLE, "title"),
                getDublinCoreHandler(md, TikaCoreProperties.CREATOR, "creator"),
                getDublinCoreHandler(md, TikaCoreProperties.DESCRIPTION, "description"),
                getDublinCoreHandler(md, TikaCoreProperties.PUBLISHER, "publisher"),
                getDublinCoreHandler(md, TikaCoreProperties.CONTRIBUTOR, "contributor"),
                getDublinCoreHandler(md, TikaCoreProperties.TYPE, "type"),
                getDublinCoreHandler(md, TikaCoreProperties.FORMAT, "format"),
                getDublinCoreHandler(md, TikaCoreProperties.IDENTIFIER, "identifier"),
                getDublinCoreHandler(md, TikaCoreProperties.LANGUAGE, "language"),
                getDublinCoreHandler(md, TikaCoreProperties.RIGHTS, "rights"));
       
        // Process the OO Meta Attributes
        ch = getMeta(ch, md, TikaCoreProperties.CREATED, "creation-date");
        // ODF uses dc:date for modified
        ch = new TeeContentHandler(ch, new ElementMetadataHandler(
                DublinCore.NAMESPACE_URI_DC, "date",
                md, TikaCoreProperties.MODIFIED));
       
        // ODF uses dc:subject for description
        ch = new TeeContentHandler(ch, new ElementMetadataHandler(
                DublinCore.NAMESPACE_URI_DC, "subject",
                md, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT));
        ch = getMeta(ch, md, TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, "keyword");
       
        ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), "editing-duration");       
View Full Code Here

                        }
                    }
                }
            };
            new HtmlParser().parse(
                    stream, new TeeContentHandler(body, link),
                    metadata, new ParseContext());
        } finally {
            stream.close();
        }
View Full Code Here

                metadata, property);
    }

    protected ContentHandler getContentHandler(
            ContentHandler handler, Metadata metadata, ParseContext context) {
        return new TeeContentHandler(
                super.getContentHandler(handler, metadata, context),
                getDublinCoreHandler(metadata, TikaCoreProperties.TITLE, "title"),
                getDublinCoreHandler(metadata, TikaCoreProperties.KEYWORDS, "subject"),
                getDublinCoreHandler(metadata, TikaCoreProperties.CREATOR, "creator"),
                getDublinCoreHandler(metadata, TikaCoreProperties.DESCRIPTION, "description"),
View Full Code Here

        String text = null;

        LinkContentHandler linkHandler = new LinkContentHandler();
        ContentHandler textHandler = new BodyContentHandler();
        TeeContentHandler teeHandler = new TeeContentHandler(linkHandler,
                textHandler);
        ParseContext parseContext = new ParseContext();
        // parse
        try {
            tika.getParser().parse(bais, teeHandler, md, parseContext);
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.TeeContentHandler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.