Package org.apache.tika.sax.xpath

Examples of org.apache.tika.sax.xpath.Matcher


        return new TeeContentHandler(ch, branch);
    }

    private static ContentHandler getStatistic(
            ContentHandler ch, Metadata md, String name, String attribute) {
        Matcher matcher =
            META_XPATH.parse("//meta:document-statistic/@meta:"+attribute);
        ContentHandler branch = new MatchingContentHandler(
              new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here


    private static final String META_NS = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
    private static final XPathParser META_XPATH = new XPathParser("meta", META_NS);

    private static ContentHandler getMeta(
            ContentHandler ch, Metadata md, String name, String element) {
        Matcher matcher = new CompositeMatcher(
                META_XPATH.parse("//meta:" + element),
                META_XPATH.parse("//meta:" + element + "//text()"));
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, name), matcher);
        return new TeeContentHandler(ch, branch);
View Full Code Here

        return new TeeContentHandler(ch, branch);
    }

    private static ContentHandler getUserDefined(
            ContentHandler ch, Metadata md) {
        Matcher matcher = new CompositeMatcher(
                META_XPATH.parse("//meta:user-defined/@meta:name"),
                META_XPATH.parse("//meta:user-defined//text()"));
        // eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
        ContentHandler branch = new MatchingContentHandler(
              new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
View Full Code Here

        return new TeeContentHandler(ch, branch);
    }

    private static ContentHandler getStatistic(
            ContentHandler ch, Metadata md, String name, String attribute) {
        Matcher matcher =
            META_XPATH.parse("//meta:document-statistic/@meta:"+attribute);
        ContentHandler branch = new MatchingContentHandler(
              new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

    private static final XPathParser DC_XPATH = new XPathParser(
            "dc", "http://purl.org/dc/elements/1.1/");

    private static ContentHandler getDublinCore(
            ContentHandler ch, Metadata md, String name, String element) {
        Matcher matcher = new CompositeMatcher(
                DC_XPATH.parse("//dc:" + element),
                DC_XPATH.parse("//dc:" + element + "//text()"));
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, name), matcher);
        return new TeeContentHandler(ch, branch);
View Full Code Here

     *  as HTML as a string, excluding the rest
     */
    public String parseOnePartToHTML() throws IOException, SAXException, TikaException {
        // Only get things under html -> body -> div (class=header)
        XPathParser xhtmlParser = new XPathParser("xhtml", XHTMLContentHandler.XHTML);
        Matcher divContentMatcher = xhtmlParser.parse(
                "/xhtml:html/xhtml:body/xhtml:div/descendant::node()");       
        ContentHandler handler = new MatchingContentHandler(
                new ToXMLContentHandler(), divContentMatcher);
       
        InputStream stream = ContentHandlerExample.class.getResourceAsStream("test2.doc");
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.xpath.Matcher

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.