Package org.apache.tika.sax

Examples of org.apache.tika.sax.TeeContentHandler


                metadata, name);
    }

    protected ContentHandler getContentHandler(
            ContentHandler handler, Metadata metadata, ParseContext context) {
        return new TeeContentHandler(
                super.getContentHandler(handler, metadata, context),
                getDublinCoreHandler(metadata, DublinCore.TITLE, "title"),
                getDublinCoreHandler(metadata, DublinCore.SUBJECT, "subject"),
                getDublinCoreHandler(metadata, DublinCore.CREATOR, "creator"),
                getDublinCoreHandler(metadata, DublinCore.DESCRIPTION, "description"),
View Full Code Here


        StringWriter textBuffer = new StringWriter();
        StringWriter textMainBuffer = new StringWriter();
        StringWriter xmlBuffer = new StringWriter();
        StringBuilder metadataBuffer = new StringBuilder();

        ContentHandler handler = new TeeContentHandler(
                getHtmlHandler(htmlBuffer),
                getTextContentHandler(textBuffer),
                getTextMainContentHandler(textMainBuffer),
                getXmlContentHandler(xmlBuffer));
View Full Code Here

        Throwable t;

        boolean alive = false;
        ForkClient client = acquireClient();
        try {
            ContentHandler tee = new TeeContentHandler(
                    handler, new MetadataContentHandler(metadata));
            t = client.call("parse", stream, tee, metadata, context);
            alive = true;
        } catch (TikaException te) {
            // Problem occurred on our side
View Full Code Here

        Matcher matcher = new CompositeMatcher(
                META_XPATH.parse("//meta:" + element),
                META_XPATH.parse("//meta:" + element + "//text()"));
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

                META_XPATH.parse("//meta:user-defined//text()"));
        // eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
        ContentHandler branch = new MatchingContentHandler(
              new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
              matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

            ContentHandler ch, Metadata md, String name, String attribute) {
        Matcher matcher =
            META_XPATH.parse("//meta:document-statistic/@meta:"+attribute);
        ContentHandler branch = new MatchingContentHandler(
              new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

                        }
                    }
                }
            };
            new HtmlParser().parse(
                    stream, new TeeContentHandler(body, link),
                    metadata, new ParseContext());
        } finally {
            stream.close();
        }
View Full Code Here

                    (Property) tikaMetadata);
        }
       
        protected ContentHandler getContentHandler(
                ContentHandler handler, Metadata metadata, ParseContext context) {
            return new TeeContentHandler(
                    super.getContentHandler(handler, metadata, context),
                    getCustomElementHandler(metadata, FIRST_NAME, "FirstName"),
                    getCustomElementHandler(metadata, LAST_NAME, "LastName"));
        }
View Full Code Here

            thread.start();

            TaggedContentHandler tagged = new TaggedContentHandler(handler);
            try {
                context.getSAXParser().parse(
                        stream, new TeeContentHandler(
                                tagged, new MetaHandler(metadata)));
            } catch (SAXException e) {
                tagged.throwIfCauseOf(e);
                throw new TikaException(
                        "Invalid network parser output", e);
View Full Code Here

                        }
                    }
                }
            };
            new HtmlParser().parse(
                    stream, new TeeContentHandler(body, link),
                    metadata, new ParseContext());
        } finally {
            stream.close();
        }
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.TeeContentHandler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.