Package org.apache.tika.sax

Examples of org.apache.tika.sax.TeeContentHandler


      charset = "UTF-8";
    }
    EchoHandler data = new EchoHandler(charset);
    LinkExtractor extractor = new LinkExtractor(link, elements);
   
    TeeContentHandler parallelHandler = new TeeContentHandler(data, extractor);

    InputStream instream = entity.obtainContent();
    try {
      parser.parse(instream, parallelHandler, metadata);
     
View Full Code Here


                        }
                    }
                }
            };
            new HtmlParser().parse(
                    stream, new TeeContentHandler(body, link),
                    metadata, new ParseContext());
        } finally {
            stream.close();
        }
View Full Code Here

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        ContentHandler body = new BodyContentHandler();
        ContentHandler tee = new TeeContentHandler(handler, body);
        super.parse(stream, tee, metadata, context);

        String content = body.toString();
        metadata.set("fulltext", content);
View Full Code Here

            StringWriter htmlBuffer = new StringWriter();
            StringWriter textBuffer = new StringWriter();
            StringWriter xmlBuffer = new StringWriter();
            StringBuilder metadataBuffer = new StringBuilder();

            ContentHandler handler = new TeeContentHandler(
                    getHtmlHandler(htmlBuffer),
                    getTextContentHandler(textBuffer),
                    getXmlContentHandler(xmlBuffer));
            Metadata md = new Metadata();
View Full Code Here

        Matcher matcher = new CompositeMatcher(
                META_XPATH.parse("//meta:" + element),
                META_XPATH.parse("//meta:" + element + "//text()"));
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

            ContentHandler ch, Metadata md, String name, String attribute) {
        Matcher matcher =
            META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

                        }
                    }
                }
            };
            new HtmlParser().parse(
                    stream, new TeeContentHandler(body, link),
                    metadata, new ParseContext());
        } finally {
            stream.close();
        }
View Full Code Here

        Matcher matcher = new CompositeMatcher(
                META_XPATH.parse("//meta:" + element),
                META_XPATH.parse("//meta:" + element + "//text()"));
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

                META_XPATH.parse("//meta:user-defined/@meta:name"),
                META_XPATH.parse("//meta:user-defined//text()"));
        ContentHandler branch = new MatchingContentHandler(
              new AttributeDependantMetadataHandler(md, "meta:name", USER_DEFINED_METADATA_NAME_PREFIX),
              matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

            ContentHandler ch, Metadata md, String name, String attribute) {
        Matcher matcher =
            META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
        ContentHandler branch =
            new MatchingContentHandler(new MetadataHandler(md, name), matcher);
        return new TeeContentHandler(ch, branch);
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.TeeContentHandler

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.