Package org.apache.tika.sax

Examples of org.apache.tika.sax.XHTMLContentHandler.startDocument()


    protected void parseArchive(
            ArchiveInputStream archive, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        ArchiveEntry entry = archive.getNextEntry();
        while (entry != null) {
            if (!entry.isDirectory()) {
                xhtml.startElement("div", "class", "package-entry");
View Full Code Here


    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // AudioSystem expects the stream to support the mark feature
        InputStream buffered = new BufferedInputStream(stream);
        try {
            AudioFormat format =
View Full Code Here

        metadata.set(Metadata.CONTENT_TYPE, MBOX_MIME_TYPE);
        metadata.set(Metadata.CONTENT_ENCODING, "us-ascii");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        ParseStates parseState = ParseStates.START;
        String multiLine = null;
        boolean inQuote = false;
        int numEmails = 0;
View Full Code Here

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // MidiSystem expects the stream to support the mark feature
        InputStream buffered = new BufferedInputStream(stream);
        try {
            Sequence sequence = MidiSystem.getSequence(buffered);
View Full Code Here

            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, "application/x-bzip");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // At the end we want to close the bzip2 stream to release any associated
        // resources, but the underlying document stream should not be closed
        InputStream bzip2 =
            new BZip2CompressorInputStream(new CloseShieldInputStream(stream));
View Full Code Here

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        POIFSFileSystem filesystem = new POIFSFileSystem(stream);

        // Parse summary entries first, to make metadata available early
        parseSummaryEntryIfExists(
View Full Code Here

            DefaultStyledDocument sd = new DefaultStyledDocument();
            new RTFEditorKit().read(stream, sd, 0);

            XHTMLContentHandler xhtml =
                new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();
            xhtml.element("p", sd.getText(0, sd.getLength()));
            xhtml.endDocument();
        } catch (BadLocationException e) {
            throw new TikaException("Error parsing an RTF document", e);
        }
View Full Code Here

            metadata.set(Metadata.CONTENT_TYPE, "application/xml");
        }

        final XHTMLContentHandler xhtml =
            new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.startElement("p");

        getSAXParser(context).parse(
                new CloseShieldInputStream(stream),
                new OfflineContentHandler(
View Full Code Here

            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, "audio/mpeg");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        byte[] tag = getSuffix(stream, 128);
        if (tag.length == 128
                && tag[0] == 'T' && tag[1] == 'A' && tag[2] == 'G') {
            String title = getString(tag, 3, 33);
View Full Code Here

            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, "audio/mpeg");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // Create handlers for the various kinds of ID3 tags
        ID3TagsAndAudio audioAndTags = getAllTagHandlers(stream, handler);

        if (audioAndTags.tags.length > 0) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.