Package org.apache.tika.sax

Examples of org.apache.tika.sax.XHTMLContentHandler.startDocument()


        byte[] header = new byte[128];
        IOUtils.readFully(stream, header);
        String version = new String(header, 0, 6, "US-ASCII");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        if (version.equals("AC1018")) {
            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
            if(skipToPropertyInfoSection(stream, header)){
                get2004Props(stream,metadata,xhtml);
View Full Code Here


        metadata.set(Metadata.TITLE, feedTitle);
        metadata.set(Metadata.DESCRIPTION, feedDesc);
        // store the other fields in the metadata

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        List entries = feed.getEntries();
        for (Iterator i = entries.iterator(); i.hasNext();) {
            SyndEntry entry = (SyndEntry) i.next();
            String link = entry.getLink();
View Full Code Here

        metadata.set(
                Property.internalDate(DublinCore.MODIFIED),
                font.getHeader().getModified().getTime());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }

    /**
     * @deprecated This method will be removed in Apache Tika 1.0.
View Full Code Here

            // caused by the document being corrupted or by the format
            // just being unsupported. So we do nothing.
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }

    /**
     * @deprecated This method will be removed in Apache Tika 1.0.
View Full Code Here

        metadata.set(Metadata.CONTENT_TYPE, "video/x-flv");
        metadata.set("hasVideo", Boolean.toString((typeFlags & MASK_VIDEO) != 0));
        metadata.set("hasAudio", Boolean.toString((typeFlags & MASK_AUDIO) != 0));

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // flv tag stream follows...
        while (true) {
            int type = datainput.read();
            if (type == -1) {
View Full Code Here

            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, "audio/midi");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // MidiSystem expects the stream to support the mark feature
        InputStream buffered = new BufferedInputStream(stream);
        try {
            Sequence sequence = MidiSystem.getSequence(buffered);
View Full Code Here

            Document sd = new CustomStyledDocument();
            new RTFEditorKit().read(stream, sd, 0);

            XHTMLContentHandler xhtml =
                new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();
            xhtml.element("p", sd.getText(0, sd.getLength()));
            xhtml.endDocument();
        } catch (BadLocationException e) {
            throw new TikaException("Error parsing an RTF document", e);
        }
View Full Code Here

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        POIFSFileSystem filesystem;
        if(stream instanceof TikaInputStream &&
          ((TikaInputStream)stream).getOpenContainer() != null) {
            filesystem = (POIFSFileSystem)((TikaInputStream)stream).getOpenContainer();
View Full Code Here

          contentHandler = new NumbersContentHandler(xhtml, metadata);
        } else {
          return;
        }

        xhtml.startDocument();
        context.getSAXParser().parse(
                new CloseShieldInputStream(stream),
                new OfflineContentHandler(contentHandler)
        );
        xhtml.endDocument();
View Full Code Here

            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        new ImageMetadataExtractor(metadata).parseJpeg(stream);

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }

}
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.