Package org.apache.tika.sax

Examples of org.apache.tika.sax.XHTMLContentHandler.startDocument()


        metadata.set(
                TikaCoreProperties.MODIFIED,
                font.getHeader().getModified().getTime());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }

}
View Full Code Here


        metadata.set(Metadata.CONTENT_TYPE, MBOX_MIME_TYPE);
        metadata.set(Metadata.CONTENT_ENCODING, "us-ascii");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        ParseStates parseState = ParseStates.START;
        String multiLine = null;
        boolean inQuote = false;
        int numEmails = 0;
View Full Code Here

            metadata.set(Metadata.CONTENT_TYPE, "application/xml");
        }

        final XHTMLContentHandler xhtml =
            new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.startElement("p");

        TaggedContentHandler tagged = new TaggedContentHandler(handler);
        try {
            context.getSAXParser().parse(
View Full Code Here

            metadata.set(TikaCoreProperties.DESCRIPTION, description);
            // store the other fields in the metadata

            XHTMLContentHandler xhtml =
                new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();

            xhtml.element("h1", title);
            xhtml.element("p", description);

            xhtml.startElement("ul");
View Full Code Here

            throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, "audio/mpeg");
        metadata.set(XMPDM.AUDIO_COMPRESSOR, "MP3");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // Create handlers for the various kinds of ID3 tags
        ID3TagsAndAudio audioAndTags = getAllTagHandlers(stream, handler);

        if (audioAndTags.tags.length > 0) {
View Full Code Here

        EmbeddedDocumentExtractor extractor = context.get(
                EmbeddedDocumentExtractor.class,
                new ParsingEmbeddedDocumentExtractor(context));

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        try {
            ArchiveEntry entry = ais.getNextEntry();
            while (entry != null) {
                if (!entry.isDirectory()) {
View Full Code Here

               default:
                  throw new TikaException("Unhandled iWorks file " + type);
               }

               metadata.add(Metadata.CONTENT_TYPE, type.getType().toString());
               xhtml.startDocument();
               if (contentHandler != null) {
                  context.getSAXParser().parse(
                          new CloseShieldInputStream(entryStream),
                          new OfflineContentHandler(contentHandler)
                  );
View Full Code Here

        if (!type.equals(MediaType.OCTET_STREAM)) {
            metadata.set(CONTENT_TYPE, type.toString());
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        try {
            Metadata entrydata = new Metadata();
            String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
            if (name != null) {
View Full Code Here

        HashMap<String,String> properties = this.loadProperties(stream);
        this.setMetadata(metadata, properties);

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        // TODO: put body content here
        xhtml.startElement("p");
        String body = clean(properties.get("body"));
        if (body != null)
           xhtml.characters(body);
View Full Code Here

           return;
        }

       
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
       
       
        // Pull out some information from the header box
        MovieHeaderBox mHeader = getOrNull(moov, MovieHeaderBox.class);
        if (mHeader != null) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.