Package org.apache.tika.sax

Examples of org.apache.tika.sax.EmbeddedContentHandler


        // Call the recursing handler
        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
            embeddedExtractor.parseEmbedded(
                    TikaInputStream.get(part.getInputStream()),
                    new EmbeddedContentHandler(handler),
                    metadata, false);
        }
    }
View Full Code Here


        // Use the delegate parser to parse this entry
        TemporaryResources tmp = new TemporaryResources();
        try {
            DELEGATING_PARSER.parse(
                    TikaInputStream.get(new CloseShieldInputStream(stream), tmp),
                    new EmbeddedContentHandler(new BodyContentHandler(handler)),
                    metadata, context);
        } catch (TikaException e) {
            // Could not parse the entry, just skip the content
        } finally {
            tmp.close();
View Full Code Here

               // TIKA-704: OLE 2.0 embedded non-Office document?
               stream = TikaInputStream.get(
                     fs.createDocumentInputStream("CONTENTS"));
               if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                  embeddedExtractor.parseEmbedded(
                        stream, new EmbeddedContentHandler(handler),
                        metadata, false);
               }
            } else if (POIFSDocumentType.OLE10_NATIVE == type) {
                // TIKA-704: OLE 1.0 embedded document
                Ole10Native ole =
                        Ole10Native.createFromEmbeddedOleObject(fs);
                metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
                byte[] data = ole.getDataBuffer();
                if (data != null) {
                    stream = TikaInputStream.get(data);
                }

                if (stream != null
                        && embeddedExtractor.shouldParseEmbedded(metadata)) {
                    embeddedExtractor.parseEmbedded(
                            stream, new EmbeddedContentHandler(handler),
                            metadata, false);
                }
            } else {
                handleEmbeddedFile(part, handler);
            }
View Full Code Here

        // Call the recursing handler
        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
            embeddedExtractor.parseEmbedded(
                    TikaInputStream.get(part.getInputStream()),
                    new EmbeddedContentHandler(handler),
                    metadata, false);
        }
    }
View Full Code Here

                thumbnailMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, thumbName);
                thumbnailMetadata.set(Metadata.CONTENT_TYPE, tPart.getContentType());
                thumbnailMetadata.set(TikaCoreProperties.TITLE, tPart.getPartName().getName());
               
                if (embeddedExtractor.shouldParseEmbedded(thumbnailMetadata)) {
                    embeddedExtractor.parseEmbedded(TikaInputStream.get(tStream), new EmbeddedContentHandler(handler), thumbnailMetadata, true);
                }
               
                tStream.close();
                thumbIndex ++;
            }
View Full Code Here

               // TIKA-704: OLE 2.0 embedded non-Office document?
               stream = TikaInputStream.get(
                     fs.createDocumentInputStream("CONTENTS"));
               if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                  embeddedExtractor.parseEmbedded(
                        stream, new EmbeddedContentHandler(handler),
                        metadata, false);
               }
            } else if (POIFSDocumentType.OLE10_NATIVE == type) {
                // TIKA-704: OLE 1.0 embedded document
                Ole10Native ole =
                        Ole10Native.createFromEmbeddedOleObject(fs);
                if (ole.getLabel() != null) {
                    metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
                }
                byte[] data = ole.getDataBuffer();
                if (data != null) {
                    stream = TikaInputStream.get(data);
                }

                if (stream != null
                        && embeddedExtractor.shouldParseEmbedded(metadata)) {
                    embeddedExtractor.parseEmbedded(
                            stream, new EmbeddedContentHandler(handler),
                            metadata, false);
                }
            } else {
                handleEmbeddedFile(part, handler, rel);
            }
View Full Code Here

        // Call the recursing handler
        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
            embeddedExtractor.parseEmbedded(
                    TikaInputStream.get(part.getInputStream()),
                    new EmbeddedContentHandler(handler),
                    metadata, false);
        }
    }
View Full Code Here

                    try {
                        image.write2OutputStream(buffer);
                        image.clear();
                        extractor.parseEmbedded(
                                new ByteArrayInputStream(buffer.toByteArray()),
                                new EmbeddedContentHandler(handler),
                                metadata, false);
                    } catch (IOException e) {
                        // could not extract this image, so just skip it...
                    }
                }
View Full Code Here

            TikaInputStream stream = null;
            try{
                stream = TikaInputStream.get(file.createInputStream());
                extractor.parseEmbedded(
                        stream,
                        new EmbeddedContentHandler(handler),
                        metadata, false);
            } finally {
                IOUtils.closeQuietly(stream);
            }
        }
View Full Code Here

              // Decrypt the OLE2 stream, and delegate the resulting OOXML
              //  file to the regular OOXML parser for normal handling
              OOXMLParser parser = new OOXMLParser();

              parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
                    new BodyContentHandler(xhtml)),
                    metadata, context);
           } catch (GeneralSecurityException ex) {
              throw new EncryptedDocumentException(ex);
           }
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.EmbeddedContentHandler

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.