Package org.apache.tika.parser

Examples of org.apache.tika.parser.DefaultParser


        return new DefaultDetector(types, loader);
    }

    private static CompositeParser getDefaultParser(
            MimeTypes types, ServiceLoader loader) {
        return new DefaultParser(types.getMediaTypeRegistry(), loader);
    }
View Full Code Here


                Detector.class.getName(),
                new DefaultDetector(Activator.class.getClassLoader()),
                new Properties());
        parserService = context.registerService(
                Parser.class.getName(),
                new DefaultParser(Activator.class.getClassLoader()),
                new Properties());
    }
View Full Code Here

     * @throws MimeTypeException if the built-in media type rules are broken
     * @throws IOException  if the built-in media type rules can not be read
     */
    public TikaConfig(ClassLoader loader)
            throws MimeTypeException, IOException {
        // Delegate to another constructor of this class, passing a
        // DefaultParser created with the supplied class loader.
        this(new DefaultParser(loader));
    }
View Full Code Here

     *
     * @throws MimeTypeException if the built-in media type rules are broken
     * @throws IOException  if the built-in media type rules can not be read
     */
    public TikaConfig() throws MimeTypeException, IOException {
        // Delegate to another constructor of this class, passing a
        // DefaultParser created with its no-argument constructor.
        this(new DefaultParser());
    }
View Full Code Here

     * @throws MimeTypeException if the built-in media type rules are broken
     * @throws IOException  if the built-in media type rules can not be read
     */
    public TikaConfig(ClassLoader loader)
            throws MimeTypeException, IOException {
        this.parser = new DefaultParser(loader);
        this.mimeTypes = MimeTypes.getDefaultMimeTypes();
    }
View Full Code Here

        if (config == null) {
            config = System.getenv("TIKA_CONFIG");
        }
        if (config == null) {
            this.mimeTypes = MimeTypes.getDefaultMimeTypes();
            this.parser = new DefaultParser(
                    mimeTypes.getMediaTypeRegistry(), loader);
        } else {
            InputStream stream;
            File file = new File(config);
            if (file.isFile()) {
View Full Code Here

    @Override
    public void extractFrom( final Binary binary,
                             final TextExtractor.Output output,
                             final Context context ) throws Exception {

        final DefaultParser parser = initialize();
        final Integer writeLimit = this.writeLimit;
        processStream(binary, new BinaryOperation<Object>() {
            @Override
            public Object execute( InputStream stream ) throws Exception {
                Metadata metadata = prepareMetadata(binary, context);
                //TODO author=Horia Chiorean date=1/30/13 description=//TIKA 1.2 TXTParser seems to have a bug, always adding 1 ignorable whitespace to the actual chars to be parsed
                //https://issues.apache.org/jira/browse/TIKA-1069
                ContentHandler textHandler = writeLimit == null ? new BodyContentHandler() : new BodyContentHandler(writeLimit + 1);
                try {
                    LOGGER.debug("Using TikaTextExtractor to extract text");
                    // Parse the input stream ...
                    parser.parse(stream, textHandler, metadata, new ParseContext());
                } catch (SAXException sae) {
                    LOGGER.warn(TikaI18n.parseExceptionWhileExtractingText, sae.getMessage());
                } catch (NoClassDefFoundError ncdfe) {
                    LOGGER.warn(TikaI18n.warnNoClassDefFound, ncdfe.getMessage());
                } catch (Throwable e) {
View Full Code Here

    protected DefaultParser initialize() {
        if (parser == null) {
            initLock.lock();
            try {
                if (parser == null) {
                    parser = new DefaultParser(this.getClass().getClassLoader());
                }
                LOGGER.debug("Initializing Tika Text Extractor");
                Map<MediaType, Parser> parsers = parser.getParsers();
                LOGGER.debug("Tika parsers found: {0}",parsers.size());
                for (MediaType mediaType : parsers.keySet()) {
View Full Code Here

        return new DefaultDetector(types, loader);
    }

    private static CompositeParser getDefaultParser(
            MimeTypes types, ServiceLoader loader) {
        return new DefaultParser(types.getMediaTypeRegistry(), loader);
    }
View Full Code Here

     */
    public TikaConfig(ClassLoader loader)
            throws MimeTypeException, IOException {
        this.mimeTypes = MimeTypes.getDefaultMimeTypes();
        this.detector = new DefaultDetector(mimeTypes, loader);
        this.parser = new DefaultParser(mimeTypes.getMediaTypeRegistry(), loader);
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.parser.DefaultParser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.