Package org.apache.any23.extractor

Examples of org.apache.any23.extractor.SingleDocumentExtraction.run()


        );
        final ExtractionResultImpl extractionResult = new ExtractionResultImpl(extractionContext, extractor, output);
        try {
            if (extractor instanceof BlindExtractor) {
                final BlindExtractor blindExtractor = (BlindExtractor) extractor;
                blindExtractor.run(extractionParameters, extractionContext, documentURI, extractionResult);
            } else if (extractor instanceof ContentExtractor) {
                ensureHasLocalCopy();
                final ContentExtractor contentExtractor = (ContentExtractor) extractor;
                contentExtractor.run(
                        extractionParameters,
View Full Code Here


        );
        final ExtractionResultImpl extractionResult = new ExtractionResultImpl(extractionContext, extractor, output);
        try {
            if (extractor instanceof BlindExtractor) {
                final BlindExtractor blindExtractor = (BlindExtractor) extractor;
                blindExtractor.run(extractionParameters, extractionContext, documentURI, extractionResult);
            } else if (extractor instanceof ContentExtractor) {
                ensureHasLocalCopy();
                final ContentExtractor contentExtractor = (ContentExtractor) extractor;
                contentExtractor.run(
                        extractionParameters,
View Full Code Here

                final BlindExtractor blindExtractor = (BlindExtractor) extractor;
                blindExtractor.run(extractionParameters, extractionContext, documentURI, extractionResult);
            } else if (extractor instanceof ContentExtractor) {
                ensureHasLocalCopy();
                final ContentExtractor contentExtractor = (ContentExtractor) extractor;
                contentExtractor.run(
                        extractionParameters,
                        extractionContext,
                        localDocumentSource.openInputStream(),
                        extractionResult
                );
View Full Code Here

                final BlindExtractor blindExtractor = (BlindExtractor) extractor;
                blindExtractor.run(extractionParameters, extractionContext, documentURI, extractionResult);
            } else if (extractor instanceof ContentExtractor) {
                ensureHasLocalCopy();
                final ContentExtractor contentExtractor = (ContentExtractor) extractor;
                contentExtractor.run(
                        extractionParameters,
                        extractionContext,
                        localDocumentSource.openInputStream(),
                        extractionResult
                );
View Full Code Here

                        extractionResult
                );
            } else if (extractor instanceof TagSoupDOMExtractor) {
                final TagSoupDOMExtractor tagSoupDOMExtractor = (TagSoupDOMExtractor) extractor;
                final DocumentReport documentReport = getTagSoupDOM(extractionParameters);
                tagSoupDOMExtractor.run(
                        extractionParameters,
                        extractionContext,
                        documentReport.getDocument(),
                        extractionResult
                );
View Full Code Here

                        extractionResult
                );
            } else if (extractor instanceof TagSoupDOMExtractor) {
                final TagSoupDOMExtractor tagSoupDOMExtractor = (TagSoupDOMExtractor) extractor;
                final DocumentReport documentReport = getTagSoupDOM(extractionParameters);
                tagSoupDOMExtractor.run(
                        extractionParameters,
                        extractionContext,
                        documentReport.getDocument(),
                        extractionResult
                );
View Full Code Here

        SingleDocumentExtraction ex = new SingleDocumentExtraction(
            new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseURI.toString()),
            getExtractorFactory(), new RepositoryWriter(conn)
        );
        ex.setMIMETypeDetector(null);
        report = ex.run();
    }

    /**
     * Performs data extraction over the content of a resource
     * and asserts that the extraction was fine.
View Full Code Here

        SingleDocumentExtraction ex = new SingleDocumentExtraction(
            new HTMLFixture(resource).getOpener(baseURI.toString()),
            getExtractorFactory(), new RepositoryWriter(conn)
        );
        ex.setMIMETypeDetector(null);
        report = ex.run();
    }

    /**
     * Performs data extraction over the content of a resource
     * and asserts that the extraction was fine.
View Full Code Here

    ) throws IOException, ExtractionException {
        final SingleDocumentExtraction ex = new SingleDocumentExtraction(configuration, in, factories, outputHandler);
        ex.setMIMETypeDetector(mimeTypeDetector);
        ex.setLocalCopyFactory(streamCache);
        ex.setParserEncoding(encoding);
        final SingleDocumentExtractionReport sder = ex.run(eps);
        return new ExtractionReport(
                ex.getMatchingExtractors(),
                ex.getParserEncoding(),
                ex.getDetectedMIMEType(),
                sder.getValidationReport(),
View Full Code Here

    ) throws IOException, ExtractionException {
        final SingleDocumentExtraction ex = new SingleDocumentExtraction(configuration, in, factories, outputHandler);
        ex.setMIMETypeDetector(mimeTypeDetector);
        ex.setLocalCopyFactory(streamCache);
        ex.setParserEncoding(encoding);
        final SingleDocumentExtractionReport sder = ex.run(eps);
        return new ExtractionReport(
                ex.getMatchingExtractors(),
                ex.getParserEncoding(),
                ex.getDetectedMIMEType(),
                sder.getValidationReport(),
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.