Examples of SingleDocumentExtraction

org.apache.any23.extractor.SingleDocumentExtraction
This class acts as facade where all the extractors were called on a single document.

Examples of org.apache.any23.extractor.SingleDocumentExtraction

     * @throws IOException
     */
    // TODO: MimeType detector to null forces the execution of all extractors, but extraction
    //       tests should be based on mimetype detection.
    protected void extract(String resource) throws ExtractionException, IOException {
        SingleDocumentExtraction ex = new SingleDocumentExtraction(
            new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseURI.toString()),
            getExtractorFactory(), new RepositoryWriter(conn)
        );
        ex.setMIMETypeDetector(null);
        report = ex.run();
    }

View Full Code Here

Examples of org.apache.any23.extractor.SingleDocumentExtraction

     * @throws IOException
     */
    // TODO: MimeType detector to null forces the execution of all extractors, but extraction
    //       tests should be based on mimetype detection.
    protected void extract(String resource) throws ExtractionException, IOException {
        SingleDocumentExtraction ex = new SingleDocumentExtraction(
            new HTMLFixture(resource).getOpener(baseURI.toString()),
            getExtractorFactory(), new RepositoryWriter(conn)
        );
        ex.setMIMETypeDetector(null);
        report = ex.run();
    }

View Full Code Here

Examples of org.apache.any23.extractor.SingleDocumentExtraction

            ExtractionParameters eps,
            DocumentSource in,
            TripleHandler outputHandler,
            String encoding
    ) throws IOException, ExtractionException {
        final SingleDocumentExtraction ex = new SingleDocumentExtraction(configuration, in, factories, outputHandler);
        ex.setMIMETypeDetector(mimeTypeDetector);
        ex.setLocalCopyFactory(streamCache);
        ex.setParserEncoding(encoding);
        final SingleDocumentExtractionReport sder = ex.run(eps);
        return new ExtractionReport(
                ex.getMatchingExtractors(),
                ex.getParserEncoding(),
                ex.getDetectedMIMEType(),
                sder.getValidationReport(),
                sder.getExtractorToIssues()
        );
    }

View Full Code Here

Examples of org.apache.any23.extractor.SingleDocumentExtraction

            ExtractionParameters eps,
            DocumentSource in,
            TripleHandler outputHandler,
            String encoding
    ) throws IOException, ExtractionException {
        final SingleDocumentExtraction ex = new SingleDocumentExtraction(configuration, in, factories, outputHandler);
        ex.setMIMETypeDetector(mimeTypeDetector);
        ex.setLocalCopyFactory(streamCache);
        ex.setParserEncoding(encoding);
        final SingleDocumentExtractionReport sder = ex.run(eps);
        return new ExtractionReport(
                ex.getMatchingExtractors(),
                ex.getParserEncoding(),
                ex.getDetectedMIMEType(),
                sder.getValidationReport(),
                sder.getExtractorToIssues()
        );
    }

View Full Code Here

Examples of org.apache.any23.extractor.SingleDocumentExtraction

     * @throws IOException
     */
    // TODO: MimeType detector to null forces the execution of all extractors, but extraction
    //       tests should be based on mimetype detection.
    protected void extract(String resource) throws ExtractionException, IOException {
        SingleDocumentExtraction ex = new SingleDocumentExtraction(
            new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseURI.toString()),
            getExtractorFactory(), new RepositoryWriter(conn)
        );
        ex.setMIMETypeDetector(null);
        report = ex.run();
    }

View Full Code Here

Examples of org.apache.any23.extractor.SingleDocumentExtraction

            ExtractionParameters eps,
            DocumentSource in,
            TripleHandler outputHandler,
            String encoding
    ) throws IOException, ExtractionException {
        final SingleDocumentExtraction ex = new SingleDocumentExtraction(configuration, in, factories, outputHandler);
        ex.setMIMETypeDetector(mimeTypeDetector);
        ex.setLocalCopyFactory(streamCache);
        ex.setParserEncoding(encoding);
        final SingleDocumentExtractionReport sder = ex.run(eps);
        return new ExtractionReport(
                ex.getMatchingExtractors(),
                ex.getParserEncoding(),
                ex.getDetectedMIMEType(),
                sder.getValidationReport(),
                sder.getExtractorToIssues()
        );
    }

View Full Code Here

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.