Package org.apache.any23.extractor

Examples of org.apache.any23.extractor.ExtractionContext


        processFile(FILE);
    }

    private void processFile(String resource) throws IOException, ExtractionException, TripleHandlerException {
        final ExtractionParameters extractionParameters = ExtractionParameters.newDefault();
        final ExtractionContext extractionContext = new ExtractionContext(
                extractor.getDescription().getExtractorName(),
                RDFUtils.uri("file://" + resource)
        );
        final InputStream is = this.getClass().getResourceAsStream(resource);
        final CompositeTripleHandler compositeTripleHandler = new CompositeTripleHandler();
View Full Code Here


                System.getProperty("test.data", "src/test/resources") +
                        "/html/" + filename);

        Document document = new TagSoupParser(new FileInputStream(file), baseURI.stringValue()).getDOM();
        HCardExtractor hCardExtractor = HCardExtractor.factory.createExtractor();
        ExtractionContext hcExtractionContext = new ExtractionContext(
                hCardExtractor.getDescription().getExtractorName(),
                baseURI
        );
        hCardExtractor.run(
                ExtractionParameters.newDefault(),
                hcExtractionContext,
                document,
                new ExtractionResultImpl(
                        hcExtractionContext,
                        hCardExtractor,
                        new RepositoryWriter(getConnection())
                )
        );
        XFNExtractor xfnExtractor = XFNExtractor.factory.createExtractor();
        ExtractionContext xfnExtractionContext = new ExtractionContext(
                xfnExtractor.getDescription().getExtractorName(),
                baseURI
        );
        xfnExtractor.run(
                        ExtractionParameters.newDefault(),
View Full Code Here

        File file = new File(
                System.getProperty("test.data", "src/test/resources/") + filename);

        Document document = new TagSoupParser(new FileInputStream(file), baseURI.stringValue()).getDOM();
        HCardExtractor hCardExtractor = HCardExtractor.factory.createExtractor();
        ExtractionContext hCardExtractionContext = new ExtractionContext(
                hCardExtractor.getDescription().getExtractorName(), baseURI
        );
        hCardExtractor.run(
                ExtractionParameters.newDefault(),
                hCardExtractionContext,
                document,
                new ExtractionResultImpl(
                        hCardExtractionContext,
                        hCardExtractor, new RepositoryWriter(getConnection())
                )
        );

        GeoExtractor geoExtractor = GeoExtractor.factory.createExtractor();
        ExtractionContext geoExtractionContext = new ExtractionContext(
                geoExtractor.getDescription().getExtractorName(), baseURI
        );
        geoExtractor.run(
                ExtractionParameters.newDefault(),
                geoExtractionContext,
                document,
                new ExtractionResultImpl(
                        geoExtractionContext,
                        geoExtractor,
                        new RepositoryWriter(getConnection())
                )
        );

        AdrExtractor adrExtractor = AdrExtractor.factory.createExtractor();
        ExtractionContext adrExtractionContext = new ExtractionContext(
                adrExtractor.getDescription().getExtractorName(), baseURI
        );
        adrExtractor.run(
                ExtractionParameters.newDefault(),
                adrExtractionContext,
View Full Code Here

        extractHCardAndRelated(filename);
        File file = new File(
                System.getProperty("test.data", "src/test/resources/") + filename);
        Document document = new TagSoupParser(new FileInputStream(file), baseURI.stringValue()).getDOM();
        HReviewExtractor hReviewExtractor = HReviewExtractor.factory.createExtractor();
        ExtractionContext hreviewExtractionContext = new ExtractionContext(
                hReviewExtractor.getDescription().getExtractorName(), baseURI
        );
        hReviewExtractor.run(
                ExtractionParameters.newDefault(),
                hreviewExtractionContext,
View Full Code Here

    public void testTypedLiteralIncompatibleValueSupport()
    throws IOException, ExtractionException, TripleHandlerException {
        final URI uri = RDFUtils.uri("http://host.com/test-malformed-literal.turtle");
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        final TripleHandler th = new RDFXMLWriter(baos);
        final ExtractionContext extractionContext = new ExtractionContext("turtle-extractor", uri);
        final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, th);
        extractor.setStopAtFirstError(false);
        try {
            extractor.run(
                    ExtractionParameters.newDefault(),
View Full Code Here

        final ExtractorDescription ed = mock(ExtractorDescription.class);
        when(ed.getExtractorName()).thenReturn(FAKE_EXTRACTOR_NAME);
        when(extractor.getDescription()).thenReturn(ed);

        final TripleHandler th = mock(TripleHandler.class);
        final ExtractionContext extractionContext = new ExtractionContext(
                extractor.getDescription().getExtractorName(),
                new URIImpl("http://fake.document.uri")
        );
        final ExtractionResult er = new ExtractionResultImpl(extractionContext, extractor, th);
        er.notifyIssue(IssueReport.IssueLevel.Fatal  , "Fake fatal error.", 1, 2);
View Full Code Here

    private void checkTriple(String predicate, VerificationMode verificationMode)
    throws TripleHandlerException {
        final String DOCUMENT_URI = "http://an.html.page";
        final TripleHandler mockTripleHandler = mock(TripleHandler.class);
        final ValueFactory valueFactory = new ValueFactoryImpl();
        ExtractionContext extractionContext = new ExtractionContext(
                "test-extractor",
                valueFactory.createURI(DOCUMENT_URI)
        );
        final IgnoreAccidentalRDFa ignoreAccidentalRDFa = new IgnoreAccidentalRDFa(mockTripleHandler, true);
        ignoreAccidentalRDFa.openContext(extractionContext);
View Full Code Here

        handler.expectNamespace("ex", "http://example.com/", "test", docURI, null);
        handler.expectTriple(s, p, o, null, "test", docURI, null);
        handler.expectCloseContext("test", docURI, null);
        handler.expectEndDocument(docURI);

        ExtractionContext context = new ExtractionContext("test", docURI);
        blocker.openContext(context);
        blocker.blockContext(context);
        blocker.receiveNamespace("ex", "http://example.com/", context);
        blocker.receiveTriple(s, p, o, null, context);
        blocker.closeContext(context);
View Full Code Here

    @Test
    public void testRun() throws IOException, ExtractionException {
        final InputStream is = this.getClass().getResourceAsStream("html-scraper-extractor-test.html");
        final ExtractionResult extractionResult = mock(ExtractionResult.class);
        final URI pageURI = ValueFactoryImpl.getInstance().createURI("http://fake/test/page/testrun");
        final ExtractionContext extractionContext = new ExtractionContext(
                extractor.getDescription().getExtractorName(),
                pageURI
        );
        extractor.run(ExtractionParameters.newDefault(), extractionContext, is, extractionResult);
View Full Code Here

        processFile(FILE);
    }

    private void processFile(String resource) throws IOException, ExtractionException, TripleHandlerException {
        final ExtractionParameters extractionParameters = ExtractionParameters.newDefault();
        final ExtractionContext extractionContext = new ExtractionContext(
                extractor.getDescription().getExtractorName(),
                RDFUtils.uri("file://" + resource)
        );
        final InputStream is = this.getClass().getResourceAsStream(resource);
        final CompositeTripleHandler compositeTripleHandler = new CompositeTripleHandler();
View Full Code Here

TOP

Related Classes of org.apache.any23.extractor.ExtractionContext

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.