Package org.apache.any23

Examples of org.apache.any23.ExtractionReport


    public void testOpenGraphStructuredProperties() throws IOException, ExtractionException, RepositoryException {
        assertExtract("/html/rdfa/opengraph-structured-properties.html");
        logger.info( dumpHumanReadableTriples() );

        Assert.assertEquals(8, getStatementsSize(null, null, null) );
        final OGP vOGP = OGP.getInstance();
        assertContains(baseURI, vOGP.audio, RDFUtils.literal("http://example.com/bond/theme.mp3") );
        assertContains(
                baseURI,
                vOGP.description,
                RDFUtils.literal(
View Full Code Here


        fw.setAnnotated(annotate);
        outputMediaType = factory.getMimeType();
        List<TripleHandler> tripleHandlers = new ArrayList<TripleHandler>();
        tripleHandlers.add(new IgnoreAccidentalRDFa(fw));
        tripleHandlers.add(new CountingTripleHandler());
        rdfWriter = new CompositeTripleHandler(tripleHandlers);
        reporter = new ReportingTripleHandler(rdfWriter);
        rdfWriter = new IgnoreAccidentalRDFa(
            new IgnoreTitlesOfEmptyDocuments(reporter),
            true    // suppress stylesheet triples.
        );
View Full Code Here

    private SingleDocumentExtraction getInstance(String file) throws FileNotFoundException, IOException {
        baos = new ByteArrayOutputStream();
        rdfxmlWriter = new RDFXMLWriter(baos);
        repositoryWriter = new RepositoryWriter(conn);

        final CompositeTripleHandler cth = new CompositeTripleHandler();
        cth.addChild(rdfxmlWriter);
        cth.addChild(repositoryWriter);

        final ModifiableConfiguration configuration = DefaultConfiguration.copy();
        configuration.setProperty("any23.extraction.metadata.domain.per.entity", "on");
        SingleDocumentExtraction instance =  new SingleDocumentExtraction(
                configuration,
View Full Code Here

        final ExtractionContext extractionContext = new ExtractionContext(
                extractor.getDescription().getExtractorName(),
                RDFUtils.uri("file://" + resource)
        );
        final InputStream is = this.getClass().getResourceAsStream(resource);
        final CompositeTripleHandler compositeTripleHandler = new CompositeTripleHandler();
        final TripleHandler verifierTripleHandler = Mockito.mock(TripleHandler.class);
        compositeTripleHandler.addChild(verifierTripleHandler);
        final CountingTripleHandler countingTripleHandler = new CountingTripleHandler();
        compositeTripleHandler.addChild(countingTripleHandler);
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        compositeTripleHandler.addChild( new NTriplesWriter(out) );
        final ExtractionResult extractionResult = new ExtractionResultImpl(
                extractionContext, extractor, compositeTripleHandler
        );
        extractor.run(extractionParameters, extractionContext, is, extractionResult);
        compositeTripleHandler.close();
        logger.info(out.toString());

        verifyPredicateOccurrence(verifierTripleHandler, Excel.getInstance().containsSheet, 2 );
        verifyPredicateOccurrence(verifierTripleHandler, Excel.getInstance().containsRow  , 6 );
        verifyPredicateOccurrence(verifierTripleHandler, Excel.getInstance().containsCell , 18);
View Full Code Here

        FormatWriter fw = factory.getRdfWriter(byteOutStream);
        fw.setAnnotated(annotate);
        outputMediaType = factory.getMimeType();
        List<TripleHandler> tripleHandlers = new ArrayList<TripleHandler>();
        tripleHandlers.add(new IgnoreAccidentalRDFa(fw));
        tripleHandlers.add(new CountingTripleHandler());
        rdfWriter = new CompositeTripleHandler(tripleHandlers);
        reporter = new ReportingTripleHandler(rdfWriter);
        rdfWriter = new IgnoreAccidentalRDFa(
            new IgnoreTitlesOfEmptyDocuments(reporter),
            true    // suppress stylesheet triples.
View Full Code Here

        );
        final InputStream is = this.getClass().getResourceAsStream(resource);
        final CompositeTripleHandler compositeTripleHandler = new CompositeTripleHandler();
        final TripleHandler verifierTripleHandler = Mockito.mock(TripleHandler.class);
        compositeTripleHandler.addChild(verifierTripleHandler);
        final CountingTripleHandler countingTripleHandler = new CountingTripleHandler();
        compositeTripleHandler.addChild(countingTripleHandler);
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        compositeTripleHandler.addChild( new NTriplesWriter(out) );
        final ExtractionResult extractionResult = new ExtractionResultImpl(
                extractionContext, extractor, compositeTripleHandler
View Full Code Here

                    null,
                    report
            );
            return false;
        }
        FormatWriter fw = factory.getRdfWriter(byteOutStream);
        fw.setAnnotated(annotate);
        outputMediaType = factory.getMimeType();
        List<TripleHandler> tripleHandlers = new ArrayList<TripleHandler>();
        tripleHandlers.add(new IgnoreAccidentalRDFa(fw));
        tripleHandlers.add(new CountingTripleHandler());
        rdfWriter = new CompositeTripleHandler(tripleHandlers);
View Full Code Here

        final TripleHandler verifierTripleHandler = Mockito.mock(TripleHandler.class);
        compositeTripleHandler.addChild(verifierTripleHandler);
        final CountingTripleHandler countingTripleHandler = new CountingTripleHandler();
        compositeTripleHandler.addChild(countingTripleHandler);
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        compositeTripleHandler.addChild( new NTriplesWriter(out) );
        final ExtractionResult extractionResult = new ExtractionResultImpl(
                extractionContext, extractor, compositeTripleHandler
        );
        extractor.run(extractionParameters, extractionContext, is, extractionResult);
        compositeTripleHandler.close();
View Full Code Here

    @Test
    public void testTypedLiteralIncompatibleValueSupport()
    throws IOException, ExtractionException, TripleHandlerException {
        final URI uri = RDFUtils.uri("http://host.com/test-malformed-literal.turtle");
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        final TripleHandler th = new RDFXMLWriter(baos);
        final ExtractionContext extractionContext = new ExtractionContext("turtle-extractor", uri);
        final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, th);
        extractor.setStopAtFirstError(false);
        try {
            extractor.run(
                    ExtractionParameters.newDefault(),
                    extractionContext,
                    this.getClass().getResourceAsStream("/org/apache/any23/extractor/rdf/testMalformedLiteral"),
                    result
            );
        } finally {
            logger.debug(baos.toString());
            th.close();
            result.close();
        }
    }
View Full Code Here

        assertTripleCount(vSINDICE.getProperty(SINDICE.NESTING_ORIGINAL, vREVIEW.hasReview, 1);
    }

    private SingleDocumentExtraction getInstance(String file) throws FileNotFoundException, IOException {
        baos = new ByteArrayOutputStream();
        rdfxmlWriter = new RDFXMLWriter(baos);
        repositoryWriter = new RepositoryWriter(conn);

        final CompositeTripleHandler cth = new CompositeTripleHandler();
        cth.addChild(rdfxmlWriter);
        cth.addChild(repositoryWriter);
View Full Code Here

TOP

Related Classes of org.apache.any23.ExtractionReport

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.