Package org.apache.any23.source

Examples of org.apache.any23.source.DocumentSource


        assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
    }

    @Test
    public void testExtractionSemicolonSeparated() throws RepositoryException {
        CSV csv = CSV.getInstance();
        assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
        logger.debug(dumpModelToRDFXML());

        assertModelNotEmpty();
        assertStatementsSize(null, null, null, 28);
View Full Code Here


        assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
    }

    @Test
    public void testExtractionTabSeparated() throws RepositoryException {
        CSV csv = CSV.getInstance();
        assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
        logger.debug(dumpModelToRDFXML());

        assertModelNotEmpty();
        assertStatementsSize(null, null, null, 28);
View Full Code Here

        assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
    }

    @Test
    public void testTypeManagement() throws RepositoryException {
        CSV csv = CSV.getInstance();
        assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
        logger.debug(dumpModelToRDFXML());

        assertModelNotEmpty();
        assertStatementsSize(null, null, null, 21);
View Full Code Here

        assertContains(null, null, new LiteralImpl("10" , XMLSchema.INTEGER));
    }
   
    @Test
    public void testExtractionEmptyValue() throws RepositoryException {
        CSV csv = CSV.getInstance();
        assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
        logger.debug(dumpModelToRDFXML());

        assertModelNotEmpty();
        assertStatementsSize(null, null, null, 25);
View Full Code Here

    public void testOpenGraphStructuredProperties() throws IOException, ExtractionException, RepositoryException {
        assertExtract("/html/rdfa/opengraph-structured-properties.html");
        logger.info( dumpHumanReadableTriples() );

        Assert.assertEquals(8, getStatementsSize(null, null, null) );
        final OGP vOGP = OGP.getInstance();
        assertContains(baseURI, vOGP.audio, RDFUtils.literal("http://example.com/bond/theme.mp3") );
        assertContains(
                baseURI,
                vOGP.description,
                RDFUtils.literal(
View Full Code Here

        fw.setAnnotated(annotate);
        outputMediaType = factory.getMimeType();
        List<TripleHandler> tripleHandlers = new ArrayList<TripleHandler>();
        tripleHandlers.add(new IgnoreAccidentalRDFa(fw));
        tripleHandlers.add(new CountingTripleHandler());
        rdfWriter = new CompositeTripleHandler(tripleHandlers);
        reporter = new ReportingTripleHandler(rdfWriter);
        rdfWriter = new IgnoreAccidentalRDFa(
            new IgnoreTitlesOfEmptyDocuments(reporter),
            true    // suppress stylesheet triples.
        );
View Full Code Here

    private SingleDocumentExtraction getInstance(String file) throws FileNotFoundException, IOException {
        baos = new ByteArrayOutputStream();
        rdfxmlWriter = new RDFXMLWriter(baos);
        repositoryWriter = new RepositoryWriter(conn);

        final CompositeTripleHandler cth = new CompositeTripleHandler();
        cth.addChild(rdfxmlWriter);
        cth.addChild(repositoryWriter);

        final ModifiableConfiguration configuration = DefaultConfiguration.copy();
        configuration.setProperty("any23.extraction.metadata.domain.per.entity", "on");
        SingleDocumentExtraction instance =  new SingleDocumentExtraction(
                configuration,
View Full Code Here

        FormatWriter fw = factory.getRdfWriter(byteOutStream);
        fw.setAnnotated(annotate);
        outputMediaType = factory.getMimeType();
        List<TripleHandler> tripleHandlers = new ArrayList<TripleHandler>();
        tripleHandlers.add(new IgnoreAccidentalRDFa(fw));
        tripleHandlers.add(new CountingTripleHandler());
        rdfWriter = new CompositeTripleHandler(tripleHandlers);
        reporter = new ReportingTripleHandler(rdfWriter);
        rdfWriter = new IgnoreAccidentalRDFa(
            new IgnoreTitlesOfEmptyDocuments(reporter),
            true    // suppress stylesheet triples.
View Full Code Here

                    null,
                    report
            );
            return false;
        }
        FormatWriter fw = factory.getRdfWriter(byteOutStream);
        fw.setAnnotated(annotate);
        outputMediaType = factory.getMimeType();
        List<TripleHandler> tripleHandlers = new ArrayList<TripleHandler>();
        tripleHandlers.add(new IgnoreAccidentalRDFa(fw));
        tripleHandlers.add(new CountingTripleHandler());
        rdfWriter = new CompositeTripleHandler(tripleHandlers);
View Full Code Here

    @Test
    public void testTypedLiteralIncompatibleValueSupport()
    throws IOException, ExtractionException, TripleHandlerException {
        final URI uri = RDFUtils.uri("http://host.com/test-malformed-literal.turtle");
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        final TripleHandler th = new RDFXMLWriter(baos);
        final ExtractionContext extractionContext = new ExtractionContext("turtle-extractor", uri);
        final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, th);
        extractor.setStopAtFirstError(false);
        try {
            extractor.run(
                    ExtractionParameters.newDefault(),
                    extractionContext,
                    this.getClass().getResourceAsStream("/org/apache/any23/extractor/rdf/testMalformedLiteral"),
                    result
            );
        } finally {
            logger.debug(baos.toString());
            th.close();
            result.close();
        }
    }
View Full Code Here

TOP

Related Classes of org.apache.any23.source.DocumentSource

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.