Examples of org.apache.any23.source.StringDocumentSource

org.apache.any23.source.StringDocumentSource
String implementation of {@link DocumentSource}.

        Any23 runner = new Any23(parsers.length == 0 ? null : parsers);
        if (parsers.length != 0) {
            runner.setMIMETypeDetector(null);   // Use all the provided extractors.
        }
        final NTriplesWriter tripleHandler = new NTriplesWriter(out);
        runner.extract(new StringDocumentSource(content, PAGE_URL), tripleHandler);
        tripleHandler.close();
        String result = out.toString("us-ascii");
        Assert.assertNotNull(result);
        Assert.assertTrue(result.length() > 10);
    }

View Full Code Here

            String documentURI,
            String contentType,
            String encoding,
            TripleHandler outputHandler
    ) throws IOException, ExtractionException {
        return extract(new StringDocumentSource(in, documentURI, contentType, encoding), outputHandler);
    }

View Full Code Here

     * @throws IOException
     * @throws ExtractionException
     */
    public ExtractionReport extract(String in, String documentURI, TripleHandler outputHandler)
    throws IOException, ExtractionException {
        return extract(new StringDocumentSource(in, documentURI), outputHandler);
    }

View Full Code Here

        @Override
        protected DocumentSource createHTTPDocumentSource(HTTPClient httpClient, String uri)
                throws IOException, URISyntaxException {
            requestedURI = uri;
            if(content != null) {
                return new StringDocumentSource(content, uri);
            } else {
                return super.createHTTPDocumentSource(httpClient, uri);
            }
        }

View Full Code Here

        /* 2 */final String content = "@prefix foo: <http://example.org/ns#> .   "
                + "@prefix : <http://other.example.org/ns#> ."
                + "foo:bar foo: : .                          "
                + ":bar : foo:bar .                           ";
        // The second argument of StringDocumentSource() must be a valid URI.
        /* 3 */DocumentSource source = new StringDocumentSource(content,
                "http://host.com/service");
        /* 4 */ByteArrayOutputStream out = new ByteArrayOutputStream();
        /* 5 */TripleHandler handler = new NTriplesWriter(out);
        try {
            /* 6 */runner.extract(source, handler);

View Full Code Here

            ExtractionException {
        final String documentURI = "http://www.test.com/resource.xml";
        final String contentType = "application/xml";
        final String in = StreamUtils.asString(this.getClass()
                .getResourceAsStream("any23-xml-mimetype.xml"));
        final DocumentSource doc = new StringDocumentSource(in, documentURI,
                contentType);
        final Any23 any23 = new Any23();
        final CountingTripleHandler cth = new CountingTripleHandler(false);
        final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
        final ExtractionReport report = any23.extract(doc, rth);

View Full Code Here

    public void testAbstractMethodErrorIssue186_1() throws IOException,
            ExtractionException {
        final Any23 runner = new Any23();
        final String content = FileUtils
                .readResourceContent("/html/rdfa/rdfa-issue186-1.xhtml");
        final DocumentSource source = new StringDocumentSource(content,
                "http://base.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final TripleHandler handler = new NTriplesWriter(out);
        runner.extract(source, handler);
        String n3 = out.toString("UTF-8");

View Full Code Here

    public void testAbstractMethodErrorIssue186_2() throws IOException,
            ExtractionException {
        final Any23 runner = new Any23();
        final String content = FileUtils
                .readResourceContent("/html/rdfa/rdfa-issue186-2.xhtml");
        final DocumentSource source = new StringDocumentSource(content,
                "http://richard.cyganiak.de/");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final TripleHandler handler = new NTriplesWriter(out);
        runner.extract(source, handler);
        final String n3 = out.toString("UTF-8");

View Full Code Here

        modifiableConf.setProperty("any23.extraction.metadata.timesize", "off");
        final Any23 any23 = new Any23(modifiableConf);


        final String content = FileUtils
                .readResourceContent("/rdf/rdf-issue183.ttl");
        final DocumentSource source = new StringDocumentSource(content,
                "http://base.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final TripleHandler handler = new NTriplesWriter(out);
        any23.extract(source, handler);
        handler.close();

View Full Code Here

        if (parsers.length != 0) {
            runner.setMIMETypeDetector(null); // Use all the provided
                                              // extractors.
        }
        final NTriplesWriter tripleHandler = new NTriplesWriter(out);
        runner.extract(new StringDocumentSource(content, PAGE_URL),
                tripleHandler);
        tripleHandler.close();
        String result = out.toString("us-ascii");
        Assert.assertNotNull(result);
        Assert.assertTrue(result.length() > 10);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.any23.source.StringDocumentSource

org.apache.any23.AbstractAny23TestBase

org.apache.any23.Any23

org.apache.any23.Any23Test

org.apache.any23.cli.ExtractorDocumentation

org.apache.any23.cli.MicrodataParser

org.apache.any23.cli.MicrodataParser$MicrodataParserDocumentSourceConverter

org.apache.any23.cli.MimeDetector

org.apache.any23.cli.MimeDetector$MimeDetectorDocumentSourceConverter

org.apache.any23.cli.Rover

org.apache.any23.cli.Tool

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.