Package org.apache.any23.filter

Examples of org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments


    @Test
    public void testAbstractMethodErrorIssue186_2() throws IOException, ExtractionException{
        final Any23 runner = new Any23();
        final String content = FileUtils.readResourceContent("/html/rdfa/rdfa-issue186-2.xhtml");
        final DocumentSource source = new StringDocumentSource(content, "http://richard.cyganiak.de/");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final TripleHandler handler = new NTriplesWriter(out);
        runner.extract(source, handler);
        final String n3 = out.toString("UTF-8");
        logger.debug(n3);
View Full Code Here


        final ModifiableConfiguration modifiableConf = DefaultConfiguration.copy();
        modifiableConf.setProperty("any23.extraction.metadata.timesize", "off");
        final Any23 any23 = new Any23(modifiableConf);

        final String content = FileUtils.readResourceContent("/rdf/rdf-issue183.ttl");
        final DocumentSource source = new StringDocumentSource(content, "http://base.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final TripleHandler handler = new NTriplesWriter(out);
        any23.extract(source, handler);
        handler.close();
        final String n3 = out.toString("UTF-8");
View Full Code Here

     * @param expectedContent
     * @throws Exception
     */
    private void assertEncodingDetection(String encoding, String input, String expectedContent)
    throws Exception {
        DocumentSource fileDocumentSource = getDocumentSourceFromResource(input);
        Any23 any23;
        RepositoryConnection conn;
        RepositoryWriter repositoryWriter;
       
        any23 = new Any23();
View Full Code Here

    private File getFile() {
        return file;
    }

    public DocumentSource getOpener(String baseURI) {
        return new FileDocumentSource(getFile(), baseURI);
    }
View Full Code Here

        assertContains("<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>", response.getContent());
    }

    @Test
    public void testResponseWithReport() throws Exception {
        content = new FileDocumentSource(
                new File("src/test/resources/org/apache/any23/servlet/missing-og-namespace.html")
        ).readStream();
        acceptHeader = "text/plain";
        HttpTester response = doGetRequest("/best/http://foo.com?validation-mode=validate-fix&report=on");
        Assert.assertEquals(200, response.getStatus());
View Full Code Here

        tempDirectory = testFolder.newFolder();
    }

    protected DocumentSource getDocumentSourceFromResource(
            String resourceLocation) throws IOException {
        return new FileDocumentSource(copyResourceToTempFile(resourceLocation));
    }
View Full Code Here

        return new FileDocumentSource(copyResourceToTempFile(resourceLocation));
    }

    protected DocumentSource getDocumentSourceFromResource(
            String resourceLocation, String baseUri) throws IOException {
        return new FileDocumentSource(copyResourceToTempFile(resourceLocation),
                baseUri);
    }
View Full Code Here

        }
    }

    protected DocumentSource createHTTPDocumentSource(HTTPClient httpClient, String uri)
            throws IOException, URISyntaxException {
        return new HTTPDocumentSource(httpClient, uri);
    }
View Full Code Here

        assumeOnlineAllowed();

        /*1*/ Any23 runner = new Any23();
        /*2*/ runner.setHTTPUserAgent("test-user-agent");
        /*3*/ HTTPClient httpClient = runner.getHTTPClient();
        /*4*/ DocumentSource source = new HTTPDocumentSource(
                 httpClient,
                 "http://dbpedia.org/resource/Trento"
              );
        /*5*/ ByteArrayOutputStream out = new ByteArrayOutputStream();
        /*6*/ TripleHandler handler = new NTriplesWriter(out);
View Full Code Here

        assumeOnlineAllowed();

        Any23 runner = new Any23();
        runner.setHTTPUserAgent("test-user-agent");
        HTTPClient httpClient = runner.getHTTPClient();
        DocumentSource source = new HTTPDocumentSource(
                httpClient,
                "http://products.semweb.bestbuy.com/y/products/7590289/"
        );
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        TripleHandler handler = new NTriplesWriter(out);
View Full Code Here

TOP

Related Classes of org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.