Package org.apache.any23.source

Examples of org.apache.any23.source.FileDocumentSource


    @Test
    public void testExtractionParametersWithNestingDisabled()
    throws IOException, ExtractionException, TripleHandlerException {
        final int EXPECTED_TRIPLES = 19;
        Any23 runner = new Any23();
        DocumentSource source = new FileDocumentSource(
                new File("src/test/resources/microformats/nested-microformats-a1.html"),
                "http://www.test.com"
        );

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
View Full Code Here


    }

    @Test
    public void testExceptionPropagation() throws IOException {
        Any23 any23 = new Any23();
        DocumentSource source = new FileDocumentSource(
                new File("src/test/resources/application/turtle/geolinkeddata.ttl"),
                "http://www.test.com"
        );
        CountingTripleHandler cth1 = new CountingTripleHandler();
        try {
View Full Code Here

     * @param expectedContent
     * @throws Exception
     */
    private void assertEncodingDetection(String encoding, File input, String expectedContent)
    throws Exception {
        FileDocumentSource fileDocumentSource;
        Any23 any23;
        RepositoryConnection conn;
        RepositoryWriter repositoryWriter;

        fileDocumentSource = new FileDocumentSource(input);
        any23 = new Any23();
        Sail store = new MemoryStore();
        store.initialize();
        conn = new SailRepository(store).getConnection();
        repositoryWriter = new RepositoryWriter(conn);
View Full Code Here

                    throw new ParameterException("Invalid source URI: '" + value + "'");
                }
            }
            final Matcher fileMatcher = FILE_DOCUMENT_PATTERN.matcher(value);
            if (fileMatcher.find()) {
                return new FileDocumentSource( new File( fileMatcher.group(1) ) );
            }
            throw new ParameterException("Invalid source protocol: '" + value + "'");
        }
View Full Code Here

     * @throws IOException if an error occurs while initializing the internal {@link org.apache.any23.http.HTTPClient}.
     */
    public DocumentSource createDocumentSource(String documentURI) throws URISyntaxException, IOException {
        if(documentURI == null) throw new NullPointerException("documentURI cannot be null.");
        if (documentURI.toLowerCase().startsWith("file:")) {
            return new FileDocumentSource( new File(new URI(documentURI)) );
        }
        if (documentURI.toLowerCase().startsWith("http:") || documentURI.toLowerCase().startsWith("https:")) {
            return new HTTPDocumentSource(getHTTPClient(), documentURI);
        }
        throw new IllegalArgumentException(
View Full Code Here

     * @throws IOException
     * @throws ExtractionException
     */
    public ExtractionReport extract(File file, TripleHandler outputHandler)
    throws IOException, ExtractionException {
        return extract(new FileDocumentSource(file), outputHandler);
    }
View Full Code Here

        assertContains("<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>", response.getContent());
    }

    @Test
    public void testResponseWithReport() throws Exception {
        content = new FileDocumentSource(
                new File("src/test/resources/org/apache/any23/servlet/missing-og-namespace.html")
        ).readStream();
        acceptHeader = "text/plain";
        HttpTester response = doGetRequest("/best/http://foo.com?validation-mode=validate-fix&report=on");
        Assert.assertEquals(200, response.getStatus());
View Full Code Here

                    throw new ParameterException("Invalid source URI: '" + value + "'");
                }
            }
            final Matcher fileMatcher = FILE_DOCUMENT_PATTERN.matcher(value);
            if (fileMatcher.find()) {
                return new FileDocumentSource( new File( fileMatcher.group(1) ) );
            }
            throw new ParameterException("Invalid source protocol: '" + value + "'");
        }
View Full Code Here

     * @throws IOException if an error occurs while initializing the internal {@link org.apache.any23.http.HTTPClient}.
     */
    public DocumentSource createDocumentSource(String documentURI) throws URISyntaxException, IOException {
        if(documentURI == null) throw new NullPointerException("documentURI cannot be null.");
        if (documentURI.toLowerCase().startsWith("file:")) {
            return new FileDocumentSource( new File(new URI(documentURI)) );
        }
        if (documentURI.toLowerCase().startsWith("http:") || documentURI.toLowerCase().startsWith("https:")) {
            return new HTTPDocumentSource(getHTTPClient(), documentURI);
        }
        throw new IllegalArgumentException(
View Full Code Here

     * @throws IOException
     * @throws ExtractionException
     */
    public ExtractionReport extract(File file, TripleHandler outputHandler)
    throws IOException, ExtractionException {
        return extract(new FileDocumentSource(file), outputHandler);
    }
View Full Code Here

TOP

Related Classes of org.apache.any23.source.FileDocumentSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.