Package org.apache.any23.extractor.html

Examples of org.apache.any23.extractor.html.HTMLDocument


    @Test
    public void testXMLMimeTypeManagement() throws IOException, ExtractionException {
        final String documentURI = "http://www.test.com/resource.xml";
        final String contentType = "application/xml";
        final String in = StreamUtils.asString( this.getClass().getResourceAsStream("any23-xml-mimetype.xml") );
        final DocumentSource doc = new StringDocumentSource(in, documentURI, contentType);
        final Any23 any23 = new Any23();
        final CountingTripleHandler cth = new CountingTripleHandler(false);
        final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
        final ExtractionReport report = any23.extract(doc, rth);
        Assert.assertFalse(report.hasMatchingExtractors());
View Full Code Here


    @Test
    public void testAbstractMethodErrorIssue186_1() throws IOException, ExtractionException{
        final Any23 runner = new Any23();
        final String content = FileUtils.readResourceContent("/html/rdfa/rdfa-issue186-1.xhtml");
        final DocumentSource source = new StringDocumentSource(content, "http://base.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final TripleHandler handler = new NTriplesWriter(out);
        runner.extract(source, handler);
        String n3 = out.toString("UTF-8");
        logger.debug(n3);
View Full Code Here

    @Test
    public void testAbstractMethodErrorIssue186_2() throws IOException, ExtractionException{
        final Any23 runner = new Any23();
        final String content = FileUtils.readResourceContent("/html/rdfa/rdfa-issue186-2.xhtml");
        final DocumentSource source = new StringDocumentSource(content, "http://richard.cyganiak.de/");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final TripleHandler handler = new NTriplesWriter(out);
        runner.extract(source, handler);
        final String n3 = out.toString("UTF-8");
        logger.debug(n3);
View Full Code Here

        final ModifiableConfiguration modifiableConf = DefaultConfiguration.copy();
        modifiableConf.setProperty("any23.extraction.metadata.timesize", "off");
        final Any23 any23 = new Any23(modifiableConf);

        final String content = FileUtils.readResourceContent("/rdf/rdf-issue183.ttl");
        final DocumentSource source = new StringDocumentSource(content, "http://base.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final TripleHandler handler = new NTriplesWriter(out);
        any23.extract(source, handler);
        handler.close();
        final String n3 = out.toString("UTF-8");
View Full Code Here

        ps.println("</issueReport>");

    }

    private void printReport(String msg, Throwable e, ExtractionReport er, PrintStream ps) {
        XMLValidationReportSerializer reportSerializer = new XMLValidationReportSerializer();
        ps.println("<report>");

        // Human readable error message.
        if(msg != null) {
            ps.printf("<message>%s</message>\n", msg);
        } else {
            ps.print("<message/>\n");
        }

        // Error stack trace.
        if(e != null) {
            ps.println("<error>");
            ps.println("<![CDATA[");
            e.printStackTrace(ps);
            ps.println("]]>");
            ps.println("</error>");
        } else {
            ps.println("<error/>");
        }

        // Issue Report.
        printIssueReport(er, ps);

        // Validation report.
        try {
            reportSerializer.serialize(er.getValidationReport(), ps);
        } catch (SerializationException se) {
            ps.println("An error occurred while serializing error.");
            se.printStackTrace(ps);
        }
        ps.println("</report>");
View Full Code Here

        return new CSVExtractorFactory();
    }

    @Test
    public void testExtractionCommaSeparated() throws RepositoryException {
        CSV csv = CSV.getInstance();
        assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
        logger.debug(dumpModelToRDFXML());

        assertModelNotEmpty();
        assertStatementsSize(null, null, null, 28);
View Full Code Here

        assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
    }

    @Test
    public void testExtractionSemicolonSeparated() throws RepositoryException {
        CSV csv = CSV.getInstance();
        assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
        logger.debug(dumpModelToRDFXML());

        assertModelNotEmpty();
        assertStatementsSize(null, null, null, 28);
View Full Code Here

        assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
    }

    @Test
    public void testExtractionTabSeparated() throws RepositoryException {
        CSV csv = CSV.getInstance();
        assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
        logger.debug(dumpModelToRDFXML());

        assertModelNotEmpty();
        assertStatementsSize(null, null, null, 28);
View Full Code Here

        assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
    }

    @Test
    public void testTypeManagement() throws RepositoryException {
        CSV csv = CSV.getInstance();
        assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
        logger.debug(dumpModelToRDFXML());

        assertModelNotEmpty();
        assertStatementsSize(null, null, null, 21);
View Full Code Here

        assertContains(null, null, new LiteralImpl("10" , XMLSchema.INTEGER));
    }
   
    @Test
    public void testExtractionEmptyValue() throws RepositoryException {
        CSV csv = CSV.getInstance();
        assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
        logger.debug(dumpModelToRDFXML());

        assertModelNotEmpty();
        assertStatementsSize(null, null, null, 25);
View Full Code Here

TOP

Related Classes of org.apache.any23.extractor.html.HTMLDocument

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.