Package org.htmlparser.visitors

Examples of org.htmlparser.visitors.TextExtractingVisitor


        super(name);
    }

    public void testSimpleVisit() throws Exception {
        createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>");
        TextExtractingVisitor visitor = new TextExtractingVisitor();
        parser.visitAllNodesWith(visitor);
        assertStringEquals(
            "extracted text",
            "Hello World",
            visitor.getExtractedText()
        );
    }
View Full Code Here


        );
    }

    public void testSimpleVisitWithRegisteredScanners() throws Exception {
        createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>");
        TextExtractingVisitor visitor = new TextExtractingVisitor();
        parser.visitAllNodesWith(visitor);
        assertStringEquals(
            "extracted text",
            "Hello World",
            visitor.getExtractedText()
        );
    }
View Full Code Here

        );
    }

    public void testVisitHtmlWithSpecialChars() throws Exception {
        createParser("<BODY>Hello World&nbsp;&nbsp;</BODY>");
        TextExtractingVisitor visitor = new TextExtractingVisitor();
        parser.visitAllNodesWith(visitor);
        assertStringEquals(
            "extracted text",
            "Hello World  ",
            visitor.getExtractedText()
        );
    }
View Full Code Here

    public void testVisitHtmlWithPreTags() throws Exception {
        createParser(
            "Some text with &nbsp;<pre>this &nbsp; should be preserved</pre>"
        );
        TextExtractingVisitor visitor = new TextExtractingVisitor();
        parser.visitAllNodesWith(visitor);
        assertStringEquals(
            "extracted text",
            "Some text with  this &nbsp; should be preserved",
            visitor.getExtractedText()
        );
    }
View Full Code Here

TOP

Related Classes of org.htmlparser.visitors.TextExtractingVisitor

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.