Examples of org.htmlparser.lexer.Lexer

org.htmlparser.lexer.Lexer
This class parses the HTML stream into nodes. There are three major types of nodes (lexemes):
- Remark
- Text
- Tag
Each time nextNode() is called, the next node is returned; once the stream is exhausted, null is returned instead.

     * Test operation with attributed tags.
     */
    public void testAttributedTag () throws ParserException
    {
        String reference;
        Lexer lexer;
        Node node;


        reference = "<head lang='en_US' dir=ltr\nprofile=\"http://htmlparser.sourceforge.org/dictionary.html\">";
        lexer = new Lexer (reference);
        node = lexer.nextNode ();
        assertEquals ("Tag contents wrong", reference, node.toHtml ());
    }

View Full Code Here

     * Test operation with comments.
     */
    public void testRemark () throws ParserException
    {
        String reference;
        Lexer lexer;
        Remark node;
        String suffix;


        reference = "<!-- This is a comment -->";
        lexer = new Lexer (reference);
        node = (Remark)lexer.nextNode ();
        assertEquals ("Tag contents wrong", reference, node.toHtml ());


        reference = "<!-- This is a comment --  >";
        lexer = new Lexer (reference);
        node = (Remark)lexer.nextNode ();
        assertEquals ("Tag contents wrong", reference, node.toHtml ());


        reference = "<!-- This is a\nmultiline comment -->";
        lexer = new Lexer (reference);
        node = (Remark)lexer.nextNode ();
        assertEquals ("Tag contents wrong", reference, node.toHtml ());


        suffix = "<head>";
        reference = "<!-- This is a comment -->";
        lexer = new Lexer (reference + suffix);
        node = (Remark)lexer.nextNode ();
        assertEquals ("Tag contents wrong", reference, node.toHtml ());


        reference = "<!-- This is a comment --  >";
        lexer = new Lexer (reference + suffix);
        node = (Remark)lexer.nextNode ();
        assertEquals ("Tag contents wrong", reference, node.toHtml ());


        reference = "<!-- This is a\nmultiline comment -->";
        lexer = new Lexer (reference + suffix);
        node = (Remark)lexer.nextNode ();
        assertEquals ("Tag contents wrong", reference, node.toHtml ());
    }

View Full Code Here

    /**
     * Test the fidelity of the toHtml() method.
     */
    public void testFidelity () throws ParserException, IOException
    {
        Lexer lexer;
        Node node;
        int position;
        StringBuffer buffer;
        String string;
        char[] ref;
        char[] test;


        URL url = new URL ("http://sourceforge.net");
        lexer = new Lexer (url.openConnection ());
        position = 0;
        buffer = new StringBuffer (80000);
        while (null != (node = lexer.nextNode ()))
        {
            string = node.toHtml ();
            if (position != node.getStartPosition ())
                fail ("non-contiguous" + string);
            buffer.append (string);
            position = node.getEndPosition ();
            if (buffer.length () != position)
                fail ("text length differed after encountering node " + string);
        }
        ref = lexer.getPage ().getText ().toCharArray ();
        test = new char[buffer.length ()];
        buffer.getChars (0, buffer.length (), test, 0);
        assertEquals ("different amounts of text", ref.length, test.length);
        for (int i = 0; i < ref.length; i++)
            if (ref[i] != test[i])

View Full Code Here

     * See bug #880283 Character "&gt;" erroneously inserted by Lexer
     */
    public void testJsp () throws ParserException
    {
        String html;
        Lexer lexer;
        Node node;
        
        html = "<% out.urlEncode('abc') + \"<br>\" + out.urlEncode('xyz') %>";
        lexer = new Lexer (html);
        node = lexer.nextNode ();
        if (node == null)
            fail ("too few nodes");
        else
            assertStringEquals ("bad html", html, node.toHtml());
        assertNull ("too many nodes", lexer.nextNode ());
    }

View Full Code Here

     * Unit test for new PI parsing code.
     */
    public void testPI() throws ParserException
    {
        String html;
        Lexer lexer;
        Node node;


        html = "<?php print(\"<p>Hello World!</p>\"); ?>";
        lexer = new Lexer(html);
        node = lexer.nextNode();
        if (node == null)
            fail ("too few nodes");
        else
            assertStringEquals("bad html", html, node.toHtml());
        assertNull("too many nodes", lexer.nextNode());
    }

View Full Code Here

     */
    public void testEscapedQuote () throws ParserException
    {
        String string;
        String html;
        Lexer lexer;
        Node node;
        
        string = "\na='\\'';\n";
        html = string + "</script>";
        lexer = new Lexer (html);
        node = lexer.nextNode (true);
        if (node == null)
            fail ("too few nodes");
        else
            assertStringEquals ("bad string", string, node.toHtml());
        assertNotNull ("too few nodes", lexer.nextNode (true));
        assertNull ("too many nodes", lexer.nextNode (true));
    }

View Full Code Here

    return getMatchingTags(filter).size();
  }


  private NodeList getMatchingTags(NodeFilter filter) throws Exception {
    String html = examiner.html();
    Parser parser = new Parser(new Lexer(new Page(html)));
    NodeList list = parser.parse(null);
    NodeList matches = list.extractAllNodesThatMatch(filter, true);
    return matches;
  }

View Full Code Here

    return createSlimTables(tableScanner);
  }


  private NodeList makeNodeList(TestPage pageToTest) {
    String html = pageToTest.getHtml();
    Parser parser = new Parser(new Lexer(new Page(html)));
    try {
      return parser.parse(null);
    } catch (ParserException e) {
      throw new SlimError(e);
    }

View Full Code Here

    if (page == null || page.equals(""))
      page = "<i>This page intentionally left blank.</i>";


    NodeList htmlTree;
    try {
      Parser parser = new Parser(new Lexer(new Page(page)));
      htmlTree = parser.parse(null);
    } catch (ParserException e) {
      throw new SlimError(e);
    }
    scanForTables(htmlTree);

View Full Code Here

public class HtmlParserToolsTest {


  @Test
  public void shoudlMakeExactCopy() throws ParserException, CloneNotSupportedException {
    String html = "<div class='foo'>funky <em>content</em></div>";
    Parser parser = new Parser(new Lexer(new Page(html)));
    NodeList tree = parser.parse(null);


    NodeList cloneTree = deepClone(tree);


    assertEquals(html, cloneTree.toHtml());

View Full Code Here

0 1 2 3 4 5 6

TOP

Related Classes of org.htmlparser.lexer.Lexer

com.apress.progwt.server.lucene.HTMLAnalyzer

com.apress.progwt.server.lucene.HTMLConverter

com.brewtab.ircbot.applets.TextsFromLastNightApplet

com.google.gdt.eclipse.designer.util.Utils

com.jeecms.cms.manager.assist.impl.CmsKeywordMngImpl

com.jeecms.common.util.StrUtils

fitnesse.fixtures.PageDriver

fitnesse.testsystems.slim.HtmlSlimTestSystem

fitnesse.testsystems.slim.HtmlTableScanner

fitnesse.util.HtmlParserToolsTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.