Package org.htmlparser.util

Examples of org.htmlparser.util.NodeIterator


        assertStringEquals("Raw String", html, bodyTag.toHtml());
    }

    public void testAttributes ()
    {
        NodeIterator iterator;
        Node node;

        try
        {
            createParser("<body style=\"margin-top:4px; margin-left:20px;\" title=\"body\">");
            parser.setNodeFactory (new PrototypicalNodeFactory (new BodyTag ()));
            iterator = parser.elements ();
            node = null;
            while (iterator.hasMoreNodes ())
            {
                node = iterator.nextNode ();
                if (node instanceof BodyTag)
                {
                    assertNotNull ("no style attribute", ((BodyTag)node).getAttribute ("STYLE"));
                    assertNotNull ("no title attribute", ((BodyTag)node).getAttribute ("TITLE"));
                }
                else
                    fail ("not a body tag");
                assertTrue ("more than one node", !iterator.hasMoreNodes ());
            }
            assertNotNull ("no elements", node);
        }
        catch (ParserException pe)
        {
View Full Code Here


    {
        String html = "<html><head><title>sample title</title></head><body inserterr=\"true\" yomama=\"false\"><h3 id=\"heading\">big </invalid>heading</h3><ul id=\"things\"><li><br word=\"broken\"/>&gt;moocow<li><applet/>doohickey<li class=\"last\"><b class=\"item\">final<br>item</b></ul></body></html>";
        Lexer l;
        Parser p;
        CssSelectorNodeFilter it;
        NodeIterator i;
        int count;

        l = new Lexer (html);
        p = new Parser (l);
        it = new CssSelectorNodeFilter ("li + li");
        count = 0;
        for (i = p.extractAllNodesThatMatch (it).elements (); i.hasMoreNodes ();)
        {
            assertEquals ("tag name wrong", "LI", ((Tag)i.nextNode()).getTagName());
            count++;
        }
        assertEquals ("wrong count", 2, count);
    }
View Full Code Here

            + "Conference final. <p>\n"
            + "</body></html>\n";
        Lexer lexer;
        Parser parser;
        RegexFilter filter;
        NodeIterator iterator;
        int count;

        lexer = new Lexer (html);
        parser = new Parser (lexer);
        filter = new RegexFilter ("(19|20)\\d\\d([- \\\\/.](0[1-9]|1[012])[- \\\\/.](0[1-9]|[12][0-9]|3[01]))?");
        count = 0;
        for (iterator = parser.extractAllNodesThatMatch (filter).elements (); iterator.hasMoreNodes ();)
        {
            assertEquals ("text wrong", target, iterator.nextNode ().toHtml ());
            count++;
        }
        assertEquals ("wrong count", 1, count);
    }
View Full Code Here

            ClassNotFoundException,
            ParserException
    {
        Parser parser;
        Vector vector;
        NodeIterator enumeration;
        byte[] data;

        parser = new Parser ("http://htmlparser.sourceforge.net/test/example.html");
        enumeration = parser.elements ();
        vector = new Vector (50);
        while (enumeration.hasMoreNodes ())
            vector.addElement (enumeration.nextNode ());

        data = pickle (parser);
        parser = (Parser)unpickle (data);

        enumeration = parser.elements ();
        while (enumeration.hasMoreNodes ())
            assertEquals (
                "Nodes before and after serialization differ",
                ((Node)vector.remove (0)).toHtml (),
                enumeration.nextNode ().toHtml ());
    }
View Full Code Here

            ClassNotFoundException,
            ParserException
    {
        Parser parser;
        Vector vector;
        NodeIterator enumeration;
        byte[] data;

        parser = new Parser ("http://htmlparser.sourceforge.net/test/example.html");
        enumeration = parser.elements ();
        vector = new Vector (50);
        while (enumeration.hasMoreNodes ())
            vector.addElement (enumeration.nextNode ());

        data = pickle (parser);
        parser = (Parser)unpickle (data);

        enumeration = parser.elements ();
        while (enumeration.hasMoreNodes ())
            assertEquals (
                "Nodes before and after serialization differ",
                ((Node)vector.remove (0)).toHtml (),
                enumeration.nextNode ().toHtml ());
    }
View Full Code Here

    }

    public void getParameterTableFor(String tagContents, boolean dump)
    {
        String html;
        NodeIterator iterator;
        Node node;

        html = "<" + tagContents + ">";
        createParser (html);
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        try
        {
            iterator = parser.elements ();
            node = iterator.nextNode ();
            if (node instanceof Tag)
            {
                tag = (Tag)node;
                attributes = tag.getAttributesEx ();
                if (dump)
                {
                    for (int i = 0; i < attributes.size (); i++)
                    {
                        System.out.print ("Attribute #" + i);
                        Attribute attribute = (Attribute)attributes.elementAt (i);
                        if (null != attribute.getName ())
                            System.out.print (" Name: '" + attribute.getName () + "'");
                        if (null != attribute.getAssignment ())
                            System.out.print (" Assignment: '" + attribute.getAssignment () + "'");
                        if (0 != attribute.getQuote ())
                            System.out.print (" Quote: " + attribute.getQuote ());
                        if (null != attribute.getValue ())
                            System.out.print (" Value: '" + attribute.getValue () + "'");
                        System.out.println ();
                    }
                    System.out.println ();
                }
            }
            else
                attributes = null;
            String string = node.toHtml ();
            assertEquals ("toHtml differs", html, string);
            assertTrue ("shouldn't be any more nodes", !iterator.hasMoreNodes ());
        }
        catch (ParserException pe)
        {
            fail (pe.getMessage ());
        }
View Full Code Here

        final String DOCTYPE = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">";
        final String HTML = DOCTYPE + "\n<HTML>\n  <HEAD>\n    <TITLE>HTMLParserDocTypeBugTest</TITLE>\n  </HEAD>\n  <BODY>\n    HTMLParser DOCTYPE node bug test.\n  </BODY>\n</HTML>";

        createParser(HTML);

        NodeIterator e = parser.elements();
        Node node = e.nextNode();

        // First node is doctype
        assertStringEquals("Doctype element output is incorrect.", DOCTYPE, node.toHtml());
    }
View Full Code Here

     */
    public void testJIS ()
        throws ParserException
    {
        Parser parser;
        NodeIterator iterator;
       
        parser = new Parser ("http://www.009.com/");
        try
        {
            iterator = parser.elements ();
            while (iterator.hasMoreNodes ())
                checkTagNames (iterator.nextNode ());
        }
        catch (EncodingChangeException ece)
        {
            parser.reset ();
            iterator = parser.elements ();
            while (iterator.hasMoreNodes ())
                checkTagNames (iterator.nextNode ());
        }
    }
View Full Code Here

    public void testConjoined ()
        throws
            ParserException
    {
        StringBuffer buffer;
        NodeIterator iterator;
        Node node;
        String expected;

        expected = "The Title\nThis is the body.";
        String html1 = "<html><title>The Title\n</title>" +
            "<body>This is <a href=\"foo.html\">the body</a>.</body></html>";
        createParser (html1);
        buffer = new StringBuffer ();
        for (iterator = parser.elements (); iterator.hasMoreNodes (); )
        {
            node = iterator.nextNode ();
            String text = node.toPlainTextString ();
            buffer.append (text);
        }
        assertStringEquals ("conjoined text", expected, buffer.toString ());

        String html2 = "<html><title>The Title</title>\n" +
            "<body>This is <a href=\"foo.html\">the body</a>.</body></html>";
        createParser (html2);
        buffer = new StringBuffer ();
        for (iterator = parser.elements (); iterator.hasMoreNodes (); )
        {
            node = iterator.nextNode ();
            String text = node.toPlainTextString ();
            buffer.append (text);
        }
        assertStringEquals ("conjoined text", expected, buffer.toString ());
       
        String html3 = "<html><title>The Title</title>" +
            "<body>\nThis is <a href=\"foo.html\">the body</a>.</body></html>";
        createParser (html3);
        buffer = new StringBuffer ();
        for (iterator = parser.elements (); iterator.hasMoreNodes (); )
        {
            node = iterator.nextNode ();
            String text = node.toPlainTextString ();
            buffer.append (text);
        }
        assertStringEquals ("conjoined text", expected, buffer.toString ());
    }
View Full Code Here

     */
    public void testStackOverflow ()
        throws
            ParserException
    {
        NodeIterator iterator;
        Node node;
        String html;
                                                                                                                                                       
        html = "<a href = \"http://test.com\" />";
        createParser (html);
        for (iterator = parser.elements (); iterator.hasMoreNodes (); )
        {
            node = iterator.nextNode ();
            String text = node.toHtml ();
            assertStringEquals ("no overflow", html, text);
        }
        html = "<a href=\"http://test.com\"/>";
        createParser (html);
        for (iterator = parser.elements (); iterator.hasMoreNodes (); )
        {
            node = iterator.nextNode ();
            String text = node.toHtml ();
            assertStringEquals ("no overflow", html, text);
        }
        html = "<a href = \"http://test.com\"/>";
        createParser (html);
        for (iterator = parser.elements (); iterator.hasMoreNodes (); )
        {
            node = iterator.nextNode ();
            String text = node.toHtml ();
            assertStringEquals ("no overflow", html, text);
        }
    }
View Full Code Here

TOP

Related Classes of org.htmlparser.util.NodeIterator

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.