Package org.htmlparser.lexer

Examples of org.htmlparser.lexer.Lexer


    }

    public void testSelectors() throws Exception
    {
        String html = "<html><head><title>sample title</title></head><body inserterr=\"true\" yomama=\"false\"><h3 id=\"heading\">big </invalid>heading</h3><ul id=\"things\"><li><br word=\"broken\"/>&gt;moocow<li><applet/>doohickey<li class=\"last\"><b class=\"item\">final<br>item</b></ul></body></html>";
        Lexer l;
        Parser p;
        CssSelectorNodeFilter it;
        NodeIterator i;
        int count;

        l = new Lexer (html);
        p = new Parser (l);
        it = new CssSelectorNodeFilter ("li + li");
        count = 0;
        for (i = p.extractAllNodesThatMatch (it).elements (); i.hasMoreNodes ();)
        {
View Full Code Here


            + "during the regular season. <p>\n"
            + "\n"
            + "The Lighting advanced by beating the Philadelphia Flyers in the Eastern \n"
            + "Conference final. <p>\n"
            + "</body></html>\n";
        Lexer lexer;
        Parser parser;
        RegexFilter filter;
        NodeIterator iterator;
        int count;

        lexer = new Lexer (html);
        parser = new Parser (lexer);
        filter = new RegexFilter ("(19|20)\\d\\d([- \\\\/.](0[1-9]|1[012])[- \\\\/.](0[1-9]|[12][0-9]|3[01]))?");
        count = 0;
        for (iterator = parser.extractAllNodesThatMatch (filter).elements (); iterator.hasMoreNodes ();)
        {
View Full Code Here

        throws
            IOException,
            ClassNotFoundException,
            ParserException
    {
        Lexer lexer;
        byte[] data;

        lexer = new Lexer ();
        data = pickle (lexer);
        lexer = (Lexer)unpickle (data);
    }
View Full Code Here

            "//**Start Encode**#@~^ZwIAAA==@#@&0;  mDkW  P7nDb0zZKD.n1YAMGhk+Dvb`@#@&P,kW`UC7kLlDGDcl22gl:n~{'~Jtr1DGkW6YP&xDnD  +OPA62sKD+ME#@#@&P,~~k6PvxC\\rLmYGDcCwa.n.kkWU bx[+X66Pcr*cJ#,@*{~!*@#@&P,P~~,D+D;D  `YM;n#p@#@&P~P~n^/n@#@&~P,P~~M+Y;.  `Wl^d#I@#@&)@#@&6E  ^YbWUPT+O)awDK2DblYKCo`*  @#@&~~7l.PkOD8Px~rCl[~Dtr/,8+U,l  Pl1Y!CV,n4,/rO~Pm~wmo+,^G:alDk8Vn~SkOt,Ei@#@&~~7lD~dDD+P{~r4.Khk+DkPKOtD~Y4lU~ri@#@&~P7lD,dOD2P{PEHr^MWdW6OP&xOnMx+O~A62VK.D~lRZPJp@#@&~P7l.PkY.*,'PrAW!VN,4C\\P(+nx~sKl[+9 Jp@#@&~,k0~c7+.k6z;W.M+1YAMWSd+M`b#@#@&~~,PNK^Es+xD ADbY`dY.q,_~/D.&,_~dDDcbI@#@&~Psk+@#@&P,PP9W1;:xORSDrO`/D.F,_PkO. ,_,/ODf~3PdYM*#p@#@&N@#@&z&R @*@#@&qrIAAA==^#~@</SCRIPT>\n" +
            "</HEAD>\n" +
            "<BODY onload=\"getAppropriatePage()\">\n" +
            "</BODY>\n" +
            "</HTML>";
        Lexer lexer;
       
        lexer = new Lexer (cryptext);
        ScriptDecoder.LAST_STATE = ScriptDecoder.STATE_INITIAL; // read everything
        try
        {
            String result = ScriptDecoder.Decode (lexer.getPage (), lexer.getCursor ());
            assertStringEquals ("decoding failed", plaintext, result);
        }
        finally
        {
            ScriptDecoder.LAST_STATE = ScriptDecoder.STATE_DONE;
View Full Code Here

     * Test operation without tags.
     */
    public void testPureText () throws ParserException
    {
        String reference;
        Lexer lexer;
        Text node;

        reference = "Hello world";
        lexer = new Lexer (reference);
        node = (Text)lexer.nextNode ();
        assertEquals ("Text contents wrong", reference, node.getText ());
    }
View Full Code Here

     * Test operation with Unix line endings.
     */
    public void testUnixEOL () throws ParserException
    {
        String reference;
        Lexer lexer;
        Text node;

        reference = "Hello\nworld";
        lexer = new Lexer (reference);
        node = (Text)lexer.nextNode ();
        assertEquals ("Text contents wrong", reference, node.getText ());
    }
View Full Code Here

     * Test operation with Dos line endings.
     */
    public void testDosEOL () throws ParserException
    {
        String reference;
        Lexer lexer;
        Text node;

        reference = "Hello\r\nworld";
        lexer = new Lexer (reference);
        node = (Text)lexer.nextNode ();
        assertEquals ("Text contents wrong", reference, node.getText ());
        reference = "Hello\rworld";
        lexer = new Lexer (reference);
        node = (Text)lexer.nextNode ();
        assertEquals ("Text contents wrong", reference, node.getText ());
    }
View Full Code Here

     * Test operation with line endings near the end of input.
     */
    public void testEOF_EOL () throws ParserException
    {
        String reference;
        Lexer lexer;
        Text node;

        reference = "Hello world\n";
        lexer = new Lexer (reference);
        node = (Text)lexer.nextNode ();
        assertEquals ("Text contents wrong", reference, node.getText ());
        reference = "Hello world\r";
        lexer = new Lexer (reference);
        node = (Text)lexer.nextNode ();
        assertEquals ("Text contents wrong", reference, node.getText ());
        reference = "Hello world\r\n";
        lexer = new Lexer (reference);
        node = (Text)lexer.nextNode ();
        assertEquals ("Text contents wrong", reference, node.getText ());
    }
View Full Code Here

            "</head>",
            "<%=head%>",
            "<?php ?>",
            "<!--head-->",
        };
        Lexer lexer;
        Text node;

        for (int i = 0; i < references.length; i++)
        {
            for (int j = 0; j < suffixes.length; j++)
            {
                lexer = new Lexer (references[i] + suffixes[j]);
                node = (Text)lexer.nextNode ();
                assertEquals ("Text contents wrong", references[i], node.getText ());
            }
        }
    }
View Full Code Here

     */
    public void testPureTag () throws ParserException
    {
        String reference;
        String suffix;
        Lexer lexer;
        Node node;

        reference = "<head>";
        lexer = new Lexer (reference);
        node = lexer.nextNode ();
        assertEquals ("Tag contents wrong", reference, node.toHtml ());

        reference = "<head>";
        suffix = "<body>";
        lexer = new Lexer (reference + suffix);
        node = lexer.nextNode ();
        assertEquals ("Tag contents wrong", reference, node.toHtml ());
        node = lexer.nextNode ();
        assertEquals ("Tag contents wrong", suffix, node.toHtml ());
    }
View Full Code Here

TOP

Related Classes of org.htmlparser.lexer.Lexer

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.