Package org.htmlparser

Examples of org.htmlparser.Parser


     * header by a server-side web application.
     * Nonetheless, it would be nice to handle this case.
     */
    public void testSingleQuotedCharset() throws ParserException
    {
        Parser parser;
        String url =
            "http://htmlparser.sourceforge.net/test/SinglequotedCharset.html";

        parser = new Parser(url);
        for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
            e.nextNode();
        assertTrue("Wrong encoding", parser.getEncoding().equals("UTF-8"));
    }
View Full Code Here


     */
    public void testCommaListCharset() throws ParserException
    {
        URL url;
        URLConnection connection;
        Parser parser;
        String idiots = "http://users.aol.com/geinster/rej.htm";

        try
        {
            url = new URL(idiots);
            connection = url.openConnection();
            // this little subclass just gets around normal JDK 1.4 processing
            // that filters out bogus character sets
            parser = new Parser()
            {
                protected String getCharset(String content)
                {
                    int index;
                    String ret;

                    ret = DEFAULT_CHARSET;
                    if (null != content)
                    {
                        index = content.indexOf(CHARSET_STRING);

                        if (index != -1)
                        {
                            content =
                                content
                                    .substring(index + CHARSET_STRING.length())
                                    .trim();
                            if (content.startsWith("="))
                            {
                                content = content.substring(1).trim();
                                index = content.indexOf(";");
                                if (index != -1)
                                    content = content.substring(0, index);

                                //remove any double quotes from around charset string
                                if (content.startsWith("\"")
                                    && content.endsWith("\"")
                                    && (1 < content.length()))
                                    content =
                                        content.substring(
                                            1,
                                            content.length() - 1);

                                //remove any single quote from around charset string
                                if (content.startsWith("'")
                                    && content.endsWith("'")
                                    && (1 < content.length()))
                                    content =
                                        content.substring(
                                            1,
                                            content.length() - 1);

                                ret = content;
                                // short circuit findCharset() processing
                            }
                        }
                    }

                    return (ret);
                }
            };
            parser.setConnection(connection);
            // must be the default
            assertTrue(
                "Wrong encoding",
                parser.getEncoding().equals("ISO-8859-1"));
            for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
                e.nextNode();
            assertTrue(
                "Wrong encoding",
                parser.getEncoding().equals("windows-1252"));
        }
        catch (Exception e)
        {
            fail(e.getMessage());
        }
View Full Code Here

        }
    }

    public void testNullUrl()
    {
        Parser parser;
        try
        {
            parser =
                new Parser("http://someoneexisting.com", Parser.noFeedback);
            assertTrue("Should have thrown an exception!", false);
        }
        catch (ParserException e)
        {
View Full Code Here

        }
    }

    public void testURLWithSpaces() throws ParserException
    {
        Parser parser;
        String url =
            "http://htmlparser.sourceforge.net/test/This is a Test Page.html";

        parser = new Parser(url);
        Node node[] = new Node[30];
        int i = 0;
        for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
        {
            node[i] = e.nextNode();
            i++;

        }
View Full Code Here

{
    FormTag formTag;
    Vector formChildren;
    public void setUp() throws Exception
    {
        Parser parser = Parser.createParser(FormScannerTest.FORM_HTML);
        parser.registerScanners();
        NodeIterator e = parser.elements();
        Node node = e.nextNode();
        formTag = (FormTag) node;
        formChildren = new Vector();
        for (SimpleNodeIterator se = formTag.children(); se.hasMoreNodes();)
        {
View Full Code Here

     * with the standard scanners registered.
     */
    public Generate() throws ParserException
    {
        parser =
            new Parser("http://www.w3.org/TR/REC-html40/sgml/entities.html");
        parser.registerScanners();
    }
View Full Code Here

    protected void createParser(String inputHTML)
    {
        String testHTML = new String(inputHTML);
        StringReader sr = new StringReader(testHTML);
        reader = new NodeReader(new BufferedReader(sr), 5000);
        parser = new Parser(reader, new DefaultParserFeedback());
        node = new Node[40];
    }
View Full Code Here

    protected void createParser(String inputHTML, int numNodes)
    {
        String testHTML = new String(inputHTML);
        StringReader sr = new StringReader(testHTML);
        reader = new NodeReader(new BufferedReader(sr), 5000);
        parser = new Parser(reader, new DefaultParserFeedback());
        node = new Node[numNodes];
    }
View Full Code Here

    protected void createParser(String inputHTML, String url)
    {
        String testHTML = new String(inputHTML);
        StringReader sr = new StringReader(testHTML);
        reader = new NodeReader(new BufferedReader(sr), url);
        parser = new Parser(reader, new DefaultParserFeedback());
        node = new Node[40];
    }
View Full Code Here

    protected void createParser(String inputHTML, String url, int numNodes)
    {
        String testHTML = new String(inputHTML);
        StringReader sr = new StringReader(testHTML);
        reader = new NodeReader(new BufferedReader(sr), url);
        parser = new Parser(reader, new DefaultParserFeedback());
        node = new Node[numNodes];
    }
View Full Code Here

TOP

Related Classes of org.htmlparser.Parser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.