Package org.htmlparser.tags

Examples of org.htmlparser.tags.MetaTag


   {
    String metadata = ""; String author = "";
    for (int i = 0; i < nodelist2.size(); i++)
    { if (nodelist2.elementAt(i) == null) continue;
    String meta = nodelist2.elementAt(i).getText(); meta = StringTools.filterChars(meta);
    MetaTag mtag = new MetaTag(); mtag.setText("<" + meta + ">");
    String tagName = mtag.getMetaTagName();
    if (tagName == null) continue;
    if (tagName.equalsIgnoreCase("keywords"))
    { metadata += mtag.getMetaContent(); }
    if (tagName.equalsIgnoreCase("authors") ||
      tagName.equalsIgnoreCase("author") )
    { author += mtag.getMetaContent(); }
    } // end of for
    doc.setAuthor(author); doc.setMetadata(metadata);
   } // end of if

   //*-- Populate the contents of the document with the entire text from the web page
View Full Code Here


        parser.registerScanners();

        parseAndAssertNodeCount(9);
        assertTrue("Node 5 should be META Tag", node[4] instanceof MetaTag);
        MetaTag metaTag;
        metaTag = (MetaTag) node[4];
        assertStringEquals(
            "Meta Tag 4 Name",
            "description",
            metaTag.getMetaTagName());
        assertStringEquals(
            "Meta Tag 4 Contents",
            "Protecting the internet community through technology, not legislation.  SpamCop eliminates spam.  Automatically file spam reports with the network administrators who can stop spam at the source.  Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.",
            metaTag.getMetaContent());
        assertStringEquals(
            "toHTML()",
            "<META CONTENT=\"Protecting the internet community through technology, not legislation.  SpamCop eliminates spam.  Automatically file spam reports with the network administrators who can stop spam at the source.  Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\" NAME=\"description\">",
            metaTag.toHtml());
    }
View Full Code Here

        parser.addScanner(scanner);

        parseAndAssertNodeCount(11);
        assertTrue("Node 5 should be End Tag", node[5] instanceof EndTag);
        assertTrue("Node 6 should be META Tag", node[6] instanceof MetaTag);
        MetaTag metaTag;
        metaTag = (MetaTag) node[6];
        assertEquals(
            "Meta Tag 6 Name",
            "description",
            metaTag.getMetaTagName());
        assertEquals(
            "Meta Tag 6 Contents",
            "Protecting the internet community through technology, not legislation.  SpamCop eliminates spam.  Automatically file spam reports with the network administrators who can stop spam at the source.  Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.",
            metaTag.getMetaContent());

        assertTrue("Node 7 should be META Tag", node[7] instanceof MetaTag);
        assertTrue("Node 8 should be META Tag", node[8] instanceof MetaTag);
        assertTrue("Node 9 should be META Tag", node[9] instanceof MetaTag);

        metaTag = (MetaTag) node[7];
        assertEquals("Meta Tag 7 Name", "keywords", metaTag.getMetaTagName());
        assertEquals(
            "Meta Tag 7 Contents",
            "SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns",
            metaTag.getMetaContent());
        assertNull("Meta Tag 7 Http-Equiv", metaTag.getHttpEquiv());

        metaTag = (MetaTag) node[8];
        assertEquals("Meta Tag 8 Name", "language", metaTag.getMetaTagName());
        assertEquals("Meta Tag 8 Contents", "en", metaTag.getMetaContent());
        assertNull("Meta Tag 8 Http-Equiv", metaTag.getHttpEquiv());

        metaTag = (MetaTag) node[9];
        assertEquals("Meta Tag 9 Name", "owner", metaTag.getMetaTagName());
        assertEquals(
            "Meta Tag 9 Contents",
            "service@admin.spamcop.net",
            metaTag.getMetaContent());
        assertNull("Meta Tag 9 Http-Equiv", metaTag.getHttpEquiv());

        metaTag = (MetaTag) node[10];
        assertNull("Meta Tag 10 Name", metaTag.getMetaTagName());
        assertEquals(
            "Meta Tag 10 Contents",
            "text/html; charset=ISO-8859-1",
            metaTag.getMetaContent());
        assertEquals(
            "Meta Tag 10 Http-Equiv",
            "content-type",
            metaTag.getHttpEquiv());

        assertEquals("This Scanner", scanner, metaTag.getThisScanner());
    }
View Full Code Here

            "http://www.google.com/test/index.html");
        MetaTagScanner scanner = new MetaTagScanner("-t");
        parser.addScanner(scanner);
        parseAndAssertNodeCount(1);
        assertTrue("Node should be meta tag", node[0] instanceof MetaTag);
        MetaTag metaTag = (MetaTag) node[0];
        assertEquals("Meta Tag Name", "Description", metaTag.getMetaTagName());
        assertEquals(
            "Content",
            "Ethnoburb </I>versus Chinatown: Two Types of Urban Ethnic Communities in Los Angeles",
            metaTag.getMetaContent());
    }
View Full Code Here

                + "</head>"
                + "</html>");
        parser.registerScanners();
        parseAndAssertNodeCount(5);
        assertType("Meta Tag expected", MetaTag.class, node[2]);
        MetaTag metaTag = (MetaTag) node[2];

        assertStringEquals(
            "http-equiv",
            "content-type",
            metaTag.getHttpEquiv());
        assertStringEquals(
            "content",
            "text/html; charset=windows-1252",
            metaTag.getMetaContent());
    }
View Full Code Here

                + "</body>"
                + "</html>");
        parser.registerScanners();
        parseAndAssertNodeCount(11);
        assertType("meta tag", MetaTag.class, node[3]);
        MetaTag metaTag = (MetaTag) node[3];
        assertStringEquals("meta content", "a<b", metaTag.getMetaContent());
    }
View Full Code Here

        Hashtable table = tag.getAttributes();
        String metaTagName = (String) table.get("NAME");
        String metaTagContents = (String) table.get("CONTENT");
        String httpEquiv = (String) table.get("HTTP-EQUIV");

        return new MetaTag(tagData, httpEquiv, metaTagName, metaTagContents);
    }
View Full Code Here

                + "</body>"
                + "</html>");
        parser.registerScanners();
        parseAndAssertNodeCount(10);
        assertType("fourth node", MetaTag.class, node[4]);
        MetaTag metaTag = (MetaTag) node[4];

        assertStringEquals(
            "content",
            "text/html; charset=iso-8859-1",
            metaTag.getAttribute("CONTENT"));
    }
View Full Code Here

     */
    public NodeIterator elements() throws ParserException
    {
        boolean remove_scanner;
        Node node;
        MetaTag meta;
        String httpEquiv;
        String charset;
        boolean restart;
        EndTag end;
        IteratorImpl ret;
View Full Code Here

        boolean remove_scanner,
        IteratorImpl ret)
        throws ParserException
    {
        Node node;
        MetaTag meta;
        String httpEquiv;
        String charset;
        EndTag end;
        if (null != url_conn)
            try
            {
                if (null == scanners.get("-m"))
                {
                    addScanner(new MetaTagScanner("-m"));
                    remove_scanner = true;
                }

                /* pre-read up to </HEAD> looking for charset directive */
                while (null != (node = ret.peek()))
                {
                    if (node instanceof MetaTag)
                    { // check for charset on Content-Type
                        meta = (MetaTag) node;
                        httpEquiv = meta.getAttribute("HTTP-EQUIV");
                        if ("Content-Type".equalsIgnoreCase(httpEquiv))
                        {
                            charset = getCharset(meta.getAttribute("CONTENT"));
                            if (!charset.equalsIgnoreCase(character_set))
                            { // oops, different character set, restart
                                character_set = charset;
                                recreateReader();
                                ret =
View Full Code Here

TOP

Related Classes of org.htmlparser.tags.MetaTag

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.