Package org.htmlparser.tags

Examples of org.htmlparser.tags.MetaTag


                 i.hasMoreNodes();)
            {
                Node node = i.nextNode();
                if (node instanceof MetaTag)
                {
                    MetaTag meta = (MetaTag) node;
                    if ( meta.getHttpEquiv() != null &&
                            meta.getHttpEquiv().equalsIgnoreCase(YADIS_XRDS_LOCATION) )
                    {
                        if ( xrdsLocation != null )
                            throw new YadisException(
                                "More than one " + YADIS_XRDS_LOCATION +
                                "META tags found in HEAD: " + head.toHtml(),
                                YadisResult.HTMLMETA_INVALID_RESPONSE);

                        xrdsLocation = meta.getMetaContent();
                        if (DEBUG)
                            _log.debug("Found " + YADIS_XRDS_LOCATION + "META tags.");
                    }
                }
            }
View Full Code Here


              _keywords.add(kw);
            }
          }
          */
        } else if ( n.getClass() == MetaTag.class ) {
          MetaTag t = (MetaTag)n;
          if (t.getAttribute("name") != null && t.getAttribute("name").equals("keywords") ) {
            String[] kws = t.getAttribute("content").split(",");
            for( String kw : kws ) {
              _keywords.add(kw);
            }
          }
        }
View Full Code Here

        String url = "http://htmlparser.sourceforge.net/test/gb2312Charset.html";
        int i;
        Node[] nodes;

        parser = new Parser(url);
        parser.setNodeFactory (new PrototypicalNodeFactory (new MetaTag ()));
        i = 0;
        nodes = new Node[30];
        for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
            nodes[i++] = e.nextNode();
        assertEquals ("Expected nodes", 23, i);
View Full Code Here

    {
        String url;
        int bookmark;
        NodeList list;
        NodeList robots;
        MetaTag robot;
        String content;
        File file;
        File dir;
        PrintWriter out;

        // get the next URL and add it to the done pile
        url = (String)mPages.remove (0);
        System.out.println ("processing " + url);
        mFinished.add (url);

        try
        {
            bookmark = mPages.size ();
            // fetch the page and gather the list of nodes
            mParser.setURL (url);
            try
            {
                list = new NodeList ();
                for (NodeIterator e = mParser.elements (); e.hasMoreNodes (); )
                    list.add (e.nextNode ()); // URL conversion occurs in the tags
            }
            catch (EncodingChangeException ece)
            {
                // fix bug #998195 SiteCatpurer just crashed
                // try again with the encoding now set correctly
                // hopefully mPages, mImages, mCopied and mFinished won't be corrupted
                mParser.reset ();
                list = new NodeList ();
                for (NodeIterator e = mParser.elements (); e.hasMoreNodes (); )
                    list.add (e.nextNode ());
            }

            // handle robots meta tag according to http://www.robotstxt.org/wc/meta-user.html
            // <meta name="robots" content="index,follow" />
            // <meta name="robots" content="noindex,nofollow" />
            robots = list.extractAllNodesThatMatch (
                new AndFilter (
                    new NodeClassFilter (MetaTag.class),
                    new HasAttributeFilter ("name", "robots")), true);
            if (0 != robots.size ())
            {
                robot = (MetaTag)robots.elementAt (0);
                content = robot.getAttribute ("content").toLowerCase ();
                if ((-1 != content.indexOf ("none")) || (-1 != content.indexOf ("nofollow")))
                    // reset mPages
                    for (int i = bookmark; i < mPages.size (); i++)
                        mPages.remove (i);
                if ((-1 != content.indexOf ("none")) || (-1 != content.indexOf ("noindex")))
View Full Code Here

        registerTag (new ImageTag ());
        registerTag (new InputTag ());
        registerTag (new JspTag ());
        registerTag (new LabelTag ());
        registerTag (new LinkTag ());
        registerTag (new MetaTag ());
        registerTag (new ObjectTag ());
        registerTag (new OptionTag ());
        registerTag (new ParagraphTag ());
        registerTag (new ProcessingInstructionTag ());
        registerTag (new ScriptTag ());
View Full Code Here

        assertTrue("The HTML tag should have 3 nodes", 3 == htmlTag.getChildCount ());
        assertTrue("The first child should be a HEAD tag",htmlTag.getChild(0) instanceof HeadTag);
        HeadTag headTag = (HeadTag)htmlTag.getChild(0);
        assertTrue("The HEAD tag should have 2 nodes", 2 == headTag.getChildCount ());
        assertTrue("The second child should be a META tag",headTag.getChild(1) instanceof MetaTag);
        MetaTag metaTag = (MetaTag)headTag.getChild(1);

        assertStringEquals(
            "content",
            "text/html; charset=iso-8859-1",
            metaTag.getAttribute("CONTENT")
        );
    }
View Full Code Here

            new PrototypicalNodeFactory (
                new Tag[]
                {
                    new TitleTag (),
                    new StyleTag (),
                    new MetaTag (),
                }));
        parseAndAssertNodeCount(7);
        assertTrue(node[2] instanceof TitleTag);
        // check the title node
        TitleTag titleTag = (TitleTag) node[2];
View Full Code Here

            new PrototypicalNodeFactory (
                new Tag[]
                {
                    new TitleTag (),
                    new BaseHrefTag (),
                    new MetaTag (),
                    new StyleTag (),
                }));
        parseAndAssertNodeCount(7);
        assertTrue(node[2] instanceof TitleTag);
        TitleTag titleTag = (TitleTag) node[2];
View Full Code Here

        assertTrue("HTML node should have two children",2 == html.getChildCount ());
        assertTrue("Second node should be an HEAD node",html.getChild(1) instanceof HeadTag);
        HeadTag head = (HeadTag)html.getChild(1);
        assertTrue("HEAD node should have eleven children",11 == head.getChildCount ());
        assertTrue("Third child should be a title tag",head.getChild(2) instanceof MetaTag);
        MetaTag metaTag = (MetaTag)head.getChild(2);
        assertStringEquals("Meta Tag Name",description,metaTag.getMetaTagName());
        assertStringEquals("Meta Tag Contents",content,metaTag.getMetaContent());
        assertStringEquals("toHTML()",tag,metaTag.toHtml());
    }
View Full Code Here

        tag + "\n"+
        "<META name=\"keywords\" content=\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\">\n"+
        "<META name=\"language\" content=\"en\">\n"+
        "<META name=\"owner\" content=\"service@admin.spamcop.net\">\n"+
        "<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=ISO-8859-1\">");
        parser.setNodeFactory (new PrototypicalNodeFactory (new MetaTag ()));
        parseAndAssertNodeCount(18);
        assertTrue("Node 8 should be End Tag",node[7] instanceof Tag && ((Tag)node[7]).isEndTag ());
        assertTrue("Node 10 should be META Tag",node[9] instanceof MetaTag);
        MetaTag metaTag;
        metaTag = (MetaTag) node[9];
        assertEquals("Meta Tag 10 Name",description,metaTag.getMetaTagName());
        assertEquals("Meta Tag 10 Contents",content,metaTag.getMetaContent());

        assertTrue("Node 12 should be META Tag",node[11] instanceof MetaTag);
        assertTrue("Node 14 should be META Tag",node[13] instanceof MetaTag);
        assertTrue("Node 16 should be META Tag",node[15] instanceof MetaTag);
        assertTrue("Node 18 should be META Tag",node[17] instanceof MetaTag);

        metaTag = (MetaTag) node[11];
        assertEquals("Meta Tag 12 Name","keywords",metaTag.getMetaTagName());
        assertEquals("Meta Tag 12 Contents","SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns",metaTag.getMetaContent());
        assertNull("Meta Tag 12 Http-Equiv",metaTag.getHttpEquiv());

        metaTag = (MetaTag) node[13];
        assertEquals("Meta Tag 14 Name","language",metaTag.getMetaTagName());
        assertEquals("Meta Tag 14 Contents","en",metaTag.getMetaContent());
        assertNull("Meta Tag 14 Http-Equiv",metaTag.getHttpEquiv());

        metaTag = (MetaTag) node[15];
        assertEquals("Meta Tag 16 Name","owner",metaTag.getMetaTagName());
        assertEquals("Meta Tag 16 Contents","service@admin.spamcop.net",metaTag.getMetaContent());
        assertNull("Meta Tag 16 Http-Equiv",metaTag.getHttpEquiv());

        metaTag = (MetaTag) node[17];
        assertNull("Meta Tag 18 Name",metaTag.getMetaTagName());
        assertEquals("Meta Tag 18 Contents","text/html; charset=ISO-8859-1",metaTag.getMetaContent());
        assertEquals("Meta Tag 18 Http-Equiv","content-type",metaTag.getHttpEquiv());
    }
View Full Code Here

TOP

Related Classes of org.htmlparser.tags.MetaTag

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.