Package org.htmlparser.filters

Examples of org.htmlparser.filters.NodeClassFilter


            try
            {
                IOUtils.copy(content, output);
               
                //####TODO: Dependency inject this crap
                filter = new AndFilter(new NodeClassFilter(LinkTag.class),
                                       new NodeFilter()
                                       {
                                           public boolean accept(Node node)
                                           {
                                               return(!((LinkTag)node).isMailLink());
View Full Code Here


    /**
     * Create a wrapper over a new NodeClassFilter.
     */
    public NodeClassFilterWrapper ()
    {
        mFilter = new NodeClassFilter ();

        // add the strategy choice
        mClass = new JComboBox ();
        mClass.addItem ("");
        add (mClass);
View Full Code Here

     * Get the underlying node filter object.
     * @return The node filter object suitable for serialization.
     */
    public NodeFilter getNodeFilter ()
    {
        NodeClassFilter ret;
       
        ret = new NodeClassFilter ();
        ret.setMatchClass (mFilter.getMatchClass ());
           
        return (ret);
    }
View Full Code Here

        "if (!hp.isHomePage('http://www.google.com/')) {document.write(\"<p><a href=\"/mgyhp.html\" onClick=\"style.behavior='url(#default#homepage)';setHomePage('http://www.google.com/');\">Make Google Your Homepage!</a>\");}\n"+
        "</script></font>\n"+
        "<p><font size=-2>&copy;2002 Google</font><font size=-2> - Searching 3,083,324,652 web pages</font></center></body></html>\n"
        );
        NodeList collectionList = new NodeList();
        NodeClassFilter filter = new NodeClassFilter (LinkTag.class);
        for (NodeIterator e = parser.elements();e.hasMoreNodes();)
            e.nextNode().collectInto(collectionList,filter);
        assertEquals("Size of collection vector should be 11",11,collectionList.size());
        // All items in collection vector should be links
        for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) {
View Full Code Here

            stream.readFully (buffer);
            html = new String (buffer);
            try
            {
                parser.setInputHTML (html);
                nodes = parser.extractAllNodesThatMatch (new NodeClassFilter (LinkTag.class));
            }
            catch (ParserException e)
            {
                e.printStackTrace ();
                nodes = new NodeList ();
View Full Code Here

            // handle robots meta tag according to http://www.robotstxt.org/wc/meta-user.html
            // <meta name="robots" content="index,follow" />
            // <meta name="robots" content="noindex,nofollow" />
            robots = list.extractAllNodesThatMatch (
                new AndFilter (
                    new NodeClassFilter (MetaTag.class),
                    new HasAttributeFilter ("name", "robots")), true);
            if (0 != robots.size ())
            {
                robot = (MetaTag)robots.elementAt (0);
                content = robot.getAttribute ("content").toLowerCase ();
View Full Code Here

            if (null == url)
                System.exit (1);
        }
        else
            url = args[0];
        filter = new NodeClassFilter (LinkTag.class);
        if ((1 < args.length) && args[1].equalsIgnoreCase ("-maillinks"))
            filter = new AndFilter (
                filter,
                new NodeFilter ()
                {
View Full Code Here

        Vector vector;
        LinkTag link;
        URL[] ret;

        mParser.reset ();
        filter = new NodeClassFilter (LinkTag.class);
        try
        {
            list = mParser.extractAllNodesThatMatch (filter);
        }
        catch (EncodingChangeException ece)
View Full Code Here

    {
        NodeFilter filter;
        NodeList ret;
       
        ret = new NodeList ();
        filter = new NodeClassFilter (type);
        node.collectInto (ret, filter);

        return (ret.toNodeArray ());
    }
View Full Code Here

     * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
     */
    public static String[] splitTags (String input, Class nodeType)
        throws ParserException, UnsupportedEncodingException
    {
        return splitTags (input, new NodeClassFilter (nodeType), true, true);
    }
View Full Code Here

TOP

Related Classes of org.htmlparser.filters.NodeClassFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.