Package org.htmlparser.util

Examples of org.htmlparser.util.SimpleNodeIterator.nextNode()


            if (null != getChildren ())
            {
                children = children ();
                while (children.hasMoreNodes ())
                {
                    child = children.nextNode ();
                    child.accept (visitor);
                }
            }
            if ((null != getEndTag ()) && (this != getEndTag ())) // 2nd guard handles <tag/>
                getEndTag ().accept (visitor);
View Full Code Here


  public void accept(NodeVisitor visitor) {
    if (visitor.shouldRecurseChildren()) {
      startTag.accept(visitor);
      SimpleNodeIterator children = children();
      while (children.hasMoreNodes()) {
        Node child = (Node) children.nextNode();
        child.accept(visitor);
      }
      endTag.accept(visitor);
    }
    if (visitor.shouldRecurseSelf())
View Full Code Here

    final HashSet<String> set = new HashSet<>();
    Parser parser = new Parser(html);
    NodeList matches = parser.extractAllNodesThatMatch(LINK_FILTER);
    SimpleNodeIterator it = matches.elements();
    while (it.hasMoreNodes()) {
      LinkTag node = (LinkTag) it.nextNode();
      String link = node.getLink().trim();
      // remove the anchor if present
      if (link.contains("#")) {
        link = link.substring(0, link.lastIndexOf('#'));
      }
View Full Code Here

    String title = "";
    Parser parser = new Parser(html);
    NodeList matches = parser.extractAllNodesThatMatch(TITLE_FILTER);
    SimpleNodeIterator it = matches.elements();
    while (it.hasMoreNodes()) {
      TitleTag node = (TitleTag) it.nextNode();
      title = node.getTitle().trim();
    }
    return title;
  }
View Full Code Here

      htmlParser.setInputHTML(text);
      NodeList nodeList = htmlParser.parse(null);
            SimpleNodeIterator itChildren = nodeList.elements();
            Node current;
            while (itChildren.hasMoreNodes()) {
              current = itChildren.nextNode();
              processNode(current);
             }
            return nodeList.toHtml();
    } catch (ParserException e) {
      // TODO Auto-generated catch block
View Full Code Here

      NodeList childList = node.getChildren();
      if (childList != null) {
        SimpleNodeIterator children =  childList.elements();
        Node child;
        while (children.hasMoreNodes()) {
          child = children.nextNode();
          processNode(child);
        }
      }
    }
  }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.