Package org.htmlparser.util

Examples of org.htmlparser.util.SimpleNodeIterator


     * @param visitor The <code>NodeVisitor</code> object to be signalled
     * for each child and possibly this tag.
     */
    public void accept (NodeVisitor visitor)
    {
        SimpleNodeIterator children;
        Node child;

        if (visitor.shouldRecurseSelf ())
            visitor.visitTag (this);
        if (visitor.shouldRecurseChildren ())
        {
            if (null != getChildren ())
            {
                children = children ();
                while (children.hasMoreNodes ())
                {
                    child = children.nextNode ();
                    child.accept (visitor);
                }
            }
            if ((null != getEndTag ()) && (this != getEndTag ())) // 2nd guard handles <tag/>
                getEndTag ().accept (visitor);
View Full Code Here


     * Get an iterator over the children of this node.
     * @return An iterator over the children of this node.
     */
    public SimpleNodeIterator children ()
    {
        SimpleNodeIterator ret;

        if (null != getChildren ())
            ret = getChildren ().elements ();
        else
            ret = (new NodeList ()).elements ();
View Full Code Here

     * @param visitor The <code>NodeVisitor</code> object to be signalled
     * for each child and possibly this tag.
     */
    public void accept (NodeVisitor visitor)
    {
        SimpleNodeIterator children;
        Node child;

        if (visitor.shouldRecurseSelf ())
            visitor.visitTag (this);
        if (visitor.shouldRecurseChildren ())
        {
            if (null != getChildren ())
            {
                children = children ();
                while (children.hasMoreNodes ())
                {
                    child = children.nextNode ();
                    child.accept (visitor);
                }
            }
            if ((null != getEndTag ()) && (this != getEndTag ())) // 2nd guard handles <tag/>
                getEndTag ().accept (visitor);
View Full Code Here

  }

  public void accept(NodeVisitor visitor) {
    if (visitor.shouldRecurseChildren()) {
      startTag.accept(visitor);
      SimpleNodeIterator children = children();
      while (children.hasMoreNodes()) {
        Node child = (Node) children.nextNode();
        child.accept(visitor);
      }
      endTag.accept(visitor);
    }
    if (visitor.shouldRecurseSelf())
View Full Code Here

      return null;

    final HashSet<String> set = new HashSet<>();
    Parser parser = new Parser(html);
    NodeList matches = parser.extractAllNodesThatMatch(LINK_FILTER);
    SimpleNodeIterator it = matches.elements();
    while (it.hasMoreNodes()) {
      LinkTag node = (LinkTag) it.nextNode();
      String link = node.getLink().trim();
      // remove the anchor if present
      if (link.contains("#")) {
        link = link.substring(0, link.lastIndexOf('#'));
      }
View Full Code Here

   */
  public static String extractTitle(String html) throws ParserException {
    String title = "";
    Parser parser = new Parser(html);
    NodeList matches = parser.extractAllNodesThatMatch(TITLE_FILTER);
    SimpleNodeIterator it = matches.elements();
    while (it.hasMoreNodes()) {
      TitleTag node = (TitleTag) it.nextNode();
      title = node.getTitle().trim();
    }
    return title;
  }
View Full Code Here

  public String process(String text) {
    Parser htmlParser = new Parser();
    try {
      htmlParser.setInputHTML(text);
      NodeList nodeList = htmlParser.parse(null);
            SimpleNodeIterator itChildren = nodeList.elements();
            Node current;
            while (itChildren.hasMoreNodes()) {
              current = itChildren.nextNode();
              processNode(current);
             }
            return nodeList.toHtml();
    } catch (ParserException e) {
      // TODO Auto-generated catch block
View Full Code Here

        }
    }
    else if (node instanceof Tag && !forbiddenParents.contains(((Tag)node).getTagName().toLowerCase())){
      NodeList childList = node.getChildren();
      if (childList != null) {
        SimpleNodeIterator children =  childList.elements();
        Node child;
        while (children.hasMoreNodes()) {
          child = children.nextNode();
          processNode(child);
        }
      }
    }
  }
View Full Code Here

TOP

Related Classes of org.htmlparser.util.SimpleNodeIterator

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.