Package org.untmpl

Source Code of org.untmpl.Traverser$Callback

package org.untmpl;

import org.htmlcleaner.BaseToken;
import org.htmlcleaner.ContentToken;
import org.htmlcleaner.TagNode;

/**
* Bottom-up traversal of an HTML document, producing hashes based on the
* subtrees of each node as it goes. For every encountered node a call-back
* function will be called.
*
* @author eskil.andreen
*
*/
public class Traverser {

  /**
   * Call-back operations should implement this class.
   *
   * @author eskil.andreen
   *
   */
  public static interface Callback {
    public void handle(TagNode tn, Node n);
  }

  /**
   * Traverses the HTML document starting from root. For every encountered
   * node the handle method of the given call-back class is called.
   *
   * @param <T>
   * @param root
   *            The root node of the HTML document.
   * @param callback
   *            The call-back class.
   * @return Convenience access to the supplied call-back class.
   */
  public static <T extends Callback> T traverse(TagNode root, T callback) {
    traverse(root, 0, callback);
    return callback;
  }

  /**
   * Recursive, bottom-up traversal of the HTML document tree. Each node is
   * assigned a hash value create from, among other things, the hash value of
   * its children. Thus, for two nodes to have the same subtrees they must
   * have the same hash values.
   *
   * @param root
   * @param depth
   * @param callback
   * @return
   */
  private static Node traverse(BaseToken root, int depth, Callback callback) {
    if (root instanceof TagNode) {
      TagNode tn = (TagNode) root;

      int height = -1;
      int hash = HashCodeUtil.SEED;

      hash = HashCodeUtil.hash(hash, tn.getName());
      hash = HashCodeUtil.hash(hash, tn.getAttributes().toString());

      for (Object n : tn.getChildren()) {
        if (isInteresting(n)) {
          Node c = traverse((BaseToken) n, depth + 1, callback);
          hash = HashCodeUtil.hash(hash, c);
          if (c.height > height)
            height = c.height;
        }
      }

      hash = HashCodeUtil.hash(hash, depth);
      hash = HashCodeUtil.hash(hash, height);

      Node out = new Node(hash, depth, height + 1);
      callback.handle(tn, out);

      return out;
    } else if (root instanceof ContentToken) {
      ContentToken cn = (ContentToken) root;

      int hash = HashCodeUtil.SEED;
      hash = HashCodeUtil.hash(hash, cn.getContent());
      hash = HashCodeUtil.hash(hash, depth);
      hash = HashCodeUtil.hash(hash, 0);

      Node out = new Node(hash, depth, 0);
      return out;
    }
    return null;
  }

  private static boolean isInteresting(Object o) {
    return o instanceof TagNode || o instanceof ContentToken;
  }
}
TOP

Related Classes of org.untmpl.Traverser$Callback

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.