Package jodd.lagarto.dom

Source Code of jodd.lagarto.dom.LagartoDOMBuilderTagVisitor

// Copyright (c) 2003-2014, Jodd Team (jodd.org). All Rights Reserved.

package jodd.lagarto.dom;

import jodd.lagarto.Doctype;
import jodd.lagarto.Tag;
import jodd.lagarto.TagType;
import jodd.lagarto.TagUtil;
import jodd.lagarto.TagVisitor;
import jodd.util.Util;
import jodd.util.StringPool;
import jodd.log.Logger;
import jodd.log.LoggerFactory;

/**
* Lagarto tag visitor that builds DOM tree.
* It (still) does not build the tree <i>fully</i> by the HTML specs,
* however, it works good enough for any sane HTML out there.
* In the default mode, the tree builder does <b>not</b> change
* the order of the elements, so the returned tree reflects
* the input. So if the input contains crazy stuff, the tree will
* be weird, too :)
* <p>
* In experimental <i>html-plus</i> mode we do have some
* further HTML5 rules implemented, that according to some rules
* may change the node position. However, not all rules are
* implemented (yet) and this is still just experimental.
*/
public class LagartoDOMBuilderTagVisitor implements TagVisitor {

  private static final Logger log = LoggerFactory.getLogger(LagartoDOMBuilderTagVisitor.class);

  protected final LagartoDOMBuilder domBuilder;
  protected final HtmlImplicitClosingRules implRules = new HtmlImplicitClosingRules();
  protected HtmlVoidRules htmlVoidRules;

  protected Document rootNode;
  protected Node parentNode;

  /**
   * While enabled, nodes will be added to the DOM tree.
   * Useful for skipping some tags.
   */
  protected boolean enabled;

  public LagartoDOMBuilderTagVisitor(LagartoDOMBuilder domBuilder) {
    this.domBuilder = domBuilder;
  }

  /**
   * Returns root {@link Document document} node of parsed DOM tree.
   */
  public Document getDocument() {
    return rootNode;
  }

  // ---------------------------------------------------------------- start/end

  /**
   * Starts with DOM building.
   * Creates root {@link jodd.lagarto.dom.Document} node.
   */
  public void start() {
    log.debug("DomTree builder started");

    if (rootNode == null) {
      rootNode = new Document(domBuilder.config);
    }
    parentNode = rootNode;
    enabled = true;

    if (domBuilder.config.isEnabledVoidTags()) {
      htmlVoidRules = new HtmlVoidRules();
    }
  }

  /**
   * Finishes the tree building. Closes unclosed tags.
   */
  public void end() {
    if (parentNode != rootNode) {

      Node thisNode = parentNode;

      while (thisNode != rootNode) {
        if (domBuilder.config.isImpliedEndTags()) {
          if (implRules.implicitlyCloseTagOnEOF(thisNode.getNodeName())) {
            thisNode = thisNode.getParentNode();
            continue;
          }
        }

        error("Unclosed tag closed: <" + thisNode.getNodeName() + ">");

        thisNode = thisNode.getParentNode();
      }
    }

    // remove whitespaces
    if (domBuilder.config.isIgnoreWhitespacesBetweenTags()) {
      removeLastChildNodeIfEmptyText(parentNode, true);
    }

    // foster
    if (domBuilder.config.isUseFosterRules()) {
      HtmlFosterRules fosterRules = new HtmlFosterRules();
      fosterRules.fixFosterElements(rootNode);
    }

    // elapsed
    rootNode.end();

    if (log.isDebugEnabled()) {
      log.debug("LagartoDom tree created in " + rootNode.getElapsedTime() + " ms");
    }
  }

  // ---------------------------------------------------------------- tag

  /**
   * Creates new element with correct configuration.
   */
  protected Element createElementNode(Tag tag) {
    boolean hasVoidTags = htmlVoidRules != null;

    boolean isVoid = false;
    boolean selfClosed = false;

    if (hasVoidTags) {
      isVoid = htmlVoidRules.isVoidTag(tag.getName());

      // HTML and XHTML
      if (isVoid) {
        // it's void tag, lookup the flag
        selfClosed = domBuilder.config.isSelfCloseVoidTags();
      }
    } else {
      // XML, no voids, lookup the flag
      selfClosed = domBuilder.config.isSelfCloseVoidTags();
    }

    return new Element(rootNode, tag, isVoid, selfClosed);
  }

  /**
   * Visits tags.
   */
  public void tag(Tag tag) {
    if (!enabled) {
      return;
    }

    TagType tagType = tag.getType();
    Element node;

    switch (tagType) {
      case START:
        if (domBuilder.config.isIgnoreWhitespacesBetweenTags()) {
          removeLastChildNodeIfEmptyText(parentNode, false);
        }

        node = createElementNode(tag);

        if (domBuilder.config.isImpliedEndTags()) {
          while (true) {
            String parentNodeName = parentNode.getNodeName();
            if (!implRules.implicitlyCloseParentTagOnNewTag(parentNodeName, node.getNodeName())) {
              break;
            }
            parentNode = parentNode.getParentNode();

            if (log.isDebugEnabled()) {
              log.debug("Implicitly closed tag <" + node.getNodeName() + "> ");
            }
          }
        }

        parentNode.addChild(node);

        if (node.isVoidElement() == false) {
          parentNode = node;
        }
        break;

      case END:
        if (domBuilder.config.isIgnoreWhitespacesBetweenTags()) {
          removeLastChildNodeIfEmptyText(parentNode, true);
        }

        String tagName = tag.getName().toString();

        Node matchingParent = findMatchingParentOpenTag(tagName);

        if (matchingParent == parentNode) {    // regular situation
          parentNode = parentNode.getParentNode();
          break;
        }

        if (matchingParent == null) {      // matching open tag not found, remove it
          error("Orphan closed tag ignored: </" + tagName + "> " + tag.getTagPosition());
          break;
        }

        // try to close it implicitly
        if (domBuilder.config.isImpliedEndTags()) {
          boolean fixed = false;

          while (implRules.implicitlyCloseParentTagOnTagEnd(parentNode.getNodeName(), tagName)) {
            parentNode = parentNode.getParentNode();

            if (log.isDebugEnabled()) {
              log.debug("Implicitly closed tag <" + tagName + ">");
            }

            if (parentNode == matchingParent) {
              parentNode = matchingParent.parentNode;
              fixed = true;
              break;
            }
          }
          if (fixed) {
            break;
          }
        }


        // matching tag found, but it is not a regular situation
        // therefore close all unclosed tags in between
        fixUnclosedTagsUpToMatchingParent(tag, matchingParent);

        break;

      case SELF_CLOSING:
        if (domBuilder.config.isIgnoreWhitespacesBetweenTags()) {
          removeLastChildNodeIfEmptyText(parentNode, false);
        }

        node = createElementNode(tag);
        parentNode.addChild(node);
        break;
    }
  }

  // ---------------------------------------------------------------- util

  /**
   * Removes last child node if contains just empty text.
   */
  protected void removeLastChildNodeIfEmptyText(Node parentNode, boolean closedTag) {
    if (parentNode == null) {
      return;
    }

    Node lastChild = parentNode.getLastChild();
    if (lastChild == null) {
      return;
    }
   
    if (lastChild.getNodeType() != Node.NodeType.TEXT) {
      return;
    }

    if (closedTag) {
      if (parentNode.getChildNodesCount() == 1) {
        return;
      }
    }

    Text text = (Text) lastChild;

    if (text.isBlank()) {
      lastChild.detachFromParent();
    }
  }

  /**
   * Finds matching parent open tag or <code>null</code> if not found.
   */
  protected Node findMatchingParentOpenTag(String tagName) {
    Node parent = parentNode;
   
    if (!rootNode.config.isCaseSensitive()) {
      tagName = tagName.toLowerCase();
    }
   
    while (parent != null) {
      String parentNodeName = parent.getNodeName();

      if (parentNodeName != null) {
        if (!rootNode.config.isCaseSensitive()) {
          parentNodeName = parentNodeName.toLowerCase();
        }
      }
     
      if (tagName.equals(parentNodeName)) {
        return parent;
      }
      parent = parent.getParentNode();
    }
    return null;
  }

  /**
   * Fixes all unclosed tags up to matching parent. Missing end tags will be added
   * just before parent tag is closed, making the whole inner content as its tag body.
   * <p>
   * Tags that can be closed implicitly are checked and closed.
   * <p>
   * There is optional check for detecting orphan tags inside the
   * table or lists. If set, tags can be closed beyond the border of the
   * table and the list and it is reported as orphan tag.
   * <p>
   * This is just a generic solutions, closest to the rules.
   */
  protected void fixUnclosedTagsUpToMatchingParent(Tag tag, Node matchingParent) {
    if (domBuilder.config.isUnclosedTagAsOrphanCheck()) {
      Node thisNode = parentNode;

      if (!TagUtil.equalsIgnoreCase(tag.getName(), "table")) {

        // check if there is table or list between this node
        // and matching parent
        while (thisNode != matchingParent) {
          String thisNodeName = thisNode.getNodeName().toLowerCase();

          if (thisNodeName.equals("table") || thisNodeName.equals("ul") || thisNodeName.equals("ol")) {

            String positionString = tag.getPosition();
            if (positionString == null) {
              positionString = StringPool.EMPTY;
            }

            error("Orphan closed tag ignored: </" + tag.getName() + "> " + positionString);
            return;
          }
          thisNode = thisNode.getParentNode();
        }
      }
    }

    while (true) {
      if (parentNode == matchingParent) {
        parentNode = parentNode.getParentNode();
        break;
      }

      Node parentParentNode = parentNode.getParentNode();

      if (domBuilder.config.isImpliedEndTags()) {
        if (implRules.implicitlyCloseParentTagOnNewTag(
            parentParentNode.getNodeName(), parentNode.getNodeName())) {
          // break the tree: detach this node and append it after parent

          parentNode.detachFromParent();

          parentParentNode.getParentNode().addChild(parentNode);
        }
      }

      // debug message

      error("Unclosed tag closed: <" + parentNode.getNodeName() + ">");

      // continue looping
      parentNode = parentParentNode;
    }
  }

  // ---------------------------------------------------------------- tree

  public void script(Tag tag, CharSequence body) {
    if (!enabled) {
      return;
    }

    Element node = createElementNode(tag);

    parentNode.addChild(node);

    if (body.length() != 0) {
      Node text = new Text(rootNode, body.toString());
      node.addChild(text);
    }
  }

  public void comment(CharSequence comment) {
    if (!enabled) {
      return;
    }

    if (domBuilder.config.isIgnoreWhitespacesBetweenTags()) {
      removeLastChildNodeIfEmptyText(parentNode, false);
    }

    if (domBuilder.config.isIgnoreComments()) {
      return;
    }

    Node node = new Comment(rootNode, comment.toString());

    parentNode.addChild(node);
  }

  public void text(CharSequence text) {
    if (!enabled) {
      return;
    }

    String textValue = text.toString();

    Node node = new Text(rootNode, textValue);

    parentNode.addChild(node);
  }

  public void cdata(CharSequence cdata) {
    if (!enabled) {
      return;
    }

    CData cdataNode = new CData(rootNode, cdata.toString());

    parentNode.addChild(cdataNode);
  }

  public void xml(CharSequence version, CharSequence encoding, CharSequence standalone) {
    if (!enabled) {
      return;
    }

    XmlDeclaration xmlDeclaration = new XmlDeclaration(rootNode, version, encoding, standalone);

    parentNode.addChild(xmlDeclaration);
  }

  public void doctype(Doctype doctype) {
    if (!enabled) {
      return;
    }

    DocumentType documentType = new DocumentType(rootNode,
        Util.toString(doctype.getName()),
        Util.toString(doctype.getPublicIdentifier()),
        Util.toString(doctype.getSystemIdentifier())
    );

    parentNode.addChild(documentType);
  }

  public void condComment(CharSequence expression, boolean isStartingTag, boolean isHidden, boolean isHiddenEndTag) {
    String expressionString = expression.toString().trim();

    if (expressionString.equals("endif")) {
      enabled = true;
      return;
    }

    if (expressionString.equals("if !IE")) {
      enabled = false;
      return;
    }

    float ieVersion = domBuilder.config.getCondCommentIEVersion();

    if (htmlCCommentExpressionMatcher == null) {
      htmlCCommentExpressionMatcher = new HtmlCCommentExpressionMatcher();
    }

    enabled = htmlCCommentExpressionMatcher.match(ieVersion, expressionString);
  }

  protected HtmlCCommentExpressionMatcher htmlCCommentExpressionMatcher;


  // ---------------------------------------------------------------- error

  public void error(String message) {
    rootNode.addError(message);
    log.log(domBuilder.config.getParsingErrorLogLevel(), message);
  }

}
TOP

Related Classes of jodd.lagarto.dom.LagartoDOMBuilderTagVisitor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.