Package com.google.caja.parser.html

Source Code of com.google.caja.parser.html.BooleanAttrs

// Copyright (C) 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.caja.parser.html;

import com.google.caja.lexer.FilePosition;
import com.google.caja.lexer.HtmlEntities;
import com.google.caja.lexer.HtmlTextEscapingMode;
import com.google.caja.lexer.HtmlTokenType;
import com.google.caja.lexer.InputSource;
import com.google.caja.lexer.TokenConsumer;
import com.google.caja.lexer.escaping.Escaping;
import com.google.caja.render.Concatenator;
import com.google.caja.reporting.MarkupRenderMode;
import com.google.caja.reporting.RenderContext;
import com.google.caja.util.SparseBitSet;
import com.google.caja.util.Strings;
import com.google.common.collect.ImmutableSet;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;

import javax.annotation.Nullable;

import org.w3c.dom.Attr;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
import org.w3c.dom.UserDataHandler;

/**
* Utilities for dealing with HTML/XML DOM trees.
*
* WARNING: The renderUnsafe methods in this class are unsafe for cajoled
* code because the Caja pipeline does not sanitize comments.  In particular
* IE comments rendered by renderUnsafe will be executable.
*
* @author mikesamuel@gmail.com
*/
public class Nodes {
  private static final String FP_KEY = "caja:filePosition";
  private static final String RAW_TEXT_KEY = "caja:rawHtml";
  private static final String HAS_XMLNS_DECLARATION_KEY = "caja:hasXmlns";

  /**
   * Used to copy the has-xmlns-declaration bit from an element to elements
   * cloned/imported from it.
   */
  private static final UserDataHandler HAS_XMLNS_DECLARATION_DATA_HANDLER
      = new UserDataHandler() {
        public void handle(
            short operation, String key, Object data, Node src, Node dest) {
          switch (operation) {
            case UserDataHandler.NODE_CLONED:
            case UserDataHandler.NODE_IMPORTED:
              dest.setUserData(HAS_XMLNS_DECLARATION_KEY, Boolean.TRUE, this);
              break;
          }
        }
      };

  /** A left to right {@link Iterable} over the children of the given node. */
  public static Iterable<? extends Node> childrenOf(final Node n) {
    return new Iterable<Node>() {
      public Iterator<Node> iterator() {
        return new Iterator<Node>() {
          Node child = n.getFirstChild();
          public boolean hasNext() {
            return child != null;
          }
          public Node next() {
            if (child == null) { throw new NoSuchElementException(); }
            Node result = child;
            child = child.getNextSibling();
            return result;
          }
          public void remove() { throw new UnsupportedOperationException(); }
        };
      }
    };
  }

  /** An {@link Iterable} over the attributes of the given element. */
  public static Iterable<? extends Attr> attributesOf(final Element el) {
    return new Iterable<Attr>() {
      public Iterator<Attr> iterator() {
        return new Iterator<Attr>() {
          NamedNodeMap attrs = el.getAttributes();
          int i = 0;
          // The DOM spec says that elements with no attributes should return
          // null, though Xerces returns an empty map.
          int n = attrs != null ? attrs.getLength() : 0;
          public boolean hasNext() {
            return i < n;
          }
          public Attr next() {
            if (i == n) { throw new NoSuchElementException(); }
            return (Attr) attrs.item(i++);
          }
          public void remove() { throw new UnsupportedOperationException(); }
        };
      }
    };
  }

  /**
   * An {@link Iterable} over the elements of the given node list.
   * @throws ClassCastException if a member is fetched that is not an instance
   *     of outType/
   */
  public static <T extends Node> Iterable<T> nodeListIterable(
      final NodeList nl, final Class<? extends T> outType) {
    return new Iterable<T>() {
      public Iterator<T> iterator() {
        return new Iterator<T>() {
          int i = 0, n = nl != null ? nl.getLength() : 0;
          public boolean hasNext() {
            return i < n;
          }
          public T next() {
            if (i == n) { throw new NoSuchElementException(); }
            return outType.cast(nl.item(i++));
          }
          public void remove() { throw new UnsupportedOperationException(); }
        };
      }
    };
  }

  private static final FilePosition UNKNOWN_START_OF_FILE
      = FilePosition.startOfFile(InputSource.UNKNOWN);

  /**
   * Returns the last file position associated with the given node by
   * {@link #setFilePositionFor} or related methods.
   */
  public static FilePosition getFilePositionFor(Node node) {
    FilePosition pos = (FilePosition) node.getUserData(FP_KEY);
    if (pos != null) { return pos; }
    return UNKNOWN_START_OF_FILE;
  }

  /** The file position of the value of the given attribute. */
  public static FilePosition getFilePositionForValue(Attr a) {
    return getFilePositionFor(a.getFirstChild());
  }

  /** @see #getFilePositionFor */
  public static void setFilePositionFor(Node node, FilePosition pos) {
    node.setUserData(FP_KEY, pos, null);
  }

  public static void setFilePositionForValue(Attr a, FilePosition pos) {
    setFilePositionFor(a.getFirstChild(), pos);
  }

  /**
   * @see #hasXmlnsDeclaration(Element)
   */
  public static void markAsHavingXmlnsDeclaration(Element el) {
    el.setUserData(
        HAS_XMLNS_DECLARATION_KEY, Boolean.TRUE,
        HAS_XMLNS_DECLARATION_DATA_HANDLER);
  }

  /**
   * True for elements that had an {@code xmlns="<namespace-uri>"} declaration
   * when parsed so that the renderer can include one where it is safe to do so.
   */
  public static boolean hasXmlnsDeclaration(Element el) {
    return Boolean.TRUE.equals(el.getUserData(HAS_XMLNS_DECLARATION_KEY));
  }

  /**
   * Associates the HTML textual value as parsed with the given attribute.
   * If a client may have called {@link Node#setNodeValue(String)} or an alias
   * since parsing, the output should not be trusted.
   */
  public static void setRawValue(Attr a, String html) {
    setRawText((Text) a.getFirstChild(), html);
  }

  /** @see #setRawValue */
  public static String getRawValue(Attr a) {
    return getRawText((Text) a.getFirstChild());
  }

  /**
   * Associates the HTML textual value as parsed with the given node.
   * If a client may have called {@link Node#setNodeValue(String)} or an alias
   * since parsing, the output should not be trusted.
   */
  public static void setRawText(Text cd, String html) {
    cd.setUserData(RAW_TEXT_KEY, html, null);
  }

  /** @see #setRawText */
  public static String getRawText(Text cd) {
    return (String) cd.getUserData(RAW_TEXT_KEY);
  }

  /**
   * Replace entity references in HTML CDATA with their plain text equivalents.
   */
  public static String decode(String html) {
    if (html.indexOf('&') < 0) { return html; }
    char[] chars = html.toCharArray();
    int delta = 0;
    int n = chars.length;
    for (int i = 0; i < n;) {
      char ch = chars[i];
      if (chars[i] == '&') {
        long packedEndAndCodepoint = HtmlEntities.decodeEntityAt(chars, i, n);
        int end = (int) (packedEndAndCodepoint >>> 32);
        if (end != i + 1) {
          int codepoint = ((int) packedEndAndCodepoint) & 0xffffff;
          delta += end - (i + Character.toChars(codepoint, chars, i - delta));
          i = end;
        } else {
          chars[i - delta] = ch;
          ++i;
        }
      } else {
        chars[i - delta] = ch;
        ++i;
      }
    }
    if (delta == 0) { return html; }
    return String.valueOf(chars, 0, n - delta);
  }

  /**
   * Convert HTML to plain text by replacing HTML special characters with HTML
   * entities.
   */
  public static String encode(String raw) {
    StringBuilder sb = new StringBuilder((raw.length() * 3) / 2);
    Escaping.escapeXml(raw, false, sb);
    return sb.toString();
  }

  /**
   * Serializes the given DOM node to HTML or XML.
   * @param rc a context where the token consumer is typically a
   *   {@link Concatenator}, and the {@link RenderContext#asXml} is significant.
   */
  public static void render(Node node, Namespaces ns, RenderContext rc) {
    render(node, ns, rc, false);
  }

  /**
   * @deprecated For use only by non-caja clients of the parser/render
   */
  @Deprecated
  public static void renderUnsafe(DocumentType docType, Node node,
      Namespaces ns, RenderContext rc) {
    render(docType, node, ns, rc, true);
  }

  public static void render(DocumentType docType, Node node, Namespaces ns,
      RenderContext rc) {
    render(docType, node, ns, rc, false);
  }

  /**
   * @deprecated For use only by non-caja clients of the parser/render
   */
  @Deprecated
  public static void renderUnsafe(Node node, Namespaces ns, RenderContext rc) {
    render(node, ns, rc, true);
  }

  private static void render(Node node, Namespaces ns,
      RenderContext rc, boolean renderUnsafe) {
    render(null, node, ns, rc, renderUnsafe);
  }

  private static void render(
      DocumentType docType, Node node, Namespaces ns, RenderContext rc,
      boolean renderUnsafe) {
    StringBuilder sb = new StringBuilder(1 << 18);
    if (null != docType) {
      String rendering = renderDocumentType(docType);
      if (null != rendering) {
        sb.append(rendering);
      }
    }
    new Renderer(rc, sb, rc.markupRenderMode(), ns)
        .render(node, ns, renderUnsafe);
    TokenConsumer out = rc.getOut();
    FilePosition pos = getFilePositionFor(node);
    out.mark(FilePosition.startOf(pos));
    out.consume(sb.toString());
    out.mark(FilePosition.endOf(pos));
  }

  /**
   * Returns a rendering of document type.  This is handled explicitly here
   * rather than in {@link Nodes#render(Node, MarkupRenderMode)} to avoid
   * rendering a document type in the middle of a document.
   *
   * @return null if nothing to render or docType is invalid.
   */
  private static @Nullable String renderDocumentType(DocumentType docType) {
    String publicId = docType.getPublicId();
    String systemId = docType.getSystemId();
    String nodeName;

    if (null != docType.getOwnerDocument() &&
        null != docType.getOwnerDocument().getDocumentElement() &&
        null != docType.getOwnerDocument().getDocumentElement().getNodeName()) {
      nodeName = docType.getOwnerDocument()
        .getDocumentElement()
        .getNodeName();
    } else {
      return null;
    }

    if (!DoctypeMaker.isHtml(nodeName, publicId, systemId)) {
      return null;
    }

    StringBuilder sb = new StringBuilder();
    sb.append("<!DOCTYPE ").append(nodeName);
    // The Name in the document type declaration must match the element type
    // of the root element.
    if (null != publicId && publicId.length() > 0) {
      sb.append(" PUBLIC ")
        .append('"')
        .append(publicId.replaceAll("\"", "%22"))
        .append('"');
    }
    if (null != systemId && systemId.length() > 0) {
      // Sanity check - system urls should parse as an absolute uris
      try {
        URI u = new URI(systemId);
        if (u.isAbsolute() &&
            ("http".equals(u.getScheme()) || "https".equals(u.getScheme()))) {
          sb.append(" ")
            .append('"')
            .append(systemId.replaceAll("\"", "%22"))
            .append('"');
        }
      } catch (URISyntaxException e) {
        return null;
      }
    }
    sb.append(">");
    return sb.toString();
  }

  /**
   * Serializes the given DOM node to HTML or XML.
   * @param rc a context where the token consumer is typically a
   *   {@link Concatenator}, and the {@link RenderContext#asXml} is significant.
   */
  public static void render(Node node, RenderContext rc) {
    render(node, rc, false);
  }

  /**
   * @deprecated For use only by non-caja clients of the parser/render
   */
  @Deprecated
  public static void renderUnsafe(Node node, RenderContext rc) {
    render(node, Namespaces.HTML_DEFAULT, rc, true);
  }

  private static void render(Node node, RenderContext rc,
      boolean renderUnsafe) {
    render(node, Namespaces.HTML_DEFAULT, rc, renderUnsafe);
  }

  public static String render(Node node) {
    return render(node, false);
  }

  /**
   * @deprecated For use only by non-caja clients of the parser/render
   */
  @Deprecated
  public static String renderUnsafe(Node node) {
    return renderUnsafe(node, MarkupRenderMode.HTML);
  }

  @Deprecated
  public static String render(Node node, boolean asXml) {
    return render(node, asXml ? MarkupRenderMode.XML : MarkupRenderMode.HTML);
  }

  public static String render(Node node, MarkupRenderMode renderMode) {
    return render(node, renderMode, false);
  }

  /**
   * @deprecated For use only by non-caja clients of the parser/render
   */
  @Deprecated
  public static String renderUnsafe(Node node, MarkupRenderMode renderMode) {
    return render(node, renderMode, true);
  }

  private static String render(Node node, MarkupRenderMode renderMode,
      boolean renderUnsafe) {
    StringBuilder sb = new StringBuilder();
    RenderContext rc = new RenderContext(new Concatenator(sb, null))
        .withMarkupRenderMode(renderMode);
    render(node, rc, renderUnsafe);
    rc.getOut().noMoreTokens();
    return sb.toString();
  }

  public static String render(DocumentType docType, Node node,
      MarkupRenderMode renderMode) {
    return render(docType, node, renderMode, false);
  }

  /**
   * @deprecated For use only by non-caja clients of the parser/render
   */
  @Deprecated
  public static String renderUnsafe(DocumentType docType, Node node,
        MarkupRenderMode renderMode) {
    return render(docType, node, renderMode, true);
  }

  private static String render(DocumentType docType, Node node,
      MarkupRenderMode renderMode, boolean renderUnsafe) {
    StringBuilder sb = new StringBuilder();
    if (null != docType) {
      String rendering = renderDocumentType(docType);
      if (null != rendering) {
        sb.append(rendering);
      }
    }
    sb.append(render(node, renderMode, renderUnsafe));
    return sb.toString();
  }

  private Nodes() { /* uninstantiable */ }
}

final class Renderer {
  final RenderContext rc;
  final StringBuilder out;
  final MarkupRenderMode mode;
  final boolean asXml;
  final int namespaceDepthAtStart;

  Renderer(
      RenderContext rc, StringBuilder out, MarkupRenderMode mode,
      Namespaces ns) {
    this.rc = rc;
    this.out = out;
    this.mode = mode;
    this.asXml = mode == MarkupRenderMode.XML;
    this.namespaceDepthAtStart = depth(ns);
  }

  /** Shorthand for {@link Namespaces#HTML_NAMESPACE_URI}. */
  private static final String HTML_NS = Namespaces.HTML_NAMESPACE_URI;

  /**
   * @deprecated For use only by non-caja clients of the parser/render
   */
  @Deprecated
  void renderUnsafe(Node node, Namespaces ns) {
    render(node, ns, true);
  }

  void render(Node node, Namespaces ns) {
    render(node, ns, false);
  }

  void renderSibs(Node sib, Namespaces ns, boolean renderUnsafe) {
    for (; sib != null; sib = sib.getNextSibling()) {
      render(sib, ns, renderUnsafe);
    }
  }

  void render(Node node, Namespaces ns, boolean renderUnsafe) {
    switch (node.getNodeType()) {
      case Node.DOCUMENT_NODE: case Node.DOCUMENT_FRAGMENT_NODE:
        renderSibs(node.getFirstChild(), ns, renderUnsafe);
        break;
      case Node.ELEMENT_NODE: {
        Element el = (Element) node;
        out.append('<');
        int tagNameStart = out.length();
        boolean addElNs = false;
        Namespaces elNs;
        {
          String nsUri = el.getNamespaceURI();
          if (nsUri == null) { nsUri = HTML_NS; }
          elNs = ns.forUri(nsUri);
          if (elNs == null) {
            elNs = ns = addNamespace(ns, nsUri, el.getPrefix());
            addElNs = true;
          }
        }
        if (elNs.prefix.length() != 0) {
          out.append(elNs.prefix).append(':');
        }
        String localName = el.getLocalName();
        // TODO: do away with the below once Shindig has done away with Neko.
        // This is a workaround for a bug in Element.getLocalName in the version
        // of Neko used by Shindig.
        // See also similar attribute rendering code in this file.
        if (localName == null) {
          localName = el.getTagName();
          if (localName.indexOf(':') >= 0) {
            throw new UncheckedUnrenderableException(localName);
          }
        }
        boolean isHtml = elNs.uri == HTML_NS;
        if (isHtml) { localName = Strings.lower(localName); }
        out.append(localName);
        int tagNameEnd = out.length();

        if (addElNs) {
          out.append(' ');
          renderNamespace(elNs);
        } else if (elNs.prefix == "" && Nodes.hasXmlnsDeclaration(el)) {
          // Since the prefix of the namespace is blank, adding an xmlns cannot
          // possibly change the namespace resolution of contained elements or
          // attributes.
          out.append(" xmlns=\"");
          Escaping.escapeXml(elNs.uri, true, out);
          out.append('"');
        }
        NamedNodeMap attrs = el.getAttributes();
        for (int i = 0, n = attrs.getLength(); i < n; ++i) {
          Attr a = (Attr) attrs.item(i);
          String attrUri = a.getNamespaceURI();
          // Attributes created via setAttribute calls for ISINDEX elements and
          // xmlns attributes have no namespace URI.
          String attrLocalName = a.getLocalName();

          if (Namespaces.XMLNS_NAMESPACE_URI.equals(attrUri)) {
            String nsPrefix = attrLocalName;
            Namespaces added = addNamespaceFromAttribute(
                nsPrefix, a.getValue(), ns);
            if (added == null) { continue; }
            ns = added;
          } else if (attrLocalName == null) {
            attrLocalName = a.getName();
            if (isXmlnsDecl(attrLocalName)) {
              String nsPrefix = "";
              if (attrLocalName.length() > 5) {
                nsPrefix = attrLocalName.substring(6);
              }
              Namespaces added = addNamespaceFromAttribute(
                  nsPrefix, a.getValue(), ns);
              if (added == null) { continue; }
              ns = added;
            } else if (attrLocalName.indexOf(':') >= 0) {
              throw new UncheckedUnrenderableException(null);
            }
          }
          out.append(' ');
          if (attrUri != null && (attrUri = attrUri.intern()) != elNs.uri) {
            Namespaces attrNs = ns.forUri(attrUri);
            if (attrNs == null) {
              attrNs = ns = addNamespace(ns, attrUri, a.getPrefix());
              renderNamespace(attrNs);
              out.append(' ');
            }
            out.append(attrNs.prefix).append(':');
          }
          attrLocalName = emitLocalName(attrLocalName, isHtml);
          // http://www.w3.org/TR/html401/intro/sgmltut.html
          // #didx-boolean_attribute
          // Authors should be aware that many user agents only recognize the
          // minimized form of boolean attributes and not the full form.
          if (!(isHtml && mode == MarkupRenderMode.HTML4_BACKWARDS_COMPAT
                && BooleanAttrs.isBooleanAttr(attrLocalName))) {
            out.append("=\"");
            Escaping.escapeXml(a.getValue(), true, out);
            out.append('"');
          }
        }

        HtmlTextEscapingMode m =
            HtmlTextEscapingMode.getModeForTag(localName);

        Node first = el.getFirstChild();
        if (first == null && m == HtmlTextEscapingMode.VOID) {
          out.append(" />");
        } else {
          out.append(">");
          if (m == HtmlTextEscapingMode.CDATA
              || m == HtmlTextEscapingMode.PLAIN_TEXT) {
            renderCdata(localName, el, asXml);
          } else {
            renderSibs(first, ns, renderUnsafe);
          }
          out.append("</").append(out, tagNameStart, tagNameEnd).append(">");
        }
        break;
      }
      case Node.TEXT_NODE:
        // This is required for all PCDATA content to distinguish it from tags.
        // This is not only appropriate for RCDATA, but is required.
        // http://dev.w3.org/html5/markup/aria/syntax.html#escaping-text-span:
        //   The text in style, script, title, and textarea elements must not
        //   have an escaping text span start that is not followed by an
        //   escaping text span end.
        // The script and style mentioned above have CDATA content, not RCDATA,
        // but title and textarea are the RCDATA to which this is relevant.
        Escaping.escapeXml(node.getNodeValue(), true, out);
        break;
      case Node.CDATA_SECTION_NODE:
        String value = node.getNodeValue();
        Escaping.escapeXml(value, true, out);
        break;
      case Node.ATTRIBUTE_NODE: {
        Attr a = (Attr) node;
        String localName = a.getLocalName();
        if (localName == null) {
          localName = a.getName();
        }
        emitLocalName(localName, HTML_NS.equals(a.getNamespaceURI()));
        out.append("=\"");
        Escaping.escapeXml(a.getValue(), true, out);
        out.append('"');
        break;
      }
      case Node.PROCESSING_INSTRUCTION_NODE: {
        if (!asXml) {
          throw new UncheckedUnrenderableException(
              "XML not renderable as HTML due to processing instruction");
        }
        ProcessingInstruction pi = (ProcessingInstruction) node;
        String target = pi.getTarget();
        String data = pi.getData();
        if (data.contains("?>")) {
          throw new UncheckedUnrenderableException(
              "XML document not renderable due to \"?>\" inside "
              + "processing instruction");
        }
        if (Strings.eqIgnoreCase(target.substring(0, 3), "xml")
            || !isName(target)) {  // isName check avoids targets with "?>".
          throw new UncheckedUnrenderableException(
              "Bad processing instruction target " + target);
        }
        out.append("<?").append(target).append(' ').append(data).append("?>");
        break;
      }
      case Node.COMMENT_NODE: {
        // Comment nodes could either be of the standard HTML comment type,
        // or of the downlevel-hidden type of IE comments or
        // or of the downlevel-revealed type of IE comments.
        // 1) Standard HTML comments and IE downlevel-hidden comments are
        // have HtmlTokenType.COMMENT present in the userData of the node
        // These are only rendered if renderUnsafe is true, and these need
        // <!-- and --> to be attached to them, along with extra sanitization
        // to make sure they match the specs.
        // 2) IE downlevel-revealed comment markers do not need either
        // <!-- or --> and these should be rendered even if renderUnsafe is
        // false, because these are directives processed by non-IE browsers
        // as well.
        String commentType =
                node.getUserData("COMMENT_TYPE") != null ?
                node.getUserData("COMMENT_TYPE").toString() :
                // TODO(anupama): We need to have COMMENT as default because
                // Node.cloneNode does not copy over userData for nodes.
                // Figure out how we can fix this to behave correctly.
                HtmlTokenType.COMMENT.toString();
        String text = node.getNodeValue();
        boolean isStandardComment =
            HtmlTokenType.COMMENT.toString().equals(commentType);
        if (renderUnsafe && isStandardComment) {
          // HTML5 spec 11.1.6
          // Comments must start with the four character sequence (<!--).
          // Following this sequence, the comment may have text, with the
          // additional restriction that the text must not start with a
          // single U+003E GREATER-THAN SIGN character (>), nor start with a
          // U+002D HYPHEN-MINUS character (-) followed by a U+003E
          // GREATER-THAN SIGN (>) character, nor contain two consecutive
          // U+002D HYPHEN-MINUS characters (--), nor end with a U+002D
          // HYPHEN-MINUS character (-). Finally, the comment must be ended
          // by the three character sequence (-->).

          // XML 1.0 spec 2.5
          // Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'

          String problem = null;
          problem = text.startsWith(">") ? "starts with '>'" : problem;
          if (rc.markupRenderMode() != MarkupRenderMode.HTML) {
            problem = text.startsWith("-") ? "starts with '-'" : problem;
            problem = text.endsWith("-") ? "ends with '-'" : problem;
          } else {
            // If the comment starts or ends with "-", we remove these to make
            // it spec-compliant as far as possible.
            if (text.startsWith("-") || text.endsWith("-")) {
              while (text.startsWith("-")) {
                text = text.substring(1);
              }
              while (text.endsWith("-")) {
                text = text.substring(0, text.length() - 1);
              }
            }
          }
          // Comment nodes are only rendered in unsafe mode
          // TODO: Uncommenting the following check makes the comment rendering
          // html5/xml compliant, however, breaks some webpages which rely on
          // broken behaviour in browsers.
          // problem = text.contains("--") ? "contains '--'" : problem;
          if (null != problem) {
            throw new UncheckedUnrenderableException(
                "XML comment unrenderable because it " + problem);
          }
          out.append("<!--");
          out.append(text);
          out.append("-->");
        } else if (!isStandardComment) {
          // Downlevel-revealed comment.
          out.append(text);
        }
      }
      break;
    }
  }

  // This emits the contents of an HTML element that starts a RAWTEXT
  // parsing context, which means no entities or tags are parsed.
  private void renderCdata(String localName, Element el, boolean asXml) {
    StringBuilder cdata = new StringBuilder();
    for (Node c = el.getFirstChild(); c != null; c = c.getNextSibling()) {
      switch (c.getNodeType()) {
        case Node.TEXT_NODE: case Node.CDATA_SECTION_NODE:
          String text = c.getNodeValue();
          if (asXml) {
            Escaping.escapeXml(text, true, cdata);
          } else {
            cdata.append(text);
          }
          break;
        default:
          cdata.append(Nodes.render(c));
          break;
      }
    }

    // Whether we're emitting xml or html, verify that the result will
    // not break html parsing.
    int problemIndex = checkHtmlCdataCloseable(localName, cdata);
    if (problemIndex != -1) {
      throw new UncheckedUnrenderableException(
          "Document not renderable due to '"
           + cdata.subSequence(
               problemIndex,
               Math.min(cdata.length(), problemIndex + 10))
               + "' in RAWTEXT element");
    }

    out.append(cdata);
  }

  private static final int COMMON_NS_DEPTH = depth(Namespaces.COMMON);
  private static Namespaces addNamespace(
      Namespaces base, String uri, String suggestedPrefix) {
    if (isAlphaNumericId(suggestedPrefix)
        && base.forPrefix(suggestedPrefix) == null) {
      return new Namespaces(base, suggestedPrefix, uri);
    }
    // We subtract COMMON_NS_DEPTH so that when we modify Namespaces.COMMON,
    // we do not change the output for documents that do not depend on the
    // added or removed namespaces.
    // It is alright for depth to be negative since dashes can appear in
    // namespace prefixes.
    return new Namespaces(base, "_ns" + (depth(base) - COMMON_NS_DEPTH), uri);
  }

  private Namespaces addNamespaceFromAttribute(
      String nsPrefix, String nsUri, Namespaces ns) {
    Namespaces masked = ns.forPrefix(nsPrefix);
    if (masked != null) {
      if (!masked.uri.equals(nsUri) && depth(masked) <= namespaceDepthAtStart) {
        // Don't output masking declarations.
        return null;
      }
    }
    return new Namespaces(ns, nsPrefix, nsUri);
  }

  private static int depth(Namespaces ns) {
    int depth = 0;
    for (Namespaces p = ns; p != null; p = p.parent) { ++depth; }
    return depth;
  }

  private void renderNamespace(Namespaces ns) {
    out.append("xmlns:").append(ns.prefix).append("=\"");
    Escaping.escapeXml(ns.uri, true, out);
    out.append('"');
  }

  private static boolean isXmlnsDecl(String attrName) {
    int length = attrName.length();
    if (length == 5) {
      return "xmlns".equals(attrName);
    } else if (length > 6) {
      return attrName.startsWith("xmlns:");
    } else {
      return false;
    }
  }

  private static final boolean[] CASE_SENS_NAME_CHARS = new boolean['z' + 1];
  private static final boolean[] CASE_INSENS_NAME_CHARS = new boolean['z' + 1];
  static {
    for (char ch = '0'; ch <= '9'; ++ch) {
      CASE_SENS_NAME_CHARS[ch] = CASE_INSENS_NAME_CHARS[ch] = true;
    }
    for (char ch = 'a'; ch <= 'z'; ++ch) {
      CASE_SENS_NAME_CHARS[ch] = CASE_INSENS_NAME_CHARS[ch] = true;
    }
    for (char ch = 'A'; ch <= 'Z'; ++ch) {
      CASE_SENS_NAME_CHARS[ch] = true;
    }
  }

  private String emitLocalName(String name, boolean isHtml) {
    // speed up common case where we already have lower-cased letters and
    // digits.
    boolean[] simple = isHtml ? CASE_INSENS_NAME_CHARS : CASE_SENS_NAME_CHARS;
    for (int i = 0, n = name.length(); i < n; ++i) {
      char ch = name.charAt(i);
      if (ch > 'z' || !simple[ch]) {
        if (isHtml) { name = Strings.lower(name); }
        Escaping.escapeXml(name, true, out);
        return name;
      }
    }
    out.append(name);
    return name;
  }

  private static boolean isAlphaNumericId(String s) {
    if (s == null) { return false; }
    int n = s.length();
    if (n == 0) { return false; }
    char ch0 = s.charAt(0);
    if (!(('A' <= ch0 && ch0 <= 'Z') || ('a' <= ch0 && ch0 <= 'z'))) {
      return false;
    }
    for (int i = 1; i < n; ++i) {
      char ch = s.charAt(i);
      if (ch > 'z' || !CASE_SENS_NAME_CHARS[ch]) { return false; }
    }
    return true;
  }

  /**
   * Check that the content of a CDATA element does not contain a close tag
   * for that element or unbalanced escaping text spans.
   *
   * @return -1 if the content is safe, or the start index of a close tag or
   *     escaping text span boundary otherwise.
   */
  private static int checkHtmlCdataCloseable(
      String localName, StringBuilder sb) {
    int escapingTextSpanStart = -1;
    for (int i = 0, n = sb.length(); i < n; ++i) {
      char ch = sb.charAt(i);
      // NUL chars tend to confuse parsers
      if (ch == '\u0000') {
        return i;
      }
      switch (ch) {
        case '<':
          if (i + 3 < n
              && '!' == sb.charAt(i + 1)
              && '-' == sb.charAt(i + 2)
              && '-' == sb.charAt(i + 3)) {
            if (escapingTextSpanStart == -1) {
              escapingTextSpanStart = i;
            } else {
              return escapingTextSpanStart;
            }
          } else if (i + 1 + localName.length() < n
                     && '/' == sb.charAt(i + 1)
                     && Strings.eqIgnoreCase(localName,
                         sb.substring(i + 2, i + 2 + localName.length()))) {
            // A close tag contained in the content.
            if (escapingTextSpanStart < 0) {
              // We could try some recovery strategies here.
              // E.g. prepending "/<!--\n" to sb if "script".equals(localName)
              return i;
            }
            if (!"script".equals(localName)) {
              // Script tags are commonly included inside script tags.
              // <script><!--document.write('<script>f()</script>');--></script>
              // but this does not happen in other CDATA element types.
              // Actually allowing an end tag inside others is problematic.
              // Specifically,
              // <style><!--</style>-->/* foo */</style>
              // displays the text "/* foo */" on some browsers.
              return i;
            }
          }
          break;
        case '>':
          // From the HTML5 spec:
          //    The text in style, script, title, and textarea elements must not
          //    have an escaping text span start that is not followed by an
          //    escaping text span end.
          // We look left since the HTML 5 spec allows the escaping text span
          // end to share dashes with the start.
          if (i >= 2 && '-' == sb.charAt(i - 1) && '-' == sb.charAt(i - 2)) {
            if (escapingTextSpanStart < 0) { return i - 2; }
            escapingTextSpanStart = -1;
          }
          break;
      }
    }
    if (escapingTextSpanStart >= 0) {
      // We could try recovery strategies here.
      // E.g. appending "//-->" to the buffer if "script".equals(localName)
      return escapingTextSpanStart;
    }
    return -1;
  }

  /** As defined in section 2.6 of XML version 5. */
  private static boolean isName(String s) {
    int n = s.length();
    if (n == 0) { return false; }
    if (!NAME_START_CHARS.contains(s.codePointAt(0))) { return false; }

    for (int i = 1; i < n; ++i) {
      if (!NAME_CHARS.contains(s.charAt(i))) { return false; }
    }
    return true;
  }

  /**
   * From http://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameStartChar
   * <pre>
   * NameStartChar     ::=      ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
   *     | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
   *     | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
   *     | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
   * </pre>
   */
  private static final SparseBitSet NAME_START_CHARS = SparseBitSet.withRanges(
      0x3a, 0x3b, 0x41, 0x5b, 0x5f, 0x60, 0x61, 0x7b, 0xc0, 0xd7, 0xd8, 0xf7,
      0x2ff, 0x300, 0x370, 0x37e, 0x37f, 0x2000, 0x200c, 0x200e, 0x2070, 0x2190,
      0x2c00, 0x2ff0, 0x3001, 0xd800, 0xf900, 0xfdd0, 0xfdf0, 0xfffe,
      0x10000, 0xf0000);

  /**
   * From http://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameChar
   * <pre>
   * NameChar      ::=      NameStartChar | "-" | "." | [0-9] | #xB7
   *     | [#x0300-#x036F] | [#x203F-#x2040]
   * </pre>
   */
  private static final SparseBitSet NAME_CHARS = SparseBitSet.withRanges(
      0x2d, 0x2f, 0x30, 0x3b, 0x41, 0x5b, 0x5f, 0x60, 0x61, 0x7b, 0xb7, 0xb8,
      0xc0, 0xd7, 0xd8, 0xf7, 0x2ff, 0x37e, 0x37f, 0x2000, 0x200c, 0x200e,
      0x203f, 0x2041, 0x2070, 0x2190, 0x2c00, 0x2ff0, 0x3001, 0xd800,
      0xf900, 0xfdd0, 0xfdf0, 0xfffe, 0x10000, 0xf0000);
}

final class BooleanAttrs {
  /**
   * The set of HTML4.01 attributes that have the sole value {@code (<name>)}
   * where {@code <name>} is the attribute name and that are #IMPLIED.
   * @see <a href="http://www.w3.org/TR/html401/index/attributes.html">
   *    the HTML4.01 attributes index</a>
   */
  private static final Set<String> BOOLEAN_ATTR_NAMES = ImmutableSet.of(
      "checked", "compact", "declare", "defer", "disabled", "ismap", "multiple",
      "nohref", "noresize", "noshade", "nowrap", "readonly", "selected");

  // http://www.w3.org/TR/html401/index/attributes.html
  static boolean isBooleanAttr(String htmlAttrLocalName) {
    return BOOLEAN_ATTR_NAMES.contains(htmlAttrLocalName);
  }
}
TOP

Related Classes of com.google.caja.parser.html.BooleanAttrs

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.