Package com.google.caja.lang.css

Source Code of com.google.caja.lang.css.CssPropertyPatterns$Inspector

// Copyright (C) 2008 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.caja.lang.css;

import com.google.caja.SomethingWidgyHappenedError;
import com.google.caja.config.AllowedFileResolver;
import com.google.caja.config.ConfigUtil;
import com.google.caja.config.ImportResolver;
import com.google.caja.lexer.CssLexer;
import com.google.caja.lexer.FilePosition;
import com.google.caja.lexer.InputSource;
import com.google.caja.lexer.ParseException;
import com.google.caja.lexer.TokenConsumer;
import com.google.caja.parser.ParseTreeNode;
import com.google.caja.parser.css.CssPropertySignature;
import com.google.caja.parser.js.ArrayConstructor;
import com.google.caja.parser.js.Declaration;
import com.google.caja.parser.js.Expression;
import com.google.caja.parser.js.Identifier;
import com.google.caja.parser.js.IntegerLiteral;
import com.google.caja.parser.js.MultiDeclaration;
import com.google.caja.parser.js.ObjectConstructor;
import com.google.caja.parser.js.Operation;
import com.google.caja.parser.js.Operator;
import com.google.caja.parser.js.RegexpLiteral;
import com.google.caja.parser.js.Statement;
import com.google.caja.parser.js.StringLiteral;
import com.google.caja.parser.js.ValueProperty;
import com.google.caja.parser.quasiliteral.QuasiBuilder;
import com.google.caja.plugin.LinkStyleWhitelist;
import com.google.caja.reporting.EchoingMessageQueue;
import com.google.caja.reporting.MessageContext;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.RenderContext;
import com.google.caja.reporting.SimpleMessageQueue;
import com.google.caja.tools.BuildCommand;
import com.google.caja.util.Bag;
import com.google.caja.util.Charsets;
import com.google.caja.util.Lists;
import com.google.caja.util.Maps;
import com.google.caja.util.Name;
import com.google.caja.util.Pair;
import com.google.caja.util.Sets;
import com.google.caja.util.Strings;
import com.google.common.collect.ImmutableMap;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;

import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import javax.annotation.Nullable;

/**
* Operates on CSS property signatures to come up with a simple regular
* expression that validates values where possible.
*
* <p>
* This class produces a javascript file like<pre>
*   var CSS_PROP_BIT_x = ...;
*   // Sets of allowed literal tokens.
*   var CSS_LIT_GROUP = [["auto",...],...];
*   var CSS_REGEX = [/^...$/];
*   var cssSchema = {
*     "float": {
*       // Describe the kinds of tokens that can appear in the named
*       // property's value and any additional restrictions.
*       cssPropBits: CSS_PROP_BIT_x | CSS_PROP_BIT_y | ...,
*       // Aliases for the named property.
*       cssAlternates: ["cssFloat", "styleFloat"],
*       // Groups of literal values allowed.
*       cssLitGroup: [CSS_LIT_GROUP[1],CSS_LIT_GROUP[3],CSS_LIT_GROUP[16]],
*       // Matches any other tokens that can be part of the value including
*       // function calls like rgba(...).
*       cssExtra: CSS_REGEX[2]
*     },
*     ...
*   };
* </pre>
*
* <p>
* The {@code css} map does not contain every property in the given
* {@link CssSchema} since some cannot be matched efficiently by values.
* See comments on {@code [ a || b ]*} constructs inline below.
*
* <h3>Program Flow</h3>
* <p>
* This class examines a schema and builds a list of all allowed CSS properties.
* It then tries to convert each property's signature to a regular expression
* pattern.
* It may fail for some patterns, especially the aggregate ones like
* {@code background} which combine
* {@code background-image}, {@code background-style}, etc.
* <p>
* Next it optimizes the patterns it found.  This includes flattening
* concatenation and union operators, and moving the {@code \s+} out of unions.
* Optimizing {@code /((blue\s+|red\s+|green\s+)|inherit\s+)/} might yield the
* simpler expression {@code /(blue|red|green|inherit)\s+/}.
* <p>
* Once it has a mapping of property names to regular expressions it builds a
* constant pool by hashing on regular expression text.
* This allows properties with identical patterns such as {@code border-top} and
* {@code border-bottom} to share an instance.
* <p>
* Finally it builds a javascript parse tree that assigns the {@code css}
* namespace to an object whose keys are CSS property names, and whose
* values are regular expressions.
*
* <h3>Caveats</h3>
* <p>
* Some of the regular expressions do match URLs.  If valid css text contains
* the string 'uri' case-insensitively, then a client may need to extract and
* rewrite URLs.  Since all strings are double quoted, this should be doable
* without lexing CSS.
*
* @author mikesamuel@gmail.com
*/
public class CssPropertyPatterns {
  private final CssSchema schema;

  public CssPropertyPatterns(CssSchema schema) {
    this.schema = schema;
  }

  static class CssPropertyData {
    final @Nullable String regex;
    final EnumSet<CssPropBit> properties;
    final Set<String> literals;

    CssPropertyData(
        @Nullable String regex, EnumSet<CssPropBit> properties,
        Set<String> literals) {
      this.regex = regex;
      this.properties = properties;
      this.literals = literals;
    }
  }

  /**
   * Generates a regular expression for the given signature if a simple
   * regular expression exists.
   * @return null if no simple regular expression exists
   *     or the text of a JavaScript regular expression like "/foo\s+/i"
   *     that matches values of the given value with one or more trailing
   *     whitespace characters.
   *     If the color property only matched the literal "blue", the resulting
   *     pattern would match "blue ".
   */
  public CssPropertyData cssPropertyToPattern(
      CssPropertySignature sig, boolean complete) {
    return new Inspector(complete).cssPropertyToPattern(sig);
  }

  public Pattern cssPropertyToJavaRegex(CssPropertySignature sig) {
    JSRE p = new Inspector(true).inspect(sig);
    if (p == null) { return null; }
    StringBuilder out = new StringBuilder();
    p.render(out);
    // Since all keywords are case insensitive and we never match text inside
    // strings.
    return Pattern.compile("" + out, Pattern.CASE_INSENSITIVE);
  }

  private static class JSREBuilder {
    final JSRE p;
    final boolean identBefore;

    JSREBuilder(boolean identBefore, JSRE p) {
      assert p != null;
      this.identBefore = identBefore;
      this.p = p;
    }
  }

  private static final JSRE SPACES = JSRE.many(JSRE.raw("\\s"));
  private static final JSRE OPT_SPACES = JSRE.any(JSRE.raw("\\s"));
  // When rendering to be used as part of the client side JS parser, we can
  // rely on all CSS comments and space tokens to have been reduced to a single
  // space character.
  private static final JSRE SPACES_NORMALIZED = JSRE.many(JSRE.raw(" "));
  private static final JSRE OPT_SPACES_NORMALIZED = JSRE.any(SPACES_NORMALIZED);

  private static boolean isIdentChar(char ch) {
    return CssLexer.isNmStart(ch) || ('0' <= ch && ch <= '9')
        || ch == '#' || ch == '.';
  }

  private static final Name COLOR = Name.css("color");
  private static final Name STANDARD_COLOR = Name.css("color-standard");

  private class Inspector {
    final EnumSet<CssPropBit> props;
    final Set<String> literals;
    /**
     * True to include literals and patterns inferrable from props in the regex
     * so as to produce a complete regular expression instead of one focused
     * on just matching the portions of a normalized tokens set not matched
     * by property bits, or literal groups.
     */
    final boolean complete;
    private final Bag<String> refsUsed;

    final JSRE spaces, optSpaces;

    Inspector(boolean complete) {
      this(complete, Bag.<String>newHashBag());
    }

    private Inspector(boolean complete, Bag<String> refsUsed) {
      this.props = EnumSet.noneOf(CssPropBit.class);
      this.literals = Sets.newHashSet();
      this.complete = complete;
      this.refsUsed = refsUsed;
      this.spaces = complete ? SPACES : SPACES_NORMALIZED;
      this.optSpaces = complete ? OPT_SPACES : OPT_SPACES_NORMALIZED;
    }

    /**
     * Generates a regular expression for the given signature if a simple
     * regular expression exists.
     * @return null if no simple regular expression exists
     *     or the text of a JavaScript regular expression like "/^foo$/i"
     *     that matches values of the given value.
     *     If the color property only matched the literal "blue", the resulting
     *     pattern would match "blue ".
     */
    CssPropertyData cssPropertyToPattern(CssPropertySignature sig) {
      JSRE p = inspect(sig);
      String regex = null;
      if (p != null) {
        StringBuilder out = new StringBuilder();
        out.append('/');
        p.render(out);
        // Since all keywords are case insensitive and we never match text
        // inside strings.
        out.append("/i");
        regex = out.toString();
      }
      return new CssPropertyData(
          regex, EnumSet.copyOf(props), Sets.newHashSet(literals));
    }

    JSRE inspect(CssPropertySignature sig) {
      this.literals.clear();
      this.props.clear();
      JSREBuilder b = sigToPattern(false, sig);
      if (b == null) { return null; }
      // TODO(mikesamuel): are the optSpaces really necessary?
      return JSRE.cat(JSRE.raw("^"), optSpaces, b.p, optSpaces, JSRE.raw("$"))
          .optimize();
    }

    private JSREBuilder sigToPattern(
        boolean identBefore, CssPropertySignature sig) {
      // Dispatch to a set of handlers that either append balanced content to
      // out, or append cruft and return null.
      if (sig instanceof CssPropertySignature.LiteralSignature) {
        return litToPattern(
            identBefore, (CssPropertySignature.LiteralSignature) sig);
      } else if (sig instanceof CssPropertySignature.RepeatedSignature) {
        return repToPattern(
            identBefore, (CssPropertySignature.RepeatedSignature) sig);
      } else if (sig instanceof CssPropertySignature.PropertyRefSignature) {
        return refToPattern(
            identBefore, (CssPropertySignature.PropertyRefSignature) sig);
      } else if (sig instanceof CssPropertySignature.SeriesSignature) {
        return seriesToPattern(
            identBefore, (CssPropertySignature.SeriesSignature) sig);
      } else if (sig instanceof CssPropertySignature.SymbolSignature) {
        return symbolToPattern(
            identBefore, (CssPropertySignature.SymbolSignature) sig);
      } else if (sig instanceof CssPropertySignature.SetSignature
                 || sig instanceof CssPropertySignature.ExclusiveSetSignature) {
        return setToPattern(identBefore, sig);
      } else if (sig instanceof CssPropertySignature.CallSignature) {
        return callToPattern(
            identBefore, (CssPropertySignature.CallSignature) sig);
      }
      return null;
    }

    private JSREBuilder litToPattern(
        boolean identBefore, CssPropertySignature.LiteralSignature lit) {
      String litValue = lit.getValue();
      // Match some trailing whitespace.
      // Since some patterns can match nothing (e.g. foo*), we make sure that
      // all positive matches are followed by token-breaking space.
      // The pattern as a whole can then be matched against the value with one
      // space added at the end.
      literals.add(litValue);
      if (!complete) { return null; }
      boolean ident = isIdentChar(litValue.charAt(litValue.length() - 1));
      JSRE p = JSRE.lit(litValue);
      if (p == null) { return null; }
      return new JSREBuilder(
          ident, JSRE.cat(ident && identBefore ? spaces : optSpaces, p));
    }

    private JSREBuilder repToPattern(
        boolean identBefore, CssPropertySignature.RepeatedSignature sig) {
      CssPropertySignature rep = sig.getRepeatedSignature();
      if (rep instanceof CssPropertySignature.ExclusiveSetSignature) {
        // The spec (http://www.w3.org/TR/REC-CSS1/#css1-properties) defines
        // A double bar (A || B) means that either A or B or both must occur
        // in any order.
        // We convert [ a || b ] -> [a | b]+
        // So according to the spec,
        //    a
        //    a b
        //    b
        //    b a
        // are all ok, but
        //    a a
        // is not, though it is allowed by our regular expression to avoid
        // exploding the size of the regular expression or requiring complicated
        // backtracking.
        return exclusiveToPattern(identBefore, rep);
      }
      JSREBuilder repeatedPattern = sigToPattern(identBefore, rep);
      if (repeatedPattern == null) { return null; }
      int min = sig.minCount;
      int max = sig.maxCount;
      if (repeatedPattern.identBefore == identBefore || max == 1) {
        return new JSREBuilder(
            repeatedPattern.identBefore, JSRE.rep(repeatedPattern.p, min, max));
      } else {
        JSREBuilder tail = sigToPattern(repeatedPattern.identBefore, rep);
        if (max != Integer.MAX_VALUE) { --max; }
        JSRE spaceSeparated = JSRE.cat(
            repeatedPattern.p, JSRE.rep(tail.p, min != 0 ? min - 1 : 0, max));
        if (min == 0) {
          spaceSeparated = JSRE.opt(spaceSeparated);
        }
        return new JSREBuilder(tail.identBefore, spaceSeparated);
      }
    }

    private JSREBuilder refToPattern(
        boolean identBefore, CssPropertySignature.PropertyRefSignature sig) {
      refsUsed.incr(sig.getPropertyName().getCanonicalForm());
      CssSchema.CssPropertyInfo p = schema.getCssProperty(
          sig.getPropertyName());
      return p != null ? sigToPattern(identBefore, p.sig) : null;
    }

    private JSREBuilder seriesToPattern(
        boolean identBefore, CssPropertySignature.SeriesSignature sig) {
      List<JSRE> children = Lists.newArrayList();
      for (CssPropertySignature child : sig.children()) {
        JSREBuilder b = sigToPattern(identBefore, child);
        if (children != null) {
          if (b != null) {
            children.add(b.p);
            identBefore = b.identBefore;
          } else if (complete) {
            children = null;
          }
        }
      }
      if (children == null) { return null; }
      return new JSREBuilder(identBefore, JSRE.cat(children));
    }

    private JSREBuilder symbolToPattern(
        boolean identBefore, CssPropertySignature.SymbolSignature sig) {
      Name symbolName = sig.getValue();
      refsUsed.incr(symbolName.getCanonicalForm());
      JSRE builtinMatch = builtinToPattern(symbolName);
      if (builtinMatch != null) {
        String re = builtinMatch.toString();
        boolean ident = isIdentChar(re.charAt(0));
        boolean identAfter = isIdentChar(re.charAt(re.length() - 1));
        // TODO(mikesamuel):
        // This whole way of computing identAfter for regular expressions is
        // horribly broken, and the lookup table should just have a bit along
        // with the regex.
        //
        // Felix points out:
        // Aren't ident and identAfter going to be wrong for builtins like
        // <integer>?
        //
        // example, the spec for text-shadow has
        //   <length>{2,4}
        // which gets translated to
        //   <length> (?: \s* <length>){1,4}
        //
        // which means "00" with no spaces will count as two matches of
        // <length>.
        //
        // This seems harmless, so I'm not sure it's worth fixing, but maybe
        // add a note.
        return new JSREBuilder(
            identAfter,
            JSRE.cat(identBefore && ident ? spaces : optSpaces, builtinMatch));
      }
      CssSchema.SymbolInfo s = schema.getSymbol(symbolName);
      if (COLOR.equals(symbolName) && complete) {
        // Don't blow up the regexs by including the entire X11 color set over
        // and over.
        CssSchema.SymbolInfo standard = schema.getSymbol(STANDARD_COLOR);
        if (standard != null) { s = standard; }
      }
      return s != null ? sigToPattern(identBefore, s.sig) : null;
    }

    private JSREBuilder setToPattern(
        boolean identBefore, CssPropertySignature sig) {
      if (sig.children().isEmpty()) { return null; }
      List<JSRE> children = Lists.newArrayList();
      boolean identAfter = false;
      for (CssPropertySignature child : sig.children()) {
        JSREBuilder b = sigToPattern(identBefore, child);
        if (b != null) {
          children.add(b.p);
          identAfter |= b.identBefore;
        }
      }
      if (children.isEmpty()) { return null; }
      return new JSREBuilder(identAfter, JSRE.alt(children));
    }

    private JSREBuilder callToPattern(
        boolean identBefore, CssPropertySignature.CallSignature sig) {
      Iterator<? extends CssPropertySignature> sigs = sig.children().iterator();
      List<JSRE> children = Lists.newArrayList();
      // Use a complete inspector so we get regular expressions for calls in
      // the output because they cannot be matched by other means.  Do not
      // record as literals literals appearing in the call body.
      Inspector inspector = new Inspector(true, refsUsed);
      JSREBuilder fnPattern = inspector.sigToPattern(identBefore, sigs.next());
      if (fnPattern == null) { return null; }
      children.add(fnPattern.p);
      children.add(JSRE.lit("("));
      boolean first = true;
      while (sigs.hasNext()) {
        if (first) {
          first = false;
        } else {
          children.add(JSRE.lit(","));
        }
        JSREBuilder actual = inspector.sigToPattern(false, sigs.next());
        if (actual == null) { return null; }
        children.add(actual.p);
      }
      children.add(optSpaces);
      children.add(JSRE.lit(")"));
      return new JSREBuilder(false, JSRE.cat(children));
    }

    /**
     * (a||b) means (a | b | a b | b a) but we implement it as (a | b){1,2}
     * which admits (a a | b b) too.
     */
    private JSREBuilder exclusiveToPattern(
        boolean identBefore, CssPropertySignature sig) {
      if (sig.children().isEmpty()) { return null; }
      List<JSRE> head = Lists.newArrayList();
      boolean identAfterHead = false;
      for (CssPropertySignature child : sig.children()) {
        JSREBuilder b = sigToPattern(identBefore, child);
        if (b != null) {
          head.add(b.p);
          identAfterHead = b.identBefore;
        }
      }
      List<JSRE> tail = Lists.newArrayList();
      boolean identAfterTail = false;
      for (CssPropertySignature child : sig.children()) {
        JSREBuilder b = sigToPattern(identAfterHead, child);
        if (b != null) {
          tail.add(b.p);
          identAfterTail = b.identBefore;
        }
      }
      if (tail.isEmpty()) { return null; }
      return new JSREBuilder(
          identAfterTail,
          JSRE.cat(JSRE.alt(head),
                   JSRE.rep(JSRE.alt(tail), 0, tail.size() - 1)));
    }

    private JSRE builtinToPattern(Name name) {
      String key = name.getCanonicalForm();
      int colon = key.lastIndexOf(':');
      boolean negative = key.lastIndexOf('-') > colon;
      String baseKey = colon >= 0 ? key.substring(0, colon) : key;
      boolean zIndex = baseKey.equals("z-index");
      CssPropBit b = BUILTIN_PROP_BITS.get(baseKey);
      if (b != null) {
        this.props.add(b);
        // The negative bit allows for some schemas to reject positioning
        // outside the parents' bounding boxes, and negative offsets for clip
        // regions.
        if (b == CssPropBit.QUANTITY && (colon < 0 || negative)) {
          // TODO: maybe tighten this condition
          this.props.add(CssPropBit.NEGATIVE_QUANTITY);
        }
        if (zIndex) {
          this.props.add(CssPropBit.Z_INDEX);
        }
        if (!complete) { return null; }
      }
      JSRE p = BUILTINS.get(key);
      if (p == null && key != baseKey) {
        p = BUILTINS.get(baseKey);
      }
      return p;
    }
  }

  private static final Map<String, CssPropBit> BUILTIN_PROP_BITS
      = ImmutableMap.<String, CssPropBit>builder()
        .put("number", CssPropBit.QUANTITY)
        .put("percentage", CssPropBit.QUANTITY)
        .put("angle", CssPropBit.QUANTITY)
        .put("frequency", CssPropBit.QUANTITY)
        .put("length", CssPropBit.QUANTITY)
        .put("time", CssPropBit.QUANTITY)
        .put("integer", CssPropBit.QUANTITY)
        .put("hex-color", CssPropBit.HASH_VALUE)
        .put("specific-voice", CssPropBit.QSTRING_CONTENT)
        .put("family-name", CssPropBit.QSTRING_CONTENT)
        .put("uri", CssPropBit.QSTRING_URL)
        .put("z-index", CssPropBit.QUANTITY)
        .build();

  private static final Map<String, JSRE> BUILTINS;
  static {
    // http://www.w3.org/TR/REC-CSS2/syndata.html
    JSRE zero = JSRE.lit("0");
    JSRE one = JSRE.lit("1");
    JSRE digit = JSRE.raw("\\d");
    JSRE digits = JSRE.many(digit);
    JSRE dot = JSRE.raw("\\.");
    JSRE fraction = JSRE.cat(dot, digits);
    JSRE pct = JSRE.lit("%");
    JSRE minus = JSRE.lit("-");
    JSRE sign = JSRE.alt(JSRE.lit("+"), minus);
    JSRE unsignedNum = JSRE.cat(digits, JSRE.opt(fraction));
    JSRE signedNum = JSRE.cat(JSRE.opt(sign), unsignedNum);
    JSRE angleUnits = JSRE.alt(
        JSRE.alt(JSRE.lit("rad"), JSRE.lit("grad")),
        JSRE.lit("deg"));
    JSRE freqUnits = JSRE.alt(JSRE.lit("Hz"), JSRE.lit("kHz"));
    JSRE lengthUnits = JSRE.alt(
        JSRE.alt(JSRE.lit("cm"), JSRE.lit("mm"), JSRE.lit("em")),
        JSRE.lit("ex"), JSRE.lit("in"),
        JSRE.alt(JSRE.lit("pc"), JSRE.lit("pt"), JSRE.lit("px")));
    JSRE timeUnits = JSRE.alt(JSRE.lit("ms"), JSRE.lit("s"));
    JSRE quotedIdentifiers = JSRE.raw(
        "\"\\w(?:[\\w-]*\\w)(?:\\s+\\w([\\w-]*\\w))*\"");
    JSRE hash = JSRE.lit("#");
    JSRE hex = JSRE.alt(digit, JSRE.lit("a"), JSRE.lit("b"), JSRE.lit("c"),
                        JSRE.lit("d"), JSRE.lit("e"), JSRE.lit("f"));
    BUILTINS = Maps.<String, JSRE>immutableMap()
        .put("number:0,", JSRE.alt(zero, unsignedNum).optimize())
        .put("number:0,1", JSRE.alt(
            JSRE.cat(zero, JSRE.opt(fraction)),
            fraction,
            JSRE.cat(one, JSRE.opt(JSRE.cat(dot, JSRE.many(zero)))))
            .optimize())
        .put("number", JSRE.alt(zero, signedNum).optimize())
        .put("percentage:0,", JSRE.alt(zero, JSRE.cat(unsignedNum, pct))
             .optimize())
        .put("percentage:0,100", JSRE.alt(zero, JSRE.cat(unsignedNum, pct))
             .optimize())
        .put("percentage", JSRE.alt(zero, JSRE.cat(signedNum, pct)).optimize())
        .put("angle:0", JSRE.alt(zero, JSRE.cat(unsignedNum, angleUnits)))
        .put("angle", JSRE.alt(zero, JSRE.cat(signedNum, angleUnits)))
        .put("frequency", JSRE.alt(zero, JSRE.cat(unsignedNum, freqUnits)))
        .put("length:0,", JSRE.alt(zero, JSRE.cat(unsignedNum, lengthUnits)))
        .put("length", JSRE.alt(zero, JSRE.cat(signedNum, lengthUnits)))
        .put("time:0,", JSRE.alt(zero, JSRE.cat(unsignedNum, timeUnits)))
        .put("time", JSRE.alt(zero, JSRE.cat(signedNum, timeUnits)))
        .put("integer", JSRE.cat(JSRE.opt(minus), digits))
        .put("integer:0,", digits)
        .put("integer:0,255", digits)
        .put("hex-color", JSRE.cat(hash, JSRE.rep(JSRE.rep(hex, 3, 3), 1, 2)))
        .put("specific-voice", quotedIdentifiers)
        .put("family-name", quotedIdentifiers)
        .put("uri", JSRE.cat(
            JSRE.lit("url(\""),
            JSRE.many(JSRE.raw("[^()\\\\\"\\r\\n]")),
            JSRE.lit("\")")))
        .put("z-index:-9999999,9999999", JSRE.raw("-?\\d{1,7}"))
        .create();
  }

  public static void generatePatterns(CssSchema schema, Appendable out)
      throws IOException {
    FilePosition unk = FilePosition.UNKNOWN;
    CssPropertyPatterns pp = new CssPropertyPatterns(schema);
    List<CssSchema.CssPropertyInfo> props
        = Lists.newArrayList(schema.getCssProperties());
    Collections.sort(
        props, new Comparator<CssSchema.CssPropertyInfo>() {
          public int compare(CssSchema.CssPropertyInfo a,
                             CssSchema.CssPropertyInfo b) {
            return a.name.compareTo(b.name);
          }
        });
    Set<String> commonSubstrings = Sets.newLinkedHashSet();
    commonSubstrings.add("|left|center|right");
    commonSubstrings.add("|top|center|bottom");
    Inspector inspector = pp.new Inspector(false);
    // Seed with some known constant strings.
    for (Name commonSymbol : new Name[] {
        // Derived by counting symbol names into a bag in symbolToPattern above.
        COLOR, Name.css("standard-color"),
        Name.css("length"), Name.css("length:0,"),
        Name.css("border-style"), Name.css("border-width"),
        Name.css("bg-position"), Name.css("bg-size"),
        Name.css("percentage"), Name.css("percentage:0,"),
        Name.css("uri"), Name.css("repeat-style") }) {
      CssPropertySignature.SymbolSignature sig
          = (CssPropertySignature.SymbolSignature)
            CssPropertySignature.Parser.parseSignature(
                "<" + commonSymbol + ">");
      JSREBuilder p = inspector.symbolToPattern(false, sig);
      if (p != null) {
        String commonSubstring = withoutSpacesOrZero(p.p.optimize()).toString();
        if (commonSubstring.length() != 0) {
          commonSubstrings.add(commonSubstring);
        }
      }
    }
    Map<String, int[]> regexPoolMap = Maps.newHashMap();
    Map<String, Integer> commonSubstringMap = Maps.newLinkedHashMap();
    List<Pair<CssSchema.CssPropertyInfo, CssPropertyData>> propData
        = Lists.newArrayList();
    List<Expression> stringPool = Lists.newArrayList();
    List<Expression> regexPool = Lists.newArrayList();

    for (CssSchema.CssPropertyInfo prop : props) {
      if (!schema.isPropertyAllowed(prop.name)) { continue; }
      propData.add(Pair.pair(prop, inspector.cssPropertyToPattern(prop.sig)));
    }

    for (String s : commonSubstrings) {
      int n = 0;
      for (Pair<CssSchema.CssPropertyInfo, CssPropertyData> p : propData) {
        String pattern = p.b.regex;
        if (pattern == null) { continue; }
        for (int index = -1; (index = pattern.indexOf(s, index + s.length())) >= 0;) {
          ++n;
        }
      }
      if (n > 1) {
        int poolIndx = stringPool.size();
        stringPool.add(StringLiteral.valueOf(unk, s));
        commonSubstringMap.put(s, poolIndx);
      }
    }

    for (Pair<CssSchema.CssPropertyInfo, CssPropertyData> p : propData) {
      String pattern = p.b.regex;
      if (pattern == null) { continue; }
      // Keep track of which patterns appear more than once so we can use
      // a constant pool.
      int[] pool = regexPoolMap.get(pattern);
      if (pool == null) {
        regexPoolMap.put(pattern, new int[] { -1 });
      } else if (pool[0] == -1) {
        pool[0] = regexPool.size();
        regexPool.add(makeRegexp(commonSubstringMap, pattern));
      }
    }

    Statement poolDecls = null;
    if (!stringPool.isEmpty()) {
      poolDecls = joinDeclarations(
          poolDecls,
          new Declaration(unk, new Identifier(unk, "s"),
              new ArrayConstructor(unk, stringPool)));
    }
    if (!regexPool.isEmpty()) {
      poolDecls = joinDeclarations(
          poolDecls,
          new Declaration(unk, new Identifier(unk, "c"),
              new ArrayConstructor(unk, regexPool)));
    }

    // Given keyword sets like
    // [['red','blue','green','transparent','inherit',;none'],
    //  ['red','blue','green'],
    //  ['inherit','none','bold','bolder']]
    // recognize that ['red','blue','green'] probably occurs frequently and
    // create a partition like
    // [['red','blue','green'],['bold','bolder'],['inherit',none'],
    //  ['transparent']]
    // and then store indices into the array of partition elements with
    // CSS property names so they can be unioned as needed.
    List<Set<String>> literalSets = Lists.newArrayList();
    for (Pair<CssSchema.CssPropertyInfo, CssPropertyData> p : propData) {
      literalSets.add(p.b.literals);
    }
    Partitions.Partition<String> litPartition = Partitions.partition(
        literalSets, String.class, null);
    List<ArrayConstructor> literalSetArrs = Lists.newArrayList();
    for (int[] literalIndices : litPartition.partition) {
      List<StringLiteral> literalArr = Lists.newArrayList();
      for (int litIndex : literalIndices) {
        literalArr.add(StringLiteral.valueOf(
            unk, litPartition.universe[litIndex]));
      }
      literalSetArrs.add(new ArrayConstructor(unk, literalArr));
    }
    if (!literalSetArrs.isEmpty()) {
      poolDecls = joinDeclarations(
          poolDecls,
          new Declaration(unk, new Identifier(unk, "L"),
              new ArrayConstructor(unk, literalSetArrs)));
    }

    List<ValueProperty> cssSchemaProps = Lists.newArrayList();
    StringLiteral regexObjKey = new StringLiteral(unk, "cssExtra");
    StringLiteral alternatesObjKey = new StringLiteral(unk, "cssAlternates");
    StringLiteral propbitsObjKey = new StringLiteral(unk, "cssPropBits");
    StringLiteral litgroupObjKey = new StringLiteral(unk, "cssLitGroup");

    for (int propIndex = 0, n = propData.size(); propIndex < n; ++propIndex) {
      Pair<CssSchema.CssPropertyInfo, CssPropertyData> d
          = propData.get(propIndex);
      CssSchema.CssPropertyInfo prop = d.a;
      CssPropertyData data = d.b;

      ObjectConstructor dataObj = new ObjectConstructor(unk);

      String regex = data.regex;
      if (regex != null) {
        int poolIndex = regexPoolMap.get(regex)[0];
        Expression re = poolIndex < 0
            ? makeRegexp(commonSubstringMap, regex)
            : (Expression) QuasiBuilder.substV(
                "c[@i]", "i", new IntegerLiteral(unk, poolIndex));
        dataObj.appendChild(new ValueProperty(regexObjKey, re));
      }

      String dom2property = propertyNameToDom2Property(prop.name);
      ArrayConstructor altNames = null;
      for (String altDom2Property : prop.dom2properties) {
        if (altDom2Property.equals(dom2property)) { continue; }
        if (altNames == null) {
          altNames = new ArrayConstructor(
              unk, Collections.<Expression>emptyList());
        }
        altNames.appendChild(StringLiteral.valueOf(unk, altDom2Property));
      }
      if (altNames != null) {
        dataObj.appendChild(new ValueProperty(alternatesObjKey, altNames));
      }

      cssSchemaProps.add(new ValueProperty(
          unk, StringLiteral.valueOf(unk, prop.name.getCanonicalForm()),
          dataObj));

      int propBits = 0;
      for (CssPropBit b : data.properties) {
        propBits |= b.jsValue;
      }
      if (LinkStyleWhitelist.HISTORY_INSENSITIVE_STYLE_WHITELIST
          .contains(prop.name)) {
        propBits |= CssPropBit.HISTORY_INSENSITIVE.jsValue;
      } else if (LinkStyleWhitelist.PROPERTIES_ALLOWED_IN_LINK_CLASSES
                 .contains(prop.name)) {
        propBits |= CssPropBit.ALLOWED_IN_LINK.jsValue;
      }
      dataObj.appendChild(
          new ValueProperty(propbitsObjKey, new IntegerLiteral(unk, propBits)));

      List<Expression> litGroups = Lists.newArrayList();
      for (int groupIndex : litPartition.unions[propIndex]) {
        litGroups.add((Expression) QuasiBuilder.substV(
            "L[@i]", "i", new IntegerLiteral(unk, groupIndex)));
      }
      if (!litGroups.isEmpty()) {
        dataObj.appendChild(new ValueProperty(
            litgroupObjKey, new ArrayConstructor(unk, litGroups)));
      }
    }

    ObjectConstructor cssSchema = new ObjectConstructor(unk, cssSchemaProps);

    ParseTreeNode js = QuasiBuilder.substV(
        ""
        + "var cssSchema = (function () {"
        + "  @poolDecls?;"
        + "  return @cssSchema;"
        + "})();",
        "poolDecls", poolDecls,
        "cssSchema", cssSchema);
    TokenConsumer tc = js.makeRenderer(out, null);
    js.render(new RenderContext(tc));
    tc.noMoreTokens();
    out.append(";\n");
  }

  private static Expression makeRegexp(
      Map<String, Integer> commonSubstrings, String regex) {
    FilePosition unk = FilePosition.UNKNOWN;
    List<Pair<String, Expression>> substrings = Lists.newArrayList();
    for (Map.Entry<String, Integer> e : commonSubstrings.entrySet()) {
      substrings.add(Pair.pair(e.getKey(), (Expression) QuasiBuilder.substV(
          "s[@i]", "i", new IntegerLiteral(unk, e.getValue()))));
    }
    List<Expression> parts = Lists.newArrayList();
    assert regex.startsWith("/") && regex.endsWith("/i");
    String pattern = regex.substring(1, regex.length() - 2);
    makeRegexpOnto(substrings, pattern, 0, parts);
    if (parts.size() == 1 && parts.get(0) instanceof StringLiteral) {
      return new RegexpLiteral(unk, regex);
    } else {
      Expression e = parts.get(0);
      for (int i = 1, n = parts.size(); i < n; ++i) {
        e = Operation.createInfix(Operator.ADDITION, e, parts.get(i));
      }
      return (Expression) QuasiBuilder.substV(
          "RegExp(@pattern, 'i')", "pattern", e);
    }
  }

  private static void makeRegexpOnto(
      List<Pair<String, Expression>> strs, String pattern, int index,
      List<Expression> parts) {
    if ("".equals(pattern)) { return; }
    for (int n = strs.size(); index < n; ++index) {
      Pair<String, Expression> commonString = strs.get(index);
      String s = commonString.a;
      int pos = pattern.indexOf(s);
      if (pos >= 0) {
        makeRegexpOnto(strs, pattern.substring(0, pos), index + 1, parts);
        parts.add(commonString.b);
        makeRegexpOnto(strs, pattern.substring(pos + s.length()), index, parts);
        return;
      }
    }
    parts.add(StringLiteral.valueOf(FilePosition.UNKNOWN, pattern));
  }

  private static Statement joinDeclarations(
      @Nullable Statement decl, Declaration d) {
    if (decl == null) { return d; }
    if (decl instanceof Declaration) {
      decl = new MultiDeclaration(
          FilePosition.UNKNOWN, Arrays.asList((Declaration) decl));
    }
    ((MultiDeclaration) decl).appendChild(d);
    return decl;
  }

  /**
   * Spaces and zero tend to get moved/merged frequently during
   * regex optimization so don't consider them when doing common substring
   * matching.
   */
  private static JSRE withoutSpacesOrZero(JSRE p) {
    if (p instanceof JSRE.Atom) {
      String s = ((JSRE.Atom) p).atom;
      return ("0".equals(s) || "\\s".equals(s)) ? null : p;
    } else if (p instanceof JSRE.Concatenation) {
      JSRE.Concatenation c = (JSRE.Concatenation) p;
      List<JSRE> children = Lists.newArrayList(c.children);
      while (!children.isEmpty()) {
        JSRE p0 = children.get(0);
        JSRE p0w = withoutSpacesOrZero(p0);
        if (p0w == null) {
          children.remove(0);
        } else {
          children.set(0, p0w);
          break;
        }
      }
      if (children.isEmpty()) { return null; }
      return JSRE.cat(children).optimize();
    } else if (p instanceof JSRE.Alternation) {
      List<JSRE> children = ((JSRE.Alternation) p).children;
      List<JSRE> childrenNew = Lists.newArrayList();
      for (JSRE child : children) {
        JSRE wo = withoutSpacesOrZero(child);
        if (wo != null) { childrenNew.add(wo); }
      }
      if (childrenNew.isEmpty()) { return null; }
      return JSRE.alt(childrenNew);
    } else if (p instanceof JSRE.Repetition) {
      JSRE.Repetition rep = (JSRE.Repetition) p;
      JSRE body = withoutSpacesOrZero(rep.body);
      return body != null
          ? JSRE.rep(body, rep.min, rep.max).optimize()
          : null;
    }
    return p;
  }

  public static class Builder implements BuildCommand {
    public boolean build(List<File> inputs, List<File> deps,
                         Map<String, Object> options, File output)
        throws IOException {
      File symbolsAndPropertiesFile = null;
      File functionsFile = null;
      for (File input : inputs) {
        if (input.getName().endsWith(".json")) {
          if (symbolsAndPropertiesFile == null) {
            symbolsAndPropertiesFile = input;
          } else if (functionsFile == null) {
            functionsFile = input;
          } else {
            throw new IOException("Unused input " + input);
          }
        }
      }
      if (symbolsAndPropertiesFile == null) {
        throw new IOException("No JSON whitelist for CSS Symbols + Properties");
      }
      if (functionsFile == null) {
        throw new IOException("No JSON whitelist for CSS Functions");
      }

      FilePosition sps = FilePosition.startOfFile(new InputSource(
          symbolsAndPropertiesFile.getAbsoluteFile().toURI()));
      FilePosition fns = FilePosition.startOfFile(new InputSource(
          functionsFile.getAbsoluteFile().toURI()));

      MessageContext mc = new MessageContext();
      mc.addInputSource(sps.source());
      mc.addInputSource(fns.source());
      MessageQueue mq = new EchoingMessageQueue(
          new PrintWriter(new OutputStreamWriter(System.err), true), mc, false);

      Set<File> inputsAndDeps = Sets.newHashSet();
      for (File f : inputs) { inputsAndDeps.add(f.getAbsoluteFile()); }
      for (File f : deps) { inputsAndDeps.add(f.getAbsoluteFile()); }

      ImportResolver resolver = new AllowedFileResolver(inputsAndDeps);

      CssSchema schema;
      try {
        schema = new CssSchema(
            ConfigUtil.loadWhiteListFromJson(
                sps.source().getUri(), resolver, mq),
            ConfigUtil.loadWhiteListFromJson(
                fns.source().getUri(), resolver, mq));
      } catch (ParseException ex) {
        ex.toMessageQueue(mq);
        throw (IOException) new IOException("Failed to parse schema")
            .initCause(ex);
      }

      Writer out = new OutputStreamWriter(
          new FileOutputStream(output), Charsets.UTF_8.name());
      try {
        String currentDate = "" + new Date();
        if (currentDate.indexOf("*/") >= 0) {
          throw new SomethingWidgyHappenedError("Date should not contain '*/'");
        }
        out.write("/* Copyright Google Inc.\n");
        out.write(" * Licensed under the Apache Licence Version 2.0\n");
        out.write(" * Autogenerated at " + currentDate + "\n");
        out.write(" * \\@overrides window\n");
        out.write(" * \\@provides cssSchema");
        for (CssPropBit b : CssPropBit.values()) {
          out.write(", CSS_PROP_BIT_");
          out.write(Strings.upper(b.name()));
        }
        out.write(" */\n");
        for (CssPropBit b : CssPropBit.values()) {
          out.write("/**\n * @const\n * @type {number}\n */\n");
          out.write("var CSS_PROP_BIT_");
          out.write(Strings.upper(b.name()));
          out.write(" = ");
          out.write(String.valueOf(b.jsValue));
          out.write(";\n");
        }
        generatePatterns(schema, out);
        out.write("if (typeof window !== 'undefined') {\n");
        out.write("  window['cssSchema'] = cssSchema;\n");
        out.write("}\n");
      } finally {
        out.close();
      }
      return true;
    }
  }

  /**
   * Converts a css property name to a javascript identifier, e.g.
   * {@code background-color} => {@code backgroundColor}.
   */
  static String propertyNameToDom2Property(Name cssPropertyName) {
    String lcaseDashed = cssPropertyName.getCanonicalForm();
    int dash = lcaseDashed.indexOf('-');
    if (dash < 0) { return lcaseDashed; }
    StringBuilder sb = new StringBuilder(lcaseDashed.length());
    int written = 0;
    do {
      sb.append(lcaseDashed, written, dash);
      written = dash + 1;
      if (written < lcaseDashed.length()) {
        sb.append(Strings.upper(
            lcaseDashed.substring(written, written + 1)));
        ++written;
      }
      dash = lcaseDashed.indexOf('-', written);
    } while (dash >= 0);
    sb.append(lcaseDashed, written, lcaseDashed.length());
    return sb.toString();
  }

  public static void main(String[] args) throws IOException {
    CssSchema schema = CssSchema.getDefaultCss21Schema(
        new SimpleMessageQueue());
    generatePatterns(schema, System.out);
  }
}
TOP

Related Classes of com.google.caja.lang.css.CssPropertyPatterns$Inspector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.