// Source Code of com.google.caja.lexer.CssLexer and com.google.caja.lexer.CssSplitter

// Copyright (C) 2006 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


package com.google.caja.lexer;


import com.google.caja.reporting.DevNullMessageQueue;
import com.google.caja.reporting.Message;
import com.google.caja.reporting.MessagePart;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.MessageType;
import com.google.caja.util.Lists;
import com.google.caja.util.Strings;


import java.util.LinkedList;
import java.util.NoSuchElementException;


/**
 * A lexer that recognizes the
 * <a href="http://www.w3.org/TR/CSS21/grammar.html#scanner">CSS 2.1 Grammar</a>
 * plus line comments as interpreted by most browsers.
 * <p>
 * TODO(mikesamuel): CSS2.1 has changed lexical conventions to effectively
 * decode escapes at lex time in most contexts.  E.g., the rule
 * <code>"@import"              IMPORT_SYM</code> now reads
 * <code>@{I}{M}{P}{O}{R}{T}    {return IMPORT_SYM;}</code> and
 * <code>{num}ms                TIME</code> now reads
 * <code>{num}{M}{S}            {return TIME;}</code>.
 *
 * @author mikesamuel@gmail.com
 */
public final class CssLexer implements TokenStream<CssTokenType> {
  private final CssSplitter splitter;
  private final LinkedList<Token<CssTokenType>> pending = Lists.newLinkedList();


  /**
   * Creates a lexer over {@code cp} that reports to
   * {@link DevNullMessageQueue#singleton()} and does not allow ${...} style
   * substitutions.
   *
   * @param cp the source of CSS characters.  Must not be null.
   */
  // TODO(mikesamuel): all clients should pass in a proper queue
  public CssLexer(CharProducer cp) {
    this(cp, DevNullMessageQueue.singleton(), false);
  }


  /**
   * Creates a lexer that joins the primitive tokens produced by a
   * {@link CssSplitter} built from the same arguments.
   *
   * @param cp the source of CSS characters.  Must not be null.
   * @param mq the message queue handed to the underlying splitter.
   * @param allowSubstitutions true iff ${...} style substitutions should be
   *   allowed as described at {@link CssTokenType#SUBSTITUTION}
   */
  public CssLexer(
      CharProducer cp, MessageQueue mq, boolean allowSubstitutions) {
    // Only checked when assertions are enabled.
    assert null != cp;
    this.splitter = new CssSplitter(cp, mq, allowSubstitutions);
  }


  public boolean hasNext() throws ParseException {
    return !pending.isEmpty() || splitter.hasNext();
  }


  public Token<CssTokenType> next() throws ParseException {
    produce();
    if (null == pending) { throw new NoSuchElementException(); }
    return pending.removeFirst();
  }


  /**
   * True iff ${...} style substitutions should be
   * allowed as described at {@link CssTokenType#SUBSTITUTION}.
   * The answer is delegated to the underlying splitter.
   *
   * @return the splitter's current substitution policy.
   * @see #allowSubstitutions(boolean)
   */
  public boolean areSubstitutionsAllowed() {
    return splitter.areSubstitutionsAllowed();
  }


  /**
   * Changes the substitution policy for this lexer by forwarding it to the
   * underlying splitter.
   *
   * @param allow true iff ${...} style substitutions should be allowed.
   * @see #areSubstitutionsAllowed()
   */
  public void allowSubstitutions(boolean allow) {
    splitter.allowSubstitutions(allow);
  }


  /**
   * Decodes escapes in an identifier
   */
  public static String decodeCssIdentifier(CharSequence ident) {
    StringBuilder sb = null;
    int pos = 0;
    for (int i = 0, n = ident.length(); i < n;) {
      if (ident.charAt(i) == '\\') {
        if (sb == null) { sb = new StringBuilder(); }
        sb.append(ident, pos, i);
        int codepoint = 0;
        while (++i < n && isHexChar(ident.charAt(i))) {
          char ch = ident.charAt(i);
          codepoint <<= 4;
          if (ch >= '0' && ch <= '9') {
            codepoint |= ch - '0';
          } else if (ch >= 'a' && ch <= 'f') {
            codepoint |= ch + 10 - 'a';
          } else {
            codepoint |= ch + 10 - 'A';
          }
        }
        sb.appendCodePoint(codepoint < Character.MAX_CODE_POINT
                           ? codepoint
                           : 0xfffd);
        if (i < n && isSpaceChar(ident.charAt(i))) { ++i; }
        pos = i;
      } else {
        ++i;
      }
    }
    if (sb == null) { return ident.toString(); }
    return sb.append(ident, pos, ident.length()).toString();
  }


  /**
   * <pre>
   * nmstart    [_a-z]|{nonascii}|{escape}
   * nonascii   [\200-\377]
   * </pre>
   * @return true iff ch is a nmstart and is not an escape.
   *     Call {@link #decodeCssIdentifier} before this
   *     method to figure out whether an escape sequence is a nmstart
   */
  public static boolean isNmStart(char ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
        || (ch >= 0200 && ch <= 0377) || ch == '_';
  }


  /**
   * If the character producer has not been exhausted, ensures that there is a
   * token on pending on pending.
   */
  private void produce() throws ParseException {
    if (!pending.isEmpty()) { return; }
    if (!splitter.hasNext()) { return; }


    Token<CssTokenType> t = splitter.next();
    pending.add(t);
    if (t.type == CssTokenType.PUNCTUATION && splitter.hasNext()) {
      if ("!".equals(t.text)) {  // Join !important
        // IMPORTANT_SYM        "!"({w}|{comment})*{I}{M}{P}{O}{R}{T}{A}{N}{T}
        Token<CssTokenType> t2 = splitter.next();
        while (t2 != null && (t2.type == CssTokenType.SPACE
                              || t2.type == CssTokenType.COMMENT)) {
          pending.add(t2);
          t2 = splitter.hasNext() ? splitter.next() : null;
        }
        // The !important is significant regardless of case and whether or not a
        // letter is hex escaped.
        if (null != t2) {
          pending.add(t2);
          if (t2.type == CssTokenType.IDENT
              && Strings.eqIgnoreCase(
                  "important", decodeCssIdentifier(t2.text))) {
            reduce(CssTokenType.DIRECTIVE);
          }
        }
      } else if ("-".equals(t.text)) {  // Join '-'{nmstart}{nmchar}*
        Token<CssTokenType> t2 = splitter.next();
        if (null != t2) {
          pending.add(t2);
          if (t2.type == CssTokenType.IDENT) {
            reduce(CssTokenType.IDENT);
          }
        }
      }
    }
  }


  /**
   * Reduces the pending tokens to a single token with the given type.
   * For example, if the pending list contains an identifier followed by an
   * open parenthesis, then it can be reduced to a single function token.
   * This is necessitated by CSS2's odd lexical convention which classifies as
   * single tokens things that most other languages treat as sequences of
   * primitive tokens.
   * <p>
   * Modifies the pending list in place.
   */
  private void reduce(CssTokenType type) {
    StringBuilder sb = new StringBuilder();
    for (Token<CssTokenType> t : pending) {
      sb.append(t.text);
    }
    FilePosition fp
        = FilePosition.span(pending.getFirst().pos, pending.getLast().pos);
    pending.clear();
    pending.add(Token.instance(sb.toString(), type, fp));
  }


  /**
   * Is the given character a whitespace character according to the CSS 2 spec.
   */
  public static boolean isSpaceChar(char ch) {
    // s      [ \t\r\n\f]+
    // w      {s}?
    switch (ch) {
      case ' ': case '\t': case '\r': case '\n': case '\f':
        return true;
      default:
        return false;
    }
  }


  /** Is the given character a hex digit? */
  public static boolean isHexChar(char ch) {
    // h     [0-9a-f]
    return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f')
        || (ch >= 'A' && ch <= 'F');
  }
}


final class CssSplitter implements TokenStream<CssTokenType> {
  private final CharProducer cp;
  private final MessageQueue mq;
  private boolean allowSubstitutions;
  private Token<CssTokenType> pending;


  /**
   * Creates a splitter that lexes primitive CSS tokens from {@code cp}.
   *
   * @param cp the source of CSS characters.  Must not be null.
   * @param mq receives non-fatal messages such as the one issued for line
   *   comments.
   * @param allowSubstitutions true iff ${...} style substitutions should be
   *   allowed as described at {@link CssTokenType#SUBSTITUTION}
   */
  CssSplitter(CharProducer cp, MessageQueue mq, boolean allowSubstitutions) {
    // Only checked when assertions are enabled.
    assert null != cp;
    // Longest punctuation tokens are <!-- and --> so need LA(3).
    this.cp = cp;
    this.mq = mq;
    this.allowSubstitutions = allowSubstitutions;
  }


  /**
   * Lexes one token ahead if none is buffered and reports whether a token is
   * available.
   *
   * @throws ParseException if the look-ahead token is malformed.
   */
  public boolean hasNext() throws ParseException {
    produce();
    return null != pending;
  }


  public Token<CssTokenType> next() throws ParseException {
    produce();
    if (null == pending) { throw new NoSuchElementException(); }
    Token<CssTokenType> result = pending;
    pending = null;
    return result;
  }


  /** True iff ${...} style substitutions are currently lexed as tokens. */
  boolean areSubstitutionsAllowed() { return allowSubstitutions; }


  /**
   * Changes the substitution policy consulted when the next token is lexed.
   *
   * @param allow true iff ${...} style substitutions should be allowed.
   */
  public void allowSubstitutions(boolean allow) {
    this.allowSubstitutions = allow;
  }




  private void produce() throws ParseException {
    if (null != pending) { return; }
    if (cp.isEmpty()) { return; }


    char[] buf = cp.getBuffer();
    final int start = cp.getOffset();
    int limit = cp.getLimit();
    int end = start + 1;


    CssTokenType type;
    char ch = buf[start];


    int identEnd;
    if (CssLexer.isSpaceChar(ch)) {
      // [ \t\r\n\f]+        S


      end = parseWhitespace(buf, end, limit);
      type = CssTokenType.SPACE;
    } else if (ch == '/') {
      if (end < limit && buf[end] == '*') {
        // \/\*[^*]*\*+([^/*][^*]*\*+)*\/    /* ignore comments */
        int state = 0;  // 0 - start, 1 - in comment, 2 - saw, 3 - done
        do {
          if (end == limit) { break; }
          ch = buf[end];
          switch (state) {
            case 0: state = 1; break;
            case 1: if (ch == '*') { state = 2; } break;
            case 2:
              if (ch == '/') {
                state = 3;
              } else if (ch != '*') {
                state = 1;
              }
              break;
          }
          ++end;
        } while (state != 3);
        if (state != 3) {
          throw new ParseException(new Message(
              MessageType.UNTERMINATED_COMMENT_TOKEN,
              cp.filePositionForOffsets(start, end)));
        }
        type = CssTokenType.COMMENT;
      } else if (end < limit && buf[end] == '/') {
        do {
          if (++end == limit) { break; }
          ch = buf[end];
          // Line comment does not contain the newline character that ends it
          // since we don't want to break \r\n sequences across two tokens,
          // and for consistency with JavaScript conventions which exclude the
          // newline from the line comment token.
          if (ch == '\r' || ch == '\n') { break; }
        } while (true);
        type = CssTokenType.COMMENT;
        FilePosition commentPos = cp.filePositionForOffsets(start, end);
        mq.addMessage(MessageType.INVALID_CSS_COMMENT, commentPos);
      } else {
        //               *yytext
        type = CssTokenType.PUNCTUATION;
      }
    } else if ('~' == ch || '|' == ch) {
      if (end < limit && '=' == buf[end]) {
        // "~="          INCLUDES
        // "|="          DASHMATCH
        ++end;
      } else {
        //        .      *yytext
      }
      type = CssTokenType.PUNCTUATION;


    } else if (ch == '\'' || ch == '"') {
      end = parseString(cp, start);
      type = CssTokenType.STRING;


    } else if (ch == '@') {


      identEnd = parseIdent(cp, end);
      if (identEnd != -1) {
        // "@import"       IMPORT_SYM
        // "@page"         PAGE_SYM
        // "@media"        MEDIA_SYM
        // "@font-face"    FONT_FACE_SYM
        // "@charset "      CHARSET_SYM
        // "@"{ident}      ATKEYWORD
        type = CssTokenType.SYMBOL;
        end = identEnd;
        // In http://www.w3.org/TR/CSS21/grammar.html, the CHARSET_SYM is
        // allowed to match only "@charset "
        if ((end - start) == 8 && parseMatch(cp, start, "@charset ") > 0) {
          ++end;
        }
      } else {
        //        .        *yytext
        type = CssTokenType.PUNCTUATION;
      }
    } else if (ch == '!') {
      // "!{w}important" IMPORTANT_SYM
      // handled by token joining at a later pass


      //          .      *yytext


      type = CssTokenType.PUNCTUATION;
    } else if (ch == '#') {
      int nameEnd = parseName(cp, end);
      if (nameEnd >= 0) {
        // "#"{name}       HASH
        type = CssTokenType.HASH;
        end = nameEnd;
      } else {
        //          .      *yytext
        type = CssTokenType.PUNCTUATION;
      }


    } else if (ch == '<' || ch == '-') {
      // "<!--"          CDO
      // "-->"           CDC


      int tailEnd = parseMatch(cp, end, ch == '<' ? "!--" : "->");
      if (tailEnd >= 0) { end = tailEnd; }
      type = CssTokenType.PUNCTUATION;


    } else if ((ch >= '0' && ch <= '9') || '.' == ch) {
      // {num}em         EMS
      // {num}ex         EXS
      // {num}px         LENGTH
      // {num}cm         LENGTH
      // {num}mm         LENGTH
      // {num}in         LENGTH
      // {num}pt         LENGTH
      // {num}pc         LENGTH
      // {num}deg        ANGLE
      // {num}rad        ANGLE
      // {num}grad       ANGLE
      // {num}ms         TIME
      // {num}s          TIME
      // {num}Hz         FREQ
      // {num}kHz        FREQ
      // {num}{ident}    DIMEN
      // {num}%          PERCENTAGE
      // {num}           NUMBER
      boolean isNum;
      if ('.' == ch) {
        int numEnd = parseInt(cp, end);
        isNum = numEnd >= 0;
        if (isNum) { end = numEnd; }
      } else {
        isNum = true;
        end = parseNum(cp, start);
      }


      if (isNum) {
        identEnd = parseIdent(cp, end);
        if (identEnd >= 0) {
          end = identEnd;
        } else if (end < limit && '%' == buf[end]) {
          ++end;
        }
        type = CssTokenType.QUANTITY;
      } else {
        // lone .
        //          .      *yytext
        type = CssTokenType.PUNCTUATION;
      }


    } else if ((identEnd = parseIdent(cp, start)) >= 0) {
      end = identEnd;
      if (end - start == 1 && 'U' == ch && end < limit && '+' == buf[end]) {
        // U\+{range}      UNICODERANGE
        // U\+{h}{1,6}-{h}{1,6}    UNICODERANGE
        // range         \?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}\
        //               (\?{0,3}|{h}(\?{0,2}|{h}(\??|{h})))))


        type = CssTokenType.UNICODE_RANGE;
        ++end;
        end = parseRange(cp, end);
      } else if (end < limit && '(' == buf[end]) {
        ++end;
        if (end - start == 4 && parseMatch(cp, start, "url(") >= 0) {
          // "url("{w}{string}{w}")" URI
          // "url("{w}{url}{w}")"    URI
          end = parseWhitespace(buf, end, limit);
          int stringEnd = parseString(cp, end);
          int uriEnd = stringEnd < 0 ? parseUri(cp, end) : -1;
          if (stringEnd < 0 && uriEnd < 0) {
            throw new ParseException(new Message(
                MessageType.EXPECTED_TOKEN,
                cp.filePositionForOffsets(end, end),
                MessagePart.Factory.valueOf("{url}"), toMessagePart(cp, end)));
          }
          end = stringEnd >= 0 ? stringEnd : uriEnd;
          end = parseWhitespace(buf, end, limit);
          if (end == limit || ')' != buf[end]) {
            throw new ParseException(new Message(
                MessageType.EXPECTED_TOKEN,
                cp.filePositionForOffsets(end, end),
                MessagePart.Factory.valueOf(")"), toMessagePart(cp, end)));
          }
          ++end;
          type = CssTokenType.URI;
        } else {
          // {ident}"("      FUNCTION
          type = CssTokenType.FUNCTION;
        }
      } else {
        // {ident}         IDENT
        type = CssTokenType.IDENT;
      }


    } else if (ch == '$' && allowSubstitutions) {
      // ${<javascript tokens>}


      if (end < limit && buf[end] != '{') {
        type = CssTokenType.PUNCTUATION;
      } else {
        // 0 - non string
        // 1 - quoted string
        // 2 - saw \ in string
        // 3 - saw close paren
        int state = 0;
        // number of parenthetical blocks entered and not exited
        int nOpen = 0;
        char delim = 0;
        do {
          if (end == limit) { break; }
          ch = buf[end];
          switch (state) {
            case 0:
              if (ch == '"' || ch == '\'') {
                delim = ch;
                state = 1;
              } else if (ch == '{') {
                ++nOpen;
              } else if (ch == '}') {
                if (--nOpen == 0) {
                  state = 3;
                }
              }
              break;
            case 1:
              if (ch == delim) {
                state = 0;
              } else if (ch == '\\') {
                state = 2;
              }
              break;
            case 2:
              state = 1;
              break;
          }
          ++end;
        } while (state != 3);
        if (state != 3) {
          throw new ParseException(new Message(
              MessageType.UNTERMINATED_STRING_TOKEN,
              cp.filePositionForOffsets(start, end)));
        }


        identEnd = parseIdent(cp, end);
        if (identEnd >= 0) {end = identEnd;
        } else if (end != limit && '%' == buf[end]) {
          ++end;
        }


        type = CssTokenType.SUBSTITUTION;
      }
    } else {
      //          .      *yytext
      type = CssTokenType.PUNCTUATION;
    }
    assert end > start;
    pending = Token.instance(cp.toString(start, end), type,
                             cp.filePositionForOffsets(start, end));
    cp.consumeTo(end);
  }


  private static int parseMatch(CharProducer cp, int start, String match) {
    int len = match.length();
    int limit = cp.getLimit();
    if (limit - start < len) { return -1; }
    char[] buf = cp.getBuffer();
    for (int i = 0; i < len; ++i) {
      char chB = buf[start + i];
      char chM = match.charAt(i);
      if (!(chB == chM || ((chB | 0x20) == chM && chB >= 'A' && chB < 'Z'))) {
        return -1;
      }
    }
    return start + len;
  }


  private static int parseString(CharProducer cp, int start)
      throws ParseException {
    int limit = cp.getLimit();
    if (start == limit) { return -1; }
    char[] buf = cp.getBuffer();
    char ch = buf[start];
    if (ch != '\'' && ch != '"') { return -1; }


    // {string}        STRING
    // string1         \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
    // string2         \'([^\n\r\f\\']|\\{nl}|{escape})*\'
    // string          {string1}|{string2}


    char delim = ch;
    int end = start + 1;
    while (end < limit) {
      ch = buf[end];
      ++end;
      // escape            {unicode}|\\[^\r\n\f0-9a-f]
      // nl                \n|\r\n|\r|\f
      if (delim == ch) {
        return end;
      } else if (ch == '\\') {
        if (end < limit && isLineBreak(ch = buf[end])) {
          ++end;
          if (ch == '\r' && end < limit && buf[end] == '\n') {
            ++end;
          }
        } else {
          end = parseEscapeBody(cp, end);
        }
      } else if (isLineBreak(ch)) {
        throw new ParseException(new Message(
            MessageType.MALFORMED_STRING,
            cp.filePositionForOffsets(end - 1, end - 1),
            MessagePart.Factory.valueOf("" + ch)));
      }
    }
    throw new ParseException(new Message(
        MessageType.UNTERMINATED_STRING_TOKEN,
        cp.filePositionForOffsets(start, end)));
  }


  private int parseUri(CharProducer cp, int start) throws ParseException {
    // url     ([!#$%&*-~]|{nonascii}|{escape})*
    char[] buf = cp.getBuffer();
    int limit = cp.getLimit();
    int end = start;
    while (end < limit) {
      if (isUriChar(buf[end])) {
        ++end;
      } else if (buf[end] == '\\') {
        end = parseEscapeBody(cp, end + 1);
      } else {
        break;
      }
    }
    return end;
  }
  private static boolean isUriChar(char ch) {
    switch (ch) {
    case '!':
    case '#':
    case '$':
    case '%':
    case '&':
      return true;
    default:
      return (ch >= '*' && ch <= '~') || isNonAscii(ch);
    }
  }


  private static boolean isLineBreak(char ch) {
    // nl                \n|\r\n|\r|\f
    switch (ch) {
      case '\r': case '\n': case '\f':
        return true;
      default:
        return false;
    }
  }


  private int parseWhitespace(char[] buf, int end, int limit) {
    // w       [ \t\r\n\f]*
    while (end < limit && CssLexer.isSpaceChar(buf[end])) { ++end; }
    return end;
  }


  /**
   * Only handles the case where num does not start with a dot since it is
   * hard to distinguish a "." token from a number token with 1 char lookahead.
   */
  private static int parseNum(CharProducer cp, int start)
      throws ParseException {
    //      num     [0-9]+|[0-9]*"."[0-9]+


    int end = parseInt(cp, start);
    assert end >= 0;
    int limit = cp.getLimit();
    char[] buf = cp.getBuffer();
    if (end < limit && '.' == buf[end]) {
      ++end;
      char ch;
      // By CSS rules, 0. is an invalid number.
      if (end == limit || (ch = buf[end]) < '0' || ch > '9') {
        throw new ParseException(new Message(
            MessageType.MALFORMED_NUMBER, cp.filePositionForOffsets(start, end),
            MessagePart.Factory.valueOf(cp.toString(start, end))));
      }
      return parseInt(cp, end);
    }
    return end;
  }


  private static int parseInt(CharProducer cp, int start) {
    int limit = cp.getLimit();
    if (start == limit) { return -1; }
    char[] buf = cp.getBuffer();
    char ch = buf[start];
    if (ch >= '0' && ch <= '9') {
      int end = start;
      do {
        if (++end == limit) { break; }
        ch = buf[end];
      } while (ch >= '0' && ch <= '9');
      return end;
    } else {
      return -1;
    }
  }


  private static int parseIdent(CharProducer cp, int start)
      throws ParseException {
    // ident      -?{nmstart}{nmchar}*
    // We later join '-' to the front of an identifier, so don't start here.
    int end = parseNmStart(cp, start);
    if (end < 0) { return -1; }
    for (int nmCharEnd; (nmCharEnd = parseNmChar(cp, end)) >= 0;) {
      end = nmCharEnd;
    }
    return end;
 }


  private static int parseName(CharProducer cp, int start)
      throws ParseException {
    // name      {nmchar}+
    int end = parseNmChar(cp, start);
    if (end < 0) { return -1; }
    for (int nmCharEnd; (nmCharEnd = parseNmChar(cp, end)) >= 0;) {
      end = nmCharEnd;
    }
    return end;
  }


  private static int parseNmStart(CharProducer cp, int start)
      throws ParseException {
    if (start == cp.getLimit()) { return -1; }
    char ch = cp.getBuffer()[start];
    if (CssLexer.isNmStart(ch)) { return start + 1; }
    if (ch == '\\') { return parseEscapeBody(cp, start + 1); }
    return -1;
  }


  private static int parseNmChar(CharProducer cp, int start)
      throws ParseException {
    // nmchar     [_a-z0-9-]|{nonascii}|{escape}
    int end = parseNmStart(cp, start);
    if (end >= 0) { return end; }
    if (start != cp.getLimit()) {
      char ch = cp.getBuffer()[start];
      if ((ch >= '0' && ch <= '9') || ch == '-') { return start + 1; }
    }
    return -1;
  }


  private static int parseEscapeBody(CharProducer cp, int start)
      throws ParseException {
    // unicode    \\{h}{1,6}(\r\n|[ \t\r\n\f])?
    // escape     {unicode}|\\[^\r\n\f0-9a-f]
    int limit = cp.getLimit();
    char[] buf = cp.getBuffer();
    if (start == limit) {
      throw new ParseException(
          new Message(MessageType.EXPECTED_TOKEN,
                      cp.filePositionForOffsets(start, start),
                      MessagePart.Factory.valueOf("<hex-digit>"),
                      MessagePart.Factory.valueOf("<end-of-input>")));
    }
    char ch = buf[start];
    if (CssLexer.isHexChar(ch)) {
      int end = start + 1;
      for (int i = 5; --i >= 0; ++end) {
        if (end == limit) { break; }
        ch = buf[end];
        if (!CssLexer.isHexChar(ch)) { break; }
      }
      if (end < limit && CssLexer.isSpaceChar(ch = buf[end])) {
        ++end;


        if ('\r' == ch && end < limit && '\n' == buf[end]) {
          ++end;
        }
      }
      return end;
    } else if (isLineBreak(ch)) {
      throw new ParseException(
          new Message(
              MessageType.UNRECOGNIZED_ESCAPE,
              cp.filePositionForOffsets(start, start),
              MessagePart.Factory.valueOf(String.valueOf(ch))));
    } else {
      return start + 1;
    }
  }


  /**
   * Scans the part of a unicode range that follows "U+".
   *
   * @param start index of the first character after the '+'.
   * @return the index just past the range, or -1 if nothing was consumed.
   * @throws ParseException if the range does not begin with '?' and the
   *     leading hex digits are not followed by '-'.
   */
  private static int parseRange(CharProducer cp, int start)
      throws ParseException {
    // range         \?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}\
    //               (\?{0,3}|{h}(\?{0,2}|{h}(\??|{h})))))
    // This method also handles {h}{1,6}-{h}{1,6}


    char[] buf = cp.getBuffer();
    int limit = cp.getLimit();


    int end = start;
    // Budget of characters still allowed in the current half.  It is
    // decremented inside the loop conditions, and only once the character
    // test has passed, so it can drop to -1 when a seventh candidate is seen.
    int len = 6;
    // Here "isRange" means the text starts with a '?' wildcard.
    boolean isRange = end < limit && buf[end] == '?';
    if (isRange) {
      while (end < limit && '?' == buf[end] && --len >= 0) { ++end; }
    }
    // Hex digits share whatever budget the wildcards left over.
    while (end < limit && CssLexer.isHexChar(buf[end]) && --len >= 0) { ++end; }
    if (!isRange) {
      // NOTE(review): a hex-led form without '-', such as the {h}\?{0,5}
      // alternatives of the range production above or a bare run of hex
      // digits, ends up in this throw -- confirm that this is intended.
      if (end == limit || '-' != buf[end]) {
        throw new ParseException(
            new Message(
                MessageType.EXPECTED_TOKEN,
                cp.filePositionForOffsets(end, end),
                MessagePart.Factory.valueOf("-"), toMessagePart(cp, end)));
      }
      ++end;


      // The second half gets a fresh budget of six characters.
      len = 6;
      while (end < limit && '?' == buf[end] && --len >= 0) { ++end; }
      while (end < limit && CssLexer.isHexChar(buf[end]) && --len >= 0) {
        ++end;
      }
    }
    return end != start ? end : -1;
  }


  // nonascii    [\200-\377]
  private static boolean isNonAscii(char ch) {
    return ch >= '\200' && ch <= '\377';
  }


  private static MessagePart toMessagePart(CharProducer cp, int offset) {
    return MessagePart.Factory.valueOf(
        offset == cp.getLimit()
        ? "<end-of-input>"
        : "" + cp.getBuffer()[offset]);
  }
}
// Source Code of com.google.caja.lexer.CssSplitter

// Related Classes of com.google.caja.lexer.CssSplitter