Package org.idpf.epubcheck.util.css

Source Code of org.idpf.epubcheck.util.css.CssScanner$CssEscapeMemoizer

/*
* Copyright (c) 2012 International Digital Publishing Forum
*
*  Permission is hereby granted, free of charge, to any person obtaining a copy of
*  this software and associated documentation files (the "Software"), to deal in
*  the Software without restriction, including without limitation the rights to
*  use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
*  the Software, and to permit persons to whom the Software is furnished to do so,
*  subject to the following conditions:
*
*  The above copyright notice and this permission notice shall be included in all
*  copies or substantial portions of the Software.
*
*  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
*  FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
*  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
*  IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
*  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
package org.idpf.epubcheck.util.css;

import com.google.common.base.Ascii;
import com.google.common.base.CharMatcher;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.primitives.Ints;
import org.idpf.epubcheck.util.css.CssExceptions.CssErrorCode;
import org.idpf.epubcheck.util.css.CssExceptions.CssException;
import org.idpf.epubcheck.util.css.CssReader.Mark;
import org.idpf.epubcheck.util.css.CssToken.CssTokenConsumer;
import org.idpf.epubcheck.util.css.CssToken.TokenBuilder;
import org.idpf.epubcheck.util.css.CssToken.Type;

import java.io.IOException;
import java.io.Reader;
import java.util.List;
import java.util.Map;

import static com.google.common.base.Preconditions.*;
import static org.idpf.epubcheck.util.css.CssExceptions.CssErrorCode.SCANNER_ILLEGAL_CHAR;
import static org.idpf.epubcheck.util.css.CssExceptions.CssErrorCode.SCANNER_PREMATURE_EOF;

/**
* A lexical scanner for CSS.
* <p>
* Lexical errors are stored as attributes on the tokens in which they occurred.
* The supplied CssErrorHandler is also invoked when a lexical error occurs, so
* that clients can terminate the scanning by a rethrow.
* </p>
*
* @author mgylling
*/

final class CssScanner
{

  /** Reader wrapper (created with a pushback buffer size) that supplies the characters to scan. */
  private final CssReader reader;
  /** Receives every token produced by {@link #scan()}. */
  private final CssToken.CssTokenConsumer consumer;
  /** Memoized escape lookups at forward reader positions; reset for every token in {@link #scan()}. */
  private final CssEscapeMemoizer escapes;
  /** Builder for the token currently being scanned; replaced at the start of every token. */
  private TokenBuilder builder;
  /** Error handler passed to every {@link TokenBuilder} this scanner creates. */
  private final CssErrorHandler errHandler;
  /** Guards the internal precondition/state checks in the token methods; constant {@code false}. */
  private final boolean debug = false;
  /** The character most recently read by {@link #scan()}, i.e. the first character of the current token. */
  private char cur;

  private CssScanner(final Reader in, final String systemID, final CssErrorHandler errHandler,
      final CssTokenConsumer consumer, final int pushbackBufferSize)
  {
    this.consumer = checkNotNull(consumer);
    this.errHandler = checkNotNull(errHandler);
    this.reader = new CssReader(in, systemID, pushbackBufferSize);
    this.escapes = new CssEscapeMemoizer(reader);
  }

  CssScanner(Reader in, final String systemID, final CssErrorHandler errHandler,
      final CssTokenConsumer consumer)
  {
    this(in, systemID, errHandler, consumer, CssReader.DEFAULT_PUSHBACK_BUFFER_SIZE);
  }

  void scan() throws
      IOException,
      CssException
  {
    int ch;
    int next;
    while (true)
    {
      ch = reader.next();
      if (ch == -1)
      {
        break;
      }
      builder = new TokenBuilder(reader, /* this, */errHandler);
      cur = (char) ch;
      next = reader.peek();
      escapes.reset(builder);

      if (WHITESPACE.matches(cur))
      {
        _ws();
      }
      else if (cur == '-' && equals(reader.peek(2), CDC_LL))
      {
        _cdc();
      }
      else if (O.matches(cur) && matchesOrEOF(reader.at(4), WHITESPACE)
          && equals(reader.peek(3), ONLY_LL, true))
      {
        _only();
      }
      else if (N.matches(cur) && matchesOrEOF(reader.at(3), WHITESPACE)
          && equals(reader.peek(2), NOT_LL, true))
      {
        _not();
      }
      else if (A.matches(cur) && matchesOrEOF(reader.at(3), WHITESPACE)
          && equals(reader.peek(2), AND_LL, true))
      {
        _and();
      }
      else if (U.matches(cur) && equals(reader.peek(3), URI_LL, true))
      {
        _uri();
      }
      else if (U.matches(cur) && next == '+'
          && matchesOrEOF(reader.at(2), URANGESTART))
      {
        _urange();
      }
      else if (NMSTART.matches(cur) || cur == '-'
          && matches(next, NMSTART) || escapes.get(0).isPresent()
          || (cur == '-' && escapes.get(1).isPresent()))
      {
        _ident();
        if (reader.peek() == '(')
        {
          _function();
        }
      }
      else if (cur == '@'
          && ((matches(next, NMSTART) || escapes.get(1).isPresent()) || (next == '-'
          && (matches(reader.at(2), NMSTART)) || escapes.get(
          2).isPresent())))
      {
        _atkeyword();
      }
      else if (NUMEND.matches(cur) || NUMSTART.matches(cur)
          && matches(next, NUMEND) || UNARY.matches(cur)
          && next == '.' && matches(reader.at(2), NUMEND))
      {
        _num();
      }
      else if (cur == '<' && equals(reader.peek(3), CDO_LL))
      {
        _cdo();
      }
      else if (cur == '/' && next == '*')
      {
        _comment();
      }
      else if (QUOTES.matches(cur))
      {
        _string();
      }
      else if (cur == '#'
          && (matches(next, NMCHAR) || escapes.get(1).isPresent()))
      {
        _hashname();
      }
      else if (cur == '.'
          && (matches(next, NMCHAR) || escapes.get(1).isPresent()))
      {
        _classname();
      }
      else if (cur == '!'
          && forwardMatch("important", true, false))
      {
        _important();
      }
      else if (cur == '~' && next == '=')
      {
        _includes();
      }
      else if (cur == '|' && next == '=')
      {
        _dashmatch();
      }
      else if (cur == '^' && next == '=')
      {
        _prefixmatch();
      }
      else if (cur == '$' && next == '=')
      {
        _suffixmatch();
      }
      else if (cur == '*' && next == '=')
      {
        _substringmatch();
      }
      else
      {
        builder.type = Type.CHAR;
        builder.append(cur);
      }
      consumer.add(builder.asToken());
    }
  }

  /**
   * FUNCTION {ident}\(
   *
   * @throws IOException
   */
  private void _function() throws
      IOException
  {
    // Assumes that builder already contains an IDENT
    if (debug)
    {
      checkArgument('(' == reader.peek());
    }
    builder.type = Type.FUNCTION;
    builder.append(reader.next());
  }

  /**
   * URI url\({w}{string}{w}\) |
   * url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}\)
   */
  private void _uri() throws
      IOException,
      CssException
  {
    /*
       * Need to run this before IDENT and FUNCTION since the first three
       * chars match IDENT and the entire trigger token matches FUNCTION.
       */

    builder.append("url(");
    reader.forward(3);
    if (debug)
    {
      checkArgument('(' == reader.curChar || -1 == reader.peek());
    }
    reader.forward(NOT_WHITESPACE);
    int uristart = reader.next();

    if (-1 == uristart)
    {
      builder.error(SCANNER_PREMATURE_EOF, reader);
    }
    else if (QUOTES.matches((char) uristart))
    {
      builder.append('\'');
      _string();
      builder.append('\'');

      if (debug && builder.errors.size() < 1)
      {
        checkArgument(QUOTES.matches((char) reader.curChar));
      }
      reader.forward(NOT_WHITESPACE);
      if (reader.peek() > -1)
      {
        reader.next(); // ')'
      }
    }
    else
    {
      // unquoted uri
      builder.append(uristart);
      if (debug)
      {
        checkArgument(NOT_WHITESPACE.matches((char) reader.curChar));
      }
      StringBuilder buf = new StringBuilder();
      while (true)
      {
        CssReader.Mark mark = reader.mark();
        int ch = reader.next();
        if (ch == -1)
        {
          builder.error(SCANNER_PREMATURE_EOF, reader);
          reader.unread(ch, mark);
          break;
        }
        else if (ch == ')')
        {
          break;
        }
        buf.append((char) ch);
      }
      builder.append(WHITESPACE.trimTrailingFrom(buf.toString()));
    }
    builder.append(')');
    builder.type = Type.URI;

    if (')' != reader.curChar && builder.errors.size() == 0)
    {
      builder.error(CssErrorCode.SCANNER_ILLEGAL_SYNTAX, reader, reader.curChar);
    }
  }

  /**
   * string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
   * string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\'
   */
  private void _string() throws
      IOException,
      CssException
  {

    if (debug)
    {
      checkState(QUOTES.matches((char) reader.curChar));
    }

    /*
       * Note: resulting token excludes start+end quotes
       */

    builder.type = Type.STRING;
    int quoteType = reader.curChar;

    // in strings, we let escapes in general pass through
    while (true)
    {
      CssReader.Mark mark = reader.mark();
      int ch = reader.next();
      if (ch == -1)
      {
        builder.error(SCANNER_PREMATURE_EOF, reader);
        reader.unread(ch, mark);
        break;
      }
      else if (ch == '\n' || ch == '\r' || ch == '\f')
      {
        builder.error(SCANNER_ILLEGAL_CHAR, reader, "NEWLINE",
            Type.STRING.name());
        reader.forward(TERMINATOR);
        break;
      }
      else if (ch == '\\')
      {
        int[] peek = reader.peek(2);
        int nl = isNewLine(peek);
        if (nl > 0)
        {
          // in strings, ignore backslash followed by a literal
          // newline
          reader.forward(nl);
          continue;
        }

      }
      else if (ch == quoteType && reader.prevChar != '\\')
      {
        break;
      }

      builder.append(ch);
    }

    if (debug && builder.errors.size() == 0)
    {
      checkState(QUOTES.matches((char) reader.curChar));
    }
  }

  /**
   * ATKEYWORD '@'[-]?{nmstart}{nmchar}* nmstart [_a-z]|{nonascii}|{escape}
   * nmchar [_a-z0-9-]|{nonascii}|{escape}
   *
   * @throws CssException
   */
  private void _atkeyword() throws
      IOException,
      CssException
  {
    if (debug)
    {
      checkState('@' == reader.curChar);
    }

    builder.type = Type.ATKEYWORD;
    builder.append(cur); // @
    append(NMSTART);
    append(NMCHAR);

    if (debug)
    {
      int nxt = reader.peek();
      checkState(NMCHAR.matches((char) reader.curChar));
      if (nxt > -1)
      {
        checkState(!NMCHAR.matches((char) nxt));
      }
    }
  }

  /**
   * IDENT [-]?{nmstart}{nmchar}*
   */
  private void _ident() throws
      IOException,
      CssException
  {

    builder.type = Type.IDENT;

    Optional<CssEscape> esc = escapes.get(0);

    // first NMSTART char or '-'
    if (esc.isPresent())
    {
      int length = esc.get().render(builder, NMSTART);
      reader.forward(length);
    }
    else
    {
      builder.append(cur);
    }

    if (cur == '-' || (esc.isPresent() && esc.get().character == '-'))
    {
      // The NMSTART that matched in the main loop if clause
      if (escapes.get(1).isPresent())
      {
        reader.forward(escapes.get(1).get().render(builder, NMSTART));
      }
      else
      {
        builder.append(reader.next());
      }

    }
    append(NMCHAR);
  }

  /**
   * DASHMATCH |=
   */
  private void _dashmatch() throws
      IOException
  {
    if (debug)
    {
      checkState(reader.curChar == '|');
    }
    builder.type = Type.DASHMATCH;
    builder.append("|=");
    reader.next();
    if (debug)
    {
      checkState(reader.curChar == '=');
    }
  }

  /**
   * INCLUDES ~=
   */
  private void _includes() throws
      IOException
  {
    if (debug)
    {
      checkState(reader.curChar == '~');
    }
    builder.type = Type.INCLUDES;
    builder.append("~=");
    reader.next();
    if (debug)
    {
      checkState(reader.curChar == '=');
    }
  }

  /**
   * PREFIXMATCH ^=
   */
  private void _prefixmatch() throws
      IOException
  {
    if (debug)
    {
      checkState(reader.curChar == '^');
    }
    builder.type = Type.PREFIXMATCH;
    builder.append("^=");
    reader.next();
    if (debug)
    {
      checkState(reader.curChar == '=');
    }
  }

  /**
   * SUFFIXMATCH $=
   */
  private void _suffixmatch() throws
      IOException
  {
    if (debug)
    {
      checkState(reader.curChar == '$');
    }
    builder.type = Type.SUFFIXMATCH;
    builder.append("$=");
    reader.next();
    if (debug)
    {
      checkState(reader.curChar == '=');
    }
  }

  /**
   * SUBSTRINGMATCH *=
   */
  private void _substringmatch() throws
      IOException
  {
    if (debug)
    {
      checkState(reader.curChar == '*');
    }
    builder.type = Type.SUBSTRINGMATCH;
    builder.append("*=");
    reader.next();
    if (debug)
    {
      checkState(reader.curChar == '=');
    }
  }

  /**
   * HASHNAME "#"{name} name {nmchar}+ [_a-z0-9-]|{nonascii}|{escape}
   *
   * @throws CssException
   */
  private void _hashname() throws
      IOException,
      CssException
  {
    if (debug)
    {
      checkState(reader.curChar == '#');
      checkState(NMCHAR.matches((char) reader.peek()) || isNextEscape());
    }
    builder.type = Type.HASHNAME;
    builder.append('#');
    append(NMCHAR);
  }

  /**
   * CLASSNAME "."{name} This is not part of formal lexical constructs, but
   * seems to be safe to do at scanner level. name {nmchar}+
   * [_a-z0-9-]|{nonascii}|{escape}
   *
   * @throws CssException
   */
  private void _classname() throws
      IOException,
      CssException
  {
    if (debug)
    {
      checkState(reader.curChar == '.');
      checkState(NMCHAR.matches((char) reader.peek()) || isNextEscape());
    }
    builder.type = Type.CLASSNAME;
    builder.append('.');
    append(NMCHAR);
  }

  /**
   * IMPORTANT !{w}important
   */
  private void _important()
  {
    /*
       * Note that #lex needs to use #forwardMatch to maintain correct
       * position
       */
    builder.type = Type.IMPORTANT;
    builder.append("!important");
  }

  /**
   * Builds a comment token, excluding the leading and trailing comment
   * tokens.
   */
  private void _comment() throws
      IOException,
      CssException
  {
    if (debug)
    {
      checkState(reader.curChar == '/' && reader.peek() == '*');
    }
    /*
       * badcomment1 \/\*[^*]*\*+([^/*][^*]*\*+)* badcomment2
       * \/\*[^*]*(\*+[^/*][^*]*)* comment \/\*[^*]*\*+([^/*][^*]*\*+)*\/
       *
       *
       * "comments can not nest" just close at first occurrence of comment
       * close and let the grammar level handle reporting
       */

    builder.type = Type.COMMENT;

    reader.next(); // '*'

    while (true)
    {
      Mark mark = reader.mark();
      int ch = reader.next();
      if (ch == -1)
      {
        builder.error(SCANNER_PREMATURE_EOF, reader);
        reader.unread(ch, mark);
        break;
      }
      else if (ch == '*' && reader.peek() == '/')
      {
        reader.next();
        break;
      }
      else
      {
        builder.append(ch);
      }
    }

    if (debug && builder.errors.size() < 1)
    {
      checkState('/' == reader.curChar && '*' == reader.prevChar);
    }

  }

  private void _cdo() throws
      IOException
  {
    if (debug)
    {
      checkState('<' == reader.curChar);
    }
    builder.type = Type.CDO;
    builder.append("<!--");
    reader.forward(3);

    if (debug)
    {
      checkState('-' == reader.curChar && '-' == reader.prevChar);
    }
  }

  private void _num() throws
      IOException,
      CssException
  {

    /*
       * NUM [0-9]+|[0-9]*\.[0-9]+
       */

    if (debug)
    {
      checkState(NUMSTART.matches(cur));
    }

    builder.type = Type.INTEGER;
    builder.append(cur);
    if (cur == '.')
    {
      builder.type = Type.NUMBER;
    }

    while (true)
    {
      Mark mark = reader.mark();
      int nm = reader.next();
      if (nm == -1)
      {
        if (builder.getLength() == 1 && builder.getLast() == '.')
        {
          builder.type = Type.CHAR;
        }
        reader.unread(nm, mark);
        break;
      }
      else if (!NUM.matches((char) nm))
      {
        reader.unread(nm, mark);
        break;
      }
      else if (nm == '.' && !NUMEND.matches((char) reader.peek()))
      {
        reader.unread(nm, mark);
        break;
      }
      builder.append(nm);

      if (nm == '.')
      {
        builder.type = Type.NUMBER;
      }
    }

    int qnt = reader.peek();
    if (qnt > -1 && (QNTSTART.matches((char) qnt)) || isNextEscape())
    {
      // this num is the start of a quantity literal
      _quantity();
    }
  }

  /**
   * With incoming builder containing a valid NUMBER, and next char being a
   * valid QNTSTART, modify the type and append to the builder
   */
  private void _quantity() throws
      IOException,
      CssException
  {

    if (debug)
    {
      int ch = reader.peek();
      checkState(QNTSTART.matches((char) ch) || isNextEscape());
      checkState(builder.getLength() > 0
          && NUM.matches(builder.getLast()));
    }

    /*
       * Assume we have a {num}{ident} instance (DIMEN), and then override
       * that if a specific quantity literal is found.
       */
    builder.type = Type.QNTY_DIMEN;
    TokenBuilder suffix = new TokenBuilder(reader, errHandler);
    append(QNTSTART, suffix);
    if (suffix.getLast() != '%')
    { // QNTSTART = NMSTART | '%'
      append(NMCHAR, suffix);
    }

    if (suffix.getLength() > QNT_TOKEN_MAXLENGTH)
    {
      // longer than max length in quantities map
      builder.append(suffix.toString());
      return;
    }

    // shorter or equal to max length in quantities map
    // we might have a more specific match
    final int[] ident = suffix.toArray();
    int[] match = null;

    for (int[] test : quantities.keySet())
    {
      if (equals(ident, test, true))
      {
        builder.type = quantities.get(test);
        match = test;
        break;
      }
    }

    if (builder.type == Type.QNTY_DIMEN)
    {
      builder.append(ident);
    }
    else
    {
      if (debug)
      {
        checkState(match != null);
      }
      builder.append(match);
    }

  }

  private void _and() throws
      IOException
  {
    /*
       * Need to run this before IDENT since the token alse matches IDENT
       * Whitespace as terminator required: see prose under example XX in MQ
       * spec
       */
    if (debug)
    {
      checkArgument('A' == reader.curChar || 'a' == reader.curChar);
    }

    builder.type = Type.AND;
    builder.append("and");
    reader.forward(2);

    if (debug)
    {
      checkArgument(('d' == reader.curChar || 'D' == reader.curChar)
          && (reader.peek() == -1 || WHITESPACE.matches((char) reader
          .peek())));
    }
  }

  private void _not() throws
      IOException
  {
    /*
       * Need to run this before IDENT since the token also matches IDENT
       * Whitespace as terminator required: see prose under example XX in MQ
       * spec
       */

    if (debug)
    {
      checkArgument('N' == reader.curChar || 'n' == reader.curChar);
    }

    builder.type = Type.NOT;
    builder.append("not");
    reader.forward(2);

    if (debug)
    {
      checkArgument(('t' == reader.curChar || 'T' == reader.curChar)
          && (reader.peek() == -1 || WHITESPACE.matches((char) reader
          .peek())));
    }
  }

  private void _only() throws
      IOException
  {
    /*
       * Need to run this before IDENT since the token else matches IDENT
       * Whitespace as terminator required: see prose under example XX in MQ
       * spec
       */
    if (debug)
    {
      checkArgument('o' == reader.curChar || 'O' == reader.curChar);
    }

    builder.type = Type.ONLY;
    builder.append("only");
    reader.forward(3);

    if (debug)
    {
      checkArgument(('y' == reader.curChar || 'Y' == reader.curChar)
          && (reader.peek() == -1 || WHITESPACE.matches((char) reader
          .peek())));
    }
  }

  private void _cdc() throws
      IOException
  {
    /*
       * Need to run this before IDENT since the first two chars match IDENT.
       */

    if (debug)
    {
      checkArgument('-' == reader.curChar);
    }

    builder.type = Type.CDC;
    builder.append("-->");
    reader.forward(2);

    if (debug)
    {
      checkArgument('>' == reader.curChar && '-' == reader.prevChar);
    }
  }

  /**
   * Whitespace w ::= wc wc ::= #x9 | #xA | #xC | #xD | #x20
   */
  private void _ws() throws
      IOException
  {
    builder.type = Type.S;
    // no need to preserve exact whitespace
    // but append an S char to keep the token
    // contract of always having a value
    builder.append(' ');
    reader.forward(NOT_WHITESPACE);

    if (debug)
    {
      int nxt = reader.peek();
      checkArgument(WHITESPACE.matches((char) reader.curChar));
      if (nxt > -1)
      {
        checkArgument(NOT_WHITESPACE.matches((char) nxt));
      }
    }
  }

  /**
   * Length of the longest key in {@link #quantities}. Units longer than
   * this are appended as plain dimensions without consulting the map.
   */
  private static final int QNT_TOKEN_MAXLENGTH = 4; // update when adding more
  // to map
  /**
   * Known unit suffixes (lower-case code points) and the token type they
   * select. The keys are arrays, so lookups are done by iterating over the
   * keys and comparing contents (ignoring ASCII case) rather than by
   * {@code Map.get} with a freshly built array.
   */
  private static final Map<int[], Type> quantities = new ImmutableMap.Builder<int[], Type>()
      .put(new int[]{'d', 'p', 'c', 'm'}, Type.QNTY_RESOLUTION)
      .put(new int[]{'d', 'p', 'p', 'x'}, Type.QNTY_RESOLUTION)
      .put(new int[]{'g', 'r', 'a', 'd'}, Type.QNTY_ANGLE)
      .put(new int[]{'t', 'u', 'r', 'n'}, Type.QNTY_ANGLE)
      .put(new int[]{'v', 'm', 'i', 'n'}, Type.QNTY_LENGTH)
      .put(new int[]{'d', 'e', 'g'}, Type.QNTY_ANGLE)
      .put(new int[]{'k', 'h', 'z'}, Type.QNTY_FREQ)
      .put(new int[]{'r', 'a', 'd'}, Type.QNTY_ANGLE)
      .put(new int[]{'r', 'e', 'm'}, Type.QNTY_REMS)
      .put(new int[]{'d', 'p', 'i'}, Type.QNTY_RESOLUTION)
      .put(new int[]{'e', 'm'}, Type.QNTY_EMS)
      .put(new int[]{'c', 'm'}, Type.QNTY_LENGTH)
      .put(new int[]{'p', 'x'}, Type.QNTY_LENGTH)
      .put(new int[]{'m', 'm'}, Type.QNTY_LENGTH)
      .put(new int[]{'i', 'n'}, Type.QNTY_LENGTH)
      .put(new int[]{'p', 't'}, Type.QNTY_LENGTH)
      .put(new int[]{'p', 'c'}, Type.QNTY_LENGTH)
      .put(new int[]{'c', 'h'}, Type.QNTY_LENGTH)
      .put(new int[]{'v', 'w'}, Type.QNTY_LENGTH)
      .put(new int[]{'v', 'h'}, Type.QNTY_LENGTH)
      .put(new int[]{'e', 'x'}, Type.QNTY_EXS)
      .put(new int[]{'m', 's'}, Type.QNTY_TIME)
      .put(new int[]{'h', 'z'}, Type.QNTY_FREQ)
      .put(new int[]{'%'}, Type.QNTY_PERCENTAGE)
      .put(new int[]{'s'}, Type.QNTY_TIME).build();

  /**
   * Builds a UNICODE_RANGE token.
   */
  private void _urange() throws
      IOException,
      CssException
  {
    if (debug)
    {
      checkArgument((reader.curChar == 'U' || reader.curChar == 'u')
          && reader.peek() == '+');
    }

    builder.type = Type.URANGE;
    reader.next(); // '+'

    List<Integer> cbuf = Lists.newArrayList();

    int count = 0;

    while (true)
    {
      Mark mark = reader.mark();
      int ch = reader.next();

      if (ch == -1)
      {
        reader.unread(ch, mark);
        break;
      }

      if (URANGECHAR.matches((char) ch))
      {
        count = ch == '-' ? 0 : count + 1;
        if (count == 7)
        {
          builder.error(CssErrorCode.SCANNER_ILLEGAL_URANGE, reader,
              "U+" + toString(cbuf) + (char) ch);
        }
        cbuf.add(ch);
      }
      else
      {
        reader.unread(ch, mark);
        break;
      }
    }
    builder.append("U+");
    builder.append(Ints.toArray(cbuf));
  }

  /**
   * Returns true if reader next() is the start of a valid escape sequence.
   *
   * @return whether or not the reader is at the start of a valid escape sequence.
   * @throws IOException
   */
  private boolean isNextEscape() throws
      IOException
  {
    boolean result = false;
    Mark mark = reader.mark();
    int ch = reader.next();
    if (ch == '\\')
    {
      try
      {
        Optional<CssEscape> esc = new CssEscape(reader, builder)
            .create();
        result = esc.isPresent();
      }
      catch (CssException ignore)
      {

      }
    }
    reader.unread(ch, mark);
    return result;
  }

  /**
   * Parse forward and append to the TokenBuilder field all characters that
   * match matcher, or until the next character is EOF. Escapes are included
   * verbatim if they don't match matcher, else literal.
   *
   * @throws IOException
   * @throws CssException
   */
  private void append(CharMatcher matcher) throws
      IOException,
      CssException
  {
    append(matcher, builder);
  }

  /**
   * Parse forward and append to the supplied builder all characters that
   * match matcher, or until the next character is EOF. Escapes are included
   * verbatim if they don't match matcher, else literal.
   */
  private void append(CharMatcher matcher, TokenBuilder builder)
      throws
      IOException,
      CssException
  {
    while (true)
    {
      Mark mark = reader.mark();
      int ch = reader.next();
      if (ch > -1 && matcher.matches((char) ch))
      {
        builder.append(ch);
      }
      else if (ch == '\\')
      {
        Optional<CssEscape> escape = new CssEscape(reader, builder)
            .create();
        if (escape.isPresent())
        {
          reader.forward(escape.get().render(builder, matcher));
        }
        else
        {
          reader.unread(ch, mark);
          break;
        }
      }
      else
      {
        reader.unread(ch, mark);
        break;
      }
    }
  }

  /**
   * Check if a forward scan will equal given match string
   *
   * @param match       The string to match
   * @param ignoreCase  Whether case should be ignored
   * @param resetOnTrue Whether the reader should be reset on found match
   * @throws IOException
   */
  private boolean forwardMatch(String match, boolean ignoreCase, boolean resetOnTrue)
      throws
      IOException
  {
    Mark mark = reader.mark();
    List<Integer> cbuf = Lists.newArrayList();
    StringBuilder builder = new StringBuilder();

    boolean result = true;
    boolean seenChar = false;

    while (true)
    {
      cbuf.add(reader.next());

      char ch = (char) reader.curChar;

      if (reader.curChar == -1)
      {
        result = false;
        break;
      }
      else if (WHITESPACE.matches(ch))
      {
        if (seenChar)
        {
          builder.append(ch);
        }
      }
      else
      {
        if (builder.length() == 0)
        {
          seenChar = true;
        }
        builder.append(ch);
        int index = builder.length() - 1;

        if (!ignoreCase
            && (builder.charAt(index) == match.charAt(index)))
        {
          result = false;
          break;
        }

        if (ignoreCase
            && (Ascii.toLowerCase(builder.charAt(index)) != Ascii
            .toLowerCase(match.charAt(index))))
        {
          result = false;
          break;
        }
      }

      if (builder.length() == match.length())
      {
        if (!match.equalsIgnoreCase(builder.toString()))
        {
          result = false;
        }
        break;
      }
    }

    if (!result || resetOnTrue)
    {
      reader.unread(cbuf, mark);
    }

    return result;
  }

  private String toString(List<Integer> ints)
  {
    StringBuilder builder = new StringBuilder();
    for (int i : ints)
    {
      builder.append((char) i);
    }
    return builder.toString();
  }

  /**
   * Like Arrays.equals, but does not return true when both are null.
   */
  private static boolean equals(int[] a, int[] b)
  {
    return equals(a, b, false);
  }

  /**
   * Like Arrays.equals, but does not return true when both are null.
   *
   * @param ignoreAsciiCase If true, ascii case differences are ignored.
   */
  private static boolean equals(int[] a, int[] b, boolean ignoreAsciiCase)
  {
    if ((a == null && b == null) || a == null || b == null
        || (a.length != b.length))
    {
      return false;
    }

    for (int i = 0; i < a.length; i++)
    {
      if (!ignoreAsciiCase)
      {
        if (a[i] != b[i])
        {
          return false;
        }
      }
      else
      {
        if (a[i] == -1 && b[i] == -1)
        {
        }
        else if (a[i] == -1 || b[i] == -1)
        {
          return false;
        }
        else if (Ascii.toLowerCase((char) a[i]) != Ascii
            .toLowerCase((char) b[i]))
        {
          return false;
        }
      }
    }
    return true;
  }

  /**
   * Return true if ch represents EOF (-1), or if it matches matcher.
   */
  private static boolean matchesOrEOF(final int ch, CharMatcher matcher)
  {
    return ch == -1 || matcher.matches((char) ch);
  }

  /**
   * Return true if ch matches matcher, false if not or if ch represents EOF
   * (-1).
   */
  private static boolean matches(final int ch, CharMatcher matcher)
  {
    return ch != -1 && matcher.matches((char) ch);
  }

  /**
   * Determine whether a sequence of chars begin with a CSS newline.
   *
   * @param chars An array with minimum two characters
   * @return 0 if there is no newline, else 1 or 2, representing the newline
   *         length in characters.
   */
  static int isNewLine(int[] chars)
  {
    checkArgument(chars.length > 1);
    // nl \n|\r\n|\r|\f
    if (chars[0] == '\r' && chars[1] == '\n')
    {
      return 2;
    }
    else if (chars[0] == '\n' || chars[0] == '\r' || chars[0] == '\f')
    {
      return 1;
    }
    return 0;
  }

  /**
   * CSS whitespace: space, tab, line feed, carriage return, form feed
   */
  static final CharMatcher WHITESPACE = CharMatcher.anyOf(" \t\n\r\f")
      .precomputed();
  /**
   * Any character that is not CSS whitespace
   */
  private static final CharMatcher NOT_WHITESPACE = WHITESPACE.negate().precomputed();
  /**
   * The two string delimiters, double and single quote
   */
  static final CharMatcher QUOTES = CharMatcher.anyOf("\"'").precomputed();
  // case-insensitive first letters of the url/U+, only, not and and tokens
  private static final CharMatcher U = CharMatcher.anyOf("Uu").precomputed();
  private static final CharMatcher O = CharMatcher.anyOf("Oo").precomputed();
  private static final CharMatcher N = CharMatcher.anyOf("Nn").precomputed();
  private static final CharMatcher A = CharMatcher.anyOf("Aa").precomputed();

  /**
   * {nmstart} excluding {escape}
   */
  private static final CharMatcher NMSTART = CharMatcher
      .inRange('A', 'Z')
      .or(CharMatcher.inRange('a', 'z').or(
          CharMatcher.is('_').or(
              CharMatcher.inRange('\u0080', '\uFFFF'))))
      .precomputed();

  /**
   * {nmchar} excluding {escape}
   */
  private static final CharMatcher NMCHAR = NMSTART.or(
      CharMatcher.inRange('0', '9').or(CharMatcher.is('-')))
      .precomputed();

  /**
   * start of quantities that followed after {num} excluding {escape}
   */
  private static final CharMatcher QNTSTART = NMSTART.or(CharMatcher.is('%'))
      .precomputed();

  /**
   * {num} end char cannot be period
   */
  private static final CharMatcher NUMEND = CharMatcher.inRange('0', '9')
      .precomputed();

  /**
   * {num}
   */
  private static final CharMatcher NUM = NUMEND.or(CharMatcher.is('.'))
      .precomputed();

  /**
   * The unary operators plus and minus
   */
  private static final CharMatcher UNARY = CharMatcher.anyOf("+-")
      .precomputed();

  /**
   * {num} start char can be unary operators
   */
  private static final CharMatcher NUMSTART = NUM.or(UNARY).precomputed();

  /**
   * Hexadecimal digits, upper and lower case
   */
  static final CharMatcher HEXCHAR = CharMatcher.anyOf(
      "AaBbCcDdEeFf0123456789").precomputed();

  /**
   * Characters that may directly follow "U+": a hex digit or '?'
   */
  private static final CharMatcher URANGESTART = HEXCHAR.or(CharMatcher
      .anyOf("?"));

  /**
   * Characters consumed as part of a unicode range: hex digits, '?' and '-'
   */
  private static final CharMatcher URANGECHAR = HEXCHAR.or(CharMatcher
      .anyOf("?-"));

  /**
   * Characters the reader is forwarded to after an illegal newline in a string
   */
  static final CharMatcher TERMINATOR = CharMatcher.anyOf(";}{");

  /**
   * The three last characters in the CDO token
   */
  private static final int[] CDO_LL = new int[]{'!', '-', '-'}; // <!--

  /**
   * The two last characters in the CDC token
   */
  private static final int[] CDC_LL = new int[]{'-', '>'}; // -->

  /**
   * The three last characters in the URI start token
   */
  private static final int[] URI_LL = new int[]{'r', 'l', '('};

  /**
   * The three last characters in the ONLY token
   */
  private static final int[] ONLY_LL = new int[]{'n', 'l', 'y'};

  /**
   * The two last characters in the NOT token
   */
  private static final int[] NOT_LL = new int[]{'o', 't'};

  /**
   * The two last characters in the AND token
   */
  private static final int[] AND_LL = new int[]{'n', 'd'};

  /**
   * Memoizer for escapes at forward reader positions. Owner must invoke
   * reset() every time the reader position changes (excluding closured
   * unreads).
   */
  static class CssEscapeMemoizer
  {
    private final Map<Integer, Optional<CssEscape>> map = Maps.newHashMap();
    private TokenBuilder errFunnel;
    private final CssReader reader;

    CssEscapeMemoizer(final CssReader reader)
    {
      this.reader = reader;
    }

    CssEscapeMemoizer reset(final TokenBuilder errFunnel)
    {
      map.clear();
      this.errFunnel = errFunnel;
      return this;
    }

    Optional<CssEscape> get(final int n) throws
        IOException
    {
      checkNotNull(errFunnel);
      if (!map.containsKey(n))
      {
        map.put(n, create(n));
      }
      return map.get(n);
    }

    private Optional<CssEscape> create(int n) throws
        IOException
    {
      List<Integer> cbuf = Lists.newArrayList();
      Mark mark = reader.mark();

      for (int i = 0; i < n; i++)
      {
        int ch = reader.next();
        cbuf.add(ch);
        if (ch == -1)
        {
          reader.unread(cbuf, mark);
          return CssEscape.ABSENT;
        }
      }

      if (reader.curChar == '\\')
      {
        try
        {
          Optional<CssEscape> esc = new CssEscape(reader, errFunnel)
              .create();
          reader.unread(cbuf, mark);
          return esc;
        }
        catch (CssException ignore)
        {

        }
      }
      reader.unread(cbuf, mark);
      return CssEscape.ABSENT;

    }
  }
}
TOP

Related Classes of org.idpf.epubcheck.util.css.CssScanner$CssEscapeMemoizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.