Package com.google.common.jimfs

Source Code of com.google.common.jimfs.GlobToRegex$State

/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.common.jimfs;

import static com.google.common.base.Preconditions.checkNotNull;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.regex.PatternSyntaxException;

/**
* Translates globs to regex patterns.
*
* @author Colin Decker
*/
final class GlobToRegex {

  /**
   * Converts the given glob to a regular expression pattern. The given separators determine what
   * characters the resulting expression breaks on for glob expressions such as * which should not
   * cross directory boundaries.
   *
   * <p>Basic conversions (assuming / as only separator):
   *
   * <pre>{@code
   * ?        = [^/]
   * *        = [^/]*
   * **       = .*
   * [a-z]    = [[^/]&&[a-z]]
   * [!a-z]   = [[^/]&&[^a-z]]
   * {a,b,c}  = (a|b|c)
   * }</pre>
   */
  public static String toRegex(String glob, String separators) {
    return new GlobToRegex(glob, separators).convert();
  }

  private static final InternalCharMatcher REGEX_RESERVED
      = InternalCharMatcher.anyOf("^$.?+*\\[]{}()");

  private final String glob;
  private final String separators;
  private final InternalCharMatcher separatorMatcher;

  private final StringBuilder builder = new StringBuilder();
  private final Deque<State> states = new ArrayDeque<>();
  private int index;

  private GlobToRegex(String glob, String separators) {
    this.glob = checkNotNull(glob);
    this.separators = separators;
    this.separatorMatcher = InternalCharMatcher.anyOf(separators);
  }

  /**
   * Converts the glob to a regex one character at a time. A state stack (states) is maintained,
   * with the state at the top of the stack being the current state at any given time. The current
   * state is always used to process the next character. When a state processes a character, it may
   * pop the current state or push a new state as the current state. The resulting regex is written
   * to {@code builder}.
   */
  private String convert() {
    pushState(NORMAL);
    for (index = 0; index < glob.length(); index++) {
      currentState().process(this, glob.charAt(index));
    }
    currentState().finish(this);
    return builder.toString();
  }

  /**
   * Enters the given state. The current state becomes the previous state.
   */
  private void pushState(State state) {
    states.push(state);
  }

  /**
   * Returns to the previous state.
   */
  private void popState() {
    states.pop();
  }

  /**
   * Returns the current state.
   */
  private State currentState() {
    return states.peek();
  }

  /**
   * Throws a {@link PatternSyntaxException}.
   */
  private PatternSyntaxException syntaxError(String desc) {
    throw new PatternSyntaxException(desc, glob, index);
  }

  /**
   * Appends the given character as-is to the regex.
   */
  private void appendExact(char c) {
    builder.append(c);
  }

  /**
   * Appends the regex form of the given normal character or separator from the glob.
   */
  private void append(char c) {
    if (separatorMatcher.matches(c)) {
      appendSeparator();
    } else {
      appendNormal(c);
    }
  }

  /**
   * Appends the regex form of the given normal character from the glob.
   */
  private void appendNormal(char c) {
    if (REGEX_RESERVED.matches(c)) {
      builder.append('\\');
    }
    builder.append(c);
  }

  /**
   * Appends the regex form matching the separators for the path type.
   */
  private void appendSeparator() {
    if (separators.length() == 1) {
      appendNormal(separators.charAt(0));
    } else {
      builder.append('[');
      for (int i = 0; i < separators.length(); i++) {
        appendInBracket(separators.charAt(i));
      }
      builder.append("]");
    }
  }

  /**
   * Appends the regex form that matches anything except the separators for the path type.
   */
  private void appendNonSeparator() {
    builder.append("[^");
    for (int i = 0; i < separators.length(); i++) {
      appendInBracket(separators.charAt(i));
    }
    builder.append(']');
  }

  /**
   * Appends the regex form of the glob ? character.
   */
  private void appendQuestionMark() {
    appendNonSeparator();
  }

  /**
   * Appends the regex form of the glob * character.
   */
  private void appendStar() {
    appendNonSeparator();
    builder.append('*');
  }

  /**
   * Appends the regex form of the glob ** pattern.
   */
  private void appendStarStar() {
    builder.append(".*");
  }

  /**
   * Appends the regex form of the start of a glob [] section.
   */
  private void appendBracketStart() {
    builder.append('[');
    appendNonSeparator();
    builder.append("&&[");
  }

  /**
   * Appends the regex form of the end of a glob [] section.
   */
  private void appendBracketEnd() {
    builder.append("]]");
  }

  /**
   * Appends the regex form of the given character within a glob [] section.
   */
  private void appendInBracket(char c) {
    // escape \ in regex character class
    if (c == '\\') {
      builder.append('\\');
    }

    builder.append(c);
  }

  /**
   * Appends the regex form of the start of a glob {} section.
   */
  private void appendCurlyBraceStart() {
    builder.append('(');
  }

  /**
   * Appends the regex form of the separator (,) within a glob {} section.
   */
  private void appendSubpatternSeparator() {
    builder.append('|');
  }

  /**
   * Appends the regex form of the end of a glob {} section.
   */
  private void appendCurlyBraceEnd() {
    builder.append(')');
  }

  /**
   * Converter state.
   */
  private abstract static class State {
    /**
     * Process the next character with the current state, transitioning the converter to a new
     * state if necessary.
     */
    abstract void process(GlobToRegex converter, char c);

    /**
     * Called after all characters have been read.
     */
    void finish(GlobToRegex converter) {
    }
  }

  /**
   * Normal state.
   */
  private static final State NORMAL = new State() {
    @Override
    void process(GlobToRegex converter, char c) {
      switch (c) {
        case '?':
          converter.appendQuestionMark();
          return;
        case '[':
          converter.appendBracketStart();
          converter.pushState(BRACKET_FIRST_CHAR);
          return;
        case '{':
          converter.appendCurlyBraceStart();
          converter.pushState(CURLY_BRACE);
          return;
        case '*':
          converter.pushState(STAR);
          return;
        case '\\':
          converter.pushState(ESCAPE);
          return;
        default:
          converter.append(c);
      }
    }

    @Override
    public String toString() {
      return "NORMAL";
    }
  };

  /**
   * State following the reading of a single \.
   */
  private static final State ESCAPE = new State() {
    @Override
    void process(GlobToRegex converter, char c) {
      converter.append(c);
      converter.popState();
    }

    @Override
    void finish(GlobToRegex converter) {
      throw converter.syntaxError("Hanging escape (\\) at end of pattern");
    }

    @Override
    public String toString() {
      return "ESCAPE";
    }
  };

  /**
   * State following the reading of a single *.
   */
  private static final State STAR = new State() {
    @Override
    void process(GlobToRegex converter, char c) {
      if (c == '*') {
        converter.appendStarStar();
        converter.popState();
      } else {
        converter.appendStar();
        converter.popState();
        converter.currentState().process(converter, c);
      }
    }

    @Override
    void finish(GlobToRegex converter) {
      converter.appendStar();
    }

    @Override
    public String toString() {
      return "STAR";
    }
  };

  /**
   * State immediately following the reading of a [.
   */
  private static final State BRACKET_FIRST_CHAR = new State() {
    @Override
    void process(GlobToRegex converter, char c) {
      if (c == ']') {
        // A glob like "[]]" or "[]q]" is apparently fine in Unix (when used with ls for example)
        // but doesn't work for the default java.nio.file implementations. In the cases of "[]]" it
        // produces:
        // java.util.regex.PatternSyntaxException: Unclosed character class near index 13
        // ^[[^/]&&[]]\]$
        //              ^
        // The error here is slightly different, but trying to make this work would require some
        // kind of lookahead and break the simplicity of char-by-char conversion here. Also, if
        // someone wants to include a ']' inside a character class, they should escape it.
        throw converter.syntaxError("Empty []");
      }
      if (c == '!') {
        converter.appendExact('^');
      } else if (c == '-') {
        converter.appendExact(c);
      } else {
        converter.appendInBracket(c);
      }
      converter.popState();
      converter.pushState(BRACKET);
    }

    @Override
    void finish(GlobToRegex converter) {
      throw converter.syntaxError("Unclosed [");
    }

    @Override
    public String toString() {
      return "BRACKET_FIRST_CHAR";
    }
  };

  /**
   * State inside [brackets], but not at the first character inside the brackets.
   */
  private static final State BRACKET = new State() {
    @Override
    void process(GlobToRegex converter, char c) {
      if (c == ']') {
        converter.appendBracketEnd();
        converter.popState();
      } else {
        converter.appendInBracket(c);
      }
    }

    @Override
    void finish(GlobToRegex converter) {
      throw converter.syntaxError("Unclosed [");
    }

    @Override
    public String toString() {
      return "BRACKET";
    }
  };

  /**
   * State inside {curly braces}.
   */
  private static final State CURLY_BRACE = new State() {
    @Override
    void process(GlobToRegex converter, char c) {
      switch (c) {
        case '?':
          converter.appendQuestionMark();
          break;
        case '[':
          converter.appendBracketStart();
          converter.pushState(BRACKET_FIRST_CHAR);
          break;
        case '{':
          throw converter.syntaxError("{ not allowed in subpattern group");
        case '*':
          converter.pushState(STAR);
          break;
        case '\\':
          converter.pushState(ESCAPE);
          break;
        case '}':
          converter.appendCurlyBraceEnd();
          converter.popState();
          break;
        case ',':
          converter.appendSubpatternSeparator();
          break;
        default:
          converter.append(c);
      }
    }

    @Override
    void finish(GlobToRegex converter) {
      throw converter.syntaxError("Unclosed {");
    }

    @Override
    public String toString() {
      return "CURLY_BRACE";
    }
  };
}
TOP

Related Classes of com.google.common.jimfs.GlobToRegex$State

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.