// Copyright 2012 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.collide.shared.util;
import com.google.gwt.regexp.shared.MatchResult;
import com.google.gwt.regexp.shared.RegExp;
/**
 * Static helpers for working with GWT {@link RegExp} objects: escaping
 * strings for use inside a regular expression, translating simple wildcard
 * patterns into regular expressions, and counting or locating matches.
 */
public class RegExpUtils {

  /** Matches every character that {@link #escape(String)} must escape. */
  private static final RegExp regexpEscape = RegExp.compile("[.*?$^+|()\\[\\]{}\\\\]", "g");

  /** Same as {@link #regexpEscape} but leaves the wildcards {@code *} and {@code ?} alone. */
  private static final RegExp regexpWildcardEscape =
      RegExp.compile("[.$^+|()\\[\\]{}\\\\]", "g");

  /**
   * Escapes all regexp special characters in a string: {@code . * ? $ ^ + | ( ) [ ] { } \}
   *
   * @param pattern the text to escape
   * @return regexp safe string
   */
  public static String escape(String pattern) {
    return regexpEscape.replace(pattern, "\\$&");
  }

  /**
   * Creates a regexp which will only allow a wildcard type search and will
   * escape other extraneous regexp characters. IE: hel?? w*ld
   *
   * All regex characters in the pattern will be escaped. \S is substituted for
   * ? and \S+ is substituted for *. Wildcards can be escaped using backslashes
   * so that they are treated as literals; likewise, backslashes immediately
   * preceding a wildcard can be escaped so as to match a literal backslash.
   * Backslashes not preceding a wildcard should not be escaped.
   *
   * TODO: Consider changing the regex from just \S to a class that is
   * more suitable for programming such as [^\s()+\[\].] or similar
   *
   * @param wildcardPattern the user supplied wildcard pattern
   * @param flags the flags passed through to {@link RegExp#compile(String, String)}
   * @return the compiled regular expression
   */
  public static RegExp createRegExpForWildcardPattern(String wildcardPattern, String flags) {
    return RegExp.compile(createRegExpStringForWildcardPattern(wildcardPattern), flags);
  }

  /**
   * Creates a regular expression which will match the given wildcard pattern
   *
   * Backslashes can be used to escape a wildcard character and make it a
   * literal; likewise, backslashes before wildcard characters can be escaped.
   *
   * @param wildcardPattern the user supplied wildcard pattern
   * @return the source text of the equivalent regular expression
   */
  private static String createRegExpStringForWildcardPattern(String wildcardPattern) {
    String escaped = regexpWildcardEscape.replace(wildcardPattern, "\\$&");

    /*
     * We have already run the pattern through the naive regex escape which
     * escapes all characters except the * and ?. This leads to double escaped \
     * characters that we have to inspect to determine if the user escaped the
     * wildcard or if we should replace it with its regex equivalent.
     *
     * NOTE: * is replaced with \S+ (matches all non-whitespace characters) and
     * ? is replaced with a single \S to match any non-whitespace
     */
    RegExp mimicLookbehind = RegExp.compile("([\\\\]*)([?*])", "g");
    StringBuilder wildcardStr = new StringBuilder(escaped);

    for (MatchResult match = mimicLookbehind.exec(wildcardStr.toString()); match != null;
        match = mimicLookbehind.exec(wildcardStr.toString())) {
      // in some browsers an optional group is null, in others its empty string;
      // both simply mean that no backslash precedes the wildcard.
      String backslashes = match.getGroup(1);

      // The naive escape doubled every backslash, so half of the run is what
      // the user actually typed. Dropping that many characters undoes the
      // double-escaping.
      int userBackslashes = (backslashes == null) ? 0 : backslashes.length() / 2;
      int matchStart = match.getIndex();
      wildcardStr.delete(matchStart, matchStart + userBackslashes);

      // Net number of characters by which the text before lastIndex shrank.
      int shrinkage = userBackslashes;

      /*
       * An even number of slashes (including none) means the wildcard was not
       * escaped so we must replace it with its regex equivalent.
       */
      if (userBackslashes % 2 == 0) {
        String replacement = "?".equals(match.getGroup(2)) ? "\\S" : "\\S+";
        int wildcardPos = matchStart + userBackslashes;
        wildcardStr.replace(wildcardPos, wildcardPos + 1, replacement);
        // one character was swapped for replacement.length() characters
        shrinkage -= replacement.length() - 1;
      }

      // Keep lastIndex pointing just past the text we have already processed.
      mimicLookbehind.setLastIndex(mimicLookbehind.getLastIndex() - shrinkage);
    }

    return wildcardStr.toString();
  }

  /**
   * Returns the number of matches found in a string by a regexp. If the regexp
   * is not a global regexp this will return a maximum of 1. This does not
   * setLastIndex(0) automatically, you must do it manually.
   *
   * Counting stops at the first zero-length match, so a regexp that can match
   * the empty string may report fewer matches than expected.
   *
   * @param regexp the regexp to run; may be null
   * @param input the text to search; may be null
   * @return number of matches, or 0 if either argument is null or the input is
   *         empty
   */
  public static int getNumberOfMatches(RegExp regexp, String input) {
    if (regexp == null || input == null || input.isEmpty()) {
      return 0;
    }

    // if we don't check here we will loop forever
    if (!regexp.getGlobal()) {
      return regexp.test(input) ? 1 : 0;
    }

    int matches = 0;
    for (MatchResult result = regexp.exec(input);
        result != null && result.getGroup(0).length() != 0; result = regexp.exec(input)) {
      matches++;
    }
    return matches;
  }

  /**
   * Finds the last match which starts before exclusiveEndIndex. The regexp's
   * lastIndex is reset to 0 before searching.
   *
   * For a non-global regexp only the first match in the text can be found, so
   * that match is returned if it starts before exclusiveEndIndex.
   *
   * @param regexp the regexp to run
   * @param text the text to search
   * @param exclusiveEndIndex matches starting at or after this index are
   *        ignored
   * @return the last qualifying match, or null if there is none
   */
  public static MatchResult findMatchBeforeIndex(
      RegExp regexp, String text, int exclusiveEndIndex) {
    regexp.setLastIndex(0);

    // A non-global regexp does not advance through the text, so looping over
    // exec() would return the same match forever.
    if (!regexp.getGlobal()) {
      MatchResult onlyMatch = regexp.exec(text);
      boolean startsBeforeEnd = onlyMatch != null && onlyMatch.getIndex() < exclusiveEndIndex;
      return startsBeforeEnd ? onlyMatch : null;
    }

    // Find the last match without going over our exclusiveEndIndex
    MatchResult lastMatch = null;
    for (MatchResult result = regexp.exec(text);
        result != null && result.getIndex() < exclusiveEndIndex; result = regexp.exec(text)) {
      lastMatch = result;

      // A zero-length match leaves lastIndex where the match started; step
      // over it manually so the next exec() makes progress.
      if (regexp.getLastIndex() <= result.getIndex()) {
        regexp.setLastIndex(result.getIndex() + 1);
      }
    }
    return lastMatch;
  }

  /**
   * Find the next match after exclusiveStartIndex.
   *
   * The search position is set through the regexp's lastIndex, so this is only
   * meaningful for global regexps.
   *
   * @param regexp the regexp to run
   * @param text the text to search
   * @param exclusiveStartIndex the search begins at the index following this
   *        one
   * @return the next match, or null if there is none
   */
  public static MatchResult findMatchAfterIndex(
      RegExp regexp, String text, int exclusiveStartIndex) {
    regexp.setLastIndex(exclusiveStartIndex + 1);
    return regexp.exec(text);
  }

  /**
   * Resets the RegExp lastIndex to 0 and returns the number of matches found in
   * a string by a regexp. If the regexp is not a global regexp this will return
   * a maximum of 1.
   *
   * @param regexp the regexp to run; must not be null
   * @param input the text to search; may be null
   * @return number of matches
   */
  public static int resetAndGetNumberOfMatches(RegExp regexp, String input) {
    regexp.setLastIndex(0);
    return getNumberOfMatches(regexp, input);
  }

  /**
   * Resets the RegExp lastIndex to 0 before testing. This is only useful for
   * global RegExps.
   *
   * @param regexp the regexp to run
   * @param input the text to test
   * @return true if the regexp matches the input
   */
  public static boolean resetAndTest(RegExp regexp, String input) {
    regexp.setLastIndex(0);
    return regexp.test(input);
  }
}