/*
* Copyright 2004-2010 Brian S O'Neill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.cojen.util;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.security.AccessController;
import java.security.PrivilegedAction;
import org.cojen.classfile.CodeBuilder;
import org.cojen.classfile.Label;
import org.cojen.classfile.LocalVariable;
import org.cojen.classfile.MethodInfo;
import org.cojen.classfile.Modifiers;
import org.cojen.classfile.Opcode;
import org.cojen.classfile.RuntimeClassFile;
import org.cojen.classfile.TypeDesc;
/**
* Provides fast matching of strings against patterns containing wildcards.
* An ordinary map must be supplied in order to create a PatternMatcher. The
* map keys must be strings. Asterisks (*) are treated as wildcard characters.
*
* @author Brian S O'Neill
*/
public abstract class PatternMatcher<V> {
// Shared zero-length array stored in results which carry no wildcard
// positions (see addMatchResult), instead of allocating one per result.
private static final int[] NO_POSITIONS = new int[0];
// Maps pattern sets to auto-generated classes. Keys are the objects returned
// by Maker.getKey() and values are the generated Class objects. It is read
// and written only within forPatterns, which is synchronized.
private static Cache cPatternMatcherClasses = new SoftValueCache(17);
/**
 * Returns a matcher for the patterns which are the keys of the given map.
 * The matcher class is generated on first use of a pattern set and then
 * cached, so a later call with the same patterns only instantiates the
 * cached class with the new mapped values.
 *
 * @param patternMap maps patterns, in which asterisks are wildcards, to the
 * values reported for them
 * @return matcher bound to the values of the given map
 * @throws InternalError if the generated class cannot be instantiated; the
 * underlying reflection failure is attached as the cause
 */
public static synchronized <V> PatternMatcher<V> forPatterns(Map<String, V> patternMap) {
    final Maker maker = new Maker(patternMap);
    final Class cached = (Class) cPatternMatcherClasses.get(maker.getKey());

    return AccessController.doPrivileged(new PrivilegedAction<PatternMatcher<V>>() {
        public PatternMatcher<V> run() {
            Class clz = cached;
            if (clz == null) {
                // First request for this pattern set: generate and remember the class.
                clz = maker.createClassFile().defineClass();
                cPatternMatcherClasses.put(maker.getKey(), clz);
            }

            try {
                // The generated class declares one public constructor taking Object[].
                Constructor ctor = clz.getConstructor(new Class[]{Object[].class});
                return (PatternMatcher) ctor.newInstance(new Object[]{maker.getMappedValues()});
            } catch (NoSuchMethodException e) {
                throw internalError(e);
            } catch (InstantiationException e) {
                throw internalError(e);
            } catch (IllegalAccessException e) {
                throw internalError(e);
            } catch (InvocationTargetException e) {
                throw internalError(e);
            }
        }
    });
}

// Builds the InternalError thrown by forPatterns. The message is unchanged
// (cause.toString()), but the cause is now attached so that the original
// stack trace, and the target of an InvocationTargetException, stay visible.
private static InternalError internalError(Throwable cause) {
    InternalError error = new InternalError(cause.toString());
    error.initCause(cause);
    return error;
}
// Values mapped to the patterns. The generated subclass loads this field by
// name and indexes it with each pattern's sort order, so the name and the
// protected access must be kept.
protected final V[] mValues;
/**
* Invoked by the constructor of the generated subclass.
*
* @param values mapped values, one per pattern, in sorted pattern order
*/
protected PatternMatcher(V[] values) {
mValues = values;
}
/**
 * Finds the first pattern reported for the lookup string.
 *
 * @param lookup string to match against the patterns
 * @return the match, or null if no match
 */
public Result<V> getMatch(String lookup) {
    // The matcher works on the lookup characters followed by the terminator
    // character. concat is used so that a null lookup still fails with a
    // NullPointerException.
    char[] terminated = lookup.concat("\uffff").toCharArray();

    // A limit of one means that at most one result is ever added.
    TinyList holder = new TinyList();
    fillMatchResults(terminated, 1, holder);
    return (Result) holder.mElement;
}
/**
 * Finds the patterns which match the lookup string, in the order reported
 * by the generated matcher.
 *
 * @param lookup string to match against the patterns
 * @param limit maximum number of results to return
 * @return the matches; an empty array if no matches
 */
public Result<V>[] getMatches(String lookup, int limit) {
    // Lookup characters followed by the terminator character.
    char[] terminated = lookup.concat("\uffff").toCharArray();

    List matches = new ArrayList();
    fillMatchResults(terminated, limit, matches);

    Result[] found = new Result[matches.size()];
    matches.toArray(found);
    return found;
}
/**
* Adds a Result to the list for each pattern matching the lookup, until the
* limit is reached. The implementation is generated by
* Maker.createClassFile.
*
* @param lookup characters of the lookup string, followed by one
* terminator character (U+FFFF)
* @param limit maximum number of results the list may hold
* @param results list which receives the results
*/
protected abstract void fillMatchResults(char[] lookup,
int limit, List results);
/**
 * Called by generated code to record a match. A result is added only while
 * the list holds fewer than limit elements.
 *
 * @param limit maximum number of results the list may hold
 * @param results list which receives the new Result
 * @param pattern pattern which matched
 * @param value value mapped to the pattern
 * @param positions wildcard positions buffer, which may be null; only the
 * first len elements are copied into the result
 * @param len number of meaningful elements in positions
 * @return false if no more results should be added
 */
protected static boolean addMatchResult(int limit,
                                        List results,
                                        String pattern,
                                        Object value,
                                        int[] positions,
                                        int len)
{
    final int count = results.size();
    if (count >= limit) {
        return false;
    }

    int[] snapshot;
    if (positions != null && len != 0) {
        // The buffer is reused by the generated code, so each result gets
        // its own copy of the meaningful prefix.
        snapshot = new int[len];
        System.arraycopy(positions, 0, snapshot, 0, len);
    } else {
        snapshot = NO_POSITIONS;
    }

    results.add(new Result(pattern, value, snapshot));
    return count + 1 < limit;
}
/**
 * A single match: the pattern which matched, the value mapped to it, and
 * where its wildcards matched within the lookup string.
 */
public static class Result<V> {
    private final String mMatchedPattern;
    private final V mMappedValue;
    private final int[] mWildcardPositions;

    Result(String pattern, V value, int[] positions) {
        this.mMatchedPattern = pattern;
        this.mMappedValue = value;
        this.mWildcardPositions = positions;
    }

    /**
     * Returns the pattern which matched.
     */
    public String getPattern() {
        return this.mMatchedPattern;
    }

    /**
     * Returns the value associated with the matched pattern.
     */
    public V getValue() {
        return this.mMappedValue;
    }

    /**
     * Returns the indexes used to parse the lookup string at wildcard
     * positions in order for it to match the pattern. Array length is
     * always double the number of wildcards in the pattern. Every even
     * element is the start index (inclusive) of a wildcard match, and
     * every odd element is the end index (exclusive) of a wildcard match.
     * The array held by this result is returned directly, not a copy.
     */
    public int[] getWildcardPositions() {
        return this.mWildcardPositions;
    }
}
private static class Maker {
// Root of the tree built from the sorted patterns.
private PatternNode mPatternRoot;
// Cache key created from the sorted patterns.
private Object mKey;
// Values from the pattern map, in sorted pattern order.
private Object[] mMappedValues;
// Largest number of wildcards found in any one pattern.
private int mMaxWildPerKey;
// Type descriptors, assigned in createClassFile and used by the generate
// methods.
private TypeDesc mIntType;
private TypeDesc mBooleanType;
private TypeDesc mListType;
private TypeDesc mStringType;
private TypeDesc mObjectType;
private TypeDesc mIntArrayType;
// Builder for the method currently being generated.
private CodeBuilder mBuilder;
// Parameters (lookup, limit, results) and locals (positions, index) of the
// generated fillMatchResults method.
private LocalVariable mLookupLocal;
private LocalVariable mLimitLocal;
private LocalVariable mResultsLocal;
private LocalVariable mPositionsLocal;
private LocalVariable mIndexLocal;
// Pool of int temporaries of the generated method which may be reused.
private Stack mTempLocals;
// Label just before the final return of the generated method; branched to
// when addMatchResult reports that no more results should be added.
private Label mReturnLabel;
// Incremented before each mapLineNumber call.
private int mReferenceLine;
/**
 * Sorts the patterns, collects their mapped values in sorted order, and
 * builds the pattern tree and the cache key.
 *
 * @param patternMap maps pattern strings to arbitrary values
 */
Maker(Map patternMap) {
    final String[] sorted = (String[]) patternMap.keySet().toArray(new String[0]);
    final int count = sorted.length;

    // Ensure terminating patterns end in the special terminator char.
    for (int k = 0; k < count; k++) {
        if (!sorted[k].endsWith("*")) {
            sorted[k] = sorted[k].concat("\uffff");
        }
    }

    // Sort the patterns in a special order that ensures correct
    // "closest match" semantics.
    Arrays.sort(sorted, new PatternComparator());

    final Object[] values = new Object[count];
    final PatternNode root = new PatternNode();
    for (int k = 0; k < count; k++) {
        final String pattern = sorted[k];

        // The map is keyed by the pattern without the terminator char.
        final int last = pattern.length() - 1;
        final boolean terminated = last >= 0 && pattern.charAt(last) == '\uffff';
        values[k] = patternMap.get(terminated ? pattern.substring(0, last) : pattern);

        // Extend the tree used for generating the matching code.
        root.buildPathTo(pattern, k);
    }

    mMappedValues = values;
    mPatternRoot = root;
    mMaxWildPerKey = root.getMaxWildcardCount();
    mKey = KeyFactory.createKey(sorted);
}

/**
 * Returns the cache key created from the sorted patterns.
 */
public Object getKey() {
    return this.mKey;
}

/**
 * Returns the mapped values, as an Object[] in sorted pattern order.
 */
public Object getMappedValues() {
    return this.mMappedValues;
}
/**
* Generates the class for this pattern set. It has a public constructor
* which passes its Object[] argument to the super constructor, and a public
* fillMatchResults method whose body is generated from the pattern tree.
*
* @return class file holding the generated constructor and method
*/
public RuntimeClassFile createClassFile() {
// NOTE(review): the arguments look like class name (or prefix), superclass
// name and class loader - confirm against RuntimeClassFile.
RuntimeClassFile cf = new RuntimeClassFile
(PatternMatcher.class.getName(),
PatternMatcher.class.getName(),
PatternMatcher.class.getClassLoader());
cf.markSynthetic();
cf.setSourceFile(PatternMatcher.class.getName());
// constructor
TypeDesc objectArrayType = TypeDesc.OBJECT.toArrayType();
TypeDesc[] params = {objectArrayType};
MethodInfo mi = cf.addConstructor(Modifiers.PUBLIC, params);
mBuilder = new CodeBuilder(mi);
mBuilder.loadThis();
mBuilder.loadLocal(mBuilder.getParameter(0));
mBuilder.invokeSuperConstructor(params);
mBuilder.returnVoid();
// Type descriptors shared by the generate methods.
mIntType = TypeDesc.INT;
mBooleanType = TypeDesc.BOOLEAN;
mListType = TypeDesc.forClass(List.class);
mStringType = TypeDesc.STRING;
mObjectType = TypeDesc.OBJECT;
mIntArrayType = TypeDesc.INT.toArrayType();
// fillMatchResults method
TypeDesc charArrayType = TypeDesc.CHAR.toArrayType();
params = new TypeDesc[]{charArrayType, mIntType, mListType};
mi = cf.addMethod(Modifiers.PUBLIC, "fillMatchResults", null, params);
mBuilder = new CodeBuilder(mi);
mLookupLocal = mBuilder.getParameter(0);
mLimitLocal = mBuilder.getParameter(1);
mResultsLocal = mBuilder.getParameter(2);
mPositionsLocal = mBuilder.createLocalVariable("positions", mIntArrayType);
mIndexLocal = mBuilder.createLocalVariable("index", mIntType);
mBuilder.mapLineNumber(++mReferenceLine);
// positions = new int[mMaxWildPerKey * 2]: a start and an end slot for each
// wildcard of the pattern having the most wildcards.
mBuilder.loadConstant(mMaxWildPerKey * 2);
mBuilder.newObject(mIntArrayType);
mBuilder.storeLocal(mPositionsLocal);
// index = 0
mBuilder.loadConstant(0);
mBuilder.storeLocal(mIndexLocal);
mTempLocals = new Stack();
mReturnLabel = mBuilder.createLabel();
// The root node has no character of its own, so it is generated at depth
// -1, which places its sub nodes at depth 0.
generateBranches(mPatternRoot, -1, 0);
mReturnLabel.setLocation();
mBuilder.returnVoid();
return cf;
}
/**
* Generates the matching code for a node and its sub nodes, reading the
* node's character from the lookup array.
*
* @param node node to generate code for
* @param depth constant offset added to the index local when reading the
* lookup array for this node
* @param posIndex index into the positions array of the next wildcard start
*/
private void generateBranches(PatternNode node, int depth,
int posIndex) {
generateBranches(node, depth, posIndex, null);
}
/**
* Generates the matching code for a node and its sub nodes.
*
* @param node node to generate code for
* @param depth constant offset added to the index local when reading the
* lookup array for this node
* @param posIndex index into the positions array of the next wildcard start
* @param tempChar local which already holds the character to test against
* this node, or null to load the character from the lookup array
*/
private void generateBranches(PatternNode node, int depth,
int posIndex,
LocalVariable tempChar) {
int c = node.mChar;
List subNodes = node.mSubNodes;
mBuilder.mapLineNumber(++mReferenceLine);
if (c == '*') {
// Wildcard node. Take an int temporary, reusing a pooled one if
// available, to remember the index before the wildcard consumes
// characters.
LocalVariable savedIndex;
if (mTempLocals.isEmpty()) {
savedIndex =
mBuilder.createLocalVariable("temp", mIntType);
} else {
savedIndex = (LocalVariable)mTempLocals.pop();
}
// savedIndex = index
mBuilder.loadLocal(mIndexLocal);
mBuilder.storeLocal(savedIndex);
// Save position of wildcard start.
mBuilder.loadLocal(mPositionsLocal);
mBuilder.loadConstant(posIndex);
mBuilder.loadLocal(mIndexLocal);
if (depth > 0) {
mBuilder.loadConstant(depth);
mBuilder.math(Opcode.IADD);
}
mBuilder.storeToArray(TypeDesc.INT);
if (subNodes == null) {
// Nothing follows the wildcard in any pattern.
generateWildcard(null, depth, posIndex + 2);
} else {
int size = subNodes.size();
for (int i=0; i<size; i++) {
generateWildcard((PatternNode)subNodes.get(i),
depth, posIndex + 2);
// index = savedIndex, restoring the index after each wildcard
// loop.
mBuilder.loadLocal(savedIndex);
mBuilder.storeLocal(mIndexLocal);
}
}
// Return the temporary to the pool.
mTempLocals.push(savedIndex);
if (node.mPattern != null) {
// A pattern ends at this wildcard; add its result after the code
// for the sub nodes.
generateAddMatchResult(node);
}
return;
}
// Ordinary node: code after this label runs when the character does not
// match.
Label noMatch = mBuilder.createLabel();
// The root node has a negative character and generates no comparison.
if (c >= 0) {
if (tempChar != null) {
// The character was already loaded by the wildcard loop. The local
// is returned to the pool here, once its value is loaded.
mBuilder.loadLocal(tempChar);
mTempLocals.push(tempChar);
} else {
// Load lookup[index + depth].
mBuilder.loadLocal(mLookupLocal);
mBuilder.loadLocal(mIndexLocal);
if (depth > 0) {
mBuilder.loadConstant(depth);
mBuilder.math(Opcode.IADD);
}
mBuilder.loadFromArray(TypeDesc.CHAR);
}
mBuilder.loadConstant((char)c);
mBuilder.ifComparisonBranch(noMatch, "!=");
}
if (subNodes != null) {
// Sub nodes test the next character, one position deeper.
int size = subNodes.size();
for (int i=0; i<size; i++) {
generateBranches
((PatternNode)subNodes.get(i), depth + 1, posIndex);
}
}
if (node.mPattern != null) {
// Matched pattern; save results.
generateAddMatchResult(node);
}
noMatch.setLocation();
}
/**
* Generates the loop which lets a wildcard consume characters. Each
* iteration records the current position as the wildcard end, leaves the
* loop if the terminator character is reached, otherwise runs the code
* generated for the node following the wildcard and then increments the
* index.
*
* @param node node following the wildcard, or null if there is none
* @param depth constant offset added to the index local when reading the
* lookup array
* @param posIndex index into the positions array just past this wildcard's
* end slot; the end position is stored at posIndex - 1
*/
private void generateWildcard(PatternNode node, int depth,
int posIndex) {
Label loopStart = mBuilder.createLabel().setLocation();
Label loopEnd = mBuilder.createLabel();
Label loopContinue = mBuilder.createLabel();
// Save position of wildcard end.
mBuilder.loadLocal(mPositionsLocal);
mBuilder.loadConstant(posIndex - 1);
mBuilder.loadLocal(mIndexLocal);
if (depth > 0) {
mBuilder.loadConstant(depth);
mBuilder.math(Opcode.IADD);
}
mBuilder.storeToArray(TypeDesc.INT);
// Load lookup[index + depth].
mBuilder.loadLocal(mLookupLocal);
mBuilder.loadLocal(mIndexLocal);
if (depth > 0) {
mBuilder.loadConstant(depth);
mBuilder.math(Opcode.IADD);
}
mBuilder.loadFromArray(TypeDesc.CHAR);
if (node == null) {
// Nothing follows the wildcard: just advance to the terminator.
mBuilder.loadConstant('\uffff');
mBuilder.ifComparisonBranch(loopEnd, "==");
} else {
// Keep the character in an int temporary, reusing a pooled one if
// available, so that the code for the node can test it without
// reading the array again.
LocalVariable tempChar;
if (mTempLocals.isEmpty()) {
tempChar =
mBuilder.createLocalVariable("temp", mIntType);
} else {
tempChar = (LocalVariable)mTempLocals.pop();
}
mBuilder.storeLocal(tempChar);
mBuilder.loadLocal(tempChar);
mBuilder.loadConstant('\uffff');
mBuilder.ifComparisonBranch(loopEnd, "==");
generateBranches(node, depth, posIndex, tempChar);
}
// NOTE(review): no branch to loopContinue is generated in this method;
// the label only marks the increment below.
loopContinue.setLocation();
mBuilder.integerIncrement(mIndexLocal, 1);
mBuilder.branch(loopStart);
loopEnd.setLocation();
}
/**
* Generates a call to the static PatternMatcher.addMatchResult method for
* the pattern ending at the given node, followed by a branch to the return
* label if the call returns false.
*
* @param node node whose mPattern and mOrder identify the matched pattern
*/
private void generateAddMatchResult(PatternNode node) {
mBuilder.mapLineNumber(++mReferenceLine);
// Arguments are loaded in parameter order: limit, results, pattern, value,
// positions, len.
mBuilder.loadLocal(mLimitLocal);
mBuilder.loadLocal(mResultsLocal);
mBuilder.loadConstant(node.mPattern);
// value = this.mValues[node.mOrder]
mBuilder.loadThis();
mBuilder.loadField("mValues", TypeDesc.OBJECT.toArrayType());
mBuilder.loadConstant(node.mOrder);
mBuilder.loadFromArray(TypeDesc.OBJECT);
mBuilder.loadLocal(mPositionsLocal);
// len = two position slots (start and end) for each wildcard in the pattern.
mBuilder.loadConstant(node.getWildcardCount() * 2);
TypeDesc[] params = {
mIntType,
mListType,
mStringType,
mObjectType,
mIntArrayType,
mIntType
};
mBuilder.invokeStatic(PatternMatcher.class.getName(),
"addMatchResult", mBooleanType, params);
// A zero (false) result means no more results should be added, so leave
// the generated method.
mBuilder.ifZeroComparisonBranch(mReturnLabel, "==");
}
}
/**
 * Node in the tree of pattern characters. Each path from the root spells
 * out one pattern, and the node at which a pattern ends records the pattern
 * and its sort order.
 */
private static class PatternNode {
    // Character matched by this node; -1 for the root.
    public final int mChar;
    // Pattern ending at this node, without the terminator char; null if no
    // pattern ends here.
    public String mPattern;
    // Sort order of mPattern; meaningful only if mPattern is not null.
    public int mOrder;
    // Sub nodes in the order they were added; null if there are none.
    public List<PatternNode> mSubNodes;

    /**
     * Creates a root node.
     */
    public PatternNode() {
        mChar = -1;
    }

    public PatternNode(char c) {
        mChar = c;
    }

    /**
     * Adds the nodes needed to spell out the pattern below this node and
     * records the pattern and order at the last one.
     *
     * @param pattern pattern, optionally ending in the terminator char
     * @param order position of the pattern in the sorted patterns
     */
    public void buildPathTo(String pattern, int order) {
        buildPathTo(pattern, order, 0);
    }

    /**
     * Returns the number of nodes on the longest path from this node down
     * to a leaf, counting this node. A node without sub nodes has a height
     * of one.
     */
    public int getHeight() {
        int tallest = 0;
        if (mSubNodes != null) {
            for (PatternNode sub : mSubNodes) {
                tallest = Math.max(tallest, sub.getHeight());
            }
        }
        // Count this node on top of its tallest sub tree.
        return tallest + 1;
    }

    /**
     * Returns the number of wildcards in the pattern ending at this node,
     * which is zero if no pattern ends here.
     */
    public int getWildcardCount() {
        final String pattern = mPattern;
        if (pattern == null) {
            return 0;
        }
        int wildCount = 0;
        for (int i = pattern.length(); --i >= 0; ) {
            if (pattern.charAt(i) == '*') {
                wildCount++;
            }
        }
        return wildCount;
    }

    /**
     * Returns the largest wildcard count of any pattern ending at this node
     * or at any node below it.
     */
    public int getMaxWildcardCount() {
        int wildCount = getWildcardCount();
        if (mSubNodes != null) {
            for (PatternNode sub : mSubNodes) {
                wildCount = Math.max(wildCount, sub.getMaxWildcardCount());
            }
        }
        return wildCount;
    }

    // Walks down from this node, one node per pattern character starting at
    // index, adding missing nodes, then records the pattern at the last one.
    private void buildPathTo(String pattern, int order, int index) {
        PatternNode node = this;
        final int length = pattern.length();
        for (int i = index; i < length; i++) {
            node = node.subNodeFor(pattern.charAt(i));
        }

        if (pattern.endsWith("\uffff")) {
            // Trim off the terminator char.
            pattern = pattern.substring(0, length - 1);
        }
        node.mPattern = pattern;
        node.mOrder = order;
    }

    // Returns the sub node for the given character, adding it if missing.
    private PatternNode subNodeFor(char c) {
        if (mSubNodes == null) {
            mSubNodes = new ArrayList<PatternNode>(10);
        }
        for (PatternNode sub : mSubNodes) {
            if (sub.mChar == c) {
                return sub;
            }
        }
        PatternNode added = new PatternNode(c);
        mSubNodes.add(added);
        return added;
    }

    /**
     * Prints the tree for debugging, one node per line. Sub nodes are
     * printed before their parent, indented by one more space.
     */
    public void dump(PrintStream out, String indent) {
        if (mSubNodes != null) {
            String subIndent = indent.concat(" ");
            for (PatternNode sub : mSubNodes) {
                sub.dump(out, subIndent);
            }
        }
        out.print(indent);
        out.print('\'');
        out.print((char)mChar);
        out.print('\'');
        if (mPattern != null) {
            out.print(" -> ");
            out.print(mPattern);
        }
        out.println();
    }
}
/**
 * Orders pattern strings for matching. At the first position where two
 * patterns differ, a wildcard sorts after any other character and other
 * characters sort by value. If one pattern is a prefix of the other, the
 * longer pattern sorts first.
 */
private static class PatternComparator implements Comparator {
    public int compare(Object a, Object b) {
        final String left = (String) a;
        final String right = (String) b;
        final int shared = Math.min(left.length(), right.length());

        for (int pos = 0; pos < shared; pos++) {
            final char lc = left.charAt(pos);
            final char rc = right.charAt(pos);
            if (lc == rc) {
                continue;
            }
            // Wildcard sorted high.
            if (lc == '*') {
                return 1;
            }
            if (rc == '*') {
                return -1;
            }
            return lc < rc ? -1 : 1;
        }

        // The shorter string is sorted high.
        return Integer.signum(right.length() - left.length());
    }
}
/**
 * List which holds at most one element. It is treated as empty while its
 * element is null.
 */
private static class TinyList extends AbstractList {
    // The only element; null while the list is empty.
    public Object mElement;

    public int size() {
        return mElement != null ? 1 : 0;
    }

    /**
     * Stores the element if the list is empty.
     *
     * @throws UnsupportedOperationException if an element is already held
     */
    public boolean add(Object obj) {
        if (mElement != null) {
            throw new UnsupportedOperationException();
        }
        mElement = obj;
        return true;
    }

    public Object get(int index) {
        if (index != 0 || mElement == null) {
            throw new IndexOutOfBoundsException();
        }
        return mElement;
    }
}
/* Sample auto-generated method.
protected void fillMatchResults(char[] lookup, int limit, List results) {
int[] positions = new int[2]; // At least as large as number of wildcards, times 2.
int i = 0;
if (lookup[i + 0] == '/') {
if (lookup[i + 1] == 'a') {
if (lookup[i + 2] == 'd') {
if (lookup[i + 3] == 'm') {
if (lookup[i + 4] == 'i') {
if (lookup[i + 5] == 'n') {
if (lookup[i + 6] == '2') {
if (lookup[i + 7] == '\uffff') {
addMatchResult(limit, results, "/admin2", mValues[0], null, 0);
}
} else if (lookup[i + 6] == '\uffff') {
addMatchResult(limit, results, "/admin", mValues[1], null, 0);
}
}
}
}
}
} else if (lookup[i + 1] == 't') {
if (lookup[i + 2] == 'e') {
if (lookup[i + 3] == 'a') {
if (lookup[i + 4] == '/') {
// Wildcard pattern. Consume characters until match found.
int saved_i = i;
positions[0] = i + 5;
while (true) {
positions[1] = i + 5;
char c = lookup[i + 5];
if (c == '\uffff') {
break;
} else if (c == '.') {
if (lookup[i + 6] == 'h') {
if (lookup[i + 7] == 't') {
if (lookup[i + 8] == 'm') {
if (lookup[i + 9] == 'l') {
if (lookup[i + 10] == '\uffff') {
addMatchResult(limit, results, "/tea/*.html", mValues[2], positions, 2);
}
}
}
}
}
}
i++;
}
i = saved_i;
addMatchResult(limit, results, "/tea/*", mValues[3], positions, 2);
}
}
}
}
}
// Wildcard pattern. Consume characters until match found.
int saved_i = i;
positions[0] = i;
while (true) {
positions[1] = i;
char c = lookup[i];
if (c == '\uffff') {
break;
} else if (c == '.') {
if (lookup[i + 1] == 'h') {
if (lookup[i + 2] == 't') {
if (lookup[i + 3] == 'm') {
if (lookup[i + 4] == 'l') {
if (lookup[i + 5] == '\uffff') {
addMatchResult(limit, results, "*.html", mValues[4], positions, 2);
}
}
}
}
}
}
i++;
}
i = saved_i;
addMatchResult(limit, results, "*", mValues[5], positions, 2);
}
*/
}