Package org.apache.cocoon.sitemap.util

Source Code of org.apache.cocoon.sitemap.util.WildcardMatcherHelper$Matcher

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.cocoon.sitemap.util;

import java.util.HashMap;
import java.util.Map;

import com.sun.org.apache.regexp.internal.RE;
import com.sun.org.apache.regexp.internal.RECompiler;
import com.sun.org.apache.regexp.internal.REProgram;
import com.sun.org.apache.regexp.internal.RESyntaxException;

/**
* This class is an utility class that perform wildcard-patterns matching and
* isolation.
*
* @version $Id: WildcardMatcherHelper.java 696953 2008-09-19 07:19:44Z reinhard $
*/
public class WildcardMatcherHelper {

    // ~ Static fields/initializers
    // -----------------------------------------------------------------

    /** Default path separator: "/" */
    public static final char ESC = '\\';

    /** Default path separator: "/" */
    public static final char PATHSEP = '/';

    /** Default path separator: "/" */
    public static final char STAR = '*';

    // ~ Methods
    // ------------------------------------------------------------------------------------

    /**
     * Match a pattern agains a string and isolates wildcard replacement into a
     * <code>Map</code>. <br>
     * Here is how the matching algorithm works:
     *
     * <ul>
     * <li> The '*' character, meaning that zero or more characters (excluding
     * the path separator '/') are to be matched. </li>
     * <li> The '**' sequence, meaning that zero or more characters (including
     * the path separator '/') are to be matched. </li>
     * <li> The '\*' sequence is honored as a literal '*' character, not a
     * wildcard </li>
     * </ul>
     * <br>
     * When more than two '*' characters, not separated by another character,
     * are found their value is considered as '**' and immediate succeeding '*'
     * are skipped. <br>
     * The '**' wildcard is greedy and thus the following sample matches as
     * {"foo/bar","baz","bug"}:
     * <dl>
     * <dt>pattern</dt>
     * <dd>STAR,STAR,PATHSEP,STAR,PATHSEP,STAR,STAR (why can't I express it
     * literally?)</dt>
     * <dt>string</dt>
     * <dd>foo/bar/baz/bug</dt>
     * </dl>
     * The first '**' in the pattern will suck up as much as possible without
     * making the match fail.
     *
     * @param pat
     *            The pattern string.
     * @param str
     *            The string to math agains the pattern
     *
     * @return a <code>Map</code> containing the representation of the
     *         extracted pattern. The extracted patterns are keys in the
     *         <code>Map</code> from left to right beginning with "1" for te
     *         left most, "2" for the next, a.s.o. The key "0" is the string
     *         itself. If the return value is null, string does not match to the
     *         pattern .
     */
    public static Map<String, String> match(final String pat, final String str) {
        Matcher matcher;
        synchronized (cache) {
            matcher = cache.get(pat);
            if (matcher == null) {
                matcher = new Matcher(pat);
                cache.put(pat, matcher);
            }
        }

        String[] list = matcher.getMatches(str);
        if (list == null) {
            return null;
        }

        int n = list.length;
        Map<String, String> map = new HashMap<String, String>(n * 2 + 1);
        for (int i = 0; i < n; i++) {
            map.put(String.valueOf(i), list[i]);
        }

        return map;
    }

    /** Cache for compiled pattern matchers */
    private static final Map<String, Matcher> cache = new HashMap<String, Matcher>();

    // ~ Inner Classes
    // ------------------------------------------------------------------------------

    /**
     * The private matcher class
     */
    private static class Matcher {

        /**
         * Regexp to split constant parts from front and back leaving wildcards
         * in the middle.
         */
        private static final REProgram splitter;

        static {
            final String fixedRE = "([^*\\\\]*)";
            final String wcardRE = "(.*[*\\\\])";
            final String splitRE = "^" + fixedRE + wcardRE + fixedRE + "$";

            try {
                splitter = new RECompiler().compile(splitRE);
            } catch (RESyntaxException e) {
                throw new RuntimeException(e);
            }
        }

        /** Wildcard types to short-cut simple '*' and "**' matches. */
        private static final int WC_CONST = 0;

        private static final int WC_STAR = 1;

        private static final int WC_STARSTAR = 2;

        private static final int WC_REGEXP = 3;

        // ~ Instance fields
        // ------------------------------------------------------------------------

        // All fields declared final to emphasize requirement to be thread-safe.

        /** Fixed text at start of pattern. */
        private final String prefix;

        /** Fixed text at end of pattern. */
        private final String suffix;

        /** Length of prefix and suffix. */
        private final int fixlen;

        /** Wildcard type of pattern. */
        private final int wctype;

        /**
         * Compiled regexp equivalent to wildcard pattern between prefix and
         * suffix.
         */
        private final REProgram regexp;

        // ~ Constructors
        // ---------------------------------------------------------------------------

        /**
         * Creates a new Matcher object.
         *
         * @param pat
         *            The pattern
         * @param str
         *            The string
         */
        Matcher(final String pat) {
            RE re = new RE(splitter);

            if (re.match(pat)) {
                // Split pattern into (foo/)(*)(/bar).
                this.prefix = re.getParen(1);
                String wildcard = re.getParen(2);
                String tail = re.getParen(3);

                // If wildcard ends with \ then add the first char of postfix to
                // wildcard.
                if (tail.length() != 0 && wildcard.charAt(wildcard.length() - 1) == ESC) {
                    wildcard = wildcard + tail.substring(0, 1);
                    this.suffix = tail.substring(1);
                } else {
                    this.suffix = tail;
                }

                // Use short-cuts for single * or ** wildcards
                if (wildcard.equals("*")) {
                    this.wctype = WC_STAR;
                    this.regexp = null;
                } else if (wildcard.equals("**")) {
                    this.wctype = WC_STARSTAR;
                    this.regexp = null;
                } else {
                    this.wctype = WC_REGEXP;
                    this.regexp = compileRegexp(wildcard);
                }
            } else {
                // Pattern is a constant without '*' or '\'.
                this.prefix = pat;
                this.suffix = "";
                this.wctype = WC_CONST;
                this.regexp = null;
            }

            this.fixlen = this.prefix.length() + this.suffix.length();
        }

        // ~ Methods
        // --------------------------------------------------------------------------------

        /**
         * Match string against pattern.
         *
         * @param str
         *            The string
         * @return list of wildcard matches, null if match failed
         */
        String[] getMatches(final String str) {
            // Protect against 'foo' matching 'foo*foo'.
            if (str.length() < this.fixlen) {
                return null;
            }

            if (!str.startsWith(this.prefix)) {
                return null;
            }

            if (!str.endsWith(this.suffix)) {
                return null;
            }

            String infix = str.substring(this.prefix.length(), str.length() - this.suffix.length());

            if (this.wctype == WC_REGEXP) {
                RE re = new RE(this.regexp);
                if (!re.match(infix)) {
                    return null;
                }

                int n = re.getParenCount();
                String[] list = new String[n];
                list[0] = str;
                for (int i = 1; i < n; i++) {
                    list[i] = re.getParen(i);
                }
                return list;
            }

            if (this.wctype == WC_CONST) {
                if (infix.length() != 0) {
                    return null;
                }
                return new String[] { str };
            }

            if (this.wctype == WC_STAR) {
                if (infix.indexOf(PATHSEP) != -1) {
                    return null;
                }
            }

            return new String[] { str, infix };
        }
    }

    /**
     * Compile wildcard pattern into regexp pattern.
     *
     * @param pat
     *            The wildcard pattern
     * @return compiled regexp program.
     */
    static REProgram compileRegexp(String pat) {
        StringBuffer repat = new StringBuffer(pat.length() * 6);
        repat.append('^');

        // Add an extra character to allow unchecked wcpat[i+1] accesses.
        // Unterminated ESC sequences are silently handled as '\\'.
        char[] wcpat = (pat + ESC).toCharArray();
        for (int i = 0, n = pat.length(); i < n; i++) {
            char ch = wcpat[i];

            if (ch == STAR) {
                if (wcpat[i + 1] != STAR) {
                    repat.append("([^/]*)");
                    continue;
                }

                // Handle two and more '*' as single '**'.
                while (wcpat[i + 1] == STAR) {
                    i++;
                }
                repat.append("(.*)");
                continue;
            }

            // Match ESC+ESC and ESC+STAR as literal ESC and STAR which needs to
            // be escaped
            // in regexp. Match ESC+other as two characters ESC+other where
            // other may also
            // need to be escaped in regexp.
            if (ch == ESC) {
                ch = wcpat[++i];
                if (ch != ESC && ch != STAR) {
                    repat.append("\\\\");
                }
            }

            if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9' || ch == '/') {
                repat.append(ch);
                continue;
            }

            repat.append('\\');
            repat.append(ch);
        }
        repat.append('$');

        try {
            return new RECompiler().compile(repat.toString());
        } catch (RESyntaxException e) {
            throw new RuntimeException(e);
        }
    }
}
TOP

Related Classes of org.apache.cocoon.sitemap.util.WildcardMatcherHelper$Matcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.