// Source Code of org.pdf4j.saxon.regex.RegexTranslator$Range

package org.pdf4j.saxon.regex;


import org.pdf4j.saxon.charcode.UTF16;
import org.pdf4j.saxon.om.FastStringBuffer;
import org.pdf4j.saxon.sort.IntHashSet;
import org.pdf4j.saxon.trans.Err;
import org.pdf4j.saxon.value.Whitespace;


import java.math.BigDecimal;
import java.util.Collections;
import java.util.List;


/**
 * Abstract superclass for the various regex translators, which differ according to the target platform.
 */
public abstract class RegexTranslator {


    /** The regular expression being translated; read by advance()/recede() and quoted in error messages. */
    protected CharSequence regExp;
    // Not referenced in this class. Presumably selects XML 1.0 vs 1.1 character rules - TODO confirm in subclasses.
    protected int xmlVersion;
    /** When set, translateQuantifier() also copies a '?' that immediately follows a quantifier. */
    protected boolean isXPath;
    /** When set (and not inside a character class expression), advance()/recede() skip whitespace. */
    protected boolean ignoreWhitespace;
    /** While true, whitespace skipping in advance()/recede() is suppressed. Not set in this class. */
    protected boolean inCharClassExpr;
    // Not referenced in this class. Presumably requests case-insensitive matching - TODO confirm in subclasses.
    protected boolean caseBlind;
    /** Index into regExp of the next character advance() will read; curChar was read from pos - 1. */
    protected int pos = 0;
    /** Upper bound for pos. Not assigned in this class; presumably regExp.length() - TODO confirm. */
    protected int length;
    /** The character most recently read, or RegexData.EOS once the input is exhausted. */
    protected char curChar;
    /** Set by advance() when it runs past the end of the input; cleared again by recede(). */
    protected boolean eos = false;
    // Not referenced in this class. Presumably the number of the capturing group being processed - TODO confirm.
    protected int currentCapture = 0;
    // Not referenced in this class. Presumably the set of capture group numbers seen so far - TODO confirm.
    protected IntHashSet captures = new IntHashSet();
    /** Buffer that accumulates the translated regular expression. */
    protected final FastStringBuffer result = new FastStringBuffer(32);


    protected void translateTop() throws RegexSyntaxException {
         translateRegExp();
         if (!eos) {
             throw makeException("expected end of string");
         }
    }




    protected void translateRegExp() throws RegexSyntaxException {
        translateBranch();
        while (curChar == '|') {
            copyCurChar();
            translateBranch();
        }
    }


    protected void translateBranch() throws RegexSyntaxException {
        while (translateAtom())
            translateQuantifier();
    }


    /**
     * Translate a single atom of the regular expression. Implemented by the
     * platform-specific subclasses.
     *
     * @return true if an atom was translated, in which case {@link #translateBranch()}
     * goes on to look for a quantifier and a further atom; false to end the current branch
     * @throws RegexSyntaxException if the atom is not valid
     */
    protected abstract boolean translateAtom() throws RegexSyntaxException;


    protected void translateQuantifier() throws RegexSyntaxException {
        switch (curChar) {
            case '*':
            case '?':
            case '+':
                copyCurChar();
                break;
            case '{':
                copyCurChar();
                translateQuantity();
                expect('}');
                copyCurChar();
                break;
            default:
                return;
        }
        if (curChar == '?' && isXPath) {
            copyCurChar();
        }
    }


    protected void translateQuantity() throws RegexSyntaxException {
        String lower = parseQuantExact().toString();
        int lowerValue = -1;
        try {
            lowerValue = Integer.parseInt(lower);
            result.append(lower);
        } catch (NumberFormatException e) {
            // JDK 1.4 cannot handle ranges bigger than this
            result.append("" + Integer.MAX_VALUE);
        }
        if (curChar == ',') {
            copyCurChar();
            if (curChar != '}') {
                String upper = parseQuantExact().toString();
                try {
                    int upperValue = Integer.parseInt(upper);
                    result.append(upper);
                    if (lowerValue < 0 || upperValue < lowerValue)
                        throw makeException("invalid range in quantifier");
                } catch (NumberFormatException e) {
                    result.append("" + Integer.MAX_VALUE);
                    if (lowerValue < 0 && new BigDecimal(lower).compareTo(new BigDecimal(upper)) > 0)
                        throw makeException("invalid range in quantifier");
                }
            }
        }
    }


    protected CharSequence parseQuantExact() throws RegexSyntaxException {
        FastStringBuffer buf = new FastStringBuffer(10);
        do {
            if ("0123456789".indexOf(curChar) < 0)
                throw makeException("expected digit in quantifier");
            buf.append(curChar);
            advance();
        } while (curChar != ',' && curChar != '}');
        return buf;
    }


    protected void copyCurChar() {
        result.append(curChar);
        advance();
    }


    // Three-valued indicator constants. Not referenced in this class; presumably they
    // describe how much of a set of characters satisfies some property - TODO confirm in subclasses.
    public static final int NONE = -1;
    public static final int SOME = 0;
    public static final int ALL = 1;


    /** Character class text covering the code units U+D800 to U+DBFF (high surrogates). */
    public static final String SURROGATES1_CLASS = "[\uD800-\uDBFF]";
    /** Character class text covering the code units U+DC00 to U+DFFF (low surrogates). */
    public static final String SURROGATES2_CLASS = "[\uDC00-\uDFFF]";
    /**
     * Character class text intersecting the NUL character with its own complement; in
     * java.util.regex syntax such a class can match no character at all.
     */
    public static final String NOT_ALLOWED_CLASS = "[\u0000&&[^\u0000]]";


    /**
     * A Range represents a range of consecutive Unicode codepoints
     */


    public static final class Range implements Comparable {
        private final int min;
        private final int max;


        /**
         * Create a range of unicode codepoints
         * @param min the first codepoint in the range
         * @param max the last codepoint in the range
         */


        public Range(int min, int max) {
            this.min = min;
            this.max = max;
        }


        /**
         * Get the start of the range
         * @return the first codepoint in the range
         */


        public int getMin() {
            return min;
        }


        /**
         * Get the end of the range
         * @return the last codepoint in the range
         */


        public int getMax() {
            return max;
        }


        /**
         * Compare this range with another range for ordering purposes. If the two ranges have different
         * start points, the order is the order of the start points; otherwise it is the order of the end
         * points.
         * @param o the other range
         * @return -1 if this range comes first, +1 if the other range comes first, 0 if they are equal
         * (start and end both equal)
         */


        public int compareTo(Object o) {
            Range other = (Range) o;
            if (min < other.min)
                return -1;
            if (min > other.min)
                return 1;
            if (max > other.max)
                return -1;
            if (max < other.max)
                return 1;
            return 0;
        }
    }




    protected void advance() {
        if (pos < length) {
            curChar = regExp.charAt(pos++);
            if (ignoreWhitespace && !inCharClassExpr) {
                while (Whitespace.isWhitespace(curChar)) {
                    advance();
                }
            }
        } else {
            pos++;
            curChar = RegexData.EOS;
            eos = true;
        }
    }


    protected int absorbSurrogatePair() throws RegexSyntaxException {
        if (UTF16.isSurrogate(curChar)) {
            if (!UTF16.isHighSurrogate(curChar))
                throw makeException("invalid surrogate pair");
            char c1 = curChar;
            advance();
            if (!UTF16.isLowSurrogate(curChar))
                throw makeException("invalid surrogate pair");
            return UTF16.combinePair(c1, curChar);
        } else {
            return curChar;
        }
    }


    protected void recede() {
        // The caller must ensure we don't fall off the start of the expression
        if (eos) {
            curChar = regExp.charAt(length - 1);
            pos = length;
            eos = false;
        } else {
            curChar = regExp.charAt((--pos)-1);
        }
        if (ignoreWhitespace && !inCharClassExpr) {
            while (Whitespace.isWhitespace(curChar)) {
                recede();
            }
        }
    }


    protected void expect(char c) throws RegexSyntaxException {
        if (curChar != c) {
            throw makeException("expected", new String(new char[]{c}));
        }
    }


    protected RegexSyntaxException makeException(String key) {
        return new RegexSyntaxException("Error at character " + (pos - 1) +
                " in regular expression " + Err.wrap(regExp, Err.VALUE) + ": " + key);
    }


    protected RegexSyntaxException makeException(String key, String arg) {
        return new RegexSyntaxException("Error at character " + (pos - 1) +
                " in regular expression " + Err.wrap(regExp, Err.VALUE) + ": " + key +
                " (" + arg + ')');
    }


    protected static boolean isJavaMetaChar(int c) {
        switch (c) {
            case '\\':
            case '^':
            case '?':
            case '*':
            case '+':
            case '(':
            case ')':
            case '{':
            case '}':
            case '|':
            case '[':
            case ']':
            case '-':
            case '&':
            case '$':
            case '.':
                return true;
        }
        return false;
    }


    protected static String highSurrogateRanges(List ranges) {
        FastStringBuffer highRanges = new FastStringBuffer(ranges.size() * 2);
        for (int i = 0, len = ranges.size(); i < len; i++) {
            Range r = (Range)ranges.get(i);
            char min1 = UTF16.highSurrogate(r.getMin());
            char min2 = UTF16.lowSurrogate(r.getMin());
            char max1 = UTF16.highSurrogate(r.getMax());
            char max2 = UTF16.lowSurrogate(r.getMax());
            if (min2 != UTF16.SURROGATE2_MIN) {
                min1++;
            }
            if (max2 != UTF16.SURROGATE2_MAX) {
                max1--;
            }
            if (max1 >= min1) {
                highRanges.append(min1);
                highRanges.append(max1);
            }
        }
        return highRanges.toString();
    }


    protected static String lowSurrogateRanges(List ranges) {
        FastStringBuffer lowRanges = new FastStringBuffer(ranges.size() * 2);
        for (int i = 0, len = ranges.size(); i < len; i++) {
            Range r = (Range)ranges.get(i);
            char min1 = UTF16.highSurrogate(r.getMin());
            char min2 = UTF16.lowSurrogate(r.getMin());
            char max1 = UTF16.highSurrogate(r.getMax());
            char max2 = UTF16.lowSurrogate(r.getMax());
            if (min1 == max1) {
                if (min2 != UTF16.SURROGATE2_MIN || max2 != UTF16.SURROGATE2_MAX) {
                    lowRanges.append(min1);
                    lowRanges.append(min2);
                    lowRanges.append(max2);
                }
            } else {
                if (min2 != UTF16.SURROGATE2_MIN) {
                    lowRanges.append(min1);
                    lowRanges.append(min2);
                    lowRanges.append(UTF16.SURROGATE2_MAX);
                }
                if (max2 != UTF16.SURROGATE2_MAX) {
                    lowRanges.append(max1);
                    lowRanges.append(UTF16.SURROGATE2_MIN);
                    lowRanges.append(max2);
                }
            }
        }
        return lowRanges.toString();
    }


    protected static void sortRangeList(List ranges) {
        Collections.sort(ranges);
        int toIndex = 0;
        int fromIndex = 0;
        int len = ranges.size();
        while (fromIndex < len) {
            Range r = (Range)ranges.get(fromIndex);
            int min = r.getMin();
            int max = r.getMax();
            while (++fromIndex < len) {
                Range r2 = (Range)ranges.get(fromIndex);
                if (r2.getMin() > max + 1)
                    break;
                if (r2.getMax() > max)
                    max = r2.getMax();
            }
            if (max != r.getMax())
                r = new Range(min, max);
            ranges.set(toIndex++, r);
        }
        while (len > toIndex)
            ranges.remove(--len);
    }


    protected static boolean isBlock(String name) {
        for (int i = 0; i < RegexData.blockNames.length; i++) {
            if (name.equals(RegexData.blockNames[i])) {
                return true;
            }
        }
        return false;
    }


    protected static boolean isAsciiAlnum(char c) {
        return  'a' <= c && c <= 'z' ||
                'A' <= c && c <= 'Z' ||
                '0' <= c && c <= '9';
    }






}


//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Contributor(s):
//
// Source Code of org.pdf4j.saxon.regex.RegexTranslator$Range
//
// Related Classes of org.pdf4j.saxon.regex.RegexTranslator$Range