// Package org.pdf4j.saxon.functions
//
// Source code of org.pdf4j.saxon.functions.Matches

package org.pdf4j.saxon.functions;
import org.pdf4j.saxon.Configuration;
import org.pdf4j.saxon.Platform;
import org.pdf4j.saxon.expr.*;
import org.pdf4j.saxon.om.Item;
import org.pdf4j.saxon.regex.RegularExpression;
import org.pdf4j.saxon.trans.XPathException;
import org.pdf4j.saxon.type.ItemType;
import org.pdf4j.saxon.value.AtomicValue;
import org.pdf4j.saxon.value.BooleanValue;
import org.pdf4j.saxon.value.StringValue;


/**
* This class implements the matches() function for regular expression matching
*/

public class Matches extends SystemFunction {

    private RegularExpression regexp;

    /**
     * Simplify and validate.
     * This is a pure function so it can be simplified in advance if the arguments are known
     * @return the simplified expression
     * @throws XPathException if any error is found (e.g. invalid regular expression)
     * @param visitor an expression visitor
     */

     public Expression simplify(ExpressionVisitor visitor) throws XPathException {
        Expression e = simplifyArguments(visitor);
        // compile the regular expression once if possible
        if (e == this) {
            maybePrecompile(visitor);
        }
        return e;
    }

    /**
     * Precompile the regular expression if possible
     * @param visitor an expression visitor
     */

    private void maybePrecompile(ExpressionVisitor visitor) throws XPathException {
        if (regexp == null) {
            try {
                regexp = tryToCompile(argument, 1, 2, visitor.getStaticContext());
            } catch (XPathException err) {
                err.setLocator(this);
                throw err;
            }
        }
    }


    /**
     * Perform optimisation of an expression and its subexpressions.
     * <p/>
     * <p>This method is called after all references to functions and variables have been resolved
     * to the declaration of the function or variable, and after all type checking has been done.</p>
     *
     * @param visitor         an expression visitor
     * @param contextItemType the static type of "." at the point where this expression is invoked.
     *                        The parameter is set to null if it is known statically that the context item will be undefined.
     *                        If the type of the context item is not known statically, the argument is set to
     *                        {@link org.pdf4j.saxon.type.Type#ITEM_TYPE}
     * @return the original expression, rewritten if appropriate to optimize execution
     * @throws org.pdf4j.saxon.trans.XPathException
     *          if an error is discovered during this phase
     *          (typically a type error)
     */

    public Expression optimize(ExpressionVisitor visitor, ItemType contextItemType) throws XPathException {
        Expression e = super.optimize(visitor, contextItemType);
        // try once again to compile the regular expression once if possible
        // (used when the regex has been identified as a constant as a result of earlier rewrites)
        if (e == this) {
            maybePrecompile(visitor);
        }
        return e;
    }

    /**
     * Get the compiled regular expression, returning null if the regex has not been compiled
     * @return the compiled regular expression, or null
     */

    public RegularExpression getCompiledRegularExpression() {
        return regexp;
    }

    /**
     * Evaluate the matches() function to give a Boolean value.
     * @param c  The dynamic evaluation context
     * @return the result as a BooleanValue, or null to indicate the empty sequence
     * @throws XPathException on an error
     */

    public Item evaluateItem(XPathContext c) throws XPathException {
        AtomicValue sv0 = (AtomicValue)argument[0].evaluateItem(c);
        if (sv0==null) {
            sv0 = StringValue.EMPTY_STRING;
        }

        RegularExpression re = regexp;

        if (re == null) {
            AtomicValue pat = (AtomicValue)argument[1].evaluateItem(c);
            if (pat==null) return null;

            CharSequence flags;
            if (argument.length==2) {
                flags = "";
            } else {
                AtomicValue sv2 = (AtomicValue)argument[2].evaluateItem(c);
                if (sv2==null) return null;
                flags = sv2.getStringValueCS();
            }

            try {
                final Platform platform = Configuration.getPlatform();
                final int xmlVersion = c.getConfiguration().getXMLVersion();
                re = platform.compileRegularExpression(
                        pat.getStringValueCS(), xmlVersion, RegularExpression.XPATH_SYNTAX, flags);
            } catch (XPathException err) {
                XPathException de = new XPathException(err);
                if (de.getErrorCodeLocalPart() == null) {
                    de.setErrorCode("FORX0002");
                }
                de.setXPathContext(c);
                throw de;
            }
        }
        return BooleanValue.get(re.containsMatch(sv0.getStringValueCS()));
    }

    /**
     * Temporary test rig, used to submit bug report to Sun
     */
//     public static void main(String[] args) throws Exception {
//
//        matches("\u212a", "K");
//        matches("\u212a", "[A-Z]");
//        matches("\u212a", "I|J|K|L");
//        matches("\u212a", "[IJKL]");
//        matches("\u212a", "k");
//        matches("\u212a", "[a-z]");
//        matches("\u212a", "i|j|k|l");
//        matches("\u212a", "[ijkl]");
//    }
//
//    private static void matches(String in, String pattern) {
//        System.err.println("Java version " + System.getProperty("java.version"));
//        int flags = Pattern.UNIX_LINES;
//        flags |= Pattern.CASE_INSENSITIVE;
//        flags |= Pattern.UNICODE_CASE;
//        Pattern p = Pattern.compile(pattern, flags);
//        boolean b = p.matcher(in).find();
//        System.err.println("Pattern " + pattern + ": " + (b ? " match" : "no match"));
//    }

//    Results of this test with JDK 1.5.0_05:
//
//    Pattern K:  match
//    Java version 1.5.0_05
//    Pattern [A-Z]: no match
//    Java version 1.5.0_05
//    Pattern I|J|K|L:  match
//    Java version 1.5.0_05
//    Pattern [IJKL]: no match
//    Java version 1.5.0_05
//    Pattern k:  match
//    Java version 1.5.0_05
//    Pattern [a-z]:  match
//    Java version 1.5.0_05
//    Pattern i|j|k|l:  match
//    Java version 1.5.0_05
//    Pattern [ijkl]: no match

    /**
     * Try to precompile the arguments to the function. This method is shared by
     * the implementations of the three XPath functions matches(), replace(), and
     * tokenize().
     * @param args the supplied arguments to the function, as an array
     * @param patternArg the position of the argument containing the regular expression
     * @param flagsArg the position of the argument containing the flags
     * @param env the static context
     * @return the compiled regular expression, or null indicating that the information
     * is not available statically so it cannot be precompiled
     * @throws XPathException if any failure occurs, in particular, if the regular
     * expression is invalid
     */

    public static RegularExpression tryToCompile(Expression[] args, int patternArg, int flagsArg, StaticContext env)
    throws XPathException {
        if (patternArg > args.length - 1) {
            // too few arguments were supplied; the error will be reported in due course
            return null;
        }
        CharSequence flagstr = null;
        if (args.length-1 < flagsArg) {
            flagstr = "";
        } else if (args[flagsArg] instanceof StringLiteral) {
            flagstr = ((StringLiteral)args[flagsArg]).getStringValue();
        }

        if (args[patternArg] instanceof StringLiteral && flagstr != null) {
            try {
                Platform platform = Configuration.getPlatform();
                String in = ((StringLiteral)args[patternArg]).getStringValue();
                final int xmlVersion = env.getConfiguration().getXMLVersion();
                int syntax = RegularExpression.XPATH_SYNTAX;
                // TODO: Find a better (conformant) way of switching this option on
                if (flagstr.length() > 0 && flagstr.charAt(0) == '!') {
                    flagstr = flagstr.subSequence(1, flagstr.length());
                    syntax = RegularExpression.NATIVE_SYNTAX;
                }
                return platform.compileRegularExpression(in, xmlVersion, syntax, flagstr);
            } catch (XPathException err) {
                if (err.getErrorCodeLocalPart() == null) {
                    err.setErrorCode("FORX0002");
                }
                throw err;
            }
        } else {
            return null;
        }
    }


//    public static void main(String[] args) {
//        System.out.println(Pattern.matches("(X)(2)?(3)?(4)?(5)?(6)?(7)?(8)?(9)?(10)?((Y)(\\12))", "XYY"));
////        String pat = "(X)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11(12)(13)\\11)";
////        System.out.println(Pattern.matches(pat, "X2345678910111213X1"));
//    }
}



//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none.
//
// TOP
//
// Related Classes of org.pdf4j.saxon.functions.Matches
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle. Contact coftware#gmail.com.