Package client.net.sf.saxon.ce.om

Source Code of client.net.sf.saxon.ce.om.NameChecker

package client.net.sf.saxon.ce.om;

import client.net.sf.saxon.ce.expr.z.IntRangeSet;
import client.net.sf.saxon.ce.regex.GeneralUnicodeString;
import client.net.sf.saxon.ce.regex.UnicodeString;
import client.net.sf.saxon.ce.trans.Err;
import client.net.sf.saxon.ce.trans.XPathException;


/**
* NameChecker is a utility class containing static methods to perform validation and analysis of XML names,
* as defined in XML 1.1 or XML 1.0 5th edition.
* The class also handles validation of characters against the XML 1.1 rules.
*/

public abstract class NameChecker  {

    /**
     * Validate whether a given string constitutes a valid QName, as defined in XML Namespaces.
     * Note that this does not test whether the prefix is actually declared.
     *
     * @param name the name to be tested
     * @return true if the name is a lexically-valid QName
     */

    public static boolean isQName(String name) {
        int colon = name.indexOf(':');
        if (colon < 0) {
            return isValidNCName(name);
        }
        return colon != 0 &&
                colon != name.length() - 1 &&
                isValidNCName(name.substring(0, colon)) &&
                isValidNCName(name.substring(colon + 1));
    }

    /**
     * Extract the prefix from a QName. Note, the QName is assumed to be valid.
     *
     * @param qname The lexical QName whose prefix is required
     * @return the prefix, that is the part before the colon. Returns an empty
     *         string if there is no prefix
     */

    public static String getPrefix(String qname) {
        int colon = qname.indexOf(':');
        if (colon < 0) {
            return "";
        }
        return qname.substring(0, colon);
    }

    /**
     * Validate a QName, and return the prefix and local name. The local name is checked
     * to ensure it is a valid NCName. The prefix is not checked, on the theory that the caller
     * will look up the prefix to find a URI, and if the prefix is invalid, then no URI will
     * be found.
     *
     * @param qname the lexical QName whose parts are required. Note that leading and trailing
     *              whitespace is not permitted
     * @return an array of two strings, the prefix and the local name. The first
     *         item is a zero-length string if there is no prefix.
     * @throws QNameException if not a valid QName.
     */

    public static String[] getQNameParts(CharSequence qname) throws QNameException {
        String[] parts = new String[2];
        int colon = -1;
        int len = qname.length();
        for (int i = 0; i < len; i++) {
            if (qname.charAt(i) == ':') {
                colon = i;
                break;
            }
        }
        if (colon < 0) {
            parts[0] = "";
            parts[1] = qname.toString();
            if (!isValidNCName(parts[1])) {
                throw new QNameException("Invalid QName " + Err.wrap(qname));
            }
        } else {
            if (colon == 0) {
                throw new QNameException("QName cannot start with colon: " + Err.wrap(qname));
            }
            if (colon == len - 1) {
                throw new QNameException("QName cannot end with colon: " + Err.wrap(qname));
            }
            parts[0] = qname.subSequence(0, colon).toString();
            parts[1] = qname.subSequence(colon + 1, len).toString();

            if (!isValidNCName(parts[1])) {
                if (!isValidNCName(parts[0])) {
                    throw new QNameException("Both the prefix " + Err.wrap(parts[0]) +
                            " and the local part " + Err.wrap(parts[1]) + " are invalid");
                }
                throw new QNameException("Invalid QName local part " + Err.wrap(parts[1]));
            }
        }
        return parts;
    }

    /**
     * Validate a QName, and return the prefix and local name. Both parts are checked
     * to ensure they are valid NCNames.
     * <p/>
     * <p><i>Used from compiled code</i></p>
     *
     * @param qname the lexical QName whose parts are required. Note that leading and trailing
     *              whitespace is not permitted
     * @return an array of two strings, the prefix and the local name. The first
     *         item is a zero-length string if there is no prefix.
     * @throws XPathException if not a valid QName.
     */

    public static String[] checkQNameParts(CharSequence qname) throws XPathException {
        try {
            String[] parts = getQNameParts(qname);
            if (parts[0].length() > 0 && !isValidNCName(parts[0])) {
                throw new XPathException("Invalid QName prefix " + Err.wrap(parts[0]));
            }
            return parts;
        } catch (QNameException e) {
            XPathException err = new XPathException(e.getMessage());
            err.setErrorCode("FORG0001");
            throw err;
        }
    }


    // Both XML 1.0e5 and XML1.1e2 have
    // [4]     NameStartChar     ::=     ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] |
    //                      [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
    //                      [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    // [4a]     NameChar     ::=     NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]

    // XML Namespaces removes the ":" option

    // In the regexes below, instead of matching all non-BMP characters, we match all UCS-16 codepoints in the surrogate
    // pair range #xD800-#xDFFF. This allows through the private use characters in planes 15 and 16 (above xEFFFF), but
    // we can live with that.

    private static String ncNameStartChar = "[A-Za-z_\\xd8-\\xf6\\xf8-\\u02ff\\u0370-\\u037f-\\u1fff\\u200c\\u200d" +
            "\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\udfff\\uf900-\\ufdcf\\ufdf0-\\ufffd]";

    private static int[] nameStartRangeStartPoints = {
            'A', '_', 'a', 0xc0, 0xd8, 0xf8, 0x370, 0x37f, 0x200c, 0x2070, 0x2c00, 0x3001, 0xf900, 0xfdf0, 0x10000
    };

    private static int[] nameStartRangeEndPoints = {
            'Z', '_', 'z', 0xd6, 0xf6, 0x2ff, 0x37d, 0x1fff, 0x200d, 0x218f, 0x2fef, 0xd7ff, 0xfdcf, 0xfffd, 0xeffff
    };

    private static IntRangeSet ncNameStartChars = new IntRangeSet(nameStartRangeStartPoints, nameStartRangeEndPoints);

    private static int[] nameRangeStartPoints = {
            '-', '.', '0', 0xb7, 0x300, 0x203f
    };

    private static int[] nameRangeEndPoints = {
            '-', '.', '9', 0xb7, 0x36f, 0x2040
    };

    private static IntRangeSet ncNameChars = new IntRangeSet(nameRangeStartPoints, nameRangeEndPoints);

//    private static String ncNameChar = "[A-Za-z0-9_\\-\\.\\xb7\\xd8-\\xf6\\xf8-\\u037d\\u037f-\\u1fff\\u200c\\u200d" +
//            "\\u203f\\u2040\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\udfff\\uf900-\\ufdcf\\ufdf0-\\ufffd]";

    public static boolean isNCNameStartChar(int c) {
        return ncNameStartChars.contains(c);
    }

    public static boolean isNCNameChar(int c) {
        return ncNameStartChars.contains(c) || ncNameChars.contains(c);
    }

    //private static RegExp ncNamePattern = RegExp.compile("^" + ncNameStartChar + ncNameChar + "*$");

    /**
     * Validate whether a given string constitutes a valid NCName, as defined in XML Namespaces.
     *
     * @param ncName the name to be tested. Any whitespace trimming must have already been applied.
     * @return true if the name is a lexically-valid QName
     */

    public static boolean isValidNCName(CharSequence ncName) {
        int len = ncName.length();
        if (len==0) {
            return false;
        }
        UnicodeString us = GeneralUnicodeString.makeUnicodeString(ncName);
        if (!isNCNameStartChar(us.charAt(0))) {
            return false;
        }
        for (int i=1; i<len; i++) {
            if (!isNCNameChar(us.charAt(i))) {
                return false;
            }
        }
        return true;
    }


    /**
     * Test whether a character is a valid XML character
     *
     * @param ch the character to be tested
     * @return true if this is a valid character in XML 1.1
     */

    public static boolean isValidChar(int ch) {
        // from XML 1.0 fifth edition
        // Char     ::=     #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        // from XML 1.1 second edition
        // Char     ::=     [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

        return (ch >= 1 && ch <= 0xd7ff) ||
                (ch >= 0xe000 && ch <= 0xfffd) ||
                (ch >= 0x10000 && ch <= 0x10ffff);
    }

}

// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0.
TOP

Related Classes of client.net.sf.saxon.ce.om.NameChecker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.