Package client.net.sf.saxon.ce.value

Source Code of client.net.sf.saxon.ce.value.StringValue

package client.net.sf.saxon.ce.value;

import client.net.sf.saxon.ce.expr.XPathContext;
import client.net.sf.saxon.ce.lib.StringCollator;
import client.net.sf.saxon.ce.om.StandardNames;
import client.net.sf.saxon.ce.trans.Err;
import client.net.sf.saxon.ce.trans.XPathException;
import client.net.sf.saxon.ce.tree.util.FastStringBuffer;
import client.net.sf.saxon.ce.tree.util.UTF16CharacterSet;
import client.net.sf.saxon.ce.type.*;


/**
* An atomic value of type xs:string. This class is also used for types derived from xs:string.
* Subclasses of StringValue are used for xs:untypedAtomic and xs:anyURI values.
*/

public class StringValue extends AtomicValue {

    // Shared constant instances wrapping the strings "", " ", "true" and "false".
    // EMPTY_STRING is the instance returned by makeStringValue() for null or zero-length input.
    public static final StringValue EMPTY_STRING = new StringValue("");
    public static final StringValue SINGLE_SPACE = new StringValue(" ");
    public static final StringValue TRUE = new StringValue("true");
    public static final StringValue FALSE = new StringValue("false");

    // The value is held as a CharSequence (it may be a StringBuffer rather than a String).
    // The first time it is converted to a String by getPrimitiveStringValue(), that String
    // replaces the original CharSequence in this field.

    protected CharSequence value;     // may be zero-length, will never be null
    // True once the string is known to contain no surrogate pairs: set by
    // setContainsNoSurrogates(), or by getStringLength() when the code point count equals
    // the UTF-16 length. False means "not known", not "contains surrogates".
    protected boolean noSurrogates = false;

    /**
     * Protected constructor for use by subtypes
     */

    protected StringValue() {
        value = "";
        typeLabel = BuiltInAtomicType.STRING;
    }

    /**
     * Constructor. Note that although a StringValue may wrap any kind of CharSequence
     * (usually a String, but it can also be, for example, a StringBuffer), the caller
     * is responsible for ensuring that the value is immutable.
     * @param value the String value. Null is taken as equivalent to "".
     */

    public StringValue(CharSequence value) {
        this.value = (value == null ? "" : value);
        typeLabel = BuiltInAtomicType.STRING;
    }

    /**
     * Assert that the string is known to contain no surrogate pairs
     */

    public void setContainsNoSurrogates() {
        noSurrogates = true;
    }

    /**
     * Determine the primitive type of the value. This delivers the same answer as
     * getItemType().getPrimitiveItemType(). The primitive types are
     * the 19 primitive types of XML Schema, plus xs:integer, xs:dayTimeDuration and xs:yearMonthDuration,
     * and xs:untypedAtomic. For external objects, the result is AnyAtomicType.
     */

    public BuiltInAtomicType getPrimitiveType() {
        return BuiltInAtomicType.STRING;
    }

    /**
     * Factory method. Unlike the constructor, this avoids creating a new StringValue in the case
     * of a zero-length string (and potentially other strings, in future)
     * @param value the String value. Null is taken as equivalent to "".
     * @return the corresponding StringValue
     */

    public static StringValue makeStringValue(CharSequence value) {
        if (value == null || value.length() == 0) {
            return StringValue.EMPTY_STRING;
        } else {
            return new StringValue(value);
        }
    }

    /**
     * Get the string value as a String
     */

    public final String getPrimitiveStringValue() {
        return (String) (value = value.toString());
    }

    /**
     * Convert a value to another primitive data type, with control over how validation is
     * handled.
     * @param requiredType type code of the required atomic type. This must not be a namespace-sensitive type.
     * @param validate true if validation is required. If set to false, the caller guarantees that
     * the value is valid for the target data type, and that further validation is therefore not required.
     * Note that a validation failure may be reported even if validation was not requested.
     * @return the result of the conversion, if successful. If unsuccessful, the value returned
     * will be a ValidationErrorValue. The caller must check for this condition. No exception is thrown, instead
     * the exception will be encapsulated within the ErrorValue.
     */

    public ConversionResult convertPrimitive(BuiltInAtomicType requiredType, boolean validate) {
        int req = requiredType.getFingerprint();
        if (req == StandardNames.XS_STRING || req == StandardNames.XS_ANY_ATOMIC_TYPE) {
            return this;
        }
        return convertStringToBuiltInType(value, requiredType);
    }

    /**
     * Convert a string value to another built-in data type, with control over how validation is
     * handled.
     * @param value the value to be converted
     * @param requiredType the required atomic type. This must not be a namespace-sensitive type.
     * @return the result of the conversion, if successful. If unsuccessful, the value returned
     * will be a {@link ValidationFailure}. The caller must check for this condition. No exception is thrown, instead
     * the exception will be encapsulated within the ValidationFailure.
     */

    public static ConversionResult convertStringToBuiltInType(CharSequence value, BuiltInAtomicType requiredType) {
        try {
            switch (requiredType.getFingerprint()) {
                case StandardNames.XS_BOOLEAN: {
                    return BooleanValue.fromString(value);
                }
                case StandardNames.XS_NUMERIC:
                case StandardNames.XS_DOUBLE:
                    try {
                        double dbl = StringToDouble.stringToNumber(value);
                        return new DoubleValue(dbl);
                    } catch (NumberFormatException err) {
                        ValidationFailure ve = new ValidationFailure("Cannot convert string to double: " + value.toString());
                        ve.setErrorCode("FORG0001");
                        return ve;
                    }

                case StandardNames.XS_INTEGER:
                    return IntegerValue.stringToInteger(value);

                case StandardNames.XS_DECIMAL:
                    return DecimalValue.makeDecimalValue(value);
                case StandardNames.XS_FLOAT:
                    try {
                        float flt = (float)StringToDouble.stringToNumber(value);
                        return new FloatValue(flt);
                    } catch (NumberFormatException err) {
                        ValidationFailure ve = new ValidationFailure("Cannot convert string to float: " + value.toString());
                        ve.setErrorCode("FORG0001");
                        return ve;
                    }
                case StandardNames.XS_DATE:
                    return DateValue.makeDateValue(value);
                case StandardNames.XS_DATE_TIME:
                    return DateTimeValue.makeDateTimeValue(value);
                case StandardNames.XS_TIME:
                    return TimeValue.makeTimeValue(value);
                case StandardNames.XS_G_YEAR:
                    return GYearValue.makeGYearValue(value);
                case StandardNames.XS_G_YEAR_MONTH:
                    return GYearMonthValue.makeGYearMonthValue(value);
                case StandardNames.XS_G_MONTH:
                    return GMonthValue.makeGMonthValue(value);
                case StandardNames.XS_G_MONTH_DAY:
                    return GMonthDayValue.makeGMonthDayValue(value);
                case StandardNames.XS_G_DAY:
                    return GDayValue.makeGDayValue(value);
                case StandardNames.XS_DURATION:
                    return DurationValue.makeDuration(value);
                case StandardNames.XS_YEAR_MONTH_DURATION:
                    return YearMonthDurationValue.makeYearMonthDurationValue(value);
                case StandardNames.XS_DAY_TIME_DURATION:
                    return DayTimeDurationValue.makeDayTimeDurationValue(value);
                case StandardNames.XS_UNTYPED_ATOMIC:
                case StandardNames.XS_ANY_SIMPLE_TYPE:
                case StandardNames.XS_ANY_ATOMIC_TYPE:
                    return new UntypedAtomicValue(value);
                case StandardNames.XS_STRING:
                    return makeStringValue(value);
                 case StandardNames.XS_ANY_URI:
                    return new AnyURIValue(value);
                case StandardNames.XS_HEX_BINARY:
                    return new HexBinaryValue(value);
                case StandardNames.XS_BASE64_BINARY:
                    return new Base64BinaryValue(value);
                default:
                    ValidationFailure ve = new ValidationFailure("Cannot convert string to type " +
                            Err.wrap(requiredType.getDisplayName()));
                    ve.setErrorCode("XPTY0004");
                    return ve;
            }
        } catch (XPathException err) {
            err.maybeSetErrorCode("FORG0001");
            ValidationFailure vf = new ValidationFailure(err.getMessage());
            vf.setErrorCodeQName(err.getErrorCodeQName());
            if (vf.getErrorCodeQName() == null) {
                vf.setErrorCode("FORG0001");
            }
            return vf;
        }
    }


    /**
     * Get the length of this string, as defined in XPath. This is not the same as the Java length,
     * as a Unicode surrogate pair counts as a single character
     * @return the length of the string in Unicode code points
     */

    public int getStringLength() {
        if (noSurrogates) {
            return value.length();
        } else {
            int len = getStringLength(value);
            if (len == value.length()) {
                noSurrogates = true;
            }
            return len;
        }
    }

    /**
     * Get the length of a string, as defined in XPath. This is not the same as the Java length,
     * as a Unicode surrogate pair counts as a single character.
     * @param s The string whose length is required
     * @return the length of the string in Unicode code points
     */

    public static int getStringLength(CharSequence s) {
        int n = 0;
        for (int i = 0; i < s.length(); i++) {
            int c = (int) s.charAt(i);
            if (c < 55296 || c > 56319) n++;    // don't count high surrogates, i.e. D800 to DBFF
        }
        return n;
    }


    /**
     * Determine whether the string is a zero-length string. This may
     * be more efficient than testing whether the length is equal to zero
     * @return true if the string is zero length
     */

    public boolean isZeroLength() {
        return value.length() == 0;
    }

    /**
     * Determine whether the string contains surrogate pairs
     * @return true if the string contains any non-BMP characters
     */

    public boolean containsSurrogatePairs() {
        //noinspection SimplifiableConditionalExpression
        return (noSurrogates ? false : getStringLength() != value.length());
    }

    /**
     * Ask whether the string is known to contain no surrogate pairs.
     * @return true if it is known to contain no surrogates, false if the answer is not known
     */

    public boolean isKnownToContainNoSurrogates() {
        return noSurrogates;
    }

    /**
     * Expand a string containing surrogate pairs into an array of 32-bit characters
     * @return an array of integers representing the Unicode code points
     */

    public int[] expand() {
        return expand(value);
    }

                        
    /**
     * Expand a string containing surrogate pairs into an array of 32-bit characters
     * @param s the string to be expanded
     * @return an array of integers representing the Unicode code points
     */

    public static int[] expand(CharSequence s) {
        int[] array = new int[getStringLength(s)];
        int o = 0;
        for (int i = 0; i < s.length(); i++) {
            int charval;
            int c = s.charAt(i);
            if (c >= 55296 && c <= 56319) {
                // we'll trust the data to be sound
                charval = ((c - 55296) * 1024) + ((int) s.charAt(i + 1) - 56320) + 65536;
                i++;
            } else {
                charval = c;
            }
            array[o++] = charval;
        }
        return array;
    }

    /**
     * Contract an array of integers containing Unicode codepoints into a Java string
     * @param codes an array of integers representing the Unicode code points
     * @param used the number of items in the array that are actually used
     * @return the constructed string
     */

    public static CharSequence contract(int[] codes, int used) {
        FastStringBuffer sb = new FastStringBuffer(codes.length);
        for (int i=0; i<used; i++) {
            if (codes[i]<65536) {
                sb.append((char)codes[i]);
            }
            else // output a surrogate pair
                sb.append(UTF16CharacterSet.highSurrogate(codes[i]));
                sb.append(UTF16CharacterSet.lowSurrogate(codes[i]));
            }
        }
        return sb;
    }


    /**
     * Get an object value that implements the XPath equality and ordering comparison semantics for this value.
     * If the ordered parameter is set to true, the result will be a Comparable and will support a compareTo()
     * method with the semantics of the XPath lt/gt operator, provided that the other operand is also obtained
     * using the getXPathComparable() method. In all cases the result will support equals() and hashCode() methods
     * that support the semantics of the XPath eq operator, again provided that the other operand is also obtained
     * using the getXPathComparable() method. A context argument is supplied for use in cases where the comparison
     * semantics are context-sensitive, for example where they depend on the implicit timezone or the default
     * collation.
     *
     * @param ordered true if an ordered comparison is required. In this case the result is null if the
     *                type is unordered; in other cases the returned value will be a Comparable.
     * @param collator Collation to be used for comparing strings
     * @param context the XPath dynamic evaluation context, used in cases where the comparison is context
     *                sensitive
     * @return an Object whose equals() and hashCode() methods implement the XPath comparison semantics
     *         with respect to this atomic value. If ordered is specified, the result will either be null if
     *         no ordering is defined, or will be a Comparable
     */

    public Object getXPathComparable(boolean ordered, StringCollator collator, XPathContext context) {
        return collator.getCollationKey(value.toString());
    }

    /**
     * Determine if two AtomicValues are equal, according to XPath rules. (This method
     * is not used for string comparisons, which are always under the control of a collation.
     * If we get here, it's because there's a type error in the comparison.)
     * @throws ClassCastException always
     */

    public boolean equals(Object other) {
      throw new ClassCastException("equals on StringValue is not allowed");
    }
   
    public int hashCode() {
        return value.hashCode();
    }

    /**
     * Test whether this StringValue is equal to another under the rules of the codepoint collation
     * @param other the value to be compared with this value
     * @return true if the strings are equal on a codepoint-by-codepoint basis
     */

    public boolean codepointEquals(StringValue other) {
        // avoid conversion of CharSequence to String if values are different lengths
        return value.length() == other.value.length() &&
                value.toString().equals(other.value.toString());
        // It might be better to do character-by-character comparison in all cases; or it might not.
        // We do it this way in the hope that string comparison compiles to native code.
    }

    /**
     * Get the effective boolean value of a string
     * @return true if the string has length greater than zero
     */

    public boolean effectiveBooleanValue() {
        return value.length() > 0;
    }


    public String toString() {
        return "\"" + value + '\"';
    }

    public static boolean isValidLanguageCode(CharSequence val) {
        String regex = "[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*";
                    // See erratum E2-25 to XML Schema Part 2.
        return (val.toString().matches(regex));
    }

    /**
     * Produce a diagnostic representation of the contents of the string
     * @param s the string
     * @return a string in which non-Ascii-printable characters are replaced by \ uXXXX escapes
     */

    public static String diagnosticDisplay(String s) {
        FastStringBuffer fsb = new FastStringBuffer(s.length());
        for (int i = 0, len = s.length(); i < len; i++) {
            char c = s.charAt(i);
            if (c >= 0x20 && c <= 0x7e) {
                fsb.append(c);
            } else {
                fsb.append("\\u");
                for (int shift = 12; shift >= 0; shift -= 4) {
                    fsb.append("0123456789ABCDEF".charAt((c >> shift) & 0xF));
                }
            }
        }
        return fsb.toString();
    }


}

// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0.

TOP

Related Classes of client.net.sf.saxon.ce.value.StringValue

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.