package client.net.sf.saxon.ce.value;
import client.net.sf.saxon.ce.expr.XPathContext;
import client.net.sf.saxon.ce.lib.StringCollator;
import client.net.sf.saxon.ce.om.StandardNames;
import client.net.sf.saxon.ce.trans.Err;
import client.net.sf.saxon.ce.trans.XPathException;
import client.net.sf.saxon.ce.tree.util.FastStringBuffer;
import client.net.sf.saxon.ce.tree.util.UTF16CharacterSet;
import client.net.sf.saxon.ce.type.*;
/**
 * An atomic value of type xs:string. This class is also used for types derived from xs:string.
 * Subclasses of StringValue are used for xs:untypedAtomic and xs:anyURI values.
 *
 * <p>The content is held as a {@link CharSequence}; callers that supply a mutable sequence are
 * responsible for not modifying it afterwards. Lengths and expansions are expressed in Unicode
 * code points, so a UTF-16 surrogate pair counts as a single character.</p>
 */
public class StringValue extends AtomicValue {

    public static final StringValue EMPTY_STRING = new StringValue("");
    public static final StringValue SINGLE_SPACE = new StringValue(" ");
    public static final StringValue TRUE = new StringValue("true");
    public static final StringValue FALSE = new StringValue("false");

    // UTF-16 surrogate ranges and the first code point outside the Basic Multilingual Plane
    private static final int MIN_HIGH_SURROGATE = 0xD800;   // 55296
    private static final int MAX_HIGH_SURROGATE = 0xDBFF;   // 56319
    private static final int MIN_LOW_SURROGATE = 0xDC00;    // 56320
    private static final int MAX_LOW_SURROGATE = 0xDFFF;    // 57343
    private static final int MIN_SUPPLEMENTARY = 0x10000;   // 65536

    // Pattern for xs:language; see erratum E2-25 to XML Schema Part 2.
    private static final String LANGUAGE_CODE_REGEX = "[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*";

    private static final String HEX_DIGITS = "0123456789ABCDEF";

    // We hold the value as a CharSequence (it may be a StringBuffer rather than a string)
    // But the first time this is converted to a string, we keep it as a string
    protected CharSequence value;     // may be zero-length, will never be null

    // true once the string is known to contain no surrogate pairs; false means "not known"
    protected boolean noSurrogates = false;

    /**
     * Protected constructor for use by subtypes. Creates a zero-length xs:string value.
     */
    protected StringValue() {
        value = "";
        typeLabel = BuiltInAtomicType.STRING;
    }

    /**
     * Constructor. Note that although a StringValue may wrap any kind of CharSequence
     * (usually a String, but it can also be, for example, a StringBuffer), the caller
     * is responsible for ensuring that the value is immutable.
     * @param value the String value. Null is taken as equivalent to "".
     */
    public StringValue(CharSequence value) {
        this.value = (value == null ? "" : value);
        typeLabel = BuiltInAtomicType.STRING;
    }

    /**
     * Assert that the string is known to contain no surrogate pairs. After this call
     * {@link #getStringLength()} returns the UTF-16 length without scanning the content.
     */
    public void setContainsNoSurrogates() {
        noSurrogates = true;
    }

    /**
     * Determine the primitive type of the value.
     * @return always {@link BuiltInAtomicType#STRING}
     */
    public BuiltInAtomicType getPrimitiveType() {
        return BuiltInAtomicType.STRING;
    }

    /**
     * Factory method. Unlike the constructor, this avoids creating a new StringValue in the case
     * of a zero-length string (and potentially other strings, in future)
     * @param value the String value. Null is taken as equivalent to "".
     * @return the corresponding StringValue; {@link #EMPTY_STRING} for null or zero-length input
     */
    public static StringValue makeStringValue(CharSequence value) {
        if (value == null || value.length() == 0) {
            return StringValue.EMPTY_STRING;
        }
        return new StringValue(value);
    }

    /**
     * Get the string value as a String. As a side effect the held CharSequence is replaced
     * by the String, so the conversion is done at most once.
     * @return the content as a String
     */
    public final String getPrimitiveStringValue() {
        String s = value.toString();
        value = s;
        return s;
    }

    /**
     * Convert a value to another primitive data type.
     * @param requiredType type code of the required atomic type. This must not be a namespace-sensitive type.
     * @param validate accepted for interface compatibility; this implementation does not read it, and a
     * conversion failure may be reported whatever its value.
     * @return this value when the required type is xs:string or xs:anyAtomicType; otherwise the result of
     * {@link #convertStringToBuiltInType}, which is a {@link ValidationFailure} if the conversion is
     * unsuccessful. The caller must check for this condition; no exception is thrown.
     */
    public ConversionResult convertPrimitive(BuiltInAtomicType requiredType, boolean validate) {
        int req = requiredType.getFingerprint();
        if (req == StandardNames.XS_STRING || req == StandardNames.XS_ANY_ATOMIC_TYPE) {
            return this;
        }
        return convertStringToBuiltInType(value, requiredType);
    }

    /**
     * Convert a string value to another built-in data type.
     * @param value the value to be converted
     * @param requiredType the required atomic type. This must not be a namespace-sensitive type.
     * @return the result of the conversion, if successful. If unsuccessful, the value returned
     * will be a {@link ValidationFailure}. The caller must check for this condition. No exception is thrown, instead
     * the exception will be encapsulated within the ValidationFailure. A target type not listed in the
     * switch yields a failure with code XPTY0004; other failures default to FORG0001.
     */
    public static ConversionResult convertStringToBuiltInType(CharSequence value, BuiltInAtomicType requiredType) {
        try {
            switch (requiredType.getFingerprint()) {
                case StandardNames.XS_BOOLEAN:
                    return BooleanValue.fromString(value);
                case StandardNames.XS_NUMERIC:
                case StandardNames.XS_DOUBLE:
                    try {
                        double dbl = StringToDouble.stringToNumber(value);
                        return new DoubleValue(dbl);
                    } catch (NumberFormatException err) {
                        return numberFormatFailure("double", value);
                    }
                case StandardNames.XS_INTEGER:
                    return IntegerValue.stringToInteger(value);
                case StandardNames.XS_DECIMAL:
                    return DecimalValue.makeDecimalValue(value);
                case StandardNames.XS_FLOAT:
                    try {
                        float flt = (float) StringToDouble.stringToNumber(value);
                        return new FloatValue(flt);
                    } catch (NumberFormatException err) {
                        return numberFormatFailure("float", value);
                    }
                case StandardNames.XS_DATE:
                    return DateValue.makeDateValue(value);
                case StandardNames.XS_DATE_TIME:
                    return DateTimeValue.makeDateTimeValue(value);
                case StandardNames.XS_TIME:
                    return TimeValue.makeTimeValue(value);
                case StandardNames.XS_G_YEAR:
                    return GYearValue.makeGYearValue(value);
                case StandardNames.XS_G_YEAR_MONTH:
                    return GYearMonthValue.makeGYearMonthValue(value);
                case StandardNames.XS_G_MONTH:
                    return GMonthValue.makeGMonthValue(value);
                case StandardNames.XS_G_MONTH_DAY:
                    return GMonthDayValue.makeGMonthDayValue(value);
                case StandardNames.XS_G_DAY:
                    return GDayValue.makeGDayValue(value);
                case StandardNames.XS_DURATION:
                    return DurationValue.makeDuration(value);
                case StandardNames.XS_YEAR_MONTH_DURATION:
                    return YearMonthDurationValue.makeYearMonthDurationValue(value);
                case StandardNames.XS_DAY_TIME_DURATION:
                    return DayTimeDurationValue.makeDayTimeDurationValue(value);
                case StandardNames.XS_UNTYPED_ATOMIC:
                case StandardNames.XS_ANY_SIMPLE_TYPE:
                case StandardNames.XS_ANY_ATOMIC_TYPE:
                    return new UntypedAtomicValue(value);
                case StandardNames.XS_STRING:
                    return makeStringValue(value);
                case StandardNames.XS_ANY_URI:
                    return new AnyURIValue(value);
                case StandardNames.XS_HEX_BINARY:
                    return new HexBinaryValue(value);
                case StandardNames.XS_BASE64_BINARY:
                    return new Base64BinaryValue(value);
                default:
                    ValidationFailure ve = new ValidationFailure("Cannot convert string to type " +
                            Err.wrap(requiredType.getDisplayName()));
                    ve.setErrorCode("XPTY0004");
                    return ve;
            }
        } catch (XPathException err) {
            // Repackage the exception as a ValidationFailure, defaulting the error code to FORG0001
            err.maybeSetErrorCode("FORG0001");
            ValidationFailure vf = new ValidationFailure(err.getMessage());
            vf.setErrorCodeQName(err.getErrorCodeQName());
            if (vf.getErrorCodeQName() == null) {
                vf.setErrorCode("FORG0001");
            }
            return vf;
        }
    }

    /**
     * Build the FORG0001 failure reported when a string cannot be parsed as a number.
     * @param typeName the target type name as it appears in the message ("double" or "float")
     * @param value the string that failed to convert
     * @return the validation failure
     */
    private static ValidationFailure numberFormatFailure(String typeName, CharSequence value) {
        ValidationFailure ve = new ValidationFailure(
                "Cannot convert string to " + typeName + ": " + value.toString());
        ve.setErrorCode("FORG0001");
        return ve;
    }

    /**
     * Get the length of this string, as defined in XPath. This is not the same as the Java length,
     * as a Unicode surrogate pair counts as a single character. If the scan finds that the two
     * lengths agree, the value is remembered as containing no surrogates.
     * @return the length of the string in Unicode code points
     */
    public int getStringLength() {
        if (noSurrogates) {
            return value.length();
        }
        int len = getStringLength(value);
        if (len == value.length()) {
            noSurrogates = true;
        }
        return len;
    }

    /**
     * Get the length of a string, as defined in XPath. This is not the same as the Java length,
     * as a Unicode surrogate pair counts as a single character. The count is obtained by
     * ignoring every high surrogate (D800 to DBFF).
     * @param s The string whose length is required
     * @return the length of the string in Unicode code points
     */
    public static int getStringLength(CharSequence s) {
        int count = 0;
        for (int i = 0, len = s.length(); i < len; i++) {
            int c = s.charAt(i);
            if (c < MIN_HIGH_SURROGATE || c > MAX_HIGH_SURROGATE) {
                count++;
            }
        }
        return count;
    }

    /**
     * Determine whether the string is a zero-length string. This may
     * be more efficient than testing whether the length is equal to zero
     * @return true if the string is zero length
     */
    public boolean isZeroLength() {
        return value.length() == 0;
    }

    /**
     * Determine whether the string contains surrogate pairs
     * @return true if the string contains any non-BMP characters
     */
    public boolean containsSurrogatePairs() {
        return !noSurrogates && getStringLength() != value.length();
    }

    /**
     * Ask whether the string is known to contain no surrogate pairs.
     * @return true if it is known to contain no surrogates, false if the answer is not known
     */
    public boolean isKnownToContainNoSurrogates() {
        return noSurrogates;
    }

    /**
     * Expand a string containing surrogate pairs into an array of 32-bit characters
     * @return an array of integers representing the Unicode code points
     */
    public int[] expand() {
        return expand(value);
    }

    /**
     * Expand a string containing surrogate pairs into an array of 32-bit characters.
     * A high surrogate immediately followed by a low surrogate is combined into one
     * supplementary code point. A surrogate that is not part of such a pair (malformed
     * UTF-16) is copied through unchanged rather than causing an index error.
     * @param s the string to be expanded
     * @return an array of integers representing the Unicode code points
     */
    public static int[] expand(CharSequence s) {
        final int len = s.length();

        // First pass: size the result using the same pairing rule as the copy loop below,
        // so the array can neither overflow nor be left with unused trailing slots.
        int count = 0;
        for (int i = 0; i < len; i++) {
            if (isSurrogatePairAt(s, i)) {
                i++;
            }
            count++;
        }

        int[] array = new int[count];
        int o = 0;
        for (int i = 0; i < len; i++) {
            int c = s.charAt(i);
            if (isSurrogatePairAt(s, i)) {
                int low = s.charAt(++i);
                array[o++] = ((c - MIN_HIGH_SURROGATE) * 1024) + (low - MIN_LOW_SURROGATE) + MIN_SUPPLEMENTARY;
            } else {
                array[o++] = c;
            }
        }
        return array;
    }

    /**
     * Test whether a well-formed surrogate pair starts at the given position.
     * @param s the string being examined
     * @param i the index of the candidate high surrogate
     * @return true if position i holds a high surrogate and position i+1 exists and holds a low surrogate
     */
    private static boolean isSurrogatePairAt(CharSequence s, int i) {
        if (i + 1 >= s.length()) {
            return false;
        }
        int high = s.charAt(i);
        int low = s.charAt(i + 1);
        return high >= MIN_HIGH_SURROGATE && high <= MAX_HIGH_SURROGATE
                && low >= MIN_LOW_SURROGATE && low <= MAX_LOW_SURROGATE;
    }

    /**
     * Contract an array of integers containing Unicode codepoints into a Java string
     * @param codes an array of integers representing the Unicode code points
     * @param used the number of items in the array that are actually used
     * @return the constructed string
     */
    public static CharSequence contract(int[] codes, int used) {
        FastStringBuffer sb = new FastStringBuffer(codes.length);
        for (int i = 0; i < used; i++) {
            int code = codes[i];
            if (code < MIN_SUPPLEMENTARY) {
                sb.append((char) code);
            } else {
                // output a surrogate pair
                sb.append(UTF16CharacterSet.highSurrogate(code));
                sb.append(UTF16CharacterSet.lowSurrogate(code));
            }
        }
        return sb;
    }

    /**
     * Get an object value that implements the XPath equality and ordering comparison semantics for this value.
     * For a string this is the collation key that the supplied collator produces for the string content;
     * the ordered and context arguments are not used by this implementation.
     *
     * @param ordered true if an ordered comparison is required
     * @param collator Collation to be used for comparing strings
     * @param context the XPath dynamic evaluation context
     * @return the collation key for this string under the supplied collator
     */
    public Object getXPathComparable(boolean ordered, StringCollator collator, XPathContext context) {
        return collator.getCollationKey(value.toString());
    }

    /**
     * Determine if two AtomicValues are equal, according to XPath rules. (This method
     * is not used for string comparisons, which are always under the control of a collation.
     * If we get here, it's because there's a type error in the comparison.)
     * @throws ClassCastException always
     */
    @Override
    public boolean equals(Object other) {
        throw new ClassCastException("equals on StringValue is not allowed");
    }

    /**
     * Get a hash code based on the characters of the string. The content is converted to a
     * String first because CharSequence implementations are not required to provide a
     * content-based hashCode(), so hashing the wrapped object directly could give different
     * results for values that are equal under {@link #codepointEquals}.
     * @return the hash code of the string content
     */
    @Override
    public int hashCode() {
        return value.toString().hashCode();
    }

    /**
     * Test whether this StringValue is equal to another under the rules of the codepoint collation
     * @param other the value to be compared with this value
     * @return true if the strings are equal on a codepoint-by-codepoint basis
     */
    public boolean codepointEquals(StringValue other) {
        // avoid conversion of CharSequence to String if values are different lengths
        if (value.length() != other.value.length()) {
            return false;
        }
        // It might be better to do character-by-character comparison in all cases; or it might not.
        // We do it this way in the hope that string comparison compiles to native code.
        return value.toString().equals(other.value.toString());
    }

    /**
     * Get the effective boolean value of a string
     * @return true if the string has length greater than zero
     */
    public boolean effectiveBooleanValue() {
        return value.length() > 0;
    }

    /**
     * Get a display form of the value: the string content enclosed in double quotes.
     * @return the quoted string
     */
    @Override
    public String toString() {
        return "\"" + value + '\"';
    }

    /**
     * Test whether a string matches the lexical pattern for a language code: one to eight
     * ASCII letters followed by any number of hyphen-separated groups of one to eight ASCII
     * letters or digits.
     * @param val the string to be tested
     * @return true if the whole string matches the pattern
     */
    public static boolean isValidLanguageCode(CharSequence val) {
        return val.toString().matches(LANGUAGE_CODE_REGEX);
    }

    /**
     * Produce a diagnostic representation of the contents of the string
     * @param s the string
     * @return a string in which non-Ascii-printable characters are replaced by \ uXXXX escapes
     */
    public static String diagnosticDisplay(String s) {
        FastStringBuffer fsb = new FastStringBuffer(s.length());
        for (int i = 0, len = s.length(); i < len; i++) {
            char c = s.charAt(i);
            if (c >= 0x20 && c <= 0x7e) {
                fsb.append(c);
            } else {
                fsb.append("\\u");
                // four hex digits, most significant nibble first
                for (int shift = 12; shift >= 0; shift -= 4) {
                    fsb.append(HEX_DIGITS.charAt((c >> shift) & 0xF));
                }
            }
        }
        return fsb.toString();
    }
}
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0.