// Copyright 2010-2011 NexJ Systems Inc. This software is licensed under the terms of the Eclipse Public License 1.0
package nexj.core.util;
import java.io.IOException;
import java.io.Writer;
import java.sql.Timestamp;
import java.text.BreakIterator;
import java.text.DateFormatSymbols;
import java.text.FieldPosition;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* String utilities.
*/
public class StringUtil
{
// constants
/**
* Hex digit table: the character at index i (0..15) is the upper-case
* hexadecimal digit for the value i.
*/
private final static char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();
/**
* Array of 80 space characters, filled once by the static initializer below.
* writeSpaces() emits blanks by writing slices of this array.
*/
protected final static char[] SPACES = new char[80];
static
{
// Populate the shared buffer with blanks at class-initialization time.
Arrays.fill(SPACES, ' ');
}
/**
* Pattern used for matching/splitting a string into separate digits and non-digits groups.
* The whole input is matched as a repetition of an optional digit run (group 1)
* followed by an optional non-digit run (group 2).
* NOTE: both groups sit inside a repeated group, so after a match each of them
* reports only the text it captured most recently, not every run in the input.
*/
private final static Pattern STRING_DIGIT_PARTS = Pattern.compile("^(?:(\\d+)?(\\D+)?)*$");
/**
* The timestamp string format: era, 4-digit year, month, day, then hours,
* minutes and seconds, using English symbols and the TZ.UTC time zone.
* appendUTC() formats through a clone of this instance rather than through
* the shared instance itself.
*/
private final static SimpleDateFormat s_timestampOutFormat = new SimpleDateFormat("Gyyyy'-'MM'-'dd HH':'mm':'ss", Locale.ENGLISH);
static
{
s_timestampOutFormat.setTimeZone(TZ.UTC);
// Replace the era names so that the leading G field renders as "-" for
// era index 0 (BC) and as an empty string for era index 1 (AD).
DateFormatSymbols dfs = s_timestampOutFormat.getDateFormatSymbols();
dfs.setEras(new String[]{"-", ""});
s_timestampOutFormat.setDateFormatSymbols(dfs);
}
/**
* The ignored field position. Passed to SimpleDateFormat.format() by appendUTC()
* when the caller has no interest in field offsets. Both index setters are
* overridden as no-ops, so assignments made through them are discarded.
*/
private final static FieldPosition s_ignoredFieldPosition = new FieldPosition(SimpleDateFormat.AM_PM_FIELD)
{
/**
* Discards the begin index.
* @see java.text.FieldPosition#setBeginIndex(int)
*/
public void setBeginIndex(int nIndex)
{
}
/**
* Discards the end index.
* @see java.text.FieldPosition#setEndIndex(int)
*/
public void setEndIndex(int nIndex)
{
}
};
/**
* The zero char array: nine '0' characters. appendUTC() inserts a prefix of
* this array to left-pad the nanosecond field to 9 digits.
*/
private final static char[] s_zeroCharArray = new char[]{'0', '0', '0', '0', '0', '0', '0', '0', '0'};
// associations
/**
* String set for interning: String[].
* Within this file it is read and modified only by intern(String), which is
* a synchronized static method.
*/
protected final static Holder s_stringSet = new HashHolder();
// constructors
/**
* Prevents construction by unrelated code. The constructor is protected, so it
* remains reachable from subclasses and from classes in the same package.
* Every operation declared in this class is static.
*/
protected StringUtil()
{
}
// operations
/**
 * Returns a canonical instance for the given string, kept in the regular heap.
 * The first time a particular value is seen it is stored in the shared set and
 * returned as is; later calls with an equal value return the stored instance.
 * The method is synchronized on the class.
 * @param s The string to intern. Can be null.
 * @return The interned string, or null if s is null.
 */
public synchronized static String intern(String s)
{
   if (s != null)
   {
      Object existing = s_stringSet.get(s);

      if (existing == null)
      {
         // First occurrence: this instance becomes the canonical one.
         s_stringSet.add(s);
      }
      else
      {
         s = (String)existing;
      }
   }

   return s;
}
/**
 * Locates the first character of a string, within a given range, that is a
 * member of a character set.
 * @param s The string to search in.
 * @param sSet The characters to look for.
 * @param nStart The start index. Negative values are treated as 0.
 * @param nEnd The end index (exclusive). Values beyond the length of the
 * string are treated as the length.
 * @return The index of the first matching character, or -1 if there is none.
 */
public static int findSetIndex(String s, String sSet, int nStart, int nEnd)
{
   final char[] candidates = sSet.toCharArray();
   final int nLimit = Math.min(nEnd, s.length());

   for (int nPos = Math.max(nStart, 0); nPos < nLimit; ++nPos)
   {
      final char chCurrent = s.charAt(nPos);

      for (char chCandidate : candidates)
      {
         if (chCandidate == chCurrent)
         {
            return nPos;
         }
      }
   }

   return -1;
}
/**
 * Converts a single character to its numeric digit value.
 * @param ch The character to parse.
 * @param nRadix The digit radix.
 * @return The digit value, as determined by Character.digit().
 * @throws NumberFormatException if the character is not a digit in the given radix.
 */
public static int parseDigit(char ch, int nRadix) throws NumberFormatException
{
   final int nDigit = Character.digit(ch, nRadix);

   if (nDigit >= 0)
   {
      return nDigit;
   }

   throw new NumberFormatException("Invalid digit");
}
/**
 * Parses a non-negative decimal integer from a substring.
 * No sign, whitespace or grouping characters are accepted.
 * An empty range (nStart >= nEnd) yields 0.
 * @param s The string.
 * @param nStart The start offset (inclusive).
 * @param nEnd The end offset (exclusive).
 * @return The parsed value.
 * @throws NumberFormatException if a character in the range is not a decimal
 * digit, or if the value does not fit in an int.
 */
public static int parseInt(String s, int nStart, int nEnd) throws NumberFormatException
{
   // Accumulate in a long and check after every digit, so that a value
   // exceeding the int range is reported instead of silently wrapping around.
   long lValue = 0;

   for (int i = nStart; i < nEnd; ++i)
   {
      // Same digit check and error message as parseDigit(char, int).
      int nDigit = Character.digit(s.charAt(i), 10);

      if (nDigit < 0)
      {
         throw new NumberFormatException("Invalid digit");
      }

      lValue = lValue * 10 + nDigit;

      if (lValue > Integer.MAX_VALUE)
      {
         throw new NumberFormatException("Integer overflow");
      }
   }

   return (int)lValue;
}
/**
 * Verifies that a character occurs at a given offset in a string.
 * @param s The string.
 * @param nOffset The character offset from the start.
 * @param ch The expected character.
 * @throws IllegalArgumentException if the offset lies outside the string
 * (negative or past the end) or a different character is found there.
 */
public static void verifyDelimiter(String s, int nOffset, char ch) throws IllegalArgumentException
{
   // A negative offset is checked explicitly so that it is reported through the
   // documented exception type rather than as a StringIndexOutOfBoundsException.
   if (nOffset < 0 || nOffset >= s.length() || s.charAt(nOffset) != ch)
   {
      throw new IllegalArgumentException("Missing delimiter \"" + ch + "\" at offset " + nOffset);
   }
}
/**
 * Parses a boolean value. The accepted spellings are exactly "true" and "1"
 * for true, and "false" and "0" for false (case sensitive).
 * @param sValue The string to parse.
 * @return The boolean value.
 * @throws IllegalArgumentException if the string is null or is not one of the
 * accepted spellings; the rejected value is used as the exception message.
 */
public static boolean parseBoolean(String sValue)
{
   if ("true".equals(sValue) || "1".equals(sValue))
   {
      return true;
   }

   if ("false".equals(sValue) || "0".equals(sValue))
   {
      return false;
   }

   throw new IllegalArgumentException(sValue);
}
/**
 * Appends the elements of a CharSequence array to a buffer, separated by a
 * delimiter. When the array holds more than one element the output is also
 * wrapped in the given prefix and suffix; a single element is appended bare.
 * A null or empty array leaves the buffer untouched.
 * @param buf The destination buffer (not null).
 * @param array The list to join.
 * @param sPrefix The prefix to prepend if multiple values are being joined (null == "").
 * @param sDelim The delimiter string (null == "").
 * @param sSuffix The suffix to append if multiple values are being joined (null == "").
 * @return The buffer passed in as buf.
 * @throws RuntimeException on IOException during append.
 */
public static Appendable join(
   Appendable buf, CharSequence[] array, String sPrefix, String sDelim, String sSuffix)
   throws RuntimeException
{
   if (array == null || array.length == 0)
   {
      return buf;
   }

   assert buf != null;

   final String sSeparator = (sDelim == null) ? "" : sDelim;
   final boolean bWrap = array.length > 1;

   try
   {
      if (bWrap && sPrefix != null)
      {
         buf.append(sPrefix);
      }

      for (int i = 0; i < array.length; ++i)
      {
         if (i > 0)
         {
            buf.append(sSeparator);
         }

         buf.append(array[i]);
      }

      if (bWrap && sSuffix != null)
      {
         buf.append(sSuffix);
      }
   }
   catch (IOException e)
   {
      ObjUtil.rethrow(e);
   }

   return buf;
}
/**
 * Splits a string into an array of substrings around a delimiter character.
 * Empty substrings at the start or between two adjacent delimiters are kept;
 * an empty substring after the last delimiter is not, so "a,,b" yields three
 * elements while "a," yields one and "" yields none.
 * @param str String to be split into an array.
 * @param chDelim Character to be used as a delimiter.
 * @return String[] containing the substrings in order of occurrence.
 */
public static String[] split(String str, char chDelim)
{
   final ArrayList<String> parts = new ArrayList<String>();
   int nFrom = 0;

   for (int nAt = str.indexOf(chDelim); nAt >= 0; nAt = str.indexOf(chDelim, nFrom))
   {
      parts.add(str.substring(nFrom, nAt));
      nFrom = nAt + 1;
   }

   // Whatever follows the last delimiter, unless there is nothing left.
   if (nFrom < str.length())
   {
      parts.add(str.substring(nFrom));
   }

   return parts.toArray(new String[parts.size()]);
}
/**
 * Null-safe, case-insensitive string equality.
 * @param sLeft The left string. Can be null.
 * @param sRight The right string. Can be null.
 * @return True if both are null, or if both are non-null and equal ignoring
 * case; false otherwise.
 */
public static boolean equalIgnoreCase(String sLeft, String sRight)
{
   return (sLeft == null) ? sRight == null : sLeft.equalsIgnoreCase(sRight);
}
/**
 * Compares two strings, treating "\r\n", "\r" and "\n" as the same
 * end-of-line sequence. The comparison is otherwise case sensitive.
 * A null string is considered equal to an empty one.
 * @param sLeft The left string. Can be null.
 * @param sRight The right string. Can be null.
 * @return True if equal, false otherwise.
 */
public static boolean equalEOL(String sLeft, String sRight)
{
   if (sLeft == sRight)
   {
      return true;
   }

   final String sA = (sLeft == null) ? "" : sLeft;
   final String sB = (sRight == null) ? "" : sRight;
   final int nALength = sA.length();
   final int nBLength = sB.length();
   int nA = 0;
   int nB = 0;

   for (; nA < nALength && nB < nBLength; ++nA, ++nB)
   {
      char chA = sA.charAt(nA);
      char chB = sB.charAt(nB);

      // Normalize "\r" and "\r\n" to a single '\n' on each side.
      if (chA == '\r')
      {
         chA = '\n';

         if (nA + 1 < nALength && sA.charAt(nA + 1) == '\n')
         {
            ++nA;
         }
      }

      if (chB == '\r')
      {
         chB = '\n';

         if (nB + 1 < nBLength && sB.charAt(nB + 1) == '\n')
         {
            ++nB;
         }
      }

      if (chA != chB)
      {
         return false;
      }
   }

   // Equal only if both strings were consumed completely.
   return nA == nALength && nB == nBLength;
}
/**
 * Tests whether a string has no characters. Whitespace counts as content.
 * @param s The string to check. Can be null.
 * @return True if the string is null or zero-length.
 */
public static boolean isEmpty(String s)
{
   if (s == null)
   {
      return true;
   }

   return s.length() == 0;
}
/**
 * Trims a string and maps an empty result to null.
 * @param s The string to trim. Can be null.
 * @return The trimmed string, or null if s is null or nothing remains after trimming.
 * @see java.lang.String#trim()
 */
public static String trimToNull(String s)
{
   if (s == null)
   {
      return null;
   }

   final String sTrimmed = s.trim();

   return (sTrimmed.length() == 0) ? null : sTrimmed;
}
/**
 * Appends a character's 16-bit value as exactly 4 upper-case hexadecimal
 * digits, most significant digit first (zero-padded on the left).
 * @param buf The destination buffer.
 * @param ch The character to append.
 */
public static void appendHex(StringBuffer buf, char ch)
{
   // One digit per 4-bit group, from the highest group down to the lowest.
   for (int nShift = 12; nShift >= 0; nShift -= 4)
   {
      buf.append(HEX_DIGITS[(ch >> nShift) & 0xf]);
   }
}
/**
 * Appends the lowest 8 bits of the given int as exactly 2 upper-case
 * hexadecimal digits (zero-padded on the left). Higher bits are ignored.
 * @param buf The destination buffer.
 * @param n The value whose low byte is appended.
 */
public static void appendHex(StringBuilder buf, int n)
{
   final int nHigh = (n >> 4) & 0xf;
   final int nLow = n & 0xf;

   buf.append(HEX_DIGITS[nHigh]).append(HEX_DIGITS[nLow]);
}
/**
 * Writes a character's 16-bit value as exactly 4 upper-case hexadecimal
 * digits, most significant digit first (zero-padded on the left).
 * Each digit is written with a separate call to the writer.
 * @param writer The output character stream.
 * @param ch The character to write.
 * @throws IOException if the writer fails.
 */
public static void writeHex(Writer writer, char ch) throws IOException
{
   // One digit per 4-bit group, from the highest group down to the lowest.
   for (int nShift = 12; nShift >= 0; nShift -= 4)
   {
      writer.write(HEX_DIGITS[(ch >> nShift) & 0xf]);
   }
}
/**
 * Writes the given number of space characters. The output is produced in
 * chunks no longer than the shared SPACES buffer. Nothing is written when
 * the count is zero or negative.
 * @param writer The output character stream.
 * @param nCount The number of spaces to write.
 * @throws IOException if the writer fails.
 */
public static void writeSpaces(Writer writer, int nCount) throws IOException
{
   final int nChunkMax = SPACES.length;

   for (int nRemaining = nCount; nRemaining > 0;)
   {
      final int nChunk = Math.min(nRemaining, nChunkMax);

      writer.write(SPACES, 0, nChunk);
      nRemaining -= nChunk;
   }
}
/**
 * Appends a timestamp to a string buffer in the form produced by the shared
 * timestamp format, followed by '.' and the nanoseconds as exactly 9 digits.
 * Formatting is done through a clone of the shared format.
 * @param buf The destination string buffer.
 * @param ts The timestamp to append.
 * @param bLiteral True to append in a literal format (with T instead of the
 * space between the date and the time).
 */
public static void appendUTC(StringBuffer buf, Timestamp ts, boolean bLiteral)
{
   final int nDateStart = buf.length();
   final SimpleDateFormat format = (SimpleDateFormat)s_timestampOutFormat.clone();

   format.format(ts, buf, s_ignoredFieldPosition);
   buf.append('.');

   final int nFractionStart = buf.length();

   buf.append(ts.getNanos());

   // Left-pad the nanoseconds with zeros up to 9 digits.
   final int nPadCount = 9 - (buf.length() - nFractionStart);

   buf.insert(nFractionStart, s_zeroCharArray, 0, nPadCount);

   if (bLiteral)
   {
      // The search skips the first 10 characters appended by this call and
      // replaces only the first space found before the fraction.
      int nPos = nDateStart + 10;

      while (nPos < nFractionStart && buf.charAt(nPos) != ' ')
      {
         ++nPos;
      }

      if (nPos < nFractionStart)
      {
         buf.setCharAt(nPos, 'T');
      }
   }
}
/**
 * Compares two version range strings. A single trailing "+" on either
 * argument is dropped, and the remainders are then compared with
 * compareVersions(String, String).
 * @param sLeft The left version.
 * @param sRight The right version.
 * @return A negative value if sLeft < sRight, a positive value if
 * sLeft > sRight, as returned by compareVersions.
 * @see #compareVersions(String, String)
 */
public static int compareVersionRanges(String sLeft, String sRight)
{
   final String sLeftBase = sLeft.endsWith("+") ? sLeft.substring(0, sLeft.length() - 1) : sLeft;
   final String sRightBase = sRight.endsWith("+") ? sRight.substring(0, sRight.length() - 1) : sRight;

   return compareVersions(sLeftBase, sRightBase);
}
/**
 * Version comparison - "." separated version elements are considered individually,
 * from left to right. Within an element, runs of decimal digits and runs of
 * other characters are compared in the order in which they occur: digit runs
 * by numeric magnitude (of any length, leading zeros ignored), all other runs
 * alphabetically in case-insensitive order. Where one element has a digit run
 * and the other does not, the one without sorts first.
 * If all elements compared are equal, the version with more text remaining
 * from the last compared element onward is the greater one.
 * @param sLeft The left version. Must not be null.
 * @param sRight The right version. Must not be null.
 * @return A negative value if sLeft < sRight, a positive value if
 * sLeft > sRight, 0 if no difference is found.
 * @see java.lang.String#compareTo(String)
 */
public static int compareVersions(String sLeft, String sRight)
{
   assert sLeft != null && sRight != null;

   int nLeftStart = 0; // start of the current left element
   int nRightStart = 0; // start of the current right element

   for (;;)
   {
      int nLeftEnd = sLeft.indexOf('.', nLeftStart);
      int nRightEnd = sRight.indexOf('.', nRightStart);
      String sLeftPart = (nLeftEnd < 0) ? sLeft.substring(nLeftStart)
         : sLeft.substring(nLeftStart, nLeftEnd);
      String sRightPart = (nRightEnd < 0) ? sRight.substring(nRightStart)
         : sRight.substring(nRightStart, nRightEnd);
      int nMatch = compareVersionParts(sLeftPart, sRightPart);

      if (nMatch != 0)
      {
         return nMatch;
      }

      if (nLeftEnd < 0 || nRightEnd < 0)
      {
         break; // at least one of the versions has no more elements
      }

      nLeftStart = nLeftEnd + 1;
      nRightStart = nRightEnd + 1;
   }

   // Tie-break on the length of the text from the last compared element onward.
   return (sLeft.length() - nLeftStart) - (sRight.length() - nRightStart);
}

/**
 * Compares two version elements run by run, alternating between a
 * (possibly empty) digit run and a (possibly empty) non-digit run.
 */
private static int compareVersionParts(String sLeft, String sRight)
{
   int nLeftPos = 0;
   int nRightPos = 0;

   while (nLeftPos < sLeft.length() || nRightPos < sRight.length())
   {
      int nLeftEnd = endOfRun(sLeft, nLeftPos, true);
      int nRightEnd = endOfRun(sRight, nRightPos, true);
      int nMatch;

      if (nLeftEnd > nLeftPos && nRightEnd > nRightPos)
      {
         nMatch = compareDigitRuns(sLeft.substring(nLeftPos, nLeftEnd), sRight.substring(nRightPos, nRightEnd));
      }
      else
      {
         // At most one side has digits here: the side without sorts first.
         nMatch = (nLeftEnd - nLeftPos) - (nRightEnd - nRightPos);
      }

      if (nMatch != 0)
      {
         return nMatch;
      }

      nLeftPos = nLeftEnd;
      nRightPos = nRightEnd;
      nLeftEnd = endOfRun(sLeft, nLeftPos, false);
      nRightEnd = endOfRun(sRight, nRightPos, false);
      nMatch = sLeft.substring(nLeftPos, nLeftEnd).compareToIgnoreCase(sRight.substring(nRightPos, nRightEnd));

      if (nMatch != 0)
      {
         return nMatch;
      }

      nLeftPos = nLeftEnd;
      nRightPos = nRightEnd;
   }

   return 0;
}

/**
 * Returns the end offset of the run of ASCII digits (bDigits == true) or of
 * non-digits (bDigits == false) that starts at nStart; nStart if the run is empty.
 */
private static int endOfRun(String s, int nStart, boolean bDigits)
{
   int nPos = nStart;

   while (nPos < s.length())
   {
      char ch = s.charAt(nPos);

      if ((ch >= '0' && ch <= '9') != bDigits)
      {
         break;
      }

      ++nPos;
   }

   return nPos;
}

/**
 * Compares two non-empty strings of ASCII digits by numeric magnitude
 * without converting them to a number, so that their length is unlimited.
 */
private static int compareDigitRuns(String sLeft, String sRight)
{
   int nLeftPos = 0;
   int nRightPos = 0;

   // Skip leading zeros, always keeping the last digit.
   while (nLeftPos < sLeft.length() - 1 && sLeft.charAt(nLeftPos) == '0')
   {
      ++nLeftPos;
   }

   while (nRightPos < sRight.length() - 1 && sRight.charAt(nRightPos) == '0')
   {
      ++nRightPos;
   }

   // More significant digits means a larger number.
   int nMatch = (sLeft.length() - nLeftPos) - (sRight.length() - nRightPos);

   if (nMatch != 0)
   {
      return nMatch;
   }

   return sLeft.substring(nLeftPos).compareTo(sRight.substring(nRightPos));
}
/**
 * Converts a timestamp value to its string form, as produced by
 * appendUTC(StringBuffer, Timestamp, boolean) in non-literal mode.
 * @param value The value to convert. Can be null.
 * @return The converted value, or null if value is null.
 */
public static String toString(Timestamp value)
{
   if (value != null)
   {
      final StringBuffer buf = new StringBuffer(26);

      appendUTC(buf, value, false);

      return buf.toString();
   }

   return null;
}
/**
 * Calculates the number of bytes needed for the code points of a string in UTF-8.
 * A surrogate pair is counted once, as a 4-byte code point; an unpaired
 * surrogate is counted by its own value, i.e. as 3 bytes.
 * For UTF-8 ranges @see http://en.wikipedia.org/wiki/Comparison_of_Unicode_encodings
 * @param sValue The string to calculate byte length for. Can be null.
 * @return Byte length of string; 0 for null.
 */
public static int utf8Length(String sValue)
{
   if (sValue == null)
   {
      return 0;
   }

   final int nLength = sValue.length();
   int nBytes = 0;
   int nPos = 0;

   while (nPos < nLength)
   {
      final int nCodePoint = sValue.codePointAt(nPos);

      if (nCodePoint > 0xFFFF)
      {
         // Supplementary code point: 4 bytes in UTF-8, 2 chars in UTF-16.
         nBytes += 4;
         nPos += 2;
      }
      else
      {
         // 0x0000-0x007F: 1 byte, 0x0080-0x07FF: 2 bytes, 0x0800-0xFFFF: 3 bytes.
         nBytes += (nCodePoint > 0x7FF) ? 3 : (nCodePoint > 0x7F) ? 2 : 1;
         ++nPos;
      }
   }

   return nBytes;
}
/**
 * Converts the given string to title case using the rules of the given
 * locale: the whole string is lower-cased, then the character at every
 * word boundary reported by a word BreakIterator is replaced by its
 * title-case form.
 * @param sValue The string to convert to title case. Must not be null.
 * @param locale The locale used for lower-casing and for word breaking.
 * @return The string, converted to title case.
 */
public static String toTitleCase(String sValue, Locale locale)
{
   final String sLower = sValue.toLowerCase(locale);

   // Boundaries and initial characters are normally taken from the original text.
   // Lower-casing can change the length of the text, though (e.g. U+0130 outside
   // of Turkic locales becomes two characters); offsets into the original would
   // then address the wrong characters of the buffer, so in that case the
   // lower-cased text itself is used as the source.
   final String sSource = (sLower.length() == sValue.length()) ? sValue : sLower;
   final int nLength = sSource.length();
   final StringBuilder buf = new StringBuilder(sLower);
   final BreakIterator itr = BreakIterator.getWordInstance(locale);

   itr.setText(sSource);

   for (int i = itr.first(); i != BreakIterator.DONE && i < nLength; i = itr.next())
   {
      buf.setCharAt(i, Character.toTitleCase(sSource.charAt(i)));
   }

   return buf.toString();
}
}