/**
* Licensed to Odiago, Inc. under one or more contributor license
* agreements. See the NOTICE.txt file distributed with this work for
* additional information regarding copyright ownership. Odiago, Inc.
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package com.odiago.flumebase.io;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.avro.util.Utf8;
import org.apache.commons.lang.text.StrTokenizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.odiago.flumebase.lang.ListType;
import com.odiago.flumebase.lang.PreciseType;
import com.odiago.flumebase.lang.Timestamp;
import com.odiago.flumebase.lang.Type;
/**
* Utility methods for parsing string-based values without
* requiring that they be incorporated into a String object.
*/
public class CharBufferUtils {
private static final Logger LOG = LoggerFactory.getLogger(
CharBufferUtils.class.getName());
private static final String TRUE_STR = "true";
private static final String FALSE_STR = "false";
private CharBufferUtils() { }
/**
* Parse a CharSequence into a bool. Only the case-sensitive values
* "true" and "false" are recongized; others result in a ColumnParseException.
*/
public static boolean parseBool(CharSequence chars) throws ColumnParseException {
if (TRUE_STR.contentEquals(chars)) {
return true;
} else if (FALSE_STR.contentEquals(chars)) {
return false;
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("Could not parse as boolean: " + chars);
}
throw new ColumnParseException("Invalid boolean");
}
}
/**
* Parses a CharSequence into an integer in base 10.
*/
public static int parseInt(CharBuffer chars) throws ColumnParseException {
int result = 0;
final int limit = chars.limit();
final int start = chars.position();
if (0 == limit - start) {
// The empty string can not be parsed as an integer.
throw new ColumnParseException("No value provided");
}
boolean isNegative = false;
for (int pos = start; pos < limit; pos++) {
char cur = chars.get();
if (pos == start && cur == '-') {
isNegative = true;
if (limit - start == 1) {
// "-" is not an integer we accept.
throw new ColumnParseException("No integer part provided");
}
} else if (Character.isDigit(cur)) {
byte digitVal = (byte)( cur - '0' );
result = result * 10 - digitVal;
// TODO: Detect over/underflow and signal exception?
} else {
throw new ColumnParseException("Invalid character in number");
}
}
// We built up the value as a negative, to use the larger "half" of the
// integer range. If it's not negative, flip it on return.
return isNegative ? result : -result;
}
/**
* Parses a CharSequence into a long in base 10.
*/
public static long parseLong(CharBuffer chars) throws ColumnParseException {
long result = 0L;
final int limit = chars.limit();
final int start = chars.position();
if (0 == limit - start) {
// The empty string can not be parsed as an integer.
throw new ColumnParseException("No value provided");
}
boolean isNegative = false;
for (int pos = start; pos < limit; pos++) {
char cur = chars.get();
if (pos == start && cur == '-') {
isNegative = true;
if (limit - start == 1) {
// "-" is not an integer we accept.
throw new ColumnParseException("No integer part provided");
}
} else if (Character.isDigit(cur)) {
byte digitVal = (byte)( cur - '0' );
result = result * 10 - digitVal;
// TODO: Detect over/underflow and signal exception?
} else {
throw new ColumnParseException("Invalid character in number");
}
}
// We built up the value as a negative, to use the larger "half" of the
// integer range. If it's not negative, flip it on return.
return isNegative ? result : -result;
}
/**
* Parses a CharSequence into a floating-point value.
*/
public static float parseFloat(CharBuffer chars) throws ColumnParseException {
try {
return Float.valueOf(new String(chars.array()));
} catch (NumberFormatException nfe) {
throw new ColumnParseException(nfe);
}
}
/**
* Parses a CharSequence into a double-precision floating-point value.
*/
public static double parseDouble(CharBuffer chars) throws ColumnParseException {
try {
return Double.valueOf(new String(chars.array()));
} catch (NumberFormatException nfe) {
throw new ColumnParseException(nfe);
}
}
public static String parseString(CharBuffer chars) throws ColumnParseException {
return chars.toString();
}
/**
* Parses a CharSequence into a list of values, all of some other type.
*/
public static List<Object> parseList(CharBuffer chars, Type listItemType,
String nullStr, String listDelim) throws ColumnParseException {
StrTokenizer tokenizer = new StrTokenizer(chars.toString(), listDelim.charAt(0));
List<Object> out = new ArrayList<Object>();
while (tokenizer.hasNext()) {
String part = (String) tokenizer.next();
out.add(parseType(CharBuffer.wrap(part), listItemType, nullStr, listDelim));
}
return Collections.unmodifiableList(out);
}
/**
* Parses a CharSequence into a value of a given expected type.
* @param chars the unparsed characters representing the value
* @param expectedType the expected type of the final value
* @param nullStr a token indicating a null String instance.
*/
public static Object parseType(CharBuffer chars, Type expectedType,
String nullStr, String listDelim) throws ColumnParseException {
Type.TypeName primitiveTypeName = expectedType.getPrimitiveTypeName();
// TODO(aaron): Test how this handles a field that is an empty string.
Object out = null;
switch (primitiveTypeName) {
case BINARY:
try {
out = ByteBuffer.wrap(chars.toString().getBytes("UTF-8"));
} catch (UnsupportedEncodingException uee) {
// Shouldn't ever be able to get here.
// (http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html)
LOG.error("Your JVM doesn't support UTF-8. This is really, really bad.");
throw new ColumnParseException(uee);
}
break;
case BOOLEAN:
out = CharBufferUtils.parseBool(chars);
break;
case INT:
out = CharBufferUtils.parseInt(chars);
break;
case BIGINT:
out = CharBufferUtils.parseLong(chars);
break;
case FLOAT:
out = CharBufferUtils.parseFloat(chars);
break;
case DOUBLE:
out = CharBufferUtils.parseDouble(chars);
break;
case STRING:
String asStr = chars.toString();
if (expectedType.isNullable() && asStr.equals(nullStr)) {
out = null;
} else {
out = new Utf8(asStr);
}
break;
case TIMESTAMP:
out = CharBufferUtils.parseLong(chars);
if (null != out) {
out = new Timestamp((Long) out);
}
break;
case TIMESPAN:
// TODO: This should return a TimeSpan object, which is actually two
// fields. We need to work on this... it should not just be a 'long'
// representation.
out = CharBufferUtils.parseLong(chars);
break;
case PRECISE:
PreciseType preciseType = PreciseType.toPreciseType(expectedType);
out = preciseType.parseStringInput(chars.toString());
break;
case LIST:
out = parseList(chars, ListType.toListType(expectedType).getElementType(),
nullStr, listDelim);
break;
default:
throw new ColumnParseException("Cannot parse recursive types");
}
return out;
}
}