Package com.odiago.flumebase.io

Source Code of com.odiago.flumebase.io.CharBufferUtils

/**
* Licensed to Odiago, Inc. under one or more contributor license
* agreements.  See the NOTICE.txt file distributed with this work for
* additional information regarding copyright ownership.  Odiago, Inc.
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
* License for the specific language governing permissions and limitations
* under the License.
*/

package com.odiago.flumebase.io;

import java.io.UnsupportedEncodingException;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.avro.util.Utf8;

import org.apache.commons.lang.text.StrTokenizer;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.odiago.flumebase.lang.ListType;
import com.odiago.flumebase.lang.PreciseType;
import com.odiago.flumebase.lang.Timestamp;
import com.odiago.flumebase.lang.Type;

/**
* Utility methods for parsing string-based values without
* requiring that they be incorporated into a String object.
*/
public class CharBufferUtils {

  private static final Logger LOG = LoggerFactory.getLogger(
      CharBufferUtils.class.getName());

  private static final String TRUE_STR = "true";
  private static final String FALSE_STR = "false";

  private CharBufferUtils() { }

  /**
   * Parse a CharSequence into a bool. Only the case-sensitive values
   * "true" and "false" are recongized; others result in a ColumnParseException.
   */
  public static boolean parseBool(CharSequence chars) throws ColumnParseException {
    if (TRUE_STR.contentEquals(chars)) {
      return true;
    } else if (FALSE_STR.contentEquals(chars)) {
      return false;
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Could not parse as boolean: " + chars);
      }
      throw new ColumnParseException("Invalid boolean");
    }
  }

  /**
   * Parses a CharSequence into an integer in base 10.
   */
  public static int parseInt(CharBuffer chars) throws ColumnParseException {
    int result = 0;
   
    final int limit = chars.limit();
    final int start = chars.position();
    if (0 == limit - start) {
      // The empty string can not be parsed as an integer.
      throw new ColumnParseException("No value provided");
    }
   
    boolean isNegative = false;
    for (int pos = start; pos < limit; pos++) {
      char cur = chars.get();
      if (pos == start && cur == '-') {
        isNegative = true;
        if (limit - start == 1) {
          // "-" is not an integer we accept.
          throw new ColumnParseException("No integer part provided");
        }
      } else if (Character.isDigit(cur)) {
        byte digitVal = (byte)( cur - '0' );
        result = result * 10 - digitVal;
        // TODO: Detect over/underflow and signal exception?
      } else {
        throw new ColumnParseException("Invalid character in number");
      }
    }

    // We built up the value as a negative, to use the larger "half" of the
    // integer range. If it's not negative, flip it on return.
    return isNegative ? result : -result;
  }

  /**
   * Parses a CharSequence into a long in base 10.
   */
  public static long parseLong(CharBuffer chars) throws ColumnParseException {
    long result = 0L;
   
    final int limit = chars.limit();
    final int start = chars.position();
    if (0 == limit - start) {
      // The empty string can not be parsed as an integer.
      throw new ColumnParseException("No value provided");
    }
   
    boolean isNegative = false;
    for (int pos = start; pos < limit; pos++) {
      char cur = chars.get();
      if (pos == start && cur == '-') {
        isNegative = true;
        if (limit - start == 1) {
          // "-" is not an integer we accept.
          throw new ColumnParseException("No integer part provided");
        }
      } else if (Character.isDigit(cur)) {
        byte digitVal = (byte)( cur - '0' );
        result = result * 10 - digitVal;
        // TODO: Detect over/underflow and signal exception?
      } else {
        throw new ColumnParseException("Invalid character in number");
      }
    }

    // We built up the value as a negative, to use the larger "half" of the
    // integer range. If it's not negative, flip it on return.
    return isNegative ? result : -result;
  }

  /**
   * Parses a CharSequence into a floating-point value.
   */
  public static float parseFloat(CharBuffer chars) throws ColumnParseException {
    try {
      return Float.valueOf(new String(chars.array()));
    } catch (NumberFormatException nfe) {
      throw new ColumnParseException(nfe);
    }
  }

  /**
   * Parses a CharSequence into a double-precision floating-point value.
   */
  public static double parseDouble(CharBuffer chars) throws ColumnParseException {
    try {
      return Double.valueOf(new String(chars.array()));
    } catch (NumberFormatException nfe) {
      throw new ColumnParseException(nfe);
    }
  }

  public static String parseString(CharBuffer chars) throws ColumnParseException {
    return chars.toString();
  }

  /**
   * Parses a CharSequence into a list of values, all of some other type.
   */
  public static List<Object> parseList(CharBuffer chars, Type listItemType,
      String nullStr, String listDelim) throws ColumnParseException {
    StrTokenizer tokenizer = new StrTokenizer(chars.toString(), listDelim.charAt(0));
    List<Object> out = new ArrayList<Object>();

    while (tokenizer.hasNext()) {
      String part = (String) tokenizer.next();
      out.add(parseType(CharBuffer.wrap(part), listItemType, nullStr, listDelim));
    }

    return Collections.unmodifiableList(out);
  }

  /**
   * Parses a CharSequence into a value of a given expected type.
   * @param chars the unparsed characters representing the value
   * @param expectedType the expected type of the final value
   * @param nullStr a token indicating a null String instance.
   */
  public static Object parseType(CharBuffer chars, Type expectedType,
      String nullStr, String listDelim) throws ColumnParseException {
    Type.TypeName primitiveTypeName = expectedType.getPrimitiveTypeName();

    // TODO(aaron): Test how this handles a field that is an empty string.
    Object out = null;
    switch (primitiveTypeName) {
    case BINARY:
      try {
        out = ByteBuffer.wrap(chars.toString().getBytes("UTF-8"));
      } catch (UnsupportedEncodingException uee) {
        // Shouldn't ever be able to get here.
        // (http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html)
        LOG.error("Your JVM doesn't support UTF-8. This is really, really bad.");
        throw new ColumnParseException(uee);
      }
      break;
    case BOOLEAN:
      out = CharBufferUtils.parseBool(chars);
      break;
    case INT:
      out = CharBufferUtils.parseInt(chars);
      break;
    case BIGINT:
      out = CharBufferUtils.parseLong(chars);
      break;
    case FLOAT:
      out = CharBufferUtils.parseFloat(chars);
      break;
    case DOUBLE:
      out = CharBufferUtils.parseDouble(chars);
      break;
    case STRING:
      String asStr = chars.toString();
      if (expectedType.isNullable() && asStr.equals(nullStr)) {
        out = null;
      } else {
        out = new Utf8(asStr);
      }
      break;
    case TIMESTAMP:
      out = CharBufferUtils.parseLong(chars);
      if (null != out) {
        out = new Timestamp((Long) out);
      }
      break;
    case TIMESPAN:
      // TODO: This should return a TimeSpan object, which is actually two
      // fields. We need to work on this... it should not just be a 'long'
      // representation.
      out = CharBufferUtils.parseLong(chars);
      break;
    case PRECISE:
      PreciseType preciseType = PreciseType.toPreciseType(expectedType);
      out = preciseType.parseStringInput(chars.toString());
      break;
    case LIST:
      out = parseList(chars, ListType.toListType(expectedType).getElementType(),
          nullStr, listDelim);
      break;
    default:
      throw new ColumnParseException("Cannot parse recursive types");
    }

    return out;
  }

}
TOP

Related Classes of com.odiago.flumebase.io.CharBufferUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.