Package brickhouse.udf.collect

Source Code of brickhouse.udf.collect.CastArrayUDF

package brickhouse.udf.collect;
/**
* Copyright 2012 Klout, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**/


import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.log4j.Logger;

/**
*  Cast an Array of objects to an Array of a different type
*    to avoid Hive UDF casting problems
*
*/
public class CastArrayUDF extends GenericUDF {
  private static final Logger LOG = Logger.getLogger(CastArrayUDF.class);
  private ListObjectInspector listInspector;
  private PrimitiveObjectInspector fromInspector;
  private PrimitiveObjectInspector toInspector;
  private String returnElemType;

 
  public List<Object> evaluate( List<Object> uninspArray)  {
    List<Object> newList = new ArrayList<Object>();
    for(Object uninsp : uninspArray ) {
      LOG.info( "Uninspected = " + uninsp);
      Object stdObject = ObjectInspectorUtils.copyToStandardJavaObject(uninsp, fromInspector);
      Object castedObject = coerceObject( stdObject);
      newList.add( castedObject);
    }
    return newList;
  }
 
  private Object coerceObject( Object stdObj ) {
    LOG.info( "Casting " + stdObj + " from " + fromInspector.getPrimitiveCategory() + " to " + toInspector.getPrimitiveCategory() + " of type " + toInspector.getTypeName() );
    if( stdObj == null) {
      return null;
    }
    switch( fromInspector.getPrimitiveCategory()) {
    case STRING :
      String fromStr = (String) stdObj;
      switch(toInspector.getPrimitiveCategory() ) {
      case STRING:
        return fromStr;
      case BOOLEAN:
        if( fromStr.equals("true") ) {
          return Boolean.TRUE;
        } else {
          return Boolean.FALSE;
        }
      case BYTE:
        /// XXX TODO
      case SHORT:
        return Short.parseShort( fromStr);
      case INT:
        return Integer.parseInt(fromStr);
      case LONG:
        return Long.parseLong(fromStr);
      case FLOAT:
        return Float.parseFloat(fromStr);
      case DOUBLE:
        return Double.parseDouble(fromStr);
      case TIMESTAMP:
        //// XXX TODO
      case VOID:
        return null;

      }
      return null;
    case SHORT:
    case INT:
    case FLOAT:
    case LONG:
    case DOUBLE:
      Number fromNum = (Number) stdObj;
      switch(toInspector.getPrimitiveCategory()) {
      case SHORT:
        return fromNum.shortValue();
      case INT:
        return fromNum.intValue();
      case LONG:
        return fromNum.longValue();
      case FLOAT:
        return fromNum.floatValue();
      case DOUBLE:
        return fromNum.doubleValue();
      case STRING:
        return fromNum.toString();
      case TIMESTAMP:
        //// XXX TODO
      case VOID:
        return null;
      }
      return null;
    }
    return null;
  }

  @Override
  public Object evaluate(DeferredObject[] arg0) throws HiveException {
    List argList = listInspector.getList( arg0[0].get() );
    if(argList != null)
        return evaluate( argList);
    else
       return null;
  }

  @Override
  public String getDisplayString(String[] arg0) {
    StringBuilder sb = new StringBuilder( "cast_array(" );
    sb.append( arg0[0]);
    if( arg0.length > 1 ) {
      sb.append(" , ");
      sb.append( arg0[1]);
    }
    return sb.toString();
  }

  private static PrimitiveObjectInspector GetObjectInspectorForTypeName( String typeString) {
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
    LOG.info( "Type for " + typeString + " is " + typeInfo);
   
    return (PrimitiveObjectInspector) TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( typeInfo);
  }
 
  @Override
  public ObjectInspector initialize(ObjectInspector[] arg0)
      throws UDFArgumentException {
    if( arg0[0].getCategory() != Category.LIST ) {
      throw new UDFArgumentException("cast_array() takes a list, and an optional type to cast to.");
    }
    this.listInspector = (ListObjectInspector) arg0[0];
    if( listInspector.getListElementObjectInspector().getCategory() != Category.PRIMITIVE ) {
      throw new UDFArgumentException("cast_array() only handles arrays of primitives.");
    }
    this.fromInspector = (PrimitiveObjectInspector) listInspector.getListElementObjectInspector();

    LOG.info( " Cast Array input type is " + listInspector + " element = " + listInspector.getListElementObjectInspector());
    if( arg0.length > 1) {
      if( !( arg0[1] instanceof ConstantObjectInspector)
          || !( arg0[1] instanceof StringObjectInspector) ){
        throw new UDFArgumentException("cast_array() takes a list, and an optional type to cast to.");
      }
      ConstantObjectInspector constInsp  = (ConstantObjectInspector) arg0[1];
      this.returnElemType =  constInsp.getWritableConstantValue().toString();
      this.toInspector = GetObjectInspectorForTypeName( returnElemType);
      ObjectInspector returnType = ObjectInspectorFactory.getStandardListObjectInspector(toInspector);
      return returnType;
    }

    /// Otherwise, assume we're casting to strings ...
    this.returnElemType =  "string";
    this.toInspector = GetObjectInspectorForTypeName( returnElemType);
    ObjectInspector returnType = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    return returnType;
  }
}
TOP

Related Classes of brickhouse.udf.collect.CastArrayUDF

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.