package com.livingsocial.hive.udtf;
import com.livingsocial.hive.utils.KISSInspector;
import java.util.ArrayList;
import java.util.List;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory.*;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
/**
 * Hive table-generating function that emits the difference between each value
 * and the value of the immediately preceding row, as long as both rows carry
 * the same group key.
 *
 * <p>Only the directly preceding row is compared, so a group whose rows are
 * not adjacent in the input is treated as several separate runs. The first
 * row of every run produces no output.
 *
 * <p>Output rows are structs of {@code (group, interval)} where
 * {@code interval} is a Java {@code Float}.
 */
@Description(
    name = "intervals",
    value = "_FUNC_(group_by, values) - get all intervals between values by group_by"
)
public class Intervals extends GenericUDTF {

    /**
     * Tracks the group key and value of the previous row and computes the
     * interval to the current row.
     *
     * <p>Declared static because it never touches the enclosing UDTF instance.
     */
    private static final class Intervaler {
        private final KISSInspector groupInspector;
        private final KISSInspector valueInspector;

        // Distinguishes "no row seen yet" from "previous row had a null group key".
        private boolean hasPrevious = false;
        private Object currentGroup = null;
        private float lastValue;

        /**
         * @param gpoi inspector for the group-by argument
         * @param vpoi inspector for the value argument
         */
        public Intervaler(ObjectInspector gpoi, ObjectInspector vpoi) {
            groupInspector = new KISSInspector(gpoi);
            valueInspector = new KISSInspector(vpoi);
        }

        /**
         * Computes the interval between this row's value and the previous
         * row's value.
         *
         * @param group raw group-by column object for the current row
         * @param value raw value column object for the current row
         * @return {@code {groupKey, Float diff}} when the previous row had an
         *         equal group key, or {@code null} when this row starts a new
         *         run of rows
         */
        public Object[] getInterval(Object group, Object value) {
            float newValue = valueInspector.toFloat(value);
            // Extract the key once instead of once per use.
            Object key = groupInspector.get(group);

            // Null-safe comparison: a null key must not be dereferenced.
            // NOTE(review): assumes KISSInspector.get returns null for a NULL
            // column value -- confirm against KISSInspector.
            boolean sameGroup = hasPrevious
                && (key == null ? currentGroup == null : key.equals(currentGroup));

            Object[] result = null;
            if (sameGroup) {
                result = new Object[] { key, Float.valueOf(newValue - lastValue) };
            } else {
                currentGroup = key;
                hasPrevious = true;
            }
            lastValue = newValue;
            return result;
        }

        /**
         * @return the inspector used to describe the group column in the
         *         output struct
         */
        public AbstractPrimitiveJavaObjectInspector getGroupInspector() {
            return groupInspector.getAnInspector();
        }
    }

    Intervaler intervaler;

    /** Nothing to flush or release. */
    @Override
    public void close() throws HiveException {
    }

    /**
     * Validates the arguments and declares the output row shape.
     *
     * @param args inspectors for the call arguments; exactly two primitives
     *             are required (group key, value)
     * @return a struct inspector with fields {@code group} and {@code interval}
     * @throws UDFArgumentException if the argument count is not two or either
     *                              argument is not primitive
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector [] args) throws UDFArgumentException {
        if (args.length != 2 || !KISSInspector.isPrimitive(args[0]) || !KISSInspector.isPrimitive(args[1])) {
            throw new UDFArgumentException("intervals() takes two primitive arguments");
        }

        intervaler = new Intervaler(args[0], args[1]);

        List<String> fieldNames = new ArrayList<String>();
        List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();

        fieldNames.add("group");
        fieldOIs.add(intervaler.getGroupInspector());

        fieldNames.add("interval");
        fieldOIs.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveCategory.FLOAT));

        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Handles one input row and forwards an output row when an interval to
     * the previous row exists.
     *
     * @param o the row's arguments: {@code o[0]} is the group key,
     *          {@code o[1]} is the value
     * @throws HiveException if forwarding the row fails
     */
    @Override
    public void process(Object [] o) throws HiveException {
        Object[] result = intervaler.getInterval(o[0], o[1]);
        if (result != null) {
            forward(result);
        }
    }
}