package com.livingsocial.hive.udf;
import java.text.ParseException;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import ua_parser.Parser;
import ua_parser.Client;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@UDFType(deterministic = true)
@Description(name = "user_agent_parser",
value = "_FUNC_(string, string) - returns parsed information about a user agent string",
extended = "Examples:\n"
+ " > SELECT _FUNC_('Mozilla/5.0 (iPhone; CPU iPhone OS 5_1_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B206 Safari/7534.48.3','os_major') FROM src LIMIT 1;\n"
+ " iOS 5 \n")
public class UserAgentParser extends GenericUDF {
private Text result = new Text();
private ObjectInspectorConverters.Converter[] converters;
static final Log LOG = LogFactory.getLog(UserAgentParser.class.getName());
private static final Parser uaParser;
static {
try {
uaParser = new Parser();
}
catch(IOException e) {
LOG.warn("Caught IOException: " + e.getMessage());
throw new RuntimeException("could not instantiate parser");
}
}
private enum userOptions {
os, os_family, os_major, os_minor, ua, ua_family, ua_major, ua_minor, device
}
public UserAgentParser() {
}
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (arguments.length > 2 || arguments.length == 0) {
throw new UDFArgumentLengthException("_FUNC_ expects exactly 2 arguments");
}
for (int i = 0; i < arguments.length; i++) {
if (arguments[i].getCategory() != Category.PRIMITIVE) {
throw new UDFArgumentTypeException(i,
"A string argument was expected but an argument of type " + arguments[i].getTypeName()
+ " was given.");
}
// Now that we have made sure that the argument is of primitive type, we can get the primitive
// category
PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) arguments[i])
.getPrimitiveCategory();
if (primitiveCategory != PrimitiveCategory.STRING
&& primitiveCategory != PrimitiveCategory.VOID) {
throw new UDFArgumentTypeException(i,
"A string argument was expected but an argument of type " + arguments[i].getTypeName()
+ " was given.");
}
}
converters = new ObjectInspectorConverters.Converter[arguments.length];
for (int i = 0; i < arguments.length; i++) {
converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
PrimitiveObjectInspectorFactory.writableStringObjectInspector);
}
// We will be returning a Text object
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
/**
* Get a parsed string from an input user agent string
*
* @param UserAgent - string containing the user agent to parse
*
* @param options - options from the set of strings "os", "device", and "ua". "os" and "ua"
* may optionally append "_family", "_major" and "_minor".
* "os" and "ua" return json; other options return a string only.
* No option returns a JSON formatted string (example: "{user_agent: %s, os: %s, device: %s}")
*
* @return string containing a parsed user agent based upon options entered.
* string.
*/
public Object evaluate(DeferredObject[] arguments) throws HiveException {
assert (arguments.length>0 && arguments.length<3);
Text UserAgent = (Text) converters[0].convert(arguments[0].get());
Text options = (arguments.length == 2 ? (Text) converters[1].convert(arguments[1].get()) : null) ;
if (UserAgent == null ) {
return null;
}
try {
Client c = uaParser.parse(UserAgent.toString());
if (options == null) {
result.set(c.toString());
}
else {
userOptions uo = userOptions.valueOf(options.toString().toLowerCase());
switch (uo) {
case os:
result.set(c.os.toString());
break;
case os_family:
result.set(c.os.family == null ? "null" : c.os.family );
break;
case os_major:
result.set(c.os.major == null ? "null" : c.os.major );
break;
case os_minor:
result.set(c.os.minor == null ? "null" : c.os.minor );
break;
case ua:
result.set(c.userAgent.toString());
break;
case ua_family:
result.set(c.userAgent.family == null ? "null" : c.userAgent.family );
break;
case ua_major:
result.set(c.userAgent.major == null ? "null" : c.userAgent.major );
break;
case ua_minor:
result.set(c.userAgent.minor == null ? "null" : c.userAgent.minor );
break;
case device:
result.set(c.device.family == null ? "null" : c.device.family );
break;
default:
result = null;
break;
}
}
} catch (IllegalArgumentException e) {
LOG.warn("Caught IllegalArgumentException: " + e.getMessage());
return null;
}
return result;
}
// public Text evaluate(Text UserAgent) {
// return evaluate(UserAgent, null);
// }
@Override
public String getDisplayString(String[] children) {
assert (children.length > 0 && children.length < 3);
return "user_agent_parser(" + children[0] + ( children.length == 1 ? "" : ", " + children[1] ) + ")";
}
}