/** Class-wide logger, obtained under the fully-qualified name of {@code GenericUDAFnGrams}. */
static final Log LOG = LogFactory.getLog(GenericUDAFnGrams.class.getName());
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
if (parameters.length != 3 && parameters.length != 4) {
throw new UDFArgumentTypeException(parameters.length-1,
"Please specify either three or four arguments.");
}
// Validate the first parameter, which is the expression to compute over. This should be
// either an array of strings or an array of arrays of strings.
PrimitiveTypeInfo pti;
if (parameters[0].getCategory() != ObjectInspector.Category.LIST) {
throw new UDFArgumentTypeException(0,
"Only list type arguments are accepted but "
+ parameters[0].getTypeName() + " was passed as parameter 1.");
}
switch (((ListTypeInfo) parameters[0]).getListElementTypeInfo().getCategory()) {
case PRIMITIVE:
// Parameter 1 was an array of primitives, so make sure the primitives are strings.
pti = (PrimitiveTypeInfo) ((ListTypeInfo) parameters[0]).getListElementTypeInfo();
break;
case LIST:
// Parameter 1 was an array of arrays, so make sure that the inner arrays contain
// primitive strings.
ListTypeInfo lti = (ListTypeInfo)
((ListTypeInfo) parameters[0]).getListElementTypeInfo();
pti = (PrimitiveTypeInfo) lti.getListElementTypeInfo();
break;
default:
throw new UDFArgumentTypeException(0,
"Only arrays of strings or arrays of arrays of strings are accepted but "
+ parameters[0].getTypeName() + " was passed as parameter 1.");
}
if(pti.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
throw new UDFArgumentTypeException(0,
"Only array<string> or array<array<string>> is allowed, but "
+ parameters[0].getTypeName() + " was passed as parameter 1.");
}
// Validate the second parameter, which should be an integer type (BYTE, SHORT, INT or LONG).
// Note that the switch below also lets TIMESTAMP through.
if(parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(1, "Only integers are accepted but "
+ parameters[1].getTypeName() + " was passed as parameter 2.");
}
switch(((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
case BYTE:
case SHORT:
case INT:
case LONG:
case TIMESTAMP:
break;
default:
throw new UDFArgumentTypeException(1, "Only integers are accepted but "
+ parameters[1].getTypeName() + " was passed as parameter 2.");
}
// Validate the third parameter, which should also be an integer type (BYTE, SHORT, INT or
// LONG). Note that the switch below also lets TIMESTAMP through.
if(parameters[2].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(2, "Only integers are accepted but "
+ parameters[2].getTypeName() + " was passed as parameter 3.");
}
switch(((PrimitiveTypeInfo) parameters[2]).getPrimitiveCategory()) {
case BYTE:
case SHORT:
case INT:
case LONG:
case TIMESTAMP:
break;
default:
throw new UDFArgumentTypeException(2, "Only integers are accepted but "
+ parameters[2].getTypeName() + " was passed as parameter 3.");
}
// If we have the optional fourth parameter, make sure it's also an integer type (BYTE, SHORT,
// INT or LONG). Note that the switch below also lets TIMESTAMP through.
if(parameters.length == 4) {
if(parameters[3].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(3, "Only integers are accepted but "
+ parameters[3].getTypeName() + " was passed as parameter 4.");
}
switch(((PrimitiveTypeInfo) parameters[3]).getPrimitiveCategory()) {
case BYTE:
case SHORT:
case INT:
case LONG:
case TIMESTAMP:
break;
default:
throw new UDFArgumentTypeException(3, "Only integers are accepted but "
+ parameters[3].getTypeName() + " was passed as parameter 4.");
}
}
return new GenericUDAFnGramEvaluator();