static final Log LOG = LogFactory.getLog(GenericUDAFContextNGrams.class.getName());
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
if (parameters.length != 3 && parameters.length != 4) {
throw new UDFArgumentTypeException(parameters.length-1,
"Please specify either three or four arguments.");
}
// Validate the first parameter, which is the expression to compute over. This should be an
// array of strings type, or an array of arrays of strings.
PrimitiveTypeInfo pti;
if (parameters[0].getCategory() != ObjectInspector.Category.LIST) {
throw new UDFArgumentTypeException(0,
"Only list type arguments are accepted but "
+ parameters[0].getTypeName() + " was passed as parameter 1.");
}
switch (((ListTypeInfo) parameters[0]).getListElementTypeInfo().getCategory()) {
case PRIMITIVE:
// Parameter 1 was an array of primitives, so make sure the primitives are strings.
pti = (PrimitiveTypeInfo) ((ListTypeInfo) parameters[0]).getListElementTypeInfo();
break;
case LIST:
// Parameter 1 was an array of arrays, so make sure that the inner arrays contain
// primitive strings.
ListTypeInfo lti = (ListTypeInfo)
((ListTypeInfo) parameters[0]).getListElementTypeInfo();
pti = (PrimitiveTypeInfo) lti.getListElementTypeInfo();
break;
default:
throw new UDFArgumentTypeException(0,
"Only arrays of strings or arrays of arrays of strings are accepted but "
+ parameters[0].getTypeName() + " was passed as parameter 1.");
}
if(pti.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
throw new UDFArgumentTypeException(0,
"Only array<string> or array<array<string>> is allowed, but "
+ parameters[0].getTypeName() + " was passed as parameter 1.");
}
// Validate the second parameter, which should be an array of strings
if(parameters[1].getCategory() != ObjectInspector.Category.LIST ||
((ListTypeInfo) parameters[1]).getListElementTypeInfo().getCategory() !=
ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(1, "Only arrays of strings are accepted but "
+ parameters[1].getTypeName() + " was passed as parameter 2.");
}
if(((PrimitiveTypeInfo) ((ListTypeInfo)parameters[1]).getListElementTypeInfo()).
getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
throw new UDFArgumentTypeException(1, "Only arrays of strings are accepted but "
+ parameters[1].getTypeName() + " was passed as parameter 2.");
}
// Validate the third parameter, which should be an integer to represent 'k'
if(parameters[2].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(2, "Only integers are accepted but "
+ parameters[2].getTypeName() + " was passed as parameter 3.");
}
switch(((PrimitiveTypeInfo) parameters[2]).getPrimitiveCategory()) {
case BYTE:
case SHORT:
case INT:
case LONG:
case TIMESTAMP:
break;
default:
throw new UDFArgumentTypeException(2, "Only integers are accepted but "
+ parameters[2].getTypeName() + " was passed as parameter 3.");
}
// If the fourth parameter -- precision factor 'pf' -- has been specified, make sure it's
// an integer.
if(parameters.length == 4) {
if(parameters[3].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(3, "Only integers are accepted but "
+ parameters[3].getTypeName() + " was passed as parameter 4.");
}
switch(((PrimitiveTypeInfo) parameters[3]).getPrimitiveCategory()) {
case BYTE:
case SHORT:
case INT:
case LONG:
case TIMESTAMP:
break;
default:
throw new UDFArgumentTypeException(3, "Only integers are accepted but "
+ parameters[3].getTypeName() + " was passed as parameter 4.");
}
}
return new GenericUDAFContextNGramEvaluator();