if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo
.getInternalName(), exprInfo.getTabAlias(), exprInfo
.getIsVirtualCol()));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr,
new ColumnInfo(field, exprInfo.getType(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// This is only needed if a new grouping set key is being created
int groupingSetsPosition = 0;
// For grouping sets, add a dummy grouping key
if (groupingSetsPresent) {
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in a single map-reduce job
// The plan is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1 already added the grouping id as part of the row
// This function is called for GroupBy2 to add grouping id as part of the groupby keys
if (!groupingSetsNeedAdditionalMRJob) {
addGroupingSetKey(
groupByKeys,
groupByInputRowResolver,
groupByOutputRowResolver,
outputColumnNames,
colExprMap);
}
else {
groupingSetsPosition = groupByKeys.size();
// The grouping set has not yet been processed. Create a new grouping key
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in 2 map-reduce jobs
// The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1/ReduceSink worked as if grouping sets were not present
// This function is called for GroupBy2 to create new rows for grouping sets
// For each input row (a,b), 4 rows are created for the example above:
// (a,b), (a,null), (null, b), (null, null)
createNewGroupingKey(groupByKeys,
outputColumnNames,
groupByOutputRowResolver,
colExprMap);
}
}
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List<ExprNodeDesc> reduceValues = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List<String> inputKeyCols = ((ReduceSinkDesc)
reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
}
int numDistinctUDFs = 0;
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
containsDistinctAggr = containsDistinctAggr || isDistinct;
// If the function is distinct, partial aggregation has not been done on
// the client side.
// If distPartAgg is set, the client is letting us know that partial
// aggregation has not been done.
// For eg: select a, count(b+c), count(distinct d+e) group by a
// For count(b+c), if partial aggregation has been performed, then we
// directly look for count(b+c).
// Otherwise, we look for b+c.
// For distincts, partial aggregation is never performed on the client
// side, so always look for the parameters: d+e
boolean partialAggDone = !(distPartAgg || isDistinct);
if (!partialAggDone) {
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN
.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if aggr is distinct, the parameter is name is constructed as
// KEY.lastKeyColName:<tag>._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "."
+ getColumnInternalName(i - 1);
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
} else {
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(), paraExprInfo
.getIsVirtualCol()));
}
if (isDistinct) {
numDistinctUDFs++;