// Descriptor of the group-by operator that is the first child of the reduce sink.
GroupByDesc childGroupByDesc =
    ((GroupByOperator) reduceSinkOp.getChildOperators().get(0)).getConf();

// Distinct aggregations are normally not partially aggregated on the mapper.
// When the data is bucketed/sorted on the distinct key, however, the mapper can
// do the partial aggregation, so each distinct aggregation below is rewritten
// into a plain FINAL-mode aggregation over a single value column.
for (int idx = 0; idx < childGroupByDesc.getAggregators().size(); idx++) {
  AggregationDesc aggDesc = childGroupByDesc.getAggregators().get(idx);
  if (!aggDesc.getDistinct()) {
    // Non-distinct aggregations are left untouched.
    continue;
  }

  // Only the first parameter is carried over; it is expected to be a column reference.
  ExprNodeDesc firstParam = aggDesc.getParameters().get(0);
  assert firstParam instanceof ExprNodeColumnDesc;
  ExprNodeColumnDesc valueCol = (ExprNodeColumnDesc) firstParam;

  // Re-point the column at "VALUE._col<idx>", where <idx> is this aggregation's position.
  valueCol.setIsPartitionColOrVirtualCol(false);
  valueCol.setColumn("VALUE._col" + idx);

  // Replace the parameter list with one holding just the rewritten column.
  ArrayList<ExprNodeDesc> rewrittenParams = new ArrayList<ExprNodeDesc>();
  rewrittenParams.add(valueCol);
  aggDesc.setParameters(rewrittenParams);

  aggDesc.setDistinct(false);
  aggDesc.setMode(Mode.FINAL);
}

// The mapper performs the partial aggregation, so the reducer does no distinct processing.
childGroupByDesc.setDistinct(false);
groupByOpDesc.setDontResetAggrsDistinct(true);