int depth) throws SemanticException {
HiveConf hiveConf = ctx.getConf();
GroupByOptimizerSortMatch match = checkSortGroupBy(stack, groupByOp);
boolean useMapperSort =
HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT);
GroupByDesc groupByOpDesc = groupByOp.getConf();
boolean removeReduceSink = false;
boolean optimizeDistincts = false;
boolean setBucketGroup = false;
// Don't remove the reduce sink operator for distincts
if (useMapperSort &&
(match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
if (!groupByOpDesc.isDistinct()) {
removeReduceSink = true;
}
else if (!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
// Optimize the query: select count(distinct keys) from T, where
// T is bucketized and sorted by keys.
// Partial aggregation can be done by the mappers in this scenario
List<ExprNodeDesc> keys =
((GroupByOperator)
(groupByOp.getChildOperators().get(0).getChildOperators().get(0)))
.getConf().getKeys();
if ((keys == null) || (keys.isEmpty())) {
optimizeDistincts = true;
}
}
}
if ((match == GroupByOptimizerSortMatch.PARTIAL_MATCH) ||
(match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) {
setBucketGroup = true;
}
if (removeReduceSink) {
convertGroupByMapSideSortedGroupBy(hiveConf, groupByOp, depth);
}
else if (optimizeDistincts && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
// In test mode, don't change the query plan. However, set up a query property
pGraphContext.getQueryProperties().setHasMapGroupBy(true);
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) {
return;
}
ReduceSinkOperator reduceSinkOp =
(ReduceSinkOperator)groupByOp.getChildOperators().get(0);
GroupByDesc childGroupByDesc =
((GroupByOperator)
(reduceSinkOp.getChildOperators().get(0))).getConf();
for (int pos = 0; pos < childGroupByDesc.getAggregators().size(); pos++) {
AggregationDesc aggr = childGroupByDesc.getAggregators().get(pos);
// Partial aggregation is not done for distincts on the mapper
// However, if the data is bucketed/sorted on the distinct key, partial aggregation
// can be performed on the mapper.
if (aggr.getDistinct()) {
ArrayList<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
ExprNodeDesc param = aggr.getParameters().get(0);
assert param instanceof ExprNodeColumnDesc;
ExprNodeColumnDesc paramC = (ExprNodeColumnDesc) param;
paramC.setIsPartitionColOrVirtualCol(false);
paramC.setColumn("VALUE._col" + pos);
parameters.add(paramC);
aggr.setParameters(parameters);
aggr.setDistinct(false);
aggr.setMode(Mode.FINAL);
}
}
// Partial aggregation is performed on the mapper, no distinct processing at the reducer
childGroupByDesc.setDistinct(false);
groupByOpDesc.setDontResetAggrsDistinct(true);
groupByOpDesc.setBucketGroup(true);
groupByOp.setUseBucketizedHiveInputFormat(true);
// no distinct processing at the reducer
// A query like 'select count(distinct key) from T' is transformed into