}
}
Map<String, ExprNodeDesc> exprMap = selOp.getColumnExprMap();
// Since we have already matched the exact operator pattern TS-SEL-GBY-RS-GBY-SEL-FS,
// no instanceof checks are needed before the casts that follow.
GroupByOperator gbyOp = (GroupByOperator)selOp.getChildren().get(0);
ReduceSinkOperator rsOp = (ReduceSinkOperator)gbyOp.getChildren().get(0);
if (rsOp.getConf().getDistinctColumnIndices().size() > 0) {
// we can't handle distinct
return null;
}
selOp = (SelectOperator)rsOp.getChildOperators().get(0).getChildOperators().get(0);
List<AggregationDesc> aggrs = gbyOp.getConf().getAggregators();
if (!(selOp.getConf().getColList().size() == aggrs.size())) {
// all select columns must be aggregations
return null;
}
for(ExprNodeDesc desc : selOp.getConf().getColList()) {
if (!(desc instanceof ExprNodeColumnDesc)) {
// Probably an expression; we can't handle that.
return null;
}
}
FileSinkOperator fsOp = (FileSinkOperator)(selOp.getChildren().get(0));
if (fsOp.getChildOperators() != null && fsOp.getChildOperators().size() > 0) {
// Looks like a subquery plan.
return null;
}
Table tbl = pctx.getTopToTable().get(tsOp);
List<Object> oneRow = new ArrayList<Object>();
List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
Hive hive = Hive.get(pctx.getConf());
for (AggregationDesc aggr : aggrs) {
if (aggr.getDistinct()) {
// Our NDV stats are approximate, not exact.
return null;
}
// Get the aggregate function matching the name in the query.
GenericUDAFResolver udaf =
FunctionRegistry.getGenericUDAFResolver(aggr.getGenericUDAFName());
if (udaf instanceof GenericUDAFSum) {
ExprNodeDesc desc = aggr.getParameters().get(0);
String constant;
if (desc instanceof ExprNodeConstantDesc) {
constant = ((ExprNodeConstantDesc) desc).getValue().toString();
} else if (desc instanceof ExprNodeColumnDesc && exprMap.get(((ExprNodeColumnDesc)desc).getColumn()) instanceof ExprNodeConstantDesc) {
constant = ((ExprNodeConstantDesc)exprMap.get(((ExprNodeColumnDesc)desc).getColumn())).getValue().toString();
} else {
return null;
}
Long rowCnt = getRowCnt(pctx, tsOp, tbl);
if(rowCnt == null) {
return null;
}
oneRow.add(HiveDecimal.create(constant).multiply(HiveDecimal.create(rowCnt)));
ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
PrimitiveCategory.DECIMAL));
}
else if (udaf instanceof GenericUDAFCount) {
Long rowCnt = 0L;
if (aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof
ExprNodeConstantDesc || ((aggr.getParameters().get(0) instanceof ExprNodeColumnDesc) &&
exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()) instanceof ExprNodeConstantDesc)) {
// It's either the count(*) or the count(1) case
rowCnt = getRowCnt(pctx, tsOp, tbl);
if(rowCnt == null) {
return null;
}
} else {
// It's the count(col) case
ExprNodeColumnDesc desc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn());
String colName = desc.getColumn();
StatType type = getType(desc.getTypeString());
if(!tbl.isPartitioned()) {
if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
return null;
}
rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT));
if (rowCnt < 1) {
Log.debug("Table doesn't have upto date stats " + tbl.getTableName());
return null;
}
List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
tbl.getDbName(),tbl.getTableName(), Lists.newArrayList(colName));
if (stats.isEmpty()) {
Log.debug("No stats for " + tbl.getTableName() + " column " + colName);
return null;
}
Long nullCnt = getNullcountFor(type, stats.get(0).getStatsData());
if (null == nullCnt) {
Log.debug("Unsupported type: " + desc.getTypeString() + " encountered in " +
"metadata optimizer for column : " + colName);
return null;
} else {
rowCnt -= nullCnt;
}
} else {
Set<Partition> parts = pctx.getPrunedPartitions(
tsOp.getConf().getAlias(), tsOp).getPartitions();
for (Partition part : parts) {
if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
return null;
}
Long partRowCnt = Long.parseLong(part.getParameters()
.get(StatsSetupConst.ROW_COUNT));
if (partRowCnt < 1) {
Log.debug("Partition doesn't have upto date stats " + part.getSpec());
return null;
}
rowCnt += partRowCnt;
}
Collection<List<ColumnStatisticsObj>> result =
verifyAndGetPartStats(hive, tbl, colName, parts);
if (result == null) {
return null; // logging inside
}
for (List<ColumnStatisticsObj> statObj : result) {
ColumnStatisticsData statData = validateSingleColStat(statObj);
if (statData == null) return null;
Long nullCnt = getNullcountFor(type, statData);
if (nullCnt == null) {
Log.debug("Unsupported type: " + desc.getTypeString() + " encountered in " +
"metadata optimizer for column : " + colName);
return null;
} else {
rowCnt -= nullCnt;
}
}
}
}
oneRow.add(rowCnt);
ois.add(PrimitiveObjectInspectorFactory.
getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
} else if (udaf instanceof GenericUDAFMax) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn());
String colName = colDesc.getColumn();
StatType type = getType(colDesc.getTypeString());
if(!tbl.isPartitioned()) {
if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
return null;
}
List<ColumnStatisticsObj> stats = hive.getMSC().getTableColumnStatistics(
tbl.getDbName(),tbl.getTableName(), Lists.newArrayList(colName));
if (stats.isEmpty()) {
Log.debug("No stats for " + tbl.getTableName() + " column " + colName);
return null;
}
ColumnStatisticsData statData = stats.get(0).getStatsData();
switch (type) {
case Integeral:
LongColumnStatsData lstats = statData.getLongStats();
oneRow.add(lstats.isSetHighValue() ? lstats.getHighValue() : null);
ois.add(PrimitiveObjectInspectorFactory.
getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
break;
case Double:
DoubleColumnStatsData dstats = statData.getDoubleStats();
oneRow.add(dstats.isSetHighValue() ? dstats.getHighValue() : null);
ois.add(PrimitiveObjectInspectorFactory.
getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
break;
default:
// unsupported type
Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
"metadata optimizer for column : " + colName);
return null;
}
} else {
Set<Partition> parts = pctx.getPrunedPartitions(
tsOp.getConf().getAlias(), tsOp).getPartitions();
switch (type) {
case Integeral: {
Long maxVal = null;
Collection<List<ColumnStatisticsObj>> result =
verifyAndGetPartStats(hive, tbl, colName, parts);
if (result == null) {
return null; // logging inside
}
for (List<ColumnStatisticsObj> statObj : result) {
ColumnStatisticsData statData = validateSingleColStat(statObj);
if (statData == null) return null;
LongColumnStatsData lstats = statData.getLongStats();
if (!lstats.isSetHighValue()) {
continue;
}
long curVal = lstats.getHighValue();
maxVal = maxVal == null ? curVal : Math.max(maxVal, curVal);
}
oneRow.add(maxVal);
ois.add(PrimitiveObjectInspectorFactory.
getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
break;
}
case Double: {
Double maxVal = null;
Collection<List<ColumnStatisticsObj>> result =
verifyAndGetPartStats(hive, tbl, colName, parts);
if (result == null) {
return null; // logging inside
}
for (List<ColumnStatisticsObj> statObj : result) {
ColumnStatisticsData statData = validateSingleColStat(statObj);
if (statData == null) return null;
DoubleColumnStatsData dstats = statData.getDoubleStats();
if (!dstats.isSetHighValue()) {
continue;
}
double curVal = statData.getDoubleStats().getHighValue();
maxVal = maxVal == null ? curVal : Math.max(maxVal, curVal);
}
oneRow.add(maxVal);
ois.add(PrimitiveObjectInspectorFactory.
getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
break;
}
default:
Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
"metadata optimizer for column : " + colName);
return null;
}
}
} else if (udaf instanceof GenericUDAFMin) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn());
String colName = colDesc.getColumn();
StatType type = getType(colDesc.getTypeString());
if (!tbl.isPartitioned()) {
if (!StatsSetupConst.areStatsUptoDate(tbl.getParameters())) {
Log.debug("Stats for table : " + tbl.getTableName() + " are not upto date.");
return null;
}
ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
tbl.getDbName(), tbl.getTableName(), Lists.newArrayList(colName))
.get(0).getStatsData();
switch (type) {
case Integeral:
LongColumnStatsData lstats = statData.getLongStats();
oneRow.add(lstats.isSetLowValue() ? lstats.getLowValue() : null);
ois.add(PrimitiveObjectInspectorFactory.
getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
break;
case Double:
DoubleColumnStatsData dstats = statData.getDoubleStats();
oneRow.add(dstats.isSetLowValue() ? dstats.getLowValue() : null);
ois.add(PrimitiveObjectInspectorFactory.
getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
break;
default: // unsupported type
Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
"metadata optimizer for column : " + colName);
return null;
}
} else {
Set<Partition> parts = pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp).getPartitions();
switch(type) {
case Integeral: {
Long minVal = null;
Collection<List<ColumnStatisticsObj>> result =
verifyAndGetPartStats(hive, tbl, colName, parts);
if (result == null) {
return null; // logging inside
}
for (List<ColumnStatisticsObj> statObj : result) {
ColumnStatisticsData statData = validateSingleColStat(statObj);
if (statData == null) return null;
LongColumnStatsData lstats = statData.getLongStats();
if (!lstats.isSetLowValue()) {
continue;
}
long curVal = lstats.getLowValue();
minVal = minVal == null ? curVal : Math.min(minVal, curVal);
}
oneRow.add(minVal);
ois.add(PrimitiveObjectInspectorFactory.
getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
break;
}
case Double: {
Double minVal = null;
Collection<List<ColumnStatisticsObj>> result =
verifyAndGetPartStats(hive, tbl, colName, parts);
if (result == null) {
return null; // logging inside
}
for (List<ColumnStatisticsObj> statObj : result) {
ColumnStatisticsData statData = validateSingleColStat(statObj);
if (statData == null) return null;
DoubleColumnStatsData dstats = statData.getDoubleStats();
if (!dstats.isSetLowValue()) {
continue;
}
double curVal = statData.getDoubleStats().getLowValue();
minVal = minVal == null ? curVal : Math.min(minVal, curVal);
}
oneRow.add(minVal);
ois.add(PrimitiveObjectInspectorFactory.
getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
break;
}
default: // unsupported type
Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
"metadata optimizer for column : " + colName);
return null;
}
}
} else { // Unsupported aggregation.
Log.debug("Unsupported aggregation for metadata optimizer: "
+ aggr.getGenericUDAFName());
return null;
}
}
List<List<Object>> allRows = new ArrayList<List<Object>>();
allRows.add(oneRow);
List<String> colNames = new ArrayList<String>();
for (ColumnInfo colInfo: gbyOp.getSchema().getSignature()) {
colNames.add(colInfo.getInternalName());
}
StandardStructObjectInspector sOI = ObjectInspectorFactory.
getStandardStructObjectInspector(colNames, ois);
FetchWork fWork = new FetchWork(allRows, sOI);