TrackOrderPreservingExpressionCompiler groupByVisitor =
new TrackOrderPreservingExpressionCompiler(context,
GroupBy.EMPTY_GROUP_BY, groupByNodes.size(),
Ordering.UNORDERED);
for (ParseNode node : groupByNodes) {
Expression expression = node.accept(groupByVisitor);
if (groupByVisitor.isAggregate()) {
throw new SQLExceptionInfo.Builder(SQLExceptionCode.AGGREGATE_IN_GROUP_BY)
.setMessage(expression.toString()).build().buildException();
}
if (!expression.isStateless()) {
groupByVisitor.addEntry(expression);
}
groupByVisitor.reset();
}
List<Entry> groupByEntries = groupByVisitor.getEntries();
if (groupByEntries.isEmpty()) {
return GroupBy.EMPTY_GROUP_BY;
}
boolean isRowKeyOrderedGrouping = groupByVisitor.isOrderPreserving();
List<Expression> expressions = Lists.newArrayListWithCapacity(groupByEntries.size());
List<Expression> keyExpressions = expressions;
String groupExprAttribName;
// This is true if the GROUP BY is composed of only PK columns. We further check here that
// there are no "gaps" in the PK column positions used (i.e. we start with the first PK
// column and use each subsequent one in PK order).
if (isRowKeyOrderedGrouping) {
groupExprAttribName = BaseScannerRegionObserver.KEY_ORDERED_GROUP_BY_EXPRESSIONS;
for (Entry groupByEntry : groupByEntries) {
expressions.add(groupByEntry.getExpression());
}
} else {
/*
* Otherwise, our coprocessor needs to collect all distinct groups within a region, sort them, and
* hold on to them until the scan completes.
*/
groupExprAttribName = BaseScannerRegionObserver.UNORDERED_GROUP_BY_EXPRESSIONS;
/*
* Put fixed length nullables at the end, so that we can represent null by the absence of the trailing
* value in the group by key. If there is more than one, we'll need to convert the ones not at the end
* into a Decimal so that we can use an empty byte array as our representation for null (which correctly
* maintains the sort order). We convert the Decimal back to the appropriate type (Integer or Long) when
* it's retrieved from the result set.
*
* More specifically, order into the following buckets:
* 1) non nullable fixed width
* 2) variable width
* 3) nullable fixed width
* Within each bucket, order based on the column position in the schema. Putting the fixed width values
* in the beginning optimizes access to subsequent values.
*/
Collections.sort(groupByEntries, new Comparator<Entry>() {
/**
 * Orders two GROUP BY entries by bucket, then by column position.
 *
 * Buckets, from first to last:
 *   1) non nullable fixed width
 *   2) variable width
 *   3) nullable fixed width
 *
 * @param o1 the first entry to compare
 * @param o2 the second entry to compare
 * @return a negative value, zero, or a positive value when o1 sorts before,
 *         together with, or after o2
 */
@Override
public int compare(Entry o1, Entry o2) {
    Expression e1 = o1.getExpression();
    Expression e2 = o2.getExpression();
    boolean isFixed1 = e1.getDataType().isFixedWidth();
    boolean isFixed2 = e2.getDataType().isFixedWidth();
    boolean isFixedNullable1 = e1.isNullable() && isFixed1;
    boolean isFixedNullable2 = e2.isNullable() && isFixed2;
    // Nullable fixed width expressions always sort after everything else.
    if (isFixedNullable1 != isFixedNullable2) {
        return isFixedNullable1 ? 1 : -1;
    }
    // Among the remaining expressions, fixed width sorts before variable width.
    if (isFixed1 != isFixed2) {
        return isFixed1 ? -1 : 1;
    }
    // Same bucket: fall back to the column position. Not strictly necessary, but
    // forces the order to match the schema column order (with PK columns before
    // value columns). Compare explicitly rather than subtracting, so the sign of
    // the result cannot be flipped by int overflow.
    int position1 = o1.getColumnPosition();
    int position2 = o2.getColumnPosition();
    return position1 < position2 ? -1 : (position1 == position2 ? 0 : 1);
}
});
for (Entry groupByEntry : groupByEntries) {
expressions.add(groupByEntry.getExpression());
}
for (int i = expressions.size()-2; i >= 0; i--) {
Expression expression = expressions.get(i);
PDataType keyType = getKeyType(expression);
if (keyType == expression.getDataType()) {
continue;
}
// Copy expressions only when keyExpressions will be different than expressions
if (keyExpressions == expressions) {
keyExpressions = new ArrayList<Expression>(expressions);