@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
ExtractOperator exop = (ExtractOperator)nd;
// As of writing this, an ExtractOperator always has exactly one parent, so this check never
// fires; it is only protection against possible future changes
if (exop.getParentOperators().size() != 1) {
return null;
}
Operator<? extends OperatorDesc> parent = exop.getParentOperators().get(0);
// The caller of this method should guarantee this
assert(parent instanceof ReduceSinkOperator);
ReduceSinkOperator rop = (ReduceSinkOperator)parent;
// Go through the set of partition columns, and find their representatives in the values
// These represent the bucketed columns
List<BucketCol> bucketCols = new ArrayList<BucketCol>();
for (int i = 0; i < rop.getConf().getPartitionCols().size(); i++) {
boolean valueColFound = false;
for (int j = 0; j < rop.getConf().getValueCols().size(); j++) {
if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals(
new ExprNodeDescEqualityWrapper(rop.getConf().getPartitionCols().get(i)))) {
bucketCols.add(new BucketCol(
rop.getSchema().getSignature().get(j).getInternalName(), j));
valueColFound = true;
break;
}
}
// If the partition columns can't all be found in the values then the data is not bucketed
if (!valueColFound) {
bucketCols.clear();
break;
}
}
// Go through the set of key columns, and find their representatives in the values
// These represent the sorted columns
String sortOrder = rop.getConf().getOrder();
List<SortCol> sortCols = new ArrayList<SortCol>();
for (int i = 0; i < rop.getConf().getKeyCols().size(); i++) {
boolean valueColFound = false;
for (int j = 0; j < rop.getConf().getValueCols().size(); j++) {
if (new ExprNodeDescEqualityWrapper(rop.getConf().getValueCols().get(j)).equals(
new ExprNodeDescEqualityWrapper(rop.getConf().getKeyCols().get(i)))) {
sortCols.add(new SortCol(
rop.getSchema().getSignature().get(j).getInternalName(), j, sortOrder.charAt(i)));
valueColFound = true;
break;
}
}
// If the sorted columns can't all be found in the values then the data is only sorted on
// the columns seen up until now
if (!valueColFound) {
break;
}
}
List<ColumnInfo> colInfos = exop.getSchema().getSignature();
if (!bucketCols.isEmpty()) {
List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
bctx.setBucketedCols(exop, newBucketCols);
}