* The Node Processor for Column Pruning on Select Operators.
*/
public static class ColumnPrunerSelectProc implements NodeProcessor {
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
SelectOperator op = (SelectOperator) nd;
ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
List<String> cols = new ArrayList<String>();
if (op.getChildOperators() != null) {
for (Operator<? extends Serializable> child : op.getChildOperators()) {
// If one of my children is a FileSink, Script, UDTF, Limit or Union
// operator, record the columns from this select's expressions and return
// early. Without this early return, a bug in ReduceSink to Extract edge
// column pruning will manifest; that bug should be fixed before removing this.
if ((child instanceof FileSinkOperator)
|| (child instanceof ScriptOperator)
|| (child instanceof UDTFOperator)
|| (child instanceof LimitOperator)
|| (child instanceof UnionOperator)) {
cppCtx.getPrunedColLists()
.put(op, cppCtx.getColsFromSelectExpr(op));
return null;
}
}
}
cols = cppCtx.genColLists(op);
SelectDesc conf = op.getConf();
// The input to the select does not matter. Go over the expressions
// and return the ones which have a marked column
cppCtx.getPrunedColLists().put(op,
cppCtx.getSelectColsFromChildren(op, cols));
if (conf.isSelStarNoCompute()) {
return null;
}
// do we need to prune the select operator?
List<ExprNodeDesc> originalColList = op.getConf().getColList();
List<String> columns = new ArrayList<String>();
for (ExprNodeDesc expr : originalColList) {
Utilities.mergeUniqElems(columns, expr.getCols());
}
// by now, 'cols' are columns used by child operators, and 'columns'
// are columns used by this select operator.
ArrayList<String> originalOutputColumnNames = conf.getOutputColumnNames();
if (cols.size() < originalOutputColumnNames.size()) {
ArrayList<ExprNodeDesc> newColList = new ArrayList<ExprNodeDesc>();
ArrayList<String> newOutputColumnNames = new ArrayList<String>();
ArrayList<ColumnInfo> rs_oldsignature = op.getSchema().getSignature();
ArrayList<ColumnInfo> rs_newsignature = new ArrayList<ColumnInfo>();
RowResolver old_rr = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
RowResolver new_rr = new RowResolver();
for (String col : cols) {
int index = originalOutputColumnNames.indexOf(col);
newOutputColumnNames.add(col);
newColList.add(originalColList.get(index));
rs_newsignature.add(rs_oldsignature.get(index));
String[] tabcol = old_rr.reverseLookup(col);
ColumnInfo columnInfo = old_rr.get(tabcol[0], tabcol[1]);
new_rr.put(tabcol[0], tabcol[1], columnInfo);
}
cppCtx.getOpToParseCtxMap().get(op).setRowResolver(new_rr);
op.getSchema().setSignature(rs_newsignature);
conf.setColList(newColList);
conf.setOutputColumnNames(newOutputColumnNames);
handleChildren(op, cols, cppCtx);
}
return null;