* The Node Processor for Column Pruning on Select Operators.
*/
public static class ColumnPrunerSelectProc implements NodeProcessor {
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
SelectOperator op = (SelectOperator) nd;
ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
if (op.getChildOperators() != null) {
for (Operator<? extends OperatorDesc> child : op.getChildOperators()) {
// UDTF is not handled yet, so the parent SelectOp of UDTF should just assume
// all columns.
if ((child instanceof UDTFOperator)) {
cppCtx.getPrunedColLists()
.put(op, cppCtx.getColsFromSelectExpr(op));
return null;
}
}
}
LateralViewJoinOperator lvJoin = null;
if (op.getConf().isSelStarNoCompute()) {
assert op.getNumChild() == 1;
Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);
if (child instanceof LateralViewJoinOperator) { // this SEL is SEL(*)
// for LV
lvJoin = (LateralViewJoinOperator) child;
}
}
List<String> cols = cppCtx.genColLists(op);
SelectDesc conf = op.getConf();
if (lvJoin != null) {
// get columns for SEL(*) from LVJ
if (cols != null) {
RowResolver rr = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
cppCtx.getPrunedColLists().put(op,
cppCtx.getSelectColsFromLVJoin(rr, cols));
}
return null;
}
// The input to the select does not matter. Go over the expressions
// and return the ones which have a marked column
cppCtx.getPrunedColLists().put(op,
cppCtx.getSelectColsFromChildren(op, cols));
if (cols == null || conf.isSelStarNoCompute()) {
return null;
}
// do we need to prune the select operator?
List<ExprNodeDesc> originalColList = op.getConf().getColList();
List<String> columns = new ArrayList<String>();
for (ExprNodeDesc expr : originalColList) {
Utilities.mergeUniqElems(columns, expr.getCols());
}
// by now, 'cols' are columns used by child operators, and 'columns'
// are columns used by this select operator.
List<String> originalOutputColumnNames = conf.getOutputColumnNames();
if (cols.size() < originalOutputColumnNames.size()) {
ArrayList<ExprNodeDesc> newColList = new ArrayList<ExprNodeDesc>();
ArrayList<String> newOutputColumnNames = new ArrayList<String>();
ArrayList<ColumnInfo> rs_oldsignature = op.getSchema().getSignature();
ArrayList<ColumnInfo> rs_newsignature = new ArrayList<ColumnInfo>();
RowResolver old_rr = cppCtx.getOpToParseCtxMap().get(op)
.getRowResolver();
RowResolver new_rr = new RowResolver();
for (String col : cols) {
int index = originalOutputColumnNames.indexOf(col);
newOutputColumnNames.add(col);
newColList.add(originalColList.get(index));
rs_newsignature.add(rs_oldsignature.get(index));
String[] tabcol = old_rr.reverseLookup(col);
ColumnInfo columnInfo = old_rr.get(tabcol[0], tabcol[1]);
new_rr.put(tabcol[0], tabcol[1], columnInfo);
}
cppCtx.getOpToParseCtxMap().get(op).setRowResolver(new_rr);
op.getSchema().setSignature(rs_newsignature);
conf.setColList(newColList);
conf.setOutputColumnNames(newOutputColumnNames);
handleChildren(op, cols, cppCtx);
}
return null;