throws SemanticException {
/*
* 1. Create the PTFDesc from the QSpec attached to this QB.
*/
RowResolver rr = opParseCtx.get(input).getRowResolver();
PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
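/*
* For example (illustrative only): for a windowing query such as
*   select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) from part
* the QSpec carries the partitioning, ordering and function invocations, and
* translation resolves them against the input Operator's RowResolver 'rr'.
*/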
RowResolver rsOpRR = new RowResolver();
/*
* Build an RR for the Extract Op from the ReduceSink Op's RR.
* Why?
* We need to remove the Virtual Columns present in the RS's RR. The OI
* that gets passed to Extract at runtime doesn't contain the Virtual Columns,
* so the internal names shift. Consider the test case testJoinWithLeadLag,
* which is a self join on part and also has a Windowing expression.
* The RR of the RS op at translation time looks something like this:
* (_col1,_col2,..,_col7, _col8(vc=true),_col9(vc=true),
* _col10,_col11,.._col15(vc=true),_col16(vc=true),..)
* At runtime the Virtual Columns are removed and all the columns after _col7
* are shifted 1 or 2 positions.
* So the ExprNodeColumnDescs in child Operators no longer refer to the right columns.
*
* So we build a new RR for the Extract Op, with the Virtual Columns removed.
* We hand this to the PTFTranslator as the
* starting RR to use to translate a PTF Chain.
*/
RowResolver extractOpRR = new RowResolver();
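/*
* Note: both rsOpRR and extractOpRR are populated by the call to
* buildPTFReduceSinkDetails below; rsOpRR describes the ReduceSink output,
* extractOpRR the same row with the Virtual Columns removed.
*/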
/*
* 2. build Map-side Op Graph. Graph template is either:
* Input -> PTF_map -> ReduceSink
* or
* Input -> ReduceSink
*
* Here the ExprNodeDescriptors in the QueryDef are based on the Input Operator's RR.
*/
{
PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain();
/*
* a. add Map-side PTF Operator if needed
*/
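/*
* (A table function "transforms raw input" when it declares a map-side
* phase - for example the noopwithmap test function; plain windowing
* functions do not, so no map-side PTF Operator is added for them.)
*/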
if (tabDef.isTransformsRawInput()) {
RowResolver ptfMapRR = tabDef.getRawInputShape().getRr();
ptfDesc.setMapSide(true);
input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc,
new RowSchema(ptfMapRR.getColumnInfos()),
input), ptfMapRR);
rr = opParseCtx.get(input).getRowResolver();
}
/*
* b. Build Reduce Sink Details (keyCols, valueCols, outColNames etc.) for this ptfDesc.
*/
ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<String> outputColumnNames = new ArrayList<String>();
StringBuilder orderString = new StringBuilder();
/*
* Use the input RR of the TableScanOperator in case there is no map-side
* reshape of the input.
* If the parent of the ReduceSinkOperator is a PTFOperator, use its
* output RR.
*/
buildPTFReduceSinkDetails(tabDef,
rr,
partCols,
valueCols,
orderCols,
colExprMap,
outputColumnNames,
orderString,
rsOpRR,
extractOpRR);
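/*
* buildPTFReduceSinkDetails derives partCols/orderCols from the partition
* and order specs of the start-of-chain table function, and fills in
* rsOpRR and extractOpRR as described above.
*/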
input = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
.getReduceSinkDesc(orderCols,
valueCols, outputColumnNames, false,
-1, partCols, orderString.toString(), -1),
new RowSchema(rsOpRR.getColumnInfos()), input), rsOpRR);
input.setColumnExprMap(colExprMap);
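/*
* The colExprMap lets downstream optimizations (e.g. column pruning) map the
* ReduceSink's output column names back to the expressions that produce them.
*/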
}
/*
* 3. build Reduce-side Op Graph
*/
{
/*
* a. Construct Extract Operator.
*/
input = putOpInsertMap(OperatorFactory.getAndMakeChild(
new ExtractDesc(
new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
Utilities.ReduceField.VALUE
.toString(), "", false)),
new RowSchema(extractOpRR.getColumnInfos()),
input), extractOpRR);
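/*
* The Extract Op projects the VALUE portion of the ReduceSink's key/value
* output, re-exposing the (Virtual-Column-free) row described by extractOpRR
* to the reduce-side PTF Operator.
*/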
/*
* b. Rebuild the QueryDef.
* Why?
* - so that the ExprNodeDescriptors in the QueryDef are based on the
* Extract Operator's RowResolver
*/
rr = opParseCtx.get(input).getRowResolver();
ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
/*
* c. Construct PTF Operator.
*/
RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc,
new RowSchema(ptfOpRR.getColumnInfos()),
input), ptfOpRR);
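/*
* The reduce-side PTF Operator evaluates the function chain (e.g. the
* windowing functions) over each partition delivered by the shuffle; its
* output shape (ptfOpRR) becomes the RowResolver for downstream operators.
*/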
}