// has the return type bag
List<PhysicalOperator> preds = mPlan.getPredecessors(proj);
if(preds == null) return; // this is a leaf project and so not interesting for patching
PhysicalOperator pred = preds.get(0);
if(preds.size() == 1 && pred instanceof PODistinct) {
if(patched) {
// we should not already have been patched since the
// Project-Distinct pair should occur only once
int errCode = 2076;
String msg = "Unexpected Project-Distinct pair while trying to set up plans for use with combiner.";
throw new OptimizerException(msg, errCode, PigException.BUG);
}
// we have to stick in the POUserFunc(org.apache.pig.builtin.Distinct)[DataBag]
// in place of the Project-PODistinct pair
PhysicalOperator distinctPredecessor = mPlan.getPredecessors(pred).get(0);
try {
String scope = proj.getOperatorKey().scope;
List<PhysicalOperator> funcInput = new ArrayList<PhysicalOperator>();
FuncSpec fSpec = new FuncSpec(DISTINCT_UDF_CLASSNAME);
funcInput.add(distinctPredecessor);
// explicitly set distinctPredecessor's result type to
// be tuple - this is relevant when distinctPredecessor is
// originally a POForeach with return type BAG - we need to
// set it to tuple so we get a stream of tuples.
distinctPredecessor.setResultType(DataType.TUPLE);
POUserFunc func = new POUserFunc(new OperatorKey(scope,
NodeIdGenerator.getGenerator().getNextNodeId(scope)),-1, funcInput, fSpec);
func.setResultType(DataType.BAG);
mPlan.replace(proj, func);
mPlan.remove(pred);