if (!(root instanceof POPackage)) {
messageCollector.collect("Expected reduce root to be a POPackage", MessageType.Warning,
PigWarning.NON_PACKAGE_REDUCE_PLAN_ROOT);
return;
}
POPackage pack = (POPackage)root;
List<PhysicalOperator> packSuccessors = reducePlan.getSuccessors(root);
if (packSuccessors == null || packSuccessors.size() != 1) {
return;
}
PhysicalOperator successor = packSuccessors.get(0);
if (successor instanceof POLimit) {
// POLimit is acceptable, as long as it has a single foreach as
// successor
List<PhysicalOperator> limitSucs = reducePlan.getSuccessors(successor);
if (limitSucs != null && limitSucs.size() == 1 &&
limitSucs.get(0) instanceof POForEach) {
// the code below will now further examine the foreach
successor = limitSucs.get(0);
}
}
if (successor instanceof POForEach) {
POForEach foreach = (POForEach)successor;
List<PhysicalPlan> feInners = foreach.getInputPlans();
// find algebraic operators and also check if the foreach statement
// is suitable for combiner use
List<Pair<PhysicalOperator, PhysicalPlan>> algebraicOps = findAlgebraicOps(feInners);
if (algebraicOps == null || algebraicOps.size() == 0) {
// the plan is not combinable or there is nothing to combine
// we're done
return;
}
if (combinePlan != null && combinePlan.getRoots().size() != 0) {
messageCollector.collect("Wasn't expecting to find anything already " +
"in the combiner!", MessageType.Warning, PigWarning.NON_EMPTY_COMBINE_PLAN);
return;
}
LOG.info("Choosing to move algebraic foreach to combiner");
try {
// replace PODistinct->Project[*] with distinct udf (which is Algebraic)
for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
if (! (op2plan.first instanceof PODistinct)) {
continue;
}
DistinctPatcher distinctPatcher = new DistinctPatcher(op2plan.second);
distinctPatcher.visit();
if (distinctPatcher.getDistinct() == null) {
int errCode = 2073;
String msg = "Problem with replacing distinct operator with distinct built-in function.";
throw new PlanException(msg, errCode, PigException.BUG);
}
op2plan.first = distinctPatcher.getDistinct();
}
// create new map foreach
POForEach mfe = createForEachWithGrpProj(foreach, rearrange.getKeyType());
Map<PhysicalOperator, Integer> op2newpos = Maps.newHashMap();
Integer pos = 1;
// create plan for each algebraic udf and add as inner plan in map-foreach
for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
PhysicalPlan udfPlan = createPlanWithPredecessors(op2plan.first, op2plan.second);
mfe.addInputPlan(udfPlan, false);
op2newpos.put(op2plan.first, pos++);
}
changeFunc(mfe, POUserFunc.INITIAL);
// since we will only be creating SingleTupleBag as input to
// the map foreach, we should flag the POProjects in the map
// foreach inner plans to also use SingleTupleBag
for (PhysicalPlan mpl : mfe.getInputPlans()) {
try {
new fixMapProjects(mpl).visit();
} catch (VisitorException e) {
int errCode = 2089;
String msg = "Unable to flag project operator to use single tuple bag.";
throw new PlanException(msg, errCode, PigException.BUG, e);
}
}
// create new combine foreach
POForEach cfe = createForEachWithGrpProj(foreach, rearrange.getKeyType());
// add algebraic functions with appropriate projection
addAlgebraicFuncToCombineFE(cfe, op2newpos);
changeFunc(cfe, POUserFunc.INTERMEDIATE);
// fix projection and function time for algebraic functions in reduce foreach
for (Pair<PhysicalOperator, PhysicalPlan> op2plan : algebraicOps) {
setProjectInput(op2plan.first, op2plan.second, op2newpos.get(op2plan.first));
((POUserFunc)op2plan.first).setAlgebraicFunction(POUserFunc.FINAL);
}
// we have modified the foreach inner plans - so set them again
// for the foreach so that foreach can do any re-initialization
// around them.
// FIXME - this is a necessary evil right now because the leaves
// are explicitly stored in the POForeach as a list rather than
// computed each time at run time from the plans for
// optimization. Do we want to have the Foreach compute the
// leaves each time and have Java optimize it (will Java
// optimize?)?
mfe.setInputPlans(mfe.getInputPlans());
cfe.setInputPlans(cfe.getInputPlans());
foreach.setInputPlans(foreach.getInputPlans());
// tell POCombinerPackage which fields need to be projected and which
// need to be placed in bags. The first field is a simple project; the
// rest need to go into bags
int numFields = algebraicOps.size() + 1; // algebraic funcs + group key
boolean[] bags = new boolean[numFields];
bags[0] = false;
for (int i = 1; i < numFields; i++) {
bags[i] = true;
}
// Use the POCombiner package in the combine plan
// as it needs to act differently than the regular
// package operator.
CombinerPackager pkgr = new CombinerPackager(pack.getPkgr(), bags);
POPackage combinePack = pack.clone();
combinePack.setPkgr(pkgr);
combinePack.setParentPlan(null);
combinePlan.add(combinePack);
combinePlan.add(cfe);
combinePlan.connect(combinePack, cfe);