}
PhysicalOperator mapLeaf = mapLeaves.get(0);
if (!(mapLeaf instanceof POLocalRearrange)) {
return;
}
POLocalRearrange rearrange = (POLocalRearrange)mapLeaf;
List<PhysicalOperator> reduceRoots = mr.reducePlan.getRoots();
if (reduceRoots.size() != 1) {
messageCollector.collect("Expected reduce to have single leaf", MessageType.Warning, PigWarning.MULTI_LEAF_REDUCE);
return;
}
// I expect that the first root should always be a POPackage. If
// not, I don't know what's going on, so I'm out of here.
PhysicalOperator root = reduceRoots.get(0);
if (!(root instanceof POPackage)) {
messageCollector.collect("Expected reduce root to be a POPackage", MessageType.Warning, PigWarning.NON_PACKAGE_REDUCE_PLAN_ROOT);
return;
}
POPackage pack = (POPackage)root;
List<PhysicalOperator> packSuccessors =
mr.reducePlan.getSuccessors(root);
if (packSuccessors == null || packSuccessors.size() != 1) return;
PhysicalOperator successor = packSuccessors.get(0);
// Need to check if this is a distinct.
if (successor instanceof POFilter) {
/*
Later
POFilter filter = (POFilter)successor;
PhysicalPlan filterInner = filter.getPlan();
if (onKeysOnly(filterInner)) {
// TODO move filter to combiner
// TODO Patch up projects of filter successor
// Call ourselves again, as we may be able to move the next
// operator too.
visitMROp(mr);
} else if (algebraic(filterInner)) {
// TODO Duplicate filter to combiner
}
*/
} else if (successor instanceof POForEach) {
POForEach foreach = (POForEach)successor;
List<PhysicalPlan> feInners = foreach.getInputPlans();
List<ExprType> ap = algebraic(feInners, foreach.getToBeFlattened());
if (ap != null) {
log.info("Choosing to move algebraic foreach to combiner");
// Need to insert two new foreachs - one in the combine
// and one in the map plan which will be based on the reduce foreach.
// The map foreach will have one inner plan for each
// inner plan in the foreach we're duplicating. For
// projections, the plan will be the same. For algebraic
// udfs, the plan will have the initial version of the function.
// The combine foreach will have one inner plan for each
// inner plan in the foreach we're duplicating. For
// projections, the project operators will be changed to
// project the same column as its position in the
// foreach. For algebraic udfs, the plan will have the
// intermediate version of the function. The input to the
// udf will be a POProject which will project the column
// corresponding to the position of the udf in the foreach.
// In the inner plans of the reduce foreach for
// projections, the project operators will be changed to
// project the same column as its position in the
// foreach. For algebraic udfs, the plan will have the
// final version of the function. The input to the
// udf will be a POProject which will project the column
// corresponding to the position of the udf in the foreach.
if (mr.combinePlan.getRoots().size() != 0) {
messageCollector.collect("Wasn't expecting to find anything already "
+ "in the combiner!", MessageType.Warning, PigWarning.NON_EMPTY_COMBINE_PLAN);
return;
}
mr.combinePlan = new PhysicalPlan();
try {
// If we haven't already found the key (and thus the
// key type) we need to figure out the key type now.
if (mKeyType == 0) {
mKeyType = rearrange.getKeyType();
}
POForEach mfe = foreach.clone();
POForEach cfe = foreach.clone();
fixUpForeachs(mfe, cfe, foreach, ap);
// Use the ExprType list returned from algebraic to tell
// POCombinerPackage which fields need to be projected and
// which need to be placed in bags.
int numFields = (mKeyField >= ap.size()) ? mKeyField + 1 :
ap.size();
boolean[] bags = new boolean[numFields];
for (int i = 0; i < ap.size(); i++) {
if (ap.get(i) == ExprType.SIMPLE_PROJECT) bags[i] = false;
else bags[i] = true;
}
bags[mKeyField] = false;
// Use the POCombiner package in the combine plan
// as it needs to act differently than the regular
// package operator.
POCombinerPackage combinePack =
new POCombinerPackage(pack, bags, keyFieldPositions);
mr.combinePlan.add(combinePack);
mr.combinePlan.add(cfe);
mr.combinePlan.connect(combinePack, cfe);
// No need to connect projections in cfe to cp, because
// PigCombiner directly attaches output from package to
// root of remaining plan.
POLocalRearrange mlr = rearrange.clone();
fixUpRearrange(mlr);
// A specialized local rearrange operator will replace
// the normal local rearrange in the map plan. This behaves
// like the regular local rearrange in the getNext()
// as far as getting its input and constructing the
// "key" out of the input. It then returns a tuple with
// two fields - the key in the first position and the
// "value" inside a bag in the second position. This output
// format resembles the format out of a Package. This output
// will feed to the map foreach which expects this format.
// If the key field isn't in the project of the combiner or map foreach,
// it is added to the end (This is required so that we can
// set up the inner plan of the new Local Rearrange leaf in the map
// and combine plan to contain just the project of the key).
patchUpMap(mr.mapPlan, getPreCombinerLR(rearrange), mfe, mlr);
POLocalRearrange clr = rearrange.clone();
fixUpRearrange(clr);
mr.combinePlan.add(clr);
mr.combinePlan.connect(cfe, clr);