// Append the foreach (nfe1) as a leaf of the map plan.
mro.mapPlan.addAsLeaf(nfe1);
// Now set up a POLocalRearrange which has the constant "all" as the key; the
// output of the foreach will be the "value" out of the POLocalRearrange.
// NOTE(review): presumably a single constant key is used so that every record
// reaches the same reducer - confirm.
PhysicalPlan ep1 = new PhysicalPlan();
ConstantExpression ce = new ConstantExpression(new OperatorKey(scope,nig.getNextNodeId(scope)));
ce.setValue("all");
ce.setResultType(DataType.CHARARRAY);
ep1.add(ce);
// The local rearrange takes a list of key plans; here it is the single
// constant-expression plan built above.
List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
eps.add(ep1);
POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
try {
lr.setIndex(0);
} catch (ExecException e) {
// A failure here is reported as an internal error (PigException.BUG) with
// error code 2058, keeping the original exception as the cause.
int errCode = 2058;
String msg = "Unable to set index on newly created POLocalRearrange.";
throw new PlanException(msg, errCode, PigException.BUG, e);
}
// The key type matches the result type of the constant key expression.
lr.setKeyType(DataType.CHARARRAY);
lr.setPlans(eps);
lr.setResultType(DataType.TUPLE);
// The local rearrange reuses the alias of the sort operator.
lr.setAlias(sort.getAlias());
// Wire the local rearrange after the foreach and mark the map plan as done.
mro.mapPlan.add(lr);
mro.mapPlan.connect(nfe1, lr);
mro.setMapDone(true);
// Reduce side: a package operator with a CHARARRAY key, a single input, and
// that input flagged as non-inner.
POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
pkg.setKeyType(DataType.CHARARRAY);
pkg.setNumInps(1);
boolean[] inner = {false};
pkg.setInner(inner);
mro.reducePlan.add(pkg);
// Let's start building the plan which will hold the sort
// for the foreach
PhysicalPlan fe2Plan = new PhysicalPlan();
// Top level project which just projects the tuple which is coming
// from the foreach after the package
// NOTE(review): column 1 is presumably the bag of values emitted by the
// package (result type is set to BAG below) - confirm.
POProject topPrj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
topPrj.setColumn(1);
topPrj.setResultType(DataType.BAG);
topPrj.setOverloaded(true);
fe2Plan.add(topPrj);
// the projections which will form sort plans
List<PhysicalPlan> nesSortPlanLst = new ArrayList<PhysicalPlan>();
if (sortKeyPlans != null) {
// Explicit sort key plans were supplied: use them as-is, in order.
for(int i=0; i<sortKeyPlans.size(); i++) {
nesSortPlanLst.add(sortKeyPlans.get(i));
}
}else{
// No explicit key plans: derive the sort columns from the sort
// operator's own sort plans.
Pair<POProject, Byte>[] sortProjs = null;
try{
sortProjs = getSortCols(sort.getSortPlans());
}catch(Exception e) {
// Any failure is rethrown unchecked with the original as the cause.
throw new RuntimeException(e);
}
// Set up the projections of the key columns
if (sortProjs == null) {
// No individual sort columns available: sort on a single star
// projection with TUPLE result type.
PhysicalPlan ep = new PhysicalPlan();
POProject prj = new POProject(new OperatorKey(scope,
nig.getNextNodeId(scope)));
prj.setStar(true);
prj.setOverloaded(false);
prj.setResultType(DataType.TUPLE);
ep.add(prj);
nesSortPlanLst.add(ep);
} else {
// One projection per sort column. The projected column index is the
// position i within sortProjs, and the result type is taken from the
// second element of the pair.
for (int i=0; i<sortProjs.length; i++) {
POProject prj =
new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
prj.setResultType(sortProjs[i].second);
if(sortProjs[i].first != null && sortProjs[i].first.isProjectToEnd()){
if(i != sortProjs.length -1){
//project to end has to be the last sort column
throw new AssertionError("Project-range to end (x..)" +
" is supported in order-by only as last sort column");
}
// NOTE(review): the break below leaves the loop before prj is wrapped
// in a plan and added to nesSortPlanLst, so the project-to-end
// projection configured here is never added to the sort plans -
// confirm whether this is intentional.
prj.setProjectToEnd(i);
break;
}
else{
prj.setColumn(i);
}
prj.setOverloaded(false);
PhysicalPlan ep = new PhysicalPlan();
ep.add(prj);
nesSortPlanLst.add(ep);
}
}
}
// Install the assembled plans on the sort operator, then chain it after the
// top level projection inside fe2Plan.
sort.setSortPlans(nesSortPlanLst);
sort.setResultType(DataType.BAG);
fe2Plan.add(sort);
fe2Plan.connect(topPrj, sort);
// The plan which will have a constant representing the
// degree of parallelism for the final order by map-reduce job.
// This will either come from an "order by parallel x" in the script
// or will be the default number of reducers for the cluster if
// "parallel x" is not used in the script
PhysicalPlan rpep = new PhysicalPlan();
ConstantExpression rpce = new ConstantExpression(new OperatorKey(scope,nig.getNextNodeId(scope)));
rpce.setRequestedParallelism(rp);
// Start from the requested parallelism; a non-positive value means it was
// not requested, so a fallback is resolved below.
int val = rp;
if(val<=0){
HExecutionEngine eng = pigContext.getExecutionEngine();
if(pigContext.getExecType() != ExecType.LOCAL){
try {
// Fallback order: pigContext.defaultParallel, then the job
// configuration's number of reduce tasks, then 1.
// The first check is always true here because of the enclosing
// val<=0 test.
if(val<=0)
val = pigContext.defaultParallel;
if (val<=0)
val = eng.getJobConf().getNumReduceTasks();
if (val<=0)
val = 1;
} catch (Exception e) {
// Reported as a remote-environment problem with error code 6015,
// keeping the original exception as the cause.
int errCode = 6015;
String msg = "Problem getting the default number of reduces from the Job Client.";
throw new MRCompilerException(msg, errCode, PigException.REMOTE_ENVIRONMENT, e);
}
} else {
val = 1; // local mode, set it to 1
}
}
// val is initialised from rp and only reassigned when rp <= 0, so this
// expression yields val in both cases.
int parallelismForSort = (rp <= 0 ? val : rp);
rpce.setValue(parallelismForSort);
rpce.setResultType(DataType.INTEGER);
rpep.add(rpce);
// Plans to generate, in order: the parallelism constant first, then the
// nested sort plan.
List<PhysicalPlan> genEps = new ArrayList<PhysicalPlan>();
genEps.add(rpep);
genEps.add(fe2Plan);