}
else{
// Neither the map nor the reduce phase of this operator is open for more
// operators — the compiler should never reach this state, so flag an internal bug.
int errCode = 2022;
String msg = "Both map and reduce phases have been done. This is unexpected while compiling.";
throw new PlanException(msg, errCode, PigException.BUG);
}
// Hand the right-input pipeline to the join operator; the join replays it at runtime.
joinOp.setupRightPipeline(rightPipelinePlan);

// At this point, we must be operating on map plan of right input and it would contain nothing else other than a POLoad.
POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);
// Remember the right input's original loader spec so the join can re-open that input later.
joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());

// Replace POLoad with indexer. MergeJoinIndexer is configured via three
// string arguments:
//   [0] the original loader FuncSpec of the right input,
//   [1] the serialized inner (key) plans of the right join input,
//   [2] the serialized right-side pipeline plan.
String[] indexerArgs = new String[3];
indexerArgs[0] = rightLoader.getLFile().getFuncSpec().toString();
// The right input must be loadable via a SamplableLoader; otherwise indexing
// is impossible and compilation fails with a user-facing (1104) error.
if (! (PigContext.instantiateFuncFromSpec(indexerArgs[0]) instanceof SamplableLoader)){
int errCode = 1104;
String errMsg = "Right input of merge-join must implement SamplableLoader interface. The specified loader " + indexerArgs[0] + " doesn't implement it";
throw new MRCompilerException(errMsg,errCode);
}
// Inner plans of input #1 (the right side) compute the join keys.
List<PhysicalPlan> rightInpPlans = joinOp.getInnerPlansOf(1);
indexerArgs[1] = ObjectSerializer.serialize((Serializable)rightInpPlans);
indexerArgs[2] = ObjectSerializer.serialize(rightPipelinePlan);
// Same file name as before, but loaded through MergeJoinIndexer so the map
// phase of this job emits index entries instead of raw records.
FileSpec lFile = new FileSpec(rightLoader.getLFile().getFileName(),new FuncSpec(MergeJoinIndexer.class.getName(), indexerArgs));
rightLoader.setLFile(lFile);

// Loader of mro will return a tuple of form (key1, key2, ..,filename, offset)
// Now set up a POLocalRearrange which has "all" as the key and tuple fetched
// by loader as the "value" of POLocalRearrange
// Sorting of index can possibly be achieved by using Hadoop sorting between map and reduce instead of Pig doing sort. If that is so,
// it will simplify lot of the code below.
PhysicalPlan lrPP = new PhysicalPlan();
ConstantExpression ce = new ConstantExpression(new OperatorKey(scope,nig.getNextNodeId(scope)));
// Constant key "all" groups every index tuple under a single key.
ce.setValue("all");
ce.setResultType(DataType.CHARARRAY);
lrPP.add(ce);
List<PhysicalPlan> lrInnerPlans = new ArrayList<PhysicalPlan>();
lrInnerPlans.add(lrPP);
POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
// Single-input job, so the rearrange input index is 0.
lr.setIndex(0);
lr.setKeyType(DataType.CHARARRAY);
lr.setPlans(lrInnerPlans);
lr.setResultType(DataType.TUPLE);
rightMROpr.mapPlan.addAsLeaf(lr);
// Map phase of the indexing job is complete; remaining operators go in reduce.
rightMROpr.setMapDone(true);

// On the reduce side of this indexing job, there will be a global rearrange followed by POSort.
// Output of POSort will be index file dumped on the DFS.
// First add POPackage.
POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
pkg.setKeyType(DataType.CHARARRAY);
// One input, non-inner (no flattening/inner-join semantics on the bag).
pkg.setNumInps(1);
pkg.setInner(new boolean[]{false});
rightMROpr.reducePlan.add(pkg);
// Next project tuples from the bag created by POPackage.
// Column 1 of the package output holds the bag of index tuples (column 0 is the key).
POProject topPrj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
topPrj.setColumn(1);
topPrj.setResultType(DataType.TUPLE);
// Overloaded projection streams the bag's tuples out one at a time rather than
// emitting the bag itself — NOTE(review): inferred from usage; confirm against POProject.
topPrj.setOverloaded(true);
rightMROpr.reducePlan.add(topPrj);
rightMROpr.reducePlan.connect(pkg, topPrj);
// Now create and add POSort. Sort plan is project *.
List<PhysicalPlan> sortPlans = new ArrayList<PhysicalPlan>(1);
PhysicalPlan innerSortPlan = new PhysicalPlan();
POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
// project * : sort on the entire index tuple.
prj.setStar(true);
prj.setOverloaded(false);
prj.setResultType(DataType.TUPLE);
innerSortPlan.add(prj);
sortPlans.add(innerSortPlan);
// Currently we assume all columns are in asc order.
// Add two because filename and offset are added by Indexer in addition to keys.
List<Boolean> mAscCols = new ArrayList<Boolean>(rightInpPlans.size()+2);
for(int i=0; i< rightInpPlans.size()+2; i++)
mAscCols.add(true);
// The second constructor arg (1) appears to request a single reducer so the
// index lands in one sorted file — TODO confirm against POSort's constructor.
POSort sortOp = new POSort(new OperatorKey(scope,nig.getNextNodeId(scope)),1, null, sortPlans, mAscCols, null);
rightMROpr.reducePlan.add(sortOp);
rightMROpr.reducePlan.connect(topPrj, sortOp);
// Store the sorted index to a temp file on the DFS and tell the join where to find it.
POStore st = getStore();
FileSpec strFile = getTempFileSpec();
st.setSFile(strFile);
rightMROpr.reducePlan.addAsLeaf(st);
rightMROpr.setReduceDone(true);
joinOp.setIndexFile(strFile);

// We are done with right side. Lets work on left now.
// Join will be materialized in leftMROper.
if(!curMROp.mapDone) // Life is easy: map phase still open, join runs map-side here.
curMROp.mapPlan.addAsLeaf(joinOp);
else if(!curMROp.reduceDone){ // This is a map-side join. Close this MROper and start afresh.
// Spill the current operator's output to a temp file, finish it, and start a
// new MR operator that loads that file and performs the join in its map phase.
POStore leftStore = getStore();
FileSpec leftStrFile = getTempFileSpec();
leftStore.setSFile(leftStrFile);
curMROp.setReduceDone(true);
curMROp = startNew(leftStrFile, curMROp);
curMROp.mapPlan.addAsLeaf(joinOp);
}
else{
// Same invariant violation as on the right side: both phases closed mid-compile is a bug.
int errCode = 2022;
String msg = "Both map and reduce phases have been done. This is unexpected while compiling.";
throw new PlanException(msg, errCode, PigException.BUG);
}
// We want to ensure indexing job runs prior to actual join job. So, connect them in order.
MRPlan.connect(rightMROpr, curMROp);
}