rightMROpr.requestedParallelism = 1; // we need exactly one reducer for indexing job.
// At this point, we must be operating on map plan of right input and it would contain nothing else other then a POLoad.
POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);
joinOp.setSignature(rightLoader.getSignature());
LoadFunc rightLoadFunc = rightLoader.getLoadFunc();
List<String> udfs = new ArrayList<String>();
if(IndexableLoadFunc.class.isAssignableFrom(rightLoadFunc.getClass())) {
joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());
joinOp.setRightInputFileName(rightLoader.getLFile().getFileName());
udfs.add(rightLoader.getLFile().getFuncSpec().toString());
// we don't need the right MROper since
// the right loader is an IndexableLoadFunc which can handle the index
// itself
MRPlan.remove(rightMROpr);
if(rightMROpr == compiledInputs[0]) {
compiledInputs[0] = null;
} else if(rightMROpr == compiledInputs[1]) {
compiledInputs[1] = null;
}
rightMROpr = null;
// validate that the join keys in merge join are only
// simple column projections or '*' and not expression - expressions
// cannot be handled when the index is built by the storage layer on the sorted
// data when the sorted data (and corresponding index) is written.
// So merge join will be restricted not have expressions as
// join keys
int numInputs = mPlan.getPredecessors(joinOp).size(); // should be 2
for(int i = 0; i < numInputs; i++) {
List<PhysicalPlan> keyPlans = joinOp.getInnerPlansOf(i);
for (PhysicalPlan keyPlan : keyPlans) {
for(PhysicalOperator op : keyPlan) {
if(!(op instanceof POProject)) {
int errCode = 1106;
String errMsg = "Merge join is possible only for simple column or '*' join keys when using " +
rightLoader.getLFile().getFuncSpec() + " as the loader";
throw new MRCompilerException(errMsg, errCode, PigException.INPUT);
}
}
}
}
} else {
// Replace POLoad with indexer.
LoadFunc loadFunc = rightLoader.getLoadFunc();
if (! (OrderedLoadFunc.class.isAssignableFrom(loadFunc.getClass()))){
int errCode = 1104;
String errMsg = "Right input of merge-join must implement " +
"OrderedLoadFunc interface. The specified loader "
+ loadFunc + " doesn't implement it";
throw new MRCompilerException(errMsg,errCode);