// Build the two per-input rearrange stages of a two-way join: input 0 gets a
// POLocalRearrange, input 1 gets a POPartitionRearrange driven by a partition
// file. Each stage is compiled by pointing compiledInputs at a single input,
// visiting the rearrange operator, and capturing curMROp afterwards.
//
// joinInputs[0] is a fresh operator obtained from startNew(fSpec, sampleJobPair.first);
// joinInputs[1] is the second of the inputs compiled so far.
// NOTE(review): presumably startNew creates a new MR job that loads fSpec and is
// linked after sampleJobPair.first -- confirm against startNew.
MapReduceOper[] joinInputs = new MapReduceOper[] {startNew(fSpec, sampleJobPair.first), compiledInputs[1]};
// Collects the value of curMROp observed after each of the two rearranges is visited.
MapReduceOper[] rearrangeOutputs = new MapReduceOper[2];
// Narrow compiledInputs to the first join input only, before visiting its rearrange.
// NOTE(review): presumably lr.visit(this) below reads compiledInputs and updates
// curMROp -- confirm against the visitor method for POLocalRearrange.
compiledInputs = new MapReduceOper[] {joinInputs[0]};
// run POLocalRearrange for first join table
POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)), rp);
try {
// Index 0 tags this rearrange as belonging to the first join input.
lr.setIndex(0);
} catch (ExecException e) {
// Rewrap as a PlanException flagged as an internal bug, keeping the cause.
int errCode = 2058;
String msg = "Unable to set index on newly created POLocalRearrange.";
throw new PlanException(msg, errCode, PigException.BUG, e);
}
// Predecessors of the join operator in the plan; l.get(0) and l.get(1) are used
// below as the keys for looking up each input's join-key plans.
List<PhysicalOperator> l = plan.getPredecessors(op);
MultiMap<PhysicalOperator, PhysicalPlan> joinPlans = op.getJoinPlans();
// Unchecked cast: the value returned by MultiMap.get is narrowed to List<PhysicalPlan>.
List<PhysicalPlan> groups = (List<PhysicalPlan>)joinPlans.get(l.get(0));
// Determine the type of the group key: if there is more than one key plan the key
// is a TUPLE; with exactly one plan, the key type is the result type of that
// plan's first leaf.
byte type = DataType.TUPLE;
if (groups.size() == 1) {
type = groups.get(0).getLeaves().get(0).getResultType();
}
lr.setKeyType(type);
lr.setPlans(groups);
lr.setResultType(DataType.TUPLE);
lr.visit(this);
// Raise the current MR operator's requested parallelism to the rearrange's value
// if that is larger; it is never lowered here.
if(lr.getRequestedParallelism() > curMROp.requestedParallelism)
curMROp.requestedParallelism = lr.getRequestedParallelism();
// Remember the MR operator that was current after compiling the first input.
rearrangeOutputs[0] = curMROp;
// Switch compiledInputs to the second join input only.
compiledInputs = new MapReduceOper[] {joinInputs[1]};
// if the map for current input is already closed, then start a new job
// Concretely: when the map is done but the reduce is not, a store writing to a
// temp file spec is appended as a leaf of the reduce plan, the reduce is marked
// done, and the input is replaced by the operator returned from startNew(f, ...).
if (compiledInputs[0].isMapDone() && !compiledInputs[0].isReduceDone()) {
FileSpec f = getTempFileSpec();
POStore s = getStore();
s.setSFile(f);
compiledInputs[0].reducePlan.addAsLeaf(s);
compiledInputs[0].setReduceDone(true);
compiledInputs[0] = startNew(f, compiledInputs[0]);
}
// run POPartitionRearrange for second join table
// lr is reused; it now refers to a POPartitionRearrange (hence the cast below).
lr = new POPartitionRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)), rp);
try {
// Index 1 tags this rearrange as belonging to the second join input.
lr.setIndex(1);
} catch (ExecException e) {
// NOTE(review): the message names POLocalRearrange although the operator created
// here is a POPartitionRearrange -- confirm whether the wording is intentional.
int errCode = 2058;
String msg = "Unable to set index on newly created POLocalRearrange.";
throw new PlanException(msg, errCode, PigException.BUG, e);
}
// Hand the partition file's name to the partition rearrange.
((POPartitionRearrange)lr).setPartitionFile(partitionFile.getFileName());
// Join-key plans for the second input (same unchecked cast as above).
groups = (List<PhysicalPlan>)joinPlans.get(l.get(1));
// Note the setter order differs from the first input: plans are set before the key type.
lr.setPlans(groups);
// The key type computed from the first input's plans is reused; it is not
// recomputed from the second input's plans.
// NOTE(review): presumably both sides are expected to have matching key types -- confirm.
lr.setKeyType(type);
// The first input's rearrange used DataType.TUPLE as its result type; this one uses BAG.
lr.setResultType(DataType.BAG);
lr.visit(this);
// Same parallelism bump as for the first input.
if(lr.getRequestedParallelism() > curMROp.requestedParallelism)
curMROp.requestedParallelism = lr.getRequestedParallelism();
rearrangeOutputs[1] = curMROp;
// Both rearranged operators become the compiled inputs for the code that follows.
compiledInputs = rearrangeOutputs;
// create POGlobalRearrange