}
// ---- Skewed join compilation (interior of a larger method) ----
// NOTE(review): 'op', 'compiledInputs', 'curMROp', 'plan', 'scope' and 'nig'
// are declared in enclosing scope not visible here.
//change plan to store the first join input into a temp file
// Materialize the first join input to a temp file so it can be sampled and
// then re-read as the left side of the join.
FileSpec fSpec = getTempFileSpec();
MapReduceOper mro = compiledInputs[0];
POStore str = getStore();
// Point the store at the temp file that will hold the first input.
str.setSFile(fSpec);
// Append the store as the leaf of whichever phase of the first input's MR
// job is still open, then mark that phase closed.
if (!mro.isMapDone()) {
mro.mapPlan.addAsLeaf(str);
mro.setMapDoneSingle(true);
} else if (mro.isMapDone() && !mro.isReduceDone()) {
mro.reducePlan.addAsLeaf(str);
mro.setReduceDone(true);
} else {
// Both phases already closed: nothing can be appended -- compiler bug.
int errCode = 2022;
String msg = "Both map and reduce phases have been done. This is unexpected while compiling.";
throw new PlanException(msg, errCode, PigException.BUG);
}
// Temp file where the sampling job will write its partition information
// (used to distribute skewed keys across reducers).
FileSpec partitionFile = getTempFileSpec();
int rp = op.getRequestedParallelism();
// Build the sampling job over the stored first input; returns the sample
// job plus the parallelism it computed.
Pair<MapReduceOper, Integer> sampleJobPair = getSkewedJoinSampleJob(op, mro, fSpec, partitionFile, rp);
rp = sampleJobPair.second;
// set parallelism of SkewedJoin as the value calculated by sampling job
// if "parallel" is specified in join statement, "rp" is equal to that number
// if not specified, use the value that sampling process calculated
// based on default.
op.setRequestedParallelism(rp);
// load the temp file for first table as input of join
MapReduceOper[] joinInputs = new MapReduceOper[] {startNew(fSpec, sampleJobPair.first), compiledInputs[1]};
MapReduceOper[] rearrangeOutputs = new MapReduceOper[2];
// Temporarily narrow compiledInputs to just the first input while its
// local-rearrange is compiled.
compiledInputs = new MapReduceOper[] {joinInputs[0]};
// run POLocalRearrange for first join table
POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)), rp);
try {
// Index 0 tags tuples as originating from the first (left) join input.
lr.setIndex(0);
} catch (ExecException e) {
int errCode = 2058;
String msg = "Unable to set index on newly created POLocalRearrange.";
throw new PlanException(msg, errCode, PigException.BUG, e);
}
// Fetch the join-key plans of the first input, keyed by its predecessor in
// the physical plan. The cast is unchecked; MultiMap.get presumably returns
// a List -- verify against the MultiMap API.
List<PhysicalOperator> l = plan.getPredecessors(op);
MultiMap<PhysicalOperator, PhysicalPlan> joinPlans = op.getJoinPlans();
List<PhysicalPlan> groups = (List<PhysicalPlan>)joinPlans.get(l.get(0));
// check the type of group keys, if there are more than one field, the key is TUPLE.
byte type = DataType.TUPLE;
if (groups.size() == 1) {
type = groups.get(0).getLeaves().get(0).getResultType();
}
lr.setKeyType(type);
lr.setPlans(groups);
lr.setResultType(DataType.TUPLE);
// NOTE(review): visiting appears to compile 'lr' into curMROp (curMROp is
// read immediately after) -- confirm against the visitor implementation.
lr.visit(this);
// Propagate the larger requested parallelism up to the enclosing job.
if(lr.getRequestedParallelism() > curMROp.requestedParallelism)
curMROp.requestedParallelism = lr.getRequestedParallelism();
rearrangeOutputs[0] = curMROp;
// Switch to compiling the second join input.
compiledInputs = new MapReduceOper[] {joinInputs[1]};
// if the map for current input is already closed, then start a new job
// (store the second input to a temp file, close its reduce phase, and start
// a fresh map-only job that loads it back).
if (compiledInputs[0].isMapDone() && !compiledInputs[0].isReduceDone()) {
FileSpec f = getTempFileSpec();
POStore s = getStore();
s.setSFile(f);
compiledInputs[0].reducePlan.addAsLeaf(s);
compiledInputs[0].setReduceDone(true);
compiledInputs[0] = startNew(f, compiledInputs[0]);
}