lrTez.setKeyType(type);
lrTez.setPlans(groups);
lrTez.setSkewedJoin(true);
lrTez.setResultType(DataType.TUPLE);
POIdentityInOutTez identityInOutTez = new POIdentityInOutTez(
OperatorKey.genOpKey(scope), lrTez);
identityInOutTez.setInputKey(prevOp.getOperatorKey().toString());
joinJobs[0].plan.addAsLeaf(identityInOutTez);
joinJobs[0].setClosed(true);
joinJobs[0].markSampleBasedPartitioner();
rearrangeOutputs[0] = joinJobs[0];
compiledInputs = new TezOperator[] {joinInputs[1]};
// Run POPartitionRearrange for second join table. Note we set the
// parallelism of POPartitionRearrange to -1, so its parallelism
// will be determined by the size of streaming table.
POPartitionRearrangeTez pr =
new POPartitionRearrangeTez(OperatorKey.genOpKey(scope));
try {
pr.setIndex(1);
} catch (ExecException e) {
int errCode = 2058;
String msg = "Unable to set index on newly created POPartitionRearrange.";
throw new PlanException(msg, errCode, PigException.BUG, e);
}
groups = joinPlans.get(l.get(1));
pr.setPlans(groups);
pr.setKeyType(type);
pr.setSkewedJoin(true);
pr.setResultType(DataType.TUPLE);
joinJobs[1].plan.addAsLeaf(pr);
joinJobs[1].setClosed(true);
rearrangeOutputs[1] = joinJobs[1];
compiledInputs = rearrangeOutputs;
// Create POGlobalRearrange
POGlobalRearrange gr =
new POGlobalRearrange(OperatorKey.genOpKey(scope), rp);
// Skewed join has its own special partitioner
gr.setResultType(DataType.TUPLE);
gr.visit(this);
joinJobs[2] = curTezOp;
joinJobs[2].setRequestedParallelism(rp);
compiledInputs = new TezOperator[] {joinJobs[2]};
// Create POPackage
POPackage pkg = getPackage(2, type);
pkg.setResultType(DataType.TUPLE);
boolean [] inner = op.getInnerFlags();
pkg.getPkgr().setInner(inner);
pkg.visit(this);
compiledInputs = new TezOperator[] {curTezOp};
// Create POForEach
List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
List<Boolean> flat = new ArrayList<Boolean>();
// Add corresponding POProjects
for (int i=0; i < 2; i++) {
ep = new PhysicalPlan();
POProject prj = new POProject(OperatorKey.genOpKey(scope));
prj.setColumn(i+1);
prj.setOverloaded(false);
prj.setResultType(DataType.BAG);
ep.add(prj);
eps.add(ep);
if (!inner[i]) {
// Add an empty bag for outer join
CompilerUtils.addEmptyBagOuterJoin(ep, op.getSchema(i));
}
flat.add(true);
}
POForEach fe =
new POForEach(OperatorKey.genOpKey(scope), -1, eps, flat);
fe.setResultType(DataType.TUPLE);
fe.visit(this);
// Connect vertices
lrTez.setOutputKey(joinJobs[0].getOperatorKey().toString());
lrTezSample.setOutputKey(sampleJobPair.first.getOperatorKey().toString());
identityInOutTez.setOutputKey(joinJobs[2].getOperatorKey().toString());
pr.setOutputKey(joinJobs[2].getOperatorKey().toString());
TezEdgeDescriptor edge = joinJobs[0].inEdges.get(prevOp.getOperatorKey());
joinJobs[0].setUseMRMapSettings(prevOp.isUseMRMapSettings());
// TODO: Convert to unsorted shuffle after TEZ-661