// Turn on the MAPPER_NEW_API flag in the payload configuration and register
// PigInputFormat under INPUTFORMAT_CLASS (declared as an InputFormat).
payloadConf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
payloadConf.setClass(MRConfiguration.INPUTFORMAT_CLASS,
PigInputFormat.class, InputFormat.class);
// Set parent plan for all operators in the Tez plan.
new PhyPlanSetter(tezOp.plan).visit();
// Set the endOfAllInput flag on the physical plan if certain operators that
// use this property (such as STREAM) are present in the plan.
EndOfAllInputSetter.EndOfAllInputChecker checker =
new EndOfAllInputSetter.EndOfAllInputChecker(tezOp.plan);
checker.visit();
if (checker.isEndOfAllInputPresent()) {
// The flag is written as the string "true" under END_OF_INP_IN_MAP; nothing
// is written when the checker reports no such operator.
payloadConf.set(JobControlCompiler.END_OF_INP_IN_MAP, "true");
}
// Configure the classes for incoming shuffles to this TezOp.
// Only a plan with exactly one root is rewired here, and only when that root
// is a POPackage, a POIdentityInOutTez or a POValueInputTez; any other plan
// shape is left untouched by this section.
// TODO: Refactor out resetting input keys, PIG-3957
List<PhysicalOperator> roots = tezOp.plan.getRoots();
if (roots.size() == 1 && roots.get(0) instanceof POPackage) {
    POPackage pack = (POPackage) roots.get(0);

    // Copy the successor list before pack is removed from the plan so the
    // same operators can be connected to the replacement operator below.
    List<PhysicalOperator> succsList = tezOp.plan.getSuccessors(pack);
    if (succsList != null) {
        succsList = new ArrayList<PhysicalOperator>(succsList);
    }
    byte keyType = pack.getPkgr().getKeyType();
    tezOp.plan.remove(pack);
    // The removed package is stored, serialized, under "pig.reduce.package".
    payloadConf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
    setIntermediateOutputKeyValue(keyType, payloadConf, tezOp);

    // Replace the package with a POShuffleTezLoad built from it.
    POShuffleTezLoad newPack = new POShuffleTezLoad(pack);
    if (tezOp.isSkewedJoin()) {
        newPack.setSkewedJoins(true);
    }
    tezOp.plan.add(newPack);

    // Set input keys for POShuffleTezLoad. This is used to identify
    // the inputs that are attached to the POShuffleTezLoad in the
    // backend. A TreeMap keyed by local-rearrange index is used so the
    // input keys are added in ascending index order.
    Map<Integer, String> localRearrangeMap = new TreeMap<Integer, String>();
    for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
        if (tezOp.getSampleOperator() != null && tezOp.getSampleOperator() == pred) {
            // skip sample vertex input
            continue;
        }
        String inputKey = pred.getOperatorKey().toString();
        if (pred.isVertexGroup()) {
            // The input key stays that of the vertex group itself, but the
            // local rearranges are looked up in the plan of its first member.
            pred = mPlan.getOperator(pred.getVertexGroupMembers().get(0));
        }
        LinkedList<POLocalRearrangeTez> lrs =
                PlanHelper.getPhysicalOperators(pred.plan, POLocalRearrangeTez.class);
        for (POLocalRearrangeTez lr : lrs) {
            // Only rearranges that feed a package and target this TezOp count.
            if (lr.isConnectedToPackage()
                    && lr.getOutputKey().equals(tezOp.getOperatorKey().toString())) {
                localRearrangeMap.put((int) lr.getIndex(), inputKey);
            }
        }
    }
    for (String orderedInputKey : localRearrangeMap.values()) {
        newPack.addInputKey(orderedInputKey);
    }

    // Re-attach the successors that used to hang off the removed package.
    if (succsList != null) {
        for (PhysicalOperator succs : succsList) {
            tezOp.plan.connect(newPack, succs);
        }
    }

    // NOTE(review): this repeats the setIntermediateOutputKeyValue call made
    // right after the package was removed, with what appears to be the same
    // key type. It looks redundant but is kept to preserve behavior; confirm
    // against the helper before removing it.
    setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
} else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
    POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
    // TODO Need to fix multiple input key mapping
    // Use the first predecessor that is not a sample aggregation.
    TezOperator identityInOutPred = null;
    for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
        if (!pred.isSampleAggregation()) {
            identityInOutPred = pred;
            break;
        }
    }
    // Fail with a descriptive error instead of a bare NullPointerException
    // when every predecessor is a sample aggregation.
    if (identityInOutPred == null) {
        throw new IllegalStateException(
                "No predecessor other than sample aggregation found for Tez operator "
                + tezOp.getOperatorKey()
                + "; cannot set input key of POIdentityInOutTez");
    }
    identityInOut.setInputKey(identityInOutPred.getOperatorKey().toString());
} else if (roots.size() == 1 && roots.get(0) instanceof POValueInputTez) {
    POValueInputTez valueInput = (POValueInputTez) roots.get(0);

    // Collect the first Tez input of every ReadScalarsTez UDF in the plan.
    LinkedList<String> scalarInputs = new LinkedList<String>();
    for (POUserFunc userFunc : PlanHelper.getPhysicalOperators(tezOp.plan, POUserFunc.class)) {
        if (userFunc.getFunc() instanceof ReadScalarsTez) {
            scalarInputs.add(((ReadScalarsTez) userFunc.getFunc()).getTezInputs()[0]);
        }
    }
    // Make sure we don't pick a scalar input: use the first predecessor whose
    // key is not among the scalar inputs. If every predecessor is a scalar
    // input, the input key is left unset.
    for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
        if (!scalarInputs.contains(pred.getOperatorKey().toString())) {
            valueInput.setInputKey(pred.getOperatorKey().toString());
            break;
        }
    }
}
// NOTE(review): setOutputFormat is defined outside this excerpt; presumably it
// configures the output format on `job` — confirm.
setOutputFormat(job);
// set parent plan in all operators. currently the parent plan is really
// used only when POStream, POSplit are present in the plan
// This is a second PhyPlanSetter pass over tezOp.plan: the root handling above
// may have removed a POPackage and added a POShuffleTezLoad since the first pass.
new PhyPlanSetter(tezOp.plan).visit();
// Serialize the execution plan
// The serialized plan is stored in the payload under the PigProcessor.PLAN key.
payloadConf.set(PigProcessor.PLAN,
ObjectSerializer.serialize(tezOp.plan));