private EdgeProperty newEdge(TezOperator from, TezOperator to)
throws IOException {
TezEdgeDescriptor edge = to.inEdges.get(from.getOperatorKey());
PhysicalPlan combinePlan = edge.combinePlan;
InputDescriptor in = InputDescriptor.create(edge.inputClassName);
OutputDescriptor out = OutputDescriptor.create(edge.outputClassName);
Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
if (!combinePlan.isEmpty()) {
addCombiner(combinePlan, to, conf);
}
List<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(from.plan,
POLocalRearrangeTez.class);
for (POLocalRearrangeTez lr : lrs) {
if (lr.getOutputKey().equals(to.getOperatorKey().toString())) {
byte keyType = lr.getKeyType();
setIntermediateOutputKeyValue(keyType, conf, to, lr.isConnectedToPackage());
// In case of secondary key sort, main key type is the actual key type
conf.set("pig.reduce.key.type", Byte.toString(lr.getMainKeyType()));
break;
}
}
conf.setIfUnset(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
MRPartitioner.class.getName());
if (edge.getIntermediateOutputKeyClass() != null) {
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS,
edge.getIntermediateOutputKeyClass());
}
if (edge.getIntermediateOutputValueClass() != null) {
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS,
edge.getIntermediateOutputValueClass());
}
if (edge.getIntermediateOutputKeyComparatorClass() != null) {
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS,
edge.getIntermediateOutputKeyComparatorClass());
}
conf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
conf.set("pig.pigContext", ObjectSerializer.serialize(pc));
conf.set("udf.import.list",
ObjectSerializer.serialize(PigContext.getPackageImportList()));
if(to.isGlobalSort() || to.isLimitAfterSort()){
conf.set("pig.sortOrder",
ObjectSerializer.serialize(to.getSortOrder()));
}
if (edge.isUseSecondaryKey()) {
conf.set("pig.secondarySortOrder",
ObjectSerializer.serialize(edge.getSecondarySortOrder()));
conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR,
SecondaryKeyPartitioner.class.getName());
// These need to be on the vertex as well for POShuffleTezLoad to pick them up.
// Tez framework also expects this to be per vertex and not edge. IFile.java picks
// up keyClass and valueClass from vertex config. TODO - check with Tez folks
// In MR - job.setSortComparatorClass() or MRJobConfig.KEY_COMPARATOR
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS,
PigSecondaryKeyComparator.class.getName());
// In MR - job.setOutputKeyClass() or MRJobConfig.OUTPUT_KEY_CLASS
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, NullableTuple.class.getName());
setGroupingComparator(conf, PigSecondaryKeyGroupComparator.class.getName());
}
if (edge.partitionerClass != null) {
conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR,
edge.partitionerClass.getName());
}
conf.set("udf.import.list",
ObjectSerializer.serialize(PigContext.getPackageImportList()));
MRToTezHelper.processMRSettings(conf, globalConf);
in.setUserPayload(TezUtils.createUserPayloadFromConf(conf));
out.setUserPayload(TezUtils.createUserPayloadFromConf(conf));
if (edge.dataMovementType!=DataMovementType.BROADCAST && to.getEstimatedParallelism()!=-1 && (to.isGlobalSort()||to.isSkewedJoin())) {
// Use custom edge
return EdgeProperty.create((EdgeManagerPluginDescriptor)null,