} else {
dataSource = MRInputLegacy.createConfigBuilder(mapStageConf, SleepInputFormat.class)
.generateSplitsInAM(generateSplitsInAM).build();
}
DAG dag = DAG.create("MRRSleepJob");
String jarPath = ClassUtil.findContainingJar(getClass());
if (jarPath == null) {
throw new TezUncheckedException("Could not find any jar containing"
+ " MRRSleepJob.class in the classpath");
}
Path remoteJarPath = remoteFs.makeQualified(
new Path(remoteStagingDir, "dag_job.jar"));
remoteFs.copyFromLocalFile(new Path(jarPath), remoteJarPath);
FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);
TokenCache.obtainTokensForNamenodes(this.credentials, new Path[] { remoteJarPath },
mapStageConf);
Map<String, LocalResource> commonLocalResources =
new HashMap<String, LocalResource>();
LocalResource dagJarLocalRsrc = LocalResource.newInstance(
ConverterUtils.getYarnUrlFromPath(remoteJarPath),
LocalResourceType.FILE,
LocalResourceVisibility.APPLICATION,
jarFileStatus.getLen(),
jarFileStatus.getModificationTime());
commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);
List<Vertex> vertices = new ArrayList<Vertex>();
UserPayload mapUserPayload = TezUtils.createUserPayloadFromConf(mapStageConf);
int numTasks = generateSplitsInAM ? -1 : numMapper;
Vertex mapVertex = Vertex.create("map", ProcessorDescriptor.create(
MapProcessor.class.getName()).setUserPayload(mapUserPayload), numTasks)
.addTaskLocalFiles(commonLocalResources);
mapVertex.addDataSource("MRInput", dataSource);
vertices.add(mapVertex);
if (iReduceStagesCount > 0
&& numIReducer > 0) {
for (int i = 0; i < iReduceStagesCount; ++i) {
Configuration iconf =
intermediateReduceStageConfs[i];
UserPayload iReduceUserPayload = TezUtils.createUserPayloadFromConf(iconf);
Vertex ivertex = Vertex.create("ireduce" + (i + 1),
ProcessorDescriptor.create(ReduceProcessor.class.getName()).
setUserPayload(iReduceUserPayload), numIReducer);
ivertex.addTaskLocalFiles(commonLocalResources);
vertices.add(ivertex);
}
}
Vertex finalReduceVertex = null;
if (numReducer > 0) {
UserPayload reducePayload = TezUtils.createUserPayloadFromConf(finalReduceConf);
finalReduceVertex = Vertex.create("reduce", ProcessorDescriptor.create(
ReduceProcessor.class.getName()).setUserPayload(reducePayload), numReducer);
finalReduceVertex.addTaskLocalFiles(commonLocalResources);
finalReduceVertex.addDataSink("MROutput", MROutputLegacy.createConfigBuilder(finalReduceConf,
NullOutputFormat.class).build());
vertices.add(finalReduceVertex);
} else {
// Map-only job: no final reduce vertex, so the map vertex gets the output sink.
mapVertex.addDataSink("MROutput",
MROutputLegacy.createConfigBuilder(mapStageConf, NullOutputFormat.class).build());
}
Map<String, String> partitionerConf = Maps.newHashMap();
partitionerConf.put(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
.newBuilder(IntWritable.class.getName(), IntWritable.class.getName(),
HashPartitioner.class.getName(), partitionerConf).configureInput().useLegacyInput()
.done().build();
for (int i = 0; i < vertices.size(); ++i) {
dag.addVertex(vertices.get(i));
if (i != 0) {
dag.addEdge(
Edge.create(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
}
}
return dag;