tokenizerVertex.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
Map<String, String> mapEnv = new HashMap<String, String>();
MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
tokenizerVertex.setTaskEnvironment(mapEnv);
Class<? extends TezRootInputInitializer> initializerClazz = MRInputAMSplitGenerator.class;
InputDescriptor id = new InputDescriptor(MRInput.class.getName()).
setUserPayload(mapInputPayload);
tokenizerVertex.addInput("MRInput", id, initializerClazz);
byte[] finalReducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
Vertex summerVertex = new Vertex("summer",
new ProcessorDescriptor(
SumProcessor.class.getName()).setUserPayload(finalReducePayload),
1, MRHelpers.getReduceResource(finalReduceConf));
summerVertex.setJavaOpts(
MRHelpers.getReduceJavaOpts(finalReduceConf));
Map<String, String> reduceEnv = new HashMap<String, String>();
MRHelpers.updateEnvironmentForMRTasks(finalReduceConf, reduceEnv, false);
summerVertex.setTaskEnvironment(reduceEnv);
OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
.setUserPayload(finalReducePayload);
summerVertex.addOutput("MROutput", od, MROutputCommitter.class);
DAG dag = new DAG("WordCount");
dag.addVertex(tokenizerVertex)
.addVertex(summerVertex)
.addEdge(
new Edge(tokenizerVertex, summerVertex, new EdgeProperty(
DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
SchedulingType.SEQUENTIAL,
new OutputDescriptor(OnFileSortedOutput.class.getName())
.setUserPayload(mapPayload),
new InputDescriptor(ShuffledMergedInput.class.getName())
.setUserPayload(finalReducePayload))));
return dag;
}