// Configure the Input for stage1.
// Choose the initializer class handed to addInput: none (null) when
// generateSplitsInClient is set, otherwise MRInputAMSplitGenerator.
Class<? extends TezRootInputInitializer> initializerClazz;
if (generateSplitsInClient) {
  initializerClazz = null;
} else {
  initializerClazz = MRInputAMSplitGenerator.class;
}
// Register the input under the name "MRInput", described by MRInputLegacy
// and carrying the payload built from stage1Payload.
stage1Vertex.addInput(
    "MRInput",
    new InputDescriptor(MRInputLegacy.class.getName())
        .setUserPayload(MRHelpers.createMRInputPayload(stage1Payload, null)),
    initializerClazz);
// Setup stage2 Vertex
// The vertex is named "stage2" and runs FilterByWordOutputProcessor, whose
// user payload is built from stage2Conf. stage1NumTasks is passed as the third
// constructor argument -- presumably the vertex parallelism, kept equal to
// stage1 for the ONE_TO_ONE edge created below; TODO confirm.
// NOTE(review): the task resource comes from getMapResource(stage2Conf) while
// the JVM options come from getReduceJavaOpts(stage2Conf) and the environment
// is updated with the flag set to false. Confirm this map/reduce mix is
// intentional.
Vertex stage2Vertex = new Vertex("stage2", new ProcessorDescriptor(
FilterByWordOutputProcessor.class.getName()).setUserPayload(MRHelpers
.createUserPayloadFromConf(stage2Conf)), stage1NumTasks,
MRHelpers.getMapResource(stage2Conf));
// Apply JVM options derived from stage2Conf and attach commonLocalResources
// as the task-local resources.
stage2Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage2Conf)).setTaskLocalResources(commonLocalResources);
// Start from an empty environment map, let MRHelpers fill it from stage2Conf,
// then set it as the task environment of the vertex.
// (The trailing 'false' presumably selects the non-map/reduce-side settings;
// verify against MRHelpers.updateEnvironmentForMRTasks.)
Map<String, String> stage2Env = new HashMap<String, String>();
MRHelpers.updateEnvironmentForMRTasks(stage2Conf, stage2Env, false);
stage2Vertex.setTaskEnvironment(stage2Env);
// Configure the Output for stage2
// The output is registered under the name "MROutput", described by the
// MROutput class with a user payload built from stage2Conf.
stage2Vertex.addOutput("MROutput",
new OutputDescriptor(MROutput.class.getName()).setUserPayload(MRHelpers
.createUserPayloadFromConf(stage2Conf)));
// Assemble the DAG, named "FilterLinesByWord".
DAG dag = new DAG("FilterLinesByWord");
// Connect stage1 -> stage2. The edge property specifies ONE_TO_ONE data
// movement, a PERSISTED data source and SEQUENTIAL scheduling; the edge's
// output descriptor names OnFileUnorderedKVOutput and its input descriptor
// names ShuffledUnorderedKVInput.
Edge edge = new Edge(stage1Vertex, stage2Vertex, new EdgeProperty(
DataMovementType.ONE_TO_ONE, DataSourceType.PERSISTED,
SchedulingType.SEQUENTIAL, new OutputDescriptor(
OnFileUnorderedKVOutput.class.getName()), new InputDescriptor(
ShuffledUnorderedKVInput.class.getName())));
// Add both vertices first, then the edge that links them.
dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);
LOG.info("Submitting DAG to Tez Session");
// Submit the DAG through the existing tezSession; the value returned by
// submitDAG is kept in dagClient.
DAGClient dagClient = tezSession.submitDAG(dag);