// Build the AM configuration from the shared local resources, the Tez
// configuration and the credentials, wrap it in a session configuration and
// create the named session that the DAG is later submitted to.
// NOTE(review): the first AMConfiguration argument is passed as null;
// presumably it is the AM environment map -- confirm against the constructor.
AMConfiguration amConf = new AMConfiguration(null, commonLocalResources, tezConf, credentials);
TezSessionConfiguration sessionConf = new TezSessionConfiguration(amConf, tezConf);
TezSession tezSession = new TezSession("FilterLinesByWordSession", sessionConf);
// TODO(review): document why the session has to be started explicitly here;
// presumably it must be running before a DAG can be submitted to it.
tezSession.start();
// Stage 1 configuration: a copy of the base conf carrying the input path,
// the intermediate output key/value classes and the word to filter on.
Configuration stage1Conf = new JobConf(conf);
stage1Conf.set(FileInputFormat.INPUT_DIR, inputPath);
// "mapred.mapper.new-api" is switched off (presumably this selects the old
// mapred API for the stage -- confirm).
stage1Conf.setBoolean("mapred.mapper.new-api", false);
stage1Conf.set(TezJobConfig.TEZ_RUNTIME_INTERMEDIATE_OUTPUT_KEY_CLASS, Text.class.getName());
stage1Conf.set(TezJobConfig.TEZ_RUNTIME_INTERMEDIATE_OUTPUT_VALUE_CLASS, TextLongPair.class.getName());
stage1Conf.set(FILTER_PARAM_NAME, filterWord);
// Splits are only computed here when client-side generation is requested;
// otherwise inputSplitInfo stays null and must not be dereferenced later.
InputSplitInfo inputSplitInfo = null;
if (generateSplitsInClient) {
inputSplitInfo = MRHelpers.generateInputSplits(stage1Conf, stagingDir);
// Merge any credentials produced during split generation into the shared set.
// NOTE(review): the session was already started above with this credentials
// object; confirm that entries added at this point still reach the AM.
if (inputSplitInfo.getCredentials() != null) {
credentials.addAll(inputSplitInfo.getCredentials());
}
}
// Stage 1 is translated with a null second argument (no preceding stage conf).
MultiStageMRConfToTezTranslator.translateVertexConfToTez(stage1Conf, null);
// Stage 2 configuration: a copy of the base conf carrying the intermediate
// input key/value classes (the same Text / TextLongPair pair that stage 1
// declares as its output) and the final output directory.
Configuration stage2Conf = new JobConf(conf);
stage2Conf.set(TezJobConfig.TEZ_RUNTIME_INTERMEDIATE_INPUT_KEY_CLASS, Text.class.getName());
stage2Conf.set(TezJobConfig.TEZ_RUNTIME_INTERMEDIATE_INPUT_VALUE_CLASS, TextLongPair.class.getName());
stage2Conf.set(FileOutputFormat.OUTDIR, outputPath);
stage2Conf.setBoolean("mapred.mapper.new-api", false);
// Stage 2 is translated with stage 1's conf as its second argument.
MultiStageMRConfToTezTranslator.translateVertexConfToTez(stage2Conf, stage1Conf);
// NOTE(review): doJobClientMagic is an opaque helper from this view; it is
// applied to both stage confs before any payload is serialized -- confirm
// what it adds to the confs.
MRHelpers.doJobClientMagic(stage1Conf);
MRHelpers.doJobClientMagic(stage2Conf);
// Serialized stage 1 conf; reused for both the stage 1 processor and its input.
byte[] stage1Payload = MRHelpers.createUserPayloadFromConf(stage1Conf);
// Setup stage1 Vertex.
// The task count is only known up front when the splits were generated in
// the client; otherwise -1 is passed to the vertex.
int stage1NumTasks;
if (generateSplitsInClient) {
  stage1NumTasks = inputSplitInfo.getNumTasks();
} else {
  stage1NumTasks = -1;
}
ProcessorDescriptor stage1Processor =
    new ProcessorDescriptor(FilterByWordInputProcessor.class.getName());
Vertex stage1Vertex = new Vertex(
    "stage1",
    stage1Processor.setUserPayload(stage1Payload),
    stage1NumTasks,
    MRHelpers.getMapResource(stage1Conf));
stage1Vertex.setJavaOpts(MRHelpers.getMapJavaOpts(stage1Conf));

// Local resources: the common set, extended with the split files when the
// splits were produced client-side.
if (!generateSplitsInClient) {
  stage1Vertex.setTaskLocalResources(commonLocalResources);
} else {
  stage1Vertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
  Map<String, LocalResource> resourcesWithSplits =
      new HashMap<String, LocalResource>(commonLocalResources);
  MRHelpers.updateLocalResourcesForInputSplits(fs, inputSplitInfo, resourcesWithSplits);
  stage1Vertex.setTaskLocalResources(resourcesWithSplits);
}

// Task environment derived from the stage 1 conf.
Map<String, String> stage1Env = new HashMap<String, String>();
MRHelpers.updateEnvironmentForMRTasks(stage1Conf, stage1Env, true);
stage1Vertex.setTaskEnvironment(stage1Env);

// Configure the Input for stage1.
// An initializer is only supplied when the splits were not generated in the
// client; otherwise null is passed.
Class<? extends TezRootInputInitializer> initializerClazz = null;
if (!generateSplitsInClient) {
  initializerClazz = MRInputAMSplitGenerator.class;
}
InputDescriptor stage1Input = new InputDescriptor(MRInputLegacy.class.getName());
stage1Vertex.addInput(
    "MRInput",
    stage1Input.setUserPayload(MRHelpers.createMRInputPayload(stage1Payload, null)),
    initializerClazz);
// Setup stage2 Vertex: a single task running the output processor, with the
// serialized stage 2 conf as its payload.
ProcessorDescriptor stage2Processor =
    new ProcessorDescriptor(FilterByWordOutputProcessor.class.getName());
Vertex stage2Vertex = new Vertex(
    "stage2",
    stage2Processor.setUserPayload(MRHelpers.createUserPayloadFromConf(stage2Conf)),
    1,
    MRHelpers.getReduceResource(stage2Conf));
stage2Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage2Conf));
stage2Vertex.setTaskLocalResources(commonLocalResources);

// Task environment derived from the stage 2 conf.
Map<String, String> stage2Env = new HashMap<String, String>();
MRHelpers.updateEnvironmentForMRTasks(stage2Conf, stage2Env, false);
stage2Vertex.setTaskEnvironment(stage2Env);

// Configure the Output for stage2; it gets its own serialized copy of the
// stage 2 conf.
OutputDescriptor od = new OutputDescriptor(MROutput.class.getName());
byte[] stage2OutputPayload = MRHelpers.createUserPayloadFromConf(stage2Conf);
stage2Vertex.addOutput("MROutput", od.setUserPayload(stage2OutputPayload), MROutputCommitter.class);
// Assemble the two-vertex DAG: stage1 feeds stage2 over a single edge that
// uses broadcast data movement, a persisted data source and sequential
// scheduling, with unordered key/value output and input.
DAG dag = new DAG("FilterLinesByWord");
EdgeProperty stage1ToStage2 = new EdgeProperty(
    DataMovementType.BROADCAST,
    DataSourceType.PERSISTED,
    SchedulingType.SEQUENTIAL,
    new OutputDescriptor(OnFileUnorderedKVOutput.class.getName()),
    new InputDescriptor(ShuffledUnorderedKVInput.class.getName()));
Edge edge = new Edge(stage1Vertex, stage2Vertex, stage1ToStage2);
dag.addVertex(stage1Vertex);
dag.addVertex(stage2Vertex);
dag.addEdge(edge);

// Hand the DAG to the running session; progress is tracked via the client.
LOG.info("Submitting DAG to Tez Session");
DAGClient dagClient = tezSession.submitDAG(dag);
LOG.info("Submitted DAG to Tez Session");
// Monitor the submitted DAG until it finishes, collect its counters, clean up
// the staging directory and the session, then record the exit code:
//   0  -> DAG SUCCEEDED
//   1  -> DAG ended in any other state
//   -1 -> progress could not be retrieved while the DAG was running
DAGStatus dagStatus = null;
String[] vNames = { "stage1", "stage2" };
try {
  // Phase 1: poll every 500 ms until the DAG is RUNNING or already in one of
  // the SUCCEEDED / FAILED / KILLED / ERROR states.
  while (true) {
    dagStatus = dagClient.getDAGStatus(null);
    DAGStatus.State polledState = dagStatus.getState();
    if (polledState == DAGStatus.State.RUNNING
        || polledState == DAGStatus.State.SUCCEEDED
        || polledState == DAGStatus.State.FAILED
        || polledState == DAGStatus.State.KILLED
        || polledState == DAGStatus.State.ERROR) {
      break;
    }
    try {
      Thread.sleep(500);
    } catch (InterruptedException ignored) {
      // Deliberately best-effort: an interrupt only shortens this wait and
      // polling continues. The interrupt flag is not restored here because
      // that would make every later sleep throw immediately and turn the
      // loop into a busy spin.
    }
  }

  // Phase 2: while the DAG is running, print progress once per second.
  while (dagStatus.getState() == DAGStatus.State.RUNNING) {
    try {
      ExampleDriver.printDAGStatus(dagClient, vNames);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {
        // Same best-effort handling as in the first polling loop.
      }
      dagStatus = dagClient.getDAGStatus(null);
    } catch (TezException e) {
      // Keep the cause in the log so the failure can be diagnosed.
      LOG.fatal("Failed to get application progress. Exiting", e);
      errorCode = -1;
      return;
    }
  }

  // Final status fetch that also requests the DAG counters.
  dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
  counters = dagStatus.getDAGCounters();
} finally {
  // Always clean up, including on the early return above. The nested
  // try/finally guarantees the session is stopped even when deleting the
  // staging directory throws.
  try {
    fs.delete(stagingDir, true);
  } finally {
    tezSession.stop();
  }
}
ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
errorCode = (dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1);