// Build a byte[] user payload from the map-stage configuration, then derive the
// MRInput payload from it, naming TextInputFormat as the input format class.
byte[] mapPayload = MRHelpers.createUserPayloadFromConf(mapStageConf);
byte[] mapInputPayload = MRHelpers.createMRInputPayloadWithGrouping(mapPayload,
TextInputFormat.class.getName());
// Task count passed to every map vertex constructed below.
// NOTE(review): -1 presumably means "let the input initializer decide the
// parallelism at runtime" — confirm against the Vertex constructor contract.
int numMaps = -1;
// First map vertex ("map1"): runs TokenProcessor with numMaps tasks, using the
// resource and JVM options that MRHelpers derives from mapStageConf.
Vertex mapVertex1 = new Vertex("map1", new ProcessorDescriptor(
TokenProcessor.class.getName()),
numMaps, MRHelpers.getMapResource(mapStageConf));
mapVertex1.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
// Task environment for the map vertices. This same HashMap instance is also
// passed to map2 and map3 further down.
// NOTE(review): the trailing 'true' presumably marks these as map tasks (the
// checker vertex passes 'false') — confirm against MRHelpers.
Map<String, String> mapEnv = new HashMap<String, String>();
MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
mapVertex1.setTaskEnvironment(mapEnv);
// Root input shared by all three map vertices: an MRInput descriptor carrying
// mapInputPayload, initialized by MRInputAMSplitGenerator.
Class<? extends TezRootInputInitializer> initializerClazz = MRInputAMSplitGenerator.class;
InputDescriptor id = new InputDescriptor(MRInput.class.getName()).
setUserPayload(mapInputPayload);
mapVertex1.addInput("MRInput", id, initializerClazz);
// Second map vertex ("map2"): same processor, task count, resource, JVM options
// and root input as map1.
Vertex mapVertex2 = new Vertex("map2", new ProcessorDescriptor(
TokenProcessor.class.getName()),
numMaps, MRHelpers.getMapResource(mapStageConf));
mapVertex2.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
// NOTE(review): this re-runs updateEnvironmentForMRTasks on the mapEnv instance
// that was already populated and handed to mapVertex1. Whether the second call
// is a no-op depends on the helper (e.g. if it appends to path-like variables)
// and on whether setTaskEnvironment copies the map — verify before relying on it.
MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
mapVertex2.setTaskEnvironment(mapEnv);
// Reuses the MRInput descriptor and initializer class declared for map1.
mapVertex2.addInput("MRInput", id, initializerClazz);
// Third map vertex ("map3"): configured the same way as map1 and map2. Unlike
// those two, it is not passed to dag.createVertexGroup("union", ...) below.
Vertex mapVertex3 = new Vertex("map3", new ProcessorDescriptor(
TokenProcessor.class.getName()),
numMaps, MRHelpers.getMapResource(mapStageConf));
mapVertex3.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
// NOTE(review): third invocation of updateEnvironmentForMRTasks on the same
// shared mapEnv instance; confirm the helper is safe to apply repeatedly.
MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
mapVertex3.setTaskEnvironment(mapEnv);
// Reuses the MRInput descriptor and initializer class declared for map1.
mapVertex3.addInput("MRInput", id, initializerClazz);
// Payload built from the final-reduce configuration; it is given both to the
// checker's processor and to its "union" output descriptor.
byte[] finalReducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
// "checker" vertex: a single task (parallelism 1) running UnionProcessor, sized
// with the reduce resource and JVM options derived from finalReduceConf.
Vertex checkerVertex = new Vertex("checker",
new ProcessorDescriptor(
UnionProcessor.class.getName()).setUserPayload(finalReducePayload),
1, MRHelpers.getReduceResource(finalReduceConf));
checkerVertex.setJavaOpts(
MRHelpers.getReduceJavaOpts(finalReduceConf));
// Separate environment map for the checker.
// NOTE(review): the trailing 'false' presumably selects the non-map (reduce)
// settings — confirm against MRHelpers.
Map<String, String> reduceEnv = new HashMap<String, String>();
MRHelpers.updateEnvironmentForMRTasks(finalReduceConf, reduceEnv, false);
checkerVertex.setTaskEnvironment(reduceEnv);
// Output named "union": an MROutput configured with finalReducePayload and
// registered with MROutputCommitter.
OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
.setUserPayload(finalReducePayload);
checkerVertex.addOutput("union", od, MROutputCommitter.class);
// Configuration derived from finalReduceConf whose output directory is set to
// outputPath + "-parts"; its payload backs the vertex group's "parts" output.
Configuration partsConf = new Configuration(finalReduceConf);
partsConf.set(FileOutputFormat.OUTDIR, outputPath+"-parts");
byte[] partsPayload = MRHelpers.createUserPayloadFromConf(partsConf);
DAG dag = new DAG("UnionExample");
// Vertex group "union" contains only mapVertex1 and mapVertex2; mapVertex3 is
// not a member of the group.
VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
OutputDescriptor od1 = new OutputDescriptor(MROutput.class.getName())
.setUserPayload(partsPayload);
// Second derived configuration, writing to outputPath + "-all-parts"; its
// payload backs the checker vertex's "all-parts" output.
Configuration allPartsConf = new Configuration(finalReduceConf);
allPartsConf.set(FileOutputFormat.OUTDIR, outputPath+"-all-parts");
byte[] allPartsPayload = MRHelpers.createUserPayloadFromConf(allPartsConf);
OutputDescriptor od2 = new OutputDescriptor(MROutput.class.getName())
.setUserPayload(allPartsPayload);
// "parts" is attached to the vertex group, while "all-parts" is attached to
// the checker vertex (in addition to its existing "union" output). Both use
// MROutputCommitter.
unionVertex.addOutput("parts", od1, MROutputCommitter.class);
checkerVertex.addOutput("all-parts", od2, MROutputCommitter.class);
dag.addVertex(mapVertex1)
.addVertex(mapVertex2)
.addVertex(mapVertex3)