// Configure the shared input: record TextInputFormat under the
// "mapred.input.format.class" key and point the input directory at inputPath.
inputConf.set("mapred.input.format.class", TextInputFormat.class.getName());
inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
// The second argument is null here; presumably the input format is then taken
// from the configuration key set above -- TODO confirm against the MRInput API.
MRInput.MRInputConfigBuilder configurer = MRInput.createConfigBuilder(inputConf, null);
// generateSplitsInAM(false): presumably disables split generation in the
// application master (i.e. splits are computed up front) -- verify.
DataSourceDescriptor dataSource = configurer.generateSplitsInAM(false).build();
// Three map vertices ("map1", "map2", "map3"). Each one uses TokenProcessor,
// receives numMaps as the third argument to Vertex.create (presumably the
// task parallelism -- confirm), and registers the same dataSource descriptor
// under the input name "MRInput".
Vertex mapVertex1 = Vertex.create("map1", ProcessorDescriptor.create(
TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);
Vertex mapVertex2 = Vertex.create("map2", ProcessorDescriptor.create(
TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);
Vertex mapVertex3 = Vertex.create("map3", ProcessorDescriptor.create(
TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);
// The "checker" vertex runs UnionProcessor and is created with 1 as the third
// argument to Vertex.create (presumably a single task -- confirm).
Vertex checkerVertex = Vertex.create("checker", ProcessorDescriptor.create(
UnionProcessor.class.getName()), 1);
// First sink, "union": configured through explicit configuration keys
// (new-API reducer flag off, TextOutputFormat, output directory outputPath)
// with a null second argument passed to the MROutput config builder.
Configuration outputConf = new Configuration(tezConf);
outputConf.setBoolean("mapred.reducer.new-api", false);
outputConf.set("mapred.output.format.class", TextOutputFormat.class.getName());
outputConf.set(FileOutputFormat.OUTDIR, outputPath);
DataSinkDescriptor od = MROutput.createConfigBuilder(outputConf, null).build();
checkerVertex.addDataSink("union", od);
// Second sink, "all-parts": uses the builder overload that takes the output
// format class and the output path (outputPath + "-all-parts") directly.
Configuration allPartsConf = new Configuration(tezConf);
DataSinkDescriptor od2 = MROutput.createConfigBuilder(allPartsConf,
TextOutputFormat.class, outputPath + "-all-parts").build();
checkerVertex.addDataSink("all-parts", od2);
// Sink descriptor targeting outputPath + "-parts" with TextOutputFormat.
// It is only built here; it is not attached to any vertex or group in the
// statements shown, so it is presumably attached further down -- verify.
Configuration partsConf = new Configuration(tezConf);
DataSinkDescriptor od1 = MROutput.createConfigBuilder(partsConf,
TextOutputFormat.class, outputPath + "-parts").build();
// Group mapVertex1 and mapVertex2 into a vertex group named "union".
// mapVertex3 is intentionally not passed to this call.
VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);