Package org.apache.tez.dag.api

Examples of org.apache.tez.dag.api.Vertex


      mapInputPayload = MRHelpers.createMRInputPayload(
          mapUserPayload, inputSplitInfo.getSplitsProto());
    }
    int numTasks = generateSplitsInAM ? -1 : numMapper;
   
    Vertex mapVertex = new Vertex("map", new ProcessorDescriptor(
        MapProcessor.class.getName()).setUserPayload(mapUserPayload),
        numTasks, MRHelpers.getMapResource(mapStageConf));
    mapVertex.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    if (!generateSplitsInAM) {
      mapVertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
    }
   
    if (writeSplitsToDFS) {
      Map<String, LocalResource> mapLocalResources = new HashMap<String, LocalResource>();
      mapLocalResources.putAll(commonLocalResources);
      MRHelpers.updateLocalResourcesForInputSplits(remoteFs, inputSplitInfo,
          mapLocalResources);
      mapVertex.setTaskLocalResources(mapLocalResources);
    } else {
      mapVertex.setTaskLocalResources(commonLocalResources);
    }

    Map<String, String> mapEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    mapVertex.setTaskEnvironment(mapEnv);
    if (generateSplitsInAM) {
      MRHelpers.addMRInput(mapVertex, mapInputPayload, MRInputAMSplitGenerator.class);
    } else {
      if (writeSplitsToDFS) {
        MRHelpers.addMRInput(mapVertex, mapInputPayload, null);
      } else {
        MRHelpers.addMRInput(mapVertex, mapInputPayload, MRInputSplitDistributor.class);
      }
    }
    vertices.add(mapVertex);

    if (iReduceStagesCount > 0
        && numIReducer > 0) {
      for (int i = 0; i < iReduceStagesCount; ++i) {
        Configuration iconf =
            intermediateReduceStageConfs[i];
        byte[] iReduceUserPayload = MRHelpers.createUserPayloadFromConf(iconf);
        Vertex ivertex = new Vertex("ireduce" + (i+1),
                new ProcessorDescriptor(ReduceProcessor.class.getName()).
                setUserPayload(iReduceUserPayload), numIReducer,
                MRHelpers.getReduceResource(iconf));
        ivertex.setJavaOpts(MRHelpers.getReduceJavaOpts(iconf));
        ivertex.setTaskLocalResources(commonLocalResources);
        Map<String, String> reduceEnv = new HashMap<String, String>();
        MRHelpers.updateEnvironmentForMRTasks(iconf, reduceEnv, false);
        ivertex.setTaskEnvironment(reduceEnv);
        vertices.add(ivertex);
      }
    }

    Vertex finalReduceVertex = null;
    if (numReducer > 0) {
      byte[] reducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
      finalReduceVertex = new Vertex("reduce", new ProcessorDescriptor(
          ReduceProcessor.class.getName()).setUserPayload(reducePayload),
          numReducer, MRHelpers.getReduceResource(finalReduceConf));
      finalReduceVertex.setJavaOpts(
          MRHelpers.getReduceJavaOpts(finalReduceConf));
      finalReduceVertex.setTaskLocalResources(commonLocalResources);
      Map<String, String> reduceEnv = new HashMap<String, String>();
      MRHelpers.updateEnvironmentForMRTasks(finalReduceConf, reduceEnv, false);
      finalReduceVertex.setTaskEnvironment(reduceEnv);
      MRHelpers.addMROutput(finalReduceVertex, reducePayload);
      vertices.add(finalReduceVertex);
    } else {
      // Map only job
      MRHelpers.addMROutput(mapVertex, mapUserPayload);
View Full Code Here


    }

    Resource taskResource = isMap ? MRHelpers.getMapResource(stageConf)
        : MRHelpers.getReduceResource(stageConf);
    byte[] vertexUserPayload = MRHelpers.createUserPayloadFromConf(stageConf);
    Vertex vertex = new Vertex(vertexName, new ProcessorDescriptor(processorName).
        setUserPayload(vertexUserPayload),
        numTasks, taskResource);
    if (isMap) {
      byte[] mapInputPayload = MRHelpers.createMRInputPayload(vertexUserPayload, null);
      MRHelpers.addMRInput(vertex, mapInputPayload, null);
    }
    // Attach the MR output when this is the last stage (stageNum == totalStages - 1); this also covers map-only jobs.
    if (stageNum == totalStages -1) {
      MRHelpers.addMROutput(vertex, vertexUserPayload);
    }

    Map<String, String> taskEnv = new HashMap<String, String>();
    setupMapReduceEnv(stageConf, taskEnv, isMap);

    Map<String, LocalResource> taskLocalResources =
        new TreeMap<String, LocalResource>();
    // PRECOMMIT Remove split localization for reduce tasks if it's being set
    // here
    taskLocalResources.putAll(jobLocalResources);

    String taskJavaOpts = isMap ? MRHelpers.getMapJavaOpts(stageConf)
        : MRHelpers.getReduceJavaOpts(stageConf);

    vertex.setTaskEnvironment(taskEnv)
        .setTaskLocalResources(taskLocalResources)
        .setTaskLocationsHint(locations)
        .setJavaOpts(taskJavaOpts);

    if (LOG.isDebugEnabled()) {
      LOG.debug("Adding vertex to DAG" + ", vertexName="
          + vertex.getVertexName() + ", processor="
          + vertex.getProcessorDescriptor().getClassName() + ", parallelism="
          + vertex.getParallelism() + ", javaOpts=" + vertex.getJavaOpts()
          + ", resources=" + vertex.getTaskResource()
      // TODO Add localResources and Environment
      );
    }

    return vertex;
View Full Code Here

   
    DAG dag = new DAG("testMRRSleepJobDagSubmit");
    int stage1NumTasks = genSplitsInAM ? -1 : inputSplitInfo.getNumTasks();
    Class<? extends TezRootInputInitializer> inputInitializerClazz = genSplitsInAM ? MRInputAMSplitGenerator.class
        : null;
    Vertex stage1Vertex = new Vertex("map", new ProcessorDescriptor(
        MapProcessor.class.getName()).setUserPayload(stage1Payload),
        stage1NumTasks, Resource.newInstance(256, 1));
    MRHelpers.addMRInput(stage1Vertex, stage1InputPayload, inputInitializerClazz);
    Vertex stage2Vertex = new Vertex("ireduce", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).setUserPayload(
        MRHelpers.createUserPayloadFromConf(stage2Conf)),
        1, Resource.newInstance(256, 1));
    Vertex stage3Vertex = new Vertex("reduce", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).setUserPayload(stage3Payload),
        1, Resource.newInstance(256, 1));
    MRHelpers.addMROutput(stage3Vertex, stage3Payload);

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    Map<String, String> commonEnv = createCommonEnv();

    if (!genSplitsInAM) {
      // TODO Use utility method post TEZ-205.
      Map<String, LocalResource> stage1LocalResources = new HashMap<String, LocalResource>();
      stage1LocalResources.put(
          inputSplitInfo.getSplitsFile().getName(),
          createLocalResource(remoteFs, inputSplitInfo.getSplitsFile(),
              LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
      stage1LocalResources.put(
          inputSplitInfo.getSplitsMetaInfoFile().getName(),
          createLocalResource(remoteFs, inputSplitInfo.getSplitsMetaInfoFile(),
              LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
      stage1LocalResources.putAll(commonLocalResources);

      stage1Vertex.setTaskLocalResources(stage1LocalResources);
      stage1Vertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
    } else {
      stage1Vertex.setTaskLocalResources(commonLocalResources);
    }

    stage1Vertex.setJavaOpts(MRHelpers.getMapJavaOpts(stage1Conf));
    stage1Vertex.setTaskEnvironment(commonEnv);

    // TODO env, resources

    stage2Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage2Conf));
    stage2Vertex.setTaskLocalResources(commonLocalResources);
    stage2Vertex.setTaskEnvironment(commonEnv);

    stage3Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage3Conf));
    stage3Vertex.setTaskLocalResources(commonLocalResources);
    stage3Vertex.setTaskEnvironment(commonEnv);

    dag.addVertex(stage1Vertex);
    dag.addVertex(stage2Vertex);
    dag.addVertex(stage3Vertex);
View Full Code Here

    MRHelpers.doJobClientMagic(stage2Conf);

    byte[] stage1Payload = MRHelpers.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    int stage1NumTasks = generateSplitsInClient ? inputSplitInfo.getNumTasks() : -1;
    Vertex stage1Vertex = new Vertex("stage1", new ProcessorDescriptor(
        FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload),
        stage1NumTasks, MRHelpers.getMapResource(stage1Conf));
    stage1Vertex.setJavaOpts(MRHelpers.getMapJavaOpts(stage1Conf));
    if (generateSplitsInClient) {
      stage1Vertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
      Map<String, LocalResource> stage1LocalResources = new HashMap<String, LocalResource>();
      stage1LocalResources.putAll(commonLocalResources);
      MRHelpers.updateLocalResourcesForInputSplits(fs, inputSplitInfo, stage1LocalResources);
      stage1Vertex.setTaskLocalResources(stage1LocalResources);
    } else {
      stage1Vertex.setTaskLocalResources(commonLocalResources);
    }
    Map<String, String> stage1Env = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(stage1Conf, stage1Env, true);
    stage1Vertex.setTaskEnvironment(stage1Env);

    // Configure the Input for stage1
    Class<? extends TezRootInputInitializer> initializerClazz = generateSplitsInClient ? null
        : MRInputAMSplitGenerator.class;
    stage1Vertex.addInput("MRInput",
        new InputDescriptor(MRInputLegacy.class.getName())
            .setUserPayload(MRHelpers.createMRInputPayload(stage1Payload, null)),
        initializerClazz);

    // Setup stage2 Vertex
    Vertex stage2Vertex = new Vertex("stage2", new ProcessorDescriptor(
        FilterByWordOutputProcessor.class.getName()).setUserPayload(MRHelpers
        .createUserPayloadFromConf(stage2Conf)), stage1NumTasks,
        MRHelpers.getMapResource(stage2Conf));
    stage2Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage2Conf)).setTaskLocalResources(commonLocalResources);
    Map<String, String> stage2Env = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(stage2Conf, stage2Env, false);
    stage2Vertex.setTaskEnvironment(stage2Env);

    // Configure the Output for stage2
    stage2Vertex.addOutput("MROutput",
        new OutputDescriptor(MROutput.class.getName()).setUserPayload(MRHelpers
            .createUserPayloadFromConf(stage2Conf)));

    DAG dag = new DAG("FilterLinesByWord");
    Edge edge = new Edge(stage1Vertex, stage2Vertex, new EdgeProperty(
View Full Code Here

    byte[] mapPayload = MRHelpers.createUserPayloadFromConf(mapStageConf);
    byte[] mapInputPayload =
        MRHelpers.createMRInputPayload(mapPayload, null);
    int numMaps = generateSplitsInClient ? inputSplitInfo.getNumTasks() : -1;
    Vertex mapVertex = new Vertex("initialmap", new ProcessorDescriptor(
        MapProcessor.class.getName()).setUserPayload(mapPayload),
        numMaps, MRHelpers.getMapResource(mapStageConf));
    mapVertex.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    if (generateSplitsInClient) {
      mapVertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
      Map<String, LocalResource> mapLocalResources =
          new HashMap<String, LocalResource>();
      mapLocalResources.putAll(commonLocalResources);
      MRHelpers.updateLocalResourcesForInputSplits(fs, inputSplitInfo,
          mapLocalResources);
      mapVertex.setTaskLocalResources(mapLocalResources);
    } else {
      mapVertex.setTaskLocalResources(commonLocalResources);
    }

    Map<String, String> mapEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    mapVertex.setTaskEnvironment(mapEnv);
    Class<? extends TezRootInputInitializer> initializerClazz = generateSplitsInClient ? null
        : MRInputAMSplitGenerator.class;
    MRHelpers.addMRInput(mapVertex, mapInputPayload, initializerClazz);
    vertices.add(mapVertex);

    Vertex ivertex = new Vertex("intermediate_reducer", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).
        setUserPayload(MRHelpers.createUserPayloadFromConf(iReduceStageConf)),
        2,
        MRHelpers.getReduceResource(iReduceStageConf));
    ivertex.setJavaOpts(MRHelpers.getReduceJavaOpts(iReduceStageConf));
    ivertex.setTaskLocalResources(commonLocalResources);
    Map<String, String> ireduceEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(iReduceStageConf, ireduceEnv, false);
    ivertex.setTaskEnvironment(ireduceEnv);
    vertices.add(ivertex);

    byte[] finalReducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
    Vertex finalReduceVertex = new Vertex("finalreduce",
        new ProcessorDescriptor(
            ReduceProcessor.class.getName()).setUserPayload(finalReducePayload),
                1, MRHelpers.getReduceResource(finalReduceConf));
    finalReduceVertex.setJavaOpts(
        MRHelpers.getReduceJavaOpts(finalReduceConf));
    finalReduceVertex.setTaskLocalResources(commonLocalResources);
    Map<String, String> reduceEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(finalReduceConf, reduceEnv, false);
    finalReduceVertex.setTaskEnvironment(reduceEnv);
    MRHelpers.addMROutput(finalReduceVertex, finalReducePayload);
    vertices.add(finalReduceVertex);

    DAG dag = new DAG("OrderedWordCount" + dagIndex);
    for (int i = 0; i < vertices.size(); ++i) {
View Full Code Here

    int stage1NumTasks = genSplitsInAM ? -1 : inputSplitInfo.getNumTasks();
    Class<? extends TezRootInputInitializer> inputInitializerClazz =
        genSplitsInAM ? (initializerClass == null ? MRInputAMSplitGenerator.class : initializerClass)
        : null;
    LOG.info("Using initializer class: " + initializerClass);
    Vertex stage1Vertex = new Vertex("map", new ProcessorDescriptor(
        MapProcessor.class.getName()).setUserPayload(stage1Payload),
        stage1NumTasks, Resource.newInstance(256, 1));
    MRHelpers.addMRInput(stage1Vertex, stage1InputPayload, inputInitializerClazz);
    Vertex stage2Vertex = new Vertex("ireduce", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).setUserPayload(
        MRHelpers.createUserPayloadFromConf(stage2Conf)),
        1, Resource.newInstance(256, 1));
    Vertex stage3Vertex = new Vertex("reduce", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).setUserPayload(stage3Payload),
        1, Resource.newInstance(256, 1));
    MRHelpers.addMROutputLegacy(stage3Vertex, stage3Payload);

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    Map<String, String> commonEnv = createCommonEnv();

    if (!genSplitsInAM) {
      // TODO Use utility method post TEZ-205.
      Map<String, LocalResource> stage1LocalResources = new HashMap<String, LocalResource>();
      stage1LocalResources.put(
          inputSplitInfo.getSplitsFile().getName(),
          createLocalResource(remoteFs, inputSplitInfo.getSplitsFile(),
              LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
      stage1LocalResources.put(
          inputSplitInfo.getSplitsMetaInfoFile().getName(),
          createLocalResource(remoteFs, inputSplitInfo.getSplitsMetaInfoFile(),
              LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
      stage1LocalResources.putAll(commonLocalResources);

      stage1Vertex.setTaskLocalResources(stage1LocalResources);
      stage1Vertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
    } else {
      stage1Vertex.setTaskLocalResources(commonLocalResources);
    }

    stage1Vertex.setJavaOpts(MRHelpers.getMapJavaOpts(stage1Conf));
    stage1Vertex.setTaskEnvironment(commonEnv);

    // TODO env, resources

    stage2Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage2Conf));
    stage2Vertex.setTaskLocalResources(commonLocalResources);
    stage2Vertex.setTaskEnvironment(commonEnv);

    stage3Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage3Conf));
    stage3Vertex.setTaskLocalResources(commonLocalResources);
    stage3Vertex.setTaskEnvironment(commonEnv);

    dag.addVertex(stage1Vertex);
    dag.addVertex(stage2Vertex);
    dag.addVertex(stage3Vertex);
View Full Code Here

        : MRHelpers.getReduceResource(stageConf);
   
    stageConf.set(MRJobConfig.MROUTPUT_FILE_NAME_PREFIX, "part");
   
    byte[] vertexUserPayload = MRHelpers.createUserPayloadFromConf(stageConf);
    Vertex vertex = new Vertex(vertexName, new ProcessorDescriptor(processorName).
        setUserPayload(vertexUserPayload),
        numTasks, taskResource);
    if (isMap) {
      byte[] mapInputPayload = MRHelpers.createMRInputPayload(vertexUserPayload, null);
      MRHelpers.addMRInput(vertex, mapInputPayload, null);
    }
    // Attach the MR output when this is the last stage (stageNum == totalStages - 1); this also covers map-only jobs.
    if (stageNum == totalStages -1) {
      MRHelpers.addMROutputLegacy(vertex, vertexUserPayload);
    }

    Map<String, String> taskEnv = new HashMap<String, String>();
    setupMapReduceEnv(stageConf, taskEnv, isMap);

    Map<String, LocalResource> taskLocalResources =
        new TreeMap<String, LocalResource>();
    // PRECOMMIT Remove split localization for reduce tasks if it's being set
    // here
    taskLocalResources.putAll(jobLocalResources);

    String taskJavaOpts = isMap ? MRHelpers.getMapJavaOpts(stageConf)
        : MRHelpers.getReduceJavaOpts(stageConf);

    vertex.setTaskEnvironment(taskEnv)
        .setTaskLocalResources(taskLocalResources)
        .setTaskLocationsHint(locations)
        .setJavaOpts(taskJavaOpts);
   
    if (!isMap) {
      vertex.setVertexManagerPlugin(new VertexManagerPluginDescriptor(
          ShuffleVertexManager.class.getName()));
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Adding vertex to DAG" + ", vertexName="
          + vertex.getVertexName() + ", processor="
          + vertex.getProcessorDescriptor().getClassName() + ", parallelism="
          + vertex.getParallelism() + ", javaOpts=" + vertex.getJavaOpts()
          + ", resources=" + vertex.getTaskResource()
      // TODO Add localResources and Environment
      );
    }

    return vertex;
View Full Code Here

    MRHelpers.doJobClientMagic(stage2Conf);

    byte[] stage1Payload = MRHelpers.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    int stage1NumTasks = generateSplitsInClient ? inputSplitInfo.getNumTasks() : -1;
    Vertex stage1Vertex = new Vertex("stage1", new ProcessorDescriptor(
        FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload),
        stage1NumTasks, MRHelpers.getMapResource(stage1Conf));
    stage1Vertex.setJavaOpts(MRHelpers.getMapJavaOpts(stage1Conf));
    if (generateSplitsInClient) {
      stage1Vertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
      Map<String, LocalResource> stage1LocalResources = new HashMap<String, LocalResource>();
      stage1LocalResources.putAll(commonLocalResources);
      MRHelpers.updateLocalResourcesForInputSplits(fs, inputSplitInfo, stage1LocalResources);
      stage1Vertex.setTaskLocalResources(stage1LocalResources);
    } else {
      stage1Vertex.setTaskLocalResources(commonLocalResources);
    }
    Map<String, String> stage1Env = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(stage1Conf, stage1Env, true);
    stage1Vertex.setTaskEnvironment(stage1Env);

    // Configure the Input for stage1
    Class<? extends TezRootInputInitializer> initializerClazz = generateSplitsInClient ? null
        : MRInputAMSplitGenerator.class;
    stage1Vertex.addInput("MRInput",
        new InputDescriptor(MRInputLegacy.class.getName())
            .setUserPayload(MRHelpers.createMRInputPayload(stage1Payload, null)),
        initializerClazz);

    // Setup stage2 Vertex
    Vertex stage2Vertex = new Vertex("stage2", new ProcessorDescriptor(
        FilterByWordOutputProcessor.class.getName()).setUserPayload(MRHelpers
        .createUserPayloadFromConf(stage2Conf)), 1,
        MRHelpers.getReduceResource(stage2Conf));
    stage2Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage2Conf)).setTaskLocalResources(commonLocalResources);
    Map<String, String> stage2Env = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(stage2Conf, stage2Env, false);
    stage2Vertex.setTaskEnvironment(stage2Env);

    // Configure the Output for stage2
    OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
        .setUserPayload(MRHelpers.createUserPayloadFromConf(stage2Conf));
    stage2Vertex.addOutput("MROutput", od, MROutputCommitter.class);

    DAG dag = new DAG("FilterLinesByWord");
    Edge edge = new Edge(stage1Vertex, stage2Vertex, new EdgeProperty(
        DataMovementType.BROADCAST, DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL, new OutputDescriptor(
View Full Code Here

    byte[] mapPayload = MRHelpers.createUserPayloadFromConf(mapStageConf);
    byte[] mapInputPayload = MRHelpers.createMRInputPayloadWithGrouping(mapPayload,
            TextInputFormat.class.getName());
    int numMaps = -1;
    Vertex tokenizerVertex = new Vertex("tokenizer", new ProcessorDescriptor(
        TokenProcessor.class.getName()),
        numMaps, MRHelpers.getMapResource(mapStageConf));
    tokenizerVertex.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    Map<String, String> mapEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    tokenizerVertex.setTaskEnvironment(mapEnv);
    Class<? extends TezRootInputInitializer> initializerClazz = MRInputAMSplitGenerator.class;
    InputDescriptor id = new InputDescriptor(MRInput.class.getName()).
        setUserPayload(mapInputPayload);
    tokenizerVertex.addInput("MRInput", id, initializerClazz);

    byte[] finalReducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
    Vertex summerVertex = new Vertex("summer",
        new ProcessorDescriptor(
            SumProcessor.class.getName()).setUserPayload(finalReducePayload),
                1, MRHelpers.getReduceResource(finalReduceConf));
    summerVertex.setJavaOpts(
        MRHelpers.getReduceJavaOpts(finalReduceConf));
    Map<String, String> reduceEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(finalReduceConf, reduceEnv, false);
    summerVertex.setTaskEnvironment(reduceEnv);
    OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
        .setUserPayload(finalReducePayload);
    summerVertex.addOutput("MROutput", od, MROutputCommitter.class);
   
    DAG dag = new DAG("WordCount");
    dag.addVertex(tokenizerVertex)
        .addVertex(summerVertex)
        .addEdge(
View Full Code Here

    byte[] mapPayload = MRHelpers.createUserPayloadFromConf(mapStageConf);
    byte[] mapInputPayload = MRHelpers.createMRInputPayloadWithGrouping(mapPayload,
            TextInputFormat.class.getName());
    int numMaps = -1;
    Vertex mapVertex1 = new Vertex("map1", new ProcessorDescriptor(
        TokenProcessor.class.getName()),
        numMaps, MRHelpers.getMapResource(mapStageConf));
    mapVertex1.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    Map<String, String> mapEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    mapVertex1.setTaskEnvironment(mapEnv);
    Class<? extends TezRootInputInitializer> initializerClazz = MRInputAMSplitGenerator.class;
    InputDescriptor id = new InputDescriptor(MRInput.class.getName()).
        setUserPayload(mapInputPayload);
    mapVertex1.addInput("MRInput", id, initializerClazz);

    Vertex mapVertex2 = new Vertex("map2", new ProcessorDescriptor(
        TokenProcessor.class.getName()),
        numMaps, MRHelpers.getMapResource(mapStageConf));
    mapVertex2.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    mapVertex2.setTaskEnvironment(mapEnv);
    mapVertex2.addInput("MRInput", id, initializerClazz);

    Vertex mapVertex3 = new Vertex("map3", new ProcessorDescriptor(
        TokenProcessor.class.getName()),
        numMaps, MRHelpers.getMapResource(mapStageConf));
    mapVertex3.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    mapVertex3.setTaskEnvironment(mapEnv);
    mapVertex3.addInput("MRInput", id, initializerClazz);
   
    byte[] finalReducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
    Vertex checkerVertex = new Vertex("checker",
        new ProcessorDescriptor(
            UnionProcessor.class.getName()).setUserPayload(finalReducePayload),
                1, MRHelpers.getReduceResource(finalReduceConf));
    checkerVertex.setJavaOpts(
        MRHelpers.getReduceJavaOpts(finalReduceConf));
    Map<String, String> reduceEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(finalReduceConf, reduceEnv, false);
    checkerVertex.setTaskEnvironment(reduceEnv);
    OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(finalReducePayload);
    checkerVertex.addOutput("union", od, MROutputCommitter.class);

    Configuration partsConf = new Configuration(finalReduceConf);
    partsConf.set(FileOutputFormat.OUTDIR, outputPath+"-parts");
    byte[] partsPayload = MRHelpers.createUserPayloadFromConf(partsConf);
   
    DAG dag = new DAG("UnionExample");
   
    VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
    OutputDescriptor od1 = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(partsPayload);
    Configuration allPartsConf = new Configuration(finalReduceConf);
    allPartsConf.set(FileOutputFormat.OUTDIR, outputPath+"-all-parts");
    byte[] allPartsPayload = MRHelpers.createUserPayloadFromConf(allPartsConf);
    OutputDescriptor od2 = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(allPartsPayload);
    unionVertex.addOutput("parts", od1, MROutputCommitter.class);
    checkerVertex.addOutput("all-parts", od2, MROutputCommitter.class);
   
   
    dag.addVertex(mapVertex1)
        .addVertex(mapVertex2)
        .addVertex(mapVertex3)
View Full Code Here

TOP

Related Classes of org.apache.tez.dag.api.Vertex

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.