Package org.apache.tez.dag.api

Examples of org.apache.tez.dag.api.Vertex


  @Test
  public void testNonDefaultFSStagingDir() throws Exception {
    SleepProcessorConfig spConf = new SleepProcessorConfig(1);

    DAG dag = new DAG("TezSleepProcessor");
    Vertex vertex = new Vertex("SleepVertex", new ProcessorDescriptor(
        SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
        Resource.newInstance(1024, 1));
    dag.addVertex(vertex);

    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
View Full Code Here


    protected static Vertex l5v1, l5v2, l5v3;
    protected static Vertex l6v1;
   
    protected static void addDAGVerticesAndEdges() {
        ThreeLevelsFailingDAG.addDAGVerticesAndEdges();
        l4v1 = new Vertex("l4v1", TestProcessor.getProcDesc(payload), 10, defaultResource);
        dag.addVertex(l4v1);
        addEdge(l3v1, l4v1, DataMovementType.SCATTER_GATHER);
        addEdge(l3v2, l4v1, DataMovementType.SCATTER_GATHER);
        l5v1 = new Vertex("l5v1", TestProcessor.getProcDesc(payload), 2, defaultResource);
        dag.addVertex(l5v1);
        addEdge(l4v1, l5v1, DataMovementType.SCATTER_GATHER);
        l5v2 = new Vertex("l5v2", TestProcessor.getProcDesc(payload), 4, defaultResource);
        dag.addVertex(l5v2);
        addEdge(l4v1, l5v2, DataMovementType.SCATTER_GATHER);
        l5v3 = new Vertex("l5v3", TestProcessor.getProcDesc(payload), 1, defaultResource);
        dag.addVertex(l5v3);
        addEdge(l4v1, l5v3, DataMovementType.SCATTER_GATHER);
        l6v1 = new Vertex("l6v1", TestProcessor.getProcDesc(payload), 4, defaultResource);
        dag.addVertex(l6v1);
        addEdge(l5v1, l6v1, DataMovementType.SCATTER_GATHER);
        addEdge(l5v2, l6v1, DataMovementType.SCATTER_GATHER);
        addEdge(l5v3, l6v1, DataMovementType.SCATTER_GATHER);
    }
View Full Code Here

    if (conf != null) {
      taskCount = conf.getInt(TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS, TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT);
      payload = TezUtils.createUserPayloadFromConf(conf);
    }
    DAG dag = new DAG(name);
    Vertex v1 = new Vertex("v1", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
    Vertex v2 = new Vertex("v2", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
    Vertex v3 = new Vertex("v3", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
    dag.addVertex(v1).addVertex(v2).addVertex(v3);
    dag.addEdge(new Edge(v1, v2,
        new EdgeProperty(DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
View Full Code Here

    MRHelpers.doJobClientMagic(stage2Conf);

    byte[] stage1Payload = MRHelpers.createUserPayloadFromConf(stage1Conf);
    // Setup stage1 Vertex
    int stage1NumTasks = generateSplitsInClient ? inputSplitInfo.getNumTasks() : -1;
    Vertex stage1Vertex = new Vertex("stage1", new ProcessorDescriptor(
        FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload),
        stage1NumTasks, MRHelpers.getMapResource(stage1Conf));
    stage1Vertex.setJavaOpts(MRHelpers.getMapJavaOpts(stage1Conf));
    if (generateSplitsInClient) {
      stage1Vertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
      Map<String, LocalResource> stage1LocalResources = new HashMap<String, LocalResource>();
      stage1LocalResources.putAll(commonLocalResources);
      MRHelpers.updateLocalResourcesForInputSplits(fs, inputSplitInfo, stage1LocalResources);
      stage1Vertex.setTaskLocalResources(stage1LocalResources);
    } else {
      stage1Vertex.setTaskLocalResources(commonLocalResources);
    }
    Map<String, String> stage1Env = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(stage1Conf, stage1Env, true);
    stage1Vertex.setTaskEnvironment(stage1Env);

    // Configure the Input for stage1
    Class<? extends TezRootInputInitializer> initializerClazz = generateSplitsInClient ? null
        : MRInputAMSplitGenerator.class;
    stage1Vertex.addInput("MRInput",
        new InputDescriptor(MRInputLegacy.class.getName())
            .setUserPayload(MRHelpers.createMRInputPayload(stage1Payload, null)),
        initializerClazz);

    // Setup stage2 Vertex
    Vertex stage2Vertex = new Vertex("stage2", new ProcessorDescriptor(
        FilterByWordOutputProcessor.class.getName()).setUserPayload(MRHelpers
        .createUserPayloadFromConf(stage2Conf)), stage1NumTasks,
        MRHelpers.getMapResource(stage2Conf));
    stage2Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage2Conf)).setTaskLocalResources(commonLocalResources);
    Map<String, String> stage2Env = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(stage2Conf, stage2Env, false);
    stage2Vertex.setTaskEnvironment(stage2Env);

    // Configure the Output for stage2
    stage2Vertex.addOutput("MROutput",
        new OutputDescriptor(MROutput.class.getName()).setUserPayload(MRHelpers
            .createUserPayloadFromConf(stage2Conf)),
        MROutputCommitter.class);

    DAG dag = new DAG("FilterLinesByWord");
View Full Code Here

          dag.addEdge(e);
        }
      } else {
        // Regular vertices
        JobConf wxConf = utils.initializeVertexConf(conf, w);
        Vertex wx = utils.createVertex(wxConf, w, scratchDir, appJarLr,
          additionalLr, fs, ctx, !isFinal, work);
        dag.addVertex(wx);
        utils.addCredentials(w, dag);
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
        workToVertex.put(w, wx);
View Full Code Here

    // Tez ask us to call this even if there's no preceding vertex
    MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null);

    // finally create the vertex
    Vertex map = null;

    // use tez to combine splits
    boolean useTezGroupedSplits = true;

    int numTasks = -1;
    Class amSplitGeneratorClass = null;
    InputSplitInfo inputSplitInfo = null;
    Class inputFormatClass = conf.getClass("mapred.input.format.class",
        InputFormat.class);

    boolean vertexHasCustomInput = false;
    if (tezWork != null) {
      for (BaseWork baseWork : tezWork.getParents(mapWork)) {
        if (tezWork.getEdgeType(baseWork, mapWork) == EdgeType.CUSTOM_EDGE) {
          vertexHasCustomInput = true;
        }
      }
    }

    // we cannot currently allow grouping of splits where each split is a different input format
    // or has different deserializers similar to the checks in CombineHiveInputFormat. We do not
    // need the check for the opList because we will not process different opLists at this time.
    // Long term fix would be to have a custom input format
    // logic that groups only the splits that share the same input format
    Class<?> previousInputFormatClass = null;
    Class<?> previousDeserializerClass = null;
    for (String path : mapWork.getPathToPartitionInfo().keySet()) {
      PartitionDesc pd = mapWork.getPathToPartitionInfo().get(path);
      Class<?> currentDeserializerClass = pd.getDeserializer(conf).getClass();
      Class<?> currentInputFormatClass = pd.getInputFileFormatClass();
      if (previousInputFormatClass == null) {
        previousInputFormatClass = currentInputFormatClass;
      }
      if (previousDeserializerClass == null) {
        previousDeserializerClass = currentDeserializerClass;
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Current input format class = "+currentInputFormatClass+", previous input format class = "
          + previousInputFormatClass + ", verifying " + " current deserializer class = "
          + currentDeserializerClass + " previous deserializer class = " + previousDeserializerClass);
      }
      if ((currentInputFormatClass != previousInputFormatClass) ||
          (currentDeserializerClass != previousDeserializerClass)) {
        useTezGroupedSplits = false;
        break;
      }
    }
    if (vertexHasCustomInput) {
      // if it is the case of different input formats for different partitions, we cannot group
      // in the custom vertex for now. Long term, this can be improved to group the buckets that
      // share the same input format.
      if (useTezGroupedSplits == false) {
        conf.setBoolean(CustomPartitionVertex.GROUP_SPLITS, false);
      } else {
        conf.setBoolean(CustomPartitionVertex.GROUP_SPLITS, true);
      }
      // grouping happens in execution phase. Setting the class to TezGroupedSplitsInputFormat
      // here would cause pre-mature grouping which would be incorrect.
      inputFormatClass = HiveInputFormat.class;
      conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
      // mapreduce.tez.input.initializer.serialize.event.payload should be set to false when using
      // this plug-in to avoid getting a serialized event at run-time.
      conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
    } else if (useTezGroupedSplits) {
      // we'll set up tez to combine spits for us iff the input format
      // is HiveInputFormat
      if (inputFormatClass == HiveInputFormat.class) {
        conf.setClass("mapred.input.format.class", TezGroupedSplitsInputFormat.class, InputFormat.class);
      } else {
        conf.setClass("mapred.input.format.class", CombineHiveInputFormat.class, InputFormat.class);
        useTezGroupedSplits = false;
      }
    } else {
      conf.setClass("mapred.input.format.class", CombineHiveInputFormat.class, InputFormat.class);
    }

    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)) {
      // if we're generating the splits in the AM, we just need to set
      // the correct plugin.
      amSplitGeneratorClass = MRInputAMSplitGenerator.class;
    } else {
      // client side split generation means we have to compute them now
      inputSplitInfo = MRHelpers.generateInputSplits(conf,
          new Path(tezDir, "split_"+mapWork.getName().replaceAll(" ", "_")));
      numTasks = inputSplitInfo.getNumTasks();
    }

    byte[] serializedConf = MRHelpers.createUserPayloadFromConf(conf);
    map = new Vertex(mapWork.getName(),
        new ProcessorDescriptor(MapTezProcessor.class.getName()).
        setUserPayload(serializedConf), numTasks, getContainerResource(conf));
    Map<String, String> environment = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(conf, environment, true);
    map.setTaskEnvironment(environment);
    map.setJavaOpts(getContainerJavaOpts(conf));

    assert mapWork.getAliasToWork().keySet().size() == 1;

    String alias = mapWork.getAliasToWork().keySet().iterator().next();

    byte[] mrInput = null;
    if (useTezGroupedSplits) {
      mrInput = MRHelpers.createMRInputPayloadWithGrouping(serializedConf,
          HiveInputFormat.class.getName());
    } else {
      mrInput = MRHelpers.createMRInputPayload(serializedConf, null);
    }
    map.addInput(alias,
        new InputDescriptor(MRInputLegacy.class.getName()).
        setUserPayload(mrInput), amSplitGeneratorClass);

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(getBaseName(appJarLr), appJarLr);
    for (LocalResource lr: additionalLr) {
      localResources.put(getBaseName(lr), lr);
    }

    if (inputSplitInfo != null) {
      // only relevant for client-side split generation
      map.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
      MRHelpers.updateLocalResourcesForInputSplits(FileSystem.get(conf), inputSplitInfo,
          localResources);
    }

    map.setTaskLocalResources(localResources);
    return map;
  }
View Full Code Here

    // Call once here, will be updated when we find edges
    MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null);

    // create the vertex
    Vertex reducer = new Vertex(reduceWork.getName(),
        new ProcessorDescriptor(ReduceTezProcessor.class.getName()).
        setUserPayload(MRHelpers.createUserPayloadFromConf(conf)),
        reduceWork.getNumReduceTasks(), getContainerResource(conf));

    Map<String, String> environment = new HashMap<String, String>();

    MRHelpers.updateEnvironmentForMRTasks(conf, environment, false);
    reducer.setTaskEnvironment(environment);

    reducer.setJavaOpts(getContainerJavaOpts(conf));

    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(getBaseName(appJarLr), appJarLr);
    for (LocalResource lr: additionalLr) {
      localResources.put(getBaseName(lr), lr);
    }
    reducer.setTaskLocalResources(localResources);

    return reducer;
  }
View Full Code Here

  public Vertex createVertex(JobConf conf, BaseWork work,
      Path scratchDir, LocalResource appJarLr,
      List<LocalResource> additionalLr,
      FileSystem fileSystem, Context ctx, boolean hasChildren, TezWork tezWork) throws Exception {

    Vertex v = null;
    // simply dispatch the call to the right method for the actual (sub-) type of
    // BaseWork.
    if (work instanceof MapWork) {
      v = createVertex(conf, (MapWork) work, appJarLr,
          additionalLr, fileSystem, scratchDir, ctx, tezWork);
    } else if (work instanceof ReduceWork) {
      v = createVertex(conf, (ReduceWork) work, appJarLr,
          additionalLr, fileSystem, scratchDir, ctx);
    } else {
      // something is seriously wrong if this is happening
      throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
    }

    // initialize stats publisher if necessary
    if (work.isGatheringStats()) {
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(conf);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        if (!statsPublisher.init(conf)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw
              new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }
      }
    }


    // final vertices need to have at least one output
    if (!hasChildren) {
      v.addOutput("out_"+work.getName(),
          new OutputDescriptor(MROutput.class.getName())
          .setUserPayload(MRHelpers.createUserPayloadFromConf(conf)));
    }

    return v;
View Full Code Here

        any(List.class), any(FileSystem.class), any(Context.class), anyBoolean(), any(TezWork.class))).thenAnswer(new Answer<Vertex>() {

          @Override
          public Vertex answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return new Vertex(((BaseWork)args[1]).getName(),
                mock(ProcessorDescriptor.class), 0, mock(Resource.class));
          }
        });

    when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(JobConf.class),
View Full Code Here

  @Test
  public void testBuildDag() throws IllegalArgumentException, IOException, Exception {
    DAG dag = task.build(conf, work, path, appLr, null, new Context(conf));
    for (BaseWork w: work.getAllWork()) {
      Vertex v = dag.getVertex(w.getName());
      assertNotNull(v);
      List<Vertex> outs = v.getOutputVertices();
      for (BaseWork x: work.getChildren(w)) {
        boolean found = false;
        for (Vertex u: outs) {
          if (u.getVertexName().equals(x.getName())) {
            found = true;
View Full Code Here

TOP

Related Classes of org.apache.tez.dag.api.Vertex

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.