Examples of org.apache.tez.dag.api.Vertex

org.apache.tez.dag.api.Vertex
Defines a vertex in the DAG. It represents the application logic that processes and transforms the input data to create the output data. The vertex represents the template from which tasks are created to execute the application in parallel across a distributed execution environment.


    // Call once here, will be updated when we find edges
    MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null);


    // create the vertex
    Vertex reducer = new Vertex(reduceWork.getName(),
        new ProcessorDescriptor(ReduceTezProcessor.class.getName()).
        setUserPayload(MRHelpers.createUserPayloadFromConf(conf)),
        reduceWork.getNumReduceTasks(), getContainerResource(conf));


    Map<String, String> environment = new HashMap<String, String>();


    MRHelpers.updateEnvironmentForMRTasks(conf, environment, false);
    reducer.setTaskEnvironment(environment);


    reducer.setJavaOpts(getContainerJavaOpts(conf));


    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(getBaseName(appJarLr), appJarLr);
    for (LocalResource lr: additionalLr) {
      localResources.put(getBaseName(lr), lr);
    }
    reducer.setTaskLocalResources(localResources);


    return reducer;
  }

View Full Code Here

  public Vertex createVertex(JobConf conf, BaseWork work,
      Path scratchDir, LocalResource appJarLr, 
      List<LocalResource> additionalLr,
      FileSystem fileSystem, Context ctx, boolean hasChildren, TezWork tezWork) throws Exception {


    Vertex v = null;
    // simply dispatch the call to the right method for the actual (sub-) type of
    // BaseWork.
    if (work instanceof MapWork) {
      v = createVertex(conf, (MapWork) work, appJarLr,
          additionalLr, fileSystem, scratchDir, ctx, tezWork);
    } else if (work instanceof ReduceWork) {
      v = createVertex(conf, (ReduceWork) work, appJarLr,
          additionalLr, fileSystem, scratchDir, ctx);
    } else {
      // something is seriously wrong if this is happening
      throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
    }


    // initialize stats publisher if necessary
    if (work.isGatheringStats()) {
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(conf);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        if (!statsPublisher.init(conf)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw
              new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }
      }
    }




    // final vertices need to have at least one output
    if (!hasChildren) {
      v.addOutput("out_"+work.getName(),
          new OutputDescriptor(MROutput.class.getName())
          .setUserPayload(MRHelpers.createUserPayloadFromConf(conf)));
    }


    return v;

View Full Code Here

  @Test(timeout = 60000)
  public void testSleepJob() throws TezException, IOException, InterruptedException {
    SleepProcessorConfig spConf = new SleepProcessorConfig(1);


    DAG dag = new DAG("TezSleepProcessor");
    Vertex vertex = new Vertex("SleepVertex", new ProcessorDescriptor(
        SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
        Resource.newInstance(1024, 1));
    dag.addVertex(vertex);


    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());

View Full Code Here


    byte[] mapPayload = MRHelpers.createUserPayloadFromConf(mapStageConf);
    byte[] mapInputPayload = MRHelpers.createMRInputPayloadWithGrouping(mapPayload,
      TextInputFormat.class.getName());
    int numMaps = generateSplitsInClient ? inputSplitInfo.getNumTasks() : -1;
    Vertex mapVertex = new Vertex("initialmap", new ProcessorDescriptor(
        MapProcessor.class.getName()).setUserPayload(mapPayload),
        numMaps, MRHelpers.getMapResource(mapStageConf));
    mapVertex.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    if (generateSplitsInClient) {
      mapVertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
      Map<String, LocalResource> mapLocalResources =
          new HashMap<String, LocalResource>();
      mapLocalResources.putAll(commonLocalResources);
      MRHelpers.updateLocalResourcesForInputSplits(fs, inputSplitInfo,
          mapLocalResources);
      mapVertex.setTaskLocalResources(mapLocalResources);
    } else {
      mapVertex.setTaskLocalResources(commonLocalResources);
    }


    Map<String, String> mapEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    mapVertex.setTaskEnvironment(mapEnv);
    Class<? extends TezRootInputInitializer> initializerClazz = generateSplitsInClient ? null
        : MRInputAMSplitGenerator.class;
    MRHelpers.addMRInput(mapVertex, mapInputPayload, initializerClazz);
    vertices.add(mapVertex);


    Vertex ivertex = new Vertex("intermediate_reducer", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).
        setUserPayload(MRHelpers.createUserPayloadFromConf(iReduceStageConf)),
        2,
        MRHelpers.getReduceResource(iReduceStageConf));
    ivertex.setJavaOpts(MRHelpers.getReduceJavaOpts(iReduceStageConf));
    ivertex.setTaskLocalResources(commonLocalResources);
    Map<String, String> ireduceEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(iReduceStageConf, ireduceEnv, false);
    ivertex.setTaskEnvironment(ireduceEnv);
    vertices.add(ivertex);


    byte[] finalReducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
    Vertex finalReduceVertex = new Vertex("finalreduce",
        new ProcessorDescriptor(
            ReduceProcessor.class.getName()).setUserPayload(finalReducePayload),
                1, MRHelpers.getReduceResource(finalReduceConf));
    finalReduceVertex.setJavaOpts(
        MRHelpers.getReduceJavaOpts(finalReduceConf));
    finalReduceVertex.setTaskLocalResources(commonLocalResources);
    Map<String, String> reduceEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(finalReduceConf, reduceEnv, false);
    finalReduceVertex.setTaskEnvironment(reduceEnv);
    MRHelpers.addMROutputLegacy(finalReduceVertex, finalReducePayload);
    vertices.add(finalReduceVertex);


    DAG dag = new DAG("OrderedWordCount" + dagIndex);
    for (int i = 0; i < vertices.size(); ++i) {

View Full Code Here

    int stage1NumTasks = genSplitsInAM ? -1 : inputSplitInfo.getNumTasks();
    Class<? extends TezRootInputInitializer> inputInitializerClazz =
        genSplitsInAM ? (initializerClass == null ? MRInputAMSplitGenerator.class : initializerClass)
        : null;
    LOG.info("Using initializer class: " + initializerClass);
    Vertex stage1Vertex = new Vertex("map", new ProcessorDescriptor(
        MapProcessor.class.getName()).setUserPayload(stage1Payload),
        stage1NumTasks, Resource.newInstance(256, 1));
    MRHelpers.addMRInput(stage1Vertex, stage1InputPayload, inputInitializerClazz);
    Vertex stage2Vertex = new Vertex("ireduce", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).setUserPayload(
        MRHelpers.createUserPayloadFromConf(stage2Conf)),
        1, Resource.newInstance(256, 1));
    Vertex stage3Vertex = new Vertex("reduce", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).setUserPayload(stage3Payload),
        1, Resource.newInstance(256, 1));
    MRHelpers.addMROutputLegacy(stage3Vertex, stage3Payload);


    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    Map<String, String> commonEnv = createCommonEnv();


    if (!genSplitsInAM) {
      // TODO Use utility method post TEZ-205.
      Map<String, LocalResource> stage1LocalResources = new HashMap<String, LocalResource>();
      stage1LocalResources.put(
          inputSplitInfo.getSplitsFile().getName(),
          createLocalResource(remoteFs, inputSplitInfo.getSplitsFile(),
              LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
      stage1LocalResources.put(
          inputSplitInfo.getSplitsMetaInfoFile().getName(),
          createLocalResource(remoteFs, inputSplitInfo.getSplitsMetaInfoFile(),
              LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
      stage1LocalResources.putAll(commonLocalResources);


      stage1Vertex.setTaskLocalResources(stage1LocalResources);
      stage1Vertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
    } else {
      stage1Vertex.setTaskLocalResources(commonLocalResources);
    }


    stage1Vertex.setJavaOpts(MRHelpers.getMapJavaOpts(stage1Conf));
    stage1Vertex.setTaskEnvironment(commonEnv);


    // TODO env, resources


    stage2Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage2Conf));
    stage2Vertex.setTaskLocalResources(commonLocalResources);
    stage2Vertex.setTaskEnvironment(commonEnv);


    stage3Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage3Conf));
    stage3Vertex.setTaskLocalResources(commonLocalResources);
    stage3Vertex.setTaskEnvironment(commonEnv);


    dag.addVertex(stage1Vertex);
    dag.addVertex(stage2Vertex);
    dag.addVertex(stage3Vertex);

View Full Code Here


    while( iterator.hasNext() )
      {
      FlowNode flowNode = iterator.next();


      Vertex vertex = createVertex( flowProcess, initializedConfig, flowNode );
      dag.addVertex( vertex );


      vertexMap.put( flowNode, vertex );
      }


    LinkedList<ProcessGraph.ProcessEdge> processedEdges = new LinkedList<>();


    for( ProcessGraph.ProcessEdge processEdge : nodeGraph.edgeSet() )
      {
      if( processedEdges.contains( processEdge ) )
        continue;


      FlowNode edgeTargetFlowNode = nodeGraph.getEdgeTarget( processEdge );


      FlowElement flowElement = processEdge.getFlowElement();
      List<FlowNode> sourceNodes = nodeGraph.getElementSourceProcesses( flowElement );


      EdgeProperty edgeProperty = createEdgeProperty( initializedConfig, processEdge );


      Vertex targetVertex = vertexMap.get( edgeTargetFlowNode );


      if( sourceNodes.size() == 1 || flowElement instanceof CoGroup || flowElement instanceof Boundary ) // todo: create group vertices around incoming ordinal
        {
        FlowNode edgeSourceFlowNode = nodeGraph.getEdgeSource( processEdge );
        Vertex sourceVertex = vertexMap.get( edgeSourceFlowNode );


        LOG.debug( "adding edge between: {} and {}", sourceVertex, targetVertex );


        dag.addEdge( Edge.create( sourceVertex, targetVertex, edgeProperty ) );
        }

View Full Code Here

    int parallelism = getParallelism( flowNode, conf );


    if( parallelism == 0 )
      throw new FlowException( getName(), "the default number of gather partitions must be set, see cascading.flow.FlowRuntimeProps" );


    Vertex vertex = newVertex( flowNode, conf, parallelism );


    for( FlowElement flowElement : sourceConfigs.keySet() )
      {
      if( !( flowElement instanceof Tap ) )
        continue;


      Configuration sourceConf = sourceConfigs.get( flowElement );
      MRInput.MRInputConfigBuilder configBuilder = MRInput.createConfigBuilder( sourceConf, null );


      // grouping splits loses file name info, breaking partition tap default impl
      if( flowElement instanceof PartitionTap ) // todo: generify
        configBuilder.groupSplits( false );


      DataSourceDescriptor dataSourceDescriptor = configBuilder.build();


      vertex.addDataSource( FlowElements.id( flowElement ), dataSourceDescriptor );
      }


    for( FlowElement flowElement : sinkConfigs.keySet() )
      {
      if( !( flowElement instanceof Tap ) )
        continue;


      Configuration sinkConf = sinkConfigs.get( flowElement );


      Class outputFormatClass;
      String outputPath;


      // we have to set sane defaults if not set by the tap
      // typically the case of MultiSinkTap
      String formatClassName = sinkConf.get( "mapred.output.format.class", sinkConf.get( MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR ) );


      if( formatClassName == null )
        {
        outputFormatClass = TextOutputFormat.class; // unused, use "new" api, its the default
        outputPath = Hfs.getTempPath( sinkConf ).toString(); // unused
        }
      else
        {
        outputFormatClass = Util.loadClass( formatClassName );
        outputPath = sinkConf.get( "mapred.output.dir" );
        }


      if( outputPath == null && sinkConf.get( "mapred.output.dir" ) == null )
        outputPath = Hfs.getTempPath( sinkConf ).toString(); // unused


      MROutput.MROutputConfigBuilder configBuilder = MROutput.createConfigBuilder( sinkConf, outputFormatClass, outputPath );


      DataSinkDescriptor dataSinkDescriptor = configBuilder.build();


      vertex.addDataSink( FlowElements.id( flowElement ), dataSinkDescriptor );
      }


    addRemoteDebug( flowNode, vertex );
    addRemoteProfiling( flowNode, vertex );

View Full Code Here


    ProcessorDescriptor descriptor = ProcessorDescriptor.create( FlowProcessor.class.getName() );


    descriptor.setUserPayload( getPayload( conf ) );


    Vertex vertex = Vertex.create( flowNode.getID(), descriptor, parallelism );


    if( environment != null )
      vertex.setTaskEnvironment( environment );


    return vertex;
    }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.tez.dag.api.Vertex

cascading.flow.tez.Hadoop2TezFlowStep

org.apache.hadoop.hive.ql.exec.tez.DagUtils

org.apache.hadoop.hive.ql.exec.tez.TestTezTask

org.apache.hadoop.hive.ql.exec.tez.TezJobMonitor

org.apache.hadoop.hive.ql.exec.tez.TezTask

org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder

org.apache.pig.tez.TestTezJobControlCompiler

org.apache.pig.tools.pigstats.tez.TezDAGStats

org.apache.tez.client.TestTezClient

org.apache.tez.examples.JoinDataGen

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.