Package org.apache.tez.dag.api

Examples of org.apache.tez.dag.api.Vertex


    // Excerpt: tail of a method that builds and returns the Tez Vertex for a
    // reduce work unit. The method header is not part of this excerpt.

    // Call once here, will be updated when we find edges
    MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null);

    // create the vertex
    // The vertex is named after the work unit, runs ReduceTezProcessor with the
    // configuration serialized as its user payload, and takes its task count
    // from reduceWork.getNumReduceTasks().
    Vertex reducer = new Vertex(reduceWork.getName(),
        new ProcessorDescriptor(ReduceTezProcessor.class.getName()).
        setUserPayload(MRHelpers.createUserPayloadFromConf(conf)),
        reduceWork.getNumReduceTasks(), getContainerResource(conf));

    // Task environment is derived from the configuration.
    Map<String, String> environment = new HashMap<String, String>();

    // NOTE(review): the boolean argument is presumably an "is map task" flag
    // (false for a reducer) - confirm against MRHelpers.
    MRHelpers.updateEnvironmentForMRTasks(conf, environment, false);
    reducer.setTaskEnvironment(environment);

    reducer.setJavaOpts(getContainerJavaOpts(conf));

    // Local resources: the application jar plus every additional resource,
    // each keyed by the value getBaseName(...) returns for it.
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put(getBaseName(appJarLr), appJarLr);
    for (LocalResource lr: additionalLr) {
      localResources.put(getBaseName(lr), lr);
    }
    reducer.setTaskLocalResources(localResources);

    return reducer;
  }
View Full Code Here


  /**
   * Creates the Tez vertex for a unit of work by dispatching to the overload
   * that matches the concrete type of {@code work}.
   *
   * <p>After the vertex is created, a stats publisher is initialized when the
   * work gathers stats, and an {@code MROutput} is attached when the vertex
   * has no children.
   *
   * @param conf job configuration; also serialized as the output's user payload
   * @param work the work unit; must be a {@code MapWork} or a {@code ReduceWork}
   * @param scratchDir forwarded to the type-specific overload
   * @param appJarLr forwarded to the type-specific overload
   * @param additionalLr forwarded to the type-specific overload
   * @param fileSystem forwarded to the type-specific overload
   * @param ctx forwarded to the type-specific overload
   * @param hasChildren when {@code false}, an output named
   *        {@code "out_" + work.getName()} is added to the vertex
   * @param tezWork forwarded to the {@code MapWork} overload only
   * @return the created vertex
   * @throws Exception a {@code HiveException} if {@code work} is neither a
   *         {@code MapWork} nor a {@code ReduceWork}, or if the stats publisher
   *         fails to initialize while {@code HIVE_STATS_RELIABLE} is enabled
   */
  public Vertex createVertex(JobConf conf, BaseWork work,
      Path scratchDir, LocalResource appJarLr,
      List<LocalResource> additionalLr,
      FileSystem fileSystem, Context ctx, boolean hasChildren, TezWork tezWork) throws Exception {

    Vertex v = null;
    // simply dispatch the call to the right method for the actual (sub-) type of
    // BaseWork.
    if (work instanceof MapWork) {
      v = createVertex(conf, (MapWork) work, appJarLr,
          additionalLr, fileSystem, scratchDir, ctx, tezWork);
    } else if (work instanceof ReduceWork) {
      v = createVertex(conf, (ReduceWork) work, appJarLr,
          additionalLr, fileSystem, scratchDir, ctx);
    } else {
      // something is seriously wrong if this is happening
      throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
    }

    // initialize stats publisher if necessary
    if (work.isGatheringStats()) {
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(conf);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        // A failed init is only fatal when HIVE_STATS_RELIABLE is set;
        // otherwise it is ignored and vertex creation continues.
        if (!statsPublisher.init(conf)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw
              new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }
      }
    }


    // final vertices need to have at least one output
    if (!hasChildren) {
      v.addOutput("out_"+work.getName(),
          new OutputDescriptor(MROutput.class.getName())
          .setUserPayload(MRHelpers.createUserPayloadFromConf(conf)));
    }

    return v;
View Full Code Here

  // Excerpt: start of a test that builds a DAG with a single vertex running
  // SleepProcessor. The rest of the test body is not shown.
  @Test(timeout = 60000)
  public void testSleepJob() throws TezException, IOException, InterruptedException {
    // NOTE(review): the meaning of the constructor argument (1) is not visible
    // here - presumably a sleep duration; confirm against SleepProcessorConfig.
    SleepProcessorConfig spConf = new SleepProcessorConfig(1);

    DAG dag = new DAG("TezSleepProcessor");
    // One vertex with a task count of 1; the processor receives the sleep
    // configuration as its user payload.
    Vertex vertex = new Vertex("SleepVertex", new ProcessorDescriptor(
        SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1,
        Resource.newInstance(1024, 1));
    dag.addVertex(vertex);

    // Tez configuration seeded from the mini cluster's configuration.
    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
View Full Code Here

    // Excerpt: builds the three vertices ("initialmap", "intermediate_reducer",
    // "finalreduce") of a map-reduce-reduce DAG. The enclosing method and the
    // loop that follows are truncated in this excerpt.

    // --- map stage ---
    byte[] mapPayload = MRHelpers.createUserPayloadFromConf(mapStageConf);
    byte[] mapInputPayload = MRHelpers.createMRInputPayloadWithGrouping(mapPayload,
      TextInputFormat.class.getName());
    // When splits are generated in the client the task count is known up
    // front; otherwise -1 is passed and an initializer is attached below.
    int numMaps = generateSplitsInClient ? inputSplitInfo.getNumTasks() : -1;
    Vertex mapVertex = new Vertex("initialmap", new ProcessorDescriptor(
        MapProcessor.class.getName()).setUserPayload(mapPayload),
        numMaps, MRHelpers.getMapResource(mapStageConf));
    mapVertex.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    if (generateSplitsInClient) {
      // Client-side splits: set location hints and give the map vertex its own
      // copy of the common resources, extended for the input splits.
      mapVertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
      Map<String, LocalResource> mapLocalResources =
          new HashMap<String, LocalResource>();
      mapLocalResources.putAll(commonLocalResources);
      MRHelpers.updateLocalResourcesForInputSplits(fs, inputSplitInfo,
          mapLocalResources);
      mapVertex.setTaskLocalResources(mapLocalResources);
    } else {
      mapVertex.setTaskLocalResources(commonLocalResources);
    }

    Map<String, String> mapEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    mapVertex.setTaskEnvironment(mapEnv);
    // No initializer when splits were already generated in the client.
    Class<? extends TezRootInputInitializer> initializerClazz = generateSplitsInClient ? null
        : MRInputAMSplitGenerator.class;
    MRHelpers.addMRInput(mapVertex, mapInputPayload, initializerClazz);
    vertices.add(mapVertex);

    // --- intermediate reduce stage (task count 2) ---
    Vertex ivertex = new Vertex("intermediate_reducer", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).
        setUserPayload(MRHelpers.createUserPayloadFromConf(iReduceStageConf)),
        2,
        MRHelpers.getReduceResource(iReduceStageConf));
    ivertex.setJavaOpts(MRHelpers.getReduceJavaOpts(iReduceStageConf));
    ivertex.setTaskLocalResources(commonLocalResources);
    Map<String, String> ireduceEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(iReduceStageConf, ireduceEnv, false);
    ivertex.setTaskEnvironment(ireduceEnv);
    vertices.add(ivertex);

    // --- final reduce stage (task count 1), which also gets the MR output ---
    byte[] finalReducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
    Vertex finalReduceVertex = new Vertex("finalreduce",
        new ProcessorDescriptor(
            ReduceProcessor.class.getName()).setUserPayload(finalReducePayload),
                1, MRHelpers.getReduceResource(finalReduceConf));
    finalReduceVertex.setJavaOpts(
        MRHelpers.getReduceJavaOpts(finalReduceConf));
    finalReduceVertex.setTaskLocalResources(commonLocalResources);
    Map<String, String> reduceEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(finalReduceConf, reduceEnv, false);
    finalReduceVertex.setTaskEnvironment(reduceEnv);
    MRHelpers.addMROutputLegacy(finalReduceVertex, finalReducePayload);
    vertices.add(finalReduceVertex);

    // The DAG name carries the dagIndex suffix; the loop body over the
    // collected vertices is cut off in this excerpt.
    DAG dag = new DAG("OrderedWordCount" + dagIndex);
    for (int i = 0; i < vertices.size(); ++i) {
View Full Code Here

    // Excerpt: creates the "map", "ireduce" and "reduce" vertices of a
    // three-stage DAG and adds them to the DAG. The enclosing method is
    // truncated at both ends of this excerpt.

    // -1 is passed as the task count when splits are generated in the AM.
    int stage1NumTasks = genSplitsInAM ? -1 : inputSplitInfo.getNumTasks();
    // With AM-side split generation, use the supplied initializer class or
    // fall back to MRInputAMSplitGenerator; otherwise no initializer is used.
    Class<? extends TezRootInputInitializer> inputInitializerClazz =
        genSplitsInAM ? (initializerClass == null ? MRInputAMSplitGenerator.class : initializerClass)
        : null;
    // NOTE(review): this logs the supplied initializerClass, not the resolved
    // inputInitializerClazz actually passed to addMRInput - confirm intent.
    LOG.info("Using initializer class: " + initializerClass);
    Vertex stage1Vertex = new Vertex("map", new ProcessorDescriptor(
        MapProcessor.class.getName()).setUserPayload(stage1Payload),
        stage1NumTasks, Resource.newInstance(256, 1));
    MRHelpers.addMRInput(stage1Vertex, stage1InputPayload, inputInitializerClazz);
    Vertex stage2Vertex = new Vertex("ireduce", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).setUserPayload(
        MRHelpers.createUserPayloadFromConf(stage2Conf)),
        1, Resource.newInstance(256, 1));
    Vertex stage3Vertex = new Vertex("reduce", new ProcessorDescriptor(
        ReduceProcessor.class.getName()).setUserPayload(stage3Payload),
        1, Resource.newInstance(256, 1));
    // Only the last stage gets an MR output.
    MRHelpers.addMROutputLegacy(stage3Vertex, stage3Payload);

    // Left empty in the visible code; shared by all three vertices.
    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();
    Map<String, String> commonEnv = createCommonEnv();

    if (!genSplitsInAM) {
      // Client-side splits: register the splits file and the splits meta-info
      // file as local resources of stage 1, and set task location hints.
      // TODO Use utility method post TEZ-205.
      Map<String, LocalResource> stage1LocalResources = new HashMap<String, LocalResource>();
      stage1LocalResources.put(
          inputSplitInfo.getSplitsFile().getName(),
          createLocalResource(remoteFs, inputSplitInfo.getSplitsFile(),
              LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
      stage1LocalResources.put(
          inputSplitInfo.getSplitsMetaInfoFile().getName(),
          createLocalResource(remoteFs, inputSplitInfo.getSplitsMetaInfoFile(),
              LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
      stage1LocalResources.putAll(commonLocalResources);

      stage1Vertex.setTaskLocalResources(stage1LocalResources);
      stage1Vertex.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints());
    } else {
      stage1Vertex.setTaskLocalResources(commonLocalResources);
    }

    stage1Vertex.setJavaOpts(MRHelpers.getMapJavaOpts(stage1Conf));
    stage1Vertex.setTaskEnvironment(commonEnv);

    // TODO env, resources

    stage2Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage2Conf));
    stage2Vertex.setTaskLocalResources(commonLocalResources);
    stage2Vertex.setTaskEnvironment(commonEnv);

    stage3Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage3Conf));
    stage3Vertex.setTaskLocalResources(commonLocalResources);
    stage3Vertex.setTaskEnvironment(commonEnv);

    dag.addVertex(stage1Vertex);
    dag.addVertex(stage2Vertex);
    dag.addVertex(stage3Vertex);
View Full Code Here

    // Excerpt: creates one Tez vertex per flow node, then starts wiring edges
    // between them. The enclosing method and the remainder of the edge loop
    // are truncated in this excerpt.

    // Phase 1: one vertex per flow node, remembered in vertexMap for lookup
    // while adding edges.
    while( iterator.hasNext() )
      {
      FlowNode flowNode = iterator.next();

      Vertex vertex = createVertex( flowProcess, initializedConfig, flowNode );
      dag.addVertex( vertex );

      vertexMap.put( flowNode, vertex );
      }

    LinkedList<ProcessGraph.ProcessEdge> processedEdges = new LinkedList<>();

    // Phase 2: walk every edge of the node graph.
    for( ProcessGraph.ProcessEdge processEdge : nodeGraph.edgeSet() )
      {
      // Skip edges already recorded in processedEdges (where that list gets
      // populated is not visible in this excerpt).
      if( processedEdges.contains( processEdge ) )
        continue;

      FlowNode edgeTargetFlowNode = nodeGraph.getEdgeTarget( processEdge );

      FlowElement flowElement = processEdge.getFlowElement();
      List<FlowNode> sourceNodes = nodeGraph.getElementSourceProcesses( flowElement );

      EdgeProperty edgeProperty = createEdgeProperty( initializedConfig, processEdge );

      Vertex targetVertex = vertexMap.get( edgeTargetFlowNode );

      // Direct source-to-target edge when the element has a single source
      // node, or is a CoGroup or a Boundary.
      if( sourceNodes.size() == 1 || flowElement instanceof CoGroup || flowElement instanceof Boundary ) // todo: create group vertices around incoming ordinal
        {
        FlowNode edgeSourceFlowNode = nodeGraph.getEdgeSource( processEdge );
        Vertex sourceVertex = vertexMap.get( edgeSourceFlowNode );

        LOG.debug( "adding edge between: {} and {}", sourceVertex, targetVertex );

        dag.addEdge( Edge.create( sourceVertex, targetVertex, edgeProperty ) );
        }
View Full Code Here

    // Excerpt: creates the vertex for a flow node and attaches a data source
    // for every source Tap and a data sink for every sink Tap. The enclosing
    // method is truncated at both ends of this excerpt.

    int parallelism = getParallelism( flowNode, conf );

    // A parallelism of 0 is rejected up front.
    if( parallelism == 0 )
      throw new FlowException( getName(), "the default number of gather partitions must be set, see cascading.flow.FlowRuntimeProps" );

    Vertex vertex = newVertex( flowNode, conf, parallelism );

    // Sources: only Tap elements become MRInput data sources; other elements
    // are skipped.
    for( FlowElement flowElement : sourceConfigs.keySet() )
      {
      if( !( flowElement instanceof Tap ) )
        continue;

      Configuration sourceConf = sourceConfigs.get( flowElement );
      MRInput.MRInputConfigBuilder configBuilder = MRInput.createConfigBuilder( sourceConf, null );

      // grouping splits loses file name info, breaking partition tap default impl
      if( flowElement instanceof PartitionTap ) // todo: generify
        configBuilder.groupSplits( false );

      DataSourceDescriptor dataSourceDescriptor = configBuilder.build();

      vertex.addDataSource( FlowElements.id( flowElement ), dataSourceDescriptor );
      }

    // Sinks: only Tap elements become MROutput data sinks; other elements are
    // skipped.
    for( FlowElement flowElement : sinkConfigs.keySet() )
      {
      if( !( flowElement instanceof Tap ) )
        continue;

      Configuration sinkConf = sinkConfigs.get( flowElement );

      Class outputFormatClass;
      String outputPath;

      // we have to set sane defaults if not set by the tap
      // typically the case of MultiSinkTap
      // Looks up "mapred.output.format.class" first, falling back to
      // MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR.
      String formatClassName = sinkConf.get( "mapred.output.format.class", sinkConf.get( MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR ) );

      if( formatClassName == null )
        {
        outputFormatClass = TextOutputFormat.class; // unused, use "new" api, its the default
        outputPath = Hfs.getTempPath( sinkConf ).toString(); // unused
        }
      else
        {
        outputFormatClass = Util.loadClass( formatClassName );
        outputPath = sinkConf.get( "mapred.output.dir" );
        }

      // A format class was configured but no output dir: fall back to a temp
      // path so the config builder still receives a path.
      if( outputPath == null && sinkConf.get( "mapred.output.dir" ) == null )
        outputPath = Hfs.getTempPath( sinkConf ).toString(); // unused

      MROutput.MROutputConfigBuilder configBuilder = MROutput.createConfigBuilder( sinkConf, outputFormatClass, outputPath );

      DataSinkDescriptor dataSinkDescriptor = configBuilder.build();

      vertex.addDataSink( FlowElements.id( flowElement ), dataSinkDescriptor );
      }

    addRemoteDebug( flowNode, vertex );
    addRemoteProfiling( flowNode, vertex );
View Full Code Here

    // Excerpt: tail of a helper that builds and returns the Vertex for a flow
    // node. The method header is not part of this excerpt.

    // The vertex runs FlowProcessor, with the payload produced by
    // getPayload( conf ) as its user payload.
    ProcessorDescriptor descriptor = ProcessorDescriptor.create( FlowProcessor.class.getName() );

    descriptor.setUserPayload( getPayload( conf ) );

    // The vertex is named after the flow node's ID.
    Vertex vertex = Vertex.create( flowNode.getID(), descriptor, parallelism );

    // The task environment is only set when one is available.
    if( environment != null )
      vertex.setTaskEnvironment( environment );

    return vertex;
    }
View Full Code Here

TOP

Related Classes of org.apache.tez.dag.api.Vertex

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.