Package org.apache.tez.dag.api

Examples of org.apache.tez.dag.api.InputDescriptor


      this.assigned = assignedSize;
    }
  }

  private InputDescriptor createTestInputDescriptor(Class<? extends LogicalInput> inputClazz) {
    InputDescriptor desc = mock(InputDescriptor.class);
    doReturn(inputClazz.getName()).when(desc).getClassName();
    return desc;
  }
View Full Code Here


   
    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, mapConf, mapInput);
   
    InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
        new InputDescriptor(MRInputLegacy.class.getName())
            .setUserPayload(MRHelpers.createMRInputPayload(mapConf, null)),
        0);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", new OutputDescriptor(LocalOnFileSorterOutput.class.getName()), 1);
    // Run a map
    LogicalIOProcessorRuntimeTask mapTask = MapUtils.createLogicalTask(localFs, workDir, mapConf, 0,
        mapInput, new TestUmbilical(), dagName, mapVertexName,
        Collections.singletonList(mapInputSpec),
        Collections.singletonList(mapOutputSpec));

    mapTask.initialize();
    mapTask.run();
    mapTask.close();
   
    LOG.info("Starting reduce...");
   
    Token<JobTokenIdentifier> shuffleToken = new Token<JobTokenIdentifier>();
   
    Configuration reduceStageConf = MultiStageMRConfigUtil.getConfForVertex(conf,
        reduceVertexName);
    JobConf reduceConf = new JobConf(reduceStageConf);
    reduceConf.setOutputFormat(SequenceFileOutputFormat.class);
    reduceConf.set(TezJobConfig.TASK_LOCAL_RESOURCE_DIR, new Path(workDir,
        "localized-resources").toUri().toString());
    FileOutputFormat.setOutputPath(reduceConf, new Path(workDir, "output"));
    ProcessorDescriptor reduceProcessorDesc = new ProcessorDescriptor(
        ReduceProcessor.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(reduceConf));
   
    InputSpec reduceInputSpec = new InputSpec(mapVertexName,
        new InputDescriptor(LocalMergedInput.class.getName()), 1);
    OutputSpec reduceOutputSpec = new OutputSpec("NullDestinationVertex",
        new OutputDescriptor(MROutputLegacy.class.getName()), 1);

    // Now run a reduce
    TaskSpec taskSpec = new TaskSpec(
View Full Code Here

    dag.addVertex(stage3Vertex);

    Edge edge1 = new Edge(stage1Vertex, stage2Vertex, new EdgeProperty(
        DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL, new OutputDescriptor(
        OnFileSortedOutput.class.getName()), new InputDescriptor(
                ShuffledMergedInputLegacy.class.getName())));
    Edge edge2 = new Edge(stage2Vertex, stage3Vertex, new EdgeProperty(
        DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL, new OutputDescriptor(
        OnFileSortedOutput.class.getName()), new InputDescriptor(
                ShuffledMergedInputLegacy.class.getName())));

    dag.addEdge(edge1);
    dag.addEdge(edge2);
View Full Code Here

   
   
    MapUtils.generateInputSplit(localFs, workDir, job, mapInput);
   
    InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
        new InputDescriptor(MRInputLegacy.class.getName())
            .setUserPayload(MRHelpers.createMRInputPayload(job, null)),
        0);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", new OutputDescriptor(LocalOnFileSorterOutput.class.getName()), 1);

    LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, job, 0,
View Full Code Here

  @Test(timeout=120000)
  public void testDelayedInit() throws Exception {
    DAG dag = SimpleVTestDAG.createDAG("DelayedInitDAG", null);
    dag.getVertex("v1").addInput("i1",
        new InputDescriptor(NoOpInput.class.getName()),
        FailingInputInitializer.class);
    runDAGAndVerify(dag, State.SUCCEEDED);
  }
View Full Code Here

      if (i > 0) {
        EdgeProperty edgeProperty = new EdgeProperty(
            DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            new OutputDescriptor(OnFileSortedOutput.class.getName()),
            new InputDescriptor(ShuffledMergedInputLegacy.class.getName()));

        Edge edge = null;
        edge = new Edge(vertices[i - 1], vertices[i], edgeProperty);
        dag.addEdge(edge);
      }
View Full Code Here

   * @param userPayload
   * @param initClazz class to init the input in the AM
   */
  public static void addMRInput(Vertex vertex, byte[] userPayload,
      Class<? extends TezRootInputInitializer> initClazz) {
    InputDescriptor id = new InputDescriptor(MRInputLegacy.class.getName())
        .setUserPayload(userPayload);
    vertex.addInput("MRInput", id, initClazz);
  }
View Full Code Here

    // Configure the Input for stage1
    Class<? extends TezRootInputInitializer> initializerClazz = generateSplitsInClient ? null
        : MRInputAMSplitGenerator.class;
    stage1Vertex.addInput("MRInput",
        new InputDescriptor(MRInputLegacy.class.getName())
            .setUserPayload(MRHelpers.createMRInputPayload(stage1Payload, null)),
        initializerClazz);

    // Setup stage2 Vertex
    Vertex stage2Vertex = new Vertex("stage2", new ProcessorDescriptor(
        FilterByWordOutputProcessor.class.getName()).setUserPayload(MRHelpers
        .createUserPayloadFromConf(stage2Conf)), 1,
        MRHelpers.getReduceResource(stage2Conf));
    stage2Vertex.setJavaOpts(MRHelpers.getReduceJavaOpts(stage2Conf)).setTaskLocalResources(commonLocalResources);
    Map<String, String> stage2Env = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(stage2Conf, stage2Env, false);
    stage2Vertex.setTaskEnvironment(stage2Env);

    // Configure the Output for stage2
    OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
        .setUserPayload(MRHelpers.createUserPayloadFromConf(stage2Conf));
    stage2Vertex.addOutput("MROutput", od, MROutputCommitter.class);

    DAG dag = new DAG("FilterLinesByWord");
    Edge edge = new Edge(stage1Vertex, stage2Vertex, new EdgeProperty(
        DataMovementType.BROADCAST, DataSourceType.PERSISTED,
        SchedulingType.SEQUENTIAL, new OutputDescriptor(
            OnFileUnorderedKVOutput.class.getName()), new InputDescriptor(
            ShuffledUnorderedKVInput.class.getName())));
    dag.addVertex(stage1Vertex).addVertex(stage2Vertex).addEdge(edge);

    LOG.info("Submitting DAG to Tez Session");
    DAGClient dagClient = tezSession.submitDAG(dag);
View Full Code Here

    tokenizerVertex.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    Map<String, String> mapEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    tokenizerVertex.setTaskEnvironment(mapEnv);
    Class<? extends TezRootInputInitializer> initializerClazz = MRInputAMSplitGenerator.class;
    InputDescriptor id = new InputDescriptor(MRInput.class.getName()).
        setUserPayload(mapInputPayload);
    tokenizerVertex.addInput("MRInput", id, initializerClazz);

    byte[] finalReducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
    Vertex summerVertex = new Vertex("summer",
        new ProcessorDescriptor(
            SumProcessor.class.getName()).setUserPayload(finalReducePayload),
                1, MRHelpers.getReduceResource(finalReduceConf));
    summerVertex.setJavaOpts(
        MRHelpers.getReduceJavaOpts(finalReduceConf));
    Map<String, String> reduceEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(finalReduceConf, reduceEnv, false);
    summerVertex.setTaskEnvironment(reduceEnv);
    OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
        .setUserPayload(finalReducePayload);
    summerVertex.addOutput("MROutput", od, MROutputCommitter.class);
   
    DAG dag = new DAG("WordCount");
    dag.addVertex(tokenizerVertex)
        .addVertex(summerVertex)
        .addEdge(
            new Edge(tokenizerVertex, summerVertex, new EdgeProperty(
                DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
                SchedulingType.SEQUENTIAL,
                new OutputDescriptor(OnFileSortedOutput.class.getName())
                        .setUserPayload(mapPayload),
                new InputDescriptor(ShuffledMergedInput.class.getName())
                        .setUserPayload(finalReducePayload))));
    return dag; 
  }
View Full Code Here

    mapVertex1.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    Map<String, String> mapEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    mapVertex1.setTaskEnvironment(mapEnv);
    Class<? extends TezRootInputInitializer> initializerClazz = MRInputAMSplitGenerator.class;
    InputDescriptor id = new InputDescriptor(MRInput.class.getName()).
        setUserPayload(mapInputPayload);
    mapVertex1.addInput("MRInput", id, initializerClazz);

    Vertex mapVertex2 = new Vertex("map2", new ProcessorDescriptor(
        TokenProcessor.class.getName()),
        numMaps, MRHelpers.getMapResource(mapStageConf));
    mapVertex2.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    mapVertex2.setTaskEnvironment(mapEnv);
    mapVertex2.addInput("MRInput", id, initializerClazz);

    Vertex mapVertex3 = new Vertex("map3", new ProcessorDescriptor(
        TokenProcessor.class.getName()),
        numMaps, MRHelpers.getMapResource(mapStageConf));
    mapVertex3.setJavaOpts(MRHelpers.getMapJavaOpts(mapStageConf));
    MRHelpers.updateEnvironmentForMRTasks(mapStageConf, mapEnv, true);
    mapVertex3.setTaskEnvironment(mapEnv);
    mapVertex3.addInput("MRInput", id, initializerClazz);
   
    byte[] finalReducePayload = MRHelpers.createUserPayloadFromConf(finalReduceConf);
    Vertex checkerVertex = new Vertex("checker",
        new ProcessorDescriptor(
            UnionProcessor.class.getName()).setUserPayload(finalReducePayload),
                1, MRHelpers.getReduceResource(finalReduceConf));
    checkerVertex.setJavaOpts(
        MRHelpers.getReduceJavaOpts(finalReduceConf));
    Map<String, String> reduceEnv = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(finalReduceConf, reduceEnv, false);
    checkerVertex.setTaskEnvironment(reduceEnv);
    OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(finalReducePayload);
    checkerVertex.addOutput("union", od, MROutputCommitter.class);

    Configuration partsConf = new Configuration(finalReduceConf);
    partsConf.set(FileOutputFormat.OUTDIR, outputPath+"-parts");
    byte[] partsPayload = MRHelpers.createUserPayloadFromConf(partsConf);
   
    DAG dag = new DAG("UnionExample");
   
    VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
    OutputDescriptor od1 = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(partsPayload);
    Configuration allPartsConf = new Configuration(finalReduceConf);
    allPartsConf.set(FileOutputFormat.OUTDIR, outputPath+"-all-parts");
    byte[] allPartsPayload = MRHelpers.createUserPayloadFromConf(allPartsConf);
    OutputDescriptor od2 = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(allPartsPayload);
    unionVertex.addOutput("parts", od1, MROutputCommitter.class);
    checkerVertex.addOutput("all-parts", od2, MROutputCommitter.class);
   
   
    dag.addVertex(mapVertex1)
        .addVertex(mapVertex2)
        .addVertex(mapVertex3)
        .addVertex(checkerVertex)
        .addEdge(
            new Edge(mapVertex3, checkerVertex, new EdgeProperty(
                DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
                SchedulingType.SEQUENTIAL,
                new OutputDescriptor(OnFileSortedOutput.class.getName())
                        .setUserPayload(mapPayload),
                new InputDescriptor(ShuffledMergedInput.class.getName())
                        .setUserPayload(finalReducePayload))))
        .addEdge(
            new GroupInputEdge(unionVertex, checkerVertex, new EdgeProperty(
                DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
                SchedulingType.SEQUENTIAL,
                new OutputDescriptor(OnFileSortedOutput.class.getName())
                    .setUserPayload(mapPayload),
                new InputDescriptor(ShuffledMergedInput.class.getName())
                    .setUserPayload(finalReducePayload)),
                new InputDescriptor(
                    ConcatenatedMergedKeyValuesInput.class.getName())));
    return dag; 
  }
View Full Code Here

TOP

Related Classes of org.apache.tez.dag.api.InputDescriptor

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.