Package org.apache.tez.dag.api

Examples of org.apache.tez.dag.api.OutputDescriptor


        edgeDesc.setUserPayload(userPayload);
        edgeProperty =
          new EdgeProperty(edgeDesc,
              DataSourceType.PERSISTED,
              SchedulingType.SEQUENTIAL,
              new OutputDescriptor(logicalOutputClass.getName()),
              new InputDescriptor(logicalInputClass.getName()));
        break;

      case CUSTOM_SIMPLE_EDGE:
        dataMovementType = DataMovementType.SCATTER_GATHER;
        logicalOutputClass = OnFileUnorderedPartitionedKVOutput.class;
        logicalInputClass = ShuffledUnorderedKVInput.class;
        break;

      case SIMPLE_EDGE:
      default:
        dataMovementType = DataMovementType.SCATTER_GATHER;
        logicalOutputClass = OnFileSortedOutput.class;
        logicalInputClass = ShuffledMergedInputLegacy.class;
        break;
    }

    if (edgeProperty == null) {
      edgeProperty =
        new EdgeProperty(dataMovementType,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            new OutputDescriptor(logicalOutputClass.getName()),
            new InputDescriptor(logicalInputClass.getName()));
    }

    return edgeProperty;
  }
View Full Code Here



    // final vertices need to have at least one output
    if (!hasChildren) {
      v.addOutput("out_"+work.getName(),
          new OutputDescriptor(MROutput.class.getName())
          .setUserPayload(MRHelpers.createUserPayloadFromConf(conf)));
    }

    return v;
  }
View Full Code Here

            throws IOException {
        TezEdgeDescriptor edge = to.inEdges.get(from.getOperatorKey());
        PhysicalPlan combinePlan = edge.combinePlan;

        InputDescriptor in = InputDescriptor.create(edge.inputClassName);
        OutputDescriptor out = OutputDescriptor.create(edge.outputClassName);

        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
        if (!combinePlan.isEmpty()) {
            addCombiner(combinePlan, to, conf);
        }

        List<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(from.plan,
                POLocalRearrangeTez.class);

        for (POLocalRearrangeTez lr : lrs) {
            if (lr.getOutputKey().equals(to.getOperatorKey().toString())) {
                byte keyType = lr.getKeyType();
                setIntermediateOutputKeyValue(keyType, conf, to, lr.isConnectedToPackage());
                // In case of secondary key sort, main key type is the actual key type
                conf.set("pig.reduce.key.type", Byte.toString(lr.getMainKeyType()));
                break;
            }
        }

        conf.setIfUnset(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
                MRPartitioner.class.getName());

        if (edge.getIntermediateOutputKeyClass() != null) {
            conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS,
                    edge.getIntermediateOutputKeyClass());
        }

        if (edge.getIntermediateOutputValueClass() != null) {
            conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS,
                    edge.getIntermediateOutputValueClass());
        }

        if (edge.getIntermediateOutputKeyComparatorClass() != null) {
            conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS,
                    edge.getIntermediateOutputKeyComparatorClass());
        }

        conf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
        conf.set("pig.pigContext", ObjectSerializer.serialize(pc));
        conf.set("udf.import.list",
                ObjectSerializer.serialize(PigContext.getPackageImportList()));

        if(to.isGlobalSort() || to.isLimitAfterSort()){
            conf.set("pig.sortOrder",
                    ObjectSerializer.serialize(to.getSortOrder()));
        }

        if (edge.isUseSecondaryKey()) {
            conf.set("pig.secondarySortOrder",
                    ObjectSerializer.serialize(edge.getSecondarySortOrder()));
            conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR,
                    SecondaryKeyPartitioner.class.getName());
            // These needs to be on the vertex as well for POShuffleTezLoad to pick it up.
            // Tez framework also expects this to be per vertex and not edge. IFile.java picks
            // up keyClass and valueClass from vertex config. TODO - check with Tez folks
            // In MR - job.setSortComparatorClass() or MRJobConfig.KEY_COMPARATOR
            conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS,
                    PigSecondaryKeyComparator.class.getName());
            // In MR - job.setOutputKeyClass() or MRJobConfig.OUTPUT_KEY_CLASS
            conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, NullableTuple.class.getName());
            setGroupingComparator(conf, PigSecondaryKeyGroupComparator.class.getName());
        }

        if (edge.partitionerClass != null) {
            conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR,
                    edge.partitionerClass.getName());
        }

        conf.set("udf.import.list",
                ObjectSerializer.serialize(PigContext.getPackageImportList()));

        MRToTezHelper.processMRSettings(conf, globalConf);

        in.setUserPayload(TezUtils.createUserPayloadFromConf(conf));
        out.setUserPayload(TezUtils.createUserPayloadFromConf(conf));

        if (edge.dataMovementType!=DataMovementType.BROADCAST && to.getEstimatedParallelism()!=-1 && (to.isGlobalSort()||to.isSkewedJoin())) {
            // Use custom edge
            return EdgeProperty.create((EdgeManagerPluginDescriptor)null,
                    edge.dataSourceType, edge.schedulingType, out, in);
View Full Code Here

            outputPayLoad.set(JobControlCompiler.PIG_MAP_STORES,
                    ObjectSerializer.serialize(emptyList));
            outputPayLoad.set(JobControlCompiler.PIG_REDUCE_STORES,
                    ObjectSerializer.serialize(singleStore));

            OutputDescriptor storeOutDescriptor = OutputDescriptor.create(
                    MROutput.class.getName()).setUserPayload(TezUtils
                    .createUserPayloadFromConf(outputPayLoad));
            if (tezOp.getVertexGroupStores() != null) {
                OperatorKey vertexGroupKey = tezOp.getVertexGroupStores().get(store.getOperatorKey());
                if (vertexGroupKey != null) {
View Full Code Here

      vertex.addDataSource("MRInput",
          configureMRInputWithLegacySplitsGenerated(stageConf, true));
    }
    // Map only jobs.
    if (stageNum == totalStages -1) {
      OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName())
          .setUserPayload(vertexUserPayload);
      vertex.addDataSink("MROutput", new DataSinkDescriptor(od,
          OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null));
    }
View Full Code Here

  }

  @Test(timeout=120000)
  public void testFailingCommitter() throws Exception {
    DAG dag = SimpleVTestDAG.createDAG("FailingCommitterDAG", null);
    OutputDescriptor od =
        OutputDescriptor.create(MultiAttemptDAG.NoOpOutput.class.getName());
    od.setUserPayload(UserPayload.create(ByteBuffer.wrap(
        new MultiAttemptDAG.FailingOutputCommitter.FailingOutputCommitterConfig(true)
            .toUserPayload())));
    OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(
        MultiAttemptDAG.FailingOutputCommitter.class.getName());
    dag.getVertex("v3").addDataSink("FailingOutput", new DataSinkDescriptor(od, ocd, null));
View Full Code Here

    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
   
    // Third request - output
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc2 = createTestOutputDescriptor();
    dist.requestMemory(5000, e3Callback, e3OutputContext1, e3OutDesc2);
   
    dist.makeInitialAllocations();
   
    // Total available: 70% of 10K = 7000
View Full Code Here

    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
   
    // Third request - output
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc1 = createTestOutputDescriptor();
    dist.requestMemory(5000, e3Callback, e3OutputContext1, e3OutDesc1);
   
    // Fourth request - processor
    MemoryUpdateCallbackForTest e4Callback = new MemoryUpdateCallbackForTest();
    ProcessorContext e4ProcessorContext1 = createTestProcessortContext();
View Full Code Here

    dist.requestMemory(10000, e2Callback, e2InputContext2, e2InDesc2);
   
    // Third request - output
    MemoryUpdateCallbackForTest e3Callback = new MemoryUpdateCallbackForTest();
    OutputContext e3OutputContext1 = createTestOutputContext();
    OutputDescriptor e3OutDesc2 = createTestOutputDescriptor();
    dist.requestMemory(5000, e3Callback, e3OutputContext1, e3OutDesc2);
   
    dist.makeInitialAllocations();
   
    // Total available: 50% of 10K = 7000
View Full Code Here

    doReturn("InputClass").when(desc).getClassName();
    return desc;
  }

  protected OutputDescriptor createTestOutputDescriptor() {
    OutputDescriptor desc = mock(OutputDescriptor.class);
    doReturn("OutputClass").when(desc).getClassName();
    return desc;
  }
View Full Code Here

TOP

Related Classes of org.apache.tez.dag.api.OutputDescriptor

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.