Examples of org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig

Package org.apache.tez.runtime.library.conf

Examples of org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig

org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig
Configure payloads for the OrderedPartitionedKVOutput and OrderedGroupedKVInput pair
Values will be picked up from tez-site if not specified, otherwise defaults from {@link org.apache.tez.runtime.library.api.TezRuntimeConfiguration} will be used.

          partitionerConf = Maps.newHashMap();
          for (Map.Entry<String, String> entry : stageConfs[i - 1]) {
            partitionerConf.put(entry.getKey(), entry.getValue());
          }
        }
        OrderedPartitionedKVEdgeConfig edgeConf =
            OrderedPartitionedKVEdgeConfig.newBuilder(stageConfs[i - 1].get(
                    TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS),
                stageConfs[i - 1].get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS),
                MRPartitioner.class.getName(), partitionerConf)
                .configureInput().useLegacyInput().done()
                .setFromConfiguration(stageConfs[i - 1]).build();
        Edge edge = Edge.create(vertices[i - 1], vertices[i], edgeConf.createDefaultEdgeProperty());
        dag.addEdge(edge);
      }


    }
    return dag;

View Full Code Here

    DAG dag = DAG.create("JoinValidate");


    // Configuration for intermediate output - shared by Vertex1 and Vertex2
    // This should only be setting selective keys from the underlying conf. Fix after there's a
    // better mechanism to configure the IOs.
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), NullWritable.class.getName(),
            HashPartitioner.class.getName()).build();


    Vertex lhsVertex = Vertex.create(LHS_INPUT_NAME, ProcessorDescriptor.create(
        ForwardingProcessor.class.getName())).addDataSource("lhs",
        MRInput
            .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
                lhs.toUri().toString()).groupSplits(false).build());


    Vertex rhsVertex = Vertex.create(RHS_INPUT_NAME, ProcessorDescriptor.create(
        ForwardingProcessor.class.getName())).addDataSource("rhs",
        MRInput
            .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
                rhs.toUri().toString()).groupSplits(false).build());


    Vertex joinValidateVertex = Vertex.create("joinvalidate", ProcessorDescriptor.create(
        JoinValidateProcessor.class.getName()), numPartitions);


    Edge e1 = Edge.create(lhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());
    Edge e2 = Edge.create(rhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());


    dag.addVertex(lhsVertex).addVertex(rhsVertex).addVertex(joinValidateVertex).addEdge(e1)
        .addEdge(e2);
    return dag;
  }

View Full Code Here

    // edge. Internally, it sets up matching Tez inputs and outputs that can perform this logic.
    // We specify the key, value and partitioner type. Here the key type is Text (for word), the 
    // value type is IntWritable (for count) and we using a hash based partitioner. This is a helper
    // object. The edge can be configured by configuring the input, output etc individually without
    // using this helper.
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(),
            HashPartitioner.class.getName()).build();


    // Create a vertex that reads the tokenized data and calculates the sum using the SumProcessor.
    // The number of tasks that do the work of this vertex depends on the number of partitions used 
    // to distribute the sum processing. In this case, its been made configurable via the 
    // numPartitions parameter.
    Vertex summationVertex = Vertex.create(SUMMATION,
        ProcessorDescriptor.create(SumProcessor.class.getName()), numPartitions)
        .addDataSink(OUTPUT, dataSink);


    // No need to add jar containing this class as assumed to be part of the Tez jars. Otherwise 
    // we would have to add the jars for this code as local files to the vertices.
    
    // Create DAG and add the vertices. Connect the producer and consumer vertices via the edge
    DAG dag = DAG.create("WordCount");
    dag.addVertex(tokenizerVertex)
        .addVertex(summationVertex)
        .addEdge(
            Edge.create(tokenizerVertex, summationVertex, edgeConf.createDefaultEdgeProperty()));
    return dag;  
  }

View Full Code Here

    Vertex tokenizerVertex = Vertex.create(TOKENIZER, ProcessorDescriptor.create(
        TokenProcessor.class.getName()));
    tokenizerVertex.addDataSource(INPUT, dataSource);


    // Use Text key and IntWritable value to bring counts for each word in the same partition
    OrderedPartitionedKVEdgeConfig summationEdgeConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(),
            HashPartitioner.class.getName()).build();


    // This vertex will be reading intermediate data via an input edge and writing intermediate data
    // via an output edge.
    Vertex summationVertex = Vertex.create(SUMMATION, ProcessorDescriptor.create(
        SumProcessor.class.getName()), numPartitions);
    
    // Use IntWritable key and Text value to bring all words with the same count in the same 
    // partition. The data will be ordered by count and words grouped by count.
    OrderedPartitionedKVEdgeConfig sorterEdgeConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(IntWritable.class.getName(), Text.class.getName(),
            HashPartitioner.class.getName()).build();


    // Use 1 task to bring all the data in one place for global sorted order. Essentially the number
    // of partitions is 1. So the NoOpSorter can be used to produce the globally ordered output
    Vertex sorterVertex = Vertex.create(SORTER, ProcessorDescriptor.create(
        NoOpSorter.class.getName()), 1);
    sorterVertex.addDataSink(OUTPUT, dataSink);


    // No need to add jar containing this class as assumed to be part of the tez jars.
    
    DAG dag = DAG.create(dagName);
    dag.addVertex(tokenizerVertex)
        .addVertex(summationVertex)
        .addVertex(sorterVertex)
        .addEdge(
            Edge.create(tokenizerVertex, summationVertex,
                summationEdgeConf.createDefaultEdgeProperty()))
        .addEdge(
            Edge.create(summationVertex, sorterVertex, sorterEdgeConf.createDefaultEdgeProperty()));
    return dag;  
  }

View Full Code Here

    DAG dag = DAG.create("OrderedWordCount" + dagIndex);
    for (int i = 0; i < vertices.size(); ++i) {
      dag.addVertex(vertices.get(i));
    }


    OrderedPartitionedKVEdgeConfig edgeConf1 = OrderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(),
            HashPartitioner.class.getName()).setFromConfiguration(conf)
      .configureInput().useLegacyInput().done().build();
    dag.addEdge(
        Edge.create(dag.getVertex("initialmap"), dag.getVertex("intermediate_reducer"),
            edgeConf1.createDefaultEdgeProperty()));


    OrderedPartitionedKVEdgeConfig edgeConf2 = OrderedPartitionedKVEdgeConfig
        .newBuilder(IntWritable.class.getName(), Text.class.getName(),
            HashPartitioner.class.getName()).setFromConfiguration(conf)
            .configureInput().useLegacyInput().done().build();
    dag.addEdge(
        Edge.create(dag.getVertex("intermediate_reducer"), dag.getVertex("finalreduce"),
            edgeConf2.createDefaultEdgeProperty()));


    return dag;
  }

View Full Code Here

    }




    Map<String, String> partitionerConf = Maps.newHashMap();
    partitionerConf.put(MRJobConfig.PARTITIONER_CLASS_ATTR, MRRSleepJobPartitioner.class.getName());
    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(IntWritable.class.getName(), IntWritable.class.getName(),
            HashPartitioner.class.getName(), partitionerConf).configureInput().useLegacyInput()
        .done().build();


    for (int i = 0; i < vertices.size(); ++i) {
      dag.addVertex(vertices.get(i));
      if (i != 0) {
        dag.addEdge(
            Edge.create(vertices.get(i - 1), vertices.get(i), edgeConf.createDefaultEdgeProperty()));
      }
    }


    return dag;
  }

View Full Code Here

    DataSinkDescriptor od1 = MROutput.createConfigBuilder(partsConf,
        TextOutputFormat.class, outputPath + "-parts").build();
    VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
    unionVertex.addDataSink("parts", od1);


    OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(),
            HashPartitioner.class.getName()).build();


    dag.addVertex(mapVertex1)
        .addVertex(mapVertex2)
        .addVertex(mapVertex3)
        .addVertex(checkerVertex)
        .addEdge(
            Edge.create(mapVertex3, checkerVertex, edgeConf.createDefaultEdgeProperty()))
        .addEdge(
            GroupInputEdge.create(unionVertex, checkerVertex, edgeConf.createDefaultEdgeProperty(),
                InputDescriptor.create(
                    ConcatenatedMergedKeyValuesInput.class.getName())));
    return dag;  
  }

View Full Code Here

      return et3Conf.createDefaultEdgeProperty();
    case SIMPLE_EDGE:
    default:
      assert partitionerClassName != null;
      partitionerConf = createPartitionerConf(partitionerClassName, conf);
      OrderedPartitionedKVEdgeConfig et4Conf = OrderedPartitionedKVEdgeConfig
          .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf)
          .setFromConfiguration(conf)
          .setKeySerializationClass(TezBytesWritableSerialization.class.getName(),
              TezBytesComparator.class.getName(), null)
          .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
          .build();
      return et4Conf.createDefaultEdgeProperty();
    }
  }

View Full Code Here

TOP

Related Classes of org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig

org.apache.hadoop.hive.ql.exec.tez.DagUtils

org.apache.tez.dag.api.EdgeProperty

org.apache.tez.examples.JoinValidate

org.apache.tez.examples.OrderedWordCount

org.apache.tez.examples.WordCount

org.apache.tez.mapreduce.client.YARNRunner

org.apache.tez.mapreduce.examples.MRRSleepJob

org.apache.tez.mapreduce.examples.TestOrderedWordCount

org.apache.tez.mapreduce.examples.UnionExample

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.