Package storm.trident

Examples of storm.trident.TridentTopology.newStream()


    // Stream can be parallelized with "parallelismHint"
    // Parallelism hint is applied downwards until a partitioning operation (we will see this later).
    // This topology creates 5 spouts and 5 bolts:
    // Let's debug that with TridentOperationContext.partitionIndex!
    topology.newStream("parallel", spout).each(new Fields("text", "actor"), new PereTweetsFilter())
        .parallelismHint(5).each(new Fields("text", "actor"), new Utils.PrintFilter());

    // A stream can be partitioned in various ways.
    // Let's partition it by "actor". What happens with previous example?
    topology.newStream("parallel_and_partitioned", spout).partitionBy(new Fields("actor"))
View Full Code Here


    topology.newStream("parallel", spout).each(new Fields("text", "actor"), new PereTweetsFilter())
        .parallelismHint(5).each(new Fields("text", "actor"), new Utils.PrintFilter());

    // A stream can be partitioned in various ways.
    // Let's partition it by "actor". What happens with previous example?
    topology.newStream("parallel_and_partitioned", spout).partitionBy(new Fields("actor"))
        .each(new Fields("text", "actor"), new PereTweetsFilter()).parallelismHint(5)
        .each(new Fields("text", "actor"), new Utils.PrintFilter());

    // Only one partition is filtering, which makes sense for the case.
    // If we remove the partitionBy we get the previous behavior.
View Full Code Here

    // We need to specify a partitioning policy for that to happen.
    // (We said that parallelism hint is applied downwards until a partitioning operation is found).

    // But if we don't want to partition by any field, we can just use shuffle()
    // We could also choose global() - with care!
    topology.newStream("parallel_and_partitioned", spout).parallelismHint(1).shuffle()
        .each(new Fields("text", "actor"), new PereTweetsFilter()).parallelismHint(5)
        .each(new Fields("text", "actor"), new Utils.PrintFilter());

    // Because data is batched, we can aggregate batches for efficiency.
    // The aggregate primitive aggregates one full batch. Useful if we want to persist the result of each batch only
View Full Code Here

    // Because data is batched, we can aggregate batches for efficiency.
    // The aggregate primitive aggregates one full batch. Useful if we want to persist the result of each batch only
    // once.
    // The aggregation for each batch is executed in a random partition as can be seen:
    topology.newStream("aggregation", spout).parallelismHint(1)
        .aggregate(new Fields("location"), new LocationAggregator(), new Fields("aggregated_result"))
        .parallelismHint(5).each(new Fields("aggregated_result"), new Utils.PrintFilter());

    // The partitionAggregate on the other hand only executes the aggregator within one partition's part of the batch.
    // Let's debug that with TridentOperationContext.partitionIndex!
View Full Code Here

        .aggregate(new Fields("location"), new LocationAggregator(), new Fields("aggregated_result"))
        .parallelismHint(5).each(new Fields("aggregated_result"), new Utils.PrintFilter());

    // The partitionAggregate on the other hand only executes the aggregator within one partition's part of the batch.
    // Let's debug that with TridentOperationContext.partitionIndex!
    topology
        .newStream("partial_aggregation", spout)
        .parallelismHint(1)
        .shuffle()
        .partitionAggregate(new Fields("location"), new LocationAggregator(),
            new Fields("aggregated_result")).parallelismHint(6)
View Full Code Here

    // A useful primitive is groupBy.
    // It splits the stream into groups so that aggregations only occur within a group.
    // Because now we are grouping, the aggregation function can be much simpler (Count())
    // We don't need to use HashMaps anymore.
    topology.newStream("aggregation", spout).parallelismHint(1).groupBy(new Fields("location"))
        .aggregate(new Fields("location"), new Count(), new Fields("count")).parallelismHint(5)
        .each(new Fields("location", "count"), new Utils.PrintFilter());

    // EXERCISE: Use Functions and Aggregators to parallelize per-hashtag counts.
    // Step by step: 1) Obtain and select hashtags, 2) Write the Aggregator.
View Full Code Here

  public static StormTopology buildTopology(LocalDRPC drpc) throws IOException {
    FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout();

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout).each(new Fields("id", "text", "actor", "location", "date"),
        new Utils.PrintFilter());

    return topology.build();
  }
View Full Code Here

  public static StormTopology buildTopology(LocalDRPC drpc) throws IOException {
    FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout();

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout)
        .parallelismHint(2)
        .partitionBy(new Fields("actor"))
        // .shuffle()
        .each(new Fields("actor", "text"), new PerActorTweetsFilter("dave")).parallelismHint(5)
        .each(new Fields("actor", "text"), new Utils.PrintFilter());
View Full Code Here

 
  public static StormTopology buildTopology(LocalDRPC drpc) throws IOException {
    FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(100);

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout)
      .aggregate(new Fields("location"), new LocationAggregator(), new Fields("location_counts"))
      .each(new Fields("location_counts"), new Utils.PrintFilter());
   
    return topology.build();
  }
View Full Code Here

  public static StormTopology buildTopology(LocalDRPC drpc) throws IOException {
    FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(100);

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout)
      .groupBy(new Fields("location"))
      .aggregate(new Fields("location"), new Count(), new Fields("count"))
      .each(new Fields("location", "count"), new Utils.PrintFilter());
   
    return topology.build();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.