Examples of com.tdunning.plume.PCollection

com.tdunning.plume.PCollection
Parallel collection.

  @SuppressWarnings("unchecked")
  protected void reduce(final PlumeObject arg0, java.lang.Iterable<PlumeObject> values,
      Reducer<PlumeObject,PlumeObject,NullWritable,NullWritable>.Context arg2)
    throws IOException, InterruptedException {
    
    PCollection col  = mscr.getChannelByNumber().get(arg0.sourceId);
    OutputChannel oC = mscr.getOutputChannels().get(col);
    if(oC.reducer != null) {
      // apply reducer
      ParallelDo pDo = oC.reducer;
      DoFn reducer = pDo.getFunction(); // TODO how to check / report this

View Full Code Here

  @SuppressWarnings("unchecked")
  protected void reduce(final PlumeObject arg0, java.lang.Iterable<PlumeObject> values,
      Reducer<PlumeObject, PlumeObject, PlumeObject, PlumeObject>.Context context)
    throws IOException, InterruptedException {


    PCollection col  = mscr.getChannelByNumber().get(arg0.sourceId);
    OutputChannel oC = mscr.getOutputChannels().get(col);
    if(oC.combiner != null) {
      // Apply combiner function for this channel
      List<WritableComparable> vals = Lists.newArrayList();
      for(PlumeObject val: values) {

View Full Code Here

    public void build() {
      init();


      LazyPlume plume = new LazyPlume();
      // Get input files
      PCollection inputEvent2, inputLogFile;
      try {
        inputEvent2  = plume.readFile(inputPathEvent2, collectionOf(strings()));
        inputLogFile = plume.readFile(inputPathLogFile, collectionOf(strings()));
        // Add as inputs
        addInput(inputEvent2);
        addInput(inputLogFile);
      } catch (IOException e) {
        throw new RuntimeException();
      }      
        
      /**
       * We use flatten to merge an existing log file with a list of users that used one new event.
       * The list of users is converted to the log format before flattening by adding a date and an event name.
       */
      PCollection aggregateLog = plume.flatten(
        inputLogFile,
        inputEvent2.map(new DoFn<Text, Text>() {
          @Override
          public void process(Text v, EmitFn emitter) {
            emitter.emit(new Text(new SimpleDateFormat("yyyy/MM/dd").format(new Date())+"\t"+"new_event"+"\t"+v.toString()));
          }
        }, collectionOf(strings())));
        
      /**
       * We use the aggregate log to calculate a map of [date, user] -> #clicks
       */
      PCollection dateUserClicks = aggregateLog.map(new DoFn<Text, Pair>() {
          @Override
          public void process(Text v, EmitFn<Pair> emitter) {
            String[] splittedLine = v.toString().split("\t");
            Text dateUser = new Text(splittedLine[0]+"\t"+splittedLine[2]);
            emitter.emit(Pair.create(dateUser, new IntWritable(1)));
          }
        }, tableOf(strings(), integers()))
        .groupByKey()
        .combine(countCombiner)
        .map(countReduceToText, tableOf(strings(), strings()));


      /**
       * We use the aggregate log to calculate a map of [date] -> #clicks
       */
      PCollection dateClicks = aggregateLog.map(new DoFn<Text, Pair>() {
          @Override
          public void process(Text v, EmitFn<Pair> emitter) {
            String[] splittedLine = v.toString().split("\t");
            emitter.emit(Pair.create(new Text(splittedLine[0]), new IntWritable(1)));
          }
        }, tableOf(strings(), integers()))
        .groupByKey()
        .combine(countCombiner)
        .map(countReduceToText, tableOf(strings(), strings()));
  
      /**
       * We use the aggregate log to calculate a list of uniq users
       */
      PCollection uniqUsers = aggregateLog.map(new DoFn<Text, Pair>() {
        @Override
        public void process(Text v, EmitFn<Pair> emitter) {
          String[] splittedLine = v.toString().split("\t");
          emitter.emit(Pair.create(new Text(splittedLine[2]), new Text("")));
        }

View Full Code Here

    @Override
    public void build() {
      init();
      
      LazyPlume plume = new LazyPlume();
      PCollection input;
      try {
        // Read input
        input = plume.readFile("/tmp/input-wordcount.txt", collectionOf(strings()));
        // Add it as workflow's input
        addInput(input);
      } catch (IOException e) {
        throw new RuntimeException();
      }
      
      // Define the wordcount map
      DoFn wordCountMap = new DoFn() {
        @Override
        public void process(Object v, EmitFn emitter) {
          StringTokenizer itr = new StringTokenizer(v.toString());
          while (itr.hasMoreTokens()) {
            emitter.emit(Pair.create(new Text(itr.nextToken()), new IntWritable(1)));
          }
        }
      };


      // Define the wordcount output
      PCollection output = input.map(wordCountMap, tableOf(strings(), integers()))
        .groupByKey()
        .combine(countCombiner)
        .map(countReduceToText, tableOf(strings(), strings()));
      
      // Add wordcount's output as workflow's output

View Full Code Here

    @Override
    public void build() {
      init();
      
      LazyPlume plume = new LazyPlume();
      PCollection input;
      try {
        // Read input
        input = plume.readFile("/tmp/input-wordcount.txt", collectionOf(strings()));
        // Add it as workflow's input
        addInput(input);
      } catch (IOException e) {
        throw new RuntimeException();
      }
      
      PCollection output = input.map(new DoFn() {
        @Override
        public void process(Object v, EmitFn emitter) {
          Text t = (Text)v;
          // do some foo processing
          emitter.emit(Pair.create(t, new Text("foo")));

View Full Code Here

    public void build() {
      init();
      
      // Get one input file
      LazyPlume plume = new LazyPlume();
      PCollection input;
      try {
        input = plume.readFile("/tmp/input-wordcount.txt", collectionOf(strings()));
        // Add as input
        addInput(input);
      } catch (IOException e) {
        throw new RuntimeException();
      }
      
      final IntWritable one = new IntWritable(1);
      
      // Define a map that counts and group by #chars of line
      PCollection po1 = input.map(new DoFn() {
        @Override
        public void process(Object v, EmitFn emitter) {
          StringTokenizer itr = new StringTokenizer(v.toString());
          int length = 0;
          while (itr.hasMoreTokens()) {
            length += itr.nextToken().length();
          }
          emitter.emit(Pair.create(new IntWritable(length), one));
        }
      }, tableOf(integers(), integers()))
       .groupByKey()
       .map(countReduceToText, tableOf(integers(), strings()));
      
      // Define a map that counts and group by #tokens of line
      PCollection po2 = input.map(new DoFn() {
        @Override
        public void process(Object v, EmitFn emitter) {
          StringTokenizer itr = new StringTokenizer(v.toString());
          int length = 0;
          while (itr.hasMoreTokens()) {
            length ++;
            itr.nextToken();
          }
          emitter.emit(Pair.create(new IntWritable(length), one));
        }
      }, tableOf(integers(), integers()))
       .groupByKey()
       .map(countReduceToText, tableOf(integers(), strings()));
      
      // Define a map that counts appearances of chars
      PCollection po3 = input.map(new DoFn() {
        @Override
        public void process(Object v, EmitFn emitter) {
          StringTokenizer itr = new StringTokenizer(v.toString());
          while (itr.hasMoreTokens()) {
            String token = itr.nextToken();

View Full Code Here

    @Override
    public void build() {
      init();
      
      LazyPlume plume = new LazyPlume();
      PCollection input;
      PCollection input2;
      try {
        // Read input
        input = plume.readFile("/tmp/input-wordcount.txt", collectionOf(strings()));
        input2 = plume.readFile("/tmp/input-moretext.txt", collectionOf(strings()));
        // Add it as workflow's input
        addInput(input);
      } catch (IOException e) {
        throw new RuntimeException();
      }
      
      PCollection transform = input.map(new DoFn() {
        @Override
        public void process(Object v, EmitFn emitter) {
          Text t = (Text)v;
          emitter.emit(new Text(t.toString()+"-bar"));
        }}, collectionOf(strings()));
      
      addOutput(plume.flatten(input2, transform)); // flatten with another file
      
      PCollection groupedTransform = input.map(new DoFn() {
        @Override
        public void process(Object v, EmitFn emitter) {
          Text t = (Text)v;
          emitter.emit(Pair.create(t, new Text("foo")));
        }}, tableOf(strings(), strings())).groupByKey();

View Full Code Here

  @Test
  public void testFigure4() {
    // Get Plume runtime
    LazyPlume plume = new LazyPlume();
    // Create simple data 
    PCollection input1 = plume.fromJava(Lists.newArrayList(Pair.create(1, 1)));
    PCollection input2 = plume.fromJava(Lists.newArrayList(Pair.create(2, 2)));
    PCollection input3 = plume.fromJava(Lists.newArrayList(Pair.create(3, 3)));
    PCollection input4 = plume.fromJava(Lists.newArrayList(Pair.create(4, 4)));
    
    PCollection output1 = plume.flatten(tableOf(integers(), integers()),
        input1.map(identity, tableOf(integers(), integers())),   
        input2.map(identity, tableOf(integers(), integers())))
        .groupByKey();
        
    PCollection output2 = plume.flatten(tableOf(integers(), integers()),
        input2.map(identity, tableOf(integers(), integers())),
        input3.map(identity, tableOf(integers(), integers())),
        input4.map(identity, tableOf(integers(), integers())))
        .groupByKey()
        .combine(dummyCombiner)
        .map(identity, null);
    
    PCollection output3 = plume.flatten(tableOf(integers(), integers()),
        input4.map(identity, tableOf(integers(), integers())))
        .groupByKey()
        .map(identity, null);
    
    Optimizer optimizer = new Optimizer();

View Full Code Here

  @SuppressWarnings({ "rawtypes", "unchecked" })
  public void testFigure5() {
    // Get Plume runtime
    LazyPlume plume = new LazyPlume();
    // Create simple data 
    PCollection input1 = plume.fromJava(Lists.newArrayList(Pair.create(1, 1)));
    PCollection input2 = plume.fromJava(Lists.newArrayList(Pair.create(2, 2)));
    PCollection input3 = plume.fromJava(Lists.newArrayList(Pair.create(3, 3)));
    PCollection input4 = plume.fromJava(Lists.newArrayList(Pair.create(4, 4)));


    PCollection partial1 = input1.map(identity, tableOf(integers(), integers()));
    PCollection partial2 =
      plume.flatten(tableOf(integers(), integers()),
          input2.map(identity, tableOf(integers(), integers())),
          input3.map(identity, tableOf(integers(), integers()))
          .map(identity, null)
          .map(identity, null));
    
    PCollection partial3 =
      input4.map(identity, tableOf(integers(), integers()))
        .groupByKey()
        .combine(dummyCombiner)
        .map(identity, null);
  
    PCollection output = plume.flatten(tableOf(integers(), integers()), partial1, partial2, partial3)
      .groupByKey()
      .map(identity, null);
    
    Optimizer optimizer = new Optimizer();
    ExecutionStep step = optimizer.optimize(

View Full Code Here

    @SuppressWarnings("unchecked")
    @Override
    public void build() {
      init();
      LazyPlume plume = new LazyPlume();
      PCollection input;
      try {
        // Get input file
        input = plume.readFile(inputPath, tableOf(integers(), integers()));
        // Add as input for this workflow
        addInput(input);
      } catch (IOException e) {
       throw new RuntimeException(e);
      }
      // Define its output
      PCollection output = input.map(new DoFn<Pair<IntWritable, IntWritable>, Pair<IntWritable, IntWritable>>() {
        @Override
        public void process(Pair<IntWritable, IntWritable> v,
            EmitFn<Pair<IntWritable, IntWritable>> emitter) {
          emitter.emit(Pair.create(new IntWritable(v.getKey().get() + 1), new IntWritable(v.getValue().get() + 1)));
        }

View Full Code Here

0 1

TOP

Related Classes of com.tdunning.plume.PCollection

com.tdunning.plume.local.lazy.MapRedBypassTest$MapRedBypassWorkflow

com.tdunning.plume.local.lazy.MapRedFlattenTest$MapRedFlattenTestWorkflow

com.tdunning.plume.local.lazy.MapRedMultipleGroupsTest$MultipleGroupsWorkflow

com.tdunning.plume.local.lazy.MapRedOnlyFlattensTest$MapRedOnlyFlattensTestWorkflow

com.tdunning.plume.local.lazy.MapRedSequenceFileTest$OtherWorkflow

com.tdunning.plume.local.lazy.MapRedSingleFlattenChannelTest$MapRedSingleFlattenChannelTestWorkflow

com.tdunning.plume.local.lazy.MapRedTwoSequentialGBKTest$TwoSequentialGBKWorkflow

com.tdunning.plume.local.lazy.MapRedWordCountTest$WordCountWorkflow

com.tdunning.plume.local.lazy.MSCRCombiner

com.tdunning.plume.local.lazy.MSCRReducer

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.