job.addIntermediateSchema(INTERMEDIATE_SCHEMA);
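// INTERMEDIATE_SCHEMA is assumed to be a Pangool Schema defined elsewhere with
// (at least) the three fields used below: "word", "category" (an enum) and "count" (an int).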
// perform per-category word count mapping
job.addInput(new Path(inputExamples), new HadoopInputFormat(TextInputFormat.class),
    new TupleMapper<LongWritable, Text>() {

      // reuse a single Tuple instance instead of allocating one per call
      ITuple tuple = new Tuple(INTERMEDIATE_SCHEMA);

      @Override
      public void map(LongWritable toIgnore, Text value, TupleMRContext context, Collector collector)
          throws IOException, InterruptedException {
        // each input line is "<category>\t<text>": split it once and parse both parts
        String[] parts = value.toString().split("\t");
        Category category = Category.valueOf(parts[0]);
        StringTokenizer itr = new StringTokenizer(parts[1]);
        tuple.set("category", category);
        tuple.set("count", 1);
        // emit one (word, category, count = 1) tuple per token in the text
        while(itr.hasMoreTokens()) {
          tuple.set("word", normalizeWord(itr.nextToken()));
          collector.write(tuple);
        }
      }
    });
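// reduce phase: sum the "count" field of all tuples in each group and emit one output tuple per group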
TupleReducer<ITuple, NullWritable> countReducer = new TupleReducer<ITuple, NullWritable>() {

  @Override
  public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
      throws IOException, InterruptedException, TupleMRException {
    int count = 0;
    ITuple outputTuple = null;
    // accumulate the partial counts; keep the last tuple so it can be reused as the output tuple
    for(ITuple tuple : tuples) {
      count += (Integer) tuple.get("count");
      outputTuple = tuple;
    }
    // write a single tuple per group carrying the aggregated count
    outputTuple.set("count", count);
    collector.write(outputTuple, NullWritable.get());
  }