Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Tuple


  public static class MyHandler extends TupleMapper<LongWritable, Text> {

    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      // Innermost tuple: a random int plus the raw input line.  schema1,
      // getMetaSchema1() and getMetaSchema2() are defined in the surrounding
      // test class (not shown in this excerpt).
      ITuple tuple = new Tuple(schema1);
      tuple.set("a", (int) (Math.random() * 1000));
      tuple.set("b", value.toString());

      // First wrapper: tuples can be nested, so "tuple" is itself a field.
      ITuple mTuple = new Tuple(getMetaSchema1());
      mTuple.set("partition", (int) (Math.random() * 10));
      mTuple.set("tuple", tuple);

      // Second wrapper: two levels of nesting; only the outermost tuple is emitted.
      ITuple mTuple2 = new Tuple(getMetaSchema2());
      mTuple2.set("group", value.toString());
      mTuple2.set("metatuple", mTuple);

      collector.write(mTuple2);
    }
  }
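
The schemas used above are not part of the excerpt. As a minimal sketch of the parts of the API this page does show, a flat schema can be declared with Fields.parse() and filled field by field (the class and field names here are illustrative):

    import com.datasalt.pangool.io.Fields;
    import com.datasalt.pangool.io.ITuple;
    import com.datasalt.pangool.io.Schema;
    import com.datasalt.pangool.io.Tuple;

    public class TupleBasics {
      public static void main(String[] args) {
        // Field syntax matches the Fields.parse() calls used further down
        // this page: name:type, with a trailing "?" marking a nullable field.
        Schema schema = new Schema("example", Fields.parse("a:int, b:string"));
        ITuple tuple = new Tuple(schema);
        tuple.set("a", 42);
        tuple.set("b", "hello");
        System.out.println(tuple.get("a") + " / " + tuple.getString("b"));
      }
    }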


    private Tuple tuple;

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      // Look up the first intermediate schema registered on the job (by index)
      // and reuse a single Tuple instance across map() calls.  The "count"
      // field is preset to 1 once, word-count style.
      Schema schema = context.getTupleMRConfig().getIntermediateSchema(0);
      this.tuple = new Tuple(schema);
      tuple.set("count", 1);
    }
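
A plausible map() to go with that setup, under the assumption (not shown in the excerpt) that the same schema also carries a string field named "word":

    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      for(String word : value.toString().split("\\s+")) {
        tuple.set("word", word);   // "count" stays at the 1 preset in setup()
        collector.write(tuple);
      }
    }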

    private Tuple tuple;

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      // Same idea, but the intermediate schema is resolved by name rather
      // than by registration index.
      Schema schema = context.getTupleMRConfig().getIntermediateSchema("schema");
      this.tuple = new Tuple(schema);
    }
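
Lookup by name only works when a schema with that name was registered on the builder. A minimal sketch of that registration, mirroring the builder calls used later on this page:

    TupleMRBuilder builder = new TupleMRBuilder(conf, "job-name");
    builder.addIntermediateSchema(new Schema("schema", Fields.parse("word:string, count:int")));
    // Inside mappers/reducers the same schema is then reachable both ways:
    //   context.getTupleMRConfig().getIntermediateSchema(0)
    //   context.getTupleMRConfig().getIntermediateSchema("schema")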

    // targetSchema is a superset of schema (which is defined before this
    // excerpt): "b", "d" and "e" are declared nullable with the trailing "?".
    Schema targetSchema = new Schema("target", Fields.parse("a:string, b:int?, c:double, d:long?, e:boolean?"));

    Configuration conf = new Configuration();
    HadoopSerialization hadoopSerDe = new HadoopSerialization(conf);

    ITuple tuple = new Tuple(schema);
    tuple.set("a", "foo");
    tuple.set("b", 10);
    tuple.set("c", 5d);

    SimpleTupleSerializer ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    ser.open(bos);
   
    for(int i = 0; i < 10; i++) {
      ser.serialize(tuple);
    }
   
    ser.close();
   
    bos.close();
    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
   
    // The deserializer reads records written with "schema" into the wider
    // "targetSchema"; fields missing from the source ("d", "e") come back null.
    SimpleTupleDeserializer des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
    des.open(bis);
   
    ITuple targetTuple = new Tuple(targetSchema);
    for(int i = 0; i < 10; i++) {
      des.deserialize(targetTuple);
    }
   
    assertEquals("foo", targetTuple.getString("a"));
    assertEquals(10, targetTuple.get("b"));
    assertEquals(5d, targetTuple.get("c"));
    assertNull(targetTuple.get("d"));
    assertNull(targetTuple.get("e"));
   
    // Important: if we read a file that doesn't contain a field right after
    // a file that does contain it, the field must be cleared even when no
    // default value was provided.
    schema = new Schema("schema", Fields.parse("a:string, c:double"));
    tuple = new Tuple(schema);
    tuple.set("a", "foo");
    tuple.set("c", 5d);
   
    bos = new ByteArrayOutputStream();
    ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
    ser.open(bos);
   
    for(int i = 0; i < 10; i++) {
      ser.serialize(tuple);
    }
   
    ser.close();
    bos.close();   
    bis = new ByteArrayInputStream(bos.toByteArray());   
    des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
    des.open(bis);
   
    for(int i = 0; i < 10; i++) {
      des.deserialize(targetTuple);
    }
   
    assertEquals("foo", targetTuple.getString("a"));
    assertNull(targetTuple.get("b"));
    assertEquals(5d, targetTuple.get("c"));
    assertNull(targetTuple.get("d"));
    assertNull(targetTuple.get("e"));
   
    bis.close();
  }
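
The schema variable comes from code before the excerpt. Judging by the set() calls and assertions, it plausibly looked like this (an assumption, not part of the original):

    Schema schema = new Schema("schema", Fields.parse("a:string, b:int, c:double"));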

    Configuration conf = getConf();
    // SCHEMA, NUM_ROWS_TO_GENERATE, withTupleInput() and fillTuple() belong
    // to the surrounding test harness and are not shown in this excerpt.
    String input = TestTupleMRJob.class.getCanonicalName() + "-input";
    String output = TestTupleMRJob.class.getCanonicalName() + "-output";

    ITuple tuple = new Tuple(SCHEMA);
    for(int i = 0; i < NUM_ROWS_TO_GENERATE; i++) {
      withTupleInput(input, fillTuple(true, tuple));
    }

    TupleMRBuilder builder = new TupleMRBuilder(getConf(), "test");
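
The excerpt stops right after the builder is created. A hedged sketch of how the configuration typically continues before createJob(), mirroring the calls in the next example (IdentityTupleReducer is assumed to exist alongside the IdentityTupleMapper used below; the group-by field name is illustrative):

    builder.addIntermediateSchema(SCHEMA);
    builder.addTupleInput(new Path(input), new IdentityTupleMapper());
    builder.setGroupByFields("f1");   // illustrative -- must name a field of SCHEMA
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setTupleOutput(new Path(output), SCHEMA);
    Job job = builder.createJob();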

    // input1 is defined just before this excerpt, analogously to input2.
    String input2 = TestTupleMRJob.class.getCanonicalName() + "-input2";
    String output = TestTupleMRJob.class.getCanonicalName() + "-output";

    // Two intermediate schemas: one with plain fields, one where both
    // fields are nullable ("?").
    final Schema schemaNoNulls = new Schema("NoNulls", Fields.parse("f1:int,f2:string"));
    final Schema schemaNulls = new Schema("Nulls", Fields.parse("f1:int?,f2:string?"));
    Tuple t1 = new Tuple(schemaNoNulls);
    Tuple t2 = new Tuple(schemaNulls);

    t1.set(0, 0);
    t1.set(1, "nn");
    withTupleInput(input1, t1);

    Object[][] tuples = new Object[][] { new Object[] { 0, null }, new Object[] { 0, "n1" },
        new Object[] { null, "n2" } };
    for(Object[] tuple : tuples) {
      t2.set(0, tuple[0]);
      t2.set(1, tuple[1]);
      withTupleInput(input2, t2);
    }

    TupleMRBuilder builder = new TupleMRBuilder(getConf(), "test");
    builder.addTupleInput(new Path(input1), new IdentityTupleMapper());
    builder.addTupleInput(new Path(input2), new IdentityTupleMapper());

    builder.setTupleReducer(new TupleReducer<ITuple, NullWritable>() {
      @Override
      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException, TupleMRException {
        // Copy every tuple into the nullable schema and check the group size:
        // key 0 receives three tuples, the null key receives one.
        int count = 0;
        for(ITuple tuple : tuples) {
          Tuple t = new Tuple(schemaNulls);
          t.set(0, tuple.get(0));
          t.set(1, tuple.get(1));
          collector.write(t, NullWritable.get());
          count++;
        }
        if(group.get(0) == null) {
          assertEquals(1, count);
        } else if(((Integer) group.get(0)) == 0) {
          assertEquals(3, count);
        }
      }
    });
    builder.addIntermediateSchema(schemaNoNulls);
    builder.addIntermediateSchema(schemaNulls);
    builder.setGroupByFields("f1");
    // Common order: f1 descending, with null treated as the smallest value
    // (so it sorts last in descending order); ties break by schema
    // registration order.  Per-schema order: f2 ascending, nulls last.
    builder.setOrderBy(OrderBy.parse("f1:desc|null_smallest").addSchemaOrder(Criteria.Order.ASC));
    builder.setSpecificOrderBy("NoNulls", OrderBy.parse("f2:asc|null_biggest"));
    builder.setSpecificOrderBy("Nulls", OrderBy.parse("f2:asc|null_biggest"));
    builder.setTupleOutput(new Path(output), schemaNulls);

    Job job = builder.createJob();
    job.setNumReduceTasks(1);
    try {
      assertRun(job);
    } finally {
      builder.cleanUpInstanceFiles();
    }

    final Object[][] expectedOutput = new Object[][] { new Object[] { 0, "nn" },
        new Object[] { 0, "n1" }, new Object[] { 0, null }, new Object[] { null, "n2" } };

    // Flip to true to dump the reducer output for inspection.
    boolean debug = false;
    if(debug) {
      readTuples(new Path(output + "/part-r-00000"), getConf(), new TupleVisitor() {
        @Override
        public void onTuple(ITuple t) {
          System.out.println(t);
        }
      });
    }

    readTuples(new Path(output + "/part-r-00000"), getConf(), new TupleVisitor() {
      int i = 0;

      @Override
      public void onTuple(ITuple t) {
        assertEqualsNull(expectedOutput[i][0], t.get(0));
        Object f2 = t.get(1);
        // String fields deserialize as Utf8, so normalize before comparing.
        f2 = (f2 != null) ? f2.toString() : f2;
        assertEqualsNull(expectedOutput[i][1], f2);
        i++;
      }
    });
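
OrderBy.parse() takes field:direction pairs, each optionally qualified with |null_smallest or |null_biggest to say whether null compares as the smallest or the biggest value; addSchemaOrder() controls how ties between tuples from different intermediate schemas break. The two orderings from the test above, in isolation:

    // f1 descending, null compares smallest; then schema registration order, ascending.
    OrderBy common = OrderBy.parse("f1:desc|null_smallest").addSchemaOrder(Criteria.Order.ASC);
    // Within one schema: f2 ascending, nulls sorting last.
    OrderBy perSchema = OrderBy.parse("f2:asc|null_biggest");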

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      // Resolve both intermediate schemas of this job by name; "user" and
      // "country" are Tuple fields of the enclosing mapper/reducer.
      Schema peopleSchema = context.getTupleMRConfig().getIntermediateSchema("user");
      Schema countrySchema = context.getTupleMRConfig().getIntermediateSchema("country");
      user = new Tuple(peopleSchema);
      country = new Tuple(countrySchema);
    }
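
That setup has the shape of a reduce-side join. A hedged sketch of how the two schemas could be declared and grouped on a shared key (the field lists are assumptions; the original declarations are not shown):

    Schema userSchema = new Schema("user", Fields.parse("name:string, country_id:int"));
    Schema countrySchema = new Schema("country", Fields.parse("country_id:int, country_name:string"));
    builder.addIntermediateSchema(userSchema);
    builder.addIntermediateSchema(countrySchema);
    builder.setGroupByFields("country_id");   // tuples meet in the same reduce group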

    // Define the input and its associated mapper.
    // We'll just have a mapper; the reducer will be the identity.
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          Tuple tuple = new Tuple(schema);

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            // Input lines are tab-separated: user_id, language, message.
            String[] fields = value.toString().split("\t");
            String language = fields[1];
            tuple.set("user_id", fields[0]);
            tuple.set("message", fields[2]);
            if(language.equals("en")) {
              // English -> write to main output
              collector.write(tuple);
            } else if(language.equals("fr")) {
              // French -> write to french index
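
The excerpt is cut off inside the French branch. Pangool covers this pattern with named outputs; a hedged sketch of how the branch could continue, assuming a named output registered as "french_index" on the builder (both the output name and the getNamedOutput() call are assumptions to check against your Pangool version):

              // Hypothetical: the named output must have been added to the builder first.
              collector.getNamedOutput("french_index").write(tuple, NullWritable.get());
            }
          }
        });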

    // schema, cField and dField are defined before this excerpt; superSetOf()
    // yields a schema with all of schema's fields plus cField and dField.
    Schema targetSchema = Mutator.superSetOf(schema, cField, dField);

    Configuration conf = new Configuration();
    HadoopSerialization hadoopSerDe = new HadoopSerialization(conf);

    ITuple tuple = new Tuple(schema);
    tuple.set("a", "foo");
    tuple.set("b", 10);

    SimpleTupleSerializer ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    ser.open(bos);
   
    for(int i = 0; i < 100000; i++) {
      ser.serialize(tuple);
    }
   
    ser.close();
   
    bos.close();
    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
   
    SimpleTupleDeserializer des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
    des.open(bis);
   
    ITuple targetTuple = new Tuple(targetSchema);
    // Time 100,000 deserializations into the wider target schema.
    long start = System.currentTimeMillis();
    for(int i = 0; i < 100000; i++) {
      des.deserialize(targetTuple);
    }
    long end = System.currentTimeMillis();
    System.out.println("deserialization took " + (end - start) + " ms");

    assertEquals("foo", targetTuple.getString("a"));
    assertEquals(10, targetTuple.get("b"));
    // "c" and "d" were never serialized, so they take the default values
    // declared on cField and dField.
    assertEquals(100d, targetTuple.get("c"));
    assertEquals(1000L, targetTuple.get("d"));
  }
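
For those assertions to hold, cField and dField must carry default values (100d and 1000L). A hedged sketch of what their declarations might resemble; how a default value is attached to a Field differs across Pangool versions, so treat the factory calls as assumptions:

    // Hypothetical declarations -- not from the original test.
    Field cField = Field.create("c", Field.Type.DOUBLE, true, 100d);
    Field dField = Field.create("d", Field.Type.LONG, true, 1000L);
    Schema targetSchema = Mutator.superSetOf(schema, cField, dField);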

  public static int assertOutput(String output, Configuration conf) throws NumberFormatException, IOException, InterruptedException {
    int validatedOutputLines = 0;

    // Iterate over a tuple file, reusing one Tuple instance for every record.
    Path outPath = new Path(output);
    TupleFile.Reader reader = new TupleFile.Reader(FileSystem.get(outPath.toUri(), conf), conf, outPath);
    Tuple tuple = new Tuple(reader.getSchema());

    while(reader.next(tuple)) {
      int topicId = (Integer) tuple.get("topic");
      // String fields come back as Utf8, hence the explicit toString().
      String word = ((Utf8) tuple.get("word")).toString();
      int count = (Integer) tuple.get("count");
      if(topicId == 1) {
        if(word.equals("bar") || word.equals("foo")) {
          assertEquals(2, count);
          validatedOutputLines++;
        } else if(word.equals("blah") || word.equals("bloh")) {
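
The counterpart for producing such a file is TupleFile.Writer. A minimal hedged sketch; the exact constructor signature should be checked against your Pangool version:

    Schema schema = new Schema("counts", Fields.parse("topic:int, word:string, count:int"));
    Tuple tuple = new Tuple(schema);
    tuple.set("topic", 1);
    tuple.set("word", "foo");
    tuple.set("count", 2);

    Path path = new Path("counts-file");
    TupleFile.Writer writer = new TupleFile.Writer(FileSystem.get(conf), conf, path, schema);
    writer.append(tuple);   // one record per append() call
    writer.close();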
