Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Tuple$IDontKnowHowToCopyThisStuff


  public static class MyHandler extends TupleMapper<LongWritable, Text> {

    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      ITuple tuple = new Tuple(schema1);
      tuple.set("a", (int) (Math.random() * 1000));
      tuple.set("b", value.toString());

      ITuple mTuple = new Tuple(getMetaSchema1());
      mTuple.set("partition", (int) (Math.random() * 10));
      mTuple.set("tuple", tuple);

      ITuple mTuple2 = new Tuple(getMetaSchema2());
      mTuple2.set("group", value.toString());
      mTuple2.set("metatuple", mTuple);

      collector.write(mTuple2);
    }
View Full Code Here


    private Tuple tuple;

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      Schema schema = context.getTupleMRConfig().getIntermediateSchema(0);
      this.tuple = new Tuple(schema);
      tuple.set("count", 1);
    }
View Full Code Here

    private Tuple tuple;

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      Schema schema = context.getTupleMRConfig().getIntermediateSchema("schema");
      this.tuple = new Tuple(schema);
    }
View Full Code Here

    Schema targetSchema = new Schema("target", Fields.parse("a:string, b:int?, c:double, d:long?, e:boolean?"));
   
    Configuration conf = new Configuration();
    HadoopSerialization hadoopSerDe = new HadoopSerialization(conf);

    ITuple tuple = new Tuple(schema);
    tuple.set("a", "foo");
    tuple.set("b", 10);
    tuple.set("c", 5d);
   
    SimpleTupleSerializer ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    ser.open(bos);
   
    for(int i = 0; i < 10; i++) {
      ser.serialize(tuple);
    }
   
    ser.close();
   
    bos.close();
    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
   
    SimpleTupleDeserializer des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
    des.open(bis);
   
    ITuple targetTuple = new Tuple(targetSchema);
    for(int i = 0; i < 10; i++) {
      des.deserialize(targetTuple);
    }
   
    assertEquals("foo", targetTuple.getString("a"));
    assertEquals(10, targetTuple.get("b"));
    assertEquals(5d, targetTuple.get("c"));
    assertNull(targetTuple.get("d"));
    assertNull(targetTuple.get("e"));
   
    // Importantly, if we read a file that doesn't contain a field
    // just after a file that contains this field, we should clear the field even
    // if no default value was provided.
    schema = new Schema("schema", Fields.parse("a:string, c:double"));
    tuple = new Tuple(schema);
    tuple.set("a", "foo");
    tuple.set("c", 5d);
   
    bos = new ByteArrayOutputStream();
    ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
    ser.open(bos);
   
    for(int i = 0; i < 10; i++) {
      ser.serialize(tuple);
    }
   
    ser.close();
    bos.close();   
    bis = new ByteArrayInputStream(bos.toByteArray());   
    des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
    des.open(bis);
   
    for(int i = 0; i < 10; i++) {
      des.deserialize(targetTuple);
    }
   
    assertEquals("foo", targetTuple.getString("a"));
    assertNull(targetTuple.get("b"));
    assertEquals(5d, targetTuple.get("c"));
    assertNull(targetTuple.get("d"));
    assertNull(targetTuple.get("e"));
   
    bis.close();
  }
View Full Code Here

    Configuration conf = getConf();
    String input = TestTupleMRJob.class + "-input";
    String output = TestTupleMRJob.class + "-output";

    ITuple tuple = new Tuple(SCHEMA);
    for(int i = 0; i < NUM_ROWS_TO_GENERATE; i++) {
      withTupleInput(input, fillTuple(true, tuple));
    }

    TupleMRBuilder builder = new TupleMRBuilder(getConf(), "test");
View Full Code Here

    String input2 = TestTupleMRJob.class.getCanonicalName() + "-input2";
    String output = TestTupleMRJob.class.getCanonicalName() + "-output";

    final Schema schemaNoNulls = new Schema("NoNulls", Fields.parse("f1:int,f2:string"));
    final Schema schemaNulls = new Schema("Nulls", Fields.parse("f1:int?,f2:string?"));
    Tuple t1 = new Tuple(schemaNoNulls);
    Tuple t2 = new Tuple(schemaNulls);

    t1.set(0, 0);
    t1.set(1, "nn");
    withTupleInput(input1, t1);

    Object tuples[][] = new Object[][] { new Object[] { 0, null }, new Object[] { 0, "n1" },
        new Object[] { null, "n2" } };
    for(Object[] tuple : tuples) {
      t2.set(0, tuple[0]);
      t2.set(1, tuple[1]);
      withTupleInput(input2, t2);
    }

    TupleMRBuilder builder = new TupleMRBuilder(getConf(), "test");
    builder.addTupleInput(new Path(input1), new IdentityTupleMapper());
    builder.addTupleInput(new Path(input2), new IdentityTupleMapper());

    builder.setTupleReducer(new TupleReducer<ITuple, NullWritable>() {
      @Override
      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException, TupleMRException {
        int count = 0;
        for(ITuple tuple : tuples) {
          Tuple t = new Tuple(schemaNulls);
          t.set(0, tuple.get(0));
          t.set(1, tuple.get(1));
          collector.write(t, NullWritable.get());
          count++;
        }
        if(group.get(0) == null) {
          assertEquals(1, count);
        } else if(((Integer) group.get(0)) == 0) {
          assertEquals(3, count);
        }
      }
    });
    builder.addIntermediateSchema(schemaNoNulls);
    builder.addIntermediateSchema(schemaNulls);
    builder.setGroupByFields("f1");
    builder.setOrderBy(OrderBy.parse("f1:desc|null_smallest").addSchemaOrder(Criteria.Order.ASC));
    builder.setSpecificOrderBy("NoNulls", OrderBy.parse("f2:asc|null_biggest"));
    builder.setSpecificOrderBy("Nulls", OrderBy.parse("f2:asc|null_biggest"));
    builder.setTupleOutput(new Path(output), schemaNulls);

    Job job = builder.createJob();
    job.setNumReduceTasks(1);
    try {
      assertRun(job);
    } finally {
      builder.cleanUpInstanceFiles();
    }

    final Object expectedOutput[][] = new Object[][] { new Object[] { 0, "nn" },
        new Object[] { 0, "n1" }, new Object[] { 0, null }, new Object[] { null, "n2" } };

    boolean debug = false;
    if(debug) {
      readTuples(new Path(output + "/part-r-00000"), getConf(), new TupleVisitor() {
        @Override
        public void onTuple(ITuple t) {
          System.out.println(t);
        }
      });
    }

    readTuples(new Path(output + "/part-r-00000"), getConf(), new TupleVisitor() {
      int i = 0;

      @Override
      public void onTuple(ITuple t) {
        assertEqualsNull(expectedOutput[i][0], t.get(0));
        Object f2 = t.get(1);
        f2 = (f2 != null) ? f2.toString() : f2;
        assertEqualsNull(expectedOutput[i][1], f2);
        i++;
      }
    });
View Full Code Here

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      Schema peopleSchema = context.getTupleMRConfig().getIntermediateSchema("user");
      Schema countrySchema = context.getTupleMRConfig().getIntermediateSchema("country");
      user = new Tuple(peopleSchema);
      country = new Tuple(countrySchema);
    }
View Full Code Here

    // Define the input and its associated mapper.
    // We'll just have a Mapper, reducer will be Identity
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          Tuple tuple = new Tuple(schema);

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            String language = fields[1];
            tuple.set("user_id", fields[0]);
            tuple.set("message", fields[2]);
            if(language.equals("en")) {
              // English -> write to main output
              collector.write(tuple);
            } else if(language.equals("fr")) {
              // French -> write to french index
View Full Code Here

    Schema targetSchema = Mutator.superSetOf(schema, cField, dField);
   
    Configuration conf = new Configuration();
    HadoopSerialization hadoopSerDe = new HadoopSerialization(conf);

    ITuple tuple = new Tuple(schema);
    tuple.set("a", "foo");
    tuple.set("b", 10);
   
    SimpleTupleSerializer ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    ser.open(bos);
   
    for(int i = 0; i < 100000; i++) {
      ser.serialize(tuple);
    }
   
    ser.close();
   
    bos.close();
    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
   
    SimpleTupleDeserializer des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
    des.open(bis);
   
    ITuple targetTuple = new Tuple(targetSchema);
    long start = System.currentTimeMillis();
    for(int i = 0; i < 100000; i++) {
      des.deserialize(targetTuple);
    }
    long end = System.currentTimeMillis();
    System.out.println(end - start);
   
    assertEquals("foo", targetTuple.getString("a"));
    assertEquals(10, targetTuple.get("b"));
    assertEquals(100d, targetTuple.get("c"));
    assertEquals(1000l, targetTuple.get("d"));
  }
View Full Code Here

  public static int assertOutput(String output, Configuration conf) throws NumberFormatException, IOException, InterruptedException {
    int validatedOutputLines = 0;

    Path outPath = new Path(output);
    TupleFile.Reader reader = new TupleFile.Reader(FileSystem.get(outPath.toUri(), conf), conf, outPath);
    Tuple tuple = new Tuple(reader.getSchema());

    while(reader.next(tuple)) {
      int topicId = (Integer) tuple.get("topic");
      String word = ((Utf8) tuple.get("word")).toString();
      int count   = (Integer) tuple.get("count");
      if(topicId == 1) {
        if(word.equals("bar") || word.equals("foo")) {
          assertEquals(2, count);
          validatedOutputLines++;
        } else if(word.equals("blah") || word.equals("bloh")) {
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Tuple$IDontKnowHowToCopyThisStuff

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.