// Read-side ("target") schema is a superset of the write-side schema: it adds the
// nullable fields d:long? and e:boolean? which are never written, so they must
// deserialize as null.
Schema targetSchema = new Schema("target", Fields.parse("a:string, b:int?, c:double, d:long?, e:boolean?"));
Configuration conf = new Configuration();
HadoopSerialization hadoopSerDe = new HadoopSerialization(conf);
// NOTE(review): 'schema' is declared above this excerpt — presumably it contains at
// least fields a:string, b, c (the ones set below); confirm against the full method.
ITuple tuple = new Tuple(schema);
tuple.set("a", "foo");
tuple.set("b", 10);
tuple.set("c", 5d);
// Serialize the same tuple 10 times with the write-side schema into an in-memory buffer.
SimpleTupleSerializer ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
ser.open(bos);
for(int i = 0; i < 10; i++) {
ser.serialize(tuple);
}
ser.close();
bos.close();
// Deserialize with the wider target schema; the same target tuple instance is
// reused across all 10 reads (exercises in-place field population).
ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
SimpleTupleDeserializer des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
des.open(bis);
ITuple targetTuple = new Tuple(targetSchema);
for(int i = 0; i < 10; i++) {
des.deserialize(targetTuple);
}
// Fields present in the source schema round-trip intact...
assertEquals("foo", targetTuple.getString("a"));
assertEquals(10, targetTuple.get("b"));
assertEquals(5d, targetTuple.get("c"));
// ...while fields only present in the target schema come back null.
assertNull(targetTuple.get("d"));
assertNull(targetTuple.get("e"));
// Something important is that if we read a file that doesn't contains a field
// just after a file that contains this field, we should clear the field even
// in the case that no default value was provided.
// Second pass: narrower write-side schema without field b. Since targetTuple was
// populated with b=10 above, this checks that b is cleared (not left stale).
schema = new Schema("schema", Fields.parse("a:string, c:double"));
tuple = new Tuple(schema);
tuple.set("a", "foo");
tuple.set("c", 5d);
bos = new ByteArrayOutputStream();
ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
ser.open(bos);
for(int i = 0; i < 10; i++) {
ser.serialize(tuple);
}
ser.close();
bos.close();
// NOTE(review): the previous 'bis' is reassigned without close(); harmless here
// because ByteArrayInputStream.close() is a no-op.
bis = new ByteArrayInputStream(bos.toByteArray());
des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
des.open(bis);
// Reuse the SAME targetTuple so any stale value of b from the first pass would
// be detected by the assertNull below.
for(int i = 0; i < 10; i++) {
des.deserialize(targetTuple);
}
assertEquals("foo", targetTuple.getString("a"));
// b was set in the first pass but is absent from this stream: it must be cleared.
assertNull(targetTuple.get("b"));
assertEquals(5d, targetTuple.get("c"));
assertNull(targetTuple.get("d"));
assertNull(targetTuple.get("e"));
bis.close();
}