Examples of org.apache.avro.generic.GenericData.Record

Package org.apache.avro.generic.GenericData

Examples of org.apache.avro.generic.GenericData.Record

org.apache.avro.generic.GenericData.Record

  }


  @Test(expected=AvroRuntimeException.class)
  public void testRecordCreateEmptySchema() throws Exception {
    Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false);
    Record r = new GenericData.Record(s);
  }

View Full Code Here

  public void testRecordPutInvalidField() throws Exception {
    Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false);
    List<Schema.Field> fields = new ArrayList<Schema.Field>();
    fields.add(new Schema.Field("someFieldName", s, "docs", null));
    s.setFields(fields);
    Record r = new GenericData.Record(s);
    r.put("invalidFieldName", "someValue");
  }

View Full Code Here

    ByteArrayOutputStream b1 = new ByteArrayOutputStream(5);
    ByteArrayOutputStream b2 = new ByteArrayOutputStream(5);
    BinaryEncoder b1Enc = EncoderFactory.get().binaryEncoder(b1, null);
    BinaryEncoder b2Enc = EncoderFactory.get().binaryEncoder(b2, null);
    // Prepare two different datums
    Record testDatum1 = new Record(record);
    testDatum1.put(0, 1);
    Record testDatum2 = new Record(record);
    testDatum2.put(0, 2);
    GenericDatumWriter<Record> gWriter = new GenericDatumWriter<Record>(record);
    Integer start1 = 0, start2 = 0;
    try {
      // Write two datums in each stream
      // and get the offset length after the first write in each.

View Full Code Here

  /**
   * Moves data between a Tuple and an Avro Record
   */
  @SuppressWarnings({ "unchecked", "rawtypes" })
  public Record toRecord(ITuple tuple, Record reuse) throws IOException {
    Record record = reuse;
    if (record == null){
      record = new Record(avroSchema);
    }
    if (schemaValidation && !tuple.getSchema().equals(pangoolSchema)){
      throw new IOException("Tuple '"+tuple + "' " +
          "contains schema not expected." +
          "Expected schema '"+ pangoolSchema + " and actual: " + tuple.getSchema());
    }
    for(int i = 0; i < pangoolSchema.getFields().size(); i++) {
      Object obj = tuple.get(i);
      Field field = pangoolSchema.getField(i);
      if (obj == null){
        throw new IOException("Field '" 
      + field.getName() + "' can't be null in tuple:" + tuple);
      }
      
      switch(field.getType()){
      case INT:
      case LONG:
      case FLOAT:
      case BOOLEAN:
      case DOUBLE:
      case BYTES:
        record.put(i, obj); //optimistic
        break;
      case OBJECT:
        Serializer customSer = customSerializers[i];
        DataOutputBuffer buffer = buffers[i];
        buffer.reset();
        if (customSer != null){
          customSer.open(buffer);
          customSer.serialize(obj);
          customSer.close(); //TODO is this safe ?
        } else {
          hadoopSer.ser(obj, buffer);
        }
        //TODO this byteBuffer instances should be cached and reused
        ByteBuffer byteBuffer = ByteBuffer.wrap(buffer.getData(), 0,buffer.getLength());
        record.put(i, byteBuffer);
        break;
      case ENUM:
        record.put(i,obj.toString());
        break;
      case STRING:
        record.put(i,new Utf8(obj.toString())); //could be directly String ?
        break;
      default:
          throw 
          new IOException("Not correspondence to Avro type from Pangool type " + field.getType());
      }

View Full Code Here

      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("tweet"));
    };


    public void map(AvroWrapper<Record> key, NullWritable value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      Record tweet = key.datum();
      tuple.set("tweet_id", tweet.get("id"));
      tuple.set("tweet_hashtags", tweet.get("hashtags"));
      collector.write(tuple);
    }

View Full Code Here


    // Avro record; instantiated in setup() from getAvroOutputSchema().
    private Record outputRecord;
    // Avro wrapper for Record datums; instantiated in setup().
    private AvroWrapper<Record> wrapper;


    public void setup(TupleMRContext context, Collector collector) throws IOException, InterruptedException {
      outputRecord = new Record(getAvroOutputSchema());
      wrapper = new AvroWrapper<Record>();
    };

View Full Code Here


    public void setup(TupleMRContext context, Collector collector) 
        throws IOException, InterruptedException {
      this.mapper = new ObjectMapper();
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema(0));
      record = new Record(getAvroSchema());
      tuple.set("my_avro",record);
    };

View Full Code Here

    public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException, TupleMRException {


      int count = 0;
      ITuple outputTuple = null;
      Record outputRecord=null;
      for(ITuple tuple : tuples) {
        Record record = (Record)tuple.get("my_avro");
        count += (Integer) record.get("count");
        outputTuple = tuple;
        outputRecord = record;
      }
      outputRecord.put("count",count);
      outputTuple.set("my_avro",outputRecord);

View Full Code Here

  }


  @Test(expected=AvroRuntimeException.class)
  public void testRecordCreateEmptySchema() throws Exception {
    Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false);
    Record r = new GenericData.Record(s);
  }

View Full Code Here

  public void testRecordPutInvalidField() throws Exception {
    Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false);
    List<Schema.Field> fields = new ArrayList<Schema.Field>();
    fields.add(new Schema.Field("someFieldName", s, "docs", null));
    s.setFields(fields);
    Record r = new GenericData.Record(s);
    r.put("invalidFieldName", "someValue");
  }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.avro.generic.GenericData.Record

com.cloudera.cdk.data.filesystem.TestPartitionedDatasetWriter

com.datasalt.pangool.BaseTest

com.datasalt.pangool.examples.avro.AvroCustomSerializationJob$CountReducer

com.datasalt.pangool.examples.avro.AvroCustomSerializationJob$TokenizeMapper

com.datasalt.pangool.examples.avro.AvroTopicalWordCount$CountReducer

com.datasalt.pangool.examples.avro.AvroTopicalWordCount$TokenizeMapper

com.datasalt.pangool.examples.avro.AvroTweetsJoin$Red

com.datasalt.pangool.examples.avro.AvroTweetsJoin$TweetsMapper

com.datasalt.pangool.examples.avro.TestAvroTopicalWordCount

com.datasalt.pangool.examples.avro.TestAvroTweetsJoin

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.