Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.ITuple


    try {
      Iterator<NullWritable> iterator = values.iterator();
      tupleIterator.setIterator(iterator);

      // We get the first tuple, to create the groupTuple view
      ITuple firstTupleGroup = key.datum();

      // A view is created over the first tuple to give the user the group
      // fields
      if(isMultipleSources) {
        int schemaId = tupleMRConfig.getSchemaIdByName(firstTupleGroup.getSchema()
            .getName());
        int[] indexTranslation = serInfo.getGroupSchemaIndexTranslation(schemaId);
        groupTuple.setContained(firstTupleGroup, indexTranslation);
      } else {
        groupTuple.setContained(firstTupleGroup);
View Full Code Here


    if(isRollup) {
      t.swapInstances();
      this.cachedTuples.swapInstances();
    }

    ITuple tuple = (multipleSources) ? deserializeMultipleSources() : deserializeOneSource(t.datum());
    t.datum(tuple);

    return t;
  }
View Full Code Here

    return t;
  }

  private ITuple deserializeMultipleSources() throws IOException {
    CachedTuples tuples = cachedTuples.datum();
    ITuple commonTuple = tuples.commonTuple;

    simpleTupleDeSer.readFields(commonTuple, commonTuple.getSchema(), serInfo.getCommonSchemaDeserializers());
    int schemaId = WritableUtils.readVInt(simpleTupleDeSer.getInput());
    ITuple specificTuple = tuples.specificTuples.get(schemaId);
    simpleTupleDeSer.readFields(specificTuple, specificTuple.getSchema(), serInfo.getSpecificSchemaDeserializers().get(schemaId));
    ITuple result = tuples.resultTuples.get(schemaId);
    mixIntermediateIntoResult(commonTuple, specificTuple, result, schemaId);
    return result;
  }
View Full Code Here

    }
  }

  private ITuple deserializeOneSource(ITuple reuse) throws IOException {
    CachedTuples tuples = cachedTuples.datum();
    ITuple commonTuple = tuples.commonTuple;
    simpleTupleDeSer.readFields(commonTuple, commonTuple.getSchema(), serInfo.getCommonSchemaDeserializers());
    if(reuse == null) {
      reuse = tuples.resultTuples.get(0);
    }
    int[] commonTranslation = serInfo.getCommonSchemaIndexTranslation(0); // just one common schema
    for(int i = 0; i < commonTranslation.length; i++) {
      int destPos = commonTranslation[i];
      reuse.set(destPos, commonTuple.get(i));
    }
    return reuse;
  }
View Full Code Here

  public int getPartition(DatumWrapper<ITuple> key, NullWritable value, int numPartitions) {
    if(numPartitions == 1) {
      // in this case the schema is not checked if it's valid
      return 0;
    } else {
      ITuple tuple = key.datum();
      String sourceName = tuple.getSchema().getName();
      Integer schemaId = tupleMRConfig.getSchemaIdByName(sourceName);
      if(schemaId == null) {
        throw new RuntimeException("Schema name '" + sourceName
            + "' is unknown. Known schemas are : "
            + tupleMRConfig.getIntermediateSchemaNames());
View Full Code Here

      Context context) throws IOException, InterruptedException {

    try {
      Iterator<NullWritable> iterator = values.iterator();
      tupleIterator.setIterator(iterator);
      ITuple currentTuple = key.datum();
      ITuple previousKey = key.previousDatum();
      int indexMismatch;
      if(firstRun) {
        indexMismatch = minDepth;
        firstRun = false;
      } else {
View Full Code Here

    try {
      Iterator<NullWritable> iterator = values.iterator();
      tupleIterator.setIterator(iterator);

      // We get the first tuple, to create the groupTuple view
      ITuple firstTupleGroup = key.datum();

      // A view is created over the first tuple to give the user the group
      // fields
      if(isMultipleSources) {
        int schemaId = tupleMRConfig.getSchemaIdByName(firstTupleGroup.getSchema()
            .getName());
        int[] indexTranslation = serInfo.getGroupSchemaIndexTranslation(schemaId);
        groupTuple.setContained(firstTupleGroup, indexTranslation);
      } else {
        groupTuple.setContained(firstTupleGroup);
View Full Code Here

  /**
   * Moves data between a Record and a Tuple
   */
  public ITuple toTuple(Record record, ITuple reuse) throws IOException {
    ITuple tuple = reuse;
    if(tuple == null) {
      tuple = new Tuple(pangoolSchema);
    }

    Schema pangoolSchema = tuple.getSchema();
    for(org.apache.avro.Schema.Field avroField : avroSchema.getFields()) {
      int pos = avroField.pos();
      Object objRecord = record.get(pos);
      Field pangoolField = pangoolSchema.getField(pos);
      switch(pangoolField.getType()) {
      case INT:
      case LONG:
      case BOOLEAN:
      case FLOAT:
      case DOUBLE:
        tuple.set(pos, objRecord); // very optimistic
        break;
      case STRING: {
        if(!(tuple.get(pos) instanceof Utf8)) {
          tuple.set(pos, new com.datasalt.pangool.io.Utf8());
        }
        com.datasalt.pangool.io.Utf8 utf8 = (com.datasalt.pangool.io.Utf8) tuple.get(pos);
        if(objRecord instanceof String) {
          utf8.set((String) objRecord);
        } else if(objRecord instanceof Utf8) {
          Utf8 avroUtf8 = (Utf8) objRecord;
          utf8.set(avroUtf8.getBytes(), 0, avroUtf8.getByteLength());
        } else {
          throw new IOException("Not supported avro field " + org.apache.avro.Schema.Type.STRING + " with instance "
              + objRecord.getClass().getName());
        }
        break;
      }
      case ENUM: {
        Class clazz = pangoolField.getObjectClass();
        Enum e = Enum.valueOf(clazz, objRecord.toString());
        tuple.set(pos, e);
        break;
      }
      case BYTES:
        tuple.set(pos, objRecord); // TODO FIXME this should copy bytes really, not reference!
        break;
      case OBJECT:
        Deserializer customDeser = customDeserializers[pos];
        if(objRecord instanceof byte[]) {
          inputBuffer.reset((byte[]) objRecord, ((byte[]) objRecord).length);
        } else if(objRecord instanceof ByteBuffer) {
          ByteBuffer buffer = (ByteBuffer) objRecord;
          int offset = buffer.arrayOffset() + buffer.position();
          int length = buffer.limit() - buffer.position();
          inputBuffer.reset(buffer.array(), offset, length);
        } else {
          throw new PangoolRuntimeException("Can't convert to OBJECT from instance " + objRecord.getClass());
        }
        if(customDeser != null) {
          customDeser.open(inputBuffer);
          tuple.set(pos, customDeser.deserialize(tuple.get(pos))); // TODO FIXME avro deserializer shouldn't reuse
                                                                   // objects sometimes (UNION ?)
          customDeser.close(); // TODO is this ok ?
        } else {
          // no custom deser , then use Hadoop serializers registered in "io.serializations"
          Class clazz = pangoolField.getObjectClass();
          if(tuple.get(pos) == null || tuple.get(pos).getClass() != clazz) {
            tuple.set(pos, ReflectionUtils.newInstance(clazz, conf));
          }
          hadoopSer.deser(tuple.get(pos), inputBuffer);
        }
        break;
      default:
        throw new IOException("Not supported avro type : " + avroField.schema().getType());
      }
View Full Code Here

      Context context) throws IOException, InterruptedException {

    try {
      Iterator<NullWritable> iterator = values.iterator();
      tupleIterator.setIterator(iterator);
      ITuple currentTuple = key.datum();
      ITuple previousKey = key.previousDatum();
      int indexMismatch;
      if(firstRun) {
        indexMismatch = minDepth;
        firstRun = false;
      } else {
View Full Code Here

    try {
      Iterator<NullWritable> iterator = values.iterator();
      tupleIterator.setIterator(iterator);

      // We get the first tuple, to create the groupTuple view
      ITuple firstTupleGroup = key.datum();

      // A view is created over the first tuple to give the user the group
      // fields
      if(isMultipleSources) {
        int schemaId = tupleMRConfig.getSchemaIdByName(firstTupleGroup.getSchema()
            .getName());
        int[] indexTranslation = serInfo.getGroupSchemaIndexTranslation(schemaId);
        groupTuple.setContained(firstTupleGroup, indexTranslation);
      } else {
        groupTuple.setContained(firstTupleGroup);
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.ITuple

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.