Package com.datasalt.pangool.io.Schema

Examples of com.datasalt.pangool.io.Schema.Field


   */
  public static Tuple deepCopy(ITuple tuple, Map<String, FieldClonator> customClonators) {
    Schema schema = tuple.getSchema();
    Tuple newTuple = new Tuple(schema);
    for (int i = 0; i < schema.getFields().size(); i++) {
      Field field = schema.getField(i);

      Object value = tuple.get(i);
      if (value == null) {
        continue;
      }

      if (customClonators != null && customClonators.containsKey(field.getName())) {
        // There is a custom clonator
        newTuple.set(i, customClonators.get(field.getName()).giveMeACopy(value));
        continue;
      }

      switch (field.getType()) {
        case BYTES:
          if (value instanceof ByteBuffer) {
            newTuple.set(i, cloneByteBuffer((ByteBuffer) value));
          } else if (value instanceof byte[]) {
            newTuple.set(i, Arrays.copyOf((byte[]) value, ((byte[]) value).length));
          } else {
            throw new IllegalArgumentException("Field " + field.getName() + " of type " + field.getType()
                + " cannot contains values of class " + value.getClass().getCanonicalName());
          }
          break;
        case OBJECT:
          if (value instanceof ITuple) {
            throw new IDontKnowHowToCopyThisStuff("Tuples inside tuples requires a custom FieldClonator" +
                "to perform the copy. Please, provide a custom FieldClonator for field " + field.getName()
                + ". It usually is as simple as create one that calls the deepCopy method for the " +
                "inner tuple");
          } else {
            throw new IDontKnowHowToCopyThisStuff("I don't know how to copy the field " + field.getName()
                + " with type " + value.getClass().getCanonicalName() + ". Please, provide a custom " +
                "FieldClonator for this field in order to be able to perform deep copies");
          }
        case STRING:
          if (value instanceof String) {
            newTuple.set(i, tuple.get(i));
          } else if (value instanceof Utf8 || value instanceof Text) {
            newTuple.set(i, new Utf8(value.toString()));
          } else {
            throw new IllegalArgumentException("Field " + field.getName() + " of type " + field.getType()
                + " cannot contains values of class " + value.getClass().getCanonicalName());
          }
          break;
        default:
          newTuple.set(i, tuple.get(i));
View Full Code Here


  }

  public static Serializer[] getSerializers(Schema schema, Configuration conf) {
    Serializer[] result = new Serializer[schema.getFields().size()];
    for (int i = 0; i < result.length; i++) {
      Field field = schema.getField(i);
      if (field.getObjectSerialization() != null) {
        Serialization serialization = ReflectionUtils.newInstance(field.getObjectSerialization(), conf);
        if (serialization instanceof FieldConfigurable) {
          ((FieldConfigurable) serialization).setFieldProperties(field.getProps());
        }
        result[i] = serialization.getSerializer(field.getObjectClass());
      }
    }
    return result;
  }
View Full Code Here

  }

  public static Deserializer[] getDeserializers(Schema schema, Configuration conf) {
    Deserializer[] result = new Deserializer[schema.getFields().size()];
    for (int i = 0; i < result.length; i++) {
      Field field = schema.getField(i);
      if (field.getObjectSerialization() != null) {
        Serialization serialization = ReflectionUtils.newInstance(field.getObjectSerialization(), conf);
        if (serialization instanceof FieldConfigurable) {
          ((FieldConfigurable) serialization).setFieldProperties(field.getProps());
        }
        result[i] = serialization.getDeserializer(field.getObjectClass());
      }
    }
    return result;
  }
View Full Code Here

    Schema intermediateSchema = mrConfig.getIntermediateSchemas().get(0);
    Criteria commonSortCriteria = mrConfig.getCommonCriteria();
    List<Field> commonFields = new ArrayList<Field>();
    for (SortElement sortElement : commonSortCriteria.getElements()) {
      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);
      commonFields.add(Field.cloneField(field, fieldName));
    }

    // adding the rest
    for (Field field : intermediateSchema.getFields()) {
      Map<String, String> aliases = mrConfig.getFieldAliases(intermediateSchema.getName());
      if (!containsField(field.getName(), commonFields, aliases)) {
        commonFields.add(field);
      }
    }
    this.commonSchema = new Schema("common", commonFields);
  }
View Full Code Here

  private void calculateMultipleSourcesSubSchemas() throws TupleMRException {
    Criteria commonSortCriteria = mrConfig.getCommonCriteria();
    List<Field> commonFields = new ArrayList<Field>();
    for (SortElement sortElement : commonSortCriteria.getElements()) {
      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);

      commonFields.add(Field.cloneField(field, fieldName));
    }

    this.commonSchema = new Schema("common", commonFields);
    this.specificSchemas = new ArrayList<Schema>();
    List<List<Field>> specificFieldsBySource = new ArrayList<List<Field>>();

    for (int schemaId = 0; schemaId < mrConfig.getNumIntermediateSchemas(); schemaId++) {
      Criteria specificCriteria = mrConfig.getSpecificOrderBys().get(schemaId);
      List<Field> specificFields = new ArrayList<Field>();
      if (specificCriteria != null) {
        for (SortElement sortElement : specificCriteria.getElements()) {
          String fieldName = sortElement.getName();
          Field field = checkFieldInSchema(fieldName, schemaId);
          specificFields.add(Field.cloneField(field, fieldName));
        }
      }
      specificFieldsBySource.add(specificFields);
    }

    for (int i = 0; i < mrConfig.getNumIntermediateSchemas(); i++) {
      Schema sourceSchema = mrConfig.getIntermediateSchema(i);
      List<Field> specificFields = specificFieldsBySource.get(i);
      for (Field field : sourceSchema.getFields()) {
        Map<String, String> sourceAliases = mrConfig.getFieldAliases(sourceSchema.getName());
        if (!containsField(field.getName(), commonSchema.getFields(), sourceAliases)
            && !containsField(field.getName(), specificFields, sourceAliases)) {
          specificFields.add(field);
        }
      }
      this.specificSchemas.add(new Schema("specific", specificFields));
    }
View Full Code Here

   * select a representative field that will be used for serializing. In the case of
   * having a mixture of fields, some of them nullable and some others no nullables,
   * a nullable Field will be returned.
   */
  private Field checkFieldInAllSchemas(String name) throws TupleMRException {
    Field field = null;
    for (int i = 0; i < mrConfig.getIntermediateSchemas().size(); i++) {
      Field fieldInSource = checkFieldInSchema(name, i);
      if (field == null) {
        field = fieldInSource;
      } else if (field.getType() != fieldInSource.getType() || field.getObjectClass() != fieldInSource.getObjectClass()) {
        throw new TupleMRException("The type for field '" + name
            + "' is not the same in all the sources");
      } else if (fieldInSource.isNullable()) {
        // IMPORTANT CASE. Nullable fields must be returned when present nullable and non nullable fields mixed
        field = fieldInSource;
      }
    }
    return field;
View Full Code Here

  }

  private Field checkFieldInSchema(String fieldName, int schemaId)
      throws TupleMRException {
    Schema schema = mrConfig.getIntermediateSchema(schemaId);
    Field field = getFieldUsingAliases(schema, fieldName);
    if (field == null) {
      throw new TupleMRException("Field '" + fieldName + "' not present in source '"
          + schema.getName() + "' " + schema);
    }
    return field;
View Full Code Here

   * or deserialized
   * @return
   */
  public static Field createAvroField(String name,
       org.apache.avro.Schema avroSchema,  boolean isReflect){
    Field field = Field.createObject(name,Object.class);
    field.setObjectSerialization(AvroFieldSerialization.class);
    field.addProp("avro.schema",avroSchema.toString());
    field.addProp("avro.reflection",Boolean.toString(isReflect));
    return field;
  }
View Full Code Here

      }
    }
  }

  private static Schema getPangoolTweetSchema() {
    Field tweetIdField = Field.create("tweet_id", Schema.Field.Type.INT);
    Field tweetHashTags = Fields.createAvroField("tweet_hashtags", getAvroStringArraySchema(), false);
    return new Schema("tweet", Arrays.asList(tweetIdField, tweetHashTags));
  }
View Full Code Here

    Field tweetHashTags = Fields.createAvroField("tweet_hashtags", getAvroStringArraySchema(), false);
    return new Schema("tweet", Arrays.asList(tweetIdField, tweetHashTags));
  }

  private static Schema getPangoolRetweetSchema() {
    Field userId = Field.create("username", Schema.Field.Type.STRING);
    Field tweetId = Field.create("tweet_id", Schema.Field.Type.INT);
    return new Schema("retweet", Arrays.asList(userId, tweetId));
  }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Schema.Field

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.