Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Schema


      this.input = new DataInputStream(input);
    }
  }

  public void readFields(ITuple tuple, Deserializer[] customDeserializers) throws IOException {
    Schema schema = tuple.getSchema();
    for(int index = 0; index < schema.getFields().size(); index++) {
      Deserializer customDeser = customDeserializers[index];
      Field field = schema.getField(index);
      switch(field.getType()) {
      case INT:
        tuple.set(index, WritableUtils.readVInt(input));
        break;
      case LONG:
View Full Code Here


  @Override
  public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException,
      InterruptedException {

    Schema pangoolOutputSchema = Schema.parse(this.pangoolOutputSchema);
    org.apache.avro.Schema avroSchema = AvroUtils.toAvroSchema(pangoolOutputSchema);
    DataFileWriter<Record> writer = new DataFileWriter<Record>(new ReflectDatumWriter<Record>());

    // Compression etc - use Avro codecs
    Configuration conf = context.getConfiguration();
View Full Code Here

          "Need to specify source order in common OrderBy when using specific OrderBy");
    }
    if(ordering.getSchemaOrderIndex() != null) {
      throw new TupleMRException("Not allowed to set source order in specific order");
    }
    Schema schema = getSchemaByName(schemaName);
    for(SortElement e : ordering.getElements()) {
      if(!schema.containsField(e.getName())) {
        throw new TupleMRException("Source '" + schemaName + "' doesn't contain field '"
            + e.getName());
      }
    }
View Full Code Here

  @Override
  public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {

    Schema pangoolOutputSchema = Schema.parse(this.pangoolOutputSchema);
    org.apache.avro.Schema avroSchema = AvroUtils.toAvroSchema(pangoolOutputSchema);
    DataFileWriter<Record> writer = new DataFileWriter<Record>(
        new ReflectDatumWriter<Record>());

    // Compression etc - use Avro codecs
View Full Code Here

  static Schema getSchema() {
    org.apache.avro.Schema avroSchema = getAvroSchema();
    Field avroField = Field.createObject("my_avro",Object.class);
    avroField.setObjectSerialization(AvroFieldSerialization.class);
    avroField.addProp("avro.schema",avroSchema.toString());
    return new Schema("schema",Arrays.asList(avroField));
  }
View Full Code Here

  private static Schema getPangoolTweetSchema() {
    Field tweetIdField = Field.create("tweet_id",Schema.Field.Type.INT);
    Field tweetHashTags = Field.createObject("tweet_hashtags",Array.class);
    tweetHashTags.setObjectSerialization(AvroFieldSerialization.class);
    tweetHashTags.addProp("avro.schema",getAvroStringArraySchema().toString());
    return new Schema("tweet",Arrays.asList(tweetIdField,tweetHashTags));
  }
View Full Code Here

  }
 
  private static Schema getPangoolRetweetSchema(){
    Field userId = Field.create("username",Schema.Field.Type.STRING);
    Field tweetId = Field.create("tweet_id",Schema.Field.Type.INT);
    return new Schema("retweet",Arrays.asList(userId,tweetId));
  }
View Full Code Here

  }

  private void calculateGroupSchema() {
    List<Field> fields = commonSchema.getFields();
    List<Field> groupFields = fields.subList(0, mrConfig.getGroupByFields().size());
    this.groupSchema = new Schema("group", groupFields);
  }
View Full Code Here

      fieldsToPartition.add(posFields);
    }
  }

  private void calculateOneSourceCommonSchema() throws TupleMRException {
    Schema sourceSchema = mrConfig.getIntermediateSchemas().get(0);

    Criteria commonSortCriteria = mrConfig.getCommonCriteria();
    List<Field> commonFields = new ArrayList<Field>();
    for(SortElement sortElement : commonSortCriteria.getElements()) {
      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);
      commonFields.add(field);
    }

    // adding the rest
    for(Field field : sourceSchema.getFields()) {
      if(!containsFieldName(field.getName(), commonFields)) {
        commonFields.add(field);
      }
    }
    this.commonSchema = new Schema("common", commonFields);
  }
View Full Code Here

      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);
      commonFields.add(field);
    }

    this.commonSchema = new Schema("common", commonFields);
    this.specificSchemas = new ArrayList<Schema>();
    List<List<Field>> specificFieldsBySource = new ArrayList<List<Field>>();

    for(int schemaId = 0; schemaId < mrConfig.getNumIntermediateSchemas(); schemaId++) {
      Criteria specificCriteria = mrConfig.getSpecificOrderBys().get(schemaId);
      List<Field> specificFields = new ArrayList<Field>();
      if(specificCriteria != null) {
        for(SortElement sortElement : specificCriteria.getElements()) {
          String fieldName = sortElement.getName();
          Field field = checkFieldInSchema(fieldName, schemaId);
          specificFields.add(field);
        }
      }
      specificFieldsBySource.add(specificFields);
    }

    for(int i = 0; i < mrConfig.getNumIntermediateSchemas(); i++) {
      Schema sourceSchema = mrConfig.getIntermediateSchema(i);
      List<Field> specificFields = specificFieldsBySource.get(i);
      for(Field field : sourceSchema.getFields()) {
        if(!commonSchema.containsField(field.getName())
            && !containsFieldName(field.getName(), specificFields)) {
          specificFields.add(field);
        }
      }
      this.specificSchemas.add(new Schema("specific", specificFields));

    }
    this.specificSchemas = Collections.unmodifiableList(this.specificSchemas);
  }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Schema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.