Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Schema


      this.input = new DataInputStream(input);
    }
  }

  public void readFields(ITuple tuple, Deserializer[] customDeserializers) throws IOException {
    Schema schema = tuple.getSchema();
    for(int index = 0; index < schema.getFields().size(); index++) {
      Deserializer customDeser = customDeserializers[index];
      Field field = schema.getField(index);
      switch(field.getType()) {
      case INT:
        tuple.set(index, WritableUtils.readVInt(input));
        break;
      case LONG:
View Full Code Here


  }

  @Override
  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    try{
    Schema groupSchema = serInfo.getGroupSchema();
    return compare(b1,s1,b2,s2,groupSchema,groupCriteria,offsets);
    } catch(IOException e){
      throw new RuntimeException(e);
    }
  }
View Full Code Here

    }
  }

  private void oneSourceSerialization(ITuple tuple) throws IOException {
    if (inputSchemaValidation){
      Schema expectedSchema = tupleMRConfig.getIntermediateSchema(0);
      if (!tuple.getSchema().equals(expectedSchema)){
        throw new IOException("Tuple '"+tuple + "' " +
            "contains schema not expected." +
            "Expected schema '"+ expectedSchema + " and actual: " + tuple.getSchema());
      }
View Full Code Here

    Integer schemaId = tupleMRConfig.getSchemaIdByName(schemaName);
    if (schemaId == null){
      throw new IOException("Schema '" + tuple.getSchema() +"' is not a valid intermediate schema");
    }
    if (inputSchemaValidation){
      Schema expectedSchema = tupleMRConfig.getIntermediateSchema(schemaId);
      if (!expectedSchema.equals(tuple.getSchema())){
        throw new IOException("Tuple '"+tuple + "' " +
          "contains not expected schema." +
          "Expected schema '"+ expectedSchema + " and actual: " + tuple.getSchema());
      }
    }
    int[] commonTranslation = serInfo.getCommonSchemaIndexTranslation(schemaId);
    // Serialize common
    tupleSerializer.write(commonSchema, tuple, commonTranslation, serInfo.getCommonSchemaSerializers());
    // Serialize schema id
    WritableUtils.writeVInt(tupleSerializer.getOut(), schemaId);
    // Serialize rest of the fields
    Schema specificSchema = serInfo.getSpecificSchema(schemaId);
    int[] specificTranslation = serInfo
        .getSpecificSchemaIndexTranslation(schemaId);
    tupleSerializer.write(specificSchema, tuple, specificTranslation, serInfo.getSpecificSchemaSerializers().get(schemaId));
  }
View Full Code Here

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("intField", Type.INT));
    fields.add(Field.create("strField", Type.STRING));
    fields.add(Field.create("longField", Type.LONG));
    fields.add(Field.create("doubleField", Type.DOUBLE));
    Schema schema = new Schema("schema", fields);

    TupleMRBuilder mr = new TupleMRBuilder(conf, "Pangool Secondary Sort");
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("intField", "strField");
    mr.setOrderBy(new OrderBy().add("intField", Order.ASC).add("strField", Order.ASC).add("longField", Order.ASC));
View Full Code Here

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("location", Type.STRING));
    fields.add(Field.create("date", Type.STRING));
    fields.add(Field.create("hashtag", Type.STRING));
    fields.add(Field.create("count", Type.INT));
    Schema schema = new Schema("my_schema", fields);

    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("location", "date", "hashtag");
    mr.setOrderBy(new OrderBy().add("location", Order.ASC).add("date", Order.ASC).add("hashtag", Order.ASC));
View Full Code Here

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("url", Type.STRING));
    fields.add(Field.create("date", Type.STRING));
    fields.add(Field.create("visits",Type.INT));

    Schema schema = new Schema("my_schema", fields);

    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).add("date", Order.ASC));
View Full Code Here

    fields.add(Field.create("user", Type.STRING));
    fields.add(Field.create("feature", Type.STRING));
    fields.add(Field.create("all",Type.BOOLEAN));
    fields.add(Field.create("clicks", Type.INT));

    Schema schema = new Schema("my_schema", fields);

    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("user", "all", "feature");
    mr.setOrderBy(new OrderBy().add("user", Order.ASC).add("all", Order.DESC).add("feature", Order.ASC));
View Full Code Here

  public static Schema getOutputCountSchema() {
    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("topic", Type.INT));
    fields.add(Field.create("totalcount", Type.INT));
    return new Schema("outputcount", fields);
  }
View Full Code Here

  }

  private static Schema getPangoolTweetSchema() {
    Field tweetIdField = Field.create("tweet_id", Schema.Field.Type.INT);
    Field tweetHashTags = Fields.createAvroField("tweet_hashtags", getAvroStringArraySchema(), false);
    return new Schema("tweet", Arrays.asList(tweetIdField, tweetHashTags));
  }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Schema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.