Package: parquet.schema

Usage examples of parquet.schema.MessageType


                        mapping.source.getType(),
                        mapping.source.getPath()[0]));
                properties.add(mapping.target);
            }
        }
        this.materializeSchema = new MessageType(schema.getName(), fields);
        this.root = new DataModelConverter(properties);
    }
View Full Code Here


        List<Type> fields = new ArrayList<Type>();
        for (PropertyDescriptor property : descriptor.getPropertyDescriptors()) {
            Type field = computeParquetType(property);
            fields.add(field);
        }
        return new MessageType(
                descriptor.getDataModelClass().getName(),
                fields);
    }
View Full Code Here

  }

  private void testConversion(final String columnNamesStr, final String columnsTypeStr, final String expectedSchema) throws Exception {
    final List<String> columnNames = createHiveColumnsFrom(columnNamesStr);
    final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(columnsTypeStr);
    final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
    final MessageType expectedMT = MessageTypeParser.parseMessageType(expectedSchema);
    assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + expectedSchema, expectedMT, messageTypeFound);
  }
View Full Code Here

import parquet.schema.Type.Repetition;

public class HiveSchemaConverter {

  public static MessageType convert(final List<String> columnNames, final List<TypeInfo> columnTypes) {
    final MessageType schema = new MessageType("hive_schema", convertTypes(columnNames, columnTypes));
    return schema;
  }
View Full Code Here

  }

  private void testTajoToParquetConversion(
      Schema tajoSchema, String schemaString) throws Exception {
    TajoSchemaConverter converter = new TajoSchemaConverter();
    MessageType schema = converter.convert(tajoSchema);
    MessageType expected = MessageTypeParser.parseMessageType(schemaString);
    assertEquals("converting " + schema + " to " + schemaString,
                 expected.toString(), schema.toString());
  }
View Full Code Here

    // remove the last semicolon, java really needs a join method for strings...
    // TODO - nvm apparently it requires a semicolon after every field decl, might want to file a bug
    // messageSchema = messageSchema.substring(schemaType, messageSchema.length() - 1);
    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap();
    int valsWritten;
    for (int k = 0; k < numberRowGroups; k++) {
      w.startBlock(1);
      currentBooleanByte = 0;
      booleanBitCounter.reset();

      for (FieldInfo fieldInfo : fields.values()) {

        if (!columnValuesWritten.containsKey(fieldInfo.name)) {
          columnValuesWritten.put((String) fieldInfo.name, 0);
          valsWritten = 0;
        } else {
          valsWritten = columnValuesWritten.get(fieldInfo.name);
        }

        String[] path1 = { (String) fieldInfo.name };
        ColumnDescriptor c1 = schema.getColumnDescription(path1);

        w.startColumn(c1, recordsPerRowGroup, codec);
        int valsPerPage = (int) Math.ceil(recordsPerRowGroup / (float) fieldInfo.numberOfPages);
        byte[] bytes;
        // for variable length binary fields
View Full Code Here

    // remove the last semicolon, java really needs a join method for strings...
    // TODO - nvm apparently it requires a semicolon after every field decl, might want to file a bug
    //messageSchema = messageSchema.substring(schemaType, messageSchema.length() - 1);
    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap();
    int valsWritten;
    for (int k = 0; k < props.numberRowGroups; k++){
      w.startBlock(1);
      currentBooleanByte = 0;
      booleanBitCounter.reset();

      for (FieldInfo fieldInfo : props.fields.values()) {

        if ( ! columnValuesWritten.containsKey(fieldInfo.name)){
          columnValuesWritten.put((String) fieldInfo.name, 0);
          valsWritten = 0;
        } else {
          valsWritten = columnValuesWritten.get(fieldInfo.name);
        }

        String[] path1 = {(String) fieldInfo.name};
        ColumnDescriptor c1 = schema.getColumnDescription(path1);

        w.startColumn(c1, props.recordsPerRowGroup, codec);
        int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages);
        byte[] bytes;
        // for variable length binary fields
View Full Code Here

  @Override
  public ReadContext init(InitContext context) {
    if (requestedSchema == null) {
      throw new RuntimeException("requestedSchema is null.");
    }
    MessageType requestedParquetSchema =
      new TajoSchemaConverter().convert(requestedSchema);
    LOG.debug("Reading data with projection:\n" + requestedParquetSchema);
    return new ReadContext(requestedParquetSchema);
  }
View Full Code Here

      tajoReadSchema = CatalogGsonHelper.fromJson(
          metadataReadSchema, Schema.class);
    } else {
      tajoReadSchema = readSchema;
    }
    MessageType parquetRequestedSchema = readContext.getRequestedSchema();
    return new TajoRecordMaterializer(parquetRequestedSchema, requestedSchema,
                                      tajoReadSchema);
  }
View Full Code Here

      if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) {
        continue;
      }
      types.add(convertColumn(column));
    }
    return new MessageType(TABLE_SCHEMA, types);
  }
View Full Code Here

TOP

Related Classes of parquet.schema.MessageType

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.