Package parquet.schema

Examples of parquet.schema.MessageType


import parquet.schema.Type;
import parquet.schema.Type.Repetition;

public class ParquetSchemaMerge {
  public static void main(String[] args) {
    MessageType message1;
    MessageType message2;

    PrimitiveType c = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "c");
    GroupType b = new GroupType(Repetition.REQUIRED, "b");
    GroupType a = new GroupType(Repetition.OPTIONAL, "a", b);
    message1 = new MessageType("root", a);

    PrimitiveType c2 = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "d");
    GroupType b2 = new GroupType(Repetition.OPTIONAL, "b", c2);
    GroupType a2 = new GroupType(Repetition.OPTIONAL, "a", b2);
    message2 = new MessageType("root", a2);

    MessageType message3 = message1.union(message2);

    StringBuilder builder = new StringBuilder();
    message3.writeToStringBuilder(builder, "");
    System.out.println(builder);
  }
View Full Code Here


    this.columns = columns;
    this.entry = entry;
  }

  public static MessageType getProjection(MessageType schema, List<SchemaPath> columns) {
    MessageType projection = null;
    for (SchemaPath path : columns) {
      List<String> segments = Lists.newArrayList();
      PathSegment rootSegment = path.getRootSegment();
      PathSegment seg = rootSegment;
      String messageName = schema.getName();
      while(seg != null){
        if(seg.isNamed()) {
          segments.add(seg.getNameSegment().getPath());
        }
        seg = seg.getChild();
      }
      String[] pathSegments = new String[segments.size()];
      segments.toArray(pathSegments);
      Type type = null;
      try {
        type = schema.getType(pathSegments);
      } catch (InvalidRecordException e) {
        logger.warn("Invalid record" , e);
      }
      if (type != null) {
        Type t = getType(pathSegments, 0, schema);
        if (projection == null) {
          projection = new MessageType(messageName, t);
        } else {
          projection = projection.union(new MessageType(messageName, t));
        }
      }
    }
    return projection;
  }
View Full Code Here

  @Override
  public void setup(OutputMutator output) throws ExecutionSetupException {

    try {
      schema = footer.getFileMetaData().getSchema();
      MessageType projection = null;

      if (columns == null || columns.size() == 0) {
        projection = schema;
      } else {
        projection = getProjection(schema, columns);
View Full Code Here

    // remove the last semicolon, java really needs a join method for strings...
    // TODO - nvm apparently it requires a semicolon after every field decl, might want to file a bug
    //messageSchema = messageSchema.substring(schemaType, messageSchema.length() - 1);
    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap();
    int valsWritten;
    for (int k = 0; k < props.numberRowGroups; k++){
      w.startBlock(props.recordsPerRowGroup);
      currentBooleanByte = 0;
      booleanBitCounter.reset();

      for (FieldInfo fieldInfo : props.fields.values()) {

        if ( ! columnValuesWritten.containsKey(fieldInfo.name)){
          columnValuesWritten.put((String) fieldInfo.name, 0);
          valsWritten = 0;
        } else {
          valsWritten = columnValuesWritten.get(fieldInfo.name);
        }

        String[] path1 = {(String) fieldInfo.name};
        ColumnDescriptor c1 = schema.getColumnDescription(path1);

        w.startColumn(c1, props.recordsPerRowGroup, codec);
        int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages);
        byte[] bytes;
        RunLengthBitPackingHybridValuesWriter defLevels = new RunLengthBitPackingHybridValuesWriter(MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS, valsPerPage);
View Full Code Here

      DecimalMetadata decimalMetadata = ParquetTypeHelper.getDecimalMetadataForField(field);
      int length = ParquetTypeHelper.getLengthForMinorType(minorType);
      parquet.schema.Type type = new parquet.schema.PrimitiveType(repetition, primitiveTypeName, length, name, originalType, decimalMetadata);
      types.add(type);
    }
    schema = new MessageType("root", types);

    Path fileName = new Path(location, prefix + "_" + index + ".parquet");
    w = new ParquetFileWriter(conf, schema, fileName);
    w.start();
View Full Code Here

    return new ScanBatch(rowGroupScan, context, readers.iterator(), partitionColumns, selectedPartitionColumns);
  }

  private static boolean isComplex(ParquetMetadata footer) {
    MessageType schema = footer.getFileMetaData().getSchema();
    for (Type type : schema.getFields()) {
      if (!type.isPrimitive()) {
        return true;
      }
    }
    return false;
View Full Code Here

  }

  private void testConversion(final String columnNamesStr, final String columnsTypeStr, final String expectedSchema) throws Exception {
    final List<String> columnNames = createHiveColumnsFrom(columnNamesStr);
    final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(columnsTypeStr);
    final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
    final MessageType expectedMT = MessageTypeParser.parseMessageType(expectedSchema);
    assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + expectedSchema, expectedMT, messageTypeFound);
  }
View Full Code Here

  public void testMapOriginalType() throws Exception {
    final String hiveColumnTypes = "map<string,string>";
    final String hiveColumnNames = "mapCol";
    final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
    final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
    final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
    // this messageType only has one optional field, whose name is mapCol, original Type is MAP
    assertEquals(1, messageTypeFound.getFieldCount());
    parquet.schema.Type topLevel = messageTypeFound.getFields().get(0);
    assertEquals("mapCol",topLevel.getName());
    assertEquals(OriginalType.MAP, topLevel.getOriginalType());
    assertEquals(Repetition.OPTIONAL, topLevel.getRepetition());

    assertEquals(1, topLevel.asGroupType().getFieldCount());
View Full Code Here

        } else {
          // below allows schema evolution
          typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col));
        }
      }
      MessageType tableSchema = new MessageType(TABLE_SCHEMA, typeListTable);
      contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString());

      MessageType requestedSchemaByUser = tableSchema;
      final List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);

      final List<Type> typeListWanted = new ArrayList<Type>();
      for (final Integer idx : indexColumnsWanted) {
        typeListWanted.add(tableSchema.getType(listColumns.get(idx)));
      }
      requestedSchemaByUser = new MessageType(fileSchema.getName(), typeListWanted);

      return new ReadContext(requestedSchemaByUser, contextMetadata);
    } else {
      contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString());
      return new ReadContext(fileSchema, contextMetadata);
View Full Code Here

    final Map<String, String> metadata = readContext.getReadSupportMetadata();
    if (metadata == null) {
      throw new IllegalStateException("ReadContext not initialized properly. " +
        "Don't know the Hive Schema.");
    }
    final MessageType tableSchema = MessageTypeParser.
        parseMessageType(metadata.get(HIVE_SCHEMA_KEY));
    return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
  }
View Full Code Here

TOP

Related Classes of parquet.schema.MessageType

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.