Package parquet.schema

Examples of parquet.schema.MessageType


      boolean validating,
      WriterVersion writerVersion) throws IOException {
    Configuration conf = new Configuration();

    WriteSupport.WriteContext writeContext = writeSupport.init(conf);
    MessageType schema = writeContext.getSchema();

    ParquetFileWriter fileWriter = new ParquetFileWriter(conf, schema, file);
    fileWriter.start();

    CodecFactory codecFactory = new CodecFactory(conf);
View Full Code Here


   * @return the typed schema that should be used to read
   */
  public static MessageType getSchemaForRead(MessageType fileMessageType, String partialReadSchemaString) {
    if (partialReadSchemaString == null)
      return fileMessageType;
    MessageType requestedMessageType = MessageTypeParser.parseMessageType(partialReadSchemaString);
    return getSchemaForRead(fileMessageType, requestedMessageType);
  }
View Full Code Here

    List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
    for (Footer footer : footers) {
      blocks.addAll(footer.getParquetMetadata().getBlocks());
    }

    MessageType schema = globalMetaData.getSchema();
    Map<String, Set<String>> extraMetadata = globalMetaData.getKeyValueMetaData();
    readContext = readSupport.init(new InitContext(conf, extraMetadata, schema));
  }
View Full Code Here

    this.entry = entry;
    setColumns(columns);
  }

  public static MessageType getProjection(MessageType schema, Collection<SchemaPath> columns) {
    MessageType projection = null;
    for (SchemaPath path : columns) {
      List<String> segments = Lists.newArrayList();
      PathSegment rootSegment = path.getRootSegment();
      PathSegment seg = rootSegment;
      String messageName = schema.getName();
      while(seg != null){
        if(seg.isNamed()) {
          segments.add(seg.getNameSegment().getPath());
        }
        seg = seg.getChild();
      }
      String[] pathSegments = new String[segments.size()];
      segments.toArray(pathSegments);
      Type type = null;
      try {
        type = schema.getType(pathSegments);
      } catch (InvalidRecordException e) {
        logger.warn("Invalid record" , e);
      }
      if (type != null) {
        Type t = getType(pathSegments, 0, schema);
        if (projection == null) {
          projection = new MessageType(messageName, t);
        } else {
          projection = projection.union(new MessageType(messageName, t));
        }
      }
    }
    return projection;
  }
View Full Code Here

  @Override
  public void setup(OutputMutator output) throws ExecutionSetupException {

    try {
      schema = footer.getFileMetaData().getSchema();
      MessageType projection = null;

      if (isStarQuery()) {
        projection = schema;
      } else {
        projection = getProjection(schema, getColumns());
View Full Code Here

    return s;
  }

  private static boolean isComplex(ParquetMetadata footer) {
    MessageType schema = footer.getFileMetaData().getSchema();

    for (Type type : schema.getFields()) {
      if (!type.isPrimitive()) {
        return true;
      }
    }
    for (ColumnDescriptor col : schema.getColumns()) {
      if (col.getMaxRepetitionLevel() > 0) {
        return true;
      }
    }
    return false;
View Full Code Here

  private void newSchema() throws IOException {
    List<Type> types = Lists.newArrayList();
    for (MaterializedField field : batchSchema) {
      types.add(getType(field));
    }
    schema = new MessageType("root", types);

    Path fileName = new Path(location, prefix + "_" + index + ".parquet");
    parquetFileWriter = new ParquetFileWriter(conf, schema, fileName);
    parquetFileWriter.start();
View Full Code Here

import parquet.schema.PrimitiveType.PrimitiveTypeName;
import parquet.schema.Type.Repetition;

public class ParquetSchemaMerge {
  public static void main(String[] args) {
    MessageType message1;
    MessageType message2;

    PrimitiveType c = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "c");
    GroupType b = new GroupType(Repetition.REQUIRED, "b");
    GroupType a = new GroupType(Repetition.OPTIONAL, "a", b);
    message1 = new MessageType("root", a);

    PrimitiveType c2 = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "d");
    GroupType b2 = new GroupType(Repetition.OPTIONAL, "b", c2);
    GroupType a2 = new GroupType(Repetition.OPTIONAL, "a", b2);
    message2 = new MessageType("root", a2);

    MessageType message3 = message1.union(message2);

    StringBuilder builder = new StringBuilder();
    message3.writeToStringBuilder(builder, "");
    System.out.println(builder);
  }
View Full Code Here

    // remove the last semicolon, java really needs a join method for strings...
    // TODO - nvm apparently it requires a semicolon after every field decl, might want to file a bug
    //messageSchema = messageSchema.substring(schemaType, messageSchema.length() - 1);
    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap();
    int valsWritten;
    for (int k = 0; k < props.numberRowGroups; k++) {
      w.startBlock(props.recordsPerRowGroup);
      currentBooleanByte = 0;
      booleanBitCounter.reset();

      for (FieldInfo fieldInfo : props.fields.values()) {

        if ( ! columnValuesWritten.containsKey(fieldInfo.name)) {
          columnValuesWritten.put((String) fieldInfo.name, 0);
          valsWritten = 0;
        } else {
          valsWritten = columnValuesWritten.get(fieldInfo.name);
        }

        String[] path1 = {(String) fieldInfo.name};
        ColumnDescriptor c1 = schema.getColumnDescription(path1);

        w.startColumn(c1, props.recordsPerRowGroup, codec);
        int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages);
        byte[] bytes;
        RunLengthBitPackingHybridValuesWriter defLevels = new RunLengthBitPackingHybridValuesWriter(
View Full Code Here

    if (Log.DEBUG) LOG.debug(ParquetMetadata.toPrettyJSON(parquetMetadata));
    return parquetMetadata;
  }

  public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws IOException {
    MessageType messageType = fromParquetSchema(parquetMetadata.getSchema());
    List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
    List<RowGroup> row_groups = parquetMetadata.getRow_groups();
    for (RowGroup rowGroup : row_groups) {
      BlockMetaData blockMetaData = new BlockMetaData();
      blockMetaData.setRowCount(rowGroup.getNum_rows());
      blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size());
      List<ColumnChunk> columns = rowGroup.getColumns();
      String filePath = columns.get(0).getFile_path();
      for (ColumnChunk columnChunk : columns) {
        if ((filePath == null && columnChunk.getFile_path() != null)
            || (filePath != null && !filePath.equals(columnChunk.getFile_path()))) {
          throw new ParquetDecodingException("all column chunks of the same row group must be in the same file for now");
        }
        parquet.format.ColumnMetaData metaData = columnChunk.meta_data;
        ColumnPath path = getPath(metaData);
        ColumnChunkMetaData column = ColumnChunkMetaData.get(
            path,
            messageType.getType(path.toArray()).asPrimitiveType().getPrimitiveTypeName(),
            CompressionCodecName.fromParquet(metaData.codec),
            fromFormatEncodings(metaData.encodings),
            metaData.data_page_offset,
            metaData.dictionary_page_offset,
            metaData.num_values,
View Full Code Here

TOP

Related Classes of parquet.schema.MessageType

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.