Package parquet.hadoop

Examples of parquet.hadoop.ParquetFileWriter
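The fragments that follow are excerpted from a test-data generator and pick up mid-method. They all drive ParquetFileWriter through its low-level lifecycle: start(), then per row group startBlock()/startColumn()/writeDataPage()/endColumn()/endBlock(), and finally end() with the file's key-value metadata. As orientation, here is a minimal, self-contained sketch of that lifecycle for a single required int32 column (hypothetical path and values; imports come from parquet.schema, parquet.column, parquet.bytes, parquet.hadoop.metadata, static parquet.column.Encoding.PLAIN, Hadoop's Configuration/Path, and java.nio; PLAIN int32 values are little-endian per the Parquet spec):

    MessageType schema = MessageTypeParser.parseMessageType(
        "message root { required int32 id; }");
    Path path = new Path("/tmp/minimal.parquet");  // hypothetical output path

    ParquetFileWriter w = new ParquetFileWriter(new Configuration(), schema, path);
    w.start();

    int valueCount = 3;
    ColumnDescriptor id = schema.getColumnDescription(new String[] {"id"});

    // PLAIN-encode three int32 values; the Parquet spec mandates little-endian.
    ByteBuffer buf = ByteBuffer.allocate(4 * valueCount).order(ByteOrder.LITTLE_ENDIAN);
    for (int v = 1; v <= valueCount; v++) {
      buf.putInt(v);
    }

    w.startBlock(valueCount);  // one row group holding all three records
    w.startColumn(id, valueCount, CompressionCodecName.UNCOMPRESSED);
    // A required flat column has no repetition/definition levels to encode.
    w.writeDataPage(valueCount, buf.capacity(), BytesInput.from(buf.array()),
        PLAIN, PLAIN, PLAIN);
    w.endColumn();
    w.endBlock();
    w.end(new HashMap<String, String>());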


    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap<>();
    int valsWritten;
    for (int k = 0; k < numberRowGroups; k++) {
      w.startBlock(recordsPerRowGroup);
      currentBooleanByte = 0;
      booleanBitCounter.reset();

      for (FieldInfo fieldInfo : fields.values()) {

        if (!columnValuesWritten.containsKey(fieldInfo.name)) {
          columnValuesWritten.put((String) fieldInfo.name, 0);
          valsWritten = 0;
        } else {
          valsWritten = columnValuesWritten.get(fieldInfo.name);
        }

        String[] path1 = { (String) fieldInfo.name };
        ColumnDescriptor c1 = schema.getColumnDescription(path1);

        w.startColumn(c1, recordsPerRowGroup, codec);
        int valsPerPage = (int) Math.ceil(recordsPerRowGroup / (float) fieldInfo.numberOfPages);
        byte[] bytes;
        // for variable length binary fields
        int bytesNeededToEncodeLength = 4;
        if ((int) fieldInfo.bitLength > 0) {
          bytes = new byte[(int) Math.ceil(valsPerPage * (int) fieldInfo.bitLength / 8.0)];
        } else {
          // 3 * bytesNeededToEncodeLength (12 bytes) accounts for the 4-byte length stored with each of the three values
          int totalValLength = ((byte[]) fieldInfo.values[0]).length + ((byte[]) fieldInfo.values[1]).length
              + ((byte[]) fieldInfo.values[2]).length + 3 * bytesNeededToEncodeLength;
          // extra space for the case where the number of values in this page is not divisible by 3
          int leftOverBytes = 0;
          if (valsPerPage % 3 > 0) {
            leftOverBytes += ((byte[]) fieldInfo.values[1]).length + bytesNeededToEncodeLength;
          }
          if (valsPerPage % 3 > 1) {
            leftOverBytes += ((byte[]) fieldInfo.values[2]).length + bytesNeededToEncodeLength;
          }
          bytes = new byte[valsPerPage / 3 * totalValLength + leftOverBytes];
        }
        int bytesPerPage = (int) (valsPerPage * ((int) fieldInfo.bitLength / 8.0));
        int bytesWritten = 0;
        for (int z = 0; z < (int) fieldInfo.numberOfPages; z++, bytesWritten = 0) {
          for (int i = 0; i < valsPerPage; i++) {
            if (fieldInfo.values[0] instanceof Boolean) {

              bytes[currentBooleanByte] |= bitFields[booleanBitCounter.val]
                  & ((boolean) fieldInfo.values[valsWritten % 3] ? allBitsTrue : allBitsFalse);
              booleanBitCounter.increment();
              if (booleanBitCounter.val == 0) {
                currentBooleanByte++;
              }
              valsWritten++;
              if (currentBooleanByte > bytesPerPage)
                break;
            } else {
              if (fieldInfo.values[valsWritten % 3] instanceof byte[]) {
                System.arraycopy(ByteArrayUtil.toByta(((byte[]) fieldInfo.values[valsWritten % 3]).length), 0, bytes,
                    bytesWritten, bytesNeededToEncodeLength);
                System.arraycopy(fieldInfo.values[valsWritten % 3], 0, bytes, bytesWritten + bytesNeededToEncodeLength,
                    ((byte[]) fieldInfo.values[valsWritten % 3]).length);
                bytesWritten += ((byte[]) fieldInfo.values[valsWritten % 3]).length + bytesNeededToEncodeLength;
              } else {
                System.arraycopy(ByteArrayUtil.toByta(fieldInfo.values[valsWritten % 3]), 0, bytes, i
                    * ((int) fieldInfo.bitLength / 8), (int) fieldInfo.bitLength / 8);
              }
              valsWritten++;
            }

          }
          w.writeDataPage((int) (recordsPerRowGroup / (int) fieldInfo.numberOfPages), bytes.length,
              BytesInput.from(bytes), PLAIN, PLAIN, PLAIN);
          currentBooleanByte = 0;
        }
        w.endColumn();
        columnValuesWritten.put((String) fieldInfo.name, valsWritten);
      }

      w.endBlock();
    }
    w.end(new HashMap<String, String>());
    logger.debug("Finished generating parquet file.");
  }
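In the variable-length branch above (bitLength == 0), the generator hand-rolls Parquet's PLAIN encoding for BYTE_ARRAY: each value is a 4-byte length prefix followed by the raw bytes. Per the Parquet spec the length is little-endian; ByteArrayUtil.toByta is the generator's own helper, so its byte order is an assumption here. A minimal equivalent with java.nio (java.io.ByteArrayOutputStream, java.nio.ByteBuffer):

    // Sketch: PLAIN encoding for variable-length BYTE_ARRAY values,
    // a 4-byte little-endian length before each value's bytes.
    static byte[] plainEncode(byte[][] values) throws IOException {
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      for (byte[] v : values) {
        out.write(ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putInt(v.length).array());
        out.write(v);
      }
      return out.toByteArray();
    }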


    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap<>();
    int valsWritten;
    for (int k = 0; k < props.numberRowGroups; k++) {
      w.startBlock(props.recordsPerRowGroup);
      currentBooleanByte = 0;
      booleanBitCounter.reset();

      for (FieldInfo fieldInfo : props.fields.values()) {

        if (!columnValuesWritten.containsKey(fieldInfo.name)) {
          columnValuesWritten.put((String) fieldInfo.name, 0);
          valsWritten = 0;
        } else {
          valsWritten = columnValuesWritten.get(fieldInfo.name);
        }

        String[] path1 = {(String) fieldInfo.name};
        ColumnDescriptor c1 = schema.getColumnDescription(path1);

        w.startColumn(c1, props.recordsPerRowGroup, codec);
        int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages);
        byte[] bytes;
        // for variable length binary fields
        int bytesNeededToEncodeLength = 4;
        if ((int) fieldInfo.bitLength > 0) {
          bytes = new byte[(int) Math.ceil(valsPerPage * (int) fieldInfo.bitLength / 8.0)];
        } else {
          // 3 * bytesNeededToEncodeLength (12 bytes) accounts for the 4-byte length stored with each of the three values
          int totalValLength = ((byte[]) fieldInfo.values[0]).length + ((byte[]) fieldInfo.values[1]).length
              + ((byte[]) fieldInfo.values[2]).length + 3 * bytesNeededToEncodeLength;
          // extra space for the case where the number of values in this page is not divisible by 3
          int leftOverBytes = 0;
          if (valsPerPage % 3 > 0) {
            leftOverBytes += ((byte[]) fieldInfo.values[1]).length + bytesNeededToEncodeLength;
          }
          if (valsPerPage % 3 > 1) {
            leftOverBytes += ((byte[]) fieldInfo.values[2]).length + bytesNeededToEncodeLength;
          }
          bytes = new byte[valsPerPage / 3 * totalValLength + leftOverBytes];
        }
        int bytesPerPage = (int) (valsPerPage * ((int) fieldInfo.bitLength / 8.0));
        int bytesWritten = 0;
        for (int z = 0; z < (int) fieldInfo.numberOfPages; z++, bytesWritten = 0) {
          for (int i = 0; i < valsPerPage; i++) {
            if (fieldInfo.values[0] instanceof Boolean) {

              bytes[currentBooleanByte] |= bitFields[booleanBitCounter.val] & ((boolean) fieldInfo.values[valsWritten % 3]
                  ? allBitsTrue : allBitsFalse);
              booleanBitCounter.increment();
              if (booleanBitCounter.val == 0) {
                currentBooleanByte++;
              }
              valsWritten++;
              if (currentBooleanByte > bytesPerPage) break;
            } else {
              if (fieldInfo.values[valsWritten % 3] instanceof byte[]) {
                System.arraycopy(ByteArrayUtil.toByta(((byte[]) fieldInfo.values[valsWritten % 3]).length),
                    0, bytes, bytesWritten, bytesNeededToEncodeLength);
                System.arraycopy(fieldInfo.values[valsWritten % 3],
                    0, bytes, bytesWritten + bytesNeededToEncodeLength, ((byte[]) fieldInfo.values[valsWritten % 3]).length);
                bytesWritten += ((byte[]) fieldInfo.values[valsWritten % 3]).length + bytesNeededToEncodeLength;
              } else {
                System.arraycopy(ByteArrayUtil.toByta(fieldInfo.values[valsWritten % 3]),
                    0, bytes, i * ((int) fieldInfo.bitLength / 8), (int) fieldInfo.bitLength / 8);
              }
              valsWritten++;
            }

          }
          w.writeDataPage((int) (props.recordsPerRowGroup / (int) fieldInfo.numberOfPages), bytes.length, BytesInput.from(bytes), PLAIN, PLAIN, PLAIN);
          currentBooleanByte = 0;
        }
        w.endColumn();
        columnValuesWritten.put((String) fieldInfo.name, valsWritten);
      }

      w.endBlock();
    }
    w.end(new HashMap<String, String>());
    logger.debug("Finished generating parquet file.");
  }
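The boolean branch packs one value per bit, least-significant bit first, which is what currentBooleanByte, booleanBitCounter, and the bitFields masks implement incrementally. The same PLAIN boolean encoding, written directly:

    // Sketch: PLAIN boolean encoding, one bit per value, LSB-first in each byte.
    static byte[] packBooleans(boolean[] values) {
      byte[] out = new byte[(values.length + 7) / 8];
      for (int i = 0; i < values.length; i++) {
        if (values[i]) {
          out[i / 8] |= (byte) (1 << (i % 8));
        }
      }
      return out;
    }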

      types.add(type);
    }
    schema = new MessageType("root", types);

    Path fileName = new Path(location, prefix + "_" + index + ".parquet");
    w = new ParquetFileWriter(conf, schema, fileName);
    w.start();

    int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / this.schema.getColumns().size() / 5);
    pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(codec, pageSize, this.schema, initialBlockBufferSize);
    int initialPageBufferSize = max(MINIMUM_BUFFER_SIZE, min(pageSize + pageSize / 10, initialBlockBufferSize));
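The fragment above sizes its initial buffers defensively: each column's block buffer starts at a fifth of its even share of the row-group size, and the page buffer at the page size plus 10% slack, capped by the block buffer and floored at MINIMUM_BUFFER_SIZE. Worked through with hypothetical numbers (128 MiB block, 10 columns, 1 MiB pages, 64 KiB minimum):

    int blockSize = 128 * 1024 * 1024;  // hypothetical row-group size
    int pageSize = 1024 * 1024;         // hypothetical page size
    int columnCount = 10;               // hypothetical column count
    int minimumBufferSize = 64 * 1024;  // stand-in for MINIMUM_BUFFER_SIZE

    int initialBlockBufferSize = Math.max(minimumBufferSize, blockSize / columnCount / 5);
    // = max(65536, 134217728 / 10 / 5) = 2684354 bytes (~2.6 MiB per column)
    int initialPageBufferSize = Math.max(minimumBufferSize,
        Math.min(pageSize + pageSize / 10, initialBlockBufferSize));
    // = max(65536, min(1153433, 2684354)) = 1153433 bytes (~1.1 MiB)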

    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap<>();
    int valsWritten;
    for (int k = 0; k < props.numberRowGroups; k++) {
      w.startBlock(props.recordsPerRowGroup);
      currentBooleanByte = 0;
      booleanBitCounter.reset();

      for (FieldInfo fieldInfo : props.fields.values()) {

        if (!columnValuesWritten.containsKey(fieldInfo.name)) {
          columnValuesWritten.put((String) fieldInfo.name, 0);
          valsWritten = 0;
        } else {
          valsWritten = columnValuesWritten.get(fieldInfo.name);
        }

        String[] path1 = {(String) fieldInfo.name};
        ColumnDescriptor c1 = schema.getColumnDescription(path1);

        w.startColumn(c1, props.recordsPerRowGroup, codec);
        int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages);
        byte[] bytes;
        RunLengthBitPackingHybridValuesWriter defLevels = new RunLengthBitPackingHybridValuesWriter(
            MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS, valsPerPage);
        RunLengthBitPackingHybridValuesWriter repLevels = new RunLengthBitPackingHybridValuesWriter(
            MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS, valsPerPage);
        // for variable length binary fields
        int bytesNeededToEncodeLength = 4;
        if ((int) fieldInfo.bitLength > 0) {
          bytes = new byte[(int) Math.ceil(valsPerPage * (int) fieldInfo.bitLength / 8.0)];
        } else {
          // 3 * bytesNeededToEncodeLength (12 bytes) accounts for the 4-byte length stored with each of the three values
          int totalValLength = ((byte[]) fieldInfo.values[0]).length + ((byte[]) fieldInfo.values[1]).length
              + ((byte[]) fieldInfo.values[2]).length + 3 * bytesNeededToEncodeLength;
          // extra space for the case where the number of values in this page is not divisible by 3
          int leftOverBytes = 0;
          if (valsPerPage % 3 > 0) {
            leftOverBytes += ((byte[]) fieldInfo.values[1]).length + bytesNeededToEncodeLength;
          }
          if (valsPerPage % 3 > 1) {
            leftOverBytes += ((byte[]) fieldInfo.values[2]).length + bytesNeededToEncodeLength;
          }
          bytes = new byte[valsPerPage / 3 * totalValLength + leftOverBytes];
        }
        int bytesPerPage = (int) (valsPerPage * ((int) fieldInfo.bitLength / 8.0));
        int bytesWritten = 0;
        for (int z = 0; z < (int) fieldInfo.numberOfPages; z++, bytesWritten = 0) {
          for (int i = 0; i < valsPerPage; i++) {
            repLevels.writeInteger(0);
            defLevels.writeInteger(1);
            if (fieldInfo.values[0] instanceof Boolean) {

              bytes[currentBooleanByte] |= bitFields[booleanBitCounter.val]
                  & ((boolean) fieldInfo.values[valsWritten % 3] ? allBitsTrue : allBitsFalse);
              booleanBitCounter.increment();
              if (booleanBitCounter.val == 0) {
                currentBooleanByte++;
              }
              valsWritten++;
              if (currentBooleanByte > bytesPerPage) break;
            } else {
              if (fieldInfo.values[valsWritten % 3] instanceof byte[]) {
                System.arraycopy(ByteArrayUtil.toByta(((byte[])fieldInfo.values[valsWritten % 3]).length),
                    0, bytes, bytesWritten, bytesNeededToEncodeLength);
                System.arraycopy(fieldInfo.values[valsWritten % 3],
                    0, bytes, bytesWritten + bytesNeededToEncodeLength, ((byte[])fieldInfo.values[valsWritten % 3]).length);
                bytesWritten += ((byte[])fieldInfo.values[valsWritten % 3]).length + bytesNeededToEncodeLength;
              } else {
                System.arraycopy(ByteArrayUtil.toByta(fieldInfo.values[valsWritten % 3]),
                    0, bytes, i * ((int) fieldInfo.bitLength / 8), (int) fieldInfo.bitLength / 8);
              }
              valsWritten++;
            }

          }
          byte[] fullPage = new byte[2 * 4 * valsPerPage + bytes.length];
          byte[] repLevelBytes = repLevels.getBytes().toByteArray();
          byte[] defLevelBytes = defLevels.getBytes().toByteArray();
          System.arraycopy(bytes, 0, fullPage, 0, bytes.length);
          System.arraycopy(repLevelBytes, 0, fullPage, bytes.length, repLevelBytes.length);
          System.arraycopy(defLevelBytes, 0, fullPage, bytes.length + repLevelBytes.length, defLevelBytes.length);
          w.writeDataPage(props.recordsPerRowGroup / fieldInfo.numberOfPages, fullPage.length, BytesInput.from(fullPage), RLE, RLE, PLAIN);
          currentBooleanByte = 0;
        }
        w.endColumn();
        columnValuesWritten.put((String) fieldInfo.name, valsWritten);
      }

      w.endBlock();
    }
    w.end(new HashMap<String, String>());
    logger.debug("Finished generating parquet file.");
  }
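The fullPage buffer above reserves a worst-case 2 * 4 * valsPerPage bytes for the two RLE level streams, so the written page carries trailing slack whenever the encoded levels are smaller. Since the exact sizes are known after getBytes(), a tighter sketch can concatenate the streams with BytesInput.concat (note also that the Parquet v1 data page layout places repetition levels, then definition levels, then values, the reverse of the ordering used above):

    // Sketch: assemble the page from the actual encoded sizes, in the
    // v1 layout order of rep levels, def levels, then values.
    BytesInput page = BytesInput.concat(
        repLevels.getBytes(),    // RLE-encoded repetition levels
        defLevels.getBytes(),    // RLE-encoded definition levels
        BytesInput.from(bytes)); // PLAIN-encoded values
    w.writeDataPage(props.recordsPerRowGroup / fieldInfo.numberOfPages,
        (int) page.size(), page, RLE, RLE, PLAIN);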

    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap<>();
    int valsWritten;
    for (int k = 0; k < props.numberRowGroups; k++) {
      w.startBlock(props.recordsPerRowGroup);
      currentBooleanByte = 0;
      booleanBitCounter.reset();

      for (FieldInfo fieldInfo : props.fields.values()) {

        if (!columnValuesWritten.containsKey(fieldInfo.name)) {
          columnValuesWritten.put((String) fieldInfo.name, 0);
          valsWritten = 0;
        } else {
          valsWritten = columnValuesWritten.get(fieldInfo.name);
        }

        String[] path1 = {(String) fieldInfo.name};
        ColumnDescriptor c1 = schema.getColumnDescription(path1);

        w.startColumn(c1, props.recordsPerRowGroup, codec);
        int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages);
        byte[] bytes;
        RunLengthBitPackingHybridValuesWriter defLevels = new RunLengthBitPackingHybridValuesWriter(
          MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS,
          valsPerPage,
          new DirectByteBufferAllocator());
        RunLengthBitPackingHybridValuesWriter repLevels = new RunLengthBitPackingHybridValuesWriter(
          MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS,
          valsPerPage,
          new DirectByteBufferAllocator());
        // for variable length binary fields
        int bytesNeededToEncodeLength = 4;
        if ((int) fieldInfo.bitLength > 0) {
          bytes = new byte[(int) Math.ceil(valsPerPage * (int) fieldInfo.bitLength / 8.0)];
        } else {
          // 3 * bytesNeededToEncodeLength (12 bytes) accounts for the 4-byte length stored with each of the three values
          int totalValLength = ((byte[]) fieldInfo.values[0]).length + ((byte[]) fieldInfo.values[1]).length
              + ((byte[]) fieldInfo.values[2]).length + 3 * bytesNeededToEncodeLength;
          // extra space for the case where the number of values in this page is not divisible by 3
          int leftOverBytes = 0;
          if (valsPerPage % 3 > 0) {
            leftOverBytes += ((byte[]) fieldInfo.values[1]).length + bytesNeededToEncodeLength;
          }
          if (valsPerPage % 3 > 1) {
            leftOverBytes += ((byte[]) fieldInfo.values[2]).length + bytesNeededToEncodeLength;
          }
          bytes = new byte[valsPerPage / 3 * totalValLength + leftOverBytes];
        }
        int bytesPerPage = (int) (valsPerPage * ((int) fieldInfo.bitLength / 8.0));
        int bytesWritten = 0;
        for (int z = 0; z < (int) fieldInfo.numberOfPages; z++, bytesWritten = 0) {
          for (int i = 0; i < valsPerPage; i++) {
            repLevels.writeInteger(0);
            defLevels.writeInteger(1);
            if (fieldInfo.values[0] instanceof Boolean) {

              bytes[currentBooleanByte] |= bitFields[booleanBitCounter.val]
                  & ((boolean) fieldInfo.values[valsWritten % 3] ? allBitsTrue : allBitsFalse);
              booleanBitCounter.increment();
              if (booleanBitCounter.val == 0) {
                currentBooleanByte++;
              }
              valsWritten++;
              if (currentBooleanByte > bytesPerPage) break;
            } else {
              if (fieldInfo.values[valsWritten % 3] instanceof byte[]) {
                System.arraycopy(ByteArrayUtil.toByta(((byte[])fieldInfo.values[valsWritten % 3]).length),
                    0, bytes, bytesWritten, bytesNeededToEncodeLength);
                System.arraycopy(fieldInfo.values[valsWritten % 3],
                    0, bytes, bytesWritten + bytesNeededToEncodeLength, ((byte[])fieldInfo.values[valsWritten % 3]).length);
                bytesWritten += ((byte[])fieldInfo.values[valsWritten % 3]).length + bytesNeededToEncodeLength;
              } else {
                System.arraycopy(ByteArrayUtil.toByta(fieldInfo.values[valsWritten % 3]),
                    0, bytes, i * ((int) fieldInfo.bitLength / 8), (int) fieldInfo.bitLength / 8);
              }
              valsWritten++;
            }

          }
          byte[] fullPage = new byte[2 * 4 * valsPerPage + bytes.length];
          byte[] repLevelBytes = repLevels.getBytes().toByteArray();
          byte[] defLevelBytes = defLevels.getBytes().toByteArray();
          System.arraycopy(bytes, 0, fullPage, 0, bytes.length);
          System.arraycopy(repLevelBytes, 0, fullPage, bytes.length, repLevelBytes.length);
          System.arraycopy(defLevelBytes, 0, fullPage, bytes.length + repLevelBytes.length, defLevelBytes.length);
          w.writeDataPage(props.recordsPerRowGroup / fieldInfo.numberOfPages, fullPage.length, BytesInput.from(fullPage), RLE, RLE, PLAIN);
          currentBooleanByte = 0;
        }
        w.endColumn();
        columnValuesWritten.put((String) fieldInfo.name, valsWritten);
      }

      w.endBlock();
    }
    w.end(new HashMap<String, String>());
    logger.debug("Finished generating parquet file.");
  }

      types.add(getType(field));
    }
    schema = new MessageType("root", types);

    Path fileName = new Path(location, prefix + "_" + index + ".parquet");
    parquetFileWriter = new ParquetFileWriter(conf, schema, fileName);
    parquetFileWriter.start();

    int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / this.schema.getColumns().size() / 5);
    pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(this.oContext,
      codec,

    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap<>();
    int valsWritten;
    for (int k = 0; k < props.numberRowGroups; k++) {
      w.startBlock(props.recordsPerRowGroup);
      currentBooleanByte = 0;
      booleanBitCounter.reset();

      for (FieldInfo fieldInfo : props.fields.values()) {

        if (!columnValuesWritten.containsKey(fieldInfo.name)) {
          columnValuesWritten.put((String) fieldInfo.name, 0);
          valsWritten = 0;
        } else {
          valsWritten = columnValuesWritten.get(fieldInfo.name);
        }

        String[] path1 = {(String) fieldInfo.name};
        ColumnDescriptor c1 = schema.getColumnDescription(path1);

        w.startColumn(c1, props.recordsPerRowGroup, codec);
        int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages);
        byte[] bytes;
        RunLengthBitPackingHybridValuesWriter defLevels = new RunLengthBitPackingHybridValuesWriter(
          MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS,
          valsPerPage,
          new DirectByteBufferAllocator());
        RunLengthBitPackingHybridValuesWriter repLevels = new RunLengthBitPackingHybridValuesWriter(
          MAX_EXPECTED_BIT_WIDTH_FOR_DEFINITION_LEVELS,
          valsPerPage,
          new DirectByteBufferAllocator());
        // for variable length binary fields
        int bytesNeededToEncodeLength = 4;
        if ((int) fieldInfo.bitLength > 0) {
          bytes = new byte[(int) Math.ceil(valsPerPage * (int) fieldInfo.bitLength / 8.0)];
        } else {
          // 3 * bytesNeededToEncodeLength (12 bytes) accounts for the 4-byte length stored with each of the three values
          int totalValLength = ((byte[]) fieldInfo.values[0]).length + ((byte[]) fieldInfo.values[1]).length
              + ((byte[]) fieldInfo.values[2]).length + 3 * bytesNeededToEncodeLength;
          // extra space for the case where the number of values in this page is not divisible by 3
          int leftOverBytes = 0;
          if (valsPerPage % 3 > 0) {
            leftOverBytes += ((byte[]) fieldInfo.values[1]).length + bytesNeededToEncodeLength;
          }
          if (valsPerPage % 3 > 1) {
            leftOverBytes += ((byte[]) fieldInfo.values[2]).length + bytesNeededToEncodeLength;
          }
          bytes = new byte[valsPerPage / 3 * totalValLength + leftOverBytes];
        }
        int bytesPerPage = (int) (valsPerPage * ((int) fieldInfo.bitLength / 8.0));
        int bytesWritten = 0;
        for (int z = 0; z < (int) fieldInfo.numberOfPages; z++, bytesWritten = 0) {
          for (int i = 0; i < valsPerPage; i++) {
            repLevels.writeInteger(0);
            defLevels.writeInteger(1);
            if (fieldInfo.values[0] instanceof Boolean) {

              bytes[currentBooleanByte] |= bitFields[booleanBitCounter.val]
                  & ((boolean) fieldInfo.values[valsWritten % 3] ? allBitsTrue : allBitsFalse);
              booleanBitCounter.increment();
              if (booleanBitCounter.val == 0) {
                currentBooleanByte++;
              }
              valsWritten++;
              if (currentBooleanByte > bytesPerPage) {
                break;
              }
            } else {
              if (fieldInfo.values[valsWritten % 3] instanceof byte[]) {
                System.arraycopy(ByteArrayUtil.toByta(((byte[])fieldInfo.values[valsWritten % 3]).length),
                    0, bytes, bytesWritten, bytesNeededToEncodeLength);
                System.arraycopy(fieldInfo.values[valsWritten % 3],
                    0, bytes, bytesWritten + bytesNeededToEncodeLength, ((byte[])fieldInfo.values[valsWritten % 3]).length);
                bytesWritten += ((byte[])fieldInfo.values[valsWritten % 3]).length + bytesNeededToEncodeLength;
              } else {
                System.arraycopy(ByteArrayUtil.toByta(fieldInfo.values[valsWritten % 3]),
                    0, bytes, i * ((int) fieldInfo.bitLength / 8), (int) fieldInfo.bitLength / 8);
              }
              valsWritten++;
            }

          }
          byte[] fullPage = new byte[2 * 4 * valsPerPage + bytes.length];
          byte[] repLevelBytes = repLevels.getBytes().toByteArray();
          byte[] defLevelBytes = defLevels.getBytes().toByteArray();
          System.arraycopy(bytes, 0, fullPage, 0, bytes.length);
          System.arraycopy(repLevelBytes, 0, fullPage, bytes.length, repLevelBytes.length);
          System.arraycopy(defLevelBytes, 0, fullPage, bytes.length + repLevelBytes.length, defLevelBytes.length);
          w.writeDataPage(props.recordsPerRowGroup / fieldInfo.numberOfPages, fullPage.length, BytesInput.from(fullPage), RLE, RLE, PLAIN);
          currentBooleanByte = 0;
        }
        w.endColumn();
        columnValuesWritten.put((String) fieldInfo.name, valsWritten);
      }

      w.endBlock();
    }
    w.end(new HashMap<String, String>());
    logger.debug("Finished generating parquet file.");
  }
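After w.end() the footer can be read back as a sanity check. A sketch using ParquetFileReader.readFooter from the same parquet.hadoop package (ParquetMetadata and BlockMetaData live in parquet.hadoop.metadata; the expected counts come from the props object above):

    // Read the footer back and confirm row-group and record counts.
    ParquetMetadata footer = ParquetFileReader.readFooter(configuration, path);
    assert footer.getBlocks().size() == props.numberRowGroups;
    long totalRows = 0;
    for (BlockMetaData block : footer.getBlocks()) {
      totalRows += block.getRowCount();
    }
    assert totalRows == (long) props.numberRowGroups * props.recordsPerRowGroup;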

      types.add(getType(field));
    }
    schema = new MessageType("root", types);

    Path fileName = new Path(location, prefix + "_" + index + ".parquet");
    parquetFileWriter = new ParquetFileWriter(conf, schema, fileName);
    parquetFileWriter.start();

    int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / this.schema.getColumns().size() / 5);
    pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(this.oContext,
      codec,
