Package org.apache.drill.exec.store.parquet

Source Code of org.apache.drill.exec.store.parquet.ColumnReader

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.parquet;

import io.netty.buffer.ByteBuf;
import org.apache.drill.exec.store.VectorHolder;
import org.apache.drill.exec.vector.BaseValueVector;
import org.apache.drill.exec.vector.ValueVector;
import parquet.column.ColumnDescriptor;
import parquet.hadoop.metadata.ColumnChunkMetaData;
import parquet.schema.PrimitiveType;

import java.io.IOException;

public abstract class ColumnReader {
  // Value Vector for this column
  VectorHolder valueVecHolder;
  // column description from the parquet library
  ColumnDescriptor columnDescriptor;
  // metadata of the column, from the parquet library
  ColumnChunkMetaData columnChunkMetaData;
  // status information on the current page
  PageReadStatus pageReadStatus;

  long readPositionInBuffer;

  // quick reference to see if the field is fixed length (as this requires an instanceof)
  boolean isFixedLength;
  // counter for the total number of values read from one or more pages
  // when a batch is filled all of these values should be the same for each column
  int totalValuesRead;
  // counter for the values that have been read in this pass (a single call to the next() method)
  int valuesReadInCurrentPass;
  // length of single data value in bits, if the length is fixed
  int dataTypeLengthInBits;
  int bytesReadInCurrentPass;
  ParquetRecordReader parentReader;

  ByteBuf vectorData;

  // variables for a single read pass
  long readStartInBytes = 0, readLength = 0, readLengthInBits = 0, recordsReadInThisIteration = 0;
  byte[] bytes;

  ColumnReader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData,
               boolean fixedLength, ValueVector v){
    this.parentReader = parentReader;
    if (allocateSize > 1) valueVecHolder = new VectorHolder(allocateSize, v);
    else valueVecHolder = new VectorHolder(5000, v);

    columnDescriptor = descriptor;
    this.columnChunkMetaData = columnChunkMetaData;
    isFixedLength = fixedLength;

    pageReadStatus = new PageReadStatus(this, parentReader.getRowGroupIndex(), parentReader.getBufferWithAllData());

    if (parentReader.getRowGroupIndex() != 0) readPositionInBuffer = columnChunkMetaData.getFirstDataPageOffset() - 4;
    else readPositionInBuffer = columnChunkMetaData.getFirstDataPageOffset();

    if (columnDescriptor.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
      dataTypeLengthInBits = ParquetRecordReader.getTypeLengthInBits(columnDescriptor.getType());
    }
  }

  public void readAllFixedFields(long recordsToReadInThisPass, ColumnReader firstColumnStatus) throws IOException {
    readStartInBytes = 0;
    readLength = 0;
    readLengthInBits = 0;
    recordsReadInThisIteration = 0;
    vectorData = ((BaseValueVector)valueVecHolder.getValueVector()).getData();
    do {
      // if no page has been read, or all of the records have been read out of a page, read the next one
      if (pageReadStatus.currentPage == null
          || pageReadStatus.valuesRead == pageReadStatus.currentPage.getValueCount()) {
        if (!pageReadStatus.next()) {
          break;
        }
      }

      readField( recordsToReadInThisPass, firstColumnStatus);

      valuesReadInCurrentPass += recordsReadInThisIteration;
      totalValuesRead += recordsReadInThisIteration;
      pageReadStatus.valuesRead += recordsReadInThisIteration;
      if (readStartInBytes + readLength >= pageReadStatus.byteLength) {
        pageReadStatus.next();
      } else {
        pageReadStatus.readPosInBytes = readStartInBytes + readLength;
      }
    }
    while (valuesReadInCurrentPass < recordsToReadInThisPass && pageReadStatus.currentPage != null);
    valueVecHolder.getValueVector().getMutator().setValueCount(
        valuesReadInCurrentPass);
  }

  protected abstract void readField(long recordsToRead, ColumnReader firstColumnStatus);
}
TOP

Related Classes of org.apache.drill.exec.store.parquet.ColumnReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.