// loop to add up the length of the fixed width columns and build the schema
for (int i = 0; i < columns.size(); ++i) {
column = columns.get(i);
logger.debug("name: " + fileMetaData.getSchema().get(i).name);
SchemaElement se = schemaElements.get(column.getPath()[0]);
MajorType mt = ParquetToDrillTypeConverter.toMajorType(column.getType(), se.getType_length(), getDataMode(column), se);
field = MaterializedField.create(toFieldName(column.getPath()),mt);
if ( ! fieldSelected(field)){
continue;
}
columnsToScan++;
// sum the lengths of all of the fixed length fields
if (column.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
if (column.getMaxRepetitionLevel() > 0) {
allFieldsFixedLength = false;
}
// There is no support for the fixed binary type in parquet yet; leaving a task here as a reminder
// TODO - implement this when the feature is added upstream
if (column.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY){
bitWidthAllFixedFields += se.getType_length() * 8;
} else {
bitWidthAllFixedFields += getTypeLengthInBits(column.getType());
}
} else {
allFieldsFixedLength = false;
}
}
rowGroupOffset = footer.getBlocks().get(rowGroupIndex).getColumns().get(0).getFirstDataPageOffset();
// none of the columns in the parquet file matched the columns requested by the query
if (columnsToScan == 0){
throw new ExecutionSetupException("Error reading from parquet file. No columns requested were found in the file.");
}
if (allFieldsFixedLength) {
recordsPerBatch = (int) Math.min(Math.min(batchSize / bitWidthAllFixedFields,
footer.getBlocks().get(0).getColumns().get(0).getValueCount()), 65535);
}
else {
recordsPerBatch = DEFAULT_RECORDS_TO_READ_IF_NOT_FIXED_WIDTH;
}
try {
ValueVector v;
ConvertedType convertedType;
SchemaElement schemaElement;
ArrayList<VarLengthColumn> varLengthColumns = new ArrayList<>();
// initialize all of the column read status objects
boolean fieldFixedLength = false;
for (int i = 0; i < columns.size(); ++i) {
column = columns.get(i);
columnChunkMetaData = footer.getBlocks().get(rowGroupIndex).getColumns().get(i);
schemaElement = schemaElements.get(column.getPath()[0]);
convertedType = schemaElement.getConverted_type();
MajorType type = ParquetToDrillTypeConverter.toMajorType(column.getType(), schemaElement.getType_length(), getDataMode(column), schemaElement);
field = MaterializedField.create(toFieldName(column.getPath()), type);
// the field was not requested to be read
if ( ! fieldSelected(field)) continue;
fieldFixedLength = column.getType() != PrimitiveType.PrimitiveTypeName.BINARY;
v = output.addField(field, (Class<? extends ValueVector>) TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode()));
if (column.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
if (column.getMaxRepetitionLevel() > 0) {
ColumnReader dataReader = ColumnReaderFactory.createFixedColumnReader(this, fieldFixedLength, column, columnChunkMetaData, recordsPerBatch,
((RepeatedFixedWidthVector) v).getMutator().getDataVector(), schemaElement);
varLengthColumns.add(new FixedWidthRepeatedReader(this, dataReader,