            LOG.info(MessageFormat.format(
                    "Loading Parquet file metadata ({0}): {1}",
                    descriptor.getDataModelClass().getSimpleName(),
                    path));
        }
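        // read the footer: file metadata, the schema, and the row group (block) layout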
        ParquetMetadata footer = ParquetFileReader.readFooter(hadoopConfiguration, path);
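        // keep only the row groups assigned to this fragment (see filterBlocks);
        // if none overlap, there is nothing for this fragment to read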
        List<BlockMetaData> blocks = filterBlocks(footer.getBlocks());
        if (blocks.isEmpty()) {
            return null;
        }
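        // estimate the average record size within this fragment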
        long totalRecords = computeTotalRecords(blocks);
        this.averageBytesPerRecord = (double) fragmentSize / totalRecords;
        if (LOG.isInfoEnabled()) {
            LOG.info(MessageFormat.format(
                    "Loading Parquet file contents ({0}): path={1}, range={2}+{3}",
                    descriptor.getDataModelClass().getSimpleName(),
                    path,
                    offset,
                    fragmentSize));
        }
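        // open the file reader over just the selected row groups,
        // requesting every column in the file schema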
        this.fileReader = new ParquetFileReader(
                hadoopConfiguration,
                path,
                blocks,
                footer.getFileMetaData().getSchema().getColumns());
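        // the materializer assembles data model objects from Parquet records,
        // using the column mapping configuration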
        this.materializer = new DataModelMaterializer(
                descriptor,
                footer.getFileMetaData().getSchema(),
                mappingConfiguration);
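        // resolve the materialized (requested) schema against the file schema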
        this.columnIo = new ColumnIOFactory().getColumnIO(
                materializer.getMaterializeSchema(),
                footer.getFileMetaData().getSchema());
    }
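    // returns the next row group's pages, or null once the file is exhausted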
    return fileReader.readNextRowGroup();
}