for (VarLenBinaryReader.NullableVarLengthColumn r : varLengthReader.nullableColumns) {
output.addField(r.valueVecHolder.getValueVector());
}
output.setNewSchema();
} catch (SchemaChangeException e) {
throw new ExecutionSetupException("Error setting up output mutator.", e);
}
// The existing utility for reading a stream into a ByteBuf copies all of the data through one
// giant intermediate buffer; here we do the same work in a loop with a small buffer so that we
// do not have to allocate that much on the heap.
// TODO - this should be replaced by an enhancement in Hadoop 2.0 that allows reading directly
// into a ByteBuf passed to the read method
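// totalByteLength is the number of bytes this reader will copy out of the file; start is the
// file offset at which to begin reading.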
int totalByteLength = 0;
long start = 0;
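// For the first row group we read from the beginning of the file; the extra 4 bytes appear to
// account for the "PAR1" magic header at the start of a Parquet file. For later row groups we
// seek straight to the row group's offset.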
if (rowGroupIndex == 0) {
totalByteLength = 4;
} else {
start = rowGroupOffset;
}
// TODO - the methods for getting the total size and the total uncompressed size of a column chunk
// seem to return the opposite of what their names suggest.
// I found the bug in the mainline and filed an issue for it; hopefully it will be fixed soon.
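// Sum the on-disk size of every column chunk assigned to this reader so we know how many bytes
// to copy into the single buffer below.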
for (ColumnReader crs : columnStatuses) {
totalByteLength += crs.columnChunkMetaData.getTotalSize();
}
for (VarLenBinaryReader.VarLengthColumn r : varLengthReader.columns) {
totalByteLength += r.columnChunkMetaData.getTotalSize();
}
for (VarLenBinaryReader.NullableVarLengthColumn r : varLengthReader.nullableColumns) {
totalByteLength += r.columnChunkMetaData.getTotalSize();
}
int bufferSize = 64*1024;
long totalBytesWritten = 0;
int validBytesInCurrentBuffer;
byte[] buffer = new byte[bufferSize];
try (FSDataInputStream inputStream = fileSystem.open(hadoopPath)) {
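// allocate one buffer large enough to hold all of the bytes read below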
bufferWithAllData = allocator.buffer(totalByteLength);
inputStream.seek(start);
while (totalBytesWritten < totalByteLength) {
validBytesInCurrentBuffer = (int) Math.min(bufferSize, totalByteLength - totalBytesWritten);
// readFully guards against short reads; a plain read() may return fewer bytes than requested
inputStream.readFully(buffer, 0, validBytesInCurrentBuffer);
bufferWithAllData.writeBytes(buffer, 0, validBytesInCurrentBuffer);
totalBytesWritten += validBytesInCurrentBuffer;
}
} catch (IOException e) {
throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: " + hadoopPath.getName(), e);
}
}