// Bookkeeping for this pass: how many records have been accepted, the summed
// length of the variable-length fields of the record under inspection, and
// whether some column has run out of pages.
long recordsReadInCurrentPass = 0;
int lengthVarFieldsInCurrentRecord;
boolean rowGroupFinished = false;
// Scratch references, re-pointed at each column's page bytes / value vector as
// the loops below walk the columns.
byte[] bytes;
VarBinaryVector currVec;
NullableVarBinaryVector currNullVec;
// Every offset vector starts with a leading 0 entry; the per-pass counters of
// each reader in `columns` are zeroed at the same time.
for (ColumnReader reader : columns) {
  currVec = (VarBinaryVector) reader.valueVecHolder.getValueVector();
  currVec.getAccessor().getOffsetVector().getData().writeInt(0);
  reader.bytesReadInCurrentPass = 0;
  reader.valuesReadInCurrentPass = 0;
}
// Nullable columns get the same treatment; their offsets live in the wrapped
// values vector, and the per-pass null counter is cleared as well.
for (NullableVarLengthColumn nullableReader : nullableColumns) {
  currNullVec = (NullableVarBinaryVector) nullableReader.valueVecHolder.getValueVector();
  currNullVec.getMutator().getVectorWithValues().getAccessor().getOffsetVector().getData().writeInt(0);
  nullableReader.bytesReadInCurrentPass = 0;
  nullableReader.valuesReadInCurrentPass = 0;
  nullableReader.nullsRead = 0;
}
do {
lengthVarFieldsInCurrentRecord = 0;
for (ColumnReader columnReader : columns) {
if (columnReader.pageReadStatus.currentPage == null
|| columnReader.pageReadStatus.valuesRead == columnReader.pageReadStatus.currentPage.getValueCount()) {
columnReader.totalValuesRead += columnReader.pageReadStatus.valuesRead;
if (!columnReader.pageReadStatus.next()) {
rowGroupFinished = true;
break;
}
}
bytes = columnReader.pageReadStatus.pageDataByteArray;
// re-purposing this field here for length in BYTES to prevent repetitive multiplication/division
columnReader.dataTypeLengthInBits = BytesUtils.readIntLittleEndian(bytes,
(int) columnReader.pageReadStatus.readPosInBytes);
lengthVarFieldsInCurrentRecord += columnReader.dataTypeLengthInBits;
}
for (NullableVarLengthColumn columnReader : nullableColumns) {
if (columnReader.pageReadStatus.currentPage == null
|| columnReader.pageReadStatus.valuesRead == columnReader.pageReadStatus.currentPage.getValueCount()) {
columnReader.totalValuesRead += columnReader.pageReadStatus.valuesRead;
if (!columnReader.pageReadStatus.next()) {
rowGroupFinished = true;
break;
}
}
bytes = columnReader.pageReadStatus.pageDataByteArray;
if ( columnReader.columnDescriptor.getMaxDefinitionLevel() > columnReader.pageReadStatus.definitionLevels.readInteger()){
columnReader.currentValNull = true;
columnReader.dataTypeLengthInBits = 0;
columnReader.nullsRead++;
continue;// field is null, no length to add to data vector
}
// re-purposing this field here for length in BYTES to prevent repetitive multiplication/division
columnReader.dataTypeLengthInBits = BytesUtils.readIntLittleEndian(bytes,
(int) columnReader.pageReadStatus.readPosInBytes);
lengthVarFieldsInCurrentRecord += columnReader.dataTypeLengthInBits;
}
// Leave the read loop as soon as a column has no more pages ...
if (rowGroupFinished) {
  break;
}
// ... or when accepting one more record would exceed the batch size.
// NOTE(review): the fixed-field term is named a "bit width" while
// lengthVarFieldsInCurrentRecord is accumulated in bytes -- confirm the units
// expected by getBatchSize().
if ((recordsReadInCurrentPass + 1) * parentReader.getBitWidthAllFixedFields() + lengthVarFieldsInCurrentRecord
    > parentReader.getBatchSize()) {
  break;
}
// The record fits: count it before its fields are copied.
recordsReadInCurrentPass++;
for (ColumnReader columnReader : columns) {
bytes = columnReader.pageReadStatus.pageDataByteArray;
currVec = (VarBinaryVector) columnReader.valueVecHolder.getValueVector();
// again, I am re-purposing the unused field here, it is a length n BYTES, not bits
currVec.getAccessor().getOffsetVector().getData().writeInt((int) columnReader.bytesReadInCurrentPass +
columnReader.dataTypeLengthInBits - 4 * (int) columnReader.valuesReadInCurrentPass);
currVec.getData().writeBytes(bytes, (int) columnReader.pageReadStatus.readPosInBytes + 4,
columnReader.dataTypeLengthInBits);
columnReader.pageReadStatus.readPosInBytes += columnReader.dataTypeLengthInBits + 4;
columnReader.bytesReadInCurrentPass += columnReader.dataTypeLengthInBits + 4;
columnReader.pageReadStatus.valuesRead++;
columnReader.valuesReadInCurrentPass++;
currVec.getMutator().setValueCount((int)recordsReadInCurrentPass);
}
for (NullableVarLengthColumn columnReader : nullableColumns) {
bytes = columnReader.pageReadStatus.pageDataByteArray;
currNullVec = (NullableVarBinaryVector) columnReader.valueVecHolder.getValueVector();
// again, I am re-purposing the unused field here, it is a length n BYTES, not bits
currNullVec.getMutator().getVectorWithValues().getAccessor().getOffsetVector().getData()
.writeInt(
(int) columnReader.bytesReadInCurrentPass +
columnReader.dataTypeLengthInBits - 4 * (columnReader.valuesReadInCurrentPass -
(columnReader.currentValNull ? Math.max (0, columnReader.nullsRead - 1) : columnReader.nullsRead)));
columnReader.currentValNull = false;
if (columnReader.dataTypeLengthInBits > 0){
currNullVec.getData().writeBytes(bytes, (int) columnReader.pageReadStatus.readPosInBytes + 4,
columnReader.dataTypeLengthInBits);
((NullableVarBinaryVector)columnReader.valueVecHolder.getValueVector()).getMutator().setIndexDefined(columnReader.valuesReadInCurrentPass);
}
if (columnReader.dataTypeLengthInBits > 0){
columnReader.pageReadStatus.readPosInBytes += columnReader.dataTypeLengthInBits + 4;
columnReader.bytesReadInCurrentPass += columnReader.dataTypeLengthInBits + 4;
}
columnReader.pageReadStatus.valuesRead++;
columnReader.valuesReadInCurrentPass++;
currNullVec.getMutator().setValueCount((int)recordsReadInCurrentPass);
// reached the end of a page
if ( columnReader.pageReadStatus.valuesRead == columnReader.pageReadStatus.currentPage.getValueCount()) {
columnReader.pageReadStatus.next();
}
}