writer.close();
OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_READ_COMPRESSION_STRIDES, 2);
OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ, false);
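// The two settings above presumably make each compressed chunk cover two index strides and
// disable eager HDFS reads, so the seeks below have to resolve offsets inside compression blocks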
Reader reader = OrcFile.createReader(fs, testFilePath, conf);
StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
DoubleObjectInspector columnInspector =
(DoubleObjectInspector) fields.get(0).getFieldObjectInspector();
RecordReader rows = reader.rows(null);
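// Passing null for the included-columns array should select every column in the file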
Object row = null;
// Skip enough values to get to the 2nd index stride in the first chunk
for (int i = 0; i < 40001; i++) {
row = rows.next(row);
}
// This will set previousOffset to the size of the first compression block and the
// compressionOffset to some other value (it doesn't matter what; the point is that it's
// different from the start of the compression block)
assertEquals(values[40000], columnInspector.get(readerInspector.getStructFieldData(row,
fields.get(0))));
// Skip enough values to get to the 2nd index stride of the second chunk
for (int i = 0; i < 80000; i++) {
row = rows.next(row);
}
// When seek is called, previousOffset will equal newCompressedOffset, since the former is
// the length of the first compression block and the latter is the length of the third
// compression block (remember each chunk contains 2 index strides). So if we only check this
// and (for that or some other reason) fail to adjust compressedIndex, we will read the wrong data
assertEquals(values[120000], columnInspector.get(readerInspector.getStructFieldData(row,
fields.get(0))));
rows.close();
}