// Scenario: write three force-flushed stripes of string rows, then open the file for reading.
// Turn on the HIVE_ORC_DICTIONARY_SORT_KEYS option before the writer is constructed.
OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_SORT_KEYS, true);
// NOTE(review): the trailing 1000 is presumably the row index stride, which would match the
// 1000-row groups written in the last stripe below -- confirm against the
// WriterImplWithForceFlush constructor signature.
WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
inspector, 1000000, CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
// Write a stripe which is not dictionary encoded
// (2000 rows, each holding a distinct string "0" .. "1999").
for (int i = 0; i < 2000; i++) {
writer.addRow(new StringStruct(Integer.toString(i)));
}
writer.forceFlushStripe();
// Write another stripe (doesn't matter what); the same 2000 distinct strings are reused.
for (int i = 0; i < 2000; i++) {
writer.addRow(new StringStruct(Integer.toString(i)));
}
writer.forceFlushStripe();
// Write a stripe which will be dictionary encoded
// Note: it is important that the first string written here ("b") is lexicographically after
// the first string of the next index stride ("a"). This way, if sorting by index strides is
// not working, "b" will appear after "a", though it should appear before, yielding incorrect
// results.
// First group of 1000 rows: "b" followed by 999 copies of "123".
writer.addRow(new StringStruct("b"));
for (int i = 0; i < 999; i++) {
writer.addRow(new StringStruct("123"));
}
// Second group of 1000 rows: "a" followed by 999 copies of "123".
writer.addRow(new StringStruct("a"));
for (int i = 0; i < 999; i++) {
writer.addRow(new StringStruct("123"));
}
writer.forceFlushStripe();
writer.close();
// Re-open the file that was just written and obtain a row reader over it.
// NOTE(review): the null argument to rows() presumably means "no column selection" -- confirm
// against the Reader API.
Reader reader = OrcFile.createReader(fs, testFilePath, conf);
RecordReader rows = reader.rows(null);