// Turn on the HIVE_ORC_DICTIONARY_SORT_KEYS option in conf before the writer is created.
OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_SORT_KEYS, true);
// NOTE(review): the numeric arguments are positional; given the 1000-row groups written
// below, 1000 is presumably the row index stride -- confirm against WriterImpl's constructor.
Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
1000000, CompressionKind.NONE, 100, 1000, memory);
// Write a stripe which is not dictionary encoded (2000 rows, every value distinct)
for (int i = 0; i < 2000; i++) {
writer.addRow(new StringStruct(Integer.toString(i)));
}
// Presumably ends the current stripe so the rows that follow start a new one -- confirm
// against the memory manager used by this test.
memory.forceFlushStripe();
// Write another stripe (its contents do not matter; the same 2000 values are reused)
for (int i = 0; i < 2000; i++) {
writer.addRow(new StringStruct(Integer.toString(i)));
}
memory.forceFlushStripe();
// Write a stripe which will be dictionary encoded: 2000 rows but only three distinct
// values ("b", "a" and "123"), laid out as two groups of 1 + 999 = 1000 rows each.
// Note: it is important that the first value ("b") is lexicographically after the first
// value of the next index stride ("a"). This way, if sorting by index strides is not
// working, "b" will appear after "a", though it should appear before it, yielding
// incorrect results.
writer.addRow(new StringStruct("b"));
for (int i = 0; i < 999; i++) {
writer.addRow(new StringStruct("123"));
}
// First row of the second 1000-row group.
writer.addRow(new StringStruct("a"));
for (int i = 0; i < 999; i++) {
writer.addRow(new StringStruct("123"));
}
memory.forceFlushStripe();
// Close the writer, then reopen the same path with a reader to get the rows back.
writer.close();
Reader reader = OrcFile.createReader(fs, testFilePath, conf);
// NOTE(review): null is passed to rows(); presumably this means no column selection -- confirm.
RecordReader rows = reader.rows(null);