  }

  @Test
  public void testInOutFormat() throws Exception {
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
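    // write three rows (x = 1, 2, 3; y = 2) through the ORC output format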
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer =
        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    ReaderWriterProfiler.setProfilerOptions(conf);
    writer.write(serde.serialize(new MyRow(1,2), inspector));
    writer.write(serde.serialize(new MyRow(2,2), inspector));
    writer.write(serde.serialize(new MyRow(3,2), inspector));
    writer.close(true);
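    // re-initialize the serde with the table schema for the read path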
    serde = new OrcSerde();
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    serde.initialize(conf, properties);
    assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<x:int,y:int>", inspector.getTypeName());
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // test the validateInput method
    ArrayList<FileStatus> fileList = new ArrayList<FileStatus>(3);
    assertEquals(false,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(testFilePath));
    assertEquals(true,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(workDir));
    assertEquals(false,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    // read the whole file
    org.apache.hadoop.mapred.RecordReader reader =
        in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    IntObjectInspector intInspector =
        (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(0))));
      assertEquals(2, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    assertEquals(1.0, reader.getProgress(), 0.00001);
    reader.close();
    // read just the first column
    conf.set("hive.io.file.readcolumn.ids", "0");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(value, fields.get(0))));
      assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
    }
    assertEquals(3, rowNum);
    reader.close();
    // test the mapping of empty string to all columns
    conf.set("hive.io.file.readcolumn.ids", "");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(value, fields.get(0))));
      assertEquals(2, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    reader.close();
  }