Package org.apache.hadoop.hive.serde2.objectinspector

Examples of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector
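
The snippets below exercise StructObjectInspector through the ORC reader, writer, SerDe, and
input/output formats. As orientation, here is a minimal, self-contained sketch (not taken from
the snippets; the Point row class and the sketch class name are made up for illustration) of the
basic pattern they all share: obtain an inspector for a row type, enumerate its fields with
getAllStructFieldRefs(), and pull values out of a row with getStructFieldData().

import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class StructObjectInspectorSketch {

  // Hypothetical row class, standing in for MyRow/NestedRow/StringRow in the tests below.
  public static class Point {
    public int x;
    public int y;
    public Point(int x, int y) { this.x = x; this.y = y; }
  }

  public static void main(String[] args) {
    // Build an inspector for the Java class via reflection, as the tests below do.
    StructObjectInspector inspector = (StructObjectInspector)
        ObjectInspectorFactory.getReflectionObjectInspector(
            Point.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);

    Point row = new Point(1, 2);
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    for (StructField field : fields) {
      // getStructFieldData extracts one field's value without the caller knowing the
      // concrete row type; the field's own ObjectInspector describes that value.
      Object value = inspector.getStructFieldData(row, field);
      System.out.println(field.getFieldName() + ": "
          + field.getFieldObjectInspector().getTypeName() + " = " + value);
    }
  }
}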


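    // Fragment: re-opens a just-written ORC file with HIVE_ORC_READ_COMPRESSION_STRIDES set
    // to 2, then uses the reader's StructObjectInspector and a DoubleObjectInspector to verify
    // values after skipping across index strides and compression block boundaries.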
    writer.close();
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_READ_COMPRESSION_STRIDES, 2);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ, false);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);

    StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
    List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
    DoubleObjectInspector columnInspector =
        (DoubleObjectInspector) fields.get(0).getFieldObjectInspector();

    RecordReader rows = reader.rows(null);
    Object row = null;

    // Skip enough values to get to the 2nd index stride in the first chunk
    for (int i = 0; i < 40001; i++) {
      row = rows.next(row);
    }

    // This will set previousOffset to be the size of the first compression block and the
    // compressionOffset to some other value (it doesn't matter what; the point is that it's
    // different from the start of the compression block)
    assertEquals(values[40000], columnInspector.get(readerInspector.getStructFieldData(row,
        fields.get(0))));

    // Skip enough values to get to the 2nd index stride of the second chunk
    for (int i = 0; i < 80000; i++) {
      rows.next(row);
    }

    // When seek is called, previousOffset will equal newCompressedOffset, since the former is
    // the length of the first compression block and the latter is the length of the third
    // compression block (remember the chunks contain 2 index strides). So if we only checked
    // that the two differ (or for some other reason failed to adjust compressedIndex), we
    // would read the wrong data.
    assertEquals(values[120000], columnInspector.get(readerInspector.getStructFieldData(row,
        fields.get(0))));

    rows.close();
  }


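    // Fragment: after checking column statistics, verify the reader's ObjectInspectors for a
    // schema with nested struct, list, and map columns, then read the first row.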
        stats[7].toString());

    assertEquals("count: 2 min: bye max: hi", stats[9].toString());

    // check the inspectors
    StructObjectInspector readerInspector =
        (StructObjectInspector) reader.getObjectInspector();
    assertEquals(ObjectInspector.Category.STRUCT,
        readerInspector.getCategory());
    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
        + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
        + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
        + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
        + "map:map<string,struct<int1:int,string1:string>>>",
        readerInspector.getTypeName());
    List<? extends StructField> fields =
        readerInspector.getAllStructFieldRefs();
    BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.
        getStructFieldRef("boolean1").getFieldObjectInspector();
    ByteObjectInspector by = (ByteObjectInspector) readerInspector.
        getStructFieldRef("byte1").getFieldObjectInspector();
    ShortObjectInspector sh = (ShortObjectInspector) readerInspector.
        getStructFieldRef("short1").getFieldObjectInspector();
    IntObjectInspector in = (IntObjectInspector) readerInspector.
        getStructFieldRef("int1").getFieldObjectInspector();
    LongObjectInspector lo = (LongObjectInspector) readerInspector.
        getStructFieldRef("long1").getFieldObjectInspector();
    FloatObjectInspector fl = (FloatObjectInspector) readerInspector.
        getStructFieldRef("float1").getFieldObjectInspector();
    DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.
        getStructFieldRef("double1").getFieldObjectInspector();
    BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
        getStructFieldRef("bytes1").getFieldObjectInspector();
    StringObjectInspector st = (StringObjectInspector) readerInspector.
        getStructFieldRef("string1").getFieldObjectInspector();
    StructObjectInspector mid = (StructObjectInspector) readerInspector.
        getStructFieldRef("middle").getFieldObjectInspector();
    List<? extends StructField> midFields =
        mid.getAllStructFieldRefs();
    ListObjectInspector midli =
        (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
    StructObjectInspector inner = (StructObjectInspector)
        midli.getListElementObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector inner_in = (IntObjectInspector)
        inFields.get(0).getFieldObjectInspector();
    StringObjectInspector inner_st = (StringObjectInspector)
        inFields.get(1).getFieldObjectInspector();
    ListObjectInspector li = (ListObjectInspector) readerInspector.
        getStructFieldRef("list").getFieldObjectInspector();
    MapObjectInspector ma = (MapObjectInspector) readerInspector.
        getStructFieldRef("map").getFieldObjectInspector();
    StructObjectInspector lc = (StructObjectInspector)
        li.getListElementObjectInspector();
    StringObjectInspector mk = (StringObjectInspector)
        ma.getMapKeyObjectInspector();
    StructObjectInspector mv = (StructObjectInspector)
        ma.getMapValueObjectInspector();
    RecordReader rows = reader.rows(null);
    Object row = rows.next(null);
    assertNotNull(row);
    // check the contents of the first row

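    // Fragment: tail of a struct TreeWriter constructor; it reads the field list from the
    // StructObjectInspector and creates one child writer per struct field.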
                     StreamFactory writer,
                     boolean nullable, Configuration conf,
                     boolean useVInts, boolean lowMemoryMode,
                     MemoryEstimate memoryEstimate) throws IOException {
      super(columnId, inspector, writer, nullable, conf, useVInts, memoryEstimate);
      StructObjectInspector structObjectInspector =
        (StructObjectInspector) inspector;
      fields = structObjectInspector.getAllStructFieldRefs();
      childrenWriters = new TreeWriter[fields.size()];
      for(int i=0; i < childrenWriters.length; ++i) {
        childrenWriters[i] = createTreeWriter(
          fields.get(i).getFieldObjectInspector(), writer, true, conf, useVInts,
          lowMemoryMode, memoryEstimate);

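    // Fragment: the struct writer's write() casts its inspector to StructObjectInspector,
    // pulls the field values out of the row, and forwards each one to the matching child writer.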
    @Override
    void write(Object obj) throws IOException {
      long rawDataSize = 0;
      if (obj != null) {
        StructObjectInspector insp = (StructObjectInspector) inspector;
        List<Object> fieldDataList = insp.getStructFieldsDataAsList(obj);

        for(int i = 0; i < fields.size(); ++i) {
          TreeWriter writer = childrenWriters[i];
          writer.write(fieldDataList.get(i));
          rawDataSize += writer.getRowRawDataSize();

  }

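  // Writes three MyRow records through OrcSerde and OrcOutputFormat, then reads them back
  // through OrcInputFormat with a full projection, a single-column projection, and an empty
  // column-id list (which maps to all columns).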
  @Test
  public void testInOutFormat() throws Exception {
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer =
        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    ReaderWriterProfiler.setProfilerOptions(conf);
    writer.write(serde.serialize(new MyRow(1,2), inspector));
    writer.write(serde.serialize(new MyRow(2,2), inspector));
    writer.write(serde.serialize(new MyRow(3,2), inspector));
    writer.close(true);
    serde = new OrcSerde();
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    serde.initialize(conf, properties);
    assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<x:int,y:int>", inspector.getTypeName());
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // test the validateInput method
    ArrayList<FileStatus> fileList = new ArrayList<FileStatus>(3);
    assertEquals(false,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(testFilePath));
    assertEquals(true,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(workDir));
    assertEquals(false,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));


    // read the whole file
    org.apache.hadoop.mapred.RecordReader reader =
        in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    IntObjectInspector intInspector =
        (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(0))));
      assertEquals(2, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    assertEquals(1.0, reader.getProgress(), 0.00001);
    reader.close();

    // read just the first column
    conf.set("hive.io.file.readcolumn.ids", "0");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(value, fields.get(0))));
      assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
    }
    assertEquals(3, rowNum);
    reader.close();

    // test the mapping of empty string to all columns
    conf.set("hive.io.file.readcolumn.ids", "");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(value, fields.get(0))));
      assertEquals(2, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    reader.close();
  }

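  // Writes NestedRow records through the mapred OutputFormat and reads back only the nested
  // struct column, drilling into it with a second StructObjectInspector.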
  @Test
  public void testMROutput() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer =
        outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
            Reporter.NULL);
    writer.write(NullWritable.get(),
        serde.serialize(new NestedRow(1,2,3), inspector));
    writer.write(NullWritable.get(),
        serde.serialize(new NestedRow(4,5,6), inspector));
    writer.write(NullWritable.get(),
        serde.serialize(new NestedRow(7,8,9), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    conf.set("hive.io.file.readcolumn.ids", "1");
    org.apache.hadoop.mapred.RecordReader reader =
        in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StructObjectInspector inner = (StructObjectInspector)
        fields.get(1).getFieldObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector intInspector =
        (IntObjectInspector) inFields.get(0).getFieldObjectInspector();
    while (reader.next(key, value)) {
      assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
      Object sub = inspector.getStructFieldData(value, fields.get(1));
      assertEquals(3*rowNum+1, intInspector.get(inner.getStructFieldData(sub,
          inFields.get(0))));
      assertEquals(3*rowNum+2, intInspector.get(inner.getStructFieldData(sub,
          inFields.get(1))));
      rowNum += 1;
    }
    assertEquals(3, rowNum);
    reader.close();

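  // Writes a single StringRow and reads it back, extracting the value through the field's
  // StringObjectInspector; the output directory comes from mapred.work.output.dir.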
  @Test
  public void testMROutput2() throws Exception {
    JobConf job = new JobConf(conf);
    // Test that you can set the output directory using this config
    job.set("mapred.work.output.dir", testFilePath.getParent().toString());
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer =
        outFormat.getRecordWriter(fs, job, testFilePath.getName(),
            Reporter.NULL);
    writer.write(NullWritable.get(),
        serde.serialize(new StringRow("a"), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "col");
    properties.setProperty("columns.types", "string");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    org.apache.hadoop.mapred.RecordReader reader =
        in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    reader.next(key, value);
    assertEquals("a",
        ((StringObjectInspector) fields.get(0).getFieldObjectInspector()).getPrimitiveJavaObject(
            inspector.getStructFieldData(value, fields.get(0))));
    reader.close();

  }

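  // Only "columns" is set here (no "columns.types"), so the SerDe defaults both columns to
  // string, as the asserted struct<str:string,str2:string> type name confirms.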
  @Test
  public void testDefaultTypes() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer =
        outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
            true, properties, Reporter.NULL);
    writer.write(serde.serialize(new StringRow("owen"), inspector));
    writer.write(serde.serialize(new StringRow("beth"), inspector));
    writer.write(serde.serialize(new StringRow("laurel"), inspector));
    writer.write(serde.serialize(new StringRow("hazen"), inspector));
    writer.write(serde.serialize(new StringRow("colin"), inspector));
    writer.write(serde.serialize(new StringRow("miles"), inspector));
    writer.close(true);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    org.apache.hadoop.mapred.RecordReader reader =
        in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector)
        fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("owen", strInspector.getPrimitiveJavaObject(inspector.
        getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("beth", strInspector.getPrimitiveJavaObject(inspector.
        getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("laurel", strInspector.getPrimitiveJavaObject(inspector.
        getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("hazen", strInspector.getPrimitiveJavaObject(inspector.
        getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("colin", strInspector.getPrimitiveJavaObject(inspector.
        getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("miles", strInspector.getPrimitiveJavaObject(inspector.
        getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
  }

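  // Passes a null FileSystem to getRecordWriter; the writer resolves the output path from
  // mapred.work.output.dir set on the configuration above.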
  @Test
  public void testNullFileSystem() throws Exception {
    conf.set("mapred.work.output.dir", testFilePath.getParent().toString());
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    OrcSerde serde = new OrcSerde();
    OrcOutputFormat outFormat = new OrcOutputFormat();
    RecordWriter<NullWritable, OrcSerdeRow> writer =
        outFormat.getRecordWriter(null, conf, testFilePath.getName(), Reporter.NULL);

    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("a"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("b"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("c"), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    OrcInputFormat in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcLazyRow> reader =
        in.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    OrcLazyRow value = (OrcLazyRow) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector)
        fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("a", strInspector.getPrimitiveJavaObject(inspector.
        getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("b", strInspector.getPrimitiveJavaObject(inspector.
        getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("c", strInspector.getPrimitiveJavaObject(inspector.
        getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
  }

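    // Fragment: builds a writable ObjectInspector from a type string, then exercises the
    // struct, list, and map inspectors directly against an OrcStruct and plain Java collections.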
    TypeInfo typeInfo =
        TypeInfoUtils.getTypeInfoFromTypeString("struct<c1:boolean,c2:tinyint" +
            ",c3:smallint,c4:int,c5:bigint,c6:float,c7:double,c8:binary," +
            "c9:string,c10:struct<c1:int>,c11:map<int,int>,c12:uniontype<int>" +
            ",c13:array<timestamp>>");
    StructObjectInspector inspector = (StructObjectInspector)
        OrcLazyObjectInspectorUtils.createWritableObjectInspector(typeInfo);
    assertEquals("struct<c1:boolean,c2:tinyint,c3:smallint,c4:int,c5:" +
        "bigint,c6:float,c7:double,c8:binary,c9:string,c10:struct<" +
        "c1:int>,c11:map<int,int>,c12:uniontype<int>,c13:array<timestamp>>",
        inspector.getTypeName());
    assertEquals(null,
        inspector.getAllStructFieldRefs().get(0).getFieldComment());
    assertEquals(null, inspector.getStructFieldRef("UNKNOWN"));
    final List<String> fieldNames = ImmutableList.of(
        "field0", "field1", "field2", "field3", "field4",
        "field5", "field6", "field7", "field8", "field9",
        "field10", "field11", "field12");

    OrcStruct s1 = new OrcStruct(fieldNames);
    for(int i=0; i < 13; ++i) {
      s1.setFieldValue(i, i);
    }

    final List<Object> list = ImmutableList.of(
        (Object)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
    assertEquals(list, inspector.getStructFieldsDataAsList(s1));
    ListObjectInspector listOI = (ListObjectInspector)
        inspector.getAllStructFieldRefs().get(12).getFieldObjectInspector();
    assertEquals(ObjectInspector.Category.LIST, listOI.getCategory());
    assertEquals(10, listOI.getListElement(list, 10));
    assertEquals(13, listOI.getListLength(list));

    final Map<Integer, Integer> map = ImmutableMap.of(1,2,
                                                2,4,
                                                3,6);
    MapObjectInspector mapOI = (MapObjectInspector)
        inspector.getAllStructFieldRefs().get(10).getFieldObjectInspector();
    assertEquals(3, mapOI.getMapSize(map));
    assertEquals(4, mapOI.getMapValueElement(map, 2));
  }
