Examples of SerDe


Examples of org.apache.hadoop.hive.serde2.SerDe

    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer =
        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    ReaderWriterProfiler.setProfilerOptions(conf);
    writer.write(serde.serialize(new MyRow(1,2), inspector));
    writer.write(serde.serialize(new MyRow(2,2), inspector));
    writer.write(serde.serialize(new MyRow(3,2), inspector));
    writer.close(true);
    serde = new OrcSerde();
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    serde.initialize(conf, properties);
    assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<x:int,y:int>", inspector.getTypeName());
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // test the validate input method
    ArrayList<FileStatus> fileList = new ArrayList<FileStatus>();
    assertEquals(false,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(testFilePath));
    assertEquals(true,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
    fileList.add(fs.getFileStatus(workDir));
    assertEquals(false,
        ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));


    // read the whole file
    org.apache.hadoop.mapred.RecordReader reader =
        in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields =inspector.getAllStructFieldRefs();
    IntObjectInspector intInspector =
        (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(0))));
      assertEquals(2, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    assertEquals(1.0, reader.getProgress(), 0.00001);
    reader.close();

    // read just the first column
    conf.set("hive.io.file.readcolumn.ids", "0");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(value, fields.get(0))));
      assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
    }
    assertEquals(3, rowNum);
    reader.close();

    // test the mapping of empty string to all columns
    conf.set("hive.io.file.readcolumn.ids", "");
    reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    key = reader.createKey();
    value = (Writable) reader.createValue();
    rowNum = 0;
    fields = inspector.getAllStructFieldRefs();
    while (reader.next(key, value)) {
      assertEquals(++rowNum, intInspector.get(inspector.
          getStructFieldData(value, fields.get(0))));
      assertEquals(2, intInspector.get(inspector.
          getStructFieldData(serde.deserialize(value), fields.get(1))));
    }
    assertEquals(3, rowNum);
    reader.close();
  }
View Full Code Here

Examples of org.apache.hadoop.hive.serde2.SerDe

    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer =
        outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
            Reporter.NULL);
    writer.write(NullWritable.get(),
        serde.serialize(new NestedRow(1,2,3), inspector));
    writer.write(NullWritable.get(),
        serde.serialize(new NestedRow(4,5,6), inspector));
    writer.write(NullWritable.get(),
        serde.serialize(new NestedRow(7,8,9), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    conf.set("hive.io.file.readcolumn.ids", "1");
View Full Code Here

Examples of org.apache.hadoop.hive.serde2.SerDe

        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    writer.close(true);
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    SerDe serde = new OrcSerde();
    serde.initialize(conf, properties);
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    conf.set("hive.io.file.readcolumn.ids", "0,1");
    org.apache.hadoop.mapred.RecordReader reader =
        in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    assertEquals(false, reader.next(key, value));
    reader.close();
    assertEquals(null, serde.getSerDeStats());
  }
View Full Code Here

Examples of org.apache.hadoop.hive.serde2.SerDe

    synchronized (TestOrcFile.class) {
      inspector = (StructObjectInspector)
          ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer =
        outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
            true, properties, Reporter.NULL);
    writer.write(serde.serialize(new StringRow("owen"), inspector));
    writer.write(serde.serialize(new StringRow("beth"), inspector));
    writer.write(serde.serialize(new StringRow("laurel"), inspector));
    writer.write(serde.serialize(new StringRow("hazen"), inspector));
    writer.write(serde.serialize(new StringRow("colin"), inspector));
    writer.write(serde.serialize(new StringRow("miles"), inspector));
    writer.close(true);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
    InputFormat<?,?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
View Full Code Here

Examples of org.apache.hadoop.hive.serde2.SerDe

    StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClassBigger.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);         
    SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    ObjectInspector serdeOI1 = serde1.getObjectInspector();
   
    StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClass.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);         
    SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();
   
    int num = 100;
    for (int itest=0; itest<num; itest++) {
      int randField = r.nextInt(11);
      Byte b    = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
      Short s   = randField > 1 ? null : Short.valueOf((short)r.nextInt());
      Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
      Long l    = randField > 3 ? null : Long.valueOf(r.nextLong());
      Float f   = randField > 4 ? null : Float.valueOf(r.nextFloat());
      Double d  = randField > 5 ? null : Double.valueOf(r.nextDouble());
      String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
      MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
      List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
      Map<String, List<MyTestInnerStruct>> mp = new HashMap<String, List<MyTestInnerStruct>>();
      String key = TestBinarySortableSerDe.getRandString(r);
      List<MyTestInnerStruct> value = randField > 10 ? null: getRandStructArray(r);
      mp.put(key, value);
      String key1 = TestBinarySortableSerDe.getRandString(r);   
      mp.put(key1, null);
      String key2 = TestBinarySortableSerDe.getRandString(r);
      List<MyTestInnerStruct> value2 = getRandStructArray(r);
      mp.put(key2, value2);
     
      MyTestClassBigger input = new MyTestClassBigger(b,s,n,l,f,d,st,is,li,mp);  
      BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);   
      Object output = serde2.deserialize(bw);   
     
      if(0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
        System.out.println("structs      = " + SerDeUtils.getJSONString(input, rowOI1));
        System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
        System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));
View Full Code Here

Examples of org.apache.hadoop.hive.serde2.SerDe

    StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClass.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);         
    SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    ObjectInspector serdeOI1 = serde1.getObjectInspector();

    StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClassSmaller.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);         
    SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();

    int num = 100;
    for (int itest=0; itest<num; itest++) {
      int randField = r.nextInt(10);
      Byte b    = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
      Short s   = randField > 1 ? null : Short.valueOf((short)r.nextInt());
      Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
      Long l    = randField > 3 ? null : Long.valueOf(r.nextLong());
      Float f   = randField > 4 ? null : Float.valueOf(r.nextFloat());
      Double d  = randField > 5 ? null : Double.valueOf(r.nextDouble());
      String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
      MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
      List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
     
      MyTestClass input = new MyTestClass(b,s,n,l,f,d,st,is,li);  
      BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
      Object output = serde2.deserialize(bw);   
     
      if(0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
        System.out.println("structs      = " + SerDeUtils.getJSONString(input, rowOI1));
        System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
        System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));
View Full Code Here

Examples of org.apache.hadoop.hive.serde2.SerDe

    StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClass.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);         
    SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    ObjectInspector serdeOI1 = serde1.getObjectInspector();

    StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClassBigger.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);         
    SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();

    int num = 100;
    for (int itest=0; itest<num; itest++) {
      int randField = r.nextInt(10);
      Byte b    = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
      Short s   = randField > 1 ? null : Short.valueOf((short)r.nextInt());
      Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
      Long l    = randField > 3 ? null : Long.valueOf(r.nextLong());
      Float f   = randField > 4 ? null : Float.valueOf(r.nextFloat());
      Double d  = randField > 5 ? null : Double.valueOf(r.nextDouble());
      String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
      MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
      List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
     
      MyTestClass input = new MyTestClass(b,s,n,l,f,d,st,is,li);  
      BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
      Object output = serde2.deserialize(bw);   
     
      if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
        System.out.println("structs      = " + SerDeUtils.getJSONString(input, rowOI1));
        System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
        System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));  
View Full Code Here

Examples of org.apache.hadoop.hive.serde2.SerDe

    StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClassSmaller.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);         
    SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    ObjectInspector serdeOI1 = serde1.getObjectInspector();

    StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClass.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);         
    SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();

    int num = 100;
    for (int itest=0; itest<num; itest++) {
      int randField = r.nextInt(9);
      Byte b    = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
      Short s   = randField > 1 ? null : Short.valueOf((short)r.nextInt());
      Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
      Long l    = randField > 3 ? null : Long.valueOf(r.nextLong());
      Float f   = randField > 4 ? null : Float.valueOf(r.nextFloat());
      Double d  = randField > 5 ? null : Double.valueOf(r.nextDouble());
      String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
      MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);

      MyTestClassSmaller input = new MyTestClassSmaller(b,s,n,l,f,d,st,is);  
      BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);      
      Object output = serde2.deserialize(bw);   
     
      if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
        System.out.println("structs      = " + SerDeUtils.getJSONString(input, rowOI1));
        System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
        System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));  
View Full Code Here

Examples of org.apache.hadoop.hive.serde2.SerDe

    StructObjectInspector rowOI = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClassBigger.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);         
    SerDe serde = getSerDe(fieldNames, fieldTypes);
    ObjectInspector serdeOI = serde.getObjectInspector();

    StructObjectInspector soi1 = (StructObjectInspector)serdeOI;
    List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
    LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1.get(9).getFieldObjectInspector();
    ObjectInspector lazympkeyoi   = lazympoi.getMapKeyObjectInspector();
    ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector();   
   
    StructObjectInspector soi2 = (StructObjectInspector)rowOI;
    List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();   
    MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(9).getFieldObjectInspector();
    ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector();
    ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector();

    int num = 100;
    for (int testi=0; testi<num; testi++) {

      Map<String, List<MyTestInnerStruct>> mp = new LinkedHashMap<String, List<MyTestInnerStruct>>();
     
      int randFields = r.nextInt(10);
      for (int i=0; i<randFields; i++) {
        String key = TestBinarySortableSerDe.getRandString(r);
        int randField = r.nextInt(10);
        List<MyTestInnerStruct> value = randField > 4 ? null: getRandStructArray(r);
        mp.put(key, value);       
      }
           
      MyTestClassBigger input = new MyTestClassBigger(null,null,null,null,null,null,null,null,null,mp);  
      BytesWritable bw = (BytesWritable) serde.serialize(input, rowOI);   
      Object output = serde.deserialize(bw);     
      Object lazyobj = soi1.getStructFieldData(output, fields1.get(9));
      Map<?, ?> outputmp = lazympoi.getMap(lazyobj);

      if (outputmp.size() != mp.size()) {
        throw new RuntimeException("Map size changed from " + mp.size() + " to " + outputmp.size() + " after serialization!");
View Full Code Here

Examples of org.apache.hadoop.hive.serde2.SerDe

      if (tag != posBigTable) {
        if (firstRow) {
          metadataKeyTag = nextVal++;
         
          tableDesc keyTableDesc = conf.getKeyTblDesc();
          SerDe keySerializer = (SerDe)ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
          keySerializer.initialize(null, keyTableDesc.getProperties());

          mapMetadata.put(Integer.valueOf(metadataKeyTag),
              new MapJoinObjectCtx(
                  ObjectInspectorUtils.getStandardObjectInspector(keySerializer.getObjectInspector(),
                      ObjectInspectorCopyOption.WRITABLE),
                  keySerializer));
         
          firstRow = false;
        }
       
        // Send some status periodically
        numMapRowsRead++;
        if (((numMapRowsRead % heartbeatInterval) == 0) && (reporter != null))
          reporter.progress();

        HTree hashTable = mapJoinTables.get(alias);
        MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key);
        MapJoinObjectValue o = (MapJoinObjectValue)hashTable.get(keyMap);
        ArrayList<ArrayList<Object>> res = null;
       
        if (o == null) {
          res = new ArrayList<ArrayList<Object>>();
        }
        else {
          res = o.getObj();
        }
       
        res.add(value);
 
        if (metadataValueTag[tag] == -1) {
          metadataValueTag[tag] = nextVal++;
                   
          tableDesc valueTableDesc = conf.getValueTblDescs().get(tag);
          SerDe valueSerDe = (SerDe)ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
          valueSerDe.initialize(null, valueTableDesc.getProperties());
          mapMetadata.put(Integer.valueOf(metadataValueTag[tag]),
              new MapJoinObjectCtx(
                  ObjectInspectorUtils.getStandardObjectInspector(valueSerDe.getObjectInspector(),
                      ObjectInspectorCopyOption.WRITABLE),
              valueSerDe));
        }
       
        // Construct externalizable objects for key and value
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.