Package org.apache.hadoop.hive.serde2.lazybinary

Source Code of org.apache.hadoop.hive.serde2.lazybinary.TestLazyBinarySerDe

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.lazybinary;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.io.BytesWritable;

import junit.framework.TestCase;

public class TestLazyBinarySerDe  extends TestCase {
   
  /**
   * Generate a random struct array
   * @param r   random number generator
   * @return    an struct array
   */
  static List<MyTestInnerStruct> getRandStructArray(Random r) {
    int length = r.nextInt(10);
    ArrayList<MyTestInnerStruct> result = new ArrayList<MyTestInnerStruct>(length);
    for(int i=0; i<length; i++) {
      MyTestInnerStruct ti = new MyTestInnerStruct(r.nextInt(), r.nextInt());
      result.add(ti);
    }
    return result;   
  }
 
  /**
   * Initialize the LazyBinarySerDe
   * @param fieldNames    table field names
   * @param fieldTypes    table field types
   * @return              the initialized LazyBinarySerDe
   * @throws Throwable
   */
  private SerDe getSerDe(String fieldNames, String fieldTypes) throws Throwable {
    Properties schema = new Properties();
    schema.setProperty(Constants.LIST_COLUMNS, fieldNames);
    schema.setProperty(Constants.LIST_COLUMN_TYPES, fieldTypes);

    LazyBinarySerDe serde = new LazyBinarySerDe();
    serde.initialize(new Configuration(), schema);
    return serde;
  }
 
  /**
   * Test the LazyBinarySerDe.
   * @param rows        array of structs to be serialized
   * @param rowOI       array of struct object inspectors
   * @param serde       the serde
   * @throws Throwable 
   */
  private void testLazyBinarySerDe(Object[] rows,
      ObjectInspector rowOI, SerDe serde) throws Throwable{
   
    ObjectInspector serdeOI = serde.getObjectInspector();

    // Try to serialize
    BytesWritable bytes[] = new BytesWritable[rows.length];
    for (int i=0; i<rows.length; i++) {
      BytesWritable s = (BytesWritable)serde.serialize(rows[i], rowOI);
      bytes[i] = new BytesWritable();
      bytes[i].set(s);
    }

    // Try to deserialize
    Object[] deserialized = new Object[rows.length];
    for (int i=0; i<rows.length; i++) {
      deserialized[i] = serde.deserialize(bytes[i]);
      if (0 != ObjectInspectorUtils.compare(rows[i], rowOI, deserialized[i], serdeOI)) {
        System.out.println("structs[" + i + "] = " + SerDeUtils.getJSONString(rows[i], rowOI));
        System.out.println("deserialized[" + i + "] = " + SerDeUtils.getJSONString(deserialized[i], serdeOI));
        System.out.println("serialized[" + i + "] = " + TestBinarySortableSerDe.hexString(bytes[i]));
        assertEquals(rows[i], deserialized[i]);
      }
    }
  }
 
  /**
   * Compare two structs that have different number of fields.
   * We just compare the first few common fields, ignoring the
   * fields existing in one struct but not the other.
   *
   * @see ObjectInspectorUtils#compare(Object, ObjectInspector, Object, ObjectInspector)
   */
  int compareDiffSizedStructs(Object o1, ObjectInspector oi1, Object o2, ObjectInspector oi2) {   
    StructObjectInspector soi1 = (StructObjectInspector)oi1;
    StructObjectInspector soi2 = (StructObjectInspector)oi2;
    List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
    List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
    int minimum = Math.min(fields1.size(), fields2.size());
    for (int i=0; i<minimum; i++) {
      int result = ObjectInspectorUtils.compare(
          soi1.getStructFieldData(o1, fields1.get(i)),
          fields1.get(i).getFieldObjectInspector(),
          soi2.getStructFieldData(o2, fields2.get(i)),
          fields2.get(i).getFieldObjectInspector());
      if (result != 0) {
        return result;
      }
    }
    return 0;
  }
 
 
  /**
   * Test shorter schema deserialization where a bigger struct is serialized
   * and it is then deserialized with a smaller struct.
   * Here the serialized struct has 10 fields and we deserialized to a
   * struct of 9 fields.
   */
  private void testShorterSchemaDeserialization(Random r) throws Throwable{
   
    StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClassBigger.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);         
    SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    ObjectInspector serdeOI1 = serde1.getObjectInspector();
   
    StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClass.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);         
    SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();
   
    int num = 100;
    for (int itest=0; itest<num; itest++) {
      int randField = r.nextInt(11);
      Byte b    = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
      Short s   = randField > 1 ? null : Short.valueOf((short)r.nextInt());
      Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
      Long l    = randField > 3 ? null : Long.valueOf(r.nextLong());
      Float f   = randField > 4 ? null : Float.valueOf(r.nextFloat());
      Double d  = randField > 5 ? null : Double.valueOf(r.nextDouble());
      String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
      MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
      List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
      Map<String, List<MyTestInnerStruct>> mp = new HashMap<String, List<MyTestInnerStruct>>();
      String key = TestBinarySortableSerDe.getRandString(r);
      List<MyTestInnerStruct> value = randField > 10 ? null: getRandStructArray(r);
      mp.put(key, value);
      String key1 = TestBinarySortableSerDe.getRandString(r);   
      mp.put(key1, null);
      String key2 = TestBinarySortableSerDe.getRandString(r);
      List<MyTestInnerStruct> value2 = getRandStructArray(r);
      mp.put(key2, value2);
     
      MyTestClassBigger input = new MyTestClassBigger(b,s,n,l,f,d,st,is,li,mp);  
      BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);   
      Object output = serde2.deserialize(bw);   
     
      if(0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
        System.out.println("structs      = " + SerDeUtils.getJSONString(input, rowOI1));
        System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
        System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));
        assertEquals(input, output);
      }
    }
  }
 
  /**
   * Test shorter schema deserialization where a bigger struct is serialized
   * and it is then deserialized with a smaller struct.
   * Here the serialized struct has 9 fields and we deserialized to a
   * struct of 8 fields.
   */
  private void testShorterSchemaDeserialization1(Random r) throws Throwable{
   
    StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClass.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);         
    SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    ObjectInspector serdeOI1 = serde1.getObjectInspector();

    StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClassSmaller.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);         
    SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();

    int num = 100;
    for (int itest=0; itest<num; itest++) {
      int randField = r.nextInt(10);
      Byte b    = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
      Short s   = randField > 1 ? null : Short.valueOf((short)r.nextInt());
      Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
      Long l    = randField > 3 ? null : Long.valueOf(r.nextLong());
      Float f   = randField > 4 ? null : Float.valueOf(r.nextFloat());
      Double d  = randField > 5 ? null : Double.valueOf(r.nextDouble());
      String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
      MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
      List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
     
      MyTestClass input = new MyTestClass(b,s,n,l,f,d,st,is,li);  
      BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
      Object output = serde2.deserialize(bw);   
     
      if(0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
        System.out.println("structs      = " + SerDeUtils.getJSONString(input, rowOI1));
        System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
        System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));
        assertEquals(input, output);
      }
    }
  }
 
  /**
   * Test longer schema deserialization where a smaller struct is serialized
   * and it is then deserialized with a bigger struct
   * Here the serialized struct has 9 fields and we deserialized to a
   * struct of 10 fields.
   */
  void testLongerSchemaDeserialization(Random r) throws Throwable{
   
    StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClass.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);         
    SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    ObjectInspector serdeOI1 = serde1.getObjectInspector();

    StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClassBigger.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);         
    SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();

    int num = 100;
    for (int itest=0; itest<num; itest++) {
      int randField = r.nextInt(10);
      Byte b    = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
      Short s   = randField > 1 ? null : Short.valueOf((short)r.nextInt());
      Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
      Long l    = randField > 3 ? null : Long.valueOf(r.nextLong());
      Float f   = randField > 4 ? null : Float.valueOf(r.nextFloat());
      Double d  = randField > 5 ? null : Double.valueOf(r.nextDouble());
      String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
      MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
      List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
     
      MyTestClass input = new MyTestClass(b,s,n,l,f,d,st,is,li);  
      BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
      Object output = serde2.deserialize(bw);   
     
      if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
        System.out.println("structs      = " + SerDeUtils.getJSONString(input, rowOI1));
        System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
        System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));  
        assertEquals(input, output);
      }
    }
  }
 
  /**
   * Test longer schema deserialization where a smaller struct is serialized
   * and it is then deserialized with a bigger struct
   * Here the serialized struct has 8 fields and we deserialized to a
   * struct of 9 fields.
   */
  void testLongerSchemaDeserialization1(Random r) throws Throwable{

    StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClassSmaller.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);         
    SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    ObjectInspector serdeOI1 = serde1.getObjectInspector();

    StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClass.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);         
    SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();

    int num = 100;
    for (int itest=0; itest<num; itest++) {
      int randField = r.nextInt(9);
      Byte b    = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
      Short s   = randField > 1 ? null : Short.valueOf((short)r.nextInt());
      Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
      Long l    = randField > 3 ? null : Long.valueOf(r.nextLong());
      Float f   = randField > 4 ? null : Float.valueOf(r.nextFloat());
      Double d  = randField > 5 ? null : Double.valueOf(r.nextDouble());
      String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
      MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);

      MyTestClassSmaller input = new MyTestClassSmaller(b,s,n,l,f,d,st,is);  
      BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);      
      Object output = serde2.deserialize(bw);   
     
      if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
        System.out.println("structs      = " + SerDeUtils.getJSONString(input, rowOI1));
        System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
        System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));  
        assertEquals(input, output);
      }
    }
  }
 
  void testLazyBinaryMap(Random rthrows Throwable {
   
    StructObjectInspector rowOI = (StructObjectInspector)ObjectInspectorFactory
    .getReflectionObjectInspector(MyTestClassBigger.class,
         ObjectInspectorOptions.JAVA);   
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);         
    SerDe serde = getSerDe(fieldNames, fieldTypes);
    ObjectInspector serdeOI = serde.getObjectInspector();

    StructObjectInspector soi1 = (StructObjectInspector)serdeOI;
    List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
    LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1.get(9).getFieldObjectInspector();
    ObjectInspector lazympkeyoi   = lazympoi.getMapKeyObjectInspector();
    ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector();   
   
    StructObjectInspector soi2 = (StructObjectInspector)rowOI;
    List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();   
    MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(9).getFieldObjectInspector();
    ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector();
    ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector();

    int num = 100;
    for (int testi=0; testi<num; testi++) {

      Map<String, List<MyTestInnerStruct>> mp = new LinkedHashMap<String, List<MyTestInnerStruct>>();
     
      int randFields = r.nextInt(10);
      for (int i=0; i<randFields; i++) {
        String key = TestBinarySortableSerDe.getRandString(r);
        int randField = r.nextInt(10);
        List<MyTestInnerStruct> value = randField > 4 ? null: getRandStructArray(r);
        mp.put(key, value);       
      }
           
      MyTestClassBigger input = new MyTestClassBigger(null,null,null,null,null,null,null,null,null,mp);  
      BytesWritable bw = (BytesWritable) serde.serialize(input, rowOI);   
      Object output = serde.deserialize(bw);     
      Object lazyobj = soi1.getStructFieldData(output, fields1.get(9));
      Map<?, ?> outputmp = lazympoi.getMap(lazyobj);

      if (outputmp.size() != mp.size()) {
        throw new RuntimeException("Map size changed from " + mp.size() + " to " + outputmp.size() + " after serialization!");
      }
     
      for (Map.Entry<?, ?> entryinput: mp.entrySet()) {
        boolean bEqual = false;
        for (Map.Entry<?, ?> entryoutput: outputmp.entrySet()) {
          // find the same key
          if (0 == ObjectInspectorUtils.compare(entryoutput.getKey(), lazympkeyoi, entryinput.getKey(), inputmpkeyoi)) {
            if(0 != ObjectInspectorUtils.compare(entryoutput.getValue(), lazympvalueoi, entryinput.getValue(), inputmpvalueoi)) {
              assertEquals(entryoutput.getValue(), entryinput.getValue());   
            } else {
              bEqual = true;
            }
            break;
          }         
        }
        if(!bEqual)
          throw new RuntimeException("Could not find matched key in deserialized map : " + entryinput.getKey());
      }
    }
  }
 
  /**
   * The test entrance function
   * @throws Throwable
   */
  public void testLazyBinarySerDe() throws Throwable {
    try {

      System.out.println("Beginning Test TestLazyBinarySerDe:");
     
      // generate the data
      int num = 1000;
      Random r = new Random(1234);
      MyTestClass rows[] = new MyTestClass[num];
      for (int i=0; i<num; i++) {
        int randField = r.nextInt(10);
        Byte b    = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
        Short s   = randField > 1 ? null : Short.valueOf((short)r.nextInt());
        Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
        Long l    = randField > 3 ? null : Long.valueOf(r.nextLong());
        Float f   = randField > 4 ? null : Float.valueOf(r.nextFloat());
        Double d  = randField > 5 ? null : Double.valueOf(r.nextDouble());
        String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
        MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
        List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
        MyTestClass t = new MyTestClass(b,s,n,l,f,d,st,is,li);
        rows[i] = t;
      }
     
      StructObjectInspector rowOI = (StructObjectInspector)ObjectInspectorFactory
      .getReflectionObjectInspector(MyTestClass.class,
           ObjectInspectorOptions.JAVA);
     
      String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
      String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
     
      // call the tests
      // 1/ test LazyBinarySerDe
      testLazyBinarySerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes));
      // 2/ test LazyBinaryMap
      testLazyBinaryMap(r);
      // 3/ test serialization and deserialization with different schemas
      testShorterSchemaDeserialization(r);
      // 4/ test serialization and deserialization with different schemas
      testLongerSchemaDeserialization(r);   
      // 5/ test serialization and deserialization with different schemas
      testShorterSchemaDeserialization1(r);
      // 6/ test serialization and deserialization with different schemas
      testLongerSchemaDeserialization1(r);     
     
      System.out.println("Test TestLazyBinarySerDe passed!");
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
    }
  }
}
TOP

Related Classes of org.apache.hadoop.hive.serde2.lazybinary.TestLazyBinarySerDe

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.