
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.hbase;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import junit.framework.Assert;
import junit.framework.TestCase;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hive.hbase.avro.Address;
import org.apache.hadoop.hive.hbase.avro.ContactInfo;
import org.apache.hadoop.hive.hbase.avro.Employee;
import org.apache.hadoop.hive.hbase.avro.Gender;
import org.apache.hadoop.hive.hbase.avro.HomePhone;
import org.apache.hadoop.hive.hbase.avro.OfficePhone;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.thrift.TException;

/**
* Tests the HBaseSerDe class.
*/
public class TestHBaseSerDe extends TestCase {

  static final byte[] TEST_BYTE_ARRAY = Bytes.toBytes("test");

  private static final String RECORD_SCHEMA = "{\n" +
      "  \"namespace\": \"testing.test.mctesty\",\n" +
      "  \"name\": \"oneRecord\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "    {\n" +
      "      \"name\":\"aRecord\",\n" +
      "      \"type\":{\"type\":\"record\",\n" +
      "              \"name\":\"recordWithinARecord\",\n" +
      "              \"fields\": [\n" +
      "                 {\n" +
      "                  \"name\":\"int1\",\n" +
      "                  \"type\":\"int\"\n" +
      "                },\n" +
      "                {\n" +
      "                  \"name\":\"boolean1\",\n" +
      "                  \"type\":\"boolean\"\n" +
      "                },\n" +
      "                {\n" +
      "                  \"name\":\"long1\",\n" +
      "                  \"type\":\"long\"\n" +
      "                }\n" +
      "      ]}\n" +
      "    }\n" +
      "  ]\n" +
      "}";

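  // A forward-evolved variant of RECORD_SCHEMA: it adds a "string1" field with a
  // default value, which is what allows records written with one schema to be read
  // with the other in the schema-evolution tests below.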
  private static final String RECORD_SCHEMA_EVOLVED = "{\n" +
      "  \"namespace\": \"testing.test.mctesty\",\n" +
      "  \"name\": \"oneRecord\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "    {\n" +
      "      \"name\":\"aRecord\",\n" +
      "      \"type\":{\"type\":\"record\",\n" +
      "              \"name\":\"recordWithinARecord\",\n" +
      "              \"fields\": [\n" +
      "                 {\n" +
      "                  \"name\":\"int1\",\n" +
      "                  \"type\":\"int\"\n" +
      "                },\n" +
      "                {\n" +
      "                  \"name\":\"string1\",\n" +
      "                  \"type\":\"string\", \"default\": \"test\"\n" +
      "                },\n" +
      "                {\n" +
      "                  \"name\":\"boolean1\",\n" +
      "                  \"type\":\"boolean\"\n" +
      "                },\n" +
      "                {\n" +
      "                  \"name\":\"long1\",\n" +
      "                  \"type\":\"long\"\n" +
      "                }\n" +
      "      ]}\n" +
      "    }\n" +
      "  ]\n" +
      "}";

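  // Expected JSON renderings of the deserialized Avro rows; the Avro tests below
  // compare these against the output of SerDeUtils.getJSONString (via the
  // deserializeAndSerializeHiveAvro helper).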
  private static final String EXPECTED_DESERIALIZED_AVRO_STRING =
      "{\"key\":\"test-row1\",\"cola_avro\":{\"arecord\":{\"int1\":42,\"boolean1\":true,"
          + "\"long1\":42432234234}}}";

  private static final String EXPECTED_DESERIALIZED_AVRO_STRING_2 =
"{\"key\":\"test-row1\","
      + "\"cola_avro\":{\"employeename\":\"Avro Employee1\","
      + "\"employeeid\":11111,\"age\":25,\"gender\":\"FEMALE\","
      + "\"contactinfo\":{\"address\":[{\"address1\":\"Avro First Address1\",\"address2\":"
      + "\"Avro Second Address1\",\"city\":\"Avro City1\",\"zipcode\":123456,\"county\":"
      + "{0:{\"areacode\":999,\"number\":1234567890}},\"aliases\":null,\"metadata\":"
      + "{\"testkey\":\"testvalue\"}},{\"address1\":\"Avro First Address1\",\"address2\":"
      + "\"Avro Second Address1\",\"city\":\"Avro City1\",\"zipcode\":123456,\"county\":"
      + "{0:{\"areacode\":999,\"number\":1234567890}},\"aliases\":null,\"metadata\":"
      + "{\"testkey\":\"testvalue\"}}],\"homephone\":{\"areacode\":999,\"number\":1234567890},"
      + "\"officephone\":{\"areacode\":999,\"number\":1234455555}}}}";

  private static final String EXPECTED_DESERIALIZED_AVRO_STRING_3 =
      "{\"key\":\"test-row1\",\"cola_avro\":{\"arecord\":{\"int1\":42,\"string1\":\"test\","
          + "\"boolean1\":true,\"long1\":42432234234}}}";

  /**
   * Test the default behavior of the Lazy family of objects and object inspectors.
   */
  public void testHBaseSerDeI() throws SerDeException {

    byte [] cfa = "cola".getBytes();
    byte [] cfb = "colb".getBytes();
    byte [] cfc = "colc".getBytes();

    byte [] qualByte = "byte".getBytes();
    byte [] qualShort = "short".getBytes();
    byte [] qualInt = "int".getBytes();
    byte [] qualLong = "long".getBytes();
    byte [] qualFloat = "float".getBytes();
    byte [] qualDouble = "double".getBytes();
    byte [] qualString = "string".getBytes();
    byte [] qualBool = "boolean".getBytes();

    byte [] rowKey = Bytes.toBytes("test-row1");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    kvs.add(new KeyValue(rowKey, cfa, qualByte, Bytes.toBytes("123")));
    kvs.add(new KeyValue(rowKey, cfb, qualShort, Bytes.toBytes("456")));
    kvs.add(new KeyValue(rowKey, cfc, qualInt, Bytes.toBytes("789")));
    kvs.add(new KeyValue(rowKey, cfa, qualLong, Bytes.toBytes("1000")));
    kvs.add(new KeyValue(rowKey, cfb, qualFloat, Bytes.toBytes("-0.01")));
    kvs.add(new KeyValue(rowKey, cfc, qualDouble, Bytes.toBytes("5.3")));
    kvs.add(new KeyValue(rowKey, cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive")));
    kvs.add(new KeyValue(rowKey, cfb, qualBool, Bytes.toBytes("true")));
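    // Result expects its KeyValues in the same sorted order HBase stores them in,
    // so sort before constructing it.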
    Collections.sort(kvs, KeyValue.COMPARATOR);

    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    p.add(cfa, qualByte, Bytes.toBytes("123"));
    p.add(cfb, qualShort, Bytes.toBytes("456"));
    p.add(cfc, qualInt, Bytes.toBytes("789"));
    p.add(cfa, qualLong, Bytes.toBytes("1000"));
    p.add(cfb, qualFloat, Bytes.toBytes("-0.01"));
    p.add(cfc, qualDouble, Bytes.toBytes("5.3"));
    p.add(cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive"));
    p.add(cfb, qualBool, Bytes.toBytes("true"));

    Object[] expectedFieldsData = {
      new Text("test-row1"),
      new ByteWritable((byte)123),
      new ShortWritable((short)456),
      new IntWritable(789),
      new LongWritable(1000),
      new FloatWritable(-0.01F),
      new DoubleWritable(5.3),
      new Text("Hadoop, HBase, and Hive"),
      new BooleanWritable(true)
    };

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesI_I();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

    deserializeAndSerialize(serDe, r, p, expectedFieldsData);

    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesI_II();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

    deserializeAndSerialize(serDe, r, p, expectedFieldsData);

    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesI_III();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

    deserializeAndSerialize(serDe, r, p, expectedFieldsData);

    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesI_IV();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
  }

  public void testHBaseSerDeWithTimestamp() throws SerDeException {
    // Create the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesI_I();
    long putTimestamp = 1;
    tbl.setProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP,
            Long.toString(putTimestamp));
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);


    byte [] cfa = "cola".getBytes();
    byte [] cfb = "colb".getBytes();
    byte [] cfc = "colc".getBytes();

    byte [] qualByte = "byte".getBytes();
    byte [] qualShort = "short".getBytes();
    byte [] qualInt = "int".getBytes();
    byte [] qualLong = "long".getBytes();
    byte [] qualFloat = "float".getBytes();
    byte [] qualDouble = "double".getBytes();
    byte [] qualString = "string".getBytes();
    byte [] qualBool = "boolean".getBytes();

    byte [] rowKey = Bytes.toBytes("test-row1");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    kvs.add(new KeyValue(rowKey, cfa, qualByte, Bytes.toBytes("123")));
    kvs.add(new KeyValue(rowKey, cfb, qualShort, Bytes.toBytes("456")));
    kvs.add(new KeyValue(rowKey, cfc, qualInt, Bytes.toBytes("789")));
    kvs.add(new KeyValue(rowKey, cfa, qualLong, Bytes.toBytes("1000")));
    kvs.add(new KeyValue(rowKey, cfb, qualFloat, Bytes.toBytes("-0.01")));
    kvs.add(new KeyValue(rowKey, cfc, qualDouble, Bytes.toBytes("5.3")));
    kvs.add(new KeyValue(rowKey, cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive")));
    kvs.add(new KeyValue(rowKey, cfb, qualBool, Bytes.toBytes("true")));
    Collections.sort(kvs, KeyValue.COMPARATOR);

    Result r = new Result(kvs);

    Put p = new Put(rowKey, putTimestamp);

    p.add(cfa, qualByte, Bytes.toBytes("123"));
    p.add(cfb, qualShort, Bytes.toBytes("456"));
    p.add(cfc, qualInt, Bytes.toBytes("789"));
    p.add(cfa, qualLong, Bytes.toBytes("1000"));
    p.add(cfb, qualFloat, Bytes.toBytes("-0.01"));
    p.add(cfc, qualDouble, Bytes.toBytes("5.3"));
    p.add(cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive"));
    p.add(cfb, qualBool, Bytes.toBytes("true"));

    Object[] expectedFieldsData = {
      new Text("test-row1"),
      new ByteWritable((byte)123),
      new ShortWritable((short)456),
      new IntWritable(789),
      new LongWritable(1000),
      new FloatWritable(-0.01F),
      new DoubleWritable(5.3),
      new Text("Hadoop, HBase, and Hive"),
      new BooleanWritable(true)
    };

    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
  }

  private void deserializeAndSerialize(
      HBaseSerDe serDe, Result r, Put p,
      Object[] expectedFieldsData) throws SerDeException {

    // Get the row structure
    StructObjectInspector oi = (StructObjectInspector)
      serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals(9, fieldRefs.size());

    // Deserialize
    Object row = serDe.deserialize(new ResultWritable(r));
    for (int i = 0; i < fieldRefs.size(); i++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
      if (fieldData != null) {
        fieldData = ((LazyPrimitive<?, ?>)fieldData).getWritableObject();
      }
      assertEquals("Field " + i, expectedFieldsData[i], fieldData);
    }
    // Serialize
    assertEquals(PutWritable.class, serDe.getSerializedClass());
    PutWritable serializedPut = (PutWritable) serDe.serialize(row, oi);
    assertEquals("Serialized data", p.toString(),String.valueOf(serializedPut.getPut()));
  }

  // With no storage specification, everything defaults to UTF-8 string storage
  // for backwards compatibility
  private Properties createPropertiesI_I() {
    Properties tbl = new Properties();

    // Set the configuration parameters
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
    tbl.setProperty("columns.types",
        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        "cola:byte,colb:short,colc:int,cola:long,colb:float,colc:double,cola:string,colb:boolean");
    return tbl;
  }
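
  // For reference, roughly the Hive DDL that would produce the properties above
  // (a sketch, assuming the standard HBaseStorageHandler; the row key is mapped
  // implicitly here because the mapping omits ":key"):
  //
  //   CREATE TABLE hbase_t (key string, abyte tinyint, ashort smallint, aint int,
  //     along bigint, afloat float, adouble double, astring string, abool boolean)
  //   STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
  //   WITH SERDEPROPERTIES ("hbase.columns.mapping" =
  //     "cola:byte,colb:short,colc:int,cola:long,colb:float,colc:double,cola:string,colb:boolean");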

  // A column-level default specification ("#-") inherits from the table-level default
  // (in this case a missing specification, i.e. UTF-8 string storage)
  private Properties createPropertiesI_II() {
    Properties tbl = new Properties();

    // Set the configuration parameters
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
    tbl.setProperty("columns.types",
        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key#-,cola:byte#s,colb:short#-,colc:int#s,cola:long#s,colb:float#-,colc:double#-," +
        "cola:string#s,colb:boolean#s");
    return tbl;
  }

  // The string storage type overrides the table-level default of binary storage
  private Properties createPropertiesI_III() {
    Properties tbl = new Properties();

    // Set the configuration parameters
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
    tbl.setProperty("columns.types",
        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key#s,cola:byte#s,colb:short#s,colc:int#s,cola:long#s,colb:float#s,colc:double#s," +
        "cola:string#s,colb:boolean#s");
    tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary");
    return tbl;
  }

  // The string type is never stored as anything other than an escaped string;
  // a specification of binary storage should not affect its ser/de.
  private Properties createPropertiesI_IV() {
    Properties tbl = new Properties();

    // Set the configuration parameters
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
    tbl.setProperty("columns.types",
        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key#-,cola:byte#s,colb:short#s,colc:int#s,cola:long#s,colb:float#s,colc:double#s," +
        "cola:string#b,colb:boolean#s");
    tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary");
    return tbl;
  }

  public void testHBaseSerDeII() throws SerDeException {

    byte [] cfa = "cfa".getBytes();
    byte [] cfb = "cfb".getBytes();
    byte [] cfc = "cfc".getBytes();

    byte [] qualByte = "byte".getBytes();
    byte [] qualShort = "short".getBytes();
    byte [] qualInt = "int".getBytes();
    byte [] qualLong = "long".getBytes();
    byte [] qualFloat = "float".getBytes();
    byte [] qualDouble = "double".getBytes();
    byte [] qualString = "string".getBytes();
    byte [] qualBool = "boolean".getBytes();

    byte [] rowKey = Bytes.toBytes("test-row-2");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

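    // Unlike testHBaseSerDeI, the cell values here are the native binary encodings
    // produced by Bytes.toBytes(<primitive>) rather than string renderings, to
    // exercise binary storage.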
    kvs.add(new KeyValue(rowKey, cfa, qualByte, new byte [] { Byte.MIN_VALUE }));
    kvs.add(new KeyValue(rowKey, cfb, qualShort, Bytes.toBytes(Short.MIN_VALUE)));
    kvs.add(new KeyValue(rowKey, cfc, qualInt, Bytes.toBytes(Integer.MIN_VALUE)));
    kvs.add(new KeyValue(rowKey, cfa, qualLong, Bytes.toBytes(Long.MIN_VALUE)));
    kvs.add(new KeyValue(rowKey, cfb, qualFloat, Bytes.toBytes(Float.MIN_VALUE)));
    kvs.add(new KeyValue(rowKey, cfc, qualDouble, Bytes.toBytes(Double.MAX_VALUE)));
    kvs.add(new KeyValue(rowKey, cfa, qualString, Bytes.toBytes(
      "Hadoop, HBase, and Hive Again!")));
    kvs.add(new KeyValue(rowKey, cfb, qualBool, Bytes.toBytes(false)));

    Collections.sort(kvs, KeyValue.COMPARATOR);
    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    p.add(cfa, qualByte, new byte [] { Byte.MIN_VALUE });
    p.add(cfb, qualShort, Bytes.toBytes(Short.MIN_VALUE));
    p.add(cfc, qualInt, Bytes.toBytes(Integer.MIN_VALUE));
    p.add(cfa, qualLong, Bytes.toBytes(Long.MIN_VALUE));
    p.add(cfb, qualFloat, Bytes.toBytes(Float.MIN_VALUE));
    p.add(cfc, qualDouble, Bytes.toBytes(Double.MAX_VALUE));
    p.add(cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive Again!"));
    p.add(cfb, qualBool, Bytes.toBytes(false));

    Object[] expectedFieldsData = {
      new Text("test-row-2"),
      new ByteWritable(Byte.MIN_VALUE),
      new ShortWritable(Short.MIN_VALUE),
      new IntWritable(Integer.MIN_VALUE),
      new LongWritable(Long.MIN_VALUE),
      new FloatWritable(Float.MIN_VALUE),
      new DoubleWritable(Double.MAX_VALUE),
      new Text("Hadoop, HBase, and Hive Again!"),
      new BooleanWritable(false)
    };

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesII_I();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

    deserializeAndSerialize(serDe, r, p, expectedFieldsData);

    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesII_II();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

    deserializeAndSerialize(serDe, r, p, expectedFieldsData);

    serDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesII_III();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
  }

  private Properties createPropertiesII_I() {
    Properties tbl = new Properties();

    // Set the configuration parameters
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
    tbl.setProperty("columns.types",
        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key#-,cfa:byte#b,cfb:short#b,cfc:int#-,cfa:long#b,cfb:float#-,cfc:double#b," +
        "cfa:string#b,cfb:boolean#-");
    tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary");
    return tbl;
  }

  private Properties createPropertiesII_II() {
    Properties tbl = new Properties();

    // Set the configuration parameters
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
    tbl.setProperty("columns.types",
        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key#b,cfa:byte#b,cfb:short#b,cfc:int#b,cfa:long#b,cfb:float#b,cfc:double#b," +
        "cfa:string#b,cfb:boolean#b");
    tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string");
    return tbl;
  }

  private Properties createPropertiesII_III() {
    Properties tbl = new Properties();

    // Set the configuration parameters
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
    tbl.setProperty("columns.types",
        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key#-,cfa:byte#b,cfb:short#b,cfc:int#b,cfa:long#b,cfb:float#b,cfc:double#b," +
        "cfa:string#-,cfb:boolean#b");
    return tbl;
  }

  public void testHBaseSerDeWithHiveMapToHBaseColumnFamily() throws SerDeException {

    byte [] cfint = "cf-int".getBytes();
    byte [] cfbyte = "cf-byte".getBytes();
    byte [] cfshort = "cf-short".getBytes();
    byte [] cflong = "cf-long".getBytes();
    byte [] cffloat = "cf-float".getBytes();
    byte [] cfdouble = "cf-double".getBytes();
    byte [] cfbool = "cf-bool".getBytes();

    byte [][] columnFamilies =
      new byte [][] {cfint, cfbyte, cfshort, cflong, cffloat, cfdouble, cfbool};

    byte [][] rowKeys = new byte [][] {
        Integer.toString(1).getBytes(),
        Integer.toString(Integer.MIN_VALUE).getBytes(),
        Integer.toString(Integer.MAX_VALUE).getBytes()
    };

    byte [][][] columnQualifiersAndValues = new byte [][][] {
        {Bytes.toBytes(1), new byte [] {1}, Bytes.toBytes((short) 1),
         Bytes.toBytes((long) 1), Bytes.toBytes((float) 1.0F), Bytes.toBytes(1.0),
         Bytes.toBytes(true)},
        {Bytes.toBytes(Integer.MIN_VALUE), new byte [] {Byte.MIN_VALUE},
         Bytes.toBytes((short) Short.MIN_VALUE), Bytes.toBytes((long) Long.MIN_VALUE),
         Bytes.toBytes((float) Float.MIN_VALUE), Bytes.toBytes(Double.MIN_VALUE),
         Bytes.toBytes(false)},
        {Bytes.toBytes(Integer.MAX_VALUE), new byte [] {Byte.MAX_VALUE},
         Bytes.toBytes((short) Short.MAX_VALUE), Bytes.toBytes((long) Long.MAX_VALUE),
         Bytes.toBytes((float) Float.MAX_VALUE), Bytes.toBytes(Double.MAX_VALUE),
         Bytes.toBytes(true)}
    };

    List<KeyValue> kvs = new ArrayList<KeyValue>();
    Result [] r = new Result [] {null, null, null};
    Put [] p = new Put [] {null, null, null};

    for (int i = 0; i < r.length; i++) {
      kvs.clear();
      p[i] = new Put(rowKeys[i]);

      for (int j = 0; j < columnQualifiersAndValues[i].length; j++) {
        kvs.add(new KeyValue(rowKeys[i], columnFamilies[j], columnQualifiersAndValues[i][j],
            columnQualifiersAndValues[i][j]));
        p[i].add(columnFamilies[j], columnQualifiersAndValues[i][j],
            columnQualifiersAndValues[i][j]);
      }

      r[i] = new Result(kvs);
    }

    Object [][] expectedData = {
        {new Text(Integer.toString(1)), new IntWritable(1), new ByteWritable((byte) 1),
         new ShortWritable((short) 1), new LongWritable(1), new FloatWritable(1.0F),
         new DoubleWritable(1.0), new BooleanWritable(true)},
        {new Text(Integer.toString(Integer.MIN_VALUE)), new IntWritable(Integer.MIN_VALUE),
         new ByteWritable(Byte.MIN_VALUE), new ShortWritable(Short.MIN_VALUE),
         new LongWritable(Long.MIN_VALUE), new FloatWritable(Float.MIN_VALUE),
         new DoubleWritable(Double.MIN_VALUE), new BooleanWritable(false)},
        {new Text(Integer.toString(Integer.MAX_VALUE)), new IntWritable(Integer.MAX_VALUE),
         new ByteWritable(Byte.MAX_VALUE), new ShortWritable(Short.MAX_VALUE),
         new LongWritable(Long.MAX_VALUE), new FloatWritable(Float.MAX_VALUE),
         new DoubleWritable(Double.MAX_VALUE), new BooleanWritable(true)}};

    HBaseSerDe hbaseSerDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveMapHBaseColumnFamily();
    SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null);

    deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys,
        columnFamilies, columnQualifiersAndValues);

    hbaseSerDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesForHiveMapHBaseColumnFamilyII();
    SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null);

    deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys,
        columnFamilies, columnQualifiersAndValues);
  }

  private void deserializeAndSerializeHiveMapHBaseColumnFamily(
      HBaseSerDe hbaseSerDe,
      Result [] r,
      Put [] p,
      Object [][] expectedData,
      byte [][] rowKeys,
      byte [][] columnFamilies,
      byte [][][] columnQualifiersAndValues) throws SerDeException {

    StructObjectInspector soi = (StructObjectInspector) hbaseSerDe.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
    assertEquals(8, fieldRefs.size());

    // Deserialize
    for (int i = 0; i < r.length; i++) {
      Object row = hbaseSerDe.deserialize(new ResultWritable(r[i]));
      Put serializedPut = ((PutWritable) hbaseSerDe.serialize(row, soi)).getPut();
      byte [] rowKey = serializedPut.getRow();

      for (int k = 0; k < rowKey.length; k++) {
        assertEquals(rowKey[k], rowKeys[i][k]);
      }

      assertEquals(columnFamilies.length, serializedPut.numFamilies());

      for (int j = 0; j < fieldRefs.size(); j++) {
        Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j));

        assertNotNull(fieldData);

        if (fieldData instanceof LazyPrimitive<?, ?>) {
          assertEquals(expectedData[i][j],
              ((LazyPrimitive<?, ?>) fieldData).getWritableObject());
        } else if (fieldData instanceof LazyHBaseCellMap) {
          LazyPrimitive<?, ?> lazyPrimitive = (LazyPrimitive<?, ?>)
              ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedData[i][j]);
          assertEquals(expectedData[i][j], lazyPrimitive.getWritableObject());
        } else {
          fail("Error: field data not an instance of LazyPrimitive<?,?> or LazyMap");
        }
      }
    }
  }

  private Properties createPropertiesForHiveMapHBaseColumnFamily() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "string:map<int,int>:map<tinyint,tinyint>:map<smallint,smallint>:map<bigint,bigint>:"
            + "map<float,float>:map<double,double>:map<boolean,boolean>");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key#-,cf-int:#b:b,cf-byte:#b:b,cf-short:#b:b,cf-long:#b:b,cf-float:#b:b,cf-double:#b:b," +
        "cf-bool:#b:b");
    return tbl;
  }

  private Properties createPropertiesForHiveMapHBaseColumnFamilyII() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "string:map<int,int>:map<tinyint,tinyint>:map<smallint,smallint>:map<bigint,bigint>:"
            + "map<float,float>:map<double,double>:map<boolean,boolean>");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key#-,cf-int:#-:-,cf-byte:#-:-,cf-short:#-:-,cf-long:#-:-,cf-float:#-:-,cf-double:#-:-," +
        "cf-bool:#-:-");
    tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary");
    return tbl;
  }

  public void testHBaseSerDeWithHiveMapToHBaseColumnFamilyII() throws SerDeException {

    byte [] cfbyte = "cf-byte".getBytes();
    byte [] cfshort = "cf-short".getBytes();
    byte [] cfint = "cf-int".getBytes();
    byte [] cflong = "cf-long".getBytes();
    byte [] cffloat = "cf-float".getBytes();
    byte [] cfdouble = "cf-double".getBytes();
    byte [] cfstring = "cf-string".getBytes();
    byte [] cfbool = "cf-bool".getBytes();

    byte [][] columnFamilies =
      new byte [][] {cfbyte, cfshort, cfint, cflong, cffloat, cfdouble, cfstring, cfbool};

    byte [] rowKey = Bytes.toBytes("row-key");

    byte [][] columnQualifiersAndValues = new byte [][] {
        Bytes.toBytes("123"), Bytes.toBytes("456"), Bytes.toBytes("789"), Bytes.toBytes("1000"),
        Bytes.toBytes("-0.01"), Bytes.toBytes("5.3"), Bytes.toBytes("Hive"),
        Bytes.toBytes("true")
    };

    Put p = new Put(rowKey);
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    for (int j = 0; j < columnQualifiersAndValues.length; j++) {
      kvs.add(new KeyValue(rowKey,
          columnFamilies[j], columnQualifiersAndValues[j], columnQualifiersAndValues[j]));
      p.add(columnFamilies[j], columnQualifiersAndValues[j], columnQualifiersAndValues[j]);
    }

    Result r = new Result(kvs);

    Object [] expectedData = {
        new Text("row-key"), new ByteWritable((byte) 123), new ShortWritable((short) 456),
        new IntWritable(789), new LongWritable(1000), new FloatWritable(-0.01F),
        new DoubleWritable(5.3), new Text("Hive"), new BooleanWritable(true)
    };

    HBaseSerDe hbaseSerDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveMapHBaseColumnFamilyII_I();
    SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null);

    deserializeAndSerializeHiveMapHBaseColumnFamilyII(hbaseSerDe, r, p, expectedData,
        columnFamilies, columnQualifiersAndValues);

    hbaseSerDe = new HBaseSerDe();
    conf = new Configuration();
    tbl = createPropertiesForHiveMapHBaseColumnFamilyII_II();
    SerDeUtils.initializeSerDe(hbaseSerDe, conf, tbl, null);

    deserializeAndSerializeHiveMapHBaseColumnFamilyII(hbaseSerDe, r, p, expectedData,
        columnFamilies, columnQualifiersAndValues);
  }

  private Properties createPropertiesForHiveMapHBaseColumnFamilyII_I() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,valbyte,valshort,valint,vallong,valfloat,valdouble,valstring,valbool");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "string:map<tinyint,tinyint>:map<smallint,smallint>:map<int,int>:map<bigint,bigint>:"
            + "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key#s,cf-byte:#-:s,cf-short:#s:-,cf-int:#s:s,cf-long:#-:-,cf-float:#s:-,cf-double:#-:s," +
        "cf-string:#s:s,cf-bool:#-:-");
    return tbl;
  }

  private Properties createPropertiesForHiveMapHBaseColumnFamilyII_II() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,valbyte,valshort,valint,vallong,valfloat,valdouble,valstring,valbool");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "string:map<tinyint,tinyint>:map<smallint,smallint>:map<int,int>:map<bigint,bigint>:"
            + "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key#s,cf-byte:#s:s,cf-short:#s:s,cf-int:#s:s,cf-long:#s:s,cf-float:#s:s,cf-double:#s:s," +
        "cf-string:#s:s,cf-bool:#s:s");
    tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary");
    return tbl;
  }

  private void deserializeAndSerializeHiveMapHBaseColumnFamilyII(
      HBaseSerDe hbaseSerDe,
      Result r,
      Put p,
      Object [] expectedData,
      byte [][] columnFamilies,
      byte [][] columnQualifiersAndValues) throws SerDeException {

    StructObjectInspector soi = (StructObjectInspector) hbaseSerDe.getObjectInspector();
    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
    assertEquals(9, fieldRefs.size());

    // Deserialize
    Object row = hbaseSerDe.deserialize(new ResultWritable(r));

    for (int j = 0; j < fieldRefs.size(); j++) {
      Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j));
      assertNotNull(fieldData);

      if (fieldData instanceof LazyPrimitive<?, ?>) {
        assertEquals(expectedData[j], ((LazyPrimitive<?, ?>) fieldData).getWritableObject());
      } else if (fieldData instanceof LazyHBaseCellMap) {
        LazyPrimitive<?, ?> lazyPrimitive = (LazyPrimitive<?, ?>)
          ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedData[j]);
        assertEquals(expectedData[j], lazyPrimitive.getWritableObject());
      } else {
        fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
      }
    }

    // Serialize
    Put serializedPut = ((PutWritable) hbaseSerDe.serialize(row, soi)).getPut();
    assertEquals("Serialized data: ", p.toString(), serializedPut.toString());
  }

  public void testHBaseSerDeWithColumnPrefixes()
      throws Exception {
    byte[] cfa = "cola".getBytes();

    byte[] qualA = "prefixA_col1".getBytes();
    byte[] qualB = "prefixB_col2".getBytes();
    byte[] qualC = "prefixB_col3".getBytes();
    byte[] qualD = "unwanted_col".getBytes();

    List<Object> qualifiers = new ArrayList<Object>();
    qualifiers.add(new Text("prefixA_col1"));
    qualifiers.add(new Text("prefixB_col2"));
    qualifiers.add(new Text("prefixB_col3"));
    qualifiers.add(new Text("unwanted_col"));

    List<Object> expectedQualifiers = new ArrayList<Object>();
    expectedQualifiers.add(new Text("prefixA_col1"));
    expectedQualifiers.add(new Text("prefixB_col2"));
    expectedQualifiers.add(new Text("prefixB_col3"));

    byte[] rowKey = Bytes.toBytes("test-row1");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] dataA = "This is first test data".getBytes();
    byte[] dataB = "This is second test data".getBytes();
    byte[] dataC = "This is third test data".getBytes();
    byte[] dataD = "Unwanted data".getBytes();

    kvs.add(new KeyValue(rowKey, cfa, qualA, dataA));
    kvs.add(new KeyValue(rowKey, cfa, qualB, dataB));
    kvs.add(new KeyValue(rowKey, cfa, qualC, dataC));
    kvs.add(new KeyValue(rowKey, cfa, qualD, dataD));

    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    p.add(new KeyValue(rowKey, cfa, qualA, dataA));
    p.add(new KeyValue(rowKey, cfa, qualB, dataB));
    p.add(new KeyValue(rowKey, cfa, qualC, dataC));

    Object[] expectedFieldsData = {
        new Text("test-row1"),
        new String("This is first test data"),
        new String("This is second test data"),
        new String("This is third test data")};

    int[] expectedMapSize = new int[] {1, 2};

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForColumnPrefixes();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

    Object notPresentKey = new Text("unwanted_col");

    deserializeAndSerializeHivePrefixColumnFamily(serDe, r, p, expectedFieldsData, expectedMapSize,
        expectedQualifiers,
        notPresentKey);
  }

  private Properties createPropertiesForColumnPrefixes() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,astring,along");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "string:map<string,string>:map<string,string>");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key,cola:prefixA_.*,cola:prefixB_.*");

    return tbl;
  }

  private void deserializeAndSerializeHivePrefixColumnFamily(HBaseSerDe serDe, Result r, Put p,
      Object[] expectedFieldsData, int[] expectedMapSize, List<Object> expectedQualifiers,
      Object notPresentKey)
      throws SerDeException, IOException {
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();

    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();

    Object row = serDe.deserialize(new ResultWritable(r));

    int j = 0;

    for (int i = 0; i < fieldRefs.size(); i++) {
      Object fieldData = soi.getStructFieldData(row, fieldRefs.get(i));
      assertNotNull(fieldData);

      if (fieldData instanceof LazyPrimitive<?, ?>) {
        assertEquals(expectedFieldsData[i], ((LazyPrimitive<?, ?>) fieldData).getWritableObject());
      } else if (fieldData instanceof LazyHBaseCellMap) {
        assertEquals(expectedFieldsData[i], ((LazyHBaseCellMap) fieldData)
            .getMapValueElement(expectedQualifiers.get(j)).toString().trim());

        assertEquals(expectedMapSize[j], ((LazyHBaseCellMap) fieldData).getMapSize());
        // Make sure that the unwanted key is not present in the map
        assertNull(((LazyHBaseCellMap) fieldData).getMapValueElement(notPresentKey));

        j++;

      } else {
        fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
      }
    }

    SerDeUtils.getJSONString(row, soi);

    // Now serialize
    Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();

    if (p != null) {
      assertEquals("Serialized put:", p.toString(), put.toString());
    }
  }

  public void testHBaseSerDeCompositeKeyWithSeparator() throws SerDeException, TException,
      IOException {
    byte[] cfa = "cola".getBytes();

    byte[] qualStruct = "struct".getBytes();

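    // TestStruct is a helper defined elsewhere in this test package; its getBytes()
    // concatenates the three string fields, and the trailing boolean/byte arguments
    // appear to control whether (and with which byte) a separator is inserted.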
    TestStruct testStruct = new TestStruct("A", "B", "C", true, (byte) 45);

    byte[] rowKey = testStruct.getBytes();

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] testData = "This is a test data".getBytes();

    kvs.add(new KeyValue(rowKey, cfa, qualStruct, testData));

    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    // Post-serialization, separators are automatically inserted between the different fields
    // of the struct. Currently there is no way to disable that, so the workaround here is to
    // pad the data with the separator bytes before creating the "Put" object.
    p.add(new KeyValue(rowKey, cfa, qualStruct, testData));

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForCompositeKeyWithSeparator();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

    deserializeAndSerializeHBaseCompositeKey(serDe, r, p);
  }

  private Properties createPropertiesForCompositeKeyWithSeparator() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,astring");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "struct<col1:string,col2:string,col3:string>,string");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key,cola:struct");
    tbl.setProperty(serdeConstants.COLLECTION_DELIM, "-");
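    // "-" (byte 45) matches the separator byte handed to TestStruct above, so the
    // composite key round-trips with the same delimiter.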

    return tbl;
  }

  public void testHBaseSerDeCompositeKeyWithoutSeparator() throws SerDeException, TException,
      IOException {
    byte[] cfa = "cola".getBytes();

    byte[] qualStruct = "struct".getBytes();

    TestStruct testStruct = new TestStruct("A", "B", "C", false, (byte) 0);

    byte[] rowKey = testStruct.getBytes();

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] testData = "This is a test data".getBytes();

    kvs.add(new KeyValue(rowKey, cfa, qualStruct, testData));

    Result r = new Result(kvs);

    byte[] putRowKey = testStruct.getBytesWithDelimiters();

    Put p = new Put(putRowKey);

    // Post-serialization, separators are automatically inserted between the different fields
    // of the struct. Currently there is no way to disable that, so the workaround here is to
    // pad the data with the separator bytes before creating the "Put" object.
    p.add(new KeyValue(putRowKey, cfa, qualStruct, testData));

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForCompositeKeyWithoutSeparator();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

    deserializeAndSerializeHBaseCompositeKey(serDe, r, p);
  }

  private Properties createPropertiesForCompositeKeyWithoutSeparator() {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS,
        "key,astring");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "struct<col1:string,col2:string,col3:string>,string");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
        ":key,cola:struct");
    tbl.setProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS,
        "org.apache.hadoop.hive.hbase.HBaseTestCompositeKey");

    return tbl;
  }

  private void deserializeAndSerializeHBaseCompositeKey(HBaseSerDe serDe, Result r, Put p)
      throws SerDeException, IOException {
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();

    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();

    Object row = serDe.deserialize(new ResultWritable(r));

    for (int j = 0; j < fieldRefs.size(); j++) {
      Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j));
      assertNotNull(fieldData);
    }

    assertEquals(
        "{\"key\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"},\"astring\":\"This is a test data\"}",
        SerDeUtils.getJSONString(row, soi));

    // Now serialize
    Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();

    assertEquals("Serialized put:", p.toString(), put.toString());
  }

  public void testHBaseSerDeWithAvroSchemaInline() throws SerDeException, IOException {
    byte[] cfa = "cola".getBytes();

    byte[] qualAvro = "avro".getBytes();

    byte[] rowKey = Bytes.toBytes("test-row1");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);

    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    // Post-serialization, separators are automatically inserted between the different fields
    // of the struct. Currently there is no way to disable that, so the workaround here is to
    // pad the data with the separator bytes before creating the "Put" object.
    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveAvroSchemaInline();
    serDe.initialize(conf, tbl);

    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
        EXPECTED_DESERIALIZED_AVRO_STRING);
  }

  private Properties createPropertiesForHiveAvroSchemaInline() {
    Properties tbl = new Properties();
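    // Per-column SerDe properties are keyed as "<family>.<qualifier>.<property>",
    // so these entries apply only to the cola:avro column.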
    tbl.setProperty("cola.avro.serialization.type", "avro");
    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA);
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");

    return tbl;
  }

  public void testHBaseSerDeWithForwardEvolvedSchema() throws SerDeException, IOException {
    byte[] cfa = "cola".getBytes();

    byte[] qualAvro = "avro".getBytes();

    byte[] rowKey = Bytes.toBytes("test-row1");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);

    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    // Post-serialization, separators are automatically inserted between the different fields
    // of the struct. Currently there is no way to disable that, so the workaround here is to
    // pad the data with the separator bytes before creating the "Put" object.
    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Object[] expectedFieldsData = {new String("test-row1"),
        new String("[[42, test, true, 42432234234]]")};

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveAvroForwardEvolvedSchema();
    serDe.initialize(conf, tbl);

    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
        EXPECTED_DESERIALIZED_AVRO_STRING_3);
  }

  private Properties createPropertiesForHiveAvroForwardEvolvedSchema() {
    Properties tbl = new Properties();
    tbl.setProperty("cola.avro.serialization.type", "avro");
    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA_EVOLVED);
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");

    return tbl;
  }

  public void testHBaseSerDeWithBackwardEvolvedSchema() throws SerDeException, IOException {
    byte[] cfa = "cola".getBytes();

    byte[] qualAvro = "avro".getBytes();

    byte[] rowKey = Bytes.toBytes("test-row1");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA_EVOLVED);

    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    // Post-serialization, separators are automatically inserted between the different fields
    // of the struct. Currently there is no way to disable that, so the workaround here is to
    // pad the data with the separator bytes before creating the "Put" object.
    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveAvroBackwardEvolvedSchema();
    serDe.initialize(conf, tbl);

    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
        EXPECTED_DESERIALIZED_AVRO_STRING);
  }

  private Properties createPropertiesForHiveAvroBackwardEvolvedSchema() {
    Properties tbl = new Properties();
    tbl.setProperty("cola.avro.serialization.type", "avro");
    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA);
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");

    return tbl;
  }

  public void testHBaseSerDeWithAvroSerClass() throws SerDeException, IOException {
    byte[] cfa = "cola".getBytes();

    byte[] qualAvro = "avro".getBytes();

    byte[] rowKey = Bytes.toBytes("test-row1");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] avroData = getTestAvroBytesFromClass1(1);

    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    // Post-serialization, separators are automatically inserted between the different fields
    // of the struct. Currently there is no way to disable that, so the workaround here is to
    // pad the data with the separator bytes before creating the "Put" object.
    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Object[] expectedFieldsData = {
        new String("test-row1"),
        new String(
            "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], "
                + "[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
                + "[999, 1234567890], [999, 1234455555]]]")};

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveAvroSerClass();
    serDe.initialize(conf, tbl);

    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
        EXPECTED_DESERIALIZED_AVRO_STRING_2);
  }

  private Properties createPropertiesForHiveAvroSerClass() {
    Properties tbl = new Properties();
    tbl.setProperty("cola.avro.serialization.type", "avro");
    tbl.setProperty("cola.avro." + serdeConstants.SERIALIZATION_CLASS,
        "org.apache.hadoop.hive.hbase.avro.Employee");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");

    return tbl;
  }

  public void testHBaseSerDeWithAvroSchemaUrl() throws SerDeException, IOException {
    byte[] cfa = "cola".getBytes();

    byte[] qualAvro = "avro".getBytes();

    byte[] rowKey = Bytes.toBytes("test-row1");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);

    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    // Post-serialization, separators are automatically inserted between the different fields
    // of the struct. Currently there is no way to disable that, so the workaround here is to
    // pad the data with the separator bytes before creating the "Put" object.
    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};

    MiniDFSCluster miniDfs = null;

    try {
      // MiniDFSCluster litters files and folders all over the place.
      miniDfs = new MiniDFSCluster(new Configuration(), 1, true, null);

      miniDfs.getFileSystem().mkdirs(new Path("/path/to/schema"));
      FSDataOutputStream out = miniDfs.getFileSystem().create(
          new Path("/path/to/schema/schema.avsc"));
      out.writeBytes(RECORD_SCHEMA);
      out.close();
      String onHDFS = miniDfs.getFileSystem().getUri() + "/path/to/schema/schema.avsc";

      // Create, initialize, and test the SerDe
      HBaseSerDe serDe = new HBaseSerDe();
      Configuration conf = new Configuration();
      Properties tbl = createPropertiesForHiveAvroSchemaUrl(onHDFS);
      serDe.initialize(conf, tbl);

      deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
          EXPECTED_DESERIALIZED_AVRO_STRING);
    } finally {
      // Teardown the cluster
      if (miniDfs != null) {
        miniDfs.shutdown();
      }
    }
  }

  private Properties createPropertiesForHiveAvroSchemaUrl(String schemaUrl) {
    Properties tbl = new Properties();
    tbl.setProperty("cola.avro.serialization.type", "avro");
    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_URL, schemaUrl);
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");

    return tbl;
  }

  public void testHBaseSerDeWithAvroExternalSchema() throws SerDeException, IOException {
    byte[] cfa = "cola".getBytes();

    byte[] qualAvro = "avro".getBytes();

    byte[] rowKey = Bytes.toBytes("test-row1");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] avroData = getTestAvroBytesFromClass2(1);

    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    // Post-serialization, separators are automatically inserted between the different fields
    // of the struct. Currently there is no way to disable that, so the workaround here is to
    // pad the data with the separator bytes before creating the "Put" object.
    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));

    Object[] expectedFieldsData = {
        new String("test-row1"),
        new String(
            "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
                + "[999, 1234567890], [999, 1234455555]]]")};

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();

    Properties tbl = createPropertiesForHiveAvroExternalSchema();
    serDe.initialize(conf, tbl);

    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
        EXPECTED_DESERIALIZED_AVRO_STRING_2);
  }

  private Properties createPropertiesForHiveAvroExternalSchema() {
    Properties tbl = new Properties();
    tbl.setProperty("cola.avro.serialization.type", "avro");
    tbl.setProperty(AvroSerdeUtils.SCHEMA_RETRIEVER,
        "org.apache.hadoop.hive.hbase.HBaseTestAvroSchemaRetriever");
    tbl.setProperty("cola.avro." + serdeConstants.SERIALIZATION_CLASS,
        "org.apache.hadoop.hive.hbase.avro.Employee");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");

    return tbl;
  }

  public void testHBaseSerDeWithHiveMapToHBaseAvroColumnFamily() throws Exception {
    byte[] cfa = "cola".getBytes();

    byte[] qualAvroA = "prefixA_avro1".getBytes();
    byte[] qualAvroB = "prefixB_avro2".getBytes();
    byte[] qualAvroC = "prefixB_avro3".getBytes();

    List<Object> qualifiers = new ArrayList<Object>();
    qualifiers.add(new Text("prefixA_avro1"));
    qualifiers.add(new Text("prefixB_avro2"));
    qualifiers.add(new Text("prefixB_avro3"));

    List<Object> expectedQualifiers = new ArrayList<Object>();
    expectedQualifiers.add(new Text("prefixB_avro2"));
    expectedQualifiers.add(new Text("prefixB_avro3"));

    byte[] rowKey = Bytes.toBytes("test-row1");

    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] avroDataA = getTestAvroBytesFromSchema(RECORD_SCHEMA);
    byte[] avroDataB = getTestAvroBytesFromClass1(1);
    byte[] avroDataC = getTestAvroBytesFromClass1(2);

    kvs.add(new KeyValue(rowKey, cfa, qualAvroA, avroDataA));
    kvs.add(new KeyValue(rowKey, cfa, qualAvroB, avroDataB));
    kvs.add(new KeyValue(rowKey, cfa, qualAvroC, avroDataC));

    Result r = new Result(kvs);

    Put p = new Put(rowKey);

    // Post-serialization, separators are automatically inserted between the different fields
    // of the struct. Currently there is no way to disable that, so the workaround here is to
    // pad the data with the separator bytes before creating the "Put" object.
    p.add(new KeyValue(rowKey, cfa, qualAvroB, Bytes.padTail(avroDataB, 11)));
    p.add(new KeyValue(rowKey, cfa, qualAvroC, Bytes.padTail(avroDataC, 11)));
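    // (The pad length of 11 appears to correspond to the number of separator bytes
    // the serializer appends for this particular nesting depth.)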

    Object[] expectedFieldsData = {
        new Text("test-row1"),
        new String(
            "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
                + "[999, 1234567890], [999, 1234455555]]]"),
        new String(
            "[Avro Employee2, 11111, 25, FEMALE, [[[Avro First Address2, Avro Second Address2, Avro City2, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address2, Avro Second Address2, Avro City2, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
                + "[999, 1234567890], [999, 1234455555]]]")};

    int[] expectedMapSize = new int[] {2};

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForHiveAvroColumnFamilyMap();
    serDe.initialize(conf, tbl);

    Object notPresentKey = new Text("prefixA_avro1");

    deserializeAndSerializeHiveStructColumnFamily(serDe, r, p, expectedFieldsData, expectedMapSize,
        expectedQualifiers,
        notPresentKey);
  }

  private Properties createPropertiesForHiveAvroColumnFamilyMap() {
    Properties tbl = new Properties();
    tbl.setProperty("cola.prefixB_.serialization.type", "avro");
    tbl.setProperty("cola.prefixB_." + serdeConstants.SERIALIZATION_CLASS,
        "org.apache.hadoop.hive.hbase.avro.Employee");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, "cola:prefixB_.*");
    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
    tbl.setProperty(LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS, "true");

    return tbl;
  }

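  /**
   * Tests round-tripping a value stored as a custom struct, with a pluggable
   * HBaseTestStructSerializer for the "cola:struct" column and an HBaseTestCompositeKey
   * class for the row key.
   */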
  public void testHBaseSerDeCustomStructValue() throws IOException, SerDeException {

    byte[] cfa = "cola".getBytes();
    byte[] qualStruct = "struct".getBytes();

    TestStruct testStruct = new TestStruct("A", "B", "C", false, (byte) 0);
    byte[] key = testStruct.getBytes();
    // Data
    List<KeyValue> kvs = new ArrayList<KeyValue>();

    byte[] testData = testStruct.getBytes();
    kvs.add(new KeyValue(key, cfa, qualStruct, testData));

    Result r = new Result(kvs);
    byte[] putKey = testStruct.getBytesWithDelimiters();

    Put p = new Put(putKey);

    // Post serialization, separators are automatically inserted between the fields of the
    // struct. Currently there is no way to disable that, so the workaround here is to pad the
    // data with the separator bytes before creating the expected "Put" object.
    p.add(new KeyValue(putKey, cfa, qualStruct, Bytes.padTail(testData, 2)));

    // Create, initialize, and test the SerDe
    HBaseSerDe serDe = new HBaseSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createPropertiesForValueStruct();
    serDe.initialize(conf, tbl);

    deserializeAndSerializeHBaseValueStruct(serDe, r, p);

  }

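  /**
   * Builds the table properties for the custom struct test: both the row key and the
   * "cola:struct" column are declared as three-string structs, and the custom serializer
   * and composite key classes are wired in through table properties.
   */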
  private Properties createPropertiesForValueStruct() {
    Properties tbl = new Properties();
    tbl.setProperty("cola.struct.serialization.type", "struct");
    tbl.setProperty("cola.struct.test.value", "test value");
    tbl.setProperty(HBaseSerDe.HBASE_STRUCT_SERIALIZER_CLASS,
        "org.apache.hadoop.hive.hbase.HBaseTestStructSerializer");
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "key,astring");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        "struct<col1:string,col2:string,col3:string>,struct<col1:string,col2:string,col3:string>");
    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:struct");
    tbl.setProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS,
        "org.apache.hadoop.hive.hbase.HBaseTestCompositeKey");
    return tbl;
  }

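  /**
   * Deserializes the given Result, asserts that every top-level field is a LazyStruct
   * holding the fields "A", "B" and "C", checks the JSON rendering of the row, and then
   * serializes the row back, expecting a Put equal to the one passed in.
   */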
  private void deserializeAndSerializeHBaseValueStruct(HBaseSerDe serDe, Result r, Put p)
      throws SerDeException, IOException {
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();

    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();

    Object row = serDe.deserialize(new ResultWritable(r));

    Object fieldData = null;
    for (int j = 0; j < fieldRefs.size(); j++) {
      fieldData = soi.getStructFieldData(row, fieldRefs.get(j));
      assertNotNull(fieldData);
      if (fieldData instanceof LazyStruct) {
        assertEquals("A", ((LazyStruct) fieldData).getField(0).toString());
        assertEquals("B", ((LazyStruct) fieldData).getField(1).toString());
        assertEquals("C", ((LazyStruct) fieldData).getField(2).toString());
      } else {
        Assert.fail("fieldData should be an instance of LazyStruct");
      }
    }

    assertEquals(
        "{\"key\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"},\"astring\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"}}",
        SerDeUtils.getJSONString(row, soi));

    // Now serialize
    Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();

    assertEquals("Serialized put:", p.toString(), put.toString());
  }

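  /**
   * Deserializes the given Result into a Hive row, compares each field's trimmed string
   * form and the row's JSON rendering against the expected values, then serializes the
   * row back and compares the resulting Put's cell map with the expected one.
   */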
  private void deserializeAndSerializeHiveAvro(HBaseSerDe serDe, Result r, Put p,
      Object[] expectedFieldsData, String expectedDeserializedAvroString)
      throws SerDeException, IOException {
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();

    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();

    Object row = serDe.deserialize(new ResultWritable(r));

    for (int j = 0; j < fieldRefs.size(); j++) {
      Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j));
      assertNotNull(fieldData);
      assertEquals(expectedFieldsData[j], fieldData.toString().trim());
    }
   
    assertEquals(expectedDeserializedAvroString, SerDeUtils.getJSONString(row, soi));

    // Now serialize
    Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();

    assertNotNull(put);
    assertEquals(p.getFamilyCellMap(), put.getFamilyCellMap());
  }

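  /**
   * Deserializes a row whose column family is mapped to a Hive map of Avro structs:
   * verifies each map value under its expected qualifier, the map size, and the absence
   * of the unmapped qualifier, then serializes the row back into a Put.
   */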
  private void deserializeAndSerializeHiveStructColumnFamily(HBaseSerDe serDe, Result r, Put p,
      Object[] expectedFieldsData,
      int[] expectedMapSize, List<Object> expectedQualifiers, Object notPresentKey)
      throws SerDeException, IOException {
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();

    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();

    Object row = serDe.deserialize(new ResultWritable(r));

    int k = 0;

    for (int i = 0; i < fieldRefs.size(); i++) {
      Object fieldData = soi.getStructFieldData(row, fieldRefs.get(i));
      assertNotNull(fieldData);

      if (fieldData instanceof LazyPrimitive<?, ?>) {
        assertEquals(expectedFieldsData[i], ((LazyPrimitive<?, ?>) fieldData).getWritableObject());
      } else if (fieldData instanceof LazyHBaseCellMap) {

        for (int j = 0; j < ((LazyHBaseCellMap) fieldData).getMapSize(); j++) {
          assertEquals(expectedFieldsData[k + 1],
              ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedQualifiers.get(k))
                  .toString().trim());
          k++;
        }

        assertEquals(expectedMapSize[i - 1], ((LazyHBaseCellMap) fieldData).getMapSize());

        // Make sure that the unwanted key is not present in the map
        assertNull(((LazyHBaseCellMap) fieldData).getMapValueElement(notPresentKey));

      } else {
        fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
      }
    }

    SerDeUtils.getJSONString(row, soi);

    // Now serialize
    Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();

    assertNotNull(put);
  }

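  /**
   * Builds a single GenericRecord from the given schema string and serializes it in the
   * Avro container-file format (which embeds the schema), returning the raw bytes.
   */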
  private byte[] getTestAvroBytesFromSchema(String schemaToUse) throws IOException {
    Schema s = new Schema.Parser().parse(schemaToUse);
    GenericData.Record record = new GenericData.Record(s);
    GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
    innerRecord.put("int1", 42);
    innerRecord.put("boolean1", true);
    innerRecord.put("long1", 42432234234l);

    if (schemaToUse.equals(RECORD_SCHEMA_EVOLVED)) {
      innerRecord.put("string1", "new value");
    }

    record.put("aRecord", innerRecord);

    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(s);
    ByteArrayOutputStream out = new ByteArrayOutputStream();

    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    dataFileWriter.create(s, out);
    dataFileWriter.append(record);
    dataFileWriter.close();

    byte[] data = out.toByteArray();

    out.close();
    return data;
  }

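  /**
   * Populates an Employee record (a generated Avro specific class) and serializes it in
   * the Avro container-file format; the index parameter is folded into the string fields
   * so that successive calls yield distinguishable records.
   */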
  private byte[] getTestAvroBytesFromClass1(int i) throws IOException {
    Employee employee = new Employee();

    employee.setEmployeeName("Avro Employee" + i);
    employee.setEmployeeID(11111L);
    employee.setGender(Gender.FEMALE);
    employee.setAge(25L);

    Address address = new Address();

    address.setAddress1("Avro First Address" + i);
    address.setAddress2("Avro Second Address" + i);
    address.setCity("Avro City" + i);
    address.setZipcode(123456L);

    Map<CharSequence, CharSequence> metadata = new HashMap<CharSequence, CharSequence>();

    metadata.put("testkey", "testvalue");

    address.setMetadata(metadata);

    HomePhone hPhone = new HomePhone();

    hPhone.setAreaCode(999L);
    hPhone.setNumber(1234567890L);

    OfficePhone oPhone = new OfficePhone();

    oPhone.setAreaCode(999L);
    oPhone.setNumber(1234455555L);

    ContactInfo contact = new ContactInfo();

    List<Address> addresses = new ArrayList<Address>();
    address.setCounty(hPhone); // populate the union-typed "county" field
    addresses.add(address);
    addresses.add(address);

    contact.setAddress(addresses);

    contact.setHomePhone(hPhone);
    contact.setOfficePhone(oPhone);

    employee.setContactInfo(contact);

    DatumWriter<Employee> datumWriter = new SpecificDatumWriter<Employee>(Employee.class);
    DataFileWriter<Employee> dataFileWriter = new DataFileWriter<Employee>(datumWriter);

    ByteArrayOutputStream out = new ByteArrayOutputStream();

    dataFileWriter.create(employee.getSchema(), out);
    dataFileWriter.append(employee);
    dataFileWriter.close();

    return out.toByteArray();
  }

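  /**
   * Populates an Employee record and serializes it with the raw binary encoder (no
   * container file, so no embedded schema), prefixed with TEST_BYTE_ARRAY as a payload
   * header; presumably a custom schema retriever strips the header and supplies the
   * schema on the read side.
   */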
  private byte[] getTestAvroBytesFromClass2(int i) throws IOException {
    Employee employee = new Employee();

    employee.setEmployeeName("Avro Employee" + i);
    employee.setEmployeeID(11111L);
    employee.setGender(Gender.FEMALE);
    employee.setAge(25L);

    Address address = new Address();

    address.setAddress1("Avro First Address" + i);
    address.setAddress2("Avro Second Address" + i);
    address.setCity("Avro City" + i);
    address.setZipcode(123456L);

    Map<CharSequence, CharSequence> metadata = new HashMap<CharSequence, CharSequence>();

    metadata.put("testkey", "testvalue");

    address.setMetadata(metadata);

    HomePhone hPhone = new HomePhone();

    hPhone.setAreaCode(999L);
    hPhone.setNumber(1234567890L);

    OfficePhone oPhone = new OfficePhone();

    oPhone.setAreaCode(999L);
    oPhone.setNumber(1234455555L);

    ContactInfo contact = new ContactInfo();

    List<Address> addresses = new ArrayList<Address>();
    address.setCounty(hPhone); // populate the union-typed "county" field
    addresses.add(address);
    addresses.add(address);

    contact.setAddress(addresses);

    contact.setHomePhone(hPhone);
    contact.setOfficePhone(oPhone);

    employee.setContactInfo(contact);

    DatumWriter<Employee> employeeWriter = new SpecificDatumWriter<Employee>(Employee.class);

    ByteArrayOutputStream out = new ByteArrayOutputStream();

    Encoder encoder = EncoderFactory.get().binaryEncoder(out, null);

    // write out a header for the payload
    out.write(TEST_BYTE_ARRAY);

    employeeWriter.write(employee, encoder);

    encoder.flush();

    return out.toByteArray();
  }

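  /**
   * Minimal three-string struct used as test data: getBytes() concatenates the fields,
   * optionally joined by the given separator byte, while getBytesWithDelimiters() always
   * joins them with Hive's default Ctrl-B (\002) field delimiter.
   */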
  class TestStruct {
    String f1;
    String f2;
    String f3;
    boolean hasSeparator;
    byte separator;

    TestStruct(String f1, String f2, String f3, boolean hasSeparator, byte separator) {
      this.f1 = f1;
      this.f2 = f2;
      this.f3 = f3;
      this.hasSeparator = hasSeparator;
      this.separator = separator;
    }

    public byte[] getBytes() throws IOException {
      ByteArrayOutputStream bos = new ByteArrayOutputStream();

      bos.write(f1.getBytes());
      if (hasSeparator) {
        bos.write(separator); // Add field separator
      }
      bos.write(f2.getBytes());
      if (hasSeparator) {
        bos.write(separator);
      }
      bos.write(f3.getBytes());

      return bos.toByteArray();
    }

    public byte[] getBytesWithDelimiters() throws IOException {
      // Join the fields with the Ctrl-B delimiter. This is necessary because, when no
      // delimiter is specified for a struct, Hive automatically uses Ctrl-B (\002) as the
      // default field delimiter between struct fields.
      ByteArrayOutputStream bos = new ByteArrayOutputStream();

      bos.write(f1.getBytes());
      bos.write("\002".getBytes("UTF8"));
      bos.write(f2.getBytes());
      bos.write("\002".getBytes("UTF8"));
      bos.write(f3.getBytes());

      return bos.toByteArray();
    }
  }
}