Package com.linkedin.haivvreo

Source Code of com.linkedin.haivvreo.TestAvroObjectInspectorGenerator

/*
* Copyright 2011 LinkedIn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.haivvreo;

import com.linkedin.haivvreo.AvroObjectInspectorGenerator;
import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.*;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

import static org.junit.Assert.*;

public class TestAvroObjectInspectorGenerator {
  private final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo("string");
  private final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo("int");
  private final TypeInfo BOOLEAN = TypeInfoFactory.getPrimitiveTypeInfo("boolean");
  private final TypeInfo LONG = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
  private final TypeInfo FLOAT = TypeInfoFactory.getPrimitiveTypeInfo("float");
  private final TypeInfo DOUBLE = TypeInfoFactory.getPrimitiveTypeInfo("double");
  private final TypeInfo VOID = TypeInfoFactory.getPrimitiveTypeInfo("void");

  // These schemata are used in other tests
  static public final String MAP_WITH_PRIMITIVE_VALUE_TYPE = "{\n" +
      "  \"namespace\": \"testing\",\n" +
      "  \"name\": \"oneMap\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "    {\n" +
      "      \"name\":\"aMap\",\n" +
      "      \"type\":{\"type\":\"map\",\n" +
      "      \"values\":\"long\"}\n" +
      "\t}\n" +
      "  ]\n" +
      "}";
  static public final String ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE = "{\n" +
      "  \"namespace\": \"testing\",\n" +
      "  \"name\": \"oneArray\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "    {\n" +
      "      \"name\":\"anArray\",\n" +
      "      \"type\":{\"type\":\"array\",\n" +
      "      \"items\":\"string\"}\n" +
      "\t}\n" +
      "  ]\n" +
      "}";
  public static final String RECORD_SCHEMA = "{\n" +
      "  \"namespace\": \"testing.test.mctesty\",\n" +
      "  \"name\": \"oneRecord\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "    {\n" +
      "      \"name\":\"aRecord\",\n" +
      "      \"type\":{\"type\":\"record\",\n" +
      "              \"name\":\"recordWithinARecord\",\n" +
      "              \"fields\": [\n" +
      "                 {\n" +
      "                  \"name\":\"int1\",\n" +
      "                  \"type\":\"int\"\n" +
      "                },\n" +
      "                {\n" +
      "                  \"name\":\"boolean1\",\n" +
      "                  \"type\":\"boolean\"\n" +
      "                },\n" +
      "                {\n" +
      "                  \"name\":\"long1\",\n" +
      "                  \"type\":\"long\"\n" +
      "                }\n" +
      "      ]}\n" +
      "    }\n" +
      "  ]\n" +
      "}";
  public static final String UNION_SCHEMA = "{\n" +
      "  \"namespace\": \"test.a.rossa\",\n" +
      "  \"name\": \"oneUnion\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "    {\n" +
      "      \"name\":\"aUnion\",\n" +
      "      \"type\":[\"int\", \"string\"]\n" +
      "    }\n" +
      "  ]\n" +
      "}";
  public static final String ENUM_SCHEMA = "{\n" +
      "  \"namespace\": \"clever.namespace.name.in.space\",\n" +
      "  \"name\": \"oneEnum\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "   {\n" +
      "      \"name\":\"baddies\",\n" +
      "      \"type\":{\"type\":\"enum\",\"name\":\"villians\", \"symbols\": [\"DALEKS\", \"CYBERMEN\", \"SLITHEEN\", \"JAGRAFESS\"]}\n" +
      "      \n" +
      "      \n" +
      "    }\n" +
      "  ]\n" +
      "}";
  public static final String FIXED_SCHEMA = "{\n" +
      "  \"namespace\": \"ecapseman\",\n" +
      "  \"name\": \"oneFixed\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "   {\n" +
      "      \"name\":\"hash\",\n" +
      "      \"type\":{\"type\": \"fixed\", \"name\": \"MD5\", \"size\": 16}\n" +
      "    }\n" +
      "  ]\n" +
      "}";
  public static final String NULLABLE_STRING_SCHEMA = "{\n" +
      "  \"type\": \"record\", \n" +
      "  \"name\": \"nullableUnionTest\",\n" +
      "  \"fields\" : [\n" +
      "    {\"name\":\"nullableString\", \"type\":[\"null\", \"string\"]}\n" +
      "  ]\n" +
      "}";
  public static final String BYTES_SCHEMA = "{\n" +
      "  \"type\": \"record\", \n" +
      "  \"name\": \"bytesTest\",\n" +
      "  \"fields\" : [\n" +
      "    {\"name\":\"bytesField\", \"type\":\"bytes\"}\n" +
      "  ]\n" +
      "}";

  public static final String KITCHEN_SINK_SCHEMA = "{\n" +
      "  \"namespace\": \"com.linkedin.haivvreo\",\n" +
      "  \"name\": \"kitchsink\",\n" +
      "  \"type\": \"record\",\n" +
      "  \"fields\": [\n" +
      "    {\n" +
      "      \"name\":\"string1\",\n" +
      "      \"type\":\"string\"\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"string2\",\n" +
      "      \"type\":\"string\"\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"int1\",\n" +
      "      \"type\":\"int\"\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"boolean1\",\n" +
      "      \"type\":\"boolean\"\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"long1\",\n" +
      "      \"type\":\"long\"\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"float1\",\n" +
      "      \"type\":\"float\"\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"double1\",\n" +
      "      \"type\":\"double\"\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"inner_record1\",\n" +
      "      \"type\":{ \"type\":\"record\",\n" +
      "               \"name\":\"inner_record1_impl\",\n" +
      "               \"fields\": [\n" +
      "                          {\"name\":\"int_in_inner_record1\",\n" +
      "                           \"type\":\"int\"},\n" +
      "                          {\"name\":\"string_in_inner_record1\",\n" +
      "                           \"type\":\"string\"}\n" +
      "                         ]\n" +
      "       }\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"enum1\",\n" +
      "      \"type\":{\"type\":\"enum\", \"name\":\"enum1_values\", \"symbols\":[\"ENUM1_VALUES_VALUE1\",\"ENUM1_VALUES_VALUE2\", \"ENUM1_VALUES_VALUE3\"]}\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"array1\",\n" +
      "      \"type\":{\"type\":\"array\", \"items\":\"string\"}\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"map1\",\n" +
      "      \"type\":{\"type\":\"map\", \"values\":\"string\"}\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"union1\",\n" +
      "      \"type\":[\"float\", \"boolean\", \"string\"]\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"fixed1\",\n" +
      "      \"type\":{\"type\":\"fixed\", \"name\":\"fourbytes\", \"size\":4}\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"null1\",\n" +
      "      \"type\":\"null\"\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"UnionNullInt\",\n" +
      "      \"type\":[\"int\", \"null\"]\n" +
      "    },\n" +
      "    {\n" +
      "      \"name\":\"bytes1\",\n" +
      "      \"type\":\"bytes\"\n" +
      "    }\n" +
      "  ]\n" +
      "}";

  @Test // that we can only process records
  public void failOnNonRecords() throws Exception {
    String nonRecordSchema = "{ \"type\": \"enum\",\n" +
        "  \"name\": \"Suit\",\n" +
        "  \"symbols\" : [\"SPADES\", \"HEARTS\", \"DIAMONDS\", \"CLUBS\"]\n" +
        "}";

    Schema s = Schema.parse(nonRecordSchema);
    try {
      new AvroObjectInspectorGenerator(s);
      fail("Should not be able to handle non-record Avro types");
    } catch(SerDeException sde) {
      assertTrue(sde.getMessage().startsWith("Schema for table must be of type RECORD"));
    }
  }

  @Test
  public void primitiveTypesWorkCorrectly() throws SerDeException {
    final String bunchOfPrimitives = "{\n" +
        "  \"namespace\": \"testing\",\n" +
        "  \"name\": \"PrimitiveTypes\",\n" +
        "  \"type\": \"record\",\n" +
        "  \"fields\": [\n" +
        "    {\n" +
        "      \"name\":\"aString\",\n" +
        "      \"type\":\"string\"\n" +
        "    },\n" +
        "    {\n" +
        "      \"name\":\"anInt\",\n" +
        "      \"type\":\"int\"\n" +
        "    },\n" +
        "    {\n" +
        "      \"name\":\"aBoolean\",\n" +
        "      \"type\":\"boolean\"\n" +
        "    },\n" +
        "    {\n" +
        "      \"name\":\"aLong\",\n" +
        "      \"type\":\"long\"\n" +
        "    },\n" +
        "    {\n" +
        "      \"name\":\"aFloat\",\n" +
        "      \"type\":\"float\"\n" +
        "    },\n" +
        "    {\n" +
        "      \"name\":\"aDouble\",\n" +
        "      \"type\":\"double\"\n" +
        "    },\n" +
        "    {\n" +
        "      \"name\":\"aNull\",\n" +
        "      \"type\":\"null\"\n" +
        "    }\n" +
        "  ]\n" +
        "}";
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(Schema.parse(bunchOfPrimitives));

    String [] expectedColumnNames = {"aString", "anInt", "aBoolean", "aLong", "aFloat", "aDouble", "aNull"};
    verifyColumnNames(expectedColumnNames, aoig.getColumnNames());

    TypeInfo [] expectedColumnTypes = {STRING, INT, BOOLEAN, LONG, FLOAT, DOUBLE, VOID};
    verifyColumnTypes(expectedColumnTypes, aoig.getColumnTypes());

    // Rip apart the object inspector, making sure we got what we expect.
    final ObjectInspector oi = aoig.getObjectInspector();
    assertTrue(oi instanceof StandardStructObjectInspector);
    final StandardStructObjectInspector ssoi = (StandardStructObjectInspector)oi;
    List<? extends StructField> structFields = ssoi.getAllStructFieldRefs();
    assertEquals(expectedColumnNames.length, structFields.size());

    for(int i = 0; i < expectedColumnNames.length;i++) {
      assertEquals("Column names don't match", expectedColumnNames[i].toLowerCase(), structFields.get(i).getFieldName());
      assertEquals("Column types don't match", expectedColumnTypes[i].getTypeName(), structFields.get(i).getFieldObjectInspector().getTypeName());
    }
  }

  private void verifyColumnTypes(TypeInfo[] expectedColumnTypes, List<TypeInfo> columnTypes) {
    for(int i = 0; i < expectedColumnTypes.length; i++) {
      assertEquals(expectedColumnTypes[i], columnTypes.get(i));

    }
  }

  private void verifyColumnNames(String[] expectedColumnNames, List<String> columnNames) {
    for(int i = 0; i < expectedColumnNames.length; i++) {
      assertEquals(expectedColumnNames[i], columnNames.get(i));
    }
  }

  @Test
  public void canHandleMapsWithPrimitiveValueTypes() throws SerDeException {
    Schema s = Schema.parse(MAP_WITH_PRIMITIVE_VALUE_TYPE);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);

    // Column names
    assertEquals(1, aoig.getColumnNames().size());
    assertEquals("aMap", aoig.getColumnNames().get(0));

    // Column types
    assertEquals(1, aoig.getColumnTypes().size());
    TypeInfo typeInfo = aoig.getColumnTypes().get(0);
    assertEquals(ObjectInspector.Category.MAP, typeInfo.getCategory());
    assertTrue(typeInfo instanceof MapTypeInfo);
    MapTypeInfo mapTypeInfo = (MapTypeInfo)typeInfo;

    assertEquals("bigint" /* == long in Avro */, mapTypeInfo.getMapValueTypeInfo().getTypeName());
    assertEquals("string", mapTypeInfo.getMapKeyTypeInfo().getTypeName());
  }

  @Test
  public void canHandleArrays() throws SerDeException {
    Schema s = Schema.parse(ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);

    // Column names
    assertEquals(1, aoig.getColumnNames().size());
    assertEquals("anArray", aoig.getColumnNames().get(0));

    // Column types
    assertEquals(1, aoig.getColumnTypes().size());
    TypeInfo typeInfo = aoig.getColumnTypes().get(0);
    assertEquals(ObjectInspector.Category.LIST, typeInfo.getCategory());
    assertTrue(typeInfo instanceof ListTypeInfo);
    ListTypeInfo listTypeInfo = (ListTypeInfo)typeInfo;

    assertEquals("string", listTypeInfo.getListElementTypeInfo().getTypeName());
  }

  @Test
  public void canHandleRecords() throws SerDeException {
    Schema s = Schema.parse(RECORD_SCHEMA);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);

    // Column names
    assertEquals(1, aoig.getColumnNames().size());
    assertEquals("aRecord", aoig.getColumnNames().get(0));

    // Column types
    assertEquals(1, aoig.getColumnTypes().size());
    TypeInfo typeInfo = aoig.getColumnTypes().get(0);
    assertEquals(ObjectInspector.Category.STRUCT, typeInfo.getCategory());
    assertTrue(typeInfo instanceof StructTypeInfo);
    StructTypeInfo structTypeInfo = (StructTypeInfo)typeInfo;

    // Check individual elements of subrecord
    ArrayList<String> allStructFieldNames = structTypeInfo.getAllStructFieldNames();
    ArrayList<TypeInfo> allStructFieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    assertEquals(allStructFieldNames.size(), 3);
    String[] names = new String[]{"int1", "boolean1", "long1"};
    String [] typeInfoStrings = new String [] {"int", "boolean", "bigint"};
    for(int i = 0; i < allStructFieldNames.size(); i++) {
      assertEquals("Fieldname " + allStructFieldNames.get(i) + " doesn't match expected " + names[i], names[i], allStructFieldNames.get(i));
      assertEquals("Typeinfo " + allStructFieldTypeInfos.get(i) + " doesn't match expected " + typeInfoStrings[i], typeInfoStrings[i], allStructFieldTypeInfos.get(i).getTypeName());
    }
  }

  @Test
  public void canHandleUnions() throws SerDeException {
    Schema s = Schema.parse(UNION_SCHEMA);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);

    // Column names
    assertEquals(1, aoig.getColumnNames().size());
    assertEquals("aUnion", aoig.getColumnNames().get(0));

    // Column types
    assertEquals(1, aoig.getColumnTypes().size());
    TypeInfo typeInfo = aoig.getColumnTypes().get(0);
    assertTrue(typeInfo instanceof UnionTypeInfo);
    UnionTypeInfo uti = (UnionTypeInfo)typeInfo;

    // Check that the union has come out unscathed. No scathing of unions allowed.
    List<TypeInfo> typeInfos = uti.getAllUnionObjectTypeInfos();
    assertEquals(2, typeInfos.size());
    assertEquals(INT, typeInfos.get(0));
    assertEquals(STRING, typeInfos.get(1));
    assertEquals("uniontype<int,string>", uti.getTypeName());
  }

  @Test // Enums are one of two Avro types that Hive doesn't have any native support for.
  public void canHandleEnums() throws SerDeException {
    Schema s = Schema.parse(ENUM_SCHEMA);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);

    // Column names - we lose the enumness of this schema
    assertEquals(1, aoig.getColumnNames().size());
    assertEquals("baddies", aoig.getColumnNames().get(0));

    // Column types
    assertEquals(1, aoig.getColumnTypes().size());
    assertEquals(STRING, aoig.getColumnTypes().get(0));
  }

  @Test // Hive has no concept of Avro's fixed type.  Fixed -> arrays of bytes
  public void canHandleFixed() throws SerDeException {
    Schema s = Schema.parse(FIXED_SCHEMA);

    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);

    // Column names
    assertEquals(1, aoig.getColumnNames().size());
    assertEquals("hash", aoig.getColumnNames().get(0));

    // Column types
    assertEquals(1, aoig.getColumnTypes().size());
    TypeInfo typeInfo = aoig.getColumnTypes().get(0);
    assertTrue(typeInfo instanceof ListTypeInfo);
    ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
    assertTrue(listTypeInfo.getListElementTypeInfo() instanceof PrimitiveTypeInfo);
    assertEquals("tinyint", listTypeInfo.getListElementTypeInfo().getTypeName());
  }

  @Test // Avro considers bytes primitive, Hive doesn't. Make them list of tinyint.
  public void canHandleBytes() throws SerDeException {
    Schema s = Schema.parse(BYTES_SCHEMA);

    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);

    // Column names
    assertEquals(1, aoig.getColumnNames().size());
    assertEquals("bytesField", aoig.getColumnNames().get(0));

    // Column types
    assertEquals(1, aoig.getColumnTypes().size());
    TypeInfo typeInfo = aoig.getColumnTypes().get(0);
    assertTrue(typeInfo instanceof ListTypeInfo);
    ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
    assertTrue(listTypeInfo.getListElementTypeInfo() instanceof PrimitiveTypeInfo);
    assertEquals("tinyint", listTypeInfo.getListElementTypeInfo().getTypeName());
  }

  @Test // That Union[T, NULL] is converted to just T.
  public void convertsNullableTypes() throws SerDeException {
    Schema s = Schema.parse(NULLABLE_STRING_SCHEMA);

    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    assertEquals(1, aoig.getColumnNames().size());
    assertEquals("nullableString", aoig.getColumnNames().get(0));

    // Column types
    assertEquals(1, aoig.getColumnTypes().size());
    TypeInfo typeInfo = aoig.getColumnTypes().get(0);
    assertTrue(typeInfo instanceof PrimitiveTypeInfo);
    PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
    // Verify the union has been hidden and just the main type has been returned.
    assertEquals(PrimitiveObjectInspector.PrimitiveCategory.STRING, pti.getPrimitiveCategory());
  }

  @Test
  public void objectInspectorsAreCached() throws SerDeException {
    // Verify that Hive is caching the object inspectors for us.
    Schema s = Schema.parse(KITCHEN_SINK_SCHEMA);
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);

    Schema s2 = Schema.parse(KITCHEN_SINK_SCHEMA);
    AvroObjectInspectorGenerator aoig2 = new AvroObjectInspectorGenerator(s2);


    assertEquals(aoig.getObjectInspector(), aoig2.getObjectInspector());
    // For once we actually want reference equality in Java.
    assertTrue(aoig.getObjectInspector() == aoig2.getObjectInspector());
  }
}
TOP

Related Classes of com.linkedin.haivvreo.TestAvroObjectInspectorGenerator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.