Package org.apache.hadoop.hive.ql.exec.vector.udf

Source Code of org.apache.hadoop.hive.ql.exec.vector.udf.TestVectorUDFAdaptor

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.exec.vector.udf;

import static org.junit.Assert.*;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
import org.apache.hadoop.hive.ql.exec.vector.udf.generic.GenericUDFIsNull;
import org.apache.hadoop.hive.ql.exec.vector.udf.legacy.ConcatTextLongDoubleUDF;
import org.apache.hadoop.hive.ql.exec.vector.udf.legacy.LongUDF;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.junit.Test;

/*
* Test the vectorized UDF adaptor to verify that custom legacy and generic
* UDFs can be run in vectorized mode.
*/

public class TestVectorUDFAdaptor {

  static byte[] blue = null;
  static byte[] red = null;

  static {
    try {
      blue = "blue".getBytes("UTF-8");
      red = "red".getBytes("UTF-8");
    } catch (Exception e) {
      ; // do nothing
    }
  }

  @Test
  public void testLongUDF()  {

    // create a syntax tree for a simple function call "longudf(col0)"
    ExprNodeGenericFuncDesc funcDesc;
    TypeInfo typeInfo = TypeInfoFactory.longTypeInfo;
    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("longudf", false,
        LongUDF.class.getName());
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    ExprNodeColumnDesc colDesc
       = new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false);
    children.add(colDesc);
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
    argDescs[0] = new VectorUDFArgDesc();
    argDescs[0].setVariable(0);
    funcDesc = new ExprNodeGenericFuncDesc(typeInfo, genericUDFBridge,
        genericUDFBridge.getUdfName(), children);

    // create the adaptor for this function call to work in vector mode
    VectorUDFAdaptor vudf = null;
    try {
      vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
    } catch (HiveException e) {

      // We should never get here.
      assertTrue(false);
    }

    VectorizedRowBatch b = getBatchLongInLongOut();
    vudf.evaluate(b);

    // verify output
    LongColumnVector out = (LongColumnVector) b.cols[1];
    assertEquals(1000, out.vector[0]);
    assertEquals(1001, out.vector[1]);
    assertEquals(1002, out.vector[2]);
    assertTrue(out.noNulls);
    assertFalse(out.isRepeating);

    // with nulls
    b = getBatchLongInLongOut();
    out = (LongColumnVector) b.cols[1];
    b.cols[0].noNulls = false;
    vudf.evaluate(b);
    assertFalse(out.noNulls);
    assertEquals(1000, out.vector[0]);
    assertEquals(1001, out.vector[1]);
    assertTrue(out.isNull[2]);
    assertFalse(out.isRepeating);

    // with repeating
    b = getBatchLongInLongOut();
    out = (LongColumnVector) b.cols[1];
    b.cols[0].isRepeating = true;
    vudf.evaluate(b);

    // The implementation may or may not set output it isRepeting.
    // That is implementation-defined.
    assertTrue(b.cols[1].isRepeating && out.vector[0] == 1000
        || !b.cols[1].isRepeating && out.vector[2] == 1000);
    assertEquals(3, b.size);
  }

  @Test
  public void testMultiArgumentUDF() {

    // create a syntax tree for a function call "testudf(col0, col1, col2)"
    ExprNodeGenericFuncDesc funcDesc;
    TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;
    TypeInfo typeInfoLong = TypeInfoFactory.longTypeInfo;
    TypeInfo typeInfoDbl = TypeInfoFactory.doubleTypeInfo;
    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("testudf", false,
        ConcatTextLongDoubleUDF.class.getName());
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false));
    children.add(new ExprNodeColumnDesc(typeInfoLong, "col1", "tablename", false));
    children.add(new ExprNodeColumnDesc(typeInfoDbl, "col2", "tablename", false));

    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[3];
    for (int i = 0; i < 3; i++) {
      argDescs[i] = new VectorUDFArgDesc();
      argDescs[i].setVariable(i);
    }
    funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDFBridge,
        genericUDFBridge.getUdfName(), children);

    // create the adaptor for this function call to work in vector mode
    VectorUDFAdaptor vudf = null;
    try {
      vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
    } catch (HiveException e) {

      // We should never get here.
      assertTrue(false);
      throw new RuntimeException(e);
    }

    // with no nulls
    VectorizedRowBatch b = getBatchStrDblLongWithStrOut();
    vudf.evaluate(b);
    byte[] result = null;
    byte[] result2 = null;
    try {
      result = "red:1:1.0".getBytes("UTF-8");
      result2 = "blue:0:0.0".getBytes("UTF-8");
    } catch (Exception e) {
      ;
    }
    BytesColumnVector out = (BytesColumnVector) b.cols[3];
    int cmp = StringExpr.compare(result, 0, result.length, out.vector[1],
        out.start[1], out.length[1]);
    assertEquals(0, cmp);
    assertTrue(out.noNulls);

    // with nulls
    b = getBatchStrDblLongWithStrOut();
    b.cols[1].noNulls = false;
    vudf.evaluate(b);
    out = (BytesColumnVector) b.cols[3];
    assertFalse(out.noNulls);
    assertTrue(out.isNull[1]);

    // with all input columns repeating
    b = getBatchStrDblLongWithStrOut();
    b.cols[0].isRepeating = true;
    b.cols[1].isRepeating = true;
    b.cols[2].isRepeating = true;
    vudf.evaluate(b);

    out = (BytesColumnVector) b.cols[3];
    assertTrue(out.isRepeating);
    cmp = StringExpr.compare(result2, 0, result2.length, out.vector[0],
        out.start[0], out.length[0]);
    assertEquals(0, cmp);
    assertTrue(out.noNulls);
  }

  private VectorizedRowBatch getBatchLongInLongOut() {
    VectorizedRowBatch b = new VectorizedRowBatch(2);
    LongColumnVector in = new LongColumnVector();
    LongColumnVector out = new LongColumnVector();
    b.cols[0] = in;
    b.cols[1] = out;
    in.vector[0] = 0;
    in.vector[1] = 1;
    in.vector[2] = 2;
    in.isNull[2] = true;
    in.noNulls = true;
    b.size = 3;
    return b;
  }

  private VectorizedRowBatch getBatchStrDblLongWithStrOut() {
    VectorizedRowBatch b = new VectorizedRowBatch(4);
    BytesColumnVector strCol = new BytesColumnVector();
    LongColumnVector longCol = new LongColumnVector();
    DoubleColumnVector dblCol = new DoubleColumnVector();
    BytesColumnVector outCol = new BytesColumnVector();
    b.cols[0] = strCol;
    b.cols[1] = longCol;
    b.cols[2] = dblCol;
    b.cols[3] = outCol;

    strCol.initBuffer();
    strCol.setVal(0, blue, 0, blue.length);
    strCol.setVal(1, red, 0, red.length);
    longCol.vector[0] = 0;
    longCol.vector[1] = 1;
    dblCol.vector[0] = 0.0;
    dblCol.vector[1] = 1.0;

    // set one null value for possible later use
    longCol.isNull[1] = true;

    // but have no nulls initially
    longCol.noNulls = true;
    strCol.noNulls = true;
    dblCol.noNulls = true;
    outCol.initBuffer();
    b.size = 2;
    return b;
  }


  // test the UDF adaptor for a generic UDF (as opposed to a legacy UDF)
  @Test
  public void testGenericUDF() {

    // create a syntax tree for a function call 'myisnull(col0, "UNKNOWN")'
    ExprNodeGenericFuncDesc funcDesc;
    GenericUDF genericUDF = new GenericUDFIsNull();
    TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;

    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false));
    children.add(new ExprNodeConstantDesc(typeInfoStr, "UNKNOWN"));

    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[2];
    for (int i = 0; i < 2; i++) {
      argDescs[i] = new VectorUDFArgDesc();
    }
    argDescs[0].setVariable(0);
    argDescs[1].setConstant((ExprNodeConstantDesc) children.get(1));
    funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDF, "myisnull", children);

    // create the adaptor for this function call to work in vector mode
    VectorUDFAdaptor vudf = null;
    try {
      vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
    } catch (HiveException e) {

      // We should never get here.
      assertTrue(false);
    }

    VectorizedRowBatch b;

    byte[] red = null;
    byte[] unknown = null;
    try {
      red = "red".getBytes("UTF-8");
      unknown = "UNKNOWN".getBytes("UTF-8");
    } catch (Exception e) {
      ;
    }
    BytesColumnVector out;

    // with nulls
    b = getBatchStrDblLongWithStrOut();
    b.cols[0].noNulls = false;
    b.cols[0].isNull[0] = true; // set 1st entry to null
    vudf.evaluate(b);
    out = (BytesColumnVector) b.cols[3];

    // verify outputs
    int cmp = StringExpr.compare(red, 0, red.length,
        out.vector[1], out.start[1], out.length[1]);
    assertEquals(0, cmp);
    cmp = StringExpr.compare(unknown, 0, unknown.length,
        out.vector[0], out.start[0], out.length[0]);
    assertEquals(0, cmp);

    // output entry should not be null for null input for this particular generic UDF
    assertTrue(out.noNulls || !out.isNull[0]);
  }
}
TOP

Related Classes of org.apache.hadoop.hive.ql.exec.vector.udf.TestVectorUDFAdaptor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.