Package org.apache.hadoop.hive.ql.exec.vector.expressions

Source Code of org.apache.hadoop.hive.ql.exec.vector.expressions.TestVectorDateExpressions

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.exec.vector.expressions;

import junit.framework.Assert;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
import org.apache.hadoop.hive.ql.udf.UDFMonth;
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.junit.Test;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Random;

public class TestVectorDateExpressions {
  /* copied over from VectorUDFTimestampFieldLong */
  private TimestampWritable toTimestampWritable(long daysSinceEpoch) {
    Timestamp ts = new Timestamp(DateWritable.daysToMillis((int) daysSinceEpoch));
    return new TimestampWritable(ts);
  }

  private int[] getAllBoundaries() {
    List<Integer> boundaries = new ArrayList<Integer>(1);
    Calendar c = Calendar.getInstance();
    c.setTimeInMillis(0); // c.set doesn't reset millis
    for (int year = 1902; year <= 2038; year++) {
      c.set(year, Calendar.JANUARY, 1, 0, 0, 0);
      int exactly = (int) (c.getTimeInMillis() / (24 * 60 * 60 * 1000));
      int before = exactly - 1;
      int after = exactly + 1;
      boundaries.add(Integer.valueOf(before));
      boundaries.add(Integer.valueOf(exactly));
      boundaries.add(Integer.valueOf(after));
    }
    Integer[] indices = boundaries.toArray(new Integer[1]);
    return ArrayUtils.toPrimitive(indices);
  }

  private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
    LongColumnVector lcv = new LongColumnVector(size);
    Random rand = new Random(seed);
    for (int i = 0; i < size; i++) {
      lcv.vector[i] = (rand.nextInt());
    }
    batch.cols[0] = lcv;
    batch.cols[1] = new LongColumnVector(size);
    batch.size = size;
    return batch;
  }

  /*
   * Input array is used to fill the entire size of the vector row batch
   */
  private VectorizedRowBatch getVectorizedRowBatch(int[] inputs, int size) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
    LongColumnVector lcv = new LongColumnVector(size);
    for (int i = 0; i < size; i++) {
      lcv.vector[i] = inputs[i % inputs.length];
    }
    batch.cols[0] = lcv;
    batch.cols[1] = new LongColumnVector(size);
    batch.size = size;
    return batch;
  }

  private void compareToUDFYearDate(long t, int y) {
    UDFYear udf = new UDFYear();
    TimestampWritable tsw = toTimestampWritable(t);
    IntWritable res = udf.evaluate(tsw);
    Assert.assertEquals(res.get(), y);
  }

  private void verifyUDFYear(VectorizedRowBatch batch) {
    VectorExpression udf = null;
    udf = new VectorUDFYearLong(0, 1);
    udf.setInputTypes(VectorExpression.Type.DATE);
    udf.evaluate(batch);
    final int in = 0;
    final int out = 1;

    for (int i = 0; i < batch.size; i++) {
      if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
        if (!batch.cols[in].noNulls) {
          Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
        }
        long t = ((LongColumnVector) batch.cols[in]).vector[i];
        long y = ((LongColumnVector) batch.cols[out]).vector[i];
        compareToUDFYearDate(t, (int) y);
      } else {
        Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
      }
    }
  }

  @Test
  public void testVectorUDFYear() {
    VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0},
            VectorizedRowBatch.DEFAULT_SIZE);
    Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
    Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
    verifyUDFYear(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFYear(batch);

    int[] boundaries = getAllBoundaries();
    batch = getVectorizedRowBatch(boundaries, boundaries.length);
    verifyUDFYear(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFYear(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFYear(batch);

    batch = getVectorizedRowBatch(new int[] {0}, 1);
    batch.cols[0].isRepeating = true;
    verifyUDFYear(batch);
    batch.cols[0].noNulls = false;
    batch.cols[0].isNull[0] = true;
    verifyUDFYear(batch);

    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE);
    verifyUDFYear(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFYear(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFYear(batch);
  }

  private void compareToUDFDayOfMonthDate(long t, int y) {
    UDFDayOfMonth udf = new UDFDayOfMonth();
    TimestampWritable tsw = toTimestampWritable(t);
    IntWritable res = udf.evaluate(tsw);
    Assert.assertEquals(res.get(), y);
  }

  private void verifyUDFDayOfMonth(VectorizedRowBatch batch) {
    VectorExpression udf = null;
    udf = new VectorUDFDayOfMonthLong(0, 1);
    udf.setInputTypes(VectorExpression.Type.DATE);
    udf.evaluate(batch);
    final int in = 0;
    final int out = 1;

    for (int i = 0; i < batch.size; i++) {
      if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
        if (!batch.cols[in].noNulls) {
          Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
        }
        long t = ((LongColumnVector) batch.cols[in]).vector[i];
        long y = ((LongColumnVector) batch.cols[out]).vector[i];
        compareToUDFDayOfMonthDate(t, (int) y);
      } else {
        Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
      }
    }
  }

  @Test
  public void testVectorUDFDayOfMonth() {
    VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0},
            VectorizedRowBatch.DEFAULT_SIZE);
    Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
    Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
    verifyUDFDayOfMonth(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFDayOfMonth(batch);

    int[] boundaries = getAllBoundaries();
    batch = getVectorizedRowBatch(boundaries, boundaries.length);
    verifyUDFDayOfMonth(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFDayOfMonth(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFDayOfMonth(batch);

    batch = getVectorizedRowBatch(new int[] {0}, 1);
    batch.cols[0].isRepeating = true;
    verifyUDFDayOfMonth(batch);
    batch.cols[0].noNulls = false;
    batch.cols[0].isNull[0] = true;
    verifyUDFDayOfMonth(batch);

    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE);
    verifyUDFDayOfMonth(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFDayOfMonth(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFDayOfMonth(batch);
  }

  private void compareToUDFMonthDate(long t, int y) {
    UDFMonth udf = new UDFMonth();
    TimestampWritable tsw = toTimestampWritable(t);
    IntWritable res = udf.evaluate(tsw);
    Assert.assertEquals(res.get(), y);
  }

  private void verifyUDFMonth(VectorizedRowBatch batch) {
    VectorExpression udf;
      udf = new VectorUDFMonthLong(0, 1);
    udf.setInputTypes(VectorExpression.Type.DATE);
    udf.evaluate(batch);
    final int in = 0;
    final int out = 1;

    for (int i = 0; i < batch.size; i++) {
      if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
        if (!batch.cols[in].noNulls) {
          Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
        }
        long t = ((LongColumnVector) batch.cols[in]).vector[i];
        long y = ((LongColumnVector) batch.cols[out]).vector[i];
        compareToUDFMonthDate(t, (int) y);
      } else {
        Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
      }
    }
  }

  @Test
  public void testVectorUDFMonth() {
    VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0},
            VectorizedRowBatch.DEFAULT_SIZE);
    Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
    Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
    verifyUDFMonth(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFMonth(batch);

    int[] boundaries = getAllBoundaries();
    batch = getVectorizedRowBatch(boundaries, boundaries.length);
    verifyUDFMonth(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFMonth(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFMonth(batch);

    batch = getVectorizedRowBatch(new int[] {0}, 1);
    batch.cols[0].isRepeating = true;
    verifyUDFMonth(batch);
    batch.cols[0].noNulls = false;
    batch.cols[0].isNull[0] = true;
    verifyUDFMonth(batch);

    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE);
    verifyUDFMonth(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFMonth(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFMonth(batch);
  }

  private LongWritable getLongWritable(TimestampWritable i) {
    LongWritable result = new LongWritable();
    if (i == null) {
      return null;
    } else {
      result.set(i.getSeconds());
      return result;
    }
  }

  private void compareToUDFUnixTimeStampDate(long t, long y) {
    TimestampWritable tsw = toTimestampWritable(t);
    LongWritable res = getLongWritable(tsw);
    if(res.get() != y) {
      System.out.printf("%d vs %d for %d, %d\n", res.get(), y, t,
              tsw.getTimestamp().getTime()/1000);
    }

    Assert.assertEquals(res.get(), y);
  }

  private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch) {
    VectorExpression udf;
    udf = new VectorUDFUnixTimeStampLong(0, 1);
    udf.setInputTypes(VectorExpression.Type.DATE);
    udf.evaluate(batch);
    final int in = 0;
    final int out = 1;

    for (int i = 0; i < batch.size; i++) {
      if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
        if (!batch.cols[out].noNulls) {
          Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
        }
        long t = ((LongColumnVector) batch.cols[in]).vector[i];
        long y = ((LongColumnVector) batch.cols[out]).vector[i];
        compareToUDFUnixTimeStampDate(t, y);
      } else {
        Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
      }
    }
  }

  @Test
  public void testVectorUDFUnixTimeStamp() {
    VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0},
            VectorizedRowBatch.DEFAULT_SIZE);
    Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
    Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
    verifyUDFUnixTimeStamp(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFUnixTimeStamp(batch);

    int[] boundaries = getAllBoundaries();
    batch = getVectorizedRowBatch(boundaries, boundaries.length);
    verifyUDFUnixTimeStamp(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFUnixTimeStamp(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFUnixTimeStamp(batch);

    batch = getVectorizedRowBatch(new int[] {0}, 1);
    batch.cols[0].isRepeating = true;
    verifyUDFUnixTimeStamp(batch);
    batch.cols[0].noNulls = false;
    batch.cols[0].isNull[0] = true;
    verifyUDFUnixTimeStamp(batch);

    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE);
    verifyUDFUnixTimeStamp(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFUnixTimeStamp(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFUnixTimeStamp(batch);
  }

  private void compareToUDFWeekOfYearDate(long t, int y) {
    UDFWeekOfYear udf = new UDFWeekOfYear();
    TimestampWritable tsw = toTimestampWritable(t);
    IntWritable res = udf.evaluate(tsw);
    Assert.assertEquals(res.get(), y);
  }

  private void verifyUDFWeekOfYear(VectorizedRowBatch batch) {
    VectorExpression udf;
    udf = new VectorUDFWeekOfYearLong(0, 1);
    udf.setInputTypes(VectorExpression.Type.DATE);
    udf.evaluate(batch);
    final int in = 0;
    final int out = 1;

    for (int i = 0; i < batch.size; i++) {
      if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) {
        long t = ((LongColumnVector) batch.cols[in]).vector[i];
        long y = ((LongColumnVector) batch.cols[out]).vector[i];
        compareToUDFWeekOfYearDate(t, (int) y);
      } else {
        Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]);
      }
    }
  }

  @Test
  public void testVectorUDFWeekOfYear() {
    VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0},
            VectorizedRowBatch.DEFAULT_SIZE);
    Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls);
    Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating);
    verifyUDFWeekOfYear(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFWeekOfYear(batch);

    int[] boundaries = getAllBoundaries();
    batch = getVectorizedRowBatch(boundaries, boundaries.length);
    verifyUDFWeekOfYear(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFWeekOfYear(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFWeekOfYear(batch);

    batch = getVectorizedRowBatch(new int[] {0}, 1);
    batch.cols[0].isRepeating = true;
    verifyUDFWeekOfYear(batch);
    batch.cols[0].noNulls = false;
    batch.cols[0].isNull[0] = true;
    verifyUDFWeekOfYear(batch);

    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE);
    verifyUDFWeekOfYear(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
    verifyUDFWeekOfYear(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[1]);
    verifyUDFWeekOfYear(batch);
  }

  public static void main(String[] args) {
    TestVectorDateExpressions self = new TestVectorDateExpressions();
    self.testVectorUDFYear();
    self.testVectorUDFMonth();
    self.testVectorUDFDayOfMonth();
    self.testVectorUDFWeekOfYear();
    self.testVectorUDFUnixTimeStamp();
  }
}
TOP

Related Classes of org.apache.hadoop.hive.ql.exec.vector.expressions.TestVectorDateExpressions

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.