/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import junit.framework.Assert;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
import org.apache.hadoop.hive.ql.udf.UDFMonth;
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.junit.Test;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Random;
/**
 * Tests the vectorized date-field expressions (year, month, day of month, week of year and
 * unix timestamp) when their input column is declared as {@code VectorExpression.Type.DATE}.
 *
 * <p>Each test feeds batches of day numbers through the vectorized expression and compares
 * every produced value with the value computed for the same day by the corresponding
 * single-value UDF (or, for the unix timestamp, by {@code TimestampWritable.getSeconds()}).
 */
public class TestVectorDateExpressions {

  /** Index of the batch column holding the input day numbers. */
  private static final int IN = 0;

  /** Index of the batch column the expression writes its result to. */
  private static final int OUT = 1;

  /** First and last calendar year (inclusive) whose January 1st is used as boundary input. */
  private static final int FIRST_BOUNDARY_YEAR = 1902;
  private static final int LAST_BOUNDARY_YEAR = 2038;

  private static final long MILLIS_PER_DAY = 24L * 60 * 60 * 1000;

  /** Seed of the pseudo-random batch, fixed so that every run sees the same inputs. */
  private static final int RANDOM_SEED = 200;

  /**
   * Callback used by {@link #runAllScenarios(BatchVerifier)}: evaluates one vectorized
   * expression over the batch and asserts on the output column.
   */
  private interface BatchVerifier {
    void verify(VectorizedRowBatch batch);
  }

  /* copied over from VectorUDFTimestampFieldLong */
  private TimestampWritable toTimestampWritable(long daysSinceEpoch) {
    Timestamp ts = new Timestamp(DateWritable.daysToMillis((int) daysSinceEpoch));
    return new TimestampWritable(ts);
  }

  /**
   * Builds the boundary inputs: for every year in
   * [{@link #FIRST_BOUNDARY_YEAR}, {@link #LAST_BOUNDARY_YEAR}] the day number derived from
   * January 1st 00:00:00 of that year, plus the day before and the day after it.
   *
   * @return three day numbers per year, ordered as before / exactly / after
   */
  private int[] getAllBoundaries() {
    int yearCount = LAST_BOUNDARY_YEAR - FIRST_BOUNDARY_YEAR + 1;
    int[] boundaries = new int[3 * yearCount];
    int next = 0;
    Calendar c = Calendar.getInstance();
    c.setTimeInMillis(0); // c.set doesn't reset millis
    for (int year = FIRST_BOUNDARY_YEAR; year <= LAST_BOUNDARY_YEAR; year++) {
      c.set(year, Calendar.JANUARY, 1, 0, 0, 0);
      // NOTE(review): the calendar uses the default time zone and the division truncates
      // toward zero, so "exactly" may land one day off the real boundary in some zones;
      // the before/after neighbours presumably exist to keep the boundary covered anyway.
      int exactly = (int) (c.getTimeInMillis() / MILLIS_PER_DAY);
      boundaries[next++] = exactly - 1;
      boundaries[next++] = exactly;
      boundaries[next++] = exactly + 1;
    }
    return boundaries;
  }

  /** Wraps {@code input} as column {@link #IN} of a two-column batch with a fresh output column. */
  private static VectorizedRowBatch newBatch(LongColumnVector input, int size) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
    batch.cols[IN] = input;
    batch.cols[OUT] = new LongColumnVector(size);
    batch.size = size;
    return batch;
  }

  /**
   * Creates a batch whose input column holds {@code size} values drawn from
   * {@code new Random(seed).nextInt()}, i.e. day numbers over the whole {@code int} range.
   */
  private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size) {
    LongColumnVector lcv = new LongColumnVector(size);
    Random rand = new Random(seed);
    for (int i = 0; i < size; i++) {
      lcv.vector[i] = rand.nextInt();
    }
    return newBatch(lcv, size);
  }

  /*
   * Input array is used to fill the entire size of the vector row batch
   * (values are repeated cyclically when the array is shorter than the batch).
   */
  private VectorizedRowBatch getVectorizedRowBatch(int[] inputs, int size) {
    LongColumnVector lcv = new LongColumnVector(size);
    for (int i = 0; i < size; i++) {
      lcv.vector[i] = inputs[i % inputs.length];
    }
    return newBatch(lcv, size);
  }

  /**
   * Checks the output null flag of one row and tells the caller whether the row has a value.
   *
   * <p>When the input column may contain nulls, the output null flag of every row must equal
   * the input null flag. All five verify methods share this rule so that they check null
   * propagation identically.
   *
   * @return {@code true} if the input row is not null and its value should be compared
   */
  private static boolean checkNullFlagAndHasValue(VectorizedRowBatch batch, int row) {
    if (batch.cols[IN].noNulls) {
      return true;
    }
    // expected = input flag, actual = flag written by the expression
    Assert.assertEquals(batch.cols[IN].isNull[row], batch.cols[OUT].isNull[row]);
    return !batch.cols[IN].isNull[row];
  }

  private static long inputAt(VectorizedRowBatch batch, int row) {
    return ((LongColumnVector) batch.cols[IN]).vector[row];
  }

  private static long outputAt(VectorizedRowBatch batch, int row) {
    return ((LongColumnVector) batch.cols[OUT]).vector[row];
  }

  /**
   * Runs the batch scenarios shared by every test in this class, calling {@code verifier}
   * after each change to the batch:
   * <ol>
   * <li>a default-sized batch of day 0, then with nulls added to the input column;</li>
   * <li>the year-boundary days, then with nulls added to the input and to the output column;</li>
   * <li>a single repeating value, first non-null and then null;</li>
   * <li>seeded pseudo-random days, then with nulls added to the input and to the output
   * column.</li>
   * </ol>
   */
  private void runAllScenarios(BatchVerifier verifier) {
    VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0},
        VectorizedRowBatch.DEFAULT_SIZE);
    Assert.assertTrue(((LongColumnVector) batch.cols[OUT]).noNulls);
    Assert.assertFalse(((LongColumnVector) batch.cols[OUT]).isRepeating);
    verifier.verify(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[IN]);
    verifier.verify(batch);

    int[] boundaries = getAllBoundaries();
    batch = getVectorizedRowBatch(boundaries, boundaries.length);
    verifyWithInputThenOutputNulls(batch, verifier);

    batch = getVectorizedRowBatch(new int[] {0}, 1);
    batch.cols[IN].isRepeating = true;
    verifier.verify(batch);
    batch.cols[IN].noNulls = false;
    batch.cols[IN].isNull[0] = true;
    verifier.verify(batch);

    batch = getVectorizedRandomRowBatch(RANDOM_SEED, VectorizedRowBatch.DEFAULT_SIZE);
    verifyWithInputThenOutputNulls(batch, verifier);
  }

  /**
   * Verifies the batch as given, again after adding random nulls to the input column, and
   * once more after adding random nulls to the output column.
   */
  private static void verifyWithInputThenOutputNulls(VectorizedRowBatch batch,
      BatchVerifier verifier) {
    verifier.verify(batch);
    TestVectorizedRowBatch.addRandomNulls(batch.cols[IN]);
    verifier.verify(batch);
    // Presumably checks that stale null flags in the output column get overwritten.
    TestVectorizedRowBatch.addRandomNulls(batch.cols[OUT]);
    verifier.verify(batch);
  }

  /** Asserts that {@code y} is the year {@link UDFYear} computes for day number {@code t}. */
  private void compareToUDFYearDate(long t, int y) {
    UDFYear udf = new UDFYear();
    TimestampWritable tsw = toTimestampWritable(t);
    IntWritable res = udf.evaluate(tsw);
    Assert.assertEquals(res.get(), y);
  }

  /** Evaluates {@link VectorUDFYearLong} over the batch and checks every row. */
  private void verifyUDFYear(VectorizedRowBatch batch) {
    VectorExpression udf = new VectorUDFYearLong(IN, OUT);
    udf.setInputTypes(VectorExpression.Type.DATE);
    udf.evaluate(batch);
    for (int i = 0; i < batch.size; i++) {
      if (checkNullFlagAndHasValue(batch, i)) {
        compareToUDFYearDate(inputAt(batch, i), (int) outputAt(batch, i));
      }
    }
  }

  @Test
  public void testVectorUDFYear() {
    runAllScenarios(new BatchVerifier() {
      @Override
      public void verify(VectorizedRowBatch batch) {
        verifyUDFYear(batch);
      }
    });
  }

  /**
   * Asserts that {@code y} is the day of month {@link UDFDayOfMonth} computes for day
   * number {@code t}.
   */
  private void compareToUDFDayOfMonthDate(long t, int y) {
    UDFDayOfMonth udf = new UDFDayOfMonth();
    TimestampWritable tsw = toTimestampWritable(t);
    IntWritable res = udf.evaluate(tsw);
    Assert.assertEquals(res.get(), y);
  }

  /** Evaluates {@link VectorUDFDayOfMonthLong} over the batch and checks every row. */
  private void verifyUDFDayOfMonth(VectorizedRowBatch batch) {
    VectorExpression udf = new VectorUDFDayOfMonthLong(IN, OUT);
    udf.setInputTypes(VectorExpression.Type.DATE);
    udf.evaluate(batch);
    for (int i = 0; i < batch.size; i++) {
      if (checkNullFlagAndHasValue(batch, i)) {
        compareToUDFDayOfMonthDate(inputAt(batch, i), (int) outputAt(batch, i));
      }
    }
  }

  @Test
  public void testVectorUDFDayOfMonth() {
    runAllScenarios(new BatchVerifier() {
      @Override
      public void verify(VectorizedRowBatch batch) {
        verifyUDFDayOfMonth(batch);
      }
    });
  }

  /** Asserts that {@code y} is the month {@link UDFMonth} computes for day number {@code t}. */
  private void compareToUDFMonthDate(long t, int y) {
    UDFMonth udf = new UDFMonth();
    TimestampWritable tsw = toTimestampWritable(t);
    IntWritable res = udf.evaluate(tsw);
    Assert.assertEquals(res.get(), y);
  }

  /** Evaluates {@link VectorUDFMonthLong} over the batch and checks every row. */
  private void verifyUDFMonth(VectorizedRowBatch batch) {
    VectorExpression udf = new VectorUDFMonthLong(IN, OUT);
    udf.setInputTypes(VectorExpression.Type.DATE);
    udf.evaluate(batch);
    for (int i = 0; i < batch.size; i++) {
      if (checkNullFlagAndHasValue(batch, i)) {
        compareToUDFMonthDate(inputAt(batch, i), (int) outputAt(batch, i));
      }
    }
  }

  @Test
  public void testVectorUDFMonth() {
    runAllScenarios(new BatchVerifier() {
      @Override
      public void verify(VectorizedRowBatch batch) {
        verifyUDFMonth(batch);
      }
    });
  }

  /**
   * Returns the seconds of {@code i} wrapped in a {@link LongWritable}, or {@code null}
   * when {@code i} is {@code null}.
   */
  private LongWritable getLongWritable(TimestampWritable i) {
    if (i == null) {
      return null;
    }
    LongWritable result = new LongWritable();
    result.set(i.getSeconds());
    return result;
  }

  /**
   * Asserts that {@code y} equals the seconds value of the timestamp built from day number
   * {@code t}; on a mismatch the values are printed to stdout before the assertion fails.
   */
  private void compareToUDFUnixTimeStampDate(long t, long y) {
    TimestampWritable tsw = toTimestampWritable(t);
    LongWritable res = getLongWritable(tsw);
    if (res.get() != y) {
      System.out.printf("%d vs %d for %d, %d\n", res.get(), y, t,
          tsw.getTimestamp().getTime() / 1000);
    }
    Assert.assertEquals(res.get(), y);
  }

  /** Evaluates {@link VectorUDFUnixTimeStampLong} over the batch and checks every row. */
  private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch) {
    VectorExpression udf = new VectorUDFUnixTimeStampLong(IN, OUT);
    udf.setInputTypes(VectorExpression.Type.DATE);
    udf.evaluate(batch);
    for (int i = 0; i < batch.size; i++) {
      if (checkNullFlagAndHasValue(batch, i)) {
        compareToUDFUnixTimeStampDate(inputAt(batch, i), outputAt(batch, i));
      }
    }
  }

  @Test
  public void testVectorUDFUnixTimeStamp() {
    runAllScenarios(new BatchVerifier() {
      @Override
      public void verify(VectorizedRowBatch batch) {
        verifyUDFUnixTimeStamp(batch);
      }
    });
  }

  /**
   * Asserts that {@code y} is the week of year {@link UDFWeekOfYear} computes for day
   * number {@code t}.
   */
  private void compareToUDFWeekOfYearDate(long t, int y) {
    UDFWeekOfYear udf = new UDFWeekOfYear();
    TimestampWritable tsw = toTimestampWritable(t);
    IntWritable res = udf.evaluate(tsw);
    Assert.assertEquals(res.get(), y);
  }

  /** Evaluates {@link VectorUDFWeekOfYearLong} over the batch and checks every row. */
  private void verifyUDFWeekOfYear(VectorizedRowBatch batch) {
    VectorExpression udf = new VectorUDFWeekOfYearLong(IN, OUT);
    udf.setInputTypes(VectorExpression.Type.DATE);
    udf.evaluate(batch);
    for (int i = 0; i < batch.size; i++) {
      if (checkNullFlagAndHasValue(batch, i)) {
        compareToUDFWeekOfYearDate(inputAt(batch, i), (int) outputAt(batch, i));
      }
    }
  }

  @Test
  public void testVectorUDFWeekOfYear() {
    runAllScenarios(new BatchVerifier() {
      @Override
      public void verify(VectorizedRowBatch batch) {
        verifyUDFWeekOfYear(batch);
      }
    });
  }

  /** Runs every test in this class directly, without a JUnit runner. */
  public static void main(String[] args) {
    TestVectorDateExpressions self = new TestVectorDateExpressions();
    self.testVectorUDFYear();
    self.testVectorUDFMonth();
    self.testVectorUDFDayOfMonth();
    self.testVectorUDFWeekOfYear();
    self.testVectorUDFUnixTimeStamp();
  }
}