@Test
public void testSubstrStartLen() throws UnsupportedEncodingException {
// Testing no nulls and no repeating
VectorizedRowBatch batch = new VectorizedRowBatch(2);
BytesColumnVector v = new BytesColumnVector();
batch.cols[0] = v;
BytesColumnVector outV = new BytesColumnVector();
batch.cols[1] = outV;
byte[] data1 = "abcd string".getBytes("UTF-8");
byte[] data2 = "efgh string".getBytes("UTF-8");
byte[] data3 = "efgh".getBytes("UTF-8");
batch.size = 3;
v.noNulls = true;
v.setRef(0, data1, 0, data1.length);
v.isNull[0] = false;
v.setRef(1, data2, 0, data2.length);
v.isNull[1] = false;
v.setRef(2, data3, 0, data3.length);
v.isNull[2] = false;
outV.isRepeating = true;
outV.noNulls = false;
StringSubstrColStartLen expr = new StringSubstrColStartLen(0, 6, 6, 1);
expr.evaluate(batch);
BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
byte[] expected = "string".getBytes("UTF-8");
Assert.assertEquals(0,
StringExpr.compare(
expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
)
);
Assert.assertEquals(0,
StringExpr.compare(
expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
)
);
Assert.assertEquals(0,
StringExpr.compare(
emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
)
);
// Testing negative substring index
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, -6, 6, 1);
expr.evaluate(batch);
outCol = (BytesColumnVector) batch.cols[1];
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
Assert.assertEquals(3, batch.size);
Assert.assertEquals(0,
StringExpr.compare(
expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
)
);
Assert.assertEquals(0,
StringExpr.compare(
expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
)
);
// This yields empty because starting index is out of bounds
Assert.assertEquals(0,
StringExpr.compare(
emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
)
);
//Testing substring index starting with 1 and zero length
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, 1, 0, 1);
outCol = (BytesColumnVector) batch.cols[1];
expr.evaluate(batch);
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
Assert.assertEquals(0,
StringExpr.compare(
data1, 1, 0, outCol.vector[0], outCol.start[0], outCol.length[0]
)
);
Assert.assertEquals(0,
StringExpr.compare(
data2, 1, 0, outCol.vector[1], outCol.start[1], outCol.length[1]
)
);
Assert.assertEquals(0,
StringExpr.compare(
data3, 1, 0, outCol.vector[2], outCol.start[2], outCol.length[2]
)
);
//Testing substring index starting with 0 and length equal to array length
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, 0, 11, 1);
outCol = (BytesColumnVector) batch.cols[1];
expr.evaluate(batch);
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
Assert.assertEquals(0,
StringExpr.compare(
data1, 0, data1.length, outCol.vector[0], outCol.start[0], outCol.length[0]
)
);
Assert.assertEquals(0,
StringExpr.compare(
data2, 0, data2.length, outCol.vector[1], outCol.start[1], outCol.length[1]
)
);
Assert.assertEquals(0,
StringExpr.compare(
data3, 0, data3.length, outCol.vector[2], outCol.start[2], outCol.length[2]
)
);
// Testing setting length larger than array length, which should cap to the length itself
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, 6, 10, 1);
expr.evaluate(batch);
outCol = (BytesColumnVector) batch.cols[1];
Assert.assertEquals(3, batch.size);
Assert.assertTrue(outCol.noNulls);
Assert.assertFalse(outCol.isRepeating);
Assert.assertEquals(0,
StringExpr.compare(
expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
)
);
Assert.assertEquals(0,
StringExpr.compare(
expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
)
);
Assert.assertEquals(0,
StringExpr.compare(
emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
)
);
outV.isRepeating = true;
outV.noNulls = true;
// Testing with nulls
v.noNulls = false;
v.isNull[0] = true;
expr.evaluate(batch);
Assert.assertEquals(3, batch.size);
Assert.assertFalse(outV.noNulls);
Assert.assertTrue(outV.isNull[0]);
Assert.assertFalse(outCol.isRepeating);
Assert.assertEquals(0,
StringExpr.compare(
expected, 0, expected.length, outCol.vector[1], outCol.start[1], outCol.length[1]
)
);
Assert.assertEquals(0,
StringExpr.compare(
emptyString, 0, emptyString.length, outCol.vector[2], outCol.start[2], outCol.length[2]
)
);
// Testing with repeating and no nulls
outV = new BytesColumnVector();
v = new BytesColumnVector();
outV.isRepeating = false;
outV.noNulls = true;
v.isRepeating = true;
v.noNulls = false;
v.setRef(0, data1, 0, data1.length);
batch = new VectorizedRowBatch(2);
batch.cols[0] = v;
batch.cols[1] = outV;
expr.evaluate(batch);
outCol = (BytesColumnVector) batch.cols[1];
Assert.assertTrue(outCol.noNulls);
Assert.assertTrue(outCol.isRepeating);
Assert.assertEquals(0,
StringExpr.compare(
expected, 0, expected.length, outCol.vector[0], outCol.start[0], outCol.length[0]
)
);
// Testing with multiByte String
v = new BytesColumnVector();
v.isRepeating = false;
v.noNulls = true;
batch.size = 1;
v.setRef(0, multiByte, 0, 10);
batch.cols[0] = v;
batch.cols[1] = outV;
outV.isRepeating = true;
outV.noNulls = false;
expr = new StringSubstrColStartLen(0, 3, 2, 1);
expr.evaluate(batch);
Assert.assertEquals(1, batch.size);
Assert.assertFalse(outV.isRepeating);
Assert.assertTrue(outV.noNulls);
Assert.assertEquals(0,
StringExpr.compare(
// 3rd char starts at index 3, and with length 2 it is covering the rest of the array.
multiByte, 3, 10 - 3, outCol.vector[0], outCol.start[0], outCol.length[0]
)
);
// Testing multiByte string with reference set to mid array
v = new BytesColumnVector();
v.isRepeating = false;
v.noNulls = true;
outV = new BytesColumnVector();
batch.size = 1;
v.setRef(0, multiByte, 3, 7);
batch.cols[0] = v;
batch.cols[1] = outV;
outV.isRepeating = true;