/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.hbase.avro;
import org.kitesdk.data.DatasetException;
import org.kitesdk.data.spi.PartitionKey;
import org.kitesdk.data.hbase.avro.entities.ArrayRecord;
import org.kitesdk.data.hbase.avro.entities.EmbeddedRecord;
import org.kitesdk.data.hbase.avro.entities.TestEnum;
import org.kitesdk.data.hbase.avro.entities.TestIncrement;
import org.kitesdk.data.hbase.avro.entities.TestRecord;
import org.kitesdk.data.hbase.impl.Dao;
import org.kitesdk.data.hbase.impl.EntityBatch;
import org.kitesdk.data.hbase.impl.EntityScanner;
import org.kitesdk.data.hbase.testing.HBaseTestUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
public class AvroDaoTest {
private static final String schemaString;
private static final String incrementSchemaString;
private static final String tableName = "test_table";
private static final String incrementTableName = "test_increment_table";
private HTablePool tablePool;
static {
try {
schemaString = AvroUtils.inputStreamToString(AvroDaoTest.class
.getResourceAsStream("/TestRecord.avsc"));
incrementSchemaString = AvroUtils.inputStreamToString(AvroDaoTest.class
.getResourceAsStream("/TestIncrement.avsc"));
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
 * One-time setup: calls HBaseTestUtils.getMiniCluster() (presumably to make
 * sure the shared mini cluster is available) and then creates both test
 * tables with the column families "meta", "string", "embedded" and "_s".
 */
@BeforeClass
public static void beforeClass() throws Exception {
  HBaseTestUtils.getMiniCluster();

  final byte[][] columnFamilies = new byte[][] {
      Bytes.toBytes("meta"),
      Bytes.toBytes("string"),
      Bytes.toBytes("embedded"),
      Bytes.toBytes("_s")
  };

  // Both tables share the same set of column families.
  HBaseTestUtils.util.createTable(Bytes.toBytes(tableName), columnFamilies);
  HBaseTestUtils.util.createTable(Bytes.toBytes(incrementTableName),
      columnFamilies);
}
/**
 * One-time teardown: deletes the record table and then the increment table.
 */
@AfterClass
public static void afterClass() throws Exception {
  final String[] tablesToDrop = { tableName, incrementTableName };
  for (String table : tablesToDrop) {
    HBaseTestUtils.util.deleteTable(Bytes.toBytes(table));
  }
}
/**
 * Per-test setup: empties both test tables so no rows leak from one test
 * into the next, then creates a fresh table pool (max size 10) for the DAOs.
 */
@Before
public void beforeTest() throws Exception {
  HBaseTestUtils.util.truncateTable(Bytes.toBytes(tableName));
  // The increment table is written by testIncrement(); truncate it as well so
  // its initial put never collides with a row left behind by an earlier run.
  HBaseTestUtils.util.truncateTable(Bytes.toBytes(incrementTableName));
  tablePool = new HTablePool(HBaseTestUtils.getConf(), 10);
}
/**
 * Per-test teardown: closes the table pool created by beforeTest().
 * Cleanup is best-effort and never fails the test.
 */
@After
public void afterTest() {
  if (tablePool == null) {
    // beforeTest() failed before the pool was created; nothing to release.
    return;
  }
  try {
    tablePool.close();
  } catch (Exception ignored) {
    // Deliberately ignored: a failure to close the pool must not mask the
    // outcome of the test that just ran.
  }
}
/**
 * Exercises put, get, an unbounded scan, a start-key-bounded scan and delete
 * through a {@link GenericAvroDao} built on the TestRecord schema.
 */
@Test
public void testGeneric() throws Exception {
  Dao<GenericRecord> dao = new GenericAvroDao(tablePool, tableName,
      schemaString);

  // Parse the schema once; it is the same for every record created below.
  @SuppressWarnings("deprecation")
  Schema schema = Schema.parse(schemaString);
  for (int i = 0; i < 10; ++i) {
    GenericRecord entity = new GenericData.Record(schema);
    entity.put("keyPart1", "part1_" + i);
    entity.put("keyPart2", "part2_" + i);
    entity.put("field1", "field1_" + i);
    entity.put("field2", "field2_" + i);
    dao.put(entity);
  }

  // Every entity must be retrievable by its two-part key.
  for (int i = 0; i < 10; ++i) {
    PartitionKey key = new PartitionKey(
        "part1_" + Integer.toString(i), "part2_" + Integer.toString(i));
    GenericRecord genericRecord = dao.get(key);
    assertEquals("field1_" + i, genericRecord.get("field1").toString());
    assertEquals("field2_" + i, genericRecord.get("field2").toString());
  }

  // Unbounded scan: expects entities 0..9 in index order.
  assertGenericScan(dao.getScanner(), 0);

  // Scan starting at the partial key "part1_5": expects entities 5..9.
  PartitionKey startKey = new PartitionKey("part1_5");
  assertGenericScan(dao.getScanner(startKey, null), 5);

  // A deleted entity must no longer be returned by get.
  PartitionKey key = new PartitionKey("part1_5", "part2_5");
  dao.delete(key);
  GenericRecord deletedRecord = dao.get(key);
  assertNull(deletedRecord);
}

/**
 * Initializes the scanner, checks that it yields entities whose field1 and
 * field2 suffixes count up from firstIndex and stop after index 9, and
 * always closes the scanner afterwards.
 *
 * @param scanner    scanner to consume; closed before this method returns
 * @param firstIndex index suffix expected on the first entity returned
 */
private static void assertGenericScan(EntityScanner<GenericRecord> scanner,
    int firstIndex) throws Exception {
  scanner.initialize();
  try {
    int cnt = firstIndex;
    for (GenericRecord entity : scanner) {
      assertEquals("field1_" + cnt, entity.get("field1").toString());
      assertEquals("field2_" + cnt, entity.get("field2").toString());
      cnt++;
    }
    assertEquals(10, cnt);
  } finally {
    scanner.close();
  }
}
/**
 * Exercises put, get, scans (unbounded, start-key only, stop-key only) and
 * delete through a {@link SpecificAvroDao} for TestRecord, verifying every
 * field of the entities produced by createSpecificEntities(10).
 */
@Test
public void testSpecific() throws Exception {
  Dao<TestRecord> dao = new SpecificAvroDao<TestRecord>(tablePool,
      tableName, schemaString, TestRecord.class);

  for (TestRecord testRecord : this.createSpecificEntities(10)) {
    assertTrue(dao.put(testRecord));
  }

  for (int i = 0; i < 10; ++i) {
    PartitionKey partitionKey = new PartitionKey("part1_" + i, "part2_" + i);
    TestRecord record = dao.get(partitionKey);
    assertEquals("field1_" + i, record.getField1());
    assertEquals("field2_" + i, record.getField2());
    assertEquals(TestEnum.ENUM3, record.getEnum$());
    assertEquals("field3_value_1_" + i,
        record.getField3().get("field3_key_1_" + i));
    assertEquals("field3_value_2_" + i,
        record.getField3().get("field3_key_2_" + i));
    assertEquals("embedded1_" + i, record.getField4().getEmbeddedField1());
    assertEquals(i, (long) record.getField4().getEmbeddedField2());
    assertEquals(2, record.getField5().size());
    // check 1st subrecord
    assertEquals("subfield1_" + i, record.getField5().get(0).getSubfield1());
    assertEquals(i, (long) record.getField5().get(0).getSubfield2());
    assertEquals("subfield3_" + i, record.getField5().get(0).getSubfield3());
    // check 2nd subrecord
    assertEquals("subfield4_" + i, record.getField5().get(1).getSubfield1());
    assertEquals(i, (long) record.getField5().get(1).getSubfield2());
    assertEquals("subfield6_" + i, record.getField5().get(1).getSubfield3());
  }

  // Unbounded scan: expects entities 0..9 in index order.
  int cnt = 0;
  EntityScanner<TestRecord> entityScanner = dao.getScanner();
  entityScanner.initialize();
  try {
    for (TestRecord entity : entityScanner) {
      assertEquals("field1_" + cnt, entity.getField1());
      assertEquals("field2_" + cnt, entity.getField2());
      cnt++;
    }
    assertEquals(10, cnt);
  } finally {
    entityScanner.close();
  }

  // Test scanner with null keys
  PartitionKey key1 = new PartitionKey("part1_5");

  // Start key only: the first entity returned must be number 5.
  entityScanner = dao.getScanner(key1, null);
  entityScanner.initialize();
  try {
    assertEquals("field1_5", entityScanner.iterator().next().getField1());
  } finally {
    // Close the scanner even when the assertion fails.
    entityScanner.close();
  }

  // Stop key only: the first entity returned must be number 0.
  entityScanner = dao.getScanner(null, key1);
  entityScanner.initialize();
  try {
    assertEquals("field1_0", entityScanner.iterator().next().getField1());
  } finally {
    entityScanner.close();
  }

  // A deleted entity must no longer be returned by get.
  PartitionKey deleteKey = new PartitionKey("part1_5", "part2_5");
  dao.delete(deleteKey);
  assertNull(dao.get(deleteKey));
}
/**
 * Stores a TestIncrement whose field1 is 10, increments field1 by 5 through
 * the DAO, and checks that both the value returned by increment and the
 * value read back with get are 15.
 */
@Test
public void testIncrement() {
  Dao<TestIncrement> dao = new SpecificAvroDao<TestIncrement>(
      tablePool, incrementTableName, incrementSchemaString,
      TestIncrement.class);

  TestIncrement initial = TestIncrement.newBuilder()
      .setKeyPart1("part1")
      .setKeyPart2("part2")
      .setField1(10)
      .build();
  assertTrue(dao.put(initial));

  PartitionKey rowKey = new PartitionKey("part1", "part2");
  long afterIncrement = dao.increment(rowKey, "field1", 5);
  assertEquals(15L, afterIncrement);

  // The stored value must reflect the increment as well.
  assertEquals(15L, (long) dao.get(rowKey).getField1());
}
/**
 * Verifies that put reports a conflict (returns false) when the entity being
 * written was read before another write to the same key, and succeeds again
 * once the latest state has been re-read.
 */
@Test
public void testConflict() throws Exception {
  Dao<TestRecord> dao = new SpecificAvroDao<TestRecord>(
      tablePool, tableName, schemaString, TestRecord.class);

  // Initial write of a brand-new entity.
  TestRecord original = createSpecificEntity("part1", "part2");
  assertTrue(dao.put(original));

  // Read the same row twice and modify each copy independently.
  PartitionKey rowKey = new PartitionKey("part1", "part2");
  TestRecord firstCopy = TestRecord.newBuilder(dao.get(rowKey))
      .setField1("part1_1")
      .build();
  TestRecord secondCopy = TestRecord.newBuilder(dao.get(rowKey))
      .setField1("part1_2")
      .build();

  // The first write wins; the second one is based on outdated state.
  assertTrue(dao.put(firstCopy));
  assertFalse(dao.put(secondCopy));

  // Re-reading picks up the first write, and a put based on it succeeds.
  secondCopy = dao.get(rowKey);
  assertEquals("part1_1", secondCopy.getField1());
  secondCopy = TestRecord.newBuilder(secondCopy)
      .setField1("part1_2")
      .build();
  assertTrue(dao.put(secondCopy));

  // The row now carries the value from the last successful put.
  TestRecord latest = dao.get(rowKey);
  assertEquals("part1_2", latest.getField1());

  // Writing the never-updated original entity again must conflict.
  assertFalse(dao.put(original));
}
/**
 * Round-trips a TestRecord whose map field (field3) and array field (field5)
 * are both empty, and checks that they come back as empty collections along
 * with the other field values.
 */
@Test
public void testEmptyCollections() throws Exception {
  Dao<TestRecord> dao = new SpecificAvroDao<TestRecord>(
      tablePool, tableName, schemaString, TestRecord.class);

  Map<String, String> emptyMap = new HashMap<String, String>();
  List<ArrayRecord> emptyList = new ArrayList<ArrayRecord>();
  EmbeddedRecord embedded = EmbeddedRecord.newBuilder()
      .setEmbeddedField1("embedded1")
      .setEmbeddedField2(2)
      .build();

  TestRecord toStore = TestRecord.newBuilder()
      .setKeyPart1("part1")
      .setKeyPart2("part2")
      .setField1("field1")
      .setField2("field2")
      .setEnum$(TestEnum.ENUM3)
      .setField3(emptyMap)
      .setField4(embedded)
      .setField5(emptyList)
      .build();
  assertTrue(dao.put(toStore));

  TestRecord loaded = dao.get(new PartitionKey("part1", "part2"));
  assertEquals("field1", loaded.getField1());
  assertEquals("field2", loaded.getField2());
  assertEquals(TestEnum.ENUM3, loaded.getEnum$());
  assertEquals(0, loaded.getField3().size());
  assertEquals("embedded1", loaded.getField4().getEmbeddedField1());
  assertEquals(2L, (long) loaded.getField4().getEmbeddedField2());
  assertEquals(0, loaded.getField5().size());
}
/**
 * Regression test for the deleteColumn vs deleteColumns issue: after an
 * entity has been written several times, delete must remove every stored
 * version so that a subsequent get returns null.
 */
@Test
public void testDeleteAfterMultiplePuts() throws Exception {
  Dao<TestRecord> dao = new SpecificAvroDao<TestRecord>(tablePool,
      tableName, schemaString, TestRecord.class);

  for (int i = 0; i < 10; ++i) {
    TestRecord entity = createSpecificEntity("part1_" + i, "part2_" + i);
    assertTrue(dao.put(entity));
  }

  // get and put it a couple of times to build up versions
  PartitionKey key = new PartitionKey("part1_5", "part2_5");
  for (int round = 0; round < 2; ++round) {
    TestRecord entity = dao.get(key);
    // Assert the write went through: if a put were silently rejected, no
    // extra version would exist and the check below would prove nothing.
    assertTrue(dao.put(entity));
  }

  // now make sure the dao removes all versions of all columns
  dao.delete(key);
  TestRecord deletedRecord = dao.get(key);
  assertNull(deletedRecord);
}
/**
 * Writes 100 entities through an {@link EntityBatch}, closes the batch, and
 * checks that each entity can then be read back through the DAO.
 */
@Test
public void testBatchPutOperation() throws Exception {
  Dao<TestRecord> dao = new SpecificAvroDao<TestRecord>(tablePool, tableName,
      schemaString, TestRecord.class);

  EntityBatch<TestRecord> batch = dao.newBatch();
  batch.initialize();
  try {
    for (TestRecord entity : createSpecificEntities(100)) {
      batch.put(entity);
    }
  } finally {
    // Always release the batch, even when one of the puts throws.
    batch.close();
  }

  for (int i = 0; i < 100; i++) {
    PartitionKey key = new PartitionKey("part1_" + i, "part2_" + i);
    TestRecord record = dao.get(key);
    assertEquals("field1_" + i, record.getField1());
  }
}
/**
 * Putting a generic record whose second key part is null is expected to
 * raise a DatasetException.
 */
@Test(expected = DatasetException.class)
public void testPutWithNullKey() throws Exception {
  Dao<GenericRecord> dao =
      new GenericAvroDao(tablePool, tableName, schemaString);

  @SuppressWarnings("deprecation")
  GenericRecord recordWithNullKeyPart =
      new GenericData.Record(Schema.parse(schemaString));
  recordWithNullKeyPart.put("keyPart1", "part1");
  // The null key component is what the expected exception is about.
  recordWithNullKeyPart.put("keyPart2", null);
  recordWithNullKeyPart.put("field1", "field1");
  recordWithNullKeyPart.put("field2", "field2");

  dao.put(recordWithNullKeyPart);
}
/**
 * Builds a TestRecord with the given key parts and fixed content: two map
 * entries in field3, one embedded record in field4, and two array records
 * in field5.
 *
 * @param keyPart1 value for the first key component
 * @param keyPart2 value for the second key component
 * @return the fully populated record
 */
private TestRecord createSpecificEntity(String keyPart1, String keyPart2) {
  Map<String, String> mapField = new HashMap<String, String>();
  mapField.put("field3_key_1", "field3_value_1");
  mapField.put("field3_key_2", "field3_value_2");

  EmbeddedRecord embedded = EmbeddedRecord.newBuilder()
      .setEmbeddedField1("embedded1")
      .setEmbeddedField2(2)
      .build();

  ArrayRecord firstElement = ArrayRecord.newBuilder()
      .setSubfield1("subfield1")
      .setSubfield2(1L)
      .setSubfield3("subfield3")
      .build();
  ArrayRecord secondElement = ArrayRecord.newBuilder()
      .setSubfield1("subfield4")
      .setSubfield2(1L)
      .setSubfield3("subfield6")
      .build();
  List<ArrayRecord> arrayField = new ArrayList<ArrayRecord>(2);
  arrayField.add(firstElement);
  arrayField.add(secondElement);

  return TestRecord.newBuilder()
      .setKeyPart1(keyPart1)
      .setKeyPart2(keyPart2)
      .setField1("field1")
      .setField2("field2")
      .setEnum$(TestEnum.ENUM3)
      .setField3(mapField)
      .setField4(embedded)
      .setField5(arrayField)
      .build();
}
/**
 * Builds cnt TestRecords, numbered 0 to cnt - 1, in index order. Every
 * string value and map key of a record carries the record's index as a
 * suffix, and its numeric fields are set to the index.
 *
 * @param cnt number of records to create
 * @return list of the created records
 */
private List<TestRecord> createSpecificEntities(int cnt) {
  List<TestRecord> result = new ArrayList<TestRecord>();
  int index = 0;
  while (index < cnt) {
    result.add(createIndexedEntity(index));
    index++;
  }
  return result;
}

/** Builds the single TestRecord whose values are all derived from index. */
private TestRecord createIndexedEntity(int index) {
  Map<String, String> mapField = new HashMap<String, String>();
  mapField.put("field3_key_1_" + index, "field3_value_1_" + index);
  mapField.put("field3_key_2_" + index, "field3_value_2_" + index);

  EmbeddedRecord embedded = EmbeddedRecord.newBuilder()
      .setEmbeddedField1("embedded1_" + index)
      .setEmbeddedField2(index)
      .build();

  ArrayRecord firstElement = ArrayRecord.newBuilder()
      .setSubfield1("subfield1_" + index)
      .setSubfield2(index)
      .setSubfield3("subfield3_" + index)
      .build();
  ArrayRecord secondElement = ArrayRecord.newBuilder()
      .setSubfield1("subfield4_" + index)
      .setSubfield2(index)
      .setSubfield3("subfield6_" + index)
      .build();
  List<ArrayRecord> arrayField = new ArrayList<ArrayRecord>(2);
  arrayField.add(firstElement);
  arrayField.add(secondElement);

  return TestRecord.newBuilder()
      .setKeyPart1("part1_" + index)
      .setKeyPart2("part2_" + index)
      .setField1("field1_" + index)
      .setField2("field2_" + index)
      .setEnum$(TestEnum.ENUM3)
      .setField3(mapField)
      .setField4(embedded)
      .setField5(arrayField)
      .build();
}
}