/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.data.hbase;
import com.cloudera.cdk.data.DatasetDescriptor;
import com.cloudera.cdk.data.DatasetReader;
import com.cloudera.cdk.data.DatasetWriter;
import com.cloudera.cdk.data.spi.Marker;
import com.cloudera.cdk.data.Key;
import com.cloudera.cdk.data.RandomAccessDataset;
import com.cloudera.cdk.data.hbase.avro.AvroUtils;
import com.cloudera.cdk.data.hbase.avro.entities.ArrayRecord;
import com.cloudera.cdk.data.hbase.avro.entities.EmbeddedRecord;
import com.cloudera.cdk.data.hbase.avro.entities.TestEntity;
import com.cloudera.cdk.data.hbase.avro.entities.TestEnum;
import com.cloudera.cdk.data.hbase.testing.HBaseTestUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
public class HBaseDatasetRepositoryTest {
private static final String testEntity;
private static final String testGenericEntity;
private static final String tableName = "testtable";
private static final String managedTableName = "managed_schemas";
static {
try {
testEntity = AvroUtils.inputStreamToString(HBaseDatasetRepositoryTest.class
.getResourceAsStream("/TestEntity.avsc"));
testGenericEntity = AvroUtils.inputStreamToString(HBaseDatasetRepositoryTest.class
.getResourceAsStream("/TestGenericEntity.avsc"));
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@BeforeClass
public static void beforeClass() throws Exception {
HBaseTestUtils.getMiniCluster();
// managed table should be created by HBaseDatasetRepository
HBaseTestUtils.util.deleteTable(Bytes.toBytes(managedTableName));
}
@AfterClass
public static void afterClass() throws Exception {
HBaseTestUtils.util.deleteTable(Bytes.toBytes(tableName));
}
@After
public void after() throws Exception {
HBaseTestUtils.util.truncateTable(Bytes.toBytes(tableName));
HBaseTestUtils.util.truncateTable(Bytes.toBytes(managedTableName));
}
@Test
@SuppressWarnings("unchecked")
public void testGeneric() throws Exception {
HBaseDatasetRepository repo = new HBaseDatasetRepository.Builder()
.configuration(HBaseTestUtils.getConf()).build();
DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
.schemaLiteral(testGenericEntity)
.build();
DaoDataset<GenericRecord> ds = (DaoDataset) repo.create(tableName, descriptor);
// Create the new entities
ds.put(createGenericEntity(0));
ds.put(createGenericEntity(1));
DatasetWriter<GenericRecord> writer = ds.newWriter();
assertFalse("Writer should not be open before calling open", writer.isOpen());
writer.open();
assertTrue("Writer should be open after calling open", writer.isOpen());
try {
for (int i = 2; i < 10; ++i) {
GenericRecord entity = createGenericEntity(i);
writer.write(entity);
}
} finally {
writer.close();
assertFalse("Writer should be closed after calling close", writer.isOpen());
}
// reload
ds = (DaoDataset) repo.load(tableName);
// ensure the new entities are what we expect with get operations
for (int i = 0; i < 10; ++i) {
String iStr = Long.toString(i);
Key key = new Key.Builder(ds)
.add("part1", "part1_" + iStr)
.add("part2", "part2_" + iStr).build();
compareEntitiesWithUtf8(i, ds.get(key));
}
// ensure the new entities are what we expect with scan operations
int cnt = 0;
DatasetReader<GenericRecord> reader = ds.newReader();
assertFalse("Reader should not be open before calling open", reader.isOpen());
reader.open();
assertTrue("Reader should be open after calling open", reader.isOpen());
try {
for (GenericRecord entity : reader) {
compareEntitiesWithUtf8(cnt, entity);
cnt++;
}
assertEquals(10, cnt);
} finally {
reader.close();
assertFalse("Reader should be closed after calling close", reader.isOpen());
}
// test a partial scan
cnt = 3;
reader = new DaoView<GenericRecord>(ds)
.from(new Marker.Builder().add("part1", "part1_3").add("part2", "part2_3").build())
.to(new Marker.Builder().add("part1", "part1_7").add("part2", "part2_7").build())
.newReader();
reader.open();
try {
for (GenericRecord entity : reader) {
compareEntitiesWithUtf8(cnt, entity);
cnt++;
}
assertEquals(8, cnt);
} finally {
reader.close();
}
Key key = new Key.Builder(ds)
.add("part1", "part1_5")
.add("part2", "part2_5").build();
// test delete
ds.delete(key);
GenericRecord deletedRecord = ds.get(key);
assertNull(deletedRecord);
}
@Test
public void testSpecific() throws Exception {
HBaseDatasetRepository repo = new HBaseDatasetRepository.Builder()
.configuration(HBaseTestUtils.getConf()).build();
DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
.schemaLiteral(testEntity)
.build();
RandomAccessDataset<TestEntity> ds = repo.create(tableName, descriptor);
// Create the new entities
ds.put(createSpecificEntity(0));
ds.put(createSpecificEntity(1));
DatasetWriter<TestEntity> writer = ds.newWriter();
writer.open();
try {
for (int i = 2; i < 10; ++i) {
TestEntity entity = createSpecificEntity(i);
writer.write(entity);
}
} finally {
writer.close();
}
// ensure the new entities are what we expect with get operations
for (int i = 0; i < 10; ++i) {
String iStr = Long.toString(i);
Key key = new Key.Builder(ds)
.add("part1", "part1_" + iStr)
.add("part2", "part2_" + iStr).build();
compareEntitiesWithString(i, ds.get(key));
}
// ensure the new entities are what we expect with scan operations
int cnt = 0;
DatasetReader<TestEntity> reader = ds.newReader();
reader.open();
try {
for (TestEntity entity : reader) {
compareEntitiesWithString(cnt, entity);
cnt++;
}
assertEquals(10, cnt);
} finally {
reader.close();
}
Key key = new Key.Builder(ds)
.add("part1", "part1_5")
.add("part2", "part2_5").build();
// test delete
ds.delete(key);
TestEntity deletedRecord = ds.get(key);
assertNull(deletedRecord);
}
@Test
public void testDeleteDataset() throws Exception {
HBaseDatasetRepository repo = new HBaseDatasetRepository.Builder()
.configuration(HBaseTestUtils.getConf()).build();
DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
.schemaLiteral(testGenericEntity)
.build();
RandomAccessDataset<GenericRecord> ds = repo.create(tableName, descriptor);
// Create a new entity
ds.put(createGenericEntity(0));
// Retrieve the entity
String iStr = Long.toString(0);
Key key = new Key.Builder(ds)
.add("part1", "part1_" + iStr)
.add("part2", "part2_" + iStr).build();
compareEntitiesWithUtf8(0, ds.get(key));
// delete dataset
boolean success = repo.delete(tableName);
assertTrue("table should have been successfully deleted", success);
// check that tables have no rows
assertEquals(0, HBaseTestUtils.util.countRows(new HTable(HBaseTestUtils.getConf(), managedTableName)));
assertEquals(0, HBaseTestUtils.util.countRows(new HTable(HBaseTestUtils.getConf(), tableName)));
// create the dataset again
ds = repo.create(tableName, descriptor);
// Create a new entity
ds.put(createGenericEntity(0));
// Retrieve the entity
compareEntitiesWithUtf8(0, ds.get(key));
}
// TODO: remove duplication from ManagedDaoTest
public static GenericRecord createGenericEntity(long uniqueIdx) {
return createGenericEntity(uniqueIdx, testGenericEntity);
}
/**
* Creates a generic entity with the given schema, which must be migratable
* from TestRecord. Data from TestRecord is filled in.
*
* @param uniqueIdx
* @param schemaString
* @return
*/
private static GenericRecord createGenericEntity(long uniqueIdx, String schemaString) {
String iStr = Long.toString(uniqueIdx);
Schema.Parser parser = new Schema.Parser();
Schema entitySchema = parser.parse(schemaString);
Schema embeddedSchema = entitySchema.getField("field4").schema();
Schema arrayValueSchema = entitySchema.getField("field5").schema()
.getElementType();
GenericRecord entity = new GenericData.Record(entitySchema);
entity.put("part1", "part1_" + iStr);
entity.put("part2", "part2_" + iStr);
entity.put("field1", "field1_" + iStr);
entity.put("field2", "field2_" + iStr);
entity.put("enum", "ENUM3");
Map<CharSequence, CharSequence> field3Map = new HashMap<CharSequence, CharSequence>();
field3Map.put("field3_key_1_" + iStr, "field3_value_1_" + iStr);
field3Map.put("field3_key_2_" + iStr, "field3_value_2_" + iStr);
entity.put("field3", field3Map);
GenericRecord embedded = new GenericData.Record(embeddedSchema);
embedded.put("embeddedField1", "embedded1");
embedded.put("embeddedField2", 2L);
entity.put("field4", embedded);
List<GenericRecord> arrayRecordList = new ArrayList<GenericRecord>();
GenericRecord subRecord = new GenericData.Record(arrayValueSchema);
subRecord.put("subfield1", "subfield1");
subRecord.put("subfield2", 1L);
subRecord.put("subfield3", "subfield3");
arrayRecordList.add(subRecord);
subRecord = new GenericData.Record(arrayValueSchema);
subRecord.put("subfield1", "subfield4");
subRecord.put("subfield2", 1L);
subRecord.put("subfield3", "subfield6");
arrayRecordList.add(subRecord);
entity.put("field5", arrayRecordList);
return entity;
}
@SuppressWarnings("unchecked")
public static void compareEntitiesWithUtf8(long uniqueIdx, IndexedRecord record) {
String iStr = Long.toString(uniqueIdx);
assertEquals(new String("part1_" + iStr), record.get(0).toString()); // TODO: check type
assertEquals(new String("part2_" + iStr), record.get(1).toString()); // TODO: check type
assertEquals(new Utf8("field1_" + iStr), record.get(2));
assertEquals(new Utf8("field2_" + iStr), record.get(3));
assertEquals(TestEnum.ENUM3.toString(), record.get(4).toString());
assertEquals(new Utf8("field3_value_1_" + iStr),
((Map<CharSequence, CharSequence>) record.get(5)).get(new Utf8(
"field3_key_1_" + iStr)));
assertEquals(new Utf8("field3_value_2_" + iStr),
((Map<CharSequence, CharSequence>) record.get(5)).get(new Utf8(
"field3_key_2_" + iStr)));
assertEquals(new Utf8("embedded1"), ((IndexedRecord) record.get(6)).get(0));
assertEquals(2L, ((IndexedRecord) record.get(6)).get(1));
assertEquals(2, ((List<?>) record.get(7)).size());
assertEquals(new Utf8("subfield1"),
((IndexedRecord) ((List<?>) record.get(7)).get(0)).get(0));
assertEquals(1L, ((IndexedRecord) ((List<?>) record.get(7)).get(0)).get(1));
assertEquals(new Utf8("subfield3"),
((IndexedRecord) ((List<?>) record.get(7)).get(0)).get(2));
assertEquals(new Utf8("subfield4"),
((IndexedRecord) ((List<?>) record.get(7)).get(1)).get(0));
assertEquals(1L, ((IndexedRecord) ((List<?>) record.get(7)).get(1)).get(1));
assertEquals(new Utf8("subfield6"),
((IndexedRecord) ((List<?>) record.get(7)).get(1)).get(2));
}
@SuppressWarnings("unchecked")
private void compareEntitiesWithString(long uniqueIdx, IndexedRecord record) {
String iStr = Long.toString(uniqueIdx);
assertEquals(new String("part1_" + iStr), record.get(0).toString()); // TODO: check type
assertEquals(new String("part2_" + iStr), record.get(1).toString()); // TODO: check type
assertEquals("field1_" + iStr, record.get(2));
assertEquals("field2_" + iStr, record.get(3));
assertEquals(TestEnum.ENUM3.toString(), record.get(4).toString());
assertEquals(
"field3_value_1_" + iStr,
((Map<CharSequence, CharSequence>) record.get(5)).get("field3_key_1_"
+ iStr));
assertEquals(
"field3_value_2_" + iStr,
((Map<CharSequence, CharSequence>) record.get(5)).get("field3_key_2_"
+ iStr));
assertEquals("embedded1", ((IndexedRecord) record.get(6)).get(0));
assertEquals(2L, ((IndexedRecord) record.get(6)).get(1));
assertEquals(2, ((List<?>) record.get(7)).size());
assertEquals("subfield1",
((IndexedRecord) ((List<?>) record.get(7)).get(0)).get(0));
assertEquals(1L, ((IndexedRecord) ((List<?>) record.get(7)).get(0)).get(1));
assertEquals("subfield3",
((IndexedRecord) ((List<?>) record.get(7)).get(0)).get(2));
assertEquals("subfield4",
((IndexedRecord) ((List<?>) record.get(7)).get(1)).get(0));
assertEquals(1L, ((IndexedRecord) ((List<?>) record.get(7)).get(1)).get(1));
assertEquals("subfield6",
((IndexedRecord) ((List<?>) record.get(7)).get(1)).get(2));
}
private TestEntity createSpecificEntity(long uniqueIdx) {
String iStr = Long.toString(uniqueIdx);
Map<String, String> field3Map = new HashMap<String, String>();
field3Map.put("field3_key_1_" + iStr, "field3_value_1_" + iStr);
field3Map.put("field3_key_2_" + iStr, "field3_value_2_" + iStr);
EmbeddedRecord embeddedRecord = EmbeddedRecord.newBuilder()
.setEmbeddedField1("embedded1").setEmbeddedField2(2L).build();
List<ArrayRecord> arrayRecordList = new ArrayList<ArrayRecord>(2);
ArrayRecord subRecord = ArrayRecord.newBuilder().setSubfield1("subfield1")
.setSubfield2(1L).setSubfield3("subfield3").build();
arrayRecordList.add(subRecord);
subRecord = ArrayRecord.newBuilder().setSubfield1("subfield4")
.setSubfield2(1L).setSubfield3("subfield6").build();
arrayRecordList.add(subRecord);
TestEntity entity = TestEntity.newBuilder()
.setPart1("part1_" + iStr).setPart2("part2_" + iStr)
.setField1("field1_" + iStr)
.setField2("field2_" + iStr).setEnum$(TestEnum.ENUM3)
.setField3(field3Map).setField4(embeddedRecord)
.setField5(arrayRecordList).build();
return entity;
}
}