/*
* Copyright 2013 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ngdata.hbaseindexer.mr;
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import com.ngdata.hbaseindexer.conf.DefaultIndexerComponentFactory;
import com.ngdata.hbaseindexer.model.api.IndexerDefinition;
import com.ngdata.hbaseindexer.model.api.IndexerDefinitionBuilder;
import com.ngdata.hbaseindexer.model.impl.IndexerModelImpl;
import com.ngdata.hbaseindexer.util.net.NetUtils;
import com.ngdata.hbaseindexer.util.solr.SolrTestingUtility;
import com.ngdata.sep.util.io.Closer;
import com.ngdata.sep.util.zookeeper.ZkUtil;
import com.ngdata.sep.util.zookeeper.ZooKeeperItf;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class HBaseMapReduceIndexerToolDirectWriteTest {
private static final byte[] TEST_TABLE_NAME = Bytes.toBytes("record");
private static final byte[] TEST_COLFAM_NAME = Bytes.toBytes("info");
private static final HBaseTestingUtility HBASE_TEST_UTILITY = HBaseTestingUtilityFactory.createTestUtility();
private static MRTestUtil MR_TEST_UTIL;
private static SolrTestingUtility SOLR_TEST_UTILITY;
private static CloudSolrServer COLLECTION1;
private static CloudSolrServer COLLECTION2;
private static HBaseAdmin HBASE_ADMIN;
private static String SOLR_ZK;
private static String INDEXER_ZK;
private static IndexerModelImpl INDEXER_MODEL;
private HTable recordTable;
private Configuration indexerToolConf;
@BeforeClass
public static void setupBeforeClass() throws Exception {
MR_TEST_UTIL = new MRTestUtil(HBASE_TEST_UTILITY);
HBASE_TEST_UTILITY.startMiniCluster();
MR_TEST_UTIL.startMrCluster();
int zkClientPort = HBASE_TEST_UTILITY.getZkCluster().getClientPort();
SOLR_TEST_UTILITY = new SolrTestingUtility(zkClientPort, NetUtils.getFreePort());
SOLR_TEST_UTILITY.start();
SOLR_TEST_UTILITY.uploadConfig("config1",
Resources.toByteArray(Resources.getResource(HBaseMapReduceIndexerToolDirectWriteTest.class, "schema.xml")),
Resources.toByteArray(Resources.getResource(HBaseMapReduceIndexerToolDirectWriteTest.class, "solrconfig.xml")));
SOLR_TEST_UTILITY.createCore("collection1_core1", "collection1", "config1", 1);
SOLR_TEST_UTILITY.createCore("collection2_core1", "collection2", "config1", 1);
COLLECTION1 = new CloudSolrServer(SOLR_TEST_UTILITY.getZkConnectString());
COLLECTION1.setDefaultCollection("collection1");
COLLECTION2 = new CloudSolrServer(SOLR_TEST_UTILITY.getZkConnectString());
COLLECTION2.setDefaultCollection("collection2");
SOLR_ZK = "127.0.0.1:" + zkClientPort + "/solr";
INDEXER_ZK = "localhost:" + zkClientPort;
ZooKeeperItf zkItf = ZkUtil.connect(INDEXER_ZK, 15000);
INDEXER_MODEL = new IndexerModelImpl(zkItf, "/ngdata/hbaseindexer");
IndexerDefinition indexerDef = new IndexerDefinitionBuilder()
.name("zkindexerdef")
.indexerComponentFactory(DefaultIndexerComponentFactory.class.getName())
.configuration(Resources.toByteArray(Resources.getResource(
HBaseMapReduceIndexerToolDirectWriteTest.class, "user_indexer.xml")))
.connectionParams(ImmutableMap.of(
"solr.zk", SOLR_ZK,
"solr.collection", "collection1"))
.build();
addAndWaitForIndexer(indexerDef);
Closer.close(zkItf);
HBASE_ADMIN = new HBaseAdmin(HBASE_TEST_UTILITY.getConfiguration());
}
@AfterClass
public static void tearDownClass() throws Exception {
SOLR_TEST_UTILITY.stop();
HBASE_ADMIN.close();
HBASE_TEST_UTILITY.shutdownMiniMapReduceCluster();
HBASE_TEST_UTILITY.shutdownMiniCluster();
}
@Before
public void setUp() throws Exception {
createHTable(TEST_TABLE_NAME);
recordTable = new HTable(HBASE_TEST_UTILITY.getConfiguration(), TEST_TABLE_NAME);
indexerToolConf = HBASE_TEST_UTILITY.getConfiguration();
}
@After
public void tearDown() throws IOException, SolrServerException {
HBASE_ADMIN.disableTable(TEST_TABLE_NAME);
HBASE_ADMIN.deleteTable(TEST_TABLE_NAME);
recordTable.close();
COLLECTION1.deleteByQuery("*:*");
COLLECTION1.commit();
COLLECTION2.deleteByQuery("*:*");
COLLECTION2.commit();
// Be extra sure Solr is empty now
QueryResponse response = COLLECTION1.query(new SolrQuery("*:*"));
assertTrue(response.getResults().isEmpty());
}
private static void addAndWaitForIndexer(IndexerDefinition indexerDef) throws Exception {
long startTime = System.currentTimeMillis();
INDEXER_MODEL.addIndexer(indexerDef);
// Wait max 5 seconds
while (System.currentTimeMillis() - startTime < 15000) {
if (INDEXER_MODEL.hasIndexer(indexerDef.getName())) {
return;
}
Thread.sleep(200);
}
throw new RuntimeException("Failed to add indexer: " + indexerDef);
}
/**
* Write String values to HBase. Direct string-to-bytes encoding is used for
* writing all values to HBase. All values are stored in the TEST_COLFAM_NAME
* column family.
*
*
* @param row row key under which are to be stored
* @param qualifiersAndValues map of column qualifiers to cell values
*/
private void writeHBaseRecord(String row, Map<String,String> qualifiersAndValues) throws IOException {
writeHBaseRecord(row, qualifiersAndValues, recordTable);
}
/**
* Write String values to HBase. Direct string-to-bytes encoding is used for
* writing all values to HBase. All values are stored in the TEST_COLFAM_NAME
* column family.
*
*
* @param row row key under which are to be stored
* @param qualifiersAndValues map of column qualifiers to cell values
* @param table htable to write to
*/
private static void writeHBaseRecord(String row, Map<String,String> qualifiersAndValues, HTable table) throws IOException {
Put put = new Put(Bytes.toBytes(row));
for (Entry<String, String> entry : qualifiersAndValues.entrySet()) {
put.add(TEST_COLFAM_NAME, Bytes.toBytes(entry.getKey()), Bytes.toBytes(entry.getValue()));
}
table.put(put);
}
/**
* Execute a Solr query on COLLECTION1.
*
* @param queryString Solr query string
* @return list of results from Solr
*/
private SolrDocumentList executeSolrQuery(String queryString) throws SolrServerException {
return executeSolrQuery(COLLECTION1, queryString);
}
/**
* Execute a Solr query on a specific collection.
*/
private SolrDocumentList executeSolrQuery(CloudSolrServer collection, String queryString) throws SolrServerException {
QueryResponse response = collection.query(new SolrQuery(queryString));
return response.getResults();
}
@Test
public void testIndexer_DirectWrite() throws Exception {
writeHBaseRecord("row1", ImmutableMap.of(
"firstname", "John",
"lastname", "Doe"));
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK);
assertEquals(1, executeSolrQuery("firstname_s:John lastname_s:Doe").size());
}
@Test
public void testIndexer_ZkBasedIndexerDefinition() throws Exception {
writeHBaseRecord("row1", ImmutableMap.of(
"firstname", "John",
"lastname", "Doe"));
MR_TEST_UTIL.runTool(
"--hbase-indexer-name", "zkindexerdef",
"--hbase-indexer-zk", INDEXER_ZK,
"--reducers", "0");
assertEquals(1, executeSolrQuery("firstname_s:John lastname_s:Doe").size());
}
@Test
public void testIndexer_Morphline() throws Exception {
writeHBaseRecord("row1", ImmutableMap.of(
"firstname", "John",
"lastname", "Doe"));
indexerToolConf.set("morphlineField.forcedMoo", "forcedBaz");
indexerToolConf.set("morphlineVariable.myFoo", "myBar");
File indexerConfigFile = MRTestUtil.substituteZkHost(
new File("target/test-classes/morphline_indexer.xml"), SOLR_TEST_UTILITY.getZkConnectString());
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", indexerConfigFile.toString(),
"--morphline-file", new File("src/test/resources/extractHBaseCell.conf").toString(),
"--morphline-id", "morphline1",
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK);
assertEquals(1, executeSolrQuery("firstname_s:John lastname_s:Doe").size());
}
@Test
public void testIndexer_Morphline_With_DryRun() throws Exception {
writeHBaseRecord("row1", ImmutableMap.of(
"firstname", "John",
"lastname", "Doe"));
indexerToolConf.set("morphlineField.forcedMoo", "forcedBaz");
indexerToolConf.set("morphlineVariable.myFoo", "myBar");
File indexerConfigFile = MRTestUtil.substituteZkHost(
new File("target/test-classes/morphline_indexer.xml"), SOLR_TEST_UTILITY.getZkConnectString());
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", indexerConfigFile.toString(),
"--morphline-file", new File("src/test/resources/extractHBaseCell.conf").toString(),
"--morphline-id", "morphline1",
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK,
"--dry-run");
assertEquals(0, executeSolrQuery("firstname_s:John lastname_s:Doe").size());
}
@Test
public void testIndexer_AlternateCollection() throws Exception {
writeHBaseRecord("row1", ImmutableMap.of(
"firstname", "John",
"lastname", "Doe"));
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
"--reducers", "0",
"--collection", "collection2",
"--zk-host", SOLR_ZK);
String solrQuery = "firstname_s:John lastname_s:Doe";
assertTrue(executeSolrQuery(COLLECTION1, solrQuery).isEmpty());
assertEquals(1, executeSolrQuery(COLLECTION2, solrQuery).size());
}
@Test
public void testIndexer_ClearIndex() throws Exception {
indexClearTester(true);
}
@Test
public void testIndexer_NoClearIndex() throws Exception {
indexClearTester(false);
}
private void indexClearTester(boolean clear) throws Exception{
SolrInputDocument solrDoc = new SolrInputDocument();
solrDoc.addField("id", "nomatter");
solrDoc.addField("firstname_s", "John");
solrDoc.addField("lastname_s", "Doe");
COLLECTION1.add(solrDoc);
COLLECTION1.commit();
String solrQuery = "firstname_s:John lastname_s:Doe";
assertEquals(1, executeSolrQuery(COLLECTION1, solrQuery).size());
writeHBaseRecord("row1", ImmutableMap.of(
"firstname", "John",
"lastname", "Doe"));
List<String> args = Lists.newArrayList(
"--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK);
if (clear) {
args.add("--clear-index");
}
MR_TEST_UTIL.runTool(args.toArray(new String[args.size()]));
if (clear) {
assertEquals(1, executeSolrQuery(COLLECTION1, solrQuery).size());
} else {
assertEquals(2, executeSolrQuery(COLLECTION1, solrQuery).size());
}
}
@Test
public void testIndexer_StartRowDefined() throws Exception {
writeHBaseRecord("a", ImmutableMap.of("firstname", "Aaron"));
writeHBaseRecord("b", ImmutableMap.of("firstname", "Brian"));
writeHBaseRecord("c", ImmutableMap.of("firstname", "Carl"));
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK,
"--hbase-start-row", "b");
assertEquals(2, executeSolrQuery("*:*").size());
assertTrue(executeSolrQuery("firstname_s:Aaron").isEmpty());
}
@Test
public void testIndexer_EndRowDefined() throws Exception {
writeHBaseRecord("a", ImmutableMap.of("firstname", "Aaron"));
writeHBaseRecord("b", ImmutableMap.of("firstname", "Brian"));
writeHBaseRecord("c", ImmutableMap.of("firstname", "Carl"));
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK,
"--hbase-end-row", "c");
assertEquals(2, executeSolrQuery("*:*").size());
assertTrue(executeSolrQuery("firstname_s:Carl").isEmpty());
}
@Test
public void testIndexer_StartAndEndRowDefined() throws Exception {
writeHBaseRecord("a", ImmutableMap.of("firstname", "Aaron"));
writeHBaseRecord("b", ImmutableMap.of("firstname", "Brian"));
writeHBaseRecord("c", ImmutableMap.of("firstname", "Carl"));
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK,
"--hbase-start-row", "b",
"--hbase-end-row", "c");
assertEquals(1, executeSolrQuery("*:*").size());
assertEquals(1, executeSolrQuery("firstname_s:Brian").size());
}
@Test
public void testIndexer_StartTimeDefined() throws Exception {
Put putEarly = new Put(Bytes.toBytes("early"));
putEarly.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 1L, Bytes.toBytes("Early"));
Put putOntime = new Put(Bytes.toBytes("ontime"));
putOntime.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 2L, Bytes.toBytes("Ontime"));
Put putLate = new Put(Bytes.toBytes("late"));
putLate.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 3L, Bytes.toBytes("Late"));
recordTable.put(ImmutableList.of(putEarly, putOntime, putLate));
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK,
"--hbase-start-time", "2");
assertEquals(2, executeSolrQuery("*:*").size());
assertTrue(executeSolrQuery("firstname_s:Early").isEmpty());
}
@Test
public void testIndexer_EndTimeDefined() throws Exception {
Put putEarly = new Put(Bytes.toBytes("early"));
putEarly.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 1L, Bytes.toBytes("Early"));
Put putOntime = new Put(Bytes.toBytes("ontime"));
putOntime.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 2L, Bytes.toBytes("Ontime"));
Put putLate = new Put(Bytes.toBytes("late"));
putLate.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 3L, Bytes.toBytes("Late"));
recordTable.put(ImmutableList.of(putEarly, putOntime, putLate));
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK,
"--hbase-end-time", "3");
assertEquals(2, executeSolrQuery("*:*").size());
assertTrue(executeSolrQuery("firstname_s:Late").isEmpty());
}
@Test
public void testIndexer_StartAndEndTimeDefined() throws Exception {
Put putEarly = new Put(Bytes.toBytes("early"));
putEarly.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 1L, Bytes.toBytes("Early"));
Put putOntime = new Put(Bytes.toBytes("ontime"));
putOntime.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 2L, Bytes.toBytes("Ontime"));
Put putLate = new Put(Bytes.toBytes("late"));
putLate.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 3L, Bytes.toBytes("Late"));
recordTable.put(ImmutableList.of(putEarly, putOntime, putLate));
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK,
"--hbase-start-time", "2",
"--hbase-end-time", "3");
assertEquals(1, executeSolrQuery("*:*").size());
assertTrue(executeSolrQuery("firstname_s:Early").isEmpty());
assertEquals(1, executeSolrQuery("firstname_s:Ontime").size());
assertTrue(executeSolrQuery("firstname_s:Late").isEmpty());
}
@Test
public void testIndexer_Multitable() throws Exception {
String tablePrefix = "_multitable_";
HTableDescriptor descriptorA = createHTable((tablePrefix + "a_").getBytes(Charsets.UTF_8));
HTableDescriptor descriptorB = createHTable((tablePrefix + "b_").getBytes(Charsets.UTF_8));
HTable recordTable2 = new HTable(HBASE_TEST_UTILITY.getConfiguration(), tablePrefix + "a_");
HTable recordTable3 = new HTable(HBASE_TEST_UTILITY.getConfiguration(), tablePrefix + "b_");
String hbaseTableName = tablePrefix + ".*";
try {
writeHBaseRecord("row1", ImmutableMap.of(
"firstname", "John",
"lastname", "Doe"), recordTable2);
writeHBaseRecord("row2", ImmutableMap.of(
"firstname", "John",
"lastname", "Doe"), recordTable3);
MR_TEST_UTIL.runTool(
"--hbase-indexer-file", new File(Resources.getResource(getClass(), "multitable_indexer.xml").toURI()).toString(),
"--reducers", "0",
"--collection", "collection1",
"--zk-host", SOLR_ZK);
assertEquals(2, executeSolrQuery("firstname_s:John lastname_s:Doe").size());
} finally {
HBASE_ADMIN.disableTables(hbaseTableName);
HBASE_ADMIN.deleteTables(hbaseTableName);
recordTable2.close();
recordTable3.close();
}
}
private static HTableDescriptor createHTable (byte[] tableName) throws Exception{
HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
tableDescriptor.addFamily(new HColumnDescriptor(TEST_COLFAM_NAME));
HBASE_ADMIN.createTable(tableDescriptor);
return tableDescriptor;
}
}