Package com.ngdata.hbaseindexer.mr

Source Code of com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerToolDirectWriteTest

/*
* Copyright 2013 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ngdata.hbaseindexer.mr;

import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import com.ngdata.hbaseindexer.conf.DefaultIndexerComponentFactory;
import com.ngdata.hbaseindexer.model.api.IndexerDefinition;
import com.ngdata.hbaseindexer.model.api.IndexerDefinitionBuilder;
import com.ngdata.hbaseindexer.model.impl.IndexerModelImpl;
import com.ngdata.hbaseindexer.util.net.NetUtils;
import com.ngdata.hbaseindexer.util.solr.SolrTestingUtility;
import com.ngdata.sep.util.io.Closer;
import com.ngdata.sep.util.zookeeper.ZkUtil;
import com.ngdata.sep.util.zookeeper.ZooKeeperItf;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

public class HBaseMapReduceIndexerToolDirectWriteTest {

    // Name of the HBase table that setUp() creates and tearDown() drops around every test.
    private static final byte[] TEST_TABLE_NAME = Bytes.toBytes("record");
    // Column family that createHTable() adds to each table and that all test cells are written to.
    private static final byte[] TEST_COLFAM_NAME = Bytes.toBytes("info");
   
    // HBase testing utility shared by the whole class; its mini cluster is started in setupBeforeClass().
    private static final HBaseTestingUtility HBASE_TEST_UTILITY = HBaseTestingUtilityFactory.createTestUtility();
    // Helper used to start the MR cluster and to run the tool under test (see runTool calls).
    private static MRTestUtil MR_TEST_UTIL;
    // Solr test harness started in setupBeforeClass() and stopped in tearDownClass().
    private static SolrTestingUtility SOLR_TEST_UTILITY;
   
   
    // Solr client whose default collection is "collection1".
    private static CloudSolrServer COLLECTION1;
    // Solr client whose default collection is "collection2".
    private static CloudSolrServer COLLECTION2;
    // Admin handle used to create, disable and delete the test tables.
    private static HBaseAdmin HBASE_ADMIN;
    // ZooKeeper connect string passed to the tool as --zk-host ("127.0.0.1:<port>/solr").
    private static String SOLR_ZK;
    // ZooKeeper connect string passed to the tool as --hbase-indexer-zk ("localhost:<port>").
    private static String INDEXER_ZK;
    // Indexer model in which the "zkindexerdef" indexer definition is registered during class setup.
    private static IndexerModelImpl INDEXER_MODEL;

    // Handle on TEST_TABLE_NAME, opened in setUp() and closed in tearDown().
    private HTable recordTable;
   
    // Reference to the testing utility's Configuration, assigned in setUp(); some tests set
    // morphline properties on it before running the tool.
    private Configuration indexerToolConf;
   
    @BeforeClass
    public static void setupBeforeClass() throws Exception {
        MR_TEST_UTIL = new MRTestUtil(HBASE_TEST_UTILITY);
        HBASE_TEST_UTILITY.startMiniCluster();
        MR_TEST_UTIL.startMrCluster();
       
        int zkClientPort = HBASE_TEST_UTILITY.getZkCluster().getClientPort();
       
        SOLR_TEST_UTILITY = new SolrTestingUtility(zkClientPort, NetUtils.getFreePort());
        SOLR_TEST_UTILITY.start();
        SOLR_TEST_UTILITY.uploadConfig("config1",
                Resources.toByteArray(Resources.getResource(HBaseMapReduceIndexerToolDirectWriteTest.class, "schema.xml")),
                Resources.toByteArray(Resources.getResource(HBaseMapReduceIndexerToolDirectWriteTest.class, "solrconfig.xml")));
        SOLR_TEST_UTILITY.createCore("collection1_core1", "collection1", "config1", 1);
        SOLR_TEST_UTILITY.createCore("collection2_core1", "collection2", "config1", 1);

        COLLECTION1 = new CloudSolrServer(SOLR_TEST_UTILITY.getZkConnectString());
        COLLECTION1.setDefaultCollection("collection1");

        COLLECTION2 = new CloudSolrServer(SOLR_TEST_UTILITY.getZkConnectString());
        COLLECTION2.setDefaultCollection("collection2");
       
        SOLR_ZK = "127.0.0.1:" + zkClientPort + "/solr";
        INDEXER_ZK = "localhost:" + zkClientPort;
        ZooKeeperItf zkItf = ZkUtil.connect(INDEXER_ZK, 15000);
        INDEXER_MODEL = new IndexerModelImpl(zkItf, "/ngdata/hbaseindexer");
        IndexerDefinition indexerDef = new IndexerDefinitionBuilder()
        .name("zkindexerdef")
        .indexerComponentFactory(DefaultIndexerComponentFactory.class.getName())
        .configuration(Resources.toByteArray(Resources.getResource(
                HBaseMapReduceIndexerToolDirectWriteTest.class, "user_indexer.xml")))
        .connectionParams(ImmutableMap.of(
                "solr.zk", SOLR_ZK,
                "solr.collection", "collection1"))
        .build();

        addAndWaitForIndexer(indexerDef);
       
        Closer.close(zkItf);
       
        HBASE_ADMIN = new HBaseAdmin(HBASE_TEST_UTILITY.getConfiguration());
       
    }
   
    @AfterClass
    public static void tearDownClass() throws Exception {
        SOLR_TEST_UTILITY.stop();
        HBASE_ADMIN.close();
        HBASE_TEST_UTILITY.shutdownMiniMapReduceCluster();
        HBASE_TEST_UTILITY.shutdownMiniCluster();
    }
   
    @Before
    public void setUp() throws Exception {
        createHTable(TEST_TABLE_NAME);
        recordTable = new HTable(HBASE_TEST_UTILITY.getConfiguration(), TEST_TABLE_NAME);
        indexerToolConf = HBASE_TEST_UTILITY.getConfiguration();
    }
   
    @After
    public void tearDown() throws IOException, SolrServerException {
        HBASE_ADMIN.disableTable(TEST_TABLE_NAME);
        HBASE_ADMIN.deleteTable(TEST_TABLE_NAME);
       
        recordTable.close();
       
        COLLECTION1.deleteByQuery("*:*");
        COLLECTION1.commit();
       
        COLLECTION2.deleteByQuery("*:*");
        COLLECTION2.commit();
       
        // Be extra sure Solr is empty now
        QueryResponse response = COLLECTION1.query(new SolrQuery("*:*"));
        assertTrue(response.getResults().isEmpty());
    }
   
    private static void addAndWaitForIndexer(IndexerDefinition indexerDef) throws Exception {
        long startTime = System.currentTimeMillis();
        INDEXER_MODEL.addIndexer(indexerDef);
       
        // Wait max 5 seconds
        while (System.currentTimeMillis() - startTime < 15000) {
            if (INDEXER_MODEL.hasIndexer(indexerDef.getName())) {
                return;
            }
            Thread.sleep(200);
        }
        throw new RuntimeException("Failed to add indexer: " + indexerDef);
    }

    /**
     * Write String values to HBase. Direct string-to-bytes encoding is used for
     * writing all values to HBase. All values are stored in the TEST_COLFAM_NAME
     * column family.
     *
     *
     * @param row row key under which are to be stored
     * @param qualifiersAndValues map of column qualifiers to cell values
     */
    private void writeHBaseRecord(String row, Map<String,String> qualifiersAndValues) throws IOException {
        writeHBaseRecord(row, qualifiersAndValues, recordTable);
    }

    /**
     * Write String values to HBase. Direct string-to-bytes encoding is used for
     * writing all values to HBase. All values are stored in the TEST_COLFAM_NAME
     * column family.
     *
     *
     * @param row row key under which are to be stored
     * @param qualifiersAndValues map of column qualifiers to cell values
     * @param table htable to write to
     */
     private static void writeHBaseRecord(String row, Map<String,String> qualifiersAndValues, HTable table) throws IOException {
        Put put = new Put(Bytes.toBytes(row));
        for (Entry<String, String> entry : qualifiersAndValues.entrySet()) {
            put.add(TEST_COLFAM_NAME, Bytes.toBytes(entry.getKey()), Bytes.toBytes(entry.getValue()));
        }
        table.put(put);
     }
   
    /**
     * Execute a Solr query on COLLECTION1.
     *
     * @param queryString Solr query string
     * @return list of results from Solr
     */
    private SolrDocumentList executeSolrQuery(String queryString) throws SolrServerException {
        return executeSolrQuery(COLLECTION1, queryString);
    }
   
    /**
     * Execute a Solr query on a specific collection.
     */
    private SolrDocumentList executeSolrQuery(CloudSolrServer collection, String queryString) throws SolrServerException {
        QueryResponse response = collection.query(new SolrQuery(queryString));
        return response.getResults();
    }
   
    @Test
    public void testIndexer_DirectWrite() throws Exception {
        writeHBaseRecord("row1", ImmutableMap.of(
                                "firstname", "John",
                                "lastname", "Doe"));

        MR_TEST_UTIL.runTool(
                "--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
                "--reducers", "0",
                "--collection", "collection1",
                "--zk-host", SOLR_ZK);
       
        assertEquals(1, executeSolrQuery("firstname_s:John lastname_s:Doe").size());
    }
   
    @Test
    public void testIndexer_ZkBasedIndexerDefinition() throws Exception {
        writeHBaseRecord("row1", ImmutableMap.of(
                                "firstname", "John",
                                "lastname", "Doe"));

        MR_TEST_UTIL.runTool(
                "--hbase-indexer-name", "zkindexerdef",
                "--hbase-indexer-zk", INDEXER_ZK,
                "--reducers", "0");
       
        assertEquals(1, executeSolrQuery("firstname_s:John lastname_s:Doe").size());
    }
   
    @Test
    public void testIndexer_Morphline() throws Exception {
        writeHBaseRecord("row1", ImmutableMap.of(
                                "firstname", "John",
                                "lastname", "Doe"));
       
        indexerToolConf.set("morphlineField.forcedMoo", "forcedBaz");
        indexerToolConf.set("morphlineVariable.myFoo", "myBar");
        File indexerConfigFile = MRTestUtil.substituteZkHost(
            new File("target/test-classes/morphline_indexer.xml"), SOLR_TEST_UTILITY.getZkConnectString());
       
        MR_TEST_UTIL.runTool(
                "--hbase-indexer-file", indexerConfigFile.toString(),
                "--morphline-file", new File("src/test/resources/extractHBaseCell.conf").toString(),
                "--morphline-id", "morphline1",
                "--reducers", "0",
                "--collection", "collection1",
                "--zk-host", SOLR_ZK);
              
        assertEquals(1, executeSolrQuery("firstname_s:John lastname_s:Doe").size());
    }
   
    @Test
    public void testIndexer_Morphline_With_DryRun() throws Exception {
        writeHBaseRecord("row1", ImmutableMap.of(
                                "firstname", "John",
                                "lastname", "Doe"));
       
        indexerToolConf.set("morphlineField.forcedMoo", "forcedBaz");
        indexerToolConf.set("morphlineVariable.myFoo", "myBar");
        File indexerConfigFile = MRTestUtil.substituteZkHost(
            new File("target/test-classes/morphline_indexer.xml"), SOLR_TEST_UTILITY.getZkConnectString());
       
        MR_TEST_UTIL.runTool(
                "--hbase-indexer-file", indexerConfigFile.toString(),
                "--morphline-file", new File("src/test/resources/extractHBaseCell.conf").toString(),
                "--morphline-id", "morphline1",
                "--reducers", "0",
                "--collection", "collection1",
                "--zk-host", SOLR_ZK,
                "--dry-run");
       
        assertEquals(0, executeSolrQuery("firstname_s:John lastname_s:Doe").size());
    }
   
    @Test
    public void testIndexer_AlternateCollection() throws Exception {
        writeHBaseRecord("row1", ImmutableMap.of(
                                "firstname", "John",
                                "lastname", "Doe"));
       
        MR_TEST_UTIL.runTool(
                "--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
                "--reducers", "0",
                "--collection", "collection2",
                "--zk-host", SOLR_ZK);
       
        String solrQuery = "firstname_s:John lastname_s:Doe";
       
        assertTrue(executeSolrQuery(COLLECTION1, solrQuery).isEmpty());
        assertEquals(1, executeSolrQuery(COLLECTION2, solrQuery).size());
    }

    @Test
    public void testIndexer_ClearIndex() throws Exception {
        indexClearTester(true);
    }

    @Test
    public void testIndexer_NoClearIndex() throws Exception {
        indexClearTester(false);
    }

    private void indexClearTester(boolean clear) throws Exception{
        SolrInputDocument solrDoc = new SolrInputDocument();
        solrDoc.addField("id", "nomatter");
        solrDoc.addField("firstname_s", "John");
        solrDoc.addField("lastname_s", "Doe");

        COLLECTION1.add(solrDoc);
        COLLECTION1.commit();

        String solrQuery = "firstname_s:John lastname_s:Doe";

        assertEquals(1, executeSolrQuery(COLLECTION1, solrQuery).size());

        writeHBaseRecord("row1", ImmutableMap.of(
                "firstname", "John",
                "lastname", "Doe"));

        List<String> args = Lists.newArrayList(
                "--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
                "--reducers", "0",
                "--collection", "collection1",
                "--zk-host", SOLR_ZK);

        if (clear) {
            args.add("--clear-index");
        }

        MR_TEST_UTIL.runTool(args.toArray(new String[args.size()]));

        if (clear) {
            assertEquals(1, executeSolrQuery(COLLECTION1, solrQuery).size());
        } else {
            assertEquals(2, executeSolrQuery(COLLECTION1, solrQuery).size());
        }

    }
   
    @Test
    public void testIndexer_StartRowDefined() throws Exception {
        writeHBaseRecord("a", ImmutableMap.of("firstname", "Aaron"));
        writeHBaseRecord("b", ImmutableMap.of("firstname", "Brian"));
        writeHBaseRecord("c", ImmutableMap.of("firstname", "Carl"));
       
        MR_TEST_UTIL.runTool(
                "--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
                "--reducers", "0",
                "--collection", "collection1",
                "--zk-host", SOLR_ZK,
                "--hbase-start-row", "b");
       
        assertEquals(2, executeSolrQuery("*:*").size());
        assertTrue(executeSolrQuery("firstname_s:Aaron").isEmpty());
       
    }
   
    @Test
    public void testIndexer_EndRowDefined() throws Exception {
        writeHBaseRecord("a", ImmutableMap.of("firstname", "Aaron"));
        writeHBaseRecord("b", ImmutableMap.of("firstname", "Brian"));
        writeHBaseRecord("c", ImmutableMap.of("firstname", "Carl"));
       
        MR_TEST_UTIL.runTool(
                "--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
                "--reducers", "0",
                "--collection", "collection1",
                "--zk-host", SOLR_ZK,
                "--hbase-end-row", "c");
       
        assertEquals(2, executeSolrQuery("*:*").size());
        assertTrue(executeSolrQuery("firstname_s:Carl").isEmpty());
    }
   
    @Test
    public void testIndexer_StartAndEndRowDefined() throws Exception {
        writeHBaseRecord("a", ImmutableMap.of("firstname", "Aaron"));
        writeHBaseRecord("b", ImmutableMap.of("firstname", "Brian"));
        writeHBaseRecord("c", ImmutableMap.of("firstname", "Carl"));
       
        MR_TEST_UTIL.runTool(
                "--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
                "--reducers", "0",
                "--collection", "collection1",
                "--zk-host", SOLR_ZK,
                "--hbase-start-row", "b",
                "--hbase-end-row", "c");
       
        assertEquals(1, executeSolrQuery("*:*").size());
        assertEquals(1, executeSolrQuery("firstname_s:Brian").size());
    }
   
    @Test
    public void testIndexer_StartTimeDefined() throws Exception {
        Put putEarly = new Put(Bytes.toBytes("early"));
        putEarly.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 1L, Bytes.toBytes("Early"));
       
        Put putOntime = new Put(Bytes.toBytes("ontime"));
        putOntime.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 2L, Bytes.toBytes("Ontime"));

        Put putLate = new Put(Bytes.toBytes("late"));
        putLate.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 3L, Bytes.toBytes("Late"));
       
        recordTable.put(ImmutableList.of(putEarly, putOntime, putLate));
       
        MR_TEST_UTIL.runTool(
                "--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
                "--reducers", "0",
                "--collection", "collection1",
                "--zk-host", SOLR_ZK,
                "--hbase-start-time", "2");
       
        assertEquals(2, executeSolrQuery("*:*").size());
        assertTrue(executeSolrQuery("firstname_s:Early").isEmpty());
    }
   
    @Test
    public void testIndexer_EndTimeDefined() throws Exception {
        Put putEarly = new Put(Bytes.toBytes("early"));
        putEarly.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 1L, Bytes.toBytes("Early"));
       
        Put putOntime = new Put(Bytes.toBytes("ontime"));
        putOntime.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 2L, Bytes.toBytes("Ontime"));

        Put putLate = new Put(Bytes.toBytes("late"));
        putLate.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 3L, Bytes.toBytes("Late"));
       
        recordTable.put(ImmutableList.of(putEarly, putOntime, putLate));
       
        MR_TEST_UTIL.runTool(
                "--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
                "--reducers", "0",
                "--collection", "collection1",
                "--zk-host", SOLR_ZK,
                "--hbase-end-time", "3");
       
        assertEquals(2, executeSolrQuery("*:*").size());
        assertTrue(executeSolrQuery("firstname_s:Late").isEmpty());
    }
   
    @Test
    public void testIndexer_StartAndEndTimeDefined() throws Exception {
        Put putEarly = new Put(Bytes.toBytes("early"));
        putEarly.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 1L, Bytes.toBytes("Early"));
       
        Put putOntime = new Put(Bytes.toBytes("ontime"));
        putOntime.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 2L, Bytes.toBytes("Ontime"));

        Put putLate = new Put(Bytes.toBytes("late"));
        putLate.add(TEST_COLFAM_NAME, Bytes.toBytes("firstname"), 3L, Bytes.toBytes("Late"));
       
        recordTable.put(ImmutableList.of(putEarly, putOntime, putLate));

        MR_TEST_UTIL.runTool(
                "--hbase-indexer-file", new File(Resources.getResource(getClass(), "user_indexer.xml").toURI()).toString(),
                "--reducers", "0",
                "--collection", "collection1",
                "--zk-host", SOLR_ZK,
                "--hbase-start-time", "2",
                "--hbase-end-time", "3");
       
        assertEquals(1, executeSolrQuery("*:*").size());
        assertTrue(executeSolrQuery("firstname_s:Early").isEmpty());
        assertEquals(1, executeSolrQuery("firstname_s:Ontime").size());
        assertTrue(executeSolrQuery("firstname_s:Late").isEmpty());
    }

    @Test
    public void testIndexer_Multitable() throws Exception {
        String tablePrefix = "_multitable_";
        HTableDescriptor descriptorA = createHTable((tablePrefix + "a_").getBytes(Charsets.UTF_8));
        HTableDescriptor descriptorB = createHTable((tablePrefix + "b_").getBytes(Charsets.UTF_8));
        HTable recordTable2 = new HTable(HBASE_TEST_UTILITY.getConfiguration(), tablePrefix + "a_");
        HTable recordTable3 =  new HTable(HBASE_TEST_UTILITY.getConfiguration(), tablePrefix + "b_");

        String hbaseTableName = tablePrefix + ".*";
        try {
            writeHBaseRecord("row1", ImmutableMap.of(
                    "firstname", "John",
                    "lastname", "Doe"), recordTable2);
            writeHBaseRecord("row2", ImmutableMap.of(
                    "firstname", "John",
                    "lastname", "Doe"), recordTable3);

            MR_TEST_UTIL.runTool(
                    "--hbase-indexer-file", new File(Resources.getResource(getClass(), "multitable_indexer.xml").toURI()).toString(),
                    "--reducers", "0",
                    "--collection", "collection1",
                    "--zk-host", SOLR_ZK);

            assertEquals(2, executeSolrQuery("firstname_s:John lastname_s:Doe").size());
        } finally {
            HBASE_ADMIN.disableTables(hbaseTableName);
            HBASE_ADMIN.deleteTables(hbaseTableName);

            recordTable2.close();
            recordTable3.close();
        }
    }

    private static HTableDescriptor createHTable (byte[] tableName) throws Exception{
        HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
        tableDescriptor.addFamily(new HColumnDescriptor(TEST_COLFAM_NAME));
        HBASE_ADMIN.createTable(tableDescriptor);

        return tableDescriptor;
    }
   
}
TOP

Related Classes of com.ngdata.hbaseindexer.mr.HBaseMapReduceIndexerToolDirectWriteTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.