/*
Copyright 2012 Urban Airship and Contributors
*/

package com.urbanairship.datacube;

import java.util.Iterator;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.After;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Multimap;
import com.urbanairship.datacube.DbHarness.CommitType;
import com.urbanairship.datacube.backfill.BackfillUtil;
import com.urbanairship.datacube.backfill.HBaseBackfillMerger;
import com.urbanairship.datacube.backfill.HBaseSnapshotter;
import com.urbanairship.datacube.bucketers.StringToBytesBucketer;
import com.urbanairship.datacube.dbharnesses.HBaseDbHarness;
import com.urbanairship.datacube.idservices.HBaseIdService;
import com.urbanairship.datacube.ops.LongOp;

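/**
 * Integration tests for the HBase backfill pipeline: snapshot a live cube
 * table, merge a recomputed ("backfilled") table against that snapshot, and
 * verify that writes arriving during the backfill are preserved. Runs against
 * the embedded HBase cluster provided by EmbeddedClusterTestAbstract.
 *
 * The merge exercised by these tests behaves roughly as:
 *
 *   live := backfilled + (live - snapshot)
 *
 * i.e. the recomputed counts plus whatever deltas reached the live table
 * after the snapshot was taken.
 */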
public class HBaseBackfillerTest extends EmbeddedClusterTestAbstract {
    public static final byte[] CUBE_DATA_TABLE = "cube_data".getBytes();
    public static final byte[] SNAPSHOT_DEST_TABLE = "snapshot".getBytes();
    public static final byte[] BACKFILLED_TABLE = "backfilled".getBytes();
    public static final byte[] IDSERVICE_LOOKUP_TABLE = "idservice_data".getBytes();
    public static final byte[] IDSERVICE_COUNTER_TABLE = "idservice_counter".getBytes();
    public static final byte[] CF = "c".getBytes();
   
    @BeforeClass
    public static void setupCluster() throws Exception {
        getTestUtil().createTable(CUBE_DATA_TABLE, CF);
        getTestUtil().createTable(BACKFILLED_TABLE, CF);
        getTestUtil().createTable(IDSERVICE_COUNTER_TABLE, CF);
        getTestUtil().createTable(IDSERVICE_LOOKUP_TABLE, CF);
    }

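    /**
     * The snapshot table is created by HBaseSnapshotter inside each test, so
     * drop it here to give every test a clean slate.
     */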
    @After
    public void removeSnapshotAndBackfillTables() throws Exception {
        HBaseAdmin hba = getTestUtil().getHBaseAdmin();
        if(hba.tableExists(SNAPSHOT_DEST_TABLE)) {
            hba.disableTable(SNAPSHOT_DEST_TABLE);
            hba.deleteTable(SNAPSHOT_DEST_TABLE);
        }
    }
   
    /**
     * Backfilling shouldn't change any live values if the backfilled counts are the same as the live
     * counts.
     */
    @Test
    public void testBackfillIdempotence() throws Exception {
        Configuration conf = getTestUtil().getConfiguration();
        HTablePool pool = new HTablePool(conf, Integer.MAX_VALUE);
       
        IdService idService = new HBaseIdService(conf, IDSERVICE_LOOKUP_TABLE,
                IDSERVICE_COUNTER_TABLE, CF, ArrayUtils.EMPTY_BYTE_ARRAY);
        DbHarness<LongOp> hbaseDbHarness = new HBaseDbHarness<LongOp>(pool,
                ArrayUtils.EMPTY_BYTE_ARRAY, CUBE_DATA_TABLE, CF, LongOp.DESERIALIZER,
                idService, CommitType.INCREMENT);
       
        // Get some cube data into the source table; it doesn't really matter what.
        DbHarnessTests.basicTest(hbaseDbHarness);
       
        // Snapshot the source table
        Assert.assertTrue(new HBaseSnapshotter(conf, CUBE_DATA_TABLE, CF, SNAPSHOT_DEST_TABLE,
                new Path("hdfs:///test_hfiles"), false, null, null).runWithCheckedExceptions());
        // The snapshot should be equal to the source table
        assertTablesEqual(conf, CUBE_DATA_TABLE, SNAPSHOT_DEST_TABLE);
       
        // Simulate a backfill by copying the live cube
        Assert.assertTrue(new HBaseSnapshotter(conf, CUBE_DATA_TABLE, CF, BACKFILLED_TABLE,
                new Path("hdfs:///test_hfiles"), true, null, null).runWithCheckedExceptions());
       
        // Since the backfilled table is identical to the snapshot, there should be no changes to the
        // live production table
        HBaseBackfillMerger backfiller = new HBaseBackfillMerger(conf, ArrayUtils.EMPTY_BYTE_ARRAY,
                CUBE_DATA_TABLE, SNAPSHOT_DEST_TABLE, BACKFILLED_TABLE, CF, LongOp.LongOpDeserializer.class);
        Assert.assertTrue(backfiller.runWithCheckedExceptions());
        assertTablesEqual(conf, CUBE_DATA_TABLE, SNAPSHOT_DEST_TABLE);
       
    }

    /**
     * Backfilling 0 events should lead to an empty live table
     */
    @Test
    public void testBackfillingWithEmpty() throws Exception {
        Configuration conf = getTestUtil().getConfiguration();
        HTablePool pool = new HTablePool(conf, Integer.MAX_VALUE);
       
        IdService idService = new HBaseIdService(conf, IDSERVICE_LOOKUP_TABLE,
                IDSERVICE_COUNTER_TABLE, CF, ArrayUtils.EMPTY_BYTE_ARRAY);
        DbHarness<LongOp> hbaseDbHarness = new HBaseDbHarness<LongOp>(pool,
                ArrayUtils.EMPTY_BYTE_ARRAY, CUBE_DATA_TABLE, CF, LongOp.DESERIALIZER,
                idService, CommitType.INCREMENT);
       
        // Get some cube data into the source table; it doesn't really matter what.
        DbHarnessTests.basicTest(hbaseDbHarness);
       
        // Copy the source table using the snapshotter
        Assert.assertTrue(new HBaseSnapshotter(conf, CUBE_DATA_TABLE, CF, SNAPSHOT_DEST_TABLE,
                new Path("hdfs:///test_hfiles"), false, null, null).runWithCheckedExceptions());
       
        // The snapshot should be equal to the source table
        assertTablesEqual(conf, CUBE_DATA_TABLE, SNAPSHOT_DEST_TABLE);
       
        HBaseBackfillMerger backfiller = new HBaseBackfillMerger(conf, ArrayUtils.EMPTY_BYTE_ARRAY,
                CUBE_DATA_TABLE, SNAPSHOT_DEST_TABLE, BACKFILLED_TABLE, CF,
                LongOp.LongOpDeserializer.class);
        Assert.assertTrue(backfiller.runWithCheckedExceptions());
       
        // After backfilling from an empty "backfilled" table, the live cube should have no rows
        Assert.assertFalse(new HTable(conf, CUBE_DATA_TABLE).getScanner(CF).iterator().hasNext());
    }
   
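    /**
     * Writes that land on the live table while a backfill is running must not
     * be lost: the merger combines the backfilled counts with any deltas that
     * arrived after the snapshot was taken.
     */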
    @Test
    public void testMutationsWhileBackfilling() throws Exception {
        Configuration conf = getTestUtil().getConfiguration();
        HTablePool pool = new HTablePool(conf, Integer.MAX_VALUE);
       
        IdService idService = new HBaseIdService(conf, IDSERVICE_LOOKUP_TABLE,
                IDSERVICE_COUNTER_TABLE, CF, ArrayUtils.EMPTY_BYTE_ARRAY);
        DbHarness<LongOp> hbaseDbHarness = new HBaseDbHarness<LongOp>(pool,
                ArrayUtils.EMPTY_BYTE_ARRAY, CUBE_DATA_TABLE, CF, LongOp.DESERIALIZER,
                idService, CommitType.INCREMENT);
       
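        // A minimal cube: one string dimension with an identity rollup, driven
        // through a fully-synchronous DataCubeIo so every writeSync() is
        // immediately visible in HBase.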
        Dimension<String> onlyDimension = new Dimension<String>("mydimension",
                new StringToBytesBucketer(), true, 2);
       
        Rollup rollup = new Rollup(onlyDimension, BucketType.IDENTITY);
        List<Dimension<?>> dims = ImmutableList.<Dimension<?>>of(onlyDimension);
        List<Rollup> rollups = ImmutableList.of(rollup);
        DataCube<LongOp> cube = new DataCube<LongOp>(dims, rollups);
        DataCubeIo<LongOp> cubeIo = new DataCubeIo<LongOp>(cube, hbaseDbHarness, 1,
                Long.MAX_VALUE, SyncLevel.FULL_SYNC);
       
        // Before doing any snapshotting/backfilling, there's one value "5" in the cube.
        cubeIo.writeSync(new LongOp(5), new WriteBuilder(cube).at(onlyDimension, "coord1"));
       
        // Snapshot the source table
        Assert.assertTrue(new HBaseSnapshotter(conf, CUBE_DATA_TABLE, CF, SNAPSHOT_DEST_TABLE,
                new Path("hdfs:///test_hfiles"), false, null, null).runWithCheckedExceptions());

        // Simulate a backfill by copying the live cube
        Assert.assertTrue(new HBaseSnapshotter(conf, CUBE_DATA_TABLE, CF, BACKFILLED_TABLE,
                new Path("hdfs:///test_hfiles"), true, null, null).runWithCheckedExceptions());
       
        // Simulate two writes to the live table that wouldn't be seen by the app as it backfills.
        // This is like a client doing a write concurrently with a backfill.
        cubeIo.writeSync(new LongOp(6), new WriteBuilder(cube).at(onlyDimension, "coord1"));
        cubeIo.writeSync(new LongOp(7), new WriteBuilder(cube).at(onlyDimension, "coord2"));
       
        HBaseBackfillMerger backfiller = new HBaseBackfillMerger(conf, ArrayUtils.EMPTY_BYTE_ARRAY,
                CUBE_DATA_TABLE, SNAPSHOT_DEST_TABLE, BACKFILLED_TABLE, CF,
                LongOp.LongOpDeserializer.class);
        Assert.assertTrue(backfiller.runWithCheckedExceptions());
       
        // After everything's done, the two writes that occurred concurrently with the backfill
        // should be seen in the live table. coord1 should be 5+6=11 and coord2 should be 7.
        Assert.assertEquals(11L,
                cubeIo.get(new ReadBuilder(cube).at(onlyDimension, "coord1")).get().getLong());
        Assert.assertEquals(7L,
                cubeIo.get(new ReadBuilder(cube).at(onlyDimension, "coord2")).get().getLong());
    }

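    /**
     * Asserts that two HBase tables contain exactly the same cells, ignoring
     * timestamps. Rows are walked in parallel in row-key order and each row's
     * KeyValues are compared field by field.
     */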
    public static void assertTablesEqual(Configuration conf, byte[] leftTableName,
            byte[] rightTableName) throws Exception {
        HTable leftTable = new HTable(conf, leftTableName);
        HTable rightTable = new HTable(conf, rightTableName);
       
        ResultScanner leftRs = leftTable.getScanner(CF);
        ResultScanner rightRs = rightTable.getScanner(CF);
        Iterator<Result> leftIterator = leftRs.iterator();
        Iterator<Result> rightIterator = rightRs.iterator();
       
        MergeIterator<Result> mergeIt = new MergeIterator<Result>(ResultComparator.INSTANCE,
                ImmutableList.of(leftIterator, rightIterator));
       
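        // Each mergeIt.next() yields the Results sharing the smallest row key,
        // grouped by the iterator they came from. A row present in only one
        // table leaves the other iterator's get(...) empty, so next() throws
        // and the test fails.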
        while(mergeIt.hasNext()) {
            Multimap<Iterator<Result>,Result> results = mergeIt.next();
           
            List<KeyValue> leftKvs = results.get(leftIterator).iterator().next().list();
            List<KeyValue> rightKvs = results.get(rightIterator).iterator().next().list();
           
            Assert.assertEquals(leftKvs.size(), rightKvs.size());
            for(int i=0; i<leftKvs.size(); i++) {
                KeyValue leftKv = leftKvs.get(i);
                KeyValue rightKv = rightKvs.get(i);
               
                // Compare every field except timestamp
                Assert.assertArrayEquals(leftKv.getFamily(), rightKv.getFamily());
                Assert.assertArrayEquals(leftKv.getQualifier(), rightKv.getQualifier());
                Assert.assertArrayEquals(leftKv.getValue(), rightKv.getValue());
            }
        }
       
        leftTable.close();
        rightTable.close();
    }
   
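    /**
     * Verifies that HBaseBackfillMerger produces one Scan per region that can
     * hold rows for a given cube prefix, both before and after region splits.
     */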
    @Test
    public void testTableSplits() throws Exception {
        HBaseAdmin admin = getTestUtil().getHBaseAdmin();
        byte[] tableName = "oneSplitTest".getBytes();
       
        // create table with one region, should be one scan
        HTable hTable = getTestUtil().createTable(tableName, CF);
        String[] keys = new String[] {"a", "c00", "c11", "e", "g"};
        // Insert some keys, so we can split between them.
        for(String key: keys) {
            byte[] keyBytes = key.getBytes();
            Put put = new Put(keyBytes);
            put.add(CF, keyBytes, keyBytes);
            hTable.put(put);
        }
        admin.flush(tableName);
       
        byte[][] internalSplitKeys = BackfillUtil.getSplitKeys(hTable.getStartEndKeys());
        List<Scan> scans = HBaseBackfillMerger.scansThisCubeOnly(new byte[] {}, internalSplitKeys);
        Assert.assertEquals(1, scans.size());
       
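        // admin.split() is asynchronous; sleep to give the region server time
        // to finish each split before reading the region boundaries.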
        admin.split(tableName, "d".getBytes());
        Thread.sleep(5000L); // hack: wait for split to happen
        admin.split(tableName, "c01".getBytes());
        Thread.sleep(5000L); // hack: wait for split to happen
        Pair<byte[][],byte[][]> startsEnds = hTable.getStartEndKeys();
        Assert.assertEquals("Regions didn't split as requested, wtf", 3, startsEnds.getFirst().length);
        internalSplitKeys = BackfillUtil.getSplitKeys(startsEnds);
        scans = HBaseBackfillMerger.scansThisCubeOnly(new byte[] {}, internalSplitKeys);
        Assert.assertEquals(3, scans.size());
       
        scans = HBaseBackfillMerger.scansThisCubeOnly(new byte[] {'e'}, internalSplitKeys);
        Assert.assertEquals(1, scans.size());
       
        // Scanning cube 'd' shouldn't include any rows, since there's no such cube
        scans = HBaseBackfillMerger.scansThisCubeOnly(new byte[] {'d'}, internalSplitKeys);
        Assert.assertEquals(1, scans.size());
        ResultScanner dScanner = hTable.getScanner(scans.get(0));
        Assert.assertEquals(0, iteratorCount(dScanner.iterator()));
        dScanner.close();
       
        // Scanning cube 'e' should only see the one row for 'e', and not see the row for 'g'. There
        // should only be one scan since all the rows should be in the same region.
        scans = HBaseBackfillMerger.scansThisCubeOnly(new byte[] {'e'}, internalSplitKeys);
        Assert.assertEquals(1, scans.size());
        ResultScanner eScanner = hTable.getScanner(scans.get(0));
        Assert.assertEquals(1, iteratorCount(eScanner.iterator()));
        eScanner.close();
       
        // Splitting cube c in half should yield 2 scans when we scan over it, and we should only get
        // back the 2 rows for cube c.
        scans = HBaseBackfillMerger.scansThisCubeOnly(new byte[] {'c'}, internalSplitKeys);
        Assert.assertEquals(2, scans.size());
        ResultScanner cScanner1 = hTable.getScanner(scans.get(0));
        ResultScanner cScanner2 = hTable.getScanner(scans.get(1));
        Assert.assertEquals(1, iteratorCount(cScanner1.iterator()));
        Assert.assertEquals(1, iteratorCount(cScanner2.iterator()));
        cScanner1.close();
        cScanner2.close();
    }
               
    private static int iteratorCount(Iterator<?> it) {
        int count = 0;
        while(it.hasNext()) {
            count++;
            it.next();
        }
        return count;
    }
}