/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.hbase.index.write;
import static org.apache.phoenix.query.BaseTest.setUpConfigForMiniCluster;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.apache.phoenix.end2end.NeedsOwnMiniClusterTest;
import org.apache.phoenix.hbase.index.IndexTestingUtils;
import org.apache.phoenix.hbase.index.Indexer;
import org.apache.phoenix.hbase.index.TableName;
import org.apache.phoenix.hbase.index.covered.example.ColumnGroup;
import org.apache.phoenix.hbase.index.covered.example.CoveredColumn;
import org.apache.phoenix.hbase.index.covered.example.CoveredColumnIndexSpecifierBuilder;
import org.apache.phoenix.hbase.index.covered.example.CoveredColumnIndexer;
import org.apache.phoenix.hbase.index.table.HTableInterfaceReference;
import org.apache.phoenix.hbase.index.util.IndexManagementUtil;
import org.apache.phoenix.hbase.index.write.recovery.PerRegionIndexWriteCache;
import org.apache.phoenix.hbase.index.write.recovery.StoreFailuresInCachePolicy;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import com.google.common.collect.Multimap;
/**
 * When a regionserver crashes, its WAL is split and then replayed as its regions are reopened on
 * the surviving servers. If the index region was hosted on the same server as the primary region,
 * we have to make a best effort not to kill the server for failing index writes while the index
 * region is still coming back online.
 */
@Category(NeedsOwnMiniClusterTest.class)
public class TestWALRecoveryCaching {
private static final Log LOG = LogFactory.getLog(TestWALRecoveryCaching.class);
private static final long ONE_SEC = 1000;
private static final long ONE_MIN = 60 * ONE_SEC;
private static final long TIMEOUT = ONE_MIN;
@Rule
public TableName testTable = new TableName();
private String getIndexTableName() {
return this.testTable.getTableNameString() + "_index";
}
// -----------------------------------------------------------------------------------------------
// Warning! The classes here rely on this static. Adding multiple tests to this class and running
// them concurrently could have unexpected results (including, but not limited to, odd failures
// and flapping tests).
// -----------------------------------------------------------------------------------------------
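// Latch gating WAL replay on the index table; the failure policy below counts it down once an
// index write has failed.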
private static CountDownLatch allowIndexTableToRecover;
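/**
 * Blocks WAL replay for the index table until the test counts down allowIndexTableToRecover,
 * simulating an index region that is slow to come back online after a crash.
 */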
public static class IndexTableBlockingReplayObserver extends BaseRegionObserver {
@Override
public void preWALRestore(ObserverContext<RegionCoprocessorEnvironment> env, HRegionInfo info,
HLogKey logKey, WALEdit logEdit) throws IOException {
try {
LOG.debug("Restoring logs for index table");
if (allowIndexTableToRecover != null) {
allowIndexTableToRecover.await();
LOG.debug("Completed index table recovery wait latch");
}
} catch (InterruptedException e) {
Assert.fail("Should not be interrupted while waiting to allow the index to restore WALs.");
}
}
}
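/**
 * Failure policy that counts down allowIndexTableToRecover on the first failed index write,
 * unblocking the index table's WAL replay, and then falls back to the usual
 * StoreFailuresInCachePolicy behavior of caching the failed edits.
 */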
public static class ReleaseLatchOnFailurePolicy extends StoreFailuresInCachePolicy {
/**
 * @param failedIndexEdits per-region cache in which the index edits that failed to be written
 * are stored
 */
public ReleaseLatchOnFailurePolicy(PerRegionIndexWriteCache failedIndexEdits) {
super(failedIndexEdits);
}
@Override
public void handleFailure(Multimap<HTableInterfaceReference, Mutation> attempted,
Exception cause) throws IOException {
LOG.debug("Found index update failure!");
if (allowIndexTableToRecover != null) {
LOG.info("failed index write on WAL recovery - allowing index table to be restored.");
allowIndexTableToRecover.countDown();
}
super.handleFailure(attempted, cause);
}
}
//TODO: Jesse to fix
@Ignore("Configuration issue - valid test, just needs fixing")
@Test
public void testWaitsOnIndexRegionToReload() throws Exception {
HBaseTestingUtility util = new HBaseTestingUtility();
Configuration conf = util.getConfiguration();
setUpConfigForMiniCluster(conf);
// set up other useful index-testing settings
IndexTestingUtils.setupConfig(conf);
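// disable the Indexer's HBase version compatibility check for the mini-cluster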
conf.setBoolean(Indexer.CHECK_VERSION_CONF_KEY, false);
// make sure everything is set up correctly
IndexManagementUtil.ensureMutableIndexingCorrectlyConfigured(conf);
// start the cluster with 2 rs
util.startMiniCluster(2);
HBaseAdmin admin = util.getHBaseAdmin();
// setup the index
byte[] family = Bytes.toBytes("family");
byte[] qual = Bytes.toBytes("qualifier");
byte[] nonIndexedFamily = Bytes.toBytes("nonIndexedFamily");
String indexedTableName = getIndexTableName();
ColumnGroup columns = new ColumnGroup(indexedTableName);
columns.add(new CoveredColumn(family, qual));
CoveredColumnIndexSpecifierBuilder builder = new CoveredColumnIndexSpecifierBuilder();
builder.addIndexGroup(columns);
// create the primary table w/ indexing enabled
HTableDescriptor primaryTable = new HTableDescriptor(testTable.getTableName());
primaryTable.addFamily(new HColumnDescriptor(family));
primaryTable.addFamily(new HColumnDescriptor(nonIndexedFamily));
builder.addArbitraryConfigForTesting(Indexer.RecoveryFailurePolicyKeyForTesting,
ReleaseLatchOnFailurePolicy.class.getName());
builder.build(primaryTable);
admin.createTable(primaryTable);
// create the index table
HTableDescriptor indexTableDesc = new HTableDescriptor(Bytes.toBytes(getIndexTableName()));
indexTableDesc.addCoprocessor(IndexTableBlockingReplayObserver.class.getName());
CoveredColumnIndexer.createIndexTable(admin, indexTableDesc);
// figure out where our tables live
ServerName shared =
ensureTablesLiveOnSameServer(util.getMiniHBaseCluster(), Bytes.toBytes(indexedTableName),
testTable.getTableName());
// load some data into the table
Put p = new Put(Bytes.toBytes("row"));
p.add(family, qual, Bytes.toBytes("value"));
HTable primary = new HTable(conf, testTable.getTableName());
primary.put(p);
primary.flushCommits();
// turn on the recovery latch
allowIndexTableToRecover = new CountDownLatch(1);
// kill the server where the tables live - this should trigger distributed log splitting
// first, collect the regions for both tables currently hosted on the shared server
List<HRegion> online = new ArrayList<HRegion>();
online.addAll(getRegionsFromServerForTable(util.getMiniHBaseCluster(), shared,
testTable.getTableName()));
online.addAll(getRegionsFromServerForTable(util.getMiniHBaseCluster(), shared,
Bytes.toBytes(indexedTableName)));
// log all the current state of the server
LOG.info("Current Server/Region paring: ");
for (RegionServerThread t : util.getMiniHBaseCluster().getRegionServerThreads()) {
// check all the conditions for the server to be done
HRegionServer server = t.getRegionServer();
if (server.isStopping() || server.isStopped() || server.isAborted()) {
LOG.info("\t== Offline: " + server.getServerName());
continue;
}
List<HRegionInfo> regions = server.getOnlineRegions();
LOG.info("\t" + server.getServerName() + " regions: " + regions);
}
LOG.debug("Killing server " + shared);
util.getMiniHBaseCluster().killRegionServer(shared);
LOG.debug("Waiting on server " + shared + "to die");
util.getMiniHBaseCluster().waitForRegionServerToStop(shared, TIMEOUT);
// force reassign the regions from the table
// LOG.debug("Forcing region reassignment from the killed server: " + shared);
// for (HRegion region : online) {
// util.getMiniHBaseCluster().getMaster().assign(region.getRegionName());
// }
LOG.info(" ====== Killed shared server ==== ");
// make a second put that (1) isn't indexed, so we can be sure of the index state, and (2)
// ensures that our table is back up
Put p2 = new Put(p.getRow());
p2.add(nonIndexedFamily, Bytes.toBytes("Not indexed"), Bytes.toBytes("non-indexed value"));
primary.put(p2);
primary.flushCommits();
// make sure that we actually failed the write once (within a 5 minute window)
assertTrue("Didn't find an error writing to index table within timeout!",
allowIndexTableToRecover.await(ONE_MIN * 5, TimeUnit.MILLISECONDS));
// scan the index to make sure it has the single entry, which had to be replayed from the WAL
// since we hard-killed the server
Scan s = new Scan();
HTable index = new HTable(conf, getIndexTableName());
ResultScanner scanner = index.getScanner(s);
int count = 0;
for (Result r : scanner) {
LOG.info("Got index table result:" + r);
count++;
}
assertEquals("Got an unexpected found of index rows", 1, count);
// cleanup
scanner.close();
index.close();
primary.close();
util.shutdownMiniCluster();
}
/**
 * @param cluster mini-cluster to search
 * @param server server that should be hosting the regions
 * @param table name of the table whose regions we want
 * @return the regions of the given table hosted on the given server, or an empty list if the
 * server is not part of the cluster
 */
private List<HRegion> getRegionsFromServerForTable(MiniHBaseCluster cluster, ServerName server,
byte[] table) {
List<HRegion> online = Collections.emptyList();
for (RegionServerThread rst : cluster.getRegionServerThreads()) {
// if it's the server we are going to kill, get the regions we want to reassign
if (rst.getRegionServer().getServerName().equals(server)) {
online = rst.getRegionServer().getOnlineRegions(table);
break;
}
}
return online;
}
/**
 * @param cluster mini-cluster hosting both tables
 * @param indexTable name of the index table
 * @param primaryTable name of the primary (indexed) table
 * @return a server hosting regions of both tables, bouncing servers until such an overlap exists
 * @throws Exception if a server cannot be stopped/started or its regions cannot be reassigned
 */
private ServerName ensureTablesLiveOnSameServer(MiniHBaseCluster cluster, byte[] indexTable,
byte[] primaryTable) throws Exception {
ServerName shared = getSharedServer(cluster, indexTable, primaryTable);
boolean tryIndex = true;
while (shared == null) {
// start killing servers until we get an overlap
Set<ServerName> servers;
byte[] table = null;
// switch which server we kill each time to get region movement
if (tryIndex) {
table = indexTable;
} else {
table = primaryTable;
}
servers = getServersForTable(cluster, table);
tryIndex = !tryIndex;
for (ServerName server : servers) {
// find the regionserver that matches the passed server
List<HRegion> online = getRegionsFromServerForTable(cluster, server, table);
LOG.info("Shutting down and reassigning regions from " + server);
cluster.stopRegionServer(server);
cluster.waitForRegionServerToStop(server, TIMEOUT);
// force reassign the regions from the table
for (HRegion region : online) {
cluster.getMaster().assign(region.getRegionName());
}
LOG.info("Starting region server:" + server.getHostname());
cluster.startRegionServer(server.getHostname());
cluster.waitForRegionServerToStart(server.getHostname(), TIMEOUT);
// start a server to get back to the base number of servers
LOG.info("STarting server to replace " + server);
cluster.startRegionServer();
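// only bounce one server per pass, then re-check whether the tables now share a server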
break;
}
shared = getSharedServer(cluster, indexTable, primaryTable);
}
return shared;
}
/**
 * @param cluster mini-cluster hosting both tables
 * @param indexTable name of the index table
 * @param primaryTable name of the primary (indexed) table
 * @return a server hosting regions of both tables, or <tt>null</tt> if there is no overlap
 * @throws Exception if the region-to-server mapping cannot be read
 */
private ServerName getSharedServer(MiniHBaseCluster cluster, byte[] indexTable,
byte[] primaryTable) throws Exception {
Set<ServerName> indexServers = getServersForTable(cluster, indexTable);
Set<ServerName> primaryServers = getServersForTable(cluster, primaryTable);
Set<ServerName> joinSet = new HashSet<ServerName>(indexServers);
joinSet.addAll(primaryServers);
// if there is already an overlap, then find it and return it
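// the union is strictly smaller than the sum of the two sets' sizes exactly when they intersect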
if (joinSet.size() < indexServers.size() + primaryServers.size()) {
// find the first overlapping server
for (ServerName server : joinSet) {
if (indexServers.contains(server) && primaryServers.contains(server)) {
return server;
}
}
throw new RuntimeException(
"Couldn't find a matching server on which both the primary and index table live, "
+ "even though they have overlapping server sets");
}
return null;
}
private Set<ServerName> getServersForTable(MiniHBaseCluster cluster, byte[] table)
throws Exception {
List<HRegion> indexRegions = cluster.getRegions(table);
Set<ServerName> indexServers = new HashSet<ServerName>();
for (HRegion region : indexRegions) {
indexServers.add(cluster.getServerHoldingRegion(region.getRegionName()));
}
return indexServers;
}
}