Package org.apache.hadoop.hbase.zookeeper

Source Code of org.apache.hadoop.hbase.zookeeper.ZKAssign

/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.zookeeper;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.zookeeper.AsyncCallback;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.Code;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.apache.zookeeper.data.Stat;

import java.util.List;

// We should not be importing this Type here, nor a RegionTransition, etc.  This class should be
// about zk and bytes only.

/**
* Utility class for doing region assignment in ZooKeeper.  This class extends
* stuff done in {@link ZKUtil} to cover specific assignment operations.
* <p>
* Contains only static methods and constants.
* <p>
* Used by both the Master and RegionServer.
* <p>
* All valid transitions outlined below:
* <p>
* <b>MASTER</b>
* <ol>
*   <li>
*     Master creates an unassigned node as OFFLINE.
*     - Cluster startup and table enabling.
*   </li>
*   <li>
*     Master forces an existing unassigned node to OFFLINE.
*     - RegionServer failure.
*     - Allows transitions from all states to OFFLINE.
*   </li>
*   <li>
*     Master deletes an unassigned node that was in a OPENED state.
*     - Normal region transitions.  Besides cluster startup, no other deletions
*     of unassigned nodes is allowed.
*   </li>
*   <li>
*     Master deletes all unassigned nodes regardless of state.
*     - Cluster startup before any assignment happens.
*   </li>
* </ol>
* <p>
* <b>REGIONSERVER</b>
* <ol>
*   <li>
*     RegionServer creates an unassigned node as CLOSING.
*     - All region closes will do this in response to a CLOSE RPC from Master.
*     - A node can never be transitioned to CLOSING, only created.
*   </li>
*   <li>
*     RegionServer transitions an unassigned node from CLOSING to CLOSED.
*     - Normal region closes.  CAS operation.
*   </li>
*   <li>
*     RegionServer transitions an unassigned node from OFFLINE to OPENING.
*     - All region opens will do this in response to an OPEN RPC from the Master.
*     - Normal region opens.  CAS operation.
*   </li>
*   <li>
*     RegionServer transitions an unassigned node from OPENING to OPENED.
*     - Normal region opens.  CAS operation.
*   </li>
* </ol>
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class ZKAssign {
  private static final Log LOG = LogFactory.getLog(ZKAssign.class);

  /**
   * Gets the full path node name for the unassigned node for the specified
   * region.
   * @param zkw zk reference
   * @param regionName region name
   * @return full path node name
   */
  public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
    return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
  }

  /**
   * Gets the region name from the full path node name of an unassigned node.
   * @param path full zk path
   * @return region name
   */
  public static String getRegionName(ZooKeeperWatcher zkw, String path) {
    return path.substring(zkw.assignmentZNode.length()+1);
  }

  // Master methods

  /**
   * Creates a new unassigned node in the OFFLINE state for the specified region.
   *
   * <p>Does not transition nodes from other states.  If a node already exists
   * for this region, a {@link NodeExistsException} will be thrown.
   *
   * <p>Sets a watcher on the unassigned region node if the method is successful.
   *
   * <p>This method should only be used during cluster startup and the enabling
   * of a table.
   *
   * @param zkw zk reference
   * @param region region to be created as offline
   * @param serverName server transition will happen on
   * @throws KeeperException if unexpected zookeeper exception
   * @throws KeeperException.NodeExistsException if node already exists
   */
  public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
      ServerName serverName)
  throws KeeperException, KeeperException.NodeExistsException {
    createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
  }

  public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
      ServerName serverName, final EventType event)
  throws KeeperException, KeeperException.NodeExistsException {
    LOG.debug(zkw.prefix("Creating unassigned node for " +
      region.getEncodedName() + " in OFFLINE state"));
    RegionTransition rt =
      RegionTransition.createRegionTransition(event, region.getRegionName(), serverName);
    String node = getNodeName(zkw, region.getEncodedName());
    ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
  }

  /**
   * Creates an unassigned node in the OFFLINE state for the specified region.
   * <p>
   * Runs asynchronously.  Depends on no pre-existing znode.
   *
   * <p>Sets a watcher on the unassigned region node.
   *
   * @param zkw zk reference
   * @param region region to be created as offline
   * @param serverName server transition will happen on
   * @param cb
   * @param ctx
   * @throws KeeperException if unexpected zookeeper exception
   * @throws KeeperException.NodeExistsException if node already exists
   */
  public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
      HRegionInfo region, ServerName serverName,
      final AsyncCallback.StringCallback cb, final Object ctx)
  throws KeeperException {
    LOG.debug(zkw.prefix("Async create of unassigned node for " +
      region.getEncodedName() + " with OFFLINE state"));
    RegionTransition rt =
      RegionTransition.createRegionTransition(
          EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
    String node = getNodeName(zkw, region.getEncodedName());
    ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx);
  }

  /**
   * Creates or force updates an unassigned node to the OFFLINE state for the
   * specified region.
   * <p>
   * Attempts to create the node but if it exists will force it to transition to
   * and OFFLINE state.
   *
   * <p>Sets a watcher on the unassigned region node if the method is
   * successful.
   *
   * <p>This method should be used when assigning a region.
   *
   * @param zkw zk reference
   * @param region region to be created as offline
   * @param serverName server transition will happen on
   * @return the version of the znode created in OFFLINE state, -1 if
   *         unsuccessful.
   * @throws KeeperException if unexpected zookeeper exception
   * @throws KeeperException.NodeExistsException if node already exists
   */
  public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
      HRegionInfo region, ServerName serverName) throws KeeperException {
    LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
      region.getEncodedName() + " with OFFLINE state"));
    RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE,
      region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
    byte [] data = rt.toByteArray();
    String node = getNodeName(zkw, region.getEncodedName());
    zkw.sync(node);
    int version = ZKUtil.checkExists(zkw, node);
    if (version == -1) {
      return ZKUtil.createAndWatch(zkw, node, data);
    } else {
      boolean setData = false;
      try {
        setData = ZKUtil.setData(zkw, node, data, version);
        // Setdata throws KeeperException which aborts the Master. So we are
        // catching it here.
        // If just before setting the znode to OFFLINE if the RS has made any
        // change to the
        // znode state then we need to return -1.
      } catch (KeeperException kpe) {
        LOG.info("Version mismatch while setting the node to OFFLINE state.");
        return -1;
      }
      if (!setData) {
        return -1;
      } else {
        // We successfully forced to OFFLINE, reset watch and handle if
        // the state changed in between our set and the watch
        byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
        rt = getRegionTransition(bytes);
        if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) {
          // state changed, need to process
          return -1;
        }
      }
    }
    return version + 1;
  }

  /**
   * Deletes an existing unassigned node that is in the OPENED state for the
   * specified region.
   *
   * <p>If a node does not already exist for this region, a
   * {@link NoNodeException} will be thrown.
   *
   * <p>No watcher is set whether this succeeds or not.
   *
   * <p>Returns false if the node was not in the proper state but did exist.
   *
   * <p>This method is used during normal region transitions when a region
   * finishes successfully opening.  This is the Master acknowledging completion
   * of the specified regions transition.
   *
   * @param zkw zk reference
   * @param encodedRegionName opened region to be deleted from zk
   * @throws KeeperException if unexpected zookeeper exception
   * @throws KeeperException.NoNodeException if node does not exist
   */
  public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
      String encodedRegionName)
  throws KeeperException, KeeperException.NoNodeException {
    return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_OPENED);
  }

  /**
   * Deletes an existing unassigned node that is in the OFFLINE state for the
   * specified region.
   *
   * <p>If a node does not already exist for this region, a
   * {@link NoNodeException} will be thrown.
   *
   * <p>No watcher is set whether this succeeds or not.
   *
   * <p>Returns false if the node was not in the proper state but did exist.
   *
   * <p>This method is used during master failover when the regions on an RS
   * that has died are all set to OFFLINE before being processed.
   *
   * @param zkw zk reference
   * @param encodedRegionName closed region to be deleted from zk
   * @throws KeeperException if unexpected zookeeper exception
   * @throws KeeperException.NoNodeException if node does not exist
   */
  public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
      String encodedRegionName)
  throws KeeperException, KeeperException.NoNodeException {
    return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_OFFLINE);
  }

  /**
   * Deletes an existing unassigned node that is in the CLOSED state for the
   * specified region.
   *
   * <p>If a node does not already exist for this region, a
   * {@link NoNodeException} will be thrown.
   *
   * <p>No watcher is set whether this succeeds or not.
   *
   * <p>Returns false if the node was not in the proper state but did exist.
   *
   * <p>This method is used during table disables when a region finishes
   * successfully closing.  This is the Master acknowledging completion
   * of the specified regions transition to being closed.
   *
   * @param zkw zk reference
   * @param encodedRegionName closed region to be deleted from zk
   * @throws KeeperException if unexpected zookeeper exception
   * @throws KeeperException.NoNodeException if node does not exist
   */
  public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
      String encodedRegionName)
  throws KeeperException, KeeperException.NoNodeException {
    return deleteNode(zkw, encodedRegionName, EventType.RS_ZK_REGION_CLOSED);
  }

  /**
   * Deletes an existing unassigned node that is in the CLOSING state for the
   * specified region.
   *
   * <p>If a node does not already exist for this region, a
   * {@link NoNodeException} will be thrown.
   *
   * <p>No watcher is set whether this succeeds or not.
   *
   * <p>Returns false if the node was not in the proper state but did exist.
   *
   * <p>This method is used during table disables when a region finishes
   * successfully closing.  This is the Master acknowledging completion
   * of the specified regions transition to being closed.
   *
   * @param zkw zk reference
   * @param region closing region to be deleted from zk
   * @throws KeeperException if unexpected zookeeper exception
   * @throws KeeperException.NoNodeException if node does not exist
   */
  public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
      HRegionInfo region)
  throws KeeperException, KeeperException.NoNodeException {
    String encodedRegionName = region.getEncodedName();
    return deleteNode(zkw, encodedRegionName, EventType.M_ZK_REGION_CLOSING);
  }

  /**
   * Deletes an existing unassigned node that is in the specified state for the
   * specified region.
   *
   * <p>If a node does not already exist for this region, a
   * {@link NoNodeException} will be thrown.
   *
   * <p>No watcher is set whether this succeeds or not.
   *
   * <p>Returns false if the node was not in the proper state but did exist.
   *
   * <p>This method is used when a region finishes opening/closing.
   * The Master acknowledges completion
   * of the specified regions transition to being closed/opened.
   *
   * @param zkw zk reference
   * @param encodedRegionName region to be deleted from zk
   * @param expectedState state region must be in for delete to complete
   * @throws KeeperException if unexpected zookeeper exception
   * @throws KeeperException.NoNodeException if node does not exist
   */
  public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
      EventType expectedState)
  throws KeeperException, KeeperException.NoNodeException {
    return deleteNode(zkw, encodedRegionName, expectedState, -1);
  }

  /**
   * Deletes an existing unassigned node that is in the specified state for the
   * specified region.
   *
   * <p>If a node does not already exist for this region, a
   * {@link NoNodeException} will be thrown.
   *
   * <p>No watcher is set whether this succeeds or not.
   *
   * <p>Returns false if the node was not in the proper state but did exist.
   *
   * <p>This method is used when a region finishes opening/closing.
   * The Master acknowledges completion
   * of the specified regions transition to being closed/opened.
   *
   * @param zkw zk reference
   * @param encodedRegionName region to be deleted from zk
   * @param expectedState state region must be in for delete to complete
   * @param expectedVersion of the znode that is to be deleted.
   *        If expectedVersion need not be compared while deleting the znode
   *        pass -1
   * @throws KeeperException if unexpected zookeeper exception
   * @throws KeeperException.NoNodeException if node does not exist
   */
  public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
      EventType expectedState, int expectedVersion)
  throws KeeperException, KeeperException.NoNodeException {
    LOG.debug(zkw.prefix("Deleting existing unassigned " +
      "node for " + encodedRegionName + " that is in expected state " + expectedState));
    String node = getNodeName(zkw, encodedRegionName);
    zkw.sync(node);
    Stat stat = new Stat();
    byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
    if (bytes == null) {
      // If it came back null, node does not exist.
      throw KeeperException.create(Code.NONODE);
    }
    RegionTransition rt = getRegionTransition(bytes);
    EventType et = rt.getEventType();
    if (!et.equals(expectedState)) {
      LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " +
        expectedState + " state but node is in " + et + " state"));
      return false;
    }
    if (expectedVersion != -1
        && stat.getVersion() != expectedVersion) {
      LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" +
        " the expected one. Got a version mismatch");
      return false;
    }
    if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
      LOG.warn(zkw.prefix("Attempting to delete " +
          "unassigned node " + encodedRegionName + " in " + expectedState +
          " state but after verifying state, we got a version mismatch"));
      return false;
    }
    LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
        encodedRegionName + " in expected state " + expectedState));
    return true;
  }

  /**
   * Deletes all unassigned nodes regardless of their state.
   *
   * <p>No watchers are set.
   *
   * <p>This method is used by the Master during cluster startup to clear out
   * any existing state from other cluster runs.
   *
   * @param zkw zk reference
   * @throws KeeperException if unexpected zookeeper exception
   */
  public static void deleteAllNodes(ZooKeeperWatcher zkw)
  throws KeeperException {
    LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
    ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
  }

  /**
   * Creates a new unassigned node in the CLOSING state for the specified
   * region.
   *
   * <p>Does not transition nodes from any states.  If a node already exists
   * for this region, a {@link NodeExistsException} will be thrown.
   *
   * <p>If creation is successful, returns the version number of the CLOSING
   * node created.
   *
   * <p>Set a watch.
   *
   * <p>This method should only be used by a Master when initiating a
   * close of a region before sending a close request to the region server.
   *
   * @param zkw zk reference
   * @param region region to be created as closing
   * @param serverName server transition will happen on
   * @return version of node after transition, -1 if unsuccessful transition
   * @throws KeeperException if unexpected zookeeper exception
   * @throws KeeperException.NodeExistsException if node already exists
   */
  public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
      ServerName serverName)
  throws KeeperException, KeeperException.NodeExistsException {
    LOG.debug(zkw.prefix("Creating unassigned node for " +
      region.getEncodedName() + " in a CLOSING state"));
    RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
      region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
    String node = getNodeName(zkw, region.getEncodedName());
    return ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
  }

  // RegionServer methods

  /**
   * Transitions an existing unassigned node for the specified region which is
   * currently in the CLOSING state to be in the CLOSED state.
   *
   * <p>Does not transition nodes from other states.  If for some reason the
   * node could not be transitioned, the method returns -1.  If the transition
   * is successful, the version of the node after transition is returned.
   *
   * <p>This method can fail and return false for three different reasons:
   * <ul><li>Unassigned node for this region does not exist</li>
   * <li>Unassigned node for this region is not in CLOSING state</li>
   * <li>After verifying CLOSING state, update fails because of wrong version
   * (someone else already transitioned the node)</li>
   * </ul>
   *
   * <p>Does not set any watches.
   *
   * <p>This method should only be used by a RegionServer when initiating a
   * close of a region after receiving a CLOSE RPC from the Master.
   *
   * @param zkw zk reference
   * @param region region to be transitioned to closed
   * @param serverName server transition happens on
   * @return version of node after transition, -1 if unsuccessful transition
   * @throws KeeperException if unexpected zookeeper exception
   */
  public static int transitionNodeClosed(ZooKeeperWatcher zkw,
      HRegionInfo region, ServerName serverName, int expectedVersion)
  throws KeeperException {
    return transitionNode(zkw, region, serverName,
        EventType.M_ZK_REGION_CLOSING,
        EventType.RS_ZK_REGION_CLOSED, expectedVersion);
  }

  /**
   * Transitions an existing unassigned node for the specified region which is
   * currently in the OFFLINE state to be in the OPENING state.
   *
   * <p>Does not transition nodes from other states.  If for some reason the
   * node could not be transitioned, the method returns -1.  If the transition
   * is successful, the version of the node written as OPENING is returned.
   *
   * <p>This method can fail and return -1 for three different reasons:
   * <ul><li>Unassigned node for this region does not exist</li>
   * <li>Unassigned node for this region is not in OFFLINE state</li>
   * <li>After verifying OFFLINE state, update fails because of wrong version
   * (someone else already transitioned the node)</li>
   * </ul>
   *
   * <p>Does not set any watches.
   *
   * <p>This method should only be used by a RegionServer when initiating an
   * open of a region after receiving an OPEN RPC from the Master.
   *
   * @param zkw zk reference
   * @param region region to be transitioned to opening
   * @param serverName server transition happens on
   * @return version of node after transition, -1 if unsuccessful transition
   * @throws KeeperException if unexpected zookeeper exception
   */
  public static int transitionNodeOpening(ZooKeeperWatcher zkw,
      HRegionInfo region, ServerName serverName)
  throws KeeperException {
    return transitionNodeOpening(zkw, region, serverName,
      EventType.M_ZK_REGION_OFFLINE);
  }

  public static int transitionNodeOpening(ZooKeeperWatcher zkw,
      HRegionInfo region, ServerName serverName, final EventType beginState)
  throws KeeperException {
    return transitionNode(zkw, region, serverName, beginState,
      EventType.RS_ZK_REGION_OPENING, -1);
  }

  /**
   * Retransitions an existing unassigned node for the specified region which is
   * currently in the OPENING state to be in the OPENING state.
   *
   * <p>Does not transition nodes from other states.  If for some reason the
   * node could not be transitioned, the method returns -1.  If the transition
   * is successful, the version of the node rewritten as OPENING is returned.
   *
   * <p>This method can fail and return -1 for three different reasons:
   * <ul><li>Unassigned node for this region does not exist</li>
   * <li>Unassigned node for this region is not in OPENING state</li>
   * <li>After verifying OPENING state, update fails because of wrong version
   * (someone else already transitioned the node)</li>
   * </ul>
   *
   * <p>Does not set any watches.
   *
   * <p>This method should only be used by a RegionServer when initiating an
   * open of a region after receiving an OPEN RPC from the Master.
   *
   * @param zkw zk reference
   * @param region region to be transitioned to opening
   * @param serverName server transition happens on
   * @param updateZNode write the znode. If false, we only check.
   * @return version of node after transition, -1 if unsuccessful transition
   * @throws KeeperException if unexpected zookeeper exception
   */
  public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
      HRegionInfo region, ServerName serverName, int expectedVersion, boolean updateZNode)
  throws KeeperException {

    String encoded = region.getEncodedName();
    if(LOG.isDebugEnabled()) {
      LOG.debug(zkw.prefix("Attempting to retransition the opening state of node " +
          HRegionInfo.prettyPrint(encoded)));
    }

    String node = getNodeName(zkw, encoded);
    zkw.sync(node);

    // Read existing data of the node
    Stat stat = new Stat();
    byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
    if (existingBytes == null) {
      // Node no longer exists.  Return -1. It means unsuccessful transition.
      return -1;
    }
    RegionTransition rt = getRegionTransition(existingBytes);

    // Verify it is the expected version
    if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
      LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
          "unassigned node for " + encoded + " failed, " +
          "the node existed but was version " + stat.getVersion() +
          " not the expected version " + expectedVersion));
      return -1;
    }

    // Verify it is in expected state
    EventType et = rt.getEventType();
    if (!et.equals(EventType.RS_ZK_REGION_OPENING)) {
      String existingServer = (rt.getServerName() == null)
          ? "<unknown>" : rt.getServerName().toString();
      LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for "
          + encoded + " failed, the node existed but was in the state " + et +
          " set by the server " + existingServer));
      return -1;
    }

    // We don't have to write the new state: the check is complete.
    if (!updateZNode){
      return expectedVersion;
    }

    // Write new data, ensuring data has not changed since we last read it
    try {
      rt = RegionTransition.createRegionTransition(
          EventType.RS_ZK_REGION_OPENING, region.getRegionName(), serverName, null);
      if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
        LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
            "unassigned node for " + encoded + " failed, " +
            "the node existed and was in the expected state but then when " +
            "setting data we got a version mismatch"));
        return -1;
      }
      if(LOG.isDebugEnabled()) {
        LOG.debug(zkw.prefix("Successfully retransition the opening state of node " + encoded));
      }
      return stat.getVersion() + 1;
    } catch (KeeperException.NoNodeException nne) {
      LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
          "unassigned node for " + encoded + " failed, " +
          "the node existed and was in the expected state but then when " +
          "setting data it no longer existed"));
      return -1;
    }
  }

  /**
   * Transitions an existing unassigned node for the specified region which is
   * currently in the OPENING state to be in the OPENED state.
   *
   * <p>Does not transition nodes from other states.  If for some reason the
   * node could not be transitioned, the method returns -1.  If the transition
   * is successful, the version of the node after transition is returned.
   *
   * <p>This method can fail and return false for three different reasons:
   * <ul><li>Unassigned node for this region does not exist</li>
   * <li>Unassigned node for this region is not in OPENING state</li>
   * <li>After verifying OPENING state, update fails because of wrong version
   * (this should never actually happen since an RS only does this transition
   * following a transition to OPENING.  if two RS are conflicting, one would
   * fail the original transition to OPENING and not this transition)</li>
   * </ul>
   *
   * <p>Does not set any watches.
   *
   * <p>This method should only be used by a RegionServer when completing the
   * open of a region.
   *
   * @param zkw zk reference
   * @param region region to be transitioned to opened
   * @param serverName server transition happens on
   * @return version of node after transition, -1 if unsuccessful transition
   * @throws KeeperException if unexpected zookeeper exception
   */
  public static int transitionNodeOpened(ZooKeeperWatcher zkw,
      HRegionInfo region, ServerName serverName, int expectedVersion)
  throws KeeperException {
    return transitionNode(zkw, region, serverName,
        EventType.RS_ZK_REGION_OPENING,
        EventType.RS_ZK_REGION_OPENED, expectedVersion);
  }

  /**
   *
   * @param zkw zk reference
   * @param region region to be closed
   * @param expectedVersion expected version of the znode
   * @return true if the znode exists, has the right version and the right state. False otherwise.
   * @throws KeeperException
   */
  public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region,
                                          int expectedVersion) throws KeeperException {

    final String encoded = getNodeName(zkw, region.getEncodedName());
    zkw.sync(encoded);

    // Read existing data of the node
    Stat stat = new Stat();
    byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat);

    if (existingBytes == null) {
      LOG.warn(zkw.prefix("Attempt to check the " +
          "closing node for " + encoded +
          ". The node does not exist"));
      return false;
    }

    if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
      LOG.warn(zkw.prefix("Attempt to check the " +
          "closing node for " + encoded +
          ". The node existed but was version " + stat.getVersion() +
          " not the expected version " + expectedVersion));
      return false;
    }

    RegionTransition rt = getRegionTransition(existingBytes);

    if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) {
      LOG.warn(zkw.prefix("Attempt to check the " +
          "closing node for " + encoded +
          ". The node existed but was in an unexpected state: " + rt.getEventType()));
      return false;
    }

    return true;
  }

  /**
   * Method that actually performs unassigned node transitions.
   *
   * <p>Attempts to transition the unassigned node for the specified region
   * from the expected state to the state in the specified transition data.
   *
   * <p>Method first reads existing data and verifies it is in the expected
   * state.  If the node does not exist or the node is not in the expected
   * state, the method returns -1.  If the transition is successful, the
   * version number of the node following the transition is returned.
   *
   * <p>If the read state is what is expected, it attempts to write the new
   * state and data into the node.  When doing this, it includes the expected
   * version (determined when the existing state was verified) to ensure that
   * only one transition is successful.  If there is a version mismatch, the
   * method returns -1.
   *
   * <p>If the write is successful, no watch is set and the method returns true.
   *
   * @param zkw zk reference
   * @param region region to be transitioned to opened
   * @param serverName server transition happens on
   * @param endState state to transition node to if all checks pass
   * @param beginState state the node must currently be in to do transition
   * @param expectedVersion expected version of data before modification, or -1
   * @return version of node after transition, -1 if unsuccessful transition
   * @throws KeeperException if unexpected zookeeper exception
   */
  public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
      ServerName serverName, EventType beginState, EventType endState,
      int expectedVersion)
  throws KeeperException {
    return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null);
  }


  public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
      ServerName serverName, EventType beginState, EventType endState,
      int expectedVersion, final byte [] payload)
  throws KeeperException {
    String encoded = region.getEncodedName();
    if(LOG.isDebugEnabled()) {
      LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) +
        " from " + beginState.toString() + " to " + endState.toString()));
    }

    String node = getNodeName(zkw, encoded);
    zkw.sync(node);

    // Read existing data of the node
    Stat stat = new Stat();
    byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
    if (existingBytes == null) {
      // Node no longer exists.  Return -1. It means unsuccessful transition.
      return -1;
    }

    // Verify it is the expected version
    if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
      LOG.warn(zkw.prefix("Attempt to transition the " +
        "unassigned node for " + encoded +
        " from " + beginState + " to " + endState + " failed, " +
        "the node existed but was version " + stat.getVersion() +
        " not the expected version " + expectedVersion));
        return -1;
    }

    if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
        && endState.equals(EventType.RS_ZK_REGION_OPENING)
        && expectedVersion == -1 && stat.getVersion() != 0) {
      // the below check ensures that double assignment doesnot happen.
      // When the node is created for the first time then the expected version
      // that is passed will be -1 and the version in znode will be 0.
      // In all other cases the version in znode will be > 0.
      LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
          + encoded + " from " + beginState + " to " + endState + " failed, "
          + "the node existed but was version " + stat.getVersion()
          + " not the expected version " + expectedVersion));
      return -1;
    }

    RegionTransition rt = getRegionTransition(existingBytes);

    // Verify the server transition happens on is not changed
    if (!rt.getServerName().equals(serverName)) {
      LOG.warn(zkw.prefix("Attempt to transition the " +
        "unassigned node for " + encoded +
        " from " + beginState + " to " + endState + " failed, " +
        "the server that tried to transition was " + serverName +
        " not the expected " + rt.getServerName()));
      return -1;
    }

    // Verify it is in expected state
    EventType et = rt.getEventType();
    if (!et.equals(beginState)) {
      String existingServer = (rt.getServerName() == null)
        ? "<unknown>" : rt.getServerName().toString();
      LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded
        + " from " + beginState + " to " + endState + " failed, the node existed but"
        + " was in the state " + et + " set by the server " + existingServer));
      return -1;
    }

    // Write new data, ensuring data has not changed since we last read it
    try {
      rt = RegionTransition.createRegionTransition(
          endState, region.getRegionName(), serverName, payload);
      if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
        LOG.warn(zkw.prefix("Attempt to transition the " +
        "unassigned node for " + encoded +
        " from " + beginState + " to " + endState + " failed, " +
        "the node existed and was in the expected state but then when " +
        "setting data we got a version mismatch"));
        return -1;
      }
      if(LOG.isDebugEnabled()) {
        LOG.debug(zkw.prefix("Successfully transitioned node " + encoded +
          " from " + beginState + " to " + endState));
      }
      return stat.getVersion() + 1;
    } catch (KeeperException.NoNodeException nne) {
      LOG.warn(zkw.prefix("Attempt to transition the " +
        "unassigned node for " + encoded +
        " from " + beginState + " to " + endState + " failed, " +
        "the node existed and was in the expected state but then when " +
        "setting data it no longer existed"));
      return -1;
    }
  }

  private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException {
    try {
      return RegionTransition.parseFrom(bytes);
    } catch (DeserializationException e) {
      // Convert to a zk exception for now.  Otherwise have to change API
      throw ZKUtil.convert(e);
    }
  }

  /**
   * Gets the current data in the unassigned node for the specified region name
   * or fully-qualified path.
   *
   * <p>Returns null if the region does not currently have a node.
   *
   * <p>Sets a watch on the node if the node exists.
   *
   * @param zkw zk reference
   * @param pathOrRegionName fully-specified path or region name
   * @return znode content
   * @throws KeeperException if unexpected zookeeper exception
   */
  public static byte [] getData(ZooKeeperWatcher zkw,
      String pathOrRegionName)
  throws KeeperException {
    String node = getPath(zkw, pathOrRegionName);
    return ZKUtil.getDataAndWatch(zkw, node);
  }

  /**
   * Gets the current data in the unassigned node for the specified region name
   * or fully-qualified path.
   *
   * <p>Returns null if the region does not currently have a node.
   *
   * <p>Sets a watch on the node if the node exists.
   *
   * @param zkw zk reference
   * @param pathOrRegionName fully-specified path or region name
   * @param stat object to populate the version.
   * @return znode content
   * @throws KeeperException if unexpected zookeeper exception
   */
  public static byte [] getDataAndWatch(ZooKeeperWatcher zkw,
      String pathOrRegionName, Stat stat)
  throws KeeperException {
    String node = getPath(zkw, pathOrRegionName);
    return ZKUtil.getDataAndWatch(zkw, node, stat);
  }

  /**
   * Gets the current data in the unassigned node for the specified region name
   * or fully-qualified path.
   *
   * <p>Returns null if the region does not currently have a node.
   *
   * <p>Does not set a watch.
   *
   * @param zkw zk reference
   * @param pathOrRegionName fully-specified path or region name
   * @param stat object to store node info into on getData call
   * @return znode content
   * @throws KeeperException if unexpected zookeeper exception
   */
  public static byte [] getDataNoWatch(ZooKeeperWatcher zkw,
      String pathOrRegionName, Stat stat)
  throws KeeperException {
    String node = getPath(zkw, pathOrRegionName);
    return ZKUtil.getDataNoWatch(zkw, node, stat);
  }

  /**
   * @param zkw
   * @param pathOrRegionName
   * @return Path to znode
   */
  public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) {
    return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName);
  }

  /**
   * Get the version of the specified znode
   * @param zkw zk reference
   * @param region region's info
   * @return the version of the znode, -1 if it doesn't exist
   * @throws KeeperException
   */
  public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
    throws KeeperException {
    String znode = getNodeName(zkw, region.getEncodedName());
    return ZKUtil.checkExists(zkw, znode);
  }

  /**
   * Delete the assignment node regardless of its current state.
   * <p>
   * Fail silent even if the node does not exist at all.
   * @param watcher
   * @param regionInfo
   * @throws KeeperException
   */
  public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
      HRegionInfo regionInfo)
  throws KeeperException {
    String node = getNodeName(watcher, regionInfo.getEncodedName());
    ZKUtil.deleteNodeFailSilent(watcher, node);
  }

  /**
   * Blocks until there are no node in regions in transition.
   * <p>
   * Used in testing only.
   * @param zkw zk reference
   * @throws KeeperException
   * @throws InterruptedException
   */
  public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
  throws KeeperException, InterruptedException {
    while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
      List<String> znodes =
        ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
      if (znodes != null && !znodes.isEmpty()) {
        LOG.debug("Waiting on RIT: " + znodes);
      }
      Thread.sleep(100);
    }
  }

  /**
   * Blocks until there is at least one node in regions in transition.
   * <p>
   * Used in testing only.
   * @param zkw zk reference
   * @throws KeeperException
   * @throws InterruptedException
   */
  public static void blockUntilRIT(ZooKeeperWatcher zkw)
  throws KeeperException, InterruptedException {
    while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
      List<String> znodes =
        ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
      if (znodes == null || znodes.isEmpty()) {
        LOG.debug("No RIT in ZK");
      }
      Thread.sleep(100);
    }
  }

  /**
   * Presume bytes are serialized unassigned data structure
   * @param znodeBytes
   * @return String of the deserialized znode bytes.
   */
  static String toString(final byte[] znodeBytes) {
    // This method should not exist.  Used by ZKUtil stringifying RegionTransition.  Have the
    // method in here so RegionTransition does not leak into ZKUtil.
    try {
      RegionTransition rt = RegionTransition.parseFrom(znodeBytes);
      return rt.toString();
    } catch (DeserializationException e) {
      return "";
    }
  }
}
TOP

Related Classes of org.apache.hadoop.hbase.zookeeper.ZKAssign

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.