Package com.alibaba.wasp.fserver.handler

Source Code of com.alibaba.wasp.fserver.handler.OpenEntityGroupHandler

/**
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.wasp.fserver.handler;

import com.alibaba.wasp.EntityGroupInfo;
import com.alibaba.wasp.Server;
import com.alibaba.wasp.executor.EventHandler;
import com.alibaba.wasp.fserver.EntityGroup;
import com.alibaba.wasp.fserver.FServerServices;
import com.alibaba.wasp.meta.FTable;
import com.alibaba.wasp.zookeeper.ZKAssign;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.zookeeper.KeeperException;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;

/**
* Handles opening of an entityGroup on an FServer.
*/
public class OpenEntityGroupHandler extends EventHandler {
  // NOTE on priorities when shutting down: there are none for close, but there
  // are some for open. I think that is right. On shutdown, we want the meta to
  // close before root, and both to close after the user entityGroups have
  // closed. What about the case where the master tells us to shut down a
  // catalog entityGroup and we have a running queue of user entityGroups to
  // close?
  /** Logger for this handler; also used by the nested PostOpenDeployTasksThread. */
  private static final Log LOG = LogFactory
      .getLog(OpenEntityGroupHandler.class);

  // Sentinel value (-1); in the code visible here it is only used as the
  // initial value of expectedVersion.
  private final int FAILED = -1;
  // NOTE(review): not read anywhere in the code visible here -- the znode
  // version checks use the 'version' field below. Confirm before relying on it.
  int expectedVersion = FAILED;

  // We get version of our znode at start of open process and monitor it across
  // the total open. We'll fail the open if someone hijacks our znode; we can
  // tell this has happened if version is not as expected.
  // A value of -1 means "no valid version" (see isGoodVersion()).
  private volatile int version = -1;

  // Services of the hosting FServer (online entityGroup registry, post-open
  // deploy tasks, stopping flag).
  private final FServerServices fsServices;

  // Descriptor of the entityGroup this handler opens.
  private final EntityGroupInfo entityGroupInfo;

  // Table descriptor handed to EntityGroup.openEntityGroup().
  private final FTable table;

  // version of the offline node that was set by the master
  // (-1 when the caller did not supply one).
  private volatile int versionOfOfflineNode = -1;

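  // NOTE(review): the two notes below do not annotate any field declared in
  // this class; they look like leftovers from a close handler -- confirm and
  // remove if so.

  // If true, the hosting server is aborting. The entityGroup close process is
  // different when we are aborting.

  // Update zk on closing transitions. Usually true. It's false if the cluster
  // is going down. In that case, it's the rs that initiates the entityGroup
  // close -- not the master process -- so the state up in zk is unlikely to be
  // CLOSING.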

   /**
   * Default base class constructor.
   */
  public OpenEntityGroupHandler(final Server server,
      final FServerServices fsServices, EntityGroupInfo entityGroupInfo,
      FTable table) {
    this(server, fsServices, entityGroupInfo, table, EventType.M_FSERVER_OPEN_ENTITYGROUP, -1);
  }

  public OpenEntityGroupHandler(final Server server,
      final FServerServices fsServices, EntityGroupInfo entityGroupInfo,
      final FTable table, EventType eventType, int versionOfOfflineNode) {
    super(server, eventType);
    this.fsServices = fsServices;
    this.entityGroupInfo = entityGroupInfo;
    this.table = table;
    this.versionOfOfflineNode = versionOfOfflineNode;
  }
 
  public OpenEntityGroupHandler(final Server server,
      final FServerServices fsServices, EntityGroupInfo entityGroupInfo,
      FTable ftd, int versionOfOfflineNode) {
    this(server, fsServices, entityGroupInfo, ftd, EventType.M_FSERVER_OPEN_ENTITYGROUP,
        versionOfOfflineNode);
  }
 
  public EntityGroupInfo getEntityGroupInfo() {
    return entityGroupInfo;
  }

  @Override
  public void process() throws IOException {
    try {
      final String name = entityGroupInfo.getEntityGroupNameAsString();
      if (this.server.isStopped() || this.fsServices.isStopping()) {
        return;
      }
      final String encodedName = entityGroupInfo.getEncodedName();

      // Check that this entityGroup is not already online
      EntityGroup entityGroup = this.fsServices
          .getFromOnlineEntityGroups(encodedName);

      // If fails, just return. Someone stole the entityGroup from under us.
      // Calling transitionZookeeperOfflineToOpening initalizes this.version.
      if (!transitionZookeeperOfflineToOpening(encodedName,
          versionOfOfflineNode)) {
        LOG.warn("EntityGroup was hijacked? It no longer exists, encodedName="
            + encodedName);
        return;
      }

      // Open entityGroup. After a successful open, failures in subsequent
      // processing needs to do a close as part of cleanup.
      entityGroup = openEntityGroup();
      if (entityGroup == null) {
        tryTransitionToFailedOpen(entityGroupInfo);
        return;
      }
      boolean failed = true;
      if (tickleOpening("post_entitygroup_open")) {
        if (updateMeta(entityGroup)) {
          failed = false;
        }
      }
      if (failed || this.server.isStopped() || this.fsServices.isStopping()) {
        cleanupFailedOpen(entityGroup);
        tryTransitionToFailedOpen(entityGroupInfo);
        return;
      }

      if (!transitionToOpened(entityGroup)) {
        // If we fail to transition to opened, it's because of one of two cases:
        // (a) we lost our ZK lease
        // OR (b) someone else opened the entityGroup before us
        // In either case, we don't need to transition to FAILED_OPEN state.
        // In case (a), the Master will process us as a dead server. In case
        // (b) the entityGroup is already being handled elsewhere anyway.
        cleanupFailedOpen(entityGroup);
        return;
      }
      // Successful entityGroup open, and add it to OnlineEntityGroups
      this.fsServices.addToOnlineEntityGroups(entityGroup);

      // Done! Successful entityGroup open
      LOG.debug("Opened " + name + " on server:" + this.server.getServerName());
    } finally {
      this.fsServices.getEntityGroupsInTransitionInFS().remove(
          this.entityGroupInfo.getEncodedNameAsBytes());
    }
  }

  private void cleanupFailedOpen(EntityGroup entityGroup) throws IOException {
    if (entityGroup != null)
      entityGroup.close();

  }

  /**
   * Update ZK, ROOT or META. This can take a while if for example the .META. is
   * not available -- if server hosting .META. crashed and we are waiting on it
   * to come back -- so run in a thread and keep updating znode state meantime
   * so master doesn't timeout our entityGroup-in-transition. Caller must
   * cleanup entityGroup if this fails.
   */
  boolean updateMeta(final EntityGroup entityGroup) {
    if (this.server.isStopped() || this.fsServices.isStopping()) {
      return false;
    }
    // Object we do wait/notify on. Make it boolean. If set, we're done.
    // Else, wait.
    final AtomicBoolean signaller = new AtomicBoolean(false);
    PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(entityGroup,
        this.server, this.fsServices, signaller);
    t.start();
    int assignmentTimeout = this.server.getConfiguration().getInt(
        "wasp.master.assignment.timeoutmonitor.period", 10000);
    // Total timeout for meta edit. If we fail adding the edit then close out
    // the entityGroup and let it be assigned elsewhere.
    long timeout = assignmentTimeout * 10;
    long now = System.currentTimeMillis();
    long endTime = now + timeout;
    // Let our period at which we update OPENING state to be be 1/3rd of the
    // entityGroups-in-transition timeout period.
    long period = Math.max(1, assignmentTimeout / 3);
    long lastUpdate = now;
    boolean tickleOpening = true;
    while (!signaller.get() && t.isAlive() && !this.server.isStopped()
        && !this.fsServices.isStopping() && (endTime > now)) {
      long elapsed = now - lastUpdate;
      if (elapsed > period) {
        // Only tickle OPENING if postOpenDeployTasks is taking some time.
        lastUpdate = now;
        tickleOpening = tickleOpening("post_open_deploy");
      }
      synchronized (signaller) {
        try {
          signaller.wait(period);
        } catch (InterruptedException e) {
          // Go to the loop check.
        }
      }
      now = System.currentTimeMillis();
    }
    // Is thread still alive? We may have left above loop because server is
    // stopping or we timed out the edit. Is so, interrupt it.
    if (t.isAlive()) {
      if (!signaller.get()) {
        // Thread still running; interrupt
        LOG.debug("Interrupting thread " + t);
        t.interrupt();
      }
      try {
        t.join();
      } catch (InterruptedException ie) {
        LOG.warn("Interrupted joining "
            + entityGroup.getEntityGroupInfo().getEntityGroupNameAsString(), ie);
        Thread.currentThread().interrupt();
      }
    }

    // Was there an exception opening the entityGroup? This should trigger on
    // InterruptedException too. If so, we failed. Even if tickle opening fails
    // then it is a failure.
    return ((!Thread.interrupted() && t.getException() == null) && tickleOpening);
  }

  /**
   * Thread to run entityGroup post open tasks. Call {@link #getException()}
   * after the thread finishes to check for exceptions running
   * {@link com.alibaba.wasp.fserver.FServerServices#postOpenDeployTasks(com.alibaba.wasp.fserver.EntityGroup, boolean)} .
   */
  static class PostOpenDeployTasksThread extends Thread {
    private Exception exception = null;
    private final Server server;
    private final FServerServices services;
    private final EntityGroup entityGroup;
    private final AtomicBoolean signaller;

    PostOpenDeployTasksThread(final EntityGroup entityGroup,
        final Server server, final FServerServices services,
        final AtomicBoolean signaller) {
      super("PostOpenDeployTasks:"
          + entityGroup.getEntityGroupInfo().getEncodedName());
      this.setDaemon(true);
      this.server = server;
      this.services = services;
      this.entityGroup = entityGroup;
      this.signaller = signaller;
    }

    public void run() {
      try {
        this.services.postOpenDeployTasks(this.entityGroup, false);
      } catch (Exception e) {
        LOG.warn("Exception running postOpenDeployTasks; entityGroup="
            + this.entityGroup.getEntityGroupInfo().getEncodedName(), e);
        this.exception = e;
      }
      // We're done. Set flag then wake up anyone waiting on thread to complete.
      this.signaller.set(true);
      synchronized (this.signaller) {
        this.signaller.notify();
      }
    }

    /**
     * @return Null or the run exception; call this method after thread is done.
     */
    Exception getException() {
      return this.exception;
    }
  }

  /**
   * Transition ZK node from OFFLINE to OPENING.
   *
   * @param encodedName
   *          Name of the znode file (EntityGroup encodedName is the znode
   *          name).
   * @param versionOfOfflineNode
   *          - version Of OfflineNode that needs to be compared before changing
   *          the node's state from OFFLINE
   * @return True if successful transition.
   */
  boolean transitionZookeeperOfflineToOpening(final String encodedName,
      int versionOfOfflineNode) {
    try {
      // Initialize the znode version.
      this.version = ZKAssign.transitionNode(server.getZooKeeper(),
          entityGroupInfo, server.getServerName(),
          EventType.M_ZK_ENTITYGROUP_OFFLINE,
          EventType.FSERVER_ZK_ENTITYGROUP_OPENING, versionOfOfflineNode);
    } catch (KeeperException e) {
      LOG.error("Error transition from OFFLINE to OPENING for entityGroup="
          + encodedName, e);
    }
    boolean b = isGoodVersion();
    if (!b) {
      LOG.warn("Failed transition from OFFLINE to OPENING for entityGroup="
          + encodedName);
    }
    return b;
  }

  /**
   * @param entityGroup
   *          EntityGroup we're working on.
   * @return whether znode is successfully transitioned to OPENED state.
   * @throws java.io.IOException
   */
  private boolean transitionToOpened(final EntityGroup entityGroup)
      throws IOException {
    boolean result = false;
    EntityGroupInfo egi = entityGroup.getEntityGroupInfo();
    final String name = egi.getEntityGroupNameAsString();
    // Finally, Transition ZK node to OPENED
    try {
      if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), egi,
          this.server.getServerName(), this.version) == -1) {
        LOG.warn("Completed the OPEN of entityGroup "
            + name
            + " but when transitioning from "
            + " OPENING to OPENED got a version mismatch, someone else clashed "
            + "so now unassigning -- closing entityGroup on server: "
            + this.server.getServerName());
      } else {
        LOG.debug("entityGroup transitioned to opened in zookeeper: "
            + entityGroup.getEntityGroupInfo() + ", server: "
            + this.server.getServerName());
        result = true;
      }
    } catch (KeeperException e) {
      LOG.error("Failed transitioning node " + name
          + " from OPENING to OPENED -- closing entityGroup", e);
    }
    return result;
  }

  /**
   * @param egi
   *          we're working on. This is not guaranteed to succeed, we just do
   *          our best.
   * @return whether znode is successfully transitioned to FAILED_OPEN state.
   */
  private boolean tryTransitionToFailedOpen(final EntityGroupInfo egi) {
    boolean result = false;
    final String name = egi.getEntityGroupNameAsString();
    try {
      LOG.info("Opening of entityGroup " + egi
          + " failed, marking as FAILED_OPEN in ZK");
      if (ZKAssign.transitionNode(this.server.getZooKeeper(), egi,
          this.server.getServerName(),
          EventType.FSERVER_ZK_ENTITYGROUP_OPENING,
          EventType.FSERVER_ZK_ENTITYGROUP_FAILED_OPEN, this.version) == -1) {
        LOG.warn("Unable to mark entityGroup " + egi + " as FAILED_OPEN. "
            + "It's likely that the master already timed out this open "
            + "attempt, and thus another RS already has the entityGroup.");
      } else {
        result = true;
      }
    } catch (KeeperException e) {
      LOG.error("Failed transitioning node " + name
          + " from OPENING to FAILED_OPEN", e);
    }
    return result;
  }

  /**
   * @return Instance of EntityGroup if successful open else null.
   */
  EntityGroup openEntityGroup() {
    EntityGroup entityGroup = null;
    try {
      // Instantiate the entityGroup. This also periodically tickles our zk
      // OPENING
      // state so master doesn't timeout this entityGroup in transition.
      entityGroup = EntityGroup.openEntityGroup(this.entityGroupInfo,
          this.table, this.server.getConfiguration(), this.fsServices,
          new CancelableProgressable() {
            public boolean progress() {
              // We may lose the znode ownership during the open. Currently its
              // too hard interrupting ongoing entityGroup open. Just let it
              // complete
              // and check we still have the znode after entityGroup open.
              return tickleOpening("open_entitygroup_progress");
            }
          });
    } catch (Throwable t) {
      // We failed open. Our caller will see the 'null' return value
      // and transition the node back to FAILED_OPEN. If that fails,
      // we rely on the Timeout Monitor in the master to reassign.
      LOG.error(
          "Failed open of entityGroup="
              + this.entityGroupInfo.getEntityGroupNameAsString()
              + ", starting to roll back the global memstore size.", t);
    }
    return entityGroup;
  }

  /**
   * Placeholder for transitioning the ZK node to CLOSED. The current body
   * performs no ZooKeeper operation and unconditionally returns
   * {@code false}; no call to this private method is visible in this class.
   *
   * @param expectedVersion
   *          znode version the transition would be conditioned on (unused)
   * @param entityGroupInfo
   *          descriptor of the entityGroup whose node would change (unused)
   * @return always {@code false} in the current implementation
   */
  private boolean setClosedState(final int expectedVersion,
      final EntityGroupInfo entityGroupInfo) {
    return false;
  }

  /**
   * Update our OPENING state in zookeeper. Do this so master doesn't timeout
   * this entityGroup-in-transition.
   *
   * @param context
   *          Some context to add to logs if failure
   * @return True if successful transition.
   */
  boolean tickleOpening(final String context) {
    // If previous checks failed... do not try again.
    if (!isGoodVersion())
      return false;
    String encodedName = this.entityGroupInfo.getEncodedName();
    try {
      this.version = ZKAssign.retransitionNodeOpening(server.getZooKeeper(),
          this.entityGroupInfo, this.server.getServerName(), this.version);
    } catch (KeeperException e) {
      LOG.warn("Exception refreshing OPENING; entityGroup=" + encodedName
          + ", context=" + context, e);
      this.version = -1;
    }
    boolean b = isGoodVersion();
    if (!b) {
      LOG.warn("Failed refreshing OPENING; entityGroup=" + encodedName
          + ", context=" + context);
    }
    return b;
  }

  private boolean isGoodVersion() {
    return this.version != -1;
  }

}
TOP

Related Classes of com.alibaba.wasp.fserver.handler.OpenEntityGroupHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.