/**
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.wasp.fserver.handler;
import com.alibaba.wasp.EntityGroupInfo;
import com.alibaba.wasp.Server;
import com.alibaba.wasp.executor.EventHandler;
import com.alibaba.wasp.fserver.EntityGroup;
import com.alibaba.wasp.fserver.FServerServices;
import com.alibaba.wasp.meta.FTable;
import com.alibaba.wasp.zookeeper.ZKAssign;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.zookeeper.KeeperException;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* Handles opening of an entityGroup on a FServer.
*/
public class OpenEntityGroupHandler extends EventHandler {
// NOTE on priorities shutting down. There are none for close. There are some
// for open. I think that is right. On shutdown, we want the meta to close
// before root and both to close after the user entityGroups have closed. What
// about the case where master tells us to shut down a catalog entityGroup and
// we have a running queue of user entityGroups to close?
private static final Log LOG = LogFactory
    .getLog(OpenEntityGroupHandler.class);
// Sentinel znode version meaning "no valid version". It is a true constant,
// so it is declared static instead of being duplicated in every handler.
private static final int FAILED = -1;
int expectedVersion = FAILED;
// We get version of our znode at start of open process and monitor it across
// the total open. We'll fail the open if someone hijacks our znode; we can
// tell this has happened if version is not as expected.
private volatile int version = FAILED;
private final FServerServices fsServices;
private final EntityGroupInfo entityGroupInfo;
private final FTable table;
// version of the offline node that was set by the master
private volatile int versionOfOfflineNode = -1;
// If true, the hosting server is aborting. EntityGroup close process is
// different when we are aborting.
// Update zk on closing transitions. Usually true. It's false if the cluster
// is going down. In this case, it's the rs that initiates the entityGroup
// close -- not the master process -- so state up in zk is unlikely to be
// CLOSING.
/**
* Convenience constructor: delegates to the full constructor with the
* {@code M_FSERVER_OPEN_ENTITYGROUP} event type and {@code -1} as the
* version of the offline znode.
*
* @param server
*          server this handler runs on
* @param fsServices
*          FServer services used while opening the entityGroup
* @param entityGroupInfo
*          descriptor of the entityGroup to open
* @param table
*          table the entityGroup belongs to
*/
public OpenEntityGroupHandler(final Server server,
final FServerServices fsServices, EntityGroupInfo entityGroupInfo,
FTable table) {
this(server, fsServices, entityGroupInfo, table, EventType.M_FSERVER_OPEN_ENTITYGROUP, -1);
}
/**
 * Full constructor; the other constructors delegate here.
 *
 * @param server
 *          server this handler runs on (handed to the base handler)
 * @param fsServices
 *          FServer services used while opening the entityGroup
 * @param entityGroupInfo
 *          descriptor of the entityGroup to open
 * @param table
 *          table the entityGroup belongs to
 * @param eventType
 *          event type handed to the base handler
 * @param versionOfOfflineNode
 *          version of the offline znode that was set by the master
 */
public OpenEntityGroupHandler(final Server server,
    final FServerServices fsServices, EntityGroupInfo entityGroupInfo,
    final FTable table, EventType eventType, int versionOfOfflineNode) {
  super(server, eventType);
  this.versionOfOfflineNode = versionOfOfflineNode;
  this.table = table;
  this.entityGroupInfo = entityGroupInfo;
  this.fsServices = fsServices;
}
/**
* Delegates to the full constructor with the
* {@code M_FSERVER_OPEN_ENTITYGROUP} event type and the given offline znode
* version.
*
* @param server
*          server this handler runs on
* @param fsServices
*          FServer services used while opening the entityGroup
* @param entityGroupInfo
*          descriptor of the entityGroup to open
* @param ftd
*          table the entityGroup belongs to
* @param versionOfOfflineNode
*          version of the offline znode, forwarded unchanged
*/
public OpenEntityGroupHandler(final Server server,
final FServerServices fsServices, EntityGroupInfo entityGroupInfo,
FTable ftd, int versionOfOfflineNode) {
this(server, fsServices, entityGroupInfo, ftd, EventType.M_FSERVER_OPEN_ENTITYGROUP,
versionOfOfflineNode);
}
/**
 * @return the descriptor of the entityGroup this handler was created for
 */
public EntityGroupInfo getEntityGroupInfo() {
  return this.entityGroupInfo;
}
/**
 * Runs the open sequence for the entityGroup:
 * <ol>
 * <li>move the znode from OFFLINE to OPENING (this initializes the tracked
 * znode version);</li>
 * <li>open the entityGroup;</li>
 * <li>refresh OPENING and run the post-open deploy tasks;</li>
 * <li>move the znode from OPENING to OPENED;</li>
 * <li>register the entityGroup as online.</li>
 * </ol>
 * Whatever the outcome, the entityGroup is removed from the
 * entityGroups-in-transition map before returning.
 *
 * @throws IOException
 *           if closing a partially opened entityGroup fails during cleanup
 */
@Override
public void process() throws IOException {
  try {
    final String name = entityGroupInfo.getEntityGroupNameAsString();
    if (this.server.isStopped() || this.fsServices.isStopping()) {
      return;
    }
    final String encodedName = entityGroupInfo.getEncodedName();
    // NOTE(review): the result of this lookup is never consulted before it
    // is overwritten below, so no "already online" check happens here.
    // Presumably that check lives with the caller -- confirm before removing
    // or reinstating it.
    EntityGroup entityGroup = this.fsServices
        .getFromOnlineEntityGroups(encodedName);
    // If fails, just return. Someone stole the entityGroup from under us.
    // Calling transitionZookeeperOfflineToOpening initializes this.version.
    if (!transitionZookeeperOfflineToOpening(encodedName,
        versionOfOfflineNode)) {
      LOG.warn("EntityGroup was hijacked? It no longer exists, encodedName="
          + encodedName);
      return;
    }
    // Open entityGroup. After a successful open, failures in subsequent
    // processing need to do a close as part of cleanup.
    entityGroup = openEntityGroup();
    if (entityGroup == null) {
      tryTransitionToFailedOpen(entityGroupInfo);
      return;
    }
    boolean failed = true;
    if (tickleOpening("post_entitygroup_open")) {
      if (updateMeta(entityGroup)) {
        failed = false;
      }
    }
    if (failed || this.server.isStopped() || this.fsServices.isStopping()) {
      try {
        cleanupFailedOpen(entityGroup);
      } finally {
        // Even if the close throws we must still try to publish FAILED_OPEN;
        // otherwise the znode stays in OPENING until the master times the
        // transition out.
        tryTransitionToFailedOpen(entityGroupInfo);
      }
      return;
    }
    if (!transitionToOpened(entityGroup)) {
      // If we fail to transition to opened, it's because of one of two cases:
      // (a) we lost our ZK lease
      // OR (b) someone else opened the entityGroup before us
      // In either case, we don't need to transition to FAILED_OPEN state.
      // In case (a), the Master will process us as a dead server. In case
      // (b) the entityGroup is already being handled elsewhere anyway.
      cleanupFailedOpen(entityGroup);
      return;
    }
    // Successful entityGroup open, and add it to OnlineEntityGroups
    this.fsServices.addToOnlineEntityGroups(entityGroup);
    // Done! Successful entityGroup open
    LOG.debug("Opened " + name + " on server:" + this.server.getServerName());
  } finally {
    this.fsServices.getEntityGroupsInTransitionInFS().remove(
        this.entityGroupInfo.getEncodedNameAsBytes());
  }
}
/**
 * Closes an entityGroup whose open did not complete; does nothing when
 * there is no entityGroup to close.
 *
 * @param entityGroup
 *          entityGroup to close, may be null
 * @throws IOException
 *           if the close fails
 */
private void cleanupFailedOpen(EntityGroup entityGroup) throws IOException {
  if (entityGroup == null) {
    return;
  }
  entityGroup.close();
}
/**
 * Update ZK, ROOT or META. This can take a while if for example the .META. is
 * not available -- if server hosting .META. crashed and we are waiting on it
 * to come back -- so the post-open deploy tasks run in a separate thread
 * while this method keeps refreshing the OPENING znode state so the master
 * doesn't time out our entityGroup-in-transition. Caller must clean up the
 * entityGroup if this fails.
 * <p>
 * The overall budget is ten times the value of
 * {@code wasp.master.assignment.timeoutmonitor.period} (default 10000); the
 * OPENING state is refreshed roughly every third of that period.
 *
 * @param entityGroup
 *          the freshly opened entityGroup to deploy
 * @return true only if the deploy thread finished without an exception, the
 *         calling thread was not interrupted, and every OPENING refresh
 *         succeeded
 */
boolean updateMeta(final EntityGroup entityGroup) {
  if (this.server.isStopped() || this.fsServices.isStopping()) {
    return false;
  }
  // Object we do wait/notify on. Make it boolean. If set, we're done.
  // Else, wait.
  final AtomicBoolean signaller = new AtomicBoolean(false);
  PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(entityGroup,
      this.server, this.fsServices, signaller);
  t.start();
  int assignmentTimeout = this.server.getConfiguration().getInt(
      "wasp.master.assignment.timeoutmonitor.period", 10000);
  // Total timeout for meta edit. If we fail adding the edit then close out
  // the entityGroup and let it be assigned elsewhere. Multiply as long so a
  // large configured period cannot overflow int before being widened.
  long timeout = assignmentTimeout * 10L;
  long now = System.currentTimeMillis();
  long endTime = now + timeout;
  // Let our period at which we update OPENING state be 1/3rd of the
  // entityGroups-in-transition timeout period.
  long period = Math.max(1, assignmentTimeout / 3);
  long lastUpdate = now;
  boolean tickleOpening = true;
  while (!signaller.get() && t.isAlive() && !this.server.isStopped()
      && !this.fsServices.isStopping() && (endTime > now)) {
    long elapsed = now - lastUpdate;
    if (elapsed > period) {
      // Only tickle OPENING if postOpenDeployTasks is taking some time.
      lastUpdate = now;
      tickleOpening = tickleOpening("post_open_deploy");
    }
    synchronized (signaller) {
      try {
        // Re-check the flag while holding the monitor: the worker must take
        // this monitor to notify, so a completion that raced with the loop
        // condition is seen here instead of costing a full wait period.
        if (!signaller.get()) {
          signaller.wait(period);
        }
      } catch (InterruptedException e) {
        // Go to the loop check.
      }
    }
    now = System.currentTimeMillis();
  }
  // Is thread still alive? We may have left above loop because server is
  // stopping or we timed out the edit. If so, interrupt it.
  if (t.isAlive()) {
    if (!signaller.get()) {
      // Thread still running; interrupt
      LOG.debug("Interrupting thread " + t);
      t.interrupt();
    }
    try {
      t.join();
    } catch (InterruptedException ie) {
      LOG.warn("Interrupted joining "
          + entityGroup.getEntityGroupInfo().getEntityGroupNameAsString(), ie);
      Thread.currentThread().interrupt();
    }
  }
  // Was there an exception opening the entityGroup? An interrupt seen while
  // joining also counts as failure (note that Thread.interrupted() clears the
  // flag). Even if tickle opening fails then it is a failure.
  return ((!Thread.interrupted() && t.getException() == null) && tickleOpening);
}
/**
 * Thread to run entityGroup post open tasks. Call {@link #getException()}
 * after the thread finishes to check for exceptions running
 * {@link com.alibaba.wasp.fserver.FServerServices#postOpenDeployTasks(com.alibaba.wasp.fserver.EntityGroup, boolean)} .
 * <p>
 * When the tasks end, normally or not, the shared flag is set to true and
 * every thread waiting on the flag object is woken up.
 */
static class PostOpenDeployTasksThread extends Thread {
  private Exception exception = null;
  private final Server server;
  private final FServerServices services;
  private final EntityGroup entityGroup;
  private final AtomicBoolean signaller;

  /**
   * @param entityGroup
   *          entityGroup the post-open tasks are run for
   * @param server
   *          hosting server (stored, not otherwise used by this thread)
   * @param services
   *          services object whose postOpenDeployTasks is invoked
   * @param signaller
   *          flag set to true, and monitor notified, once the tasks end
   */
  PostOpenDeployTasksThread(final EntityGroup entityGroup,
      final Server server, final FServerServices services,
      final AtomicBoolean signaller) {
    super("PostOpenDeployTasks:"
        + entityGroup.getEntityGroupInfo().getEncodedName());
    this.setDaemon(true);
    this.server = server;
    this.services = services;
    this.entityGroup = entityGroup;
    this.signaller = signaller;
  }

  @Override
  public void run() {
    try {
      this.services.postOpenDeployTasks(this.entityGroup, false);
    } catch (Throwable e) {
      // Catch Throwable, not just Exception: if an Error killed this thread
      // with no failure recorded, the waiting handler would read a null
      // exception and report the deploy as successful.
      LOG.warn("Exception running postOpenDeployTasks; entityGroup="
          + this.entityGroup.getEntityGroupInfo().getEncodedName(), e);
      this.exception = (e instanceof Exception) ? (Exception) e
          : new RuntimeException(e);
    } finally {
      // We're done. Set flag then wake up anyone waiting on thread to
      // complete. Done in finally so waiters are released on every exit path.
      this.signaller.set(true);
      synchronized (this.signaller) {
        this.signaller.notifyAll();
      }
    }
  }

  /**
   * @return Null or the run exception (a non-Exception Throwable is wrapped
   *         in a RuntimeException); call this method after thread is done.
   */
  Exception getException() {
    return this.exception;
  }
}
/**
 * Transition ZK node from OFFLINE to OPENING and record the resulting znode
 * version in {@code this.version}.
 *
 * @param encodedName
 *          Name of the znode file (EntityGroup encodedName is the znode
 *          name); used here for log messages.
 * @param versionOfOfflineNode
 *          version of the offline node that needs to be compared before
 *          changing the node's state from OFFLINE
 * @return True if successful transition, i.e. the recorded version is not -1
 *         afterwards.
 */
boolean transitionZookeeperOfflineToOpening(final String encodedName,
    int versionOfOfflineNode) {
  try {
    // Initialize the znode version.
    this.version = ZKAssign.transitionNode(server.getZooKeeper(),
        entityGroupInfo, server.getServerName(),
        EventType.M_ZK_ENTITYGROUP_OFFLINE,
        EventType.FSERVER_ZK_ENTITYGROUP_OPENING, versionOfOfflineNode);
  } catch (KeeperException e) {
    LOG.error("Error transition from OFFLINE to OPENING for entityGroup="
        + encodedName, e);
    // Invalidate the version, as tickleOpening does, so a previously stored
    // good version cannot make this failed attempt look successful.
    this.version = -1;
  }
  boolean b = isGoodVersion();
  if (!b) {
    LOG.warn("Failed transition from OFFLINE to OPENING for entityGroup="
        + encodedName);
  }
  return b;
}
/**
 * Moves the znode of the given entityGroup from OPENING to OPENED, using
 * the znode version tracked by this handler.
 *
 * @param entityGroup
 *          EntityGroup we're working on.
 * @return true if the znode reached OPENED; false on a version mismatch
 *         (the transition returned -1) or on a ZooKeeper error.
 * @throws java.io.IOException
 *           declared for callers; nothing in this body throws it
 */
private boolean transitionToOpened(final EntityGroup entityGroup)
    throws IOException {
  final EntityGroupInfo info = entityGroup.getEntityGroupInfo();
  final String groupName = info.getEntityGroupNameAsString();
  // Finally, Transition ZK node to OPENED
  try {
    final int newVersion = ZKAssign.transitionNodeOpened(
        this.server.getZooKeeper(), info, this.server.getServerName(),
        this.version);
    if (newVersion != -1) {
      LOG.debug("entityGroup transitioned to opened in zookeeper: "
          + entityGroup.getEntityGroupInfo() + ", server: "
          + this.server.getServerName());
      return true;
    }
    LOG.warn("Completed the OPEN of entityGroup "
        + groupName
        + " but when transitioning from "
        + " OPENING to OPENED got a version mismatch, someone else clashed "
        + "so now unassigning -- closing entityGroup on server: "
        + this.server.getServerName());
  } catch (KeeperException e) {
    LOG.error("Failed transitioning node " + groupName
        + " from OPENING to OPENED -- closing entityGroup", e);
  }
  return false;
}
/**
 * Best-effort attempt to move the znode from OPENING to FAILED_OPEN. This
 * is not guaranteed to succeed.
 *
 * @param egi
 *          descriptor of the entityGroup we're working on
 * @return true if the znode reached FAILED_OPEN; false on a version
 *         mismatch (the transition returned -1) or on a ZooKeeper error.
 */
private boolean tryTransitionToFailedOpen(final EntityGroupInfo egi) {
  final String groupName = egi.getEntityGroupNameAsString();
  try {
    LOG.info("Opening of entityGroup " + egi
        + " failed, marking as FAILED_OPEN in ZK");
    final int newVersion = ZKAssign.transitionNode(
        this.server.getZooKeeper(), egi, this.server.getServerName(),
        EventType.FSERVER_ZK_ENTITYGROUP_OPENING,
        EventType.FSERVER_ZK_ENTITYGROUP_FAILED_OPEN, this.version);
    if (newVersion != -1) {
      return true;
    }
    LOG.warn("Unable to mark entityGroup " + egi + " as FAILED_OPEN. "
        + "It's likely that the master already timed out this open "
        + "attempt, and thus another RS already has the entityGroup.");
  } catch (KeeperException e) {
    LOG.error("Failed transitioning node " + groupName
        + " from OPENING to FAILED_OPEN", e);
  }
  return false;
}
/**
 * Opens the entityGroup described by this handler. While the open is in
 * progress the supplied progress callback refreshes our OPENING state in
 * ZooKeeper so the master doesn't time out this entityGroup in transition.
 *
 * @return Instance of EntityGroup if successful open else null. Any
 *         Throwable raised by the open is logged and turned into null.
 */
EntityGroup openEntityGroup() {
  final CancelableProgressable reporter = new CancelableProgressable() {
    public boolean progress() {
      // We may lose the znode ownership during the open. Currently it's
      // too hard interrupting an ongoing entityGroup open. Just let it
      // complete and check we still have the znode after entityGroup open.
      return tickleOpening("open_entitygroup_progress");
    }
  };
  try {
    return EntityGroup.openEntityGroup(this.entityGroupInfo, this.table,
        this.server.getConfiguration(), this.fsServices, reporter);
  } catch (Throwable t) {
    // We failed open. Our caller will see the 'null' return value
    // and transition the node to FAILED_OPEN. If that fails,
    // we rely on the Timeout Monitor in the master to reassign.
    // The message no longer announces a memstore rollback: this method
    // performs none.
    LOG.error(
        "Failed open of entityGroup="
            + this.entityGroupInfo.getEntityGroupNameAsString(), t);
    return null;
  }
}
/**
* Placeholder for transitioning the ZK node to CLOSED. The current body is a
* stub: it ignores both arguments, touches no state, and always reports
* failure.
*
* @param expectedVersion
*          not used by the current body
* @param entityGroupInfo
*          not used by the current body
* @return always {@code false}
*/
private boolean setClosedState(final int expectedVersion,
final EntityGroupInfo entityGroupInfo) {
return false;
}
/**
 * Refreshes our OPENING state in zookeeper so the master doesn't time out
 * this entityGroup-in-transition. The tracked znode version is replaced by
 * the one returned from the refresh, or set to -1 on a ZooKeeper error.
 *
 * @param context
 *          Some context to add to logs if failure
 * @return True if successful transition; false if the version was already
 *         bad on entry or the refresh failed.
 */
boolean tickleOpening(final String context) {
  // If previous checks failed... do not try again.
  if (!isGoodVersion()) {
    return false;
  }
  final String znodeName = this.entityGroupInfo.getEncodedName();
  try {
    this.version = ZKAssign.retransitionNodeOpening(server.getZooKeeper(),
        this.entityGroupInfo, this.server.getServerName(), this.version);
  } catch (KeeperException e) {
    LOG.warn("Exception refreshing OPENING; entityGroup=" + znodeName
        + ", context=" + context, e);
    this.version = -1;
  }
  if (isGoodVersion()) {
    return true;
  }
  LOG.warn("Failed refreshing OPENING; entityGroup=" + znodeName
      + ", context=" + context);
  return false;
}
/**
 * @return true while the tracked znode version is valid, i.e. not -1
 */
private boolean isGoodVersion() {
  final int current = this.version;
  return current != -1;
}
}