/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.wasp.master;
import com.alibaba.wasp.DeserializationException;
import com.alibaba.wasp.EntityGroupInfo;
import com.alibaba.wasp.EntityGroupTransaction;
import com.alibaba.wasp.FConstants;
import com.alibaba.wasp.NotServingEntityGroupException;
import com.alibaba.wasp.Server;
import com.alibaba.wasp.ServerName;
import com.alibaba.wasp.TableNotFoundException;
import com.alibaba.wasp.executor.EventHandler;
import com.alibaba.wasp.executor.EventHandler.EventType;
import com.alibaba.wasp.executor.ExecutorService;
import com.alibaba.wasp.fserver.EntityGroupAlreadyInTransitionException;
import com.alibaba.wasp.fserver.EntityGroupOpeningState;
import com.alibaba.wasp.fserver.FServerStoppedException;
import com.alibaba.wasp.ipc.ServerNotRunningYetException;
import com.alibaba.wasp.master.handler.ClosedEntityGroupHandler;
import com.alibaba.wasp.master.handler.DisableTableHandler;
import com.alibaba.wasp.master.handler.EnableTableHandler;
import com.alibaba.wasp.master.handler.OpenedEntityGroupHandler;
import com.alibaba.wasp.master.handler.SplitEntityGroupHandler;
import com.alibaba.wasp.master.metrics.MetricsMaster;
import com.alibaba.wasp.meta.FMetaReader;
import com.alibaba.wasp.meta.FMetaScanner;
import com.alibaba.wasp.util.KeyLocker;
import com.alibaba.wasp.zookeeper.ZKAssign;
import com.alibaba.wasp.zookeeper.ZKTable;
import com.alibaba.wasp.zookeeper.ZKUtil;
import com.alibaba.wasp.zookeeper.ZooKeeperListener;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.zookeeper.AsyncCallback;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.apache.zookeeper.data.Stat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
/**
* Manages and performs entityGroup assignment.
* <p>
* Monitors ZooKeeper for events related to entityGroups in transition.
* <p>
* Handles existing entityGroups in transition during master failover.
*/
public class AssignmentManager extends ZooKeeperListener {
private static final Log LOG = LogFactory.getLog(AssignmentManager.class);
public static final ServerName HBCK_CODE_SERVERNAME = new ServerName(
FConstants.WBCK_CODE_NAME, -1, -1L);
protected final Server server;
private FServerManager serverManager;
final TimeoutMonitor timeoutMonitor;
private TimerUpdater timerUpdater;
private LoadBalancer balancer;
final private KeyLocker<String> locker = new KeyLocker<String>();
/**
* Map of entityGroups to reopen after the schema of a table is changed. Key -
* encoded entityGroup name, value - EntityGroupInfo
*/
private final Map<String, EntityGroupInfo> entityGroupsToReopen;
/*
* Maximum times we recurse an assignment/unassignment. See below in {@link
* #assign()} and {@link #unassign()}.
*/
private final int maximumAttempts;
/**
* Plans for entityGroup movement. Key is the encoded version of a entityGroup
* name
*/
// TODO: When do plans get cleaned out? Ever? In server open and in server
// shutdown processing -- St.Ack
// All access to this Map must be synchronized.
final NavigableMap<String, EntityGroupPlan> entityGroupPlans = new TreeMap<String, EntityGroupPlan>();
private final ZKTable zkTable;
/**
* Contains the server which need to update timer, these servers will be
* handled by {@link TimerUpdater}
*/
private final ConcurrentSkipListSet<ServerName> serversInUpdatingTimer = new ConcurrentSkipListSet<ServerName>();
private final ExecutorService executorService;
// Thread pool executor service for timeout monitor
private java.util.concurrent.ExecutorService threadPoolExecutorService;
// A bunch of ZK events workers. Each is a single thread executor service
private java.util.concurrent.ExecutorService[] zkEventWorkers;
private List<EventType> ignoreStatesFSOffline = Arrays
.asList(new EventType[] { EventType.FSERVER_ZK_ENTITYGROUP_FAILED_OPEN,
EventType.FSERVER_ZK_ENTITYGROUP_CLOSED });
// metrics instance to send metrics for EGITs
MetricsMaster metricsMaster;
private final EntityGroupStates entityGroupStates;
/**
* Indicator that AssignmentManager has recovered the entityGroup states so
* that ServerShutdownHandler can be fully enabled and re-assign entityGroups
* of dead servers. So that when re-assignment happens, AssignmentManager has
* proper entityGroup states.
*/
final AtomicBoolean failoverCleanupDone = new AtomicBoolean(false);
/**
* Constructs a new assignment manager.
*
* @param server
* @param serverManager
* @param service
* @param metricsMaster
* @throws org.apache.zookeeper.KeeperException
* @throws java.io.IOException
*/
public AssignmentManager(Server server, FServerManager serverManager,
final LoadBalancer balancer, final ExecutorService service,
MetricsMaster metricsMaster) throws KeeperException, IOException {
super(server.getZooKeeper());
this.server = server;
this.serverManager = serverManager;
this.executorService = service;
this.entityGroupsToReopen = Collections
.synchronizedMap(new HashMap<String, EntityGroupInfo>());
Configuration conf = server.getConfiguration();
this.timeoutMonitor = new TimeoutMonitor(conf.getInt(
"wasp.master.assignment.timeoutmonitor.period", 30000), server,
serverManager, conf.getInt(
"wasp.master.assignment.timeoutmonitor.timeout", 600000));
this.timerUpdater = new TimerUpdater(conf.getInt(
"wasp.master.assignment.timerupdater.period", 10000), server);
Threads.setDaemonThreadRunning(timerUpdater.getThread(),
server.getServerName() + ".timerUpdater");
this.zkTable = new ZKTable(this.watcher);
this.maximumAttempts = this.server.getConfiguration().getInt(
"wasp.assignment.maximum.attempts", 10);
this.balancer = balancer;
int maxThreads = conf.getInt("wasp.assignment.threads.max", 30);
this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
maxThreads, 60L, TimeUnit.SECONDS, newDaemonThreadFactory("hbase-am"));
this.metricsMaster = metricsMaster;// can be null only with tests.
this.entityGroupStates = new EntityGroupStates(server, serverManager);
int workers = conf.getInt("wasp.assignment.zkevent.workers", 5);
zkEventWorkers = new java.util.concurrent.ExecutorService[workers];
ThreadFactory threadFactory = newDaemonThreadFactory("am-zkevent-worker");
for (int i = 0; i < workers; i++) {
zkEventWorkers[i] = Threads.getBoundedCachedThreadPool(1, 60L,
TimeUnit.SECONDS, threadFactory);
}
}
void startTimeOutMonitor() {
Threads.setDaemonThreadRunning(timeoutMonitor.getThread(),
server.getServerName() + ".timeoutMonitor");
}
/**
* Get a named {@link java.util.concurrent.ThreadFactory} that just builds daemon threads
*
* @param prefix
* name prefix for all threads created from the factory
* @return a thread factory that creates named, daemon threads
*/
private static ThreadFactory newDaemonThreadFactory(final String prefix) {
final ThreadFactory namedFactory = Threads.getNamedThreadFactory(prefix);
return new ThreadFactory() {
@Override
public Thread newThread(Runnable r) {
Thread t = namedFactory.newThread(r);
if (!t.isDaemon()) {
t.setDaemon(true);
}
if (t.getPriority() != Thread.NORM_PRIORITY) {
t.setPriority(Thread.NORM_PRIORITY);
}
return t;
}
};
}
/**
* @return Instance of ZKTable.
*/
public ZKTable getZKTable() {
// These are 'expensive' to make involving trip to zk ensemble so allow
// sharing.
return this.zkTable;
}
/**
* This SHOULD not be public. It is public now because of some unit tests.
*
* TODO: make it package private and keep EntityGroupStates in the master
* package
*/
public EntityGroupStates getEntityGroupStates() {
return entityGroupStates;
}
public EntityGroupPlan getEntityGroupReopenPlan(EntityGroupInfo egInfo) {
return new EntityGroupPlan(egInfo, null,
entityGroupStates.getFServerOfEntityGroup(egInfo));
}
/**
* Add a entityGroupPlan for the specified entityGroup.
*
* @param encodedName
* @param plan
*/
public void addPlan(String encodedName, EntityGroupPlan plan) {
synchronized (entityGroupPlans) {
entityGroupPlans.put(encodedName, plan);
}
}
/**
* Add a map of entityGroup plans.
*/
public void addPlans(Map<String, EntityGroupPlan> plans) {
synchronized (entityGroupPlans) {
entityGroupPlans.putAll(plans);
}
}
/**
* Set the list of entityGroups that will be reopened because of an update in
* table schema
*
* @param entityGroups
* list of entityGroups that should be tracked for reopen
*/
public void setEntityGroupsToReopen(List<EntityGroupInfo> entityGroups) {
for (EntityGroupInfo egInfo : entityGroups) {
entityGroupsToReopen.put(egInfo.getEncodedName(), egInfo);
}
}
/**
* Used by the client to identify if all entityGroups have the schema updates
*
* @param tableName
* @return Pair indicating the status of the alter command
* @throws java.io.IOException
*/
public Pair<Integer, Integer> getReopenStatus(byte[] tableName)
throws IOException {
List<EntityGroupInfo> egInfos = FMetaReader.getTableEntityGroups(
server.getConfiguration(), tableName);
Integer pending = 0;
for (EntityGroupInfo egInfo : egInfos) {
String name = egInfo.getEncodedName();
// no lock concurrent access ok: sequential consistency respected.
if (entityGroupsToReopen.containsKey(name)
|| entityGroupStates.isEntityGroupInTransition(name)) {
pending++;
}
}
return new Pair<Integer, Integer>(pending, egInfos.size());
}
/**
* Used by ServerShutdownHandler to make sure AssignmentManager has completed
* the failover cleanup before re-assigning entityGroups of dead servers. So
* that when re-assignment happens, AssignmentManager has proper entityGroup
* states.
*/
public boolean isFailoverCleanupDone() {
return failoverCleanupDone.get();
}
/**
* Now, failover cleanup is completed. Notify server manager to process queued
* up dead servers processing, if any.
*/
void failoverCleanupDone() {
failoverCleanupDone.set(true);
serverManager.processQueuedDeadServers();
}
/**
* Called on startup. Figures whether a fresh cluster start of we are joining
* extant running cluster.
*
* @throws java.io.IOException
* @throws org.apache.zookeeper.KeeperException
* @throws InterruptedException
*/
void joinCluster() throws IOException, KeeperException, InterruptedException {
// Concurrency note: In the below the accesses on entityGroupsInTransition
// are
// outside of a synchronization block where usually all accesses to EGIT are
// synchronized. The presumption is that in this case it is safe since this
// method is being played by a single thread on startup.
// TODO: EntityGroups that have a null location and are not in
// entityGroupsInTransitions
// need to be handled.
// Scan FMETA to build list of existing entityGroups, servers, and
// assignment
// Returns servers who have not checked in (assumed dead) and their
// entityGroups
Map<ServerName, List<EntityGroupInfo>> deadServers = rebuildUserEntityGroups();
// This method will assign all user entityGroups if a clean server startup
// or
// it will reconstruct master state and cleanup any leftovers from
// previous master process.
processDeadServersAndEntityGroupsInTransition(deadServers);
recoverTableInDisablingState();
recoverTableInEnablingState();
}
/**
* Process all entityGroups that are in transition in zookeeper and also
* processes the list of dead servers by scanning the FMETA. Used by master
* joining an cluster. If we figure this is a clean cluster startup, will
* assign all user entityGroups.
*
* @param deadServers
* Map of dead servers and their entityGroups. Can be null.
* @throws org.apache.zookeeper.KeeperException
* @throws java.io.IOException
* @throws InterruptedException
*/
void processDeadServersAndEntityGroupsInTransition(
final Map<ServerName, List<EntityGroupInfo>> deadServers)
throws KeeperException, IOException, InterruptedException {
List<String> nodes = ZKUtil.listChildrenNoWatch(watcher,
watcher.assignmentZNode);
if (nodes == null) {
String errorMessage = "Failed to get the children from ZK";
server.abort(errorMessage, new IOException(errorMessage));
return;
}
boolean failover = !serverManager.getDeadServers().isEmpty();
if (!failover) {
// Run through all entityGroups. If they are not assigned and not in EGIT,
// then
// its a clean cluster startup, else its a failover.
Map<EntityGroupInfo, ServerName> entityGroups = entityGroupStates
.getEntityGroupAssignments();
for (Map.Entry<EntityGroupInfo, ServerName> e : entityGroups.entrySet()) {
if (e.getValue() != null) {
LOG.debug("Found " + e + " out on cluster");
failover = true;
break;
}
if (nodes.contains(e.getKey().getEncodedName())) {
LOG.debug("Found " + e.getKey().getEntityGroupNameAsString()
+ " in EGITs");
failover = true;
break;
}
}
}
// If we found user entityGroups out on cluster, its a failover.
if (failover) {
LOG.info("Found entityGroups out on cluster or in EGIT; failover");
// Process list of dead servers and entityGroups in EGIT.
processDeadServersAndRecoverLostEntityGroups(deadServers, nodes);
} else {
// Fresh cluster startup.
LOG.info("Clean cluster startup. Assigning user entityGroups");
assignAllUserEntityGroups();
}
}
/**
* If entityGroup is up in zk in transition, then do fixup and block and wait
* until the entityGroup is assigned and out of transition. Used on startup
* for catalog entityGroups.
*
* @param egInfo
* EntityGroup to look for.
* @return True if we processed a entityGroup in transition else false if
* entityGroup was not up in zk in transition.
* @throws InterruptedException
* @throws org.apache.zookeeper.KeeperException
* @throws java.io.IOException
*/
boolean processEntityGroupInTransitionAndBlockUntilAssigned(
final EntityGroupInfo egInfo) throws InterruptedException,
KeeperException, IOException {
boolean intransistion = processEntityGroupInTransition(
egInfo.getEncodedName(), egInfo);
if (!intransistion)
return intransistion;
LOG.debug("Waiting on " + egInfo.getEncodedName());
while (!this.server.isStopped()
&& this.entityGroupStates.isEntityGroupInTransition(egInfo
.getEncodedName())) {
// We put a timeout because we may have the entityGroup getting in just
// between the test
// and the waitForUpdate
this.entityGroupStates.waitForUpdate(100);
}
return intransistion;
}
/**
* Process failover of new master for entityGroup
* <code>encodedEntityGroupName</code> up in zookeeper.
*
* @param encodedEntityGroupName
* EntityGroup to process failover for.
* @param entityGroupInfo
* If null we'll go get it from meta table.
* @return True if we processed <code>entityGroupInfo</code> as a EGIT.
* @throws org.apache.zookeeper.KeeperException
* @throws java.io.IOException
*/
boolean processEntityGroupInTransition(final String encodedEntityGroupName,
final EntityGroupInfo entityGroupInfo) throws KeeperException,
IOException {
// We need a lock here to ensure that we will not put the same entityGroup
// twice
// It has no reason to be a lock shared with the other operations.
// We can do the lock on the entityGroup only, instead of a global lock:
// what we want to ensure
// is that we don't have two threads working on the same entityGroup.
Lock lock = locker.acquireLock(encodedEntityGroupName);
try {
Stat stat = new Stat();
byte[] data = ZKAssign.getDataAndWatch(watcher, encodedEntityGroupName,
stat);
if (data == null)
return false;
EntityGroupTransaction rt;
try {
rt = EntityGroupTransaction.parseFrom(data);
} catch (DeserializationException e) {
LOG.warn("Failed parse znode data", e);
return false;
}
EntityGroupInfo egInfo = entityGroupInfo;
if (egInfo == null) {
egInfo = entityGroupStates.getEntityGroupInfo(rt.getEntityGroupName());
if (egInfo == null)
return false;
}
processEntityGroupsInTransition(rt, egInfo, stat.getVersion());
return true;
} finally {
lock.unlock();
}
}
/**
* This call is invoked only during failover mode startup, zk assignment node
* processing. The locker is set in the caller.
*
* It should be private but it is used by some test too.
*/
void processEntityGroupsInTransition(
final EntityGroupTransaction egTransition,
final EntityGroupInfo entityGroupInfo, int expectedVersion)
throws KeeperException {
EventType et = egTransition.getEventType();
// Get ServerName. Could not be null.
ServerName sn = egTransition.getServerName();
String encodedEntityGroupName = entityGroupInfo.getEncodedName();
LOG.info("Processing entityGroup "
+ entityGroupInfo.getEntityGroupNameAsString() + " in state " + et);
if (entityGroupStates.isEntityGroupInTransition(encodedEntityGroupName)) {
// Just return
return;
}
switch (et) {
case M_ZK_ENTITYGROUP_CLOSING:
// If zk node of the entityGroup was updated by a live server skip this
// entityGroup and just add it into EGIT.
if (!serverManager.isServerOnline(sn)) {
// If was not online, its closed now. Force to OFFLINE and this
// will get it reassigned if appropriate
forceOffline(entityGroupInfo, egTransition);
} else {
// Just insert entityGroup into EGIT.
// If this never updates the timeout will trigger new assignment
entityGroupStates.updateEntityGroupState(egTransition,
EntityGroupState.State.CLOSING);
}
break;
case FSERVER_ZK_ENTITYGROUP_CLOSED:
case FSERVER_ZK_ENTITYGROUP_FAILED_OPEN:
// EntityGroup is closed, insert into EGIT and handle it
addToEGITandCallClose(entityGroupInfo, EntityGroupState.State.CLOSED,
egTransition);
break;
case M_ZK_ENTITYGROUP_OFFLINE:
// If zk node of the entityGroup was updated by a live server skip this
// entityGroup and just add it into EGIT.
if (!serverManager.isServerOnline(sn)) {
// EntityGroup is offline, insert into EGIT and handle it like a closed
addToEGITandCallClose(entityGroupInfo, EntityGroupState.State.OFFLINE,
egTransition);
} else {
// Just insert entityGroup into EGIT.
// If this never updates the timeout will trigger new assignment
entityGroupStates.updateEntityGroupState(egTransition,
EntityGroupState.State.PENDING_OPEN);
}
break;
case FSERVER_ZK_ENTITYGROUP_OPENING:
if (!serverManager.isServerOnline(sn)) {
entityGroupStates.updateEntityGroupState(egTransition,
EntityGroupState.State.OPENING);
// If the server is not online, it takes some time for timeout monitor
// to kick in.
// We know the entityGroup won't open. So we will assign the opening
// entityGroup
// immediately too.
processOpeningState(entityGroupInfo);
} else {
// Just insert entityGroup into EGIT.
// If this never updates the timeout will trigger new assignment
entityGroupStates.updateEntityGroupState(egTransition,
EntityGroupState.State.OPENING);
}
break;
case FSERVER_ZK_ENTITYGROUP_OPENED:
if (!serverManager.isServerOnline(sn)) {
forceOffline(entityGroupInfo, egTransition);
} else {
// EntityGroup is opened, insert into EGIT and handle it
entityGroupStates.updateEntityGroupState(egTransition,
EntityGroupState.State.OPEN);
new OpenedEntityGroupHandler(server, this, entityGroupInfo, sn,
expectedVersion).process();
}
break;
case FSERVER_ZK_ENTITYGROUP_SPLITTING:
LOG.debug("Processed entityGroup in state : " + et);
break;
case FSERVER_ZK_ENTITYGROUP_SPLIT:
LOG.debug("Processed entityGroup in state : " + et);
break;
default:
throw new IllegalStateException("Received entityGroup in state :" + et
+ " is not valid");
}
}
/**
* Put the entityGroup <code>egInfo</code> into an offline state up in zk.
*
* You need to have lock on the entityGroup before calling this method.
*
* @param egInfo
* @param oldEGt
* @throws org.apache.zookeeper.KeeperException
*/
private void forceOffline(final EntityGroupInfo egInfo,
final EntityGroupTransaction oldEGt) throws KeeperException {
// If was on dead server, its closed now. Force to OFFLINE and then
// handle it like a close; this will get it reassigned if appropriate
LOG.debug("EGIT " + egInfo.getEncodedName() + " in state="
+ oldEGt.getEventType() + " was on deadserver; forcing offline");
ZKAssign.createOrForceNodeOffline(this.watcher, egInfo,
oldEGt.getServerName());
addToEGITandCallClose(egInfo, EntityGroupState.State.OFFLINE, oldEGt);
}
/**
* Add to the in-memory copy of entityGroups in transition and then call close
* handler on passed entityGroup <code>egInfo</code>
*
* @param egInfo
* @param state
* @param oldData
*/
private void addToEGITandCallClose(final EntityGroupInfo egInfo,
final EntityGroupState.State state, final EntityGroupTransaction oldData) {
entityGroupStates.updateEntityGroupState(oldData, state);
new ClosedEntityGroupHandler(this.server, this, egInfo).process();
}
/**
* When a entityGroup is closed, it should be removed from the
* entityGroupsToReopen
*
* @param egInfo
* EntityGroupInfo of the entityGroup which was closed
*/
public void removeClosedEntityGroup(EntityGroupInfo egInfo) {
if (entityGroupsToReopen.remove(egInfo.getEncodedName()) != null) {
LOG.debug("Removed entityGroup from reopening entityGroups because it was closed");
}
}
/**
* Handles various states an unassigned node can be in.
* <p>
* Method is called when a state change is suspected for an unassigned node.
* <p>
* This deals with skipped transitions (we got a CLOSED but didn't see CLOSING
* yet).
*
* @param egTransition
* @param expectedVersion
*/
private void handleEntityGroup(final EntityGroupTransaction egTransition,
int expectedVersion) {
if (egTransition == null) {
LOG.warn("Unexpected NULL input " + egTransition);
return;
}
final ServerName sn = egTransition.getServerName();
// Check if this is a special HBCK transition
if (sn.equals(HBCK_CODE_SERVERNAME)) {
handleHBCK(egTransition);
return;
}
final long createTime = egTransition.getCreateTime();
final byte[] entityGroupName = egTransition.getEntityGroupName();
String encodedName = EntityGroupInfo.encodeEntityGroupName(entityGroupName);
// Verify this is a known server
if (!serverManager.isServerOnline(sn)
&& !ignoreStatesFSOffline.contains(egTransition.getEventType())) {
LOG.warn("Attempted to handle entityGroup transition for server but "
+ "server is not online: " + encodedName);
return;
}
EntityGroupState entityGroupState = entityGroupStates
.getEntityGroupTransitionState(encodedName);
long startTime = System.currentTimeMillis();
if (LOG.isDebugEnabled()) {
boolean lateEvent = createTime < (startTime - 15000);
LOG.debug("Handling transition=" + egTransition.getEventType()
+ ", server=" + sn + ", entityGroup="
+ (encodedName == null ? "null" : encodedName)
+ (lateEvent ? ", which is more than 15 seconds late" : "")
+ ", current state from entityGroup state map =" + entityGroupState);
}
// We don't do anything for this event,
// so separate it out, no need to lock/unlock anything
if (egTransition.getEventType() == EventType.M_ZK_ENTITYGROUP_OFFLINE) {
return;
}
// We need a lock on the entityGroup as we could update it
Lock lock = locker.acquireLock(encodedName);
try {
EntityGroupState latestState = entityGroupStates
.getEntityGroupTransitionState(encodedName);
if ((entityGroupState == null && latestState != null)
|| (entityGroupState != null && latestState == null)
|| (entityGroupState != null && latestState != null && latestState
.getState() != entityGroupState.getState())) {
LOG.warn("EntityGroup state changed from " + entityGroupState + " to "
+ latestState + ", while acquiring lock");
}
long waitedTime = System.currentTimeMillis() - startTime;
if (waitedTime > 5000) {
LOG.warn("Took " + waitedTime + "ms to acquire the lock");
}
entityGroupState = latestState;
switch (egTransition.getEventType()) {
case FSERVER_ZK_ENTITYGROUP_SPLITTING:
if (!isInStateForSplitting(entityGroupState))
break;
entityGroupStates.updateEntityGroupState(egTransition,
EntityGroupState.State.SPLITTING);
break;
case FSERVER_ZK_ENTITYGROUP_SPLIT:
// EntityGroupState must be null, or SPLITTING or PENDING_CLOSE.
if (!isInStateForSplitting(entityGroupState))
break;
// If null, add SPLITTING state before going to SPLIT
if (entityGroupState == null) {
entityGroupState = entityGroupStates.updateEntityGroupState(
egTransition, EntityGroupState.State.SPLITTING);
String message = "Received SPLIT for entityGroup " + encodedName
+ " from server " + sn;
// If still null, it means we cannot find it and it was already
// processed
if (entityGroupState == null) {
LOG.warn(message + " but it doesn't exist anymore,"
+ " probably already processed its split");
break;
}
LOG.info(message
+ " but entityGroup was not first in SPLITTING state; continuing");
}
// Check it has daughters.
byte[] payload = egTransition.getPayload();
List<EntityGroupInfo> daughters = null;
try {
daughters = EntityGroupInfo.parseDelimitedFrom(payload, 0,
payload.length);
} catch (IOException e) {
LOG.error("Dropped split! Failed reading split payload for "
+ encodedName);
break;
}
assert daughters.size() == 2;
// Assert that we can get a serverinfo for this server.
if (!this.serverManager.isServerOnline(sn)) {
LOG.error("Dropped split! ServerName=" + sn + " unknown.");
break;
}
// Run handler to do the rest of the SPLIT handling.
this.executorService.submit(new SplitEntityGroupHandler(server, this,
entityGroupState.getEntityGroup(), sn, daughters));
break;
case M_ZK_ENTITYGROUP_CLOSING:
// Should see CLOSING after we have asked it to CLOSE or additional
// times after already being in state of CLOSING
if (entityGroupState != null
&& !entityGroupState.isPendingCloseOrClosingOnServer(sn)) {
LOG.warn("Received CLOSING for entityGroup " + encodedName
+ " from server " + sn + " but entityGroup was in the state "
+ entityGroupState
+ " and not in expected PENDING_CLOSE or CLOSING states,"
+ " or not on the expected server");
return;
}
// Transition to CLOSING (or update stamp if already CLOSING)
entityGroupStates.updateEntityGroupState(egTransition,
EntityGroupState.State.CLOSING);
break;
case FSERVER_ZK_ENTITYGROUP_CLOSED:
// Should see CLOSED after CLOSING but possible after PENDING_CLOSE
if (entityGroupState != null
&& !entityGroupState.isPendingCloseOrClosingOnServer(sn)) {
LOG.warn("Received CLOSED for entityGroup " + encodedName
+ " from server " + sn + " but entityGroup was in the state "
+ entityGroupState
+ " and not in expected PENDING_CLOSE or CLOSING states,"
+ " or not on the expected server");
return;
}
// Handle CLOSED by assigning elsewhere or stopping if a disable
// If we got here all is good. Need to update EntityGroupState -- else
// what follows will fail because not in expected state.
entityGroupState = entityGroupStates.updateEntityGroupState(
egTransition, EntityGroupState.State.CLOSED);
if (entityGroupState != null) {
removeClosedEntityGroup(entityGroupState.getEntityGroup());
this.executorService.submit(new ClosedEntityGroupHandler(server,
this, entityGroupState.getEntityGroup()));
}
break;
case FSERVER_ZK_ENTITYGROUP_FAILED_OPEN:
if (entityGroupState != null
&& !entityGroupState.isPendingOpenOrOpeningOnServer(sn)) {
LOG.warn("Received FAILED_OPEN for entityGroup " + encodedName
+ " from server " + sn + " but entityGroup was in the state "
+ entityGroupState
+ " and not in expected PENDING_OPEN or OPENING states,"
+ " or not on the expected server");
return;
}
// Handle this the same as if it were opened and then closed.
entityGroupState = entityGroupStates.updateEntityGroupState(
egTransition, EntityGroupState.State.CLOSED);
// When there are more than one entityGroup server a new FSERVER is
// selected as the
// destination and the same is updated in the entityGroupplan.
// (HBASE-5546)
if (entityGroupState != null) {
getEntityGroupPlan(entityGroupState.getEntityGroup(), sn, true);
this.executorService.submit(new ClosedEntityGroupHandler(server,
this, entityGroupState.getEntityGroup()));
}
break;
case FSERVER_ZK_ENTITYGROUP_OPENING:
// Should see OPENING after we have asked it to OPEN or additional
// times after already being in state of OPENING
if (entityGroupState != null
&& !entityGroupState.isPendingOpenOrOpeningOnServer(sn)) {
LOG.warn("Received OPENING for entityGroup " + encodedName
+ " from server " + sn + " but entityGroup was in the state "
+ entityGroupState
+ " and not in expected PENDING_OPEN or OPENING states,"
+ " or not on the expected server");
return;
}
// Transition to OPENING (or update stamp if already OPENING)
entityGroupStates.updateEntityGroupState(egTransition,
EntityGroupState.State.OPENING);
break;
case FSERVER_ZK_ENTITYGROUP_OPENED:
// Should see OPENED after OPENING but possible after PENDING_OPEN
if (entityGroupState != null
&& !entityGroupState.isPendingOpenOrOpeningOnServer(sn)) {
LOG.warn("Received OPENED for entityGroup " + encodedName
+ " from server " + sn + " but entityGroup was in the state "
+ entityGroupState
+ " and not in expected PENDING_OPEN or OPENING states,"
+ " or not on the expected server");
return;
}
// Handle OPENED by removing from transition and deleted zk node
entityGroupState = entityGroupStates.updateEntityGroupState(
egTransition, EntityGroupState.State.OPEN);
if (entityGroupState != null) {
this.executorService.submit(new OpenedEntityGroupHandler(server,
this, entityGroupState.getEntityGroup(), sn, expectedVersion));
}
break;
default:
throw new IllegalStateException("Received event is not valid.");
}
} finally {
lock.unlock();
}
}
/**
* @return Returns true if this EntityGroupState is splittable; i.e. the
* EntityGroupState is currently in splitting state or pending_close
* or null (Anything else will return false). (Anything else will
* return false).
*/
private boolean isInStateForSplitting(final EntityGroupState egState) {
if (egState == null)
return true;
if (egState.isSplitting())
return true;
if (convertPendingCloseToSplitting(egState))
return true;
LOG.warn("Dropped entityGroup split! Not in state good for SPLITTING; egState="
+ egState);
return false;
}
/**
* If the passed entityGroupState is in PENDING_CLOSE, clean up PENDING_CLOSE
* state and convert it to SPLITTING instead. This can happen in case where
* master wants to close a entityGroup at same time a entityGroupserver starts
* a split. The split won. Clean out old PENDING_CLOSE state.
*
* @param egState
* @return True if we converted from PENDING_CLOSE to SPLITTING
*/
private boolean convertPendingCloseToSplitting(final EntityGroupState egState) {
if (!egState.isPendingClose())
return false;
LOG.debug("Converting PENDING_CLOSE to SPLITING; egState=" + egState);
entityGroupStates.updateEntityGroupState(egState.getEntityGroup(),
EntityGroupState.State.SPLITTING);
// Clean up existing state. Clear from entityGroup plans seems all we
// have to do here by way of clean up of PENDING_CLOSE.
clearEntityGroupPlan(egState.getEntityGroup());
return true;
}
/**
* Handle a ZK unassigned node transition triggered by HBCK repair tool.
* <p>
* This is handled in a separate code path because it breaks the normal rules.
*
* @param egTransition
*/
private void handleHBCK(EntityGroupTransaction egTransition) {
String encodedName = EntityGroupInfo.encodeEntityGroupName(egTransition
.getEntityGroupName());
LOG.info("Handling HBCK triggered transition="
+ egTransition.getEventType() + ", server="
+ egTransition.getServerName() + ", entityGroup=" + encodedName);
EntityGroupState entityGroupState = entityGroupStates
.getEntityGroupTransitionState(encodedName);
switch (egTransition.getEventType()) {
case M_ZK_ENTITYGROUP_OFFLINE:
EntityGroupInfo entityGroupInfo = null;
if (entityGroupState != null) {
entityGroupInfo = entityGroupState.getEntityGroup();
} else {
try {
byte[] name = egTransition.getEntityGroupName();
Pair<EntityGroupInfo, ServerName> p = FMetaReader
.getEntityGroupAndLocation(server.getConfiguration(), name);
entityGroupInfo = p.getFirst();
} catch (IOException e) {
LOG.info("Exception reading META doing HBCK repair operation", e);
return;
}
}
LOG.info("HBCK repair is triggering assignment of entityGroup="
+ entityGroupInfo.getEntityGroupNameAsString());
// trigger assign, node is already in OFFLINE so don't need to update ZK
assign(entityGroupInfo, false);
break;
default:
LOG.warn("Received unexpected entityGroup state from HBCK: "
+ egTransition.toString());
break;
}
}
// ZooKeeper events
/**
* New unassigned node has been created.
*
* <p>
* This happens when an FSERVER begins the OPENING or CLOSING of a entityGroup
* by creating an unassigned node.
*
* <p>
* When this happens we must:
* <ol>
* <li>Watch the node for further events</li>
* <li>Read and handle the state in the node</li>
* </ol>
*/
@Override
public void nodeCreated(String path) {
handleAssignmentEvent(path);
}
/**
* Existing unassigned node has had data changed.
*
* <p>
* This happens when an FSERVER transitions from OFFLINE to OPENING, or
* between OPENING/OPENED and CLOSING/CLOSED.
*
* <p>
* When this happens we must:
* <ol>
* <li>Watch the node for further events</li>
* <li>Read and handle the state in the node</li>
* </ol>
*/
@Override
public void nodeDataChanged(String path) {
handleAssignmentEvent(path);
}
@Override
public void nodeDeleted(final String path) {
if (path.startsWith(watcher.assignmentZNode)) {
int wi = Math.abs(path.hashCode() % zkEventWorkers.length);
zkEventWorkers[wi].submit(new Runnable() {
@Override
public void run() {
String entityGroupName = ZKAssign.getEntityGroupName(watcher, path);
Lock lock = locker.acquireLock(entityGroupName);
try {
EntityGroupState egState = entityGroupStates
.getEntityGroupTransitionState(entityGroupName);
if (egState == null)
return;
EntityGroupInfo entityGroupInfo = egState.getEntityGroup();
if (egState.isSplit()) {
LOG.debug("Ephemeral node deleted, entityGroupserver crashed?, "
+ "clearing from EGIT; egState=" + egState);
entityGroupOffline(egState.getEntityGroup());
} else {
LOG.debug("The znode of entityGroup "
+ entityGroupInfo.getEntityGroupNameAsString()
+ " has been deleted.");
if (egState.isOpened()) {
ServerName serverName = egState.getServerName();
entityGroupOnline(entityGroupInfo, serverName);
LOG.info("The master has opened the entityGroup "
+ entityGroupInfo.getEntityGroupNameAsString()
+ " that was online on " + serverName);
if (getZKTable().isDisablingOrDisabledTable(
entityGroupInfo.getTableNameAsString())) {
LOG.debug("Opened entityGroup "
+ entityGroupInfo.getEntityGroupNameAsString()
+ " but "
+ "this table is disabled, triggering close of entityGroup");
unassign(entityGroupInfo);
}
}
}
} finally {
lock.unlock();
}
}
});
}
}
/**
* New unassigned node has been created.
*
* <p>
* This happens when an FSERVER begins the OPENING, SPLITTING or CLOSING of a
* entityGroup by creating a znode.
*
* <p>
* When this happens we must:
* <ol>
* <li>Watch the node for further children changed events</li>
* <li>Watch all new children for changed events</li>
* </ol>
*/
@Override
public void nodeChildrenChanged(String path) {
if (path.equals(watcher.assignmentZNode)) {
int wi = Math.abs(path.hashCode() % zkEventWorkers.length);
zkEventWorkers[wi].submit(new Runnable() {
@Override
public void run() {
try {
// Just make sure we see the changes for the new znodes
List<String> children = ZKUtil.listChildrenAndWatchForNewChildren(
watcher, watcher.assignmentZNode);
if (children != null) {
for (String child : children) {
// if entityGroup is in transition, we already have a watch
// on it, so no need to watch it again. So, as I know for now,
// this is needed to watch splitting nodes only.
if (!entityGroupStates.isEntityGroupInTransition(child)) {
ZKUtil.watchAndCheckExists(watcher,
ZKUtil.joinZNode(watcher.assignmentZNode, child));
}
}
}
} catch (KeeperException e) {
server.abort("Unexpected ZK exception reading unassigned children",
e);
}
}
});
}
}
/**
* Marks the entityGroup as online. Removes it from entityGroups in transition
* and updates the in-memory assignment information.
* <p>
* Used when a entityGroup has been successfully opened on a entityGroup
* server.
*
* @param entityGroupInfo
* @param sn
*/
void entityGroupOnline(EntityGroupInfo entityGroupInfo, ServerName sn) {
if (!serverManager.isServerOnline(sn)) {
LOG.warn("A entityGroup was opened on a dead server, ServerName=" + sn
+ ", entityGroup=" + entityGroupInfo.getEncodedName());
}
entityGroupStates.entityGroupOnline(entityGroupInfo, sn);
// Remove plan if one.
clearEntityGroupPlan(entityGroupInfo);
// Add the server to serversInUpdatingTimer
addToServersInUpdatingTimer(sn);
}
/**
* Pass the assignment event to a worker for processing. Each worker is a
* single thread executor service. The reason for just one thread is to make
* sure all events for a given entityGroup are processed in order.
*
* @param path
*/
private void handleAssignmentEvent(final String path) {
if (path.startsWith(watcher.assignmentZNode)) {
int wi = Math.abs(path.hashCode() % zkEventWorkers.length);
zkEventWorkers[wi].submit(new Runnable() {
@Override
public void run() {
try {
Stat stat = new Stat();
byte[] data = ZKAssign.getDataAndWatch(watcher, path, stat);
if (data == null)
return;
EntityGroupTransaction rt = EntityGroupTransaction.parseFrom(data);
handleEntityGroup(rt, stat.getVersion());
} catch (KeeperException e) {
server.abort(
"Unexpected ZK exception reading unassigned node data", e);
} catch (DeserializationException e) {
server.abort("Unexpected exception deserializing node data", e);
}
}
});
}
}
/**
* Add the server to the set serversInUpdatingTimer, then {@link TimerUpdater}
* will update timers for this server in background
*
* @param sn
*/
private void addToServersInUpdatingTimer(final ServerName sn) {
this.serversInUpdatingTimer.add(sn);
}
/**
* Touch timers for all entityGroups in transition that have the passed
* <code>sn</code> in common. Call this method whenever a server checks in.
* Doing so helps the case where a new entityGroupserver has joined the
* cluster and its been given 1k entityGroups to open. If this method is
* tickled every time the entityGroup reports in a successful open then the
* 1k-th entityGroup won't be timed out just because its sitting behind the
* open of 999 other entityGroups. This method is NOT used as part of bulk
* assign -- there we have a different mechanism for extending the
* entityGroups in transition timer (we turn it off temporarily -- because
* there is no entityGroupplan involved when bulk assigning.
*
* @param sn
*/
private void updateTimers(final ServerName sn) {
if (sn == null)
return;
// This loop could be expensive.
// First make a copy of current entityGroupPlan rather than hold sync while
// looping because holding sync can cause deadlock. Its ok in this loop
// if the Map we're going against is a little stale
List<Map.Entry<String, EntityGroupPlan>> rps;
synchronized (this.entityGroupPlans) {
rps = new ArrayList<Map.Entry<String, EntityGroupPlan>>(
entityGroupPlans.entrySet());
}
for (Map.Entry<String, EntityGroupPlan> e : rps) {
if (e.getValue() != null && e.getKey() != null
&& sn.equals(e.getValue().getDestination())) {
EntityGroupState entityGroupState = entityGroupStates
.getEntityGroupTransitionState(e.getKey());
if (entityGroupState != null) {
entityGroupState.updateTimestampToNow();
}
}
}
}
/**
* Marks the entityGroup as offline. Removes it from entityGroups in
* transition and removes in-memory assignment information.
* <p>
* Used when a entityGroup has been closed and should remain closed.
*
* @param entityGroupInfo
*/
public void entityGroupOffline(final EntityGroupInfo entityGroupInfo) {
entityGroupStates.entityGroupOffline(entityGroupInfo);
// remove the entityGroup plan as well just in case.
clearEntityGroupPlan(entityGroupInfo);
}
public void offlineDisabledEntityGroup(EntityGroupInfo entityGroupInfo) {
// Disabling so should not be reassigned, just delete the CLOSED node
LOG.debug("Table being disabled so deleting ZK node and removing from "
+ "entityGroups in transition, skipping assignment of entityGroup "
+ entityGroupInfo.getEntityGroupNameAsString());
try {
if (!ZKAssign.deleteClosedNode(watcher, entityGroupInfo.getEncodedName())) {
// Could also be in OFFLINE mode
ZKAssign.deleteOfflineNode(watcher, entityGroupInfo.getEncodedName());
}
} catch (KeeperException.NoNodeException nne) {
LOG.debug("Tried to delete closed node for " + entityGroupInfo
+ " but it " + "does not exist so just offlining");
} catch (KeeperException e) {
this.server.abort("Error deleting CLOSED node in ZK", e);
}
entityGroupOffline(entityGroupInfo);
}
// Assignment methods
/**
* Assigns the specified entityGroup.
* <p>
* If a EntityGroupPlan is available with a valid destination then it will be
* used to determine what server entityGroup is assigned to. If no
* EntityGroupPlan is available, entityGroup will be assigned to a random
* available server.
* <p>
* Updates the EntityGroupState and sends the OPEN RPC.
* <p>
* This will only succeed if the entityGroup is in transition and in a CLOSED
* or OFFLINE state or not in transition (in-memory not zk), and of course,
* the chosen server is up and running (It may have just crashed!). If the
* in-memory checks pass, the zk node is forced to OFFLINE before assigning.
*
* @param entityGroup
* server to be assigned
* @param setOfflineInZK
* whether ZK node should be created/transitioned to an OFFLINE state
* before assigning the entityGroup
*/
public void assign(EntityGroupInfo entityGroup, boolean setOfflineInZK) {
assign(entityGroup, setOfflineInZK, false);
}
/**
* Use care with forceNewPlan. It could cause double assignment.
*/
public void assign(EntityGroupInfo entityGroup, boolean setOfflineInZK,
boolean forceNewPlan) {
if (!setOfflineInZK && isDisabledorDisablingEntityGroupInEGIT(entityGroup)) {
return;
}
if (this.serverManager.isClusterShutdown()) {
LOG.info("Cluster shutdown is set; skipping assign of "
+ entityGroup.getEntityGroupNameAsString());
return;
}
String encodedName = entityGroup.getEncodedName();
Lock lock = locker.acquireLock(encodedName);
try {
EntityGroupState state = forceEntityGroupStateToOffline(entityGroup,
forceNewPlan);
if (state != null) {
assign(state, setOfflineInZK, forceNewPlan);
}
} finally {
lock.unlock();
}
}
/**
* Bulk assign entityGroups to <code>destination</code>.
*
* @param destination
* @param entityGroups
* EntityGroups to assign.
* @return true if successful
*/
boolean assign(final ServerName destination,
final List<EntityGroupInfo> entityGroups) {
int entityGroupCount = entityGroups.size();
if (entityGroupCount == 0) {
return true;
}
LOG.debug("Bulk assigning " + entityGroupCount + " entityGroup(s) to "
+ destination.toString());
Set<String> encodedNames = new HashSet<String>(entityGroupCount);
for (EntityGroupInfo entityGroup : entityGroups) {
encodedNames.add(entityGroup.getEncodedName());
}
List<EntityGroupInfo> failedToOpenEntityGroups = new ArrayList<EntityGroupInfo>();
Map<String, Lock> locks = locker.acquireLocks(encodedNames);
try {
AtomicInteger counter = new AtomicInteger(0);
Map<String, Integer> offlineNodesVersions = new ConcurrentHashMap<String, Integer>();
OfflineCallback cb = new OfflineCallback(watcher, destination, counter,
offlineNodesVersions);
Map<String, EntityGroupPlan> plans = new HashMap<String, EntityGroupPlan>(
entityGroups.size());
List<EntityGroupState> states = new ArrayList<EntityGroupState>(
entityGroups.size());
for (EntityGroupInfo entityGroup : entityGroups) {
String encodedEntityGroupName = entityGroup.getEncodedName();
EntityGroupState state = forceEntityGroupStateToOffline(entityGroup,
true);
if (state != null && asyncSetOfflineInZooKeeper(state, cb, destination)) {
EntityGroupPlan plan = new EntityGroupPlan(entityGroup,
state.getServerName(), destination);
plans.put(encodedEntityGroupName, plan);
states.add(state);
} else {
LOG.warn("failed to force entityGroup state to offline or "
+ "failed to set it offline in ZK, will reassign later: "
+ entityGroup);
failedToOpenEntityGroups.add(entityGroup); // assign individually
// later
Lock lock = locks.remove(encodedEntityGroupName);
lock.unlock();
}
}
// Wait until all unassigned nodes have been put up and watchers set.
int total = states.size();
for (int oldCounter = 0; !server.isStopped();) {
int count = counter.get();
if (oldCounter != count) {
LOG.info(destination.toString() + " unassigned znodes=" + count
+ " of total=" + total);
oldCounter = count;
}
if (count >= total)
break;
Threads.sleep(5);
}
if (server.isStopped()) {
return false;
}
// Add entityGroup plans, so we can updateTimers when one entityGroup is
// opened so
// that unnecessary timeout on EGIT is reduced.
this.addPlans(plans);
List<EntityGroupInfo> entityGroupOpenInfos = new ArrayList<EntityGroupInfo>(
states.size());
for (EntityGroupState state : states) {
EntityGroupInfo entityGroup = state.getEntityGroup();
String encodedEntityGroupName = entityGroup.getEncodedName();
Integer nodeVersion = offlineNodesVersions.get(encodedEntityGroupName);
if (nodeVersion == null || nodeVersion.intValue() == -1) {
LOG.warn("failed to offline in zookeeper: " + entityGroup);
failedToOpenEntityGroups.add(entityGroup); // assign individually
// later
Lock lock = locks.remove(encodedEntityGroupName);
lock.unlock();
} else {
entityGroupStates.updateEntityGroupState(entityGroup,
EntityGroupState.State.PENDING_OPEN, destination);
entityGroupOpenInfos.add(entityGroup);
}
}
// Move on to open entityGroups.
try {
// Send OPEN RPC. If it fails on a IOE or RemoteException, the
// TimeoutMonitor will pick up the pieces.
long maxWaitTime = System.currentTimeMillis()
+ this.server.getConfiguration().getLong(
"wasp.entityGroupserver.rpc.startup.waittime", 60000);
for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
try {
List<EntityGroupOpeningState> entityGroupOpeningStateList = serverManager
.sendEntityGroupsOpen(destination, entityGroupOpenInfos);
if (entityGroupOpeningStateList == null) {
// Failed getting RPC connection to this server
return false;
}
for (int k = 0, n = entityGroupOpeningStateList.size(); k < n; k++) {
EntityGroupOpeningState openingState = entityGroupOpeningStateList
.get(k);
if (openingState != EntityGroupOpeningState.OPENED) {
EntityGroupInfo entityGroup = entityGroupOpenInfos.get(k);
if (openingState == EntityGroupOpeningState.ALREADY_OPENED) {
processAlreadyOpenedEntityGroup(entityGroup, destination);
} else if (openingState == EntityGroupOpeningState.FAILED_OPENING) {
// Failed opening this entityGroup, reassign it later
failedToOpenEntityGroups.add(entityGroup);
} else {
LOG.warn("THIS SHOULD NOT HAPPEN: unknown opening state "
+ openingState + " in assigning entityGroup "
+ entityGroup);
}
}
}
break;
} catch (IOException e) {
if (e instanceof RemoteException) {
e = ((RemoteException) e).unwrapRemoteException();
}
if (e instanceof FServerStoppedException) {
LOG.warn("The fserver was shut down, ", e);
// No need to retry, the entityGroup server is a goner.
return false;
} else if (e instanceof ServerNotRunningYetException) {
long now = System.currentTimeMillis();
if (now < maxWaitTime) {
LOG.debug("Server is not yet up; waiting up to "
+ (maxWaitTime - now) + "ms", e);
Thread.sleep(100);
i--; // reset the try count
continue;
}
} else if (e instanceof java.net.SocketTimeoutException
&& this.serverManager.isServerOnline(destination)) {
// In case socket is timed out and the entityGroup server is still
// online,
// the openEntityGroup RPC could have been accepted by the server
// and
// just the response didn't go through. So we will retry to
// open the entityGroup on the same server.
if (LOG.isDebugEnabled()) {
LOG.debug("Bulk assigner openEntityGroup() to " + destination
+ " has timed out, but the entityGroups might"
+ " already be opened on it.", e);
}
continue;
}
throw e;
}
}
} catch (IOException e) {
// Can be a socket timeout, EOF, NoRouteToHost, etc
LOG.info("Unable to communicate with the fserver in order"
+ " to assign entityGroups", e);
return false;
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
} finally {
for (Lock lock : locks.values()) {
lock.unlock();
}
}
if (!failedToOpenEntityGroups.isEmpty()) {
for (EntityGroupInfo entityGroup : failedToOpenEntityGroups) {
invokeAssign(entityGroup);
}
}
LOG.debug("Bulk assigning done for " + destination.toString());
return true;
}
/**
* Send CLOSE RPC if the server is online, otherwise, offline the entityGroup
*/
private void unassign(final EntityGroupInfo entityGroup,
final EntityGroupState state, final int versionOfClosingNode,
final ServerName dest, final boolean transitionInZK) {
// Send CLOSE RPC
ServerName server = state.getServerName();
// ClosedEntityGrouphandler can remove the server from this.entityGroups
if (!serverManager.isServerOnline(server)) {
// delete the node. if no node exists need not bother.
deleteClosingOrClosedNode(entityGroup);
entityGroupOffline(entityGroup);
return;
}
for (int i = 1; i <= this.maximumAttempts; i++) {
try {
if (serverManager.sendEntityGroupClose(server, entityGroup,
versionOfClosingNode, dest, transitionInZK)) {
LOG.debug("Sent CLOSE to " + server + " for entityGroup "
+ entityGroup.getEntityGroupNameAsString());
return;
}
// This never happens. Currently entityGroupserver close always return
// true.
LOG.warn("Server " + server
+ " entityGroup CLOSE RPC returned false for "
+ entityGroup.getEntityGroupNameAsString());
} catch (Throwable t) {
if (t instanceof RemoteException) {
t = ((RemoteException) t).unwrapRemoteException();
}
if (t instanceof NotServingEntityGroupException) {
deleteClosingOrClosedNode(entityGroup);
entityGroupOffline(entityGroup);
return;
} else if (t instanceof EntityGroupAlreadyInTransitionException) {
// FSERVER is already processing this entityGroup, only need to update
// the timestamp
LOG.debug("update " + state + " the timestamp.");
state.updateTimestampToNow();
}
LOG.info(
"Server " + server + " returned " + t + " for "
+ entityGroup.getEntityGroupNameAsString() + ", try=" + i
+ " of " + this.maximumAttempts, t);
// Presume retry or server will expire.
}
}
}
/**
* Set entityGroup to OFFLINE unless it is opening and forceNewPlan is false.
*/
private EntityGroupState forceEntityGroupStateToOffline(
final EntityGroupInfo entityGroup, final boolean forceNewPlan) {
EntityGroupState state = entityGroupStates.getEntityGroupState(entityGroup);
if (state == null) {
LOG.warn("Assigning a entityGroup not in entityGroup states: "
+ entityGroup);
state = entityGroupStates.createEntityGroupState(entityGroup);
} else {
switch (state.getState()) {
case OPEN:
case OPENING:
case PENDING_OPEN:
if (!forceNewPlan) {
LOG.debug("Attempting to assign entityGroup " + entityGroup
+ " but it is already in transition: " + state);
return null;
}
case CLOSING:
case PENDING_CLOSE:
unassign(entityGroup, state, -1, null, false);
case CLOSED:
if (!state.isOffline()) {
LOG.debug("Forcing OFFLINE; was=" + state);
state = entityGroupStates.updateEntityGroupState(entityGroup,
EntityGroupState.State.OFFLINE);
}
case OFFLINE:
break;
default:
LOG.error("Trying to assign entityGroup " + entityGroup
+ ", which is in state " + state);
return null;
}
}
return state;
}
/**
* Caller must hold lock on the passed <code>state</code> object.
*
* @param state
* @param setOfflineInZK
* @param forceNewPlan
*/
private void assign(EntityGroupState state, final boolean setOfflineInZK,
final boolean forceNewPlan) {
EntityGroupState currentState = state;
int versionOfOfflineNode = -1;
EntityGroupPlan plan = null;
long maxEntityGroupServerStartupWaitTime = -1;
EntityGroupInfo entityGroup = state.getEntityGroup();
for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
if (plan == null) { // Get a server for the entityGroup at first
plan = getEntityGroupPlan(entityGroup, forceNewPlan);
}
if (plan == null) {
LOG.debug("Unable to determine a plan to assign " + entityGroup);
this.timeoutMonitor.setAllEntityGroupServersOffline(true);
return; // Should get reassigned later when EGIT times out.
}
if (setOfflineInZK && versionOfOfflineNode == -1) {
// get the version of the znode after setting it to OFFLINE.
// versionOfOfflineNode will be -1 if the znode was not set to OFFLINE
versionOfOfflineNode = setOfflineInZooKeeper(currentState,
plan.getDestination());
if (versionOfOfflineNode != -1) {
if (isDisabledorDisablingEntityGroupInEGIT(entityGroup)) {
return;
}
// In case of assignment from EnableTableHandler table state is
// ENABLING. Any how
// EnableTableHandler will set ENABLED after assigning all the table
// entityGroups. If we
// try to set to ENABLED directly then client API may think table is
// enabled.
// When we have a case such as all the entityGroups are added directly
// into .META. and we call
// assignEntityGroup then we need to make the table ENABLED. Hence in
// such case the table
// will not be in ENABLING or ENABLED state.
String tableName = entityGroup.getTableNameAsString();
if (!zkTable.isEnablingTable(tableName)
&& !zkTable.isEnabledTable(tableName)) {
LOG.debug("Setting table " + tableName + " to ENABLED state.");
setEnabledTable(tableName);
}
}
}
if (setOfflineInZK && versionOfOfflineNode == -1) {
return;
}
if (this.server.isStopped()) {
LOG.debug("Server stopped; skipping assign of " + entityGroup);
return;
}
try {
LOG.info("Assigning entityGroup "
+ entityGroup.getEntityGroupNameAsString() + " to "
+ plan.getDestination().toString());
// Transition EntityGroupState to PENDING_OPEN
currentState = entityGroupStates.updateEntityGroupState(entityGroup,
EntityGroupState.State.PENDING_OPEN, plan.getDestination());
// Send OPEN RPC. This can fail if the server on other end is is not up.
// Pass the version that was obtained while setting the node to OFFLINE.
EntityGroupOpeningState entityGroupOpenState = serverManager
.sendEntityGroupOpen(plan.getDestination(), entityGroup,
versionOfOfflineNode);
if (entityGroupOpenState == EntityGroupOpeningState.ALREADY_OPENED) {
processAlreadyOpenedEntityGroup(entityGroup, plan.getDestination());
} else if (entityGroupOpenState == EntityGroupOpeningState.FAILED_OPENING) {
// Failed opening this entityGroup
throw new Exception("Get entityGroupOpeningState="
+ entityGroupOpenState);
}
break;
} catch (Throwable t) {
if (t instanceof RemoteException) {
t = ((RemoteException) t).unwrapRemoteException();
}
boolean entityGroupAlreadyInTransitionException = false;
boolean serverNotRunningYet = false;
boolean socketTimedOut = false;
if (t instanceof EntityGroupAlreadyInTransitionException) {
entityGroupAlreadyInTransitionException = true;
if (LOG.isDebugEnabled()) {
LOG.debug("Failed assignment in: " + plan.getDestination()
+ " due to " + t.getMessage());
}
} else if (t instanceof ServerNotRunningYetException) {
if (maxEntityGroupServerStartupWaitTime < 0) {
maxEntityGroupServerStartupWaitTime = System.currentTimeMillis()
+ this.server.getConfiguration().getLong(
"wasp.entityGroupserver.rpc.startup.waittime", 60000);
}
try {
long now = System.currentTimeMillis();
if (now < maxEntityGroupServerStartupWaitTime) {
LOG.debug("Server is not yet up; waiting up to "
+ (maxEntityGroupServerStartupWaitTime - now) + "ms", t);
serverNotRunningYet = true;
Thread.sleep(100);
i--; // reset the try count
} else {
LOG.debug("Server is not up for a while; try a new one", t);
}
} catch (InterruptedException ie) {
LOG.warn(
"Failed to assign " + entityGroup.getEntityGroupNameAsString()
+ " since interrupted", ie);
Thread.currentThread().interrupt();
return;
}
} else if (t instanceof java.net.SocketTimeoutException
&& this.serverManager.isServerOnline(plan.getDestination())) {
// In case socket is timed out and the entityGroup server is still
// online,
// the openEntityGroup RPC could have been accepted by the server and
// just the response didn't go through. So we will retry to
// open the entityGroup on the same server to avoid possible
// double assignment.
socketTimedOut = true;
if (LOG.isDebugEnabled()) {
LOG.debug(
"Call openEntityGroup() to " + plan.getDestination()
+ " has timed out when trying to assign "
+ entityGroup.getEntityGroupNameAsString()
+ ", but the entityGroup might already be opened on "
+ plan.getDestination() + ".", t);
}
}
LOG.warn(
"Failed assignment of "
+ entityGroup.getEntityGroupNameAsString()
+ " to "
+ plan.getDestination()
+ ", trying to assign "
+ (entityGroupAlreadyInTransitionException
|| serverNotRunningYet || socketTimedOut ? "to the same entityGroup server because of EntityGroupAlreadyInTransitionException"
+ "/ServerNotRunningYetException/SocketTimeoutException;"
: "elsewhere instead; ") + "try=" + i + " of "
+ this.maximumAttempts, t);
if (i == this.maximumAttempts) {
// Don't reset the entityGroup state or get a new plan any more.
// This is the last try.
continue;
}
// If entityGroup opened on destination of present plan, reassigning to
// new
// FSERVER may cause double assignments. In case of
// EntityGroupAlreadyInTransitionException
// reassigning to same FSERVER.
EntityGroupPlan newPlan = plan;
if (!(entityGroupAlreadyInTransitionException || serverNotRunningYet || socketTimedOut)) {
// Force a new plan and reassign. Will return null if no servers.
// The new plan could be the same as the existing plan since we don't
// exclude the server of the original plan, which should not be
// excluded since it could be the only server up now.
newPlan = getEntityGroupPlan(entityGroup, true);
}
if (newPlan == null) {
this.timeoutMonitor.setAllEntityGroupServersOffline(true);
LOG.warn("Unable to find a viable location to assign entityGroup "
+ entityGroup.getEntityGroupNameAsString());
return;
}
if (plan != newPlan
&& !plan.getDestination().equals(newPlan.getDestination())) {
// Clean out plan we failed execute and one that doesn't look like
// it'll
// succeed anyways; we need a new plan!
// Transition back to OFFLINE
currentState = entityGroupStates.updateEntityGroupState(entityGroup,
EntityGroupState.State.OFFLINE);
versionOfOfflineNode = -1;
plan = newPlan;
}
}
}
}
private void processAlreadyOpenedEntityGroup(EntityGroupInfo entityGroup,
ServerName sn) {
// Remove entityGroup from in-memory transition and unassigned node from ZK
// While trying to enable the table the entityGroups of the table were
// already enabled.
LOG.debug("ALREADY_OPENED entityGroup "
+ entityGroup.getEntityGroupNameAsString() + " to " + sn);
String encodedEntityGroupName = entityGroup.getEncodedName();
try {
ZKAssign.deleteOfflineNode(watcher, encodedEntityGroupName);
} catch (KeeperException.NoNodeException e) {
if (LOG.isDebugEnabled()) {
LOG.debug("The unassigned node " + encodedEntityGroupName
+ " doesnot exist.");
}
} catch (KeeperException e) {
server.abort(
"Error deleting OFFLINED node in ZK for transition ZK node ("
+ encodedEntityGroupName + ")", e);
}
entityGroupStates.entityGroupOnline(entityGroup, sn);
}
private boolean isDisabledorDisablingEntityGroupInEGIT(
final EntityGroupInfo entityGroup) {
String tableName = entityGroup.getTableNameAsString();
boolean disabled = this.zkTable.isDisabledTable(tableName);
if (disabled || this.zkTable.isDisablingTable(tableName)) {
LOG.info("Table " + tableName + (disabled ? " disabled;" : " disabling;")
+ " skipping assign of " + entityGroup.getEntityGroupNameAsString());
offlineDisabledEntityGroup(entityGroup);
return true;
}
return false;
}
/**
* Set entityGroup as OFFLINED up in zookeeper
*
* @param state
* @return the version of the offline node if setting of the OFFLINE node was
* successful, -1 otherwise.
*/
private int setOfflineInZooKeeper(final EntityGroupState state,
final ServerName destination) {
if (!state.isClosed() && !state.isOffline()) {
String msg = "Unexpected state : " + state
+ " .. Cannot transit it to OFFLINE.";
this.server.abort(msg, new IllegalStateException(msg));
return -1;
}
entityGroupStates.updateEntityGroupState(state.getEntityGroup(),
EntityGroupState.State.OFFLINE);
int versionOfOfflineNode = -1;
try {
// get the version after setting the znode to OFFLINE
versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(watcher,
state.getEntityGroup(), destination);
if (versionOfOfflineNode == -1) {
LOG.warn("Attempted to create/force node into OFFLINE state before "
+ "completing assignment but failed to do so for " + state);
return -1;
}
} catch (KeeperException e) {
server.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
return -1;
}
return versionOfOfflineNode;
}
/**
* @param entityGroup
* the entityGroup to assign
* @return Plan for passed <code>entityGroup</code> (If none currently, it
* creates one or if no servers to assign, it returns null).
*/
private EntityGroupPlan getEntityGroupPlan(final EntityGroupInfo entityGroup,
final boolean forceNewPlan) {
return getEntityGroupPlan(entityGroup, null, forceNewPlan);
}
/**
* @param entityGroup
* the entityGroup to assign
* @param serverToExclude
* Server to exclude (we know its bad). Pass null if all servers are
* thought to be assignable.
* @param forceNewPlan
* If true, then if an existing plan exists, a new plan will be
* generated.
* @return Plan for passed <code>entityGroup</code> (If none currently, it
* creates one or if no servers to assign, it returns null).
*/
private EntityGroupPlan getEntityGroupPlan(final EntityGroupInfo entityGroup,
final ServerName serverToExclude, final boolean forceNewPlan) {
// Pickup existing plan or make a new one
final String encodedName = entityGroup.getEncodedName();
final List<ServerName> destServers = serverManager
.createDestinationServersList(serverToExclude);
if (destServers.isEmpty()) {
LOG.warn("Can't move the entityGroup " + encodedName
+ ", there is no destination server available.");
return null;
}
EntityGroupPlan randomPlan = null;
boolean newPlan = false;
EntityGroupPlan existingPlan = null;
synchronized (this.entityGroupPlans) {
existingPlan = this.entityGroupPlans.get(encodedName);
if (existingPlan != null && existingPlan.getDestination() != null) {
LOG.debug("Found an existing plan for "
+ entityGroup.getEntityGroupNameAsString()
+ " destination server is " + existingPlan.getDestination());
}
if (forceNewPlan || existingPlan == null
|| existingPlan.getDestination() == null
|| !destServers.contains(existingPlan.getDestination())) {
newPlan = true;
randomPlan = new EntityGroupPlan(entityGroup, null,
balancer.randomAssignment(entityGroup, destServers));
this.entityGroupPlans.put(encodedName, randomPlan);
}
}
if (newPlan) {
LOG.debug("No previous transition plan was found (or we are ignoring "
+ "an existing plan) for " + entityGroup.getEntityGroupNameAsString()
+ " so generated a random one; " + randomPlan + "; "
+ serverManager.countOfFServers() + " (online="
+ serverManager.getOnlineServers().size() + ", available="
+ destServers.size() + ") available servers");
return randomPlan;
}
LOG.debug("Using pre-existing plan for entityGroup "
+ entityGroup.getEntityGroupNameAsString() + "; plan=" + existingPlan);
return existingPlan;
}
/**
* Unassign the list of entityGroups. Configuration knobs:
* wasp.bulk.waitbetween.reopen indicates the number of milliseconds to wait
* before unassigning another entityGroup from this entityGroup server
*
* @param entityGroups
* @throws InterruptedException
*/
public void unassign(List<EntityGroupInfo> entityGroups) {
int waitTime = this.server.getConfiguration().getInt(
"wasp.bulk.waitbetween.reopen", 0);
for (EntityGroupInfo entityGroup : entityGroups) {
if (entityGroupStates.isEntityGroupInTransition(entityGroup))
continue;
unassign(entityGroup, false);
while (entityGroupStates.isEntityGroupInTransition(entityGroup)) {
try {
Thread.sleep(10);
} catch (InterruptedException e) {
// Do nothing, continue
}
}
if (waitTime > 0)
try {
Thread.sleep(waitTime);
} catch (InterruptedException e) {
// Do nothing, continue
}
}
}
/**
* Unassigns the specified entityGroup.
* <p>
* Updates the EntityGroupState and sends the CLOSE RPC unless entityGroup is
* being split by entityGroupserver; then the unassign fails (silently)
* because we presume the entityGroup being unassigned no longer exists (its
* been split out of existence). TODO: What to do if split fails and is rolled
* back and parent is revivified?
* <p>
* If a EntityGroupPlan is already set, it will remain.
*
* @param entityGroup
* server to be unassigned
*/
public void unassign(EntityGroupInfo entityGroup) {
unassign(entityGroup, false);
}
/**
* Unassigns the specified entityGroup.
* <p>
* Updates the EntityGroupState and sends the CLOSE RPC unless entityGroup is
* being split by entityGroupserver; then the unassign fails (silently)
* because we presume the entityGroup being unassigned no longer exists (its
* been split out of existence). TODO: What to do if split fails and is rolled
* back and parent is revivified?
* <p>
* If a EntityGroupPlan is already set, it will remain.
*
* @param entityGroup
* server to be unassigned
* @param force
* if entityGroup should be closed even if already closing
*/
public void unassign(EntityGroupInfo entityGroup, boolean force,
ServerName dest) {
// TODO: Method needs refactoring. Ugly buried returns throughout. Beware!
LOG.debug("Starting unassignment of entityGroup "
+ entityGroup.getEntityGroupNameAsString() + " (offlining)");
String encodedName = entityGroup.getEncodedName();
// Grab the state of this entityGroup and synchronize on it
int versionOfClosingNode = -1;
// We need a lock here as we're going to do a put later and we don't want
// multiple states
// creation
ReentrantLock lock = locker.acquireLock(encodedName);
EntityGroupState state = entityGroupStates
.getEntityGroupTransitionState(encodedName);
try {
if (state == null) {
// Create the znode in CLOSING state
try {
state = entityGroupStates.getEntityGroupState(entityGroup);
if (state == null || state.getServerName() == null) {
// We don't know where the entityGroup is, offline it.
// No need to send CLOSE RPC
entityGroupOffline(entityGroup);
return;
}
versionOfClosingNode = ZKAssign.createNodeClosing(watcher,
entityGroup, state.getServerName());
if (versionOfClosingNode == -1) {
LOG.debug("Attempting to unassign entityGroup "
+ entityGroup.getEntityGroupNameAsString()
+ " but ZK closing node " + "can't be created.");
return;
}
} catch (KeeperException ee) {
Exception e = ee;
if (e instanceof NodeExistsException) {
// Handle race between master initiated close and entityGroupserver
// orchestrated splitting. See if existing node is in a
// SPLITTING or SPLIT state. If so, the entityGroupserver started
// an op on node before we could get our CLOSING in. Deal.
NodeExistsException nee = (NodeExistsException) e;
String path = nee.getPath();
try {
if (isSplitOrSplitting(path)) {
LOG.debug(path
+ " is SPLIT or SPLITTING; "
+ "skipping unassign because entityGroup no longer exists -- its split");
return;
}
} catch (KeeperException.NoNodeException ke) {
LOG.warn("Failed getData on SPLITTING/SPLIT at " + path
+ "; presuming split and that the entityGroup to unassign, "
+ encodedName + ", no longer exists -- confirm", ke);
return;
} catch (KeeperException ke) {
LOG.error("Unexpected zk state", ke);
} catch (DeserializationException de) {
LOG.error("Failed parse", de);
}
}
// If we get here, don't understand whats going on -- abort.
server.abort("Unexpected ZK exception creating node CLOSING", e);
return;
}
state = entityGroupStates.updateEntityGroupState(entityGroup,
EntityGroupState.State.PENDING_CLOSE);
} else if (force && (state.isPendingClose() || state.isClosing())) {
LOG.debug("Attempting to unassign entityGroup "
+ entityGroup.getEntityGroupNameAsString() + " which is already "
+ state.getState() + " but forcing to send a CLOSE RPC again ");
state.updateTimestampToNow();
} else {
LOG.debug("Attempting to unassign entityGroup "
+ entityGroup.getEntityGroupNameAsString() + " but it is "
+ "already in transition (" + state.getState() + ", force=" + force
+ ")");
return;
}
unassign(entityGroup, state, versionOfClosingNode, dest, true);
} finally {
lock.unlock();
}
}
public void unassign(EntityGroupInfo entityGroup, boolean force) {
unassign(entityGroup, force, null);
}
/**
*
* @param entityGroup
* entityGroupinfo of znode to be deleted.
*/
public void deleteClosingOrClosedNode(EntityGroupInfo entityGroup) {
try {
if (!ZKAssign.deleteNode(watcher, entityGroup.getEncodedName(),
EventHandler.EventType.M_ZK_ENTITYGROUP_CLOSING)) {
boolean deleteNode = ZKAssign.deleteNode(watcher,
entityGroup.getEncodedName(),
EventHandler.EventType.FSERVER_ZK_ENTITYGROUP_CLOSED);
// TODO : We don't abort if the delete node returns false. Is there any
// such corner case?
if (!deleteNode) {
LOG.error("The deletion of the CLOSED node for the entityGroup "
+ entityGroup.getEncodedName() + " returned " + deleteNode);
}
}
} catch (NoNodeException e) {
LOG.debug("CLOSING/CLOSED node for the entityGroup "
+ entityGroup.getEncodedName() + " already deleted");
} catch (KeeperException ke) {
server.abort(
"Unexpected ZK exception deleting node CLOSING/CLOSED for the entityGroup "
+ entityGroup.getEncodedName(), ke);
return;
}
}
/**
* @param path
* @return True if znode is in SPLIT or SPLITTING state.
* @throws org.apache.zookeeper.KeeperException
* Can happen if the znode went away in meantime.
* @throws com.alibaba.wasp.DeserializationException
*/
private boolean isSplitOrSplitting(final String path) throws KeeperException,
DeserializationException {
boolean result = false;
// This may fail if the SPLIT or SPLITTING znode gets cleaned up before we
// can get data from it.
byte[] data = ZKAssign.getData(watcher, path);
if (data == null)
return false;
EntityGroupTransaction rt = EntityGroupTransaction.parseFrom(data);
switch (rt.getEventType()) {
case FSERVER_ZK_ENTITYGROUP_SPLIT:
case FSERVER_ZK_ENTITYGROUP_SPLITTING:
result = true;
break;
default:
break;
}
return result;
}
/**
* Waits until the specified entityGroup has completed assignment.
* <p>
* If the entityGroup is already assigned, returns immediately. Otherwise,
* method blocks until the entityGroup is assigned.
*
* @param entityGroupInfo
* entityGroup to wait on assignment for
* @throws InterruptedException
*/
public void waitForAssignment(EntityGroupInfo entityGroupInfo)
throws InterruptedException {
while (!this.server.isStopped()
&& !entityGroupStates.isEntityGroupAssigned(entityGroupInfo)) {
// We should receive a notification, but it's
// better to have a timeout to recheck the condition here:
// it lowers the impact of a race condition if any
entityGroupStates.waitForUpdate(100);
}
}
/**
* Assigns specified entityGroups retaining assignments, if any.
* <p>
* This is a synchronous call and will return once every entityGroup has been
* assigned. If anything fails, an exception is thrown
*
* @throws InterruptedException
* @throws java.io.IOException
*/
public void assign(Map<EntityGroupInfo, ServerName> entityGroups)
throws IOException, InterruptedException {
if (entityGroups == null || entityGroups.isEmpty()) {
return;
}
List<ServerName> servers = serverManager.createDestinationServersList();
if (servers == null || servers.isEmpty()) {
throw new IOException(
"Found no destination server to assign entityGroup(s)");
}
// Reuse existing assignment info
Map<ServerName, List<EntityGroupInfo>> bulkPlan = balancer
.retainAssignment(entityGroups, servers);
LOG.info("Bulk assigning " + entityGroups.size()
+ " entityGroup(s) across " + servers.size()
+ " server(s), retainAssignment=true");
BulkAssigner ba = new GeneralBulkAssigner(this.server, bulkPlan, this);
ba.bulkAssign();
LOG.info("Bulk assigning done");
}
/**
* Assigns specified entityGroups round robin, if any.
* <p>
* This is a synchronous call and will return once every entityGroup has been
* assigned. If anything fails, an exception is thrown
*
* @throws InterruptedException
* @throws java.io.IOException
*/
public void assign(List<EntityGroupInfo> entityGroups) throws IOException,
InterruptedException {
if (entityGroups == null || entityGroups.isEmpty()) {
return;
}
List<ServerName> servers = serverManager.createDestinationServersList();
if (servers == null || servers.isEmpty()) {
throw new IOException(
"Found no destination server to assign entityGroup(s)");
}
// Generate a round-robin bulk assignment plan
Map<ServerName, List<EntityGroupInfo>> bulkPlan = balancer
.roundRobinAssignment(entityGroups, servers);
LOG.info("Bulk assigning " + entityGroups.size()
+ " entityGroup(s) round-robin across " + servers.size() + " server(s)");
// Use fixed count thread pool assigning.
BulkAssigner ba = new GeneralBulkAssigner(this.server, bulkPlan, this);
ba.bulkAssign();
LOG.info("Bulk assigning done");
}
/**
* Assigns all user entityGroups, if any exist. Used during cluster startup.
* <p>
* This is a synchronous call and will return once every entityGroup has been
* assigned. If anything fails, an exception is thrown and the cluster should
* be shutdown.
*
* @throws InterruptedException
* @throws java.io.IOException
* @throws org.apache.zookeeper.KeeperException
*/
private void assignAllUserEntityGroups() throws IOException,
InterruptedException, KeeperException {
// Cleanup any existing ZK nodes and start watching
ZKAssign.deleteAllNodes(watcher);
ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
this.watcher.assignmentZNode);
failoverCleanupDone();
// Skip assignment for entityGroups of tables in DISABLING state because
// during clean cluster startup
// no FSERVER is alive and entityGroups map also doesn't have any
// information about the entityGroups.
// See HBASE-6281.
Set<String> disabledOrDisablingOrEnabling = ZKTable
.getDisabledOrDisablingTables(watcher);
disabledOrDisablingOrEnabling.addAll(ZKTable.getEnablingTables(watcher));
// Scan META for all user entityGroups, skipping any disabled tables
Map<EntityGroupInfo, ServerName> allEntityGroups = FMetaScanner.fullScan(
server.getConfiguration(), disabledOrDisablingOrEnabling, true);
if (allEntityGroups == null || allEntityGroups.isEmpty())
return;
// Determine what type of assignment to do on startup
boolean retainAssignment = server.getConfiguration().getBoolean(
"wasp.master.startup.retainassign", true);
if (retainAssignment) {
assign(allEntityGroups);
} else {
List<EntityGroupInfo> entityGroups = new ArrayList<EntityGroupInfo>(
allEntityGroups.keySet());
assign(entityGroups);
}
for (EntityGroupInfo egInfo : allEntityGroups.keySet()) {
String tableName = egInfo.getTableNameAsString();
if (!zkTable.isEnabledTable(tableName)) {
setEnabledTable(tableName);
}
}
}
/**
* Wait until no entityGroups in transition.
*
* @param timeout
* How long to wait.
* @return True if nothing in entityGroups in transition.
* @throws InterruptedException
*/
boolean waitUntilNoEntityGroupsInTransition(final long timeout)
throws InterruptedException {
// Blocks until there are no entityGroups in transition. It is possible that
// there
// are entityGroups in transition immediately after this returns but
// guarantees
// that if it returns without an exception that there was a period of time
// with no entityGroups in transition from the point-of-view of the
// in-memory
// state of the Master.
final long endTime = System.currentTimeMillis() + timeout;
while (!this.server.isStopped()
&& entityGroupStates.isEntityGroupsInTransition()
&& endTime > System.currentTimeMillis()) {
entityGroupStates.waitForUpdate(100);
}
return !entityGroupStates.isEntityGroupsInTransition();
}
/**
* Rebuild the list of user entityGroups and assignment information.
* <p>
* Returns a map of servers that are not found to be online and the
* entityGroups they were hosting.
*
* @return map of servers not online to their assigned entityGroups, as stored
* in META
* @throws java.io.IOException
*/
Map<ServerName, List<EntityGroupInfo>> rebuildUserEntityGroups()
throws IOException, KeeperException {
Set<String> enablingTables = ZKTable.getEnablingTables(watcher);
Set<String> disabledOrEnablingTables = ZKTable.getDisabledTables(watcher);
disabledOrEnablingTables.addAll(enablingTables);
Set<String> disabledOrDisablingOrEnabling = ZKTable
.getDisablingTables(watcher);
disabledOrDisablingOrEnabling.addAll(disabledOrEnablingTables);
// EntityGroup assignment from FMETA
List<Result> results = FMetaScanner.fullScan(server.getConfiguration());
// Get any new but slow to checkin entityGroup server that joined the
// cluster
Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
// Map of offline servers and their entityGroups to be returned
Map<ServerName, List<EntityGroupInfo>> offlineServers = new TreeMap<ServerName, List<EntityGroupInfo>>();
// Iterate entityGroups in META
if (results != null) {
for (Result result : results) {
Pair<EntityGroupInfo, ServerName> entityGroup = EntityGroupInfo
.getEntityGroupInfoAndServerName(result);
if (entityGroup == null)
continue;
EntityGroupInfo entityGroupInfo = entityGroup.getFirst();
ServerName entityGroupLocation = entityGroup.getSecond();
if (entityGroupInfo == null)
continue;
entityGroupStates.createEntityGroupState(entityGroupInfo);
String tableName = entityGroupInfo.getTableNameAsString();
if (entityGroupLocation == null) {
// entityGroupLocation could be null if createTable didn't finish
// properly.
// When createTable is in progress, HMaster restarts.
// Some entityGroups have been added to .META., but have not been
// assigned.
// When this happens, the entityGroup's table must be in ENABLING
// state.
// It can't be in ENABLED state as that is set when all entityGroups
// are
// assigned.
// It can't be in DISABLING state, because DISABLING state transitions
// from ENABLED state when application calls disableTable.
// It can't be in DISABLED state, because DISABLED states transitions
// from DISABLING state.
if (!enablingTables.contains(tableName)) {
LOG.warn("EntityGroup " + entityGroupInfo.getEncodedName()
+ " has null entityGroupLocation." + " But its table "
+ tableName + " isn't in ENABLING state.");
}
} else if (!onlineServers.contains(entityGroupLocation)) {
// EntityGroup is located on a server that isn't online
List<EntityGroupInfo> offlineEntityGroups = offlineServers
.get(entityGroupLocation);
if (offlineEntityGroups == null) {
offlineEntityGroups = new ArrayList<EntityGroupInfo>(1);
offlineServers.put(entityGroupLocation, offlineEntityGroups);
}
offlineEntityGroups.add(entityGroupInfo);
// need to enable the table if not disabled or disabling or enabling
// this will be used in rolling restarts
if (!disabledOrDisablingOrEnabling.contains(tableName)
&& !getZKTable().isEnabledTable(tableName)) {
setEnabledTable(tableName);
}
} else {
// If entityGroup is in offline and split state check the ZKNode
if (entityGroupInfo.isOffline() && entityGroupInfo.isSplit()) {
String node = ZKAssign.getNodeName(this.watcher,
entityGroupInfo.getEncodedName());
Stat stat = new Stat();
byte[] data = ZKUtil.getDataNoWatch(this.watcher, node, stat);
// If znode does not exist, don't consider this entityGroup
if (data == null) {
LOG.debug("EntityGroup "
+ entityGroupInfo.getEntityGroupNameAsString()
+ " split is completed. Hence need not add to entityGroups list");
continue;
}
}
// EntityGroup is being served and on an active server
// add only if entityGroup not in disabled or enabling table
if (!disabledOrEnablingTables.contains(tableName)) {
entityGroupStates.entityGroupOnline(entityGroupInfo,
entityGroupLocation);
}
// need to enable the table if not disabled or disabling or enabling
// this will be used in rolling restarts
if (!disabledOrDisablingOrEnabling.contains(tableName)
&& !getZKTable().isEnabledTable(tableName)) {
setEnabledTable(tableName);
}
}
}
}
return offlineServers;
}
/**
* Recover the tables that were not fully moved to DISABLED state. These
* tables are in DISABLING state when the master restarted/switched.
*
* @throws org.apache.zookeeper.KeeperException
* @throws com.alibaba.wasp.TableNotFoundException
* @throws java.io.IOException
*/
private void recoverTableInDisablingState() throws KeeperException,
TableNotFoundException, IOException {
Set<String> disablingTables = ZKTable.getDisablingTables(watcher);
if (disablingTables.size() != 0) {
for (String tableName : disablingTables) {
// Recover by calling DisableTableHandler
LOG.info("The table " + tableName
+ " is in DISABLING state. Hence recovering by moving the table"
+ " to DISABLED state.");
new DisableTableHandler(this.server, this, tableName.getBytes(),
(FMasterServices) this.server, true).process();
}
}
}
/**
* Recover the tables that are not fully moved to ENABLED state. These tables
* are in ENABLING state when the master restarted/switched
*
* @throws org.apache.zookeeper.KeeperException
* @throws com.alibaba.wasp.TableNotFoundException
* @throws java.io.IOException
*/
private void recoverTableInEnablingState() throws KeeperException,
TableNotFoundException, IOException {
Set<String> enablingTables = ZKTable.getEnablingTables(watcher);
if (enablingTables.size() != 0) {
for (String tableName : enablingTables) {
// Recover by calling EnableTableHandler
LOG.info("The table " + tableName
+ " is in ENABLING state. Hence recovering by moving the table"
+ " to ENABLED state.");
// enableTable in sync way during master startup,
new EnableTableHandler(this.server, (FMasterServices) this.server,this, tableName.getBytes(), true)
.process();
}
}
}
/**
* Processes list of dead servers from result of FMETA scan and entityGroups
* in EGIT
* <p>
* This is used for failover to recover the lost entityGroups that belonged to
* EntityGroupServers which failed while there was no active master or
* entityGroups that were in EGIT.
* <p>
*
* @param deadServers
* The list of dead servers which failed while there was no active
* master. Can be null.
* @param nodes
* The entityGroups in EGIT
* @throws java.io.IOException
* @throws org.apache.zookeeper.KeeperException
*/
private void processDeadServersAndRecoverLostEntityGroups(
Map<ServerName, List<EntityGroupInfo>> deadServers, List<String> nodes)
throws IOException, KeeperException {
if (deadServers != null) {
for (Map.Entry<ServerName, List<EntityGroupInfo>> server : deadServers
.entrySet()) {
ServerName serverName = server.getKey();
if (!serverManager.isServerDead(serverName)) {
serverManager.expireServer(serverName); // Let SSH do entityGroup
// re-assign
}
}
}
nodes = ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
this.watcher.assignmentZNode);
if (!nodes.isEmpty()) {
for (String encodedEntityGroupName : nodes) {
processEntityGroupInTransition(encodedEntityGroupName, null);
}
}
// Now we can safely claim failover cleanup completed and enable
// ServerShutdownHandler for further processing. The nodes (below)
// in transition, if any, are for entityGroups not related to those
// dead servers at all, and can be done in parallel to SSH.
failoverCleanupDone();
}
/**
* Set EntityGroups in transitions metrics. This takes an iterator on the
* EntityGroupInTransition map (CLSM), and is not synchronized. This iterator
* is not fail fast, which may lead to stale read; but that's better than
* creating a copy of the map for metrics computation, as this method will be
* invoked on a frequent interval.
*/
public void updateEntityGroupsInTransitionMetrics() {
long currentTime = System.currentTimeMillis();
int totalEGITs = 0;
int totalEGITsOverThreshold = 0;
long oldestEGITTime = 0;
int egitThreshold = this.server.getConfiguration().getInt(
FConstants.METRICS_EGIT_STUCK_WARNING_THRESHOLD, 60000);
for (EntityGroupState state : entityGroupStates
.getEntityGroupsInTransition().values()) {
totalEGITs++;
long egitTime = currentTime - state.getStamp();
if (egitTime > egitThreshold) { // more than the threshold
totalEGITsOverThreshold++;
}
if (oldestEGITTime < egitTime) {
oldestEGITTime = egitTime;
}
}
if (this.metricsMaster != null) {
this.metricsMaster.updateEGITOldestAge(oldestEGITTime);
this.metricsMaster.updateEGITCount(totalEGITs);
this.metricsMaster.updateEGITCountOverThreshold(totalEGITsOverThreshold);
}
}
/**
* @param entityGroup
* EntityGroup whose plan we are to clear.
*/
void clearEntityGroupPlan(final EntityGroupInfo entityGroup) {
synchronized (this.entityGroupPlans) {
this.entityGroupPlans.remove(entityGroup.getEncodedName());
}
}
/**
* Wait on entityGroup to clear entityGroups-in-transition.
*
* @param egInfo
* EntityGroup to wait on.
* @throws java.io.IOException
*/
public void waitOnEntityGroupToClearEntityGroupsInTransition(
final EntityGroupInfo egInfo) throws IOException, InterruptedException {
if (!entityGroupStates.isEntityGroupInTransition(egInfo))
return;
EntityGroupState egState = null;
// There is already a timeout monitor on entityGroups in transition so I
// should not have to have one here too?
while (!this.server.isStopped()
&& entityGroupStates.isEntityGroupInTransition(egInfo)) {
LOG.info("Waiting on " + egState + " to clear entityGroups-in-transition");
entityGroupStates.waitForUpdate(100);
}
if (this.server.isStopped()) {
LOG.info("Giving up wait on entityGroups in "
+ "transition because stoppable.isStopped is set");
}
}
/**
* Update timers for all entityGroups in transition going against the server
* in the serversInUpdatingTimer.
*/
public class TimerUpdater extends Chore {
public TimerUpdater(final int period, final Stoppable stopper) {
super("AssignmentTimerUpdater", period, stopper);
}
@Override
protected void chore() {
ServerName serverToUpdateTimer = null;
while (!serversInUpdatingTimer.isEmpty() && !stopper.isStopped()) {
if (serverToUpdateTimer == null) {
serverToUpdateTimer = serversInUpdatingTimer.first();
} else {
serverToUpdateTimer = serversInUpdatingTimer
.higher(serverToUpdateTimer);
}
if (serverToUpdateTimer == null) {
break;
}
updateTimers(serverToUpdateTimer);
serversInUpdatingTimer.remove(serverToUpdateTimer);
}
}
}
/**
* Monitor to check for time outs on entityGroup transition operations
*/
public class TimeoutMonitor extends Chore {
private boolean allEntityGroupServersOffline = false;
private FServerManager serverManager;
private final int timeout;
/**
* Creates a periodic monitor to check for time outs on entityGroup
* transition operations. This will deal with retries if for some reason
* something doesn't happen within the specified timeout.
*
* @param period
* @param stopper
* When {@link org.apache.hadoop.hbase.Stoppable#isStopped()} is true, this thread will
* cleanup and exit cleanly.
* @param timeout
*/
public TimeoutMonitor(final int period, final Stoppable stopper,
FServerManager serverManager, final int timeout) {
super("AssignmentTimeoutMonitor", period, stopper);
this.timeout = timeout;
this.serverManager = serverManager;
}
private synchronized void setAllEntityGroupServersOffline(
boolean allEntityGroupServersOffline) {
this.allEntityGroupServersOffline = allEntityGroupServersOffline;
}
@Override
protected void chore() {
boolean noFSERVERAvailable = this.serverManager
.createDestinationServersList().isEmpty();
// Iterate all entityGroups in transition checking for time outs
long now = System.currentTimeMillis();
// no lock concurrent access ok: we will be working on a copy, and it's
// java-valid to do
// a copy while another thread is adding/removing items
for (String entityGroupName : entityGroupStates
.getEntityGroupsInTransition().keySet()) {
EntityGroupState entityGroupState = entityGroupStates
.getEntityGroupTransitionState(entityGroupName);
if (entityGroupState == null)
continue;
if (entityGroupState.getStamp() + timeout <= now) {
// decide on action upon timeout
actOnTimeOut(entityGroupState);
} else if (this.allEntityGroupServersOffline && !noFSERVERAvailable) {
EntityGroupPlan existingPlan = entityGroupPlans.get(entityGroupName);
if (existingPlan == null
|| !this.serverManager.isServerOnline(existingPlan
.getDestination())) {
// if some FSERVERs just came back online, we can start the
// assignment
// right away
actOnTimeOut(entityGroupState);
}
}
}
setAllEntityGroupServersOffline(noFSERVERAvailable);
}
private void actOnTimeOut(EntityGroupState entityGroupState) {
EntityGroupInfo entityGroupInfo = entityGroupState.getEntityGroup();
LOG.info("EntityGroups in transition timed out: " + entityGroupState);
// Expired! Do a retry.
switch (entityGroupState.getState()) {
case CLOSED:
LOG.info("EntityGroup " + entityGroupInfo.getEncodedName()
+ " has been CLOSED for too long, waiting on queued "
+ "ClosedEntityGroupHandler to run or server shutdown");
// Update our timestamp.
entityGroupState.updateTimestampToNow();
break;
case OFFLINE:
LOG.info("EntityGroup has been OFFLINE for too long, " + "reassigning "
+ entityGroupInfo.getEntityGroupNameAsString()
+ " to a random server");
invokeAssign(entityGroupInfo);
break;
case PENDING_OPEN:
LOG.info("EntityGroup has been PENDING_OPEN for too "
+ "long, reassigning entityGroup="
+ entityGroupInfo.getEntityGroupNameAsString());
invokeAssign(entityGroupInfo);
break;
case OPENING:
processOpeningState(entityGroupInfo);
break;
case OPEN:
LOG.error("EntityGroup has been OPEN for too long, "
+ "we don't know where entityGroup was opened so can't do anything");
entityGroupState.updateTimestampToNow();
break;
case PENDING_CLOSE:
LOG.info("EntityGroup has been PENDING_CLOSE for too "
+ "long, running forced unassign again on entityGroup="
+ entityGroupInfo.getEntityGroupNameAsString());
invokeUnassign(entityGroupInfo);
break;
case CLOSING:
LOG.info("EntityGroup has been CLOSING for too "
+ "long, this should eventually complete or the server will "
+ "expire, send RPC again");
invokeUnassign(entityGroupInfo);
break;
case SPLIT:
case SPLITTING:
break;
default:
throw new IllegalStateException("Received event is not valid.");
}
}
}
private void processOpeningState(EntityGroupInfo entityGroupInfo) {
LOG.info("EntityGroup has been OPENING for too long, reassigning entityGroup="
+ entityGroupInfo.getEntityGroupNameAsString());
// Should have a ZK node in OPENING state
try {
String node = ZKAssign.getNodeName(watcher,
entityGroupInfo.getEncodedName());
Stat stat = new Stat();
byte[] data = ZKAssign.getDataNoWatch(watcher, node, stat);
if (data == null) {
LOG.warn("Data is null, node " + node + " no longer exists");
return;
}
EntityGroupTransaction rt = EntityGroupTransaction.parseFrom(data);
EventType et = rt.getEventType();
if (et == EventType.FSERVER_ZK_ENTITYGROUP_OPENED) {
LOG.debug("EntityGroup has transitioned to OPENED, allowing "
+ "watched event handlers to process");
return;
} else if (et != EventType.FSERVER_ZK_ENTITYGROUP_OPENING
&& et != EventType.FSERVER_ZK_ENTITYGROUP_FAILED_OPEN) {
LOG.warn("While timing out a entityGroup, found ZK node in unexpected state: "
+ et);
return;
}
invokeAssign(entityGroupInfo);
} catch (KeeperException ke) {
LOG.error("Unexpected ZK exception timing out CLOSING entityGroup", ke);
return;
} catch (DeserializationException e) {
LOG.error("Unexpected exception parsing CLOSING entityGroup", e);
return;
}
return;
}
void invokeAssign(EntityGroupInfo entityGroupInfo) {
threadPoolExecutorService.submit(new AssignCallable(this, entityGroupInfo));
}
private void invokeUnassign(EntityGroupInfo entityGroupInfo) {
threadPoolExecutorService
.submit(new UnAssignCallable(this, entityGroupInfo));
}
/**
* Check if the shutdown server carries the specific entityGroup. We have a
* bunch of places that store entityGroup location Those values aren't
* consistent. There is a delay of notification. The location from zookeeper
* unassigned node has the most recent data; but the node could be deleted
* after the entityGroup is opened by AM. The AM's info could be old when
* OpenedEntityGroupHandler processing hasn't finished yet when server
* shutdown occurs.
*
* @return whether the serverName currently hosts the entityGroup
*/
private boolean isCarryingEntityGroup(ServerName serverName,
EntityGroupInfo egInfo) {
EntityGroupTransaction rt = null;
try {
byte[] data = ZKAssign.getData(watcher, egInfo.getEncodedName());
// This call can legitimately come by null
rt = data == null ? null : EntityGroupTransaction.parseFrom(data);
} catch (KeeperException e) {
server.abort("Exception reading unassigned node for entityGroup="
+ egInfo.getEncodedName(), e);
} catch (DeserializationException e) {
server.abort("Exception parsing unassigned node for entityGroup="
+ egInfo.getEncodedName(), e);
}
ServerName addressFromZK = rt != null ? rt.getServerName() : null;
if (addressFromZK != null) {
// if we get something from ZK, we will use the data
boolean matchZK = (addressFromZK != null && addressFromZK
.equals(serverName));
LOG.debug("based on ZK, current entityGroup="
+ egInfo.getEntityGroupNameAsString() + " is on server="
+ addressFromZK + " server being checked=: " + serverName);
return matchZK;
}
ServerName addressFromAM = entityGroupStates
.getFServerOfEntityGroup(egInfo);
boolean matchAM = (addressFromAM != null && addressFromAM
.equals(serverName));
LOG.debug("based on AM, current entityGroup="
+ egInfo.getEntityGroupNameAsString() + " is on server="
+ (addressFromAM != null ? addressFromAM : "null")
+ " server being checked: " + serverName);
return matchAM;
}
/**
* Process shutdown server removing any assignments.
*
* @param sn
* Server that went down.
* @return list of entityGroups in transition on this server
*/
public List<EntityGroupState> processServerShutdown(final ServerName sn) {
// Clean out any existing assignment plans for this server
synchronized (this.entityGroupPlans) {
for (Iterator<Map.Entry<String, EntityGroupPlan>> i = this.entityGroupPlans
.entrySet().iterator(); i.hasNext();) {
Map.Entry<String, EntityGroupPlan> e = i.next();
ServerName otherSn = e.getValue().getDestination();
// The name will be null if the entityGroup is planned for a random
// assign.
if (otherSn != null && otherSn.equals(sn)) {
// Use iterator's remove else we'll get CME
i.remove();
}
}
}
return entityGroupStates.serverOffline(sn);
}
/**
* Update inmemory structures.
*
* @param sn
* Server that reported the split
* @param parent
* Parent entityGroup that was split
* @param a
* Daughter entityGroup A
* @param b
* Daughter entityGroup B
*/
public void handleSplitReport(final ServerName sn,
final EntityGroupInfo parent, final EntityGroupInfo a,
final EntityGroupInfo b) {
entityGroupOffline(parent);
entityGroupOnline(a, sn);
entityGroupOnline(b, sn);
// There's a possibility that the entityGroup was splitting while a user
// asked
// the master to disable, we need to make sure we close those entityGroups
// in
// that case. This is not racing with the entityGroup server itself since
// FSERVER
// report is done after the split transaction completed.
if (this.zkTable.isDisablingOrDisabledTable(parent.getTableNameAsString())) {
unassign(a);
unassign(b);
}
}
/**
* @param plan
* Plan to execute.
*/
void balance(final EntityGroupPlan plan) {
synchronized (this.entityGroupPlans) {
this.entityGroupPlans.put(plan.getEntityGroupName(), plan);
}
unassign(plan.getEntityGroupInfo(), false, plan.getDestination());
}
public void stop() {
this.timeoutMonitor.interrupt();
this.timerUpdater.interrupt();
}
/**
* Shutdown the threadpool executor service
*/
public void shutdown() {
threadPoolExecutorService.shutdownNow();
for (int i = 0, n = zkEventWorkers.length; i < n; i++) {
zkEventWorkers[i].shutdownNow();
}
}
protected void setEnabledTable(String tableName) {
try {
this.zkTable.setEnabledTable(tableName);
} catch (KeeperException e) {
// here we can abort as it is the start up flow
String errorMsg = "Unable to ensure that the table " + tableName
+ " will be" + " enabled because of a ZooKeeper issue";
LOG.error(errorMsg);
this.server.abort(errorMsg, e);
}
}
/**
* Set entityGroup as OFFLINED up in zookeeper asynchronously.
*
* @param state
* @return True if we succeeded, false otherwise (State was incorrect or
* failed updating zk).
*/
private boolean asyncSetOfflineInZooKeeper(final EntityGroupState state,
final AsyncCallback.StringCallback cb, final ServerName destination) {
if (!state.isClosed() && !state.isOffline()) {
this.server.abort("Unexpected state trying to OFFLINE; " + state,
new IllegalStateException());
return false;
}
entityGroupStates.updateEntityGroupState(state.getEntityGroup(),
EntityGroupState.State.OFFLINE);
try {
ZKAssign.asyncCreateNodeOffline(watcher, state.getEntityGroup(),
destination, cb, state);
} catch (KeeperException e) {
if (e instanceof NodeExistsException) {
LOG.warn("Node for " + state.getEntityGroup() + " already exists");
} else {
server
.abort("Unexpected ZK exception creating/setting node OFFLINE", e);
}
return false;
}
return true;
}
}