// executor pool is always available.
//
// If AssignmentManager hasn't finished rebuilding user regions,
// we are not ready to assign dead regions either. So we re-queue up
// the dead server for further processing too.
AssignmentManager am = services.getAssignmentManager();
if (isCarryingMeta() // hbase:meta
|| !am.isFailoverCleanupDone()) {
this.services.getServerManager().processDeadServer(serverName, this.shouldSplitHlog);
return;
}
// Wait on meta to come online; we need it to progress.
// TODO: Best way to hold strictly here? We should build this retry logic
// into the MetaReader operations themselves.
// TODO: Is the reading of hbase:meta necessary when the Master has state of
// cluster in its head? It should be possible to do without reading hbase:meta
// in all but one case. On split, the RS updates the hbase:meta
// table and THEN informs the master of the split via zk nodes in
// 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if
// the regionserver dies, these nodes do not stick around and this server
// shutdown processing does fixup (see the fixupDaughters method below).
// If we wanted to skip the hbase:meta scan, we'd have to change at least the
// final SPLIT message to be permanent in zk so in here we'd know a SPLIT
// completed (zk is updated after edits to hbase:meta have gone in). See
// {@link SplitTransaction}. We'd also have to be figure another way for
// doing the below hbase:meta daughters fixup.
NavigableMap<HRegionInfo, Result> hris = null;
while (!this.server.isStopped()) {
try {
this.server.getCatalogTracker().waitForMeta();
hris = MetaReader.getServerUserRegions(this.server.getCatalogTracker(),
this.serverName);
break;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new IOException("Interrupted", e);
} catch (IOException ioe) {
LOG.info("Received exception accessing hbase:meta during server shutdown of " +
serverName + ", retrying hbase:meta read", ioe);
}
}
if (this.server.isStopped()) {
throw new IOException("Server is stopped");
}
try {
if (this.shouldSplitHlog) {
LOG.info("Splitting logs for " + serverName + " before assignment.");
if (this.distributedLogReplay) {
LOG.info("Mark regions in recovery before assignment.");
Set<ServerName> serverNames = new HashSet<ServerName>();
serverNames.add(serverName);
this.services.getMasterFileSystem().prepareLogReplay(serverNames);
} else {
this.services.getMasterFileSystem().splitLog(serverName);
}
am.getRegionStates().logSplit(serverName);
} else {
LOG.info("Skipping log splitting for " + serverName);
}
} catch (IOException ioe) {
resubmit(serverName, ioe);
}
// Clean out anything in regions in transition. Being conservative and
// doing after log splitting. Could do some states before -- OPENING?
// OFFLINE? -- and then others after like CLOSING that depend on log
// splitting.
List<HRegionInfo> regionsInTransition = am.processServerShutdown(serverName);
LOG.info("Reassigning " + ((hris == null)? 0: hris.size()) +
" region(s) that " + (serverName == null? "null": serverName) +
" was carrying (and " + regionsInTransition.size() +
" regions(s) that were opening on this server)");
List<HRegionInfo> toAssignRegions = new ArrayList<HRegionInfo>();
toAssignRegions.addAll(regionsInTransition);
// Iterate regions that were on this server and assign them
if (hris != null) {
RegionStates regionStates = am.getRegionStates();
for (Map.Entry<HRegionInfo, Result> e: hris.entrySet()) {
HRegionInfo hri = e.getKey();
if (regionsInTransition.contains(hri)) {
continue;
}
String encodedName = hri.getEncodedName();
Lock lock = am.acquireRegionLock(encodedName);
try {
RegionState rit = regionStates.getRegionTransitionState(hri);
if (processDeadRegion(hri, e.getValue(), am, server.getCatalogTracker())) {
ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
// If this region is in transition on the dead server, it must be
// opening or pending_open, which should have been covered by AM#processServerShutdown
LOG.info("Skip assigning region " + hri.getRegionNameAsString()
+ " because it has been opened in " + addressFromAM.getServerName());
continue;
}
if (rit != null) {
if (rit.getServerName() != null && !rit.isOnServer(serverName)) {
// Skip regions that are in transition on other server
LOG.info("Skip assigning region in transition on other server" + rit);
continue;
}
try{
//clean zk node
LOG.info("Reassigning region with rs = " + rit + " and deleting zk node if exists");
ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), hri);
regionStates.updateRegionState(hri, State.OFFLINE);
} catch (KeeperException ke) {
this.server.abort("Unexpected ZK exception deleting unassigned node " + hri, ke);
return;
}
} else if (regionStates.isRegionInState(
hri, State.SPLITTING_NEW, State.MERGING_NEW)) {
regionStates.regionOffline(hri);
}
toAssignRegions.add(hri);
} else if (rit != null) {
if (rit.isPendingCloseOrClosing()
&& am.getZKTable().isDisablingOrDisabledTable(hri.getTable())) {
// If the table was partially disabled and the RS went down, we should clear the RIT
// and remove the node for the region.
// The rit that we use may be stale in case the table was in DISABLING state
// but though we did assign we will not be clearing the znode in CLOSING state.
// Doing this will have no harm. See HBASE-5927
regionStates.updateRegionState(hri, State.OFFLINE);
am.deleteClosingOrClosedNode(hri, rit.getServerName());
am.offlineDisabledRegion(hri);
} else {
LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
+ rit + " not to be assigned by SSH of server " + serverName);
}
}
} finally {
lock.unlock();
}
}
}
try {
am.assign(toAssignRegions);
} catch (InterruptedException ie) {
LOG.error("Caught " + ie + " during round-robin assignment");
throw new IOException(ie);
}
if (this.shouldSplitHlog && this.distributedLogReplay) {
// wait for region assignment completes
for (HRegionInfo hri : toAssignRegions) {
try {
if (!am.waitOnRegionToClearRegionsInTransition(hri, regionAssignmentWaitTimeout)) {
// Wait here is to avoid log replay hits current dead server and incur a RPC timeout
// when replay happens before region assignment completes.
LOG.warn("Region " + hri.getEncodedName()
+ " didn't complete assignment in time");
}