//
// If AssignmentManager hasn't finished rebuilding user regions,
// we are not ready to assign dead regions either, so we re-queue
// the dead server for further processing.
AssignmentManager am = services.getAssignmentManager();
ServerManager serverManager = services.getServerManager();
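// A dead server carrying hbase:meta is requeued as well: meta must be
// reassigned (normally via MetaServerShutdownHandler) before the user
// regions of any dead server can be processed.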
if (isCarryingMeta() /* hbase:meta */ || !am.isFailoverCleanupDone()) {
serverManager.processDeadServer(serverName, this.shouldSplitHlog);
return;
}
// Wait on meta to come online; we need it to progress.
// TODO: What is the best way to block here? We should build this retry
// logic into the MetaTableAccessor operations themselves.
// TODO: Is reading hbase:meta necessary when the Master already has the
// cluster state in memory? It should be possible to avoid the hbase:meta
// read in all but one case. On split, the RS updates the hbase:meta
// table and THEN informs the master of the split via zk nodes in
// 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if
// the regionserver dies, these nodes do not stick around and this server
// shutdown processing does fixup (see the fixupDaughters method below).
// If we wanted to skip the hbase:meta scan, we'd have to change at least the
// final SPLIT message to be permanent in zk so in here we'd know a SPLIT
// completed (zk is updated after edits to hbase:meta have gone in). See
// {@link SplitTransaction}. We'd also have to figure out another way of
// doing the hbase:meta daughters fixup below.
Set<HRegionInfo> hris = null;
try {
server.getMetaTableLocator().waitMetaRegionLocation(server.getZooKeeper());
if (BaseLoadBalancer.tablesOnMaster(server.getConfiguration())) {
while (!this.server.isStopped() && serverManager.countOfRegionServers() < 2) {
// Wait till at least another regionserver is up besides the active master
// so that we don't assign all regions to the active master.
// This is best effort, because a newly joined regionserver
// could crash right after joining.
Thread.sleep(100);
}
}
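// Snapshot the regions the dead server was carrying, according to the
// master's in-memory region states; these are the reassignment candidates.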
hris = am.getRegionStates().getServerRegions(serverName);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw (InterruptedIOException)new InterruptedIOException().initCause(e);
}
if (this.server.isStopped()) {
throw new IOException("Server is stopped");
}
// Setting the recovery mode from configuration is delayed until all
// outstanding split-log tasks have drained.
this.services.getMasterFileSystem().setLogRecoveryMode();
boolean distributedLogReplay =
(this.services.getMasterFileSystem().getLogRecoveryMode() == RecoveryMode.LOG_REPLAY);
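// In LOG_REPLAY mode (distributed log replay), regions are marked as
// recovering and can be assigned before the dead server's WAL edits have
// been replayed; in LOG_SPLITTING mode, the logs must be split before
// any assignment can happen.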
try {
if (this.shouldSplitHlog) {
if (distributedLogReplay) {
LOG.info("Mark regions in recovery for crashed server " + serverName +
" before assignment; regions=" + hris);
MasterFileSystem mfs = this.services.getMasterFileSystem();
mfs.prepareLogReplay(serverName, hris);
} else {
LOG.info("Splitting logs for " + serverName +
" before assignment; region count=" + (hris == null ? 0 : hris.size()));
this.services.getMasterFileSystem().splitLog(serverName);
}
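// Note that the dead server's logs have been handled (split, or marked
// for replay) so region assignment can proceed.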
am.getRegionStates().logSplit(serverName);
} else {
LOG.info("Skipping log splitting for " + serverName);
}
} catch (IOException ioe) {
resubmit(serverName, ioe);
}
// Clean out anything in regions in transition. Being conservative, we do
// this after log splitting. Some states -- OPENING? OFFLINE? -- could be
// handled before log splitting, and others that depend on it, like
// CLOSING, after.
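// processServerShutdown clears the dead server's regions from the
// in-transition set and returns those that still need to be assigned.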
List<HRegionInfo> regionsInTransition = am.processServerShutdown(serverName);
LOG.info("Reassigning " + ((hris == null)? 0: hris.size()) +
" region(s) that " + (serverName == null? "null": serverName) +
" was carrying (and " + regionsInTransition.size() +
" regions(s) that were opening on this server)");
List<HRegionInfo> toAssignRegions = new ArrayList<HRegionInfo>();
toAssignRegions.addAll(regionsInTransition);
// Iterate regions that were on this server and assign them
if (hris != null && !hris.isEmpty()) {
RegionStates regionStates = am.getRegionStates();
for (HRegionInfo hri: hris) {
if (regionsInTransition.contains(hri)) {
continue;
}
String encodedName = hri.getEncodedName();
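// Take the per-region lock so we do not race with a concurrent
// assign/unassign of the same region.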
Lock lock = am.acquireRegionLock(encodedName);
try {
RegionState rit = regionStates.getRegionTransitionState(hri);
if (processDeadRegion(hri, am)) {
ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
// If this region is in transition on the dead server, it must be
// opening or pending_open, which should have been covered by AM#processServerShutdown
LOG.info("Skip assigning region " + hri.getRegionNameAsString()
+ " because it has been opened in " + addressFromAM.getServerName());
continue;
}
if (rit != null) {
if (rit.getServerName() != null && !rit.isOnServer(serverName)) {
// Skip regions that are in transition on other server
LOG.info("Skip assigning region in transition on other server" + rit);
continue;
}
LOG.info("Reassigning region with rs = " + rit);
regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
} else if (regionStates.isRegionInState(
hri, RegionState.State.SPLITTING_NEW, RegionState.State.MERGING_NEW)) {
regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
}
toAssignRegions.add(hri);
} else if (rit != null) {
if ((rit.isClosing() || rit.isFailedClose() || rit.isOffline())
&& am.getTableStateManager().isTableState(hri.getTable(),
TableState.State.DISABLED, TableState.State.DISABLING) ||
am.getReplicasToClose().contains(hri)) {
// If the table was partially disabled and the RS went down, we should
// clear the RIT and remove the node for the region. The RIT we use may
// be stale if the table was in DISABLING state: even though the region
// was assigned, the znode left in CLOSING state would not have been
// cleared. Clearing it here is harmless. See HBASE-5927.
regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
am.offlineDisabledRegion(hri);
} else {
LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
+ rit + " not to be assigned by SSH of server " + serverName);
}
}
} finally {
lock.unlock();
}
}
}
try {
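// Bulk-assign the collected regions; with the default balancer this
// distributes them round-robin across the live regionservers.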
am.assign(toAssignRegions);
} catch (InterruptedException ie) {
LOG.error("Caught " + ie + " during round-robin assignment");
throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
} catch (IOException ioe) {
LOG.info("Caught " + ioe + " during region assignment, will retry");
// On retry, only flag the server for HLog splitting if shouldSplitHlog is
// set and we are in distributed log replay mode; otherwise the logs were
// already split above.
serverManager.processDeadServer(serverName,
this.shouldSplitHlog && distributedLogReplay);
return;
}
if (this.shouldSplitHlog && distributedLogReplay) {