}
sleeper.sleep();
LOG.warn("No response from master on reportForDuty. Sleeping and " +
"then trying again.");
}
HMsg outboundArray[] = null;
long lastMsg = 0;
// Now ask master what it wants us to do and tell it what we have done
for (int tries = 0; !stopRequested.get() && isHealthy();) {
// Try to get the root region location from ZooKeeper.
if (!haveRootRegion.get()) {
HServerAddress rootServer = zooKeeperWrapper.readRootRegionLocation();
if (rootServer != null) {
// By setting the root region location, we bypass the wait imposed on
// HTable for all regions being assigned.
this.connection.setRootRegionLocation(
new HRegionLocation(HRegionInfo.ROOT_REGIONINFO, rootServer));
haveRootRegion.set(true);
}
}
long now = System.currentTimeMillis();
if (lastMsg != 0 && (now - lastMsg) >= serverLeaseTimeout) {
// It has been way too long since we last reported to the master.
LOG.warn("unable to report to master for " + (now - lastMsg) +
" milliseconds - retrying");
}
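// Send messages to the master IF this.msgInterval has elapsed OR if
// we have something to tell (and the previous send to the master did
// not just fail).
// NOTE(review): outboundArray starts out null and is dereferenced below
// whenever the interval check is false - confirm it cannot be null here.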
if ((now - lastMsg) >= msgInterval ||
(outboundArray.length == 0 && !this.outboundMsgs.isEmpty())) {
try {
doMetrics();
MemoryUsage memory =
ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
HServerLoad hsl = new HServerLoad(requestCount.get(),
(int)(memory.getUsed()/1024/1024),
(int)(memory.getMax()/1024/1024));
for (HRegion r: onlineRegions.values()) {
hsl.addRegionInfo(createRegionLoad(r));
}
this.serverInfo.setLoad(hsl);
this.requestCount.set(0);
outboundArray = getOutboundMsgs(outboundArray);
HMsg msgs[] = hbaseMaster.regionServerReport(
serverInfo, outboundArray, getMostLoadedRegions());
lastMsg = System.currentTimeMillis();
outboundArray = updateOutboundMsgs(outboundArray);
if (this.quiesced.get() && onlineRegions.size() == 0) {
// We've just told the master we're exiting because we aren't
// serving any regions. So set the stop bit and exit.
LOG.info("Server quiesced and not serving any regions. " +
"Starting shutdown");
stopRequested.set(true);
this.outboundMsgs.clear();
continue;
}
// Queue up the HMaster's instruction stream for processing
boolean restart = false;
for(int i = 0;
!restart && !stopRequested.get() && i < msgs.length;
i++) {
LOG.info(msgs[i].toString());
if (safeMode.get()) {
if (zooKeeperWrapper.checkOutOfSafeMode()) {
this.connection.unsetRootRegionLocation();
synchronized (safeMode) {
safeMode.set(false);
safeMode.notifyAll();
}
}
}
switch(msgs[i].getType()) {
case MSG_CALL_SERVER_STARTUP:
// We get MSG_CALL_SERVER_STARTUP on startup, but we can also
// get it when the master is panicking because, for instance,
// HDFS has been yanked out from under it. Be wary of
// this message.
if (checkFileSystem()) {
closeAllRegions();
try {
hlog.closeAndDelete();
} catch (Exception e) {
LOG.error("error closing and deleting HLog", e);
}
try {
serverInfo.setStartCode(System.currentTimeMillis());
hlog = setupHLog();
this.hlogFlusher.setHLog(hlog);
} catch (IOException e) {
this.abortRequested = true;
this.stopRequested.set(true);
e = RemoteExceptionHandler.checkIOException(e);
LOG.fatal("error restarting server", e);
break;
}
reportForDuty();
restart = true;
} else {
LOG.fatal("file system available check failed. " +
"Shutting down server.");
}
break;
case MSG_REGIONSERVER_STOP:
stopRequested.set(true);
break;
case MSG_REGIONSERVER_QUIESCE:
if (!quiesceRequested) {
try {
toDo.put(new ToDoEntry(msgs[i]));
} catch (InterruptedException e) {
throw new RuntimeException("Putting into msgQueue was " +
"interrupted.", e);
}
quiesceRequested = true;
}
break;
default:
if (fsOk) {
try {
toDo.put(new ToDoEntry(msgs[i]));
} catch (InterruptedException e) {
throw new RuntimeException("Putting into msgQueue was " +
"interrupted.", e);
}
}
}
}
// Reset tries count if we had a successful transaction.
tries = 0;
if (restart || this.stopRequested.get()) {
toDo.clear();
continue;
}
} catch (Exception e) {
if (e instanceof IOException) {
e = RemoteExceptionHandler.checkIOException((IOException) e);
}
tries++;
if (tries > 0 && (tries % this.numRetries) == 0) {
// Check filesystem every so often.
checkFileSystem();
}
if (this.stopRequested.get()) {
LOG.info("Stop requested, clearing toDo despite exception");
toDo.clear();
continue;
}
LOG.warn("Attempt=" + tries, e);
// No point retrying immediately; this is probably a connection-to-
// master issue. Updating lastMsg below will cause us to sleep.
lastMsg = System.currentTimeMillis();
}
}
// Do some housekeeping before going to sleep
housekeeping();
sleeper.sleep(lastMsg);
} // for
} catch (Throwable t) {
if (!checkOOME(t)) {
LOG.fatal("Unhandled exception. Aborting...", t);
abort();
}
}
this.leases.closeAfterLeasesExpire();
this.worker.stop();
this.server.stop();
if (this.infoServer != null) {
LOG.info("Stopping infoServer");
try {
this.infoServer.stop();
} catch (Exception e) {
e.printStackTrace();
}
}
// Send cache a shutdown.
LruBlockCache c = (LruBlockCache)StoreFile.getBlockCache(this.conf);
if (c != null) c.shutdown();
// Send interrupts to wake up threads if sleeping so they notice shutdown.
// TODO: Should we check they are alive? If OOME could have exited already
cacheFlusher.interruptIfNecessary();
hlogFlusher.interrupt();
compactSplitThread.interruptIfNecessary();
hlogRoller.interruptIfNecessary();
this.majorCompactionChecker.interrupt();
if (abortRequested) {
if (this.fsOk) {
// Only try to clean up if the file system is available
try {
if (this.hlog != null) {
this.hlog.close();
LOG.info("On abort, closed hlog");
}
} catch (Throwable e) {
LOG.error("Unable to close log in abort",
RemoteExceptionHandler.checkThrowable(e));
}
closeAllRegions(); // Don't leave any open file handles
}
LOG.info("aborting server at: " +
serverInfo.getServerAddress().toString());
} else {
ArrayList<HRegion> closedRegions = closeAllRegions();
try {
if (this.hlog != null) {
hlog.closeAndDelete();
}
} catch (Throwable e) {
LOG.error("Close and delete failed",
RemoteExceptionHandler.checkThrowable(e));
}
try {
HMsg[] exitMsg = new HMsg[closedRegions.size() + 1];
exitMsg[0] = REPORT_EXITING;
// Tell the master what regions we are/were serving
int i = 1;
for (HRegion region: closedRegions) {
exitMsg[i++] = new HMsg(HMsg.Type.MSG_REPORT_CLOSE,
region.getRegionInfo());
}
LOG.info("telling master that region server is shutting down at: " +
serverInfo.getServerAddress().toString());