package org.apache.hadoop.hdfs;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.server.datanode.AvatarDataNode;
import org.apache.hadoop.hdfs.server.datanode.AvatarDataNode.ServicePair;
import org.apache.hadoop.hdfs.server.datanode.NamespaceService;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.hdfs.util.InjectionHandler;
import org.apache.hadoop.hdfs.protocol.FSConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
import org.apache.hadoop.hdfs.server.namenode.StandbySafeMode;
import org.junit.After;
import org.junit.AfterClass;
import static org.junit.Assert.*;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestStandbySafeMode {
// Cluster, configuration and file system handle used by the tests; (re)assigned in setUp().
private static MiniAvatarCluster cluster;
private static Configuration conf;
private static FileSystem fs;
// Sizing constants for the generated test data.
private static final int BLOCK_SIZE = 1024;
private static final int MAX_BLOCKS = 20;
private static final int MAX_FILE_SIZE = MAX_BLOCKS * BLOCK_SIZE;
// Upper bound, in milliseconds, for the polling loops in this class.
private static final int MAX_WAIT_TIME = 30 * 1000;
private static final Log LOG = LogFactory.getLog(TestStandbySafeMode.class);
// Injection handler installed by setUp() for the current test.
TestStandbySafeModeHandler h;
// Cleared by the ExitSafeMode helper thread on failure and read by the test
// thread, so it must be volatile to guarantee visibility across threads.
private static volatile boolean pass = true;
private static final Random random = new Random();
// Lease period handed to the configuration and the LeaseManager by the lease test.
private static final int LEASE_PERIOD = 10000;
/**
 * Starts ZooKeeper once, before any test of this class runs.
 *
 * @throws Exception if ZooKeeper cannot be started
 */
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  MiniAvatarCluster.createAndStartZooKeeper();
}
/**
 * Convenience overload of {@link #setUp(boolean, boolean)} that always
 * requests the short full-block-report delay.
 *
 * @param shortlease whether to configure the short lease settings
 * @throws Exception if the cluster cannot be started
 */
public void setUp(boolean shortlease) throws Exception {
  final boolean shortFBR = true;
  setUp(shortlease, shortFBR);
}
/**
 * Installs a fresh injection handler, builds the configuration and starts a
 * new {@link MiniAvatarCluster} for the calling test.
 *
 * @param shortlease when true, sets the soft lease period to
 *        {@code LEASE_PERIOD} and enables the write-retry/failover settings
 * @param shortFBR when true, sets the full block report delay to 1000
 * @throws Exception if the cluster cannot be started
 */
public void setUp(boolean shortlease, boolean shortFBR) throws Exception {
  // The handler is registered before the cluster is created so that it is
  // already active while the cluster starts.
  final TestStandbySafeModeHandler handler = new TestStandbySafeModeHandler();
  h = handler;
  InjectionHandler.set(handler);

  final Configuration c = new Configuration();
  c.setInt("dfs.block.size", BLOCK_SIZE);
  c.setFloat("dfs.namenode.replqueue.threshold-pct", 0.1f);
  // NOTE(review): the key is spelled "internval"; kept as-is because the
  // datanode may read exactly this spelling - confirm before "fixing" it.
  c.setInt("dfs.datanode.blockreceived.retry.internval", 200);
  c.setInt("dfs.heartbeat.interval", 1);
  if (shortFBR) {
    c.setInt("dfs.datanode.fullblockreport.delay", 1000);
  }
  if (shortlease) {
    c.setInt("dfs.softlease.period", LEASE_PERIOD);
    c.setBoolean("fs.ha.retrywrites", true);
    c.setInt("fs.avatar.failover.checkperiod", 200);
  }
  conf = c;

  // Presumably 3 is the number of datanodes - verify against MiniAvatarCluster.
  cluster = new MiniAvatarCluster(c, 3, true, null, null);
  fs = cluster.getFileSystem();
  pass = true;
}
/**
 * Shuts down the cluster started by {@code setUp} and removes the injection
 * handler.
 *
 * <p>The handler is always cleared, even when the shutdown throws, so that a
 * failing test cannot leak its handler into the next one. A missing cluster
 * (for example when {@code setUp} failed early) is tolerated.
 *
 * @throws Exception if the cluster shutdown fails
 */
@After
public void tearDown() throws Exception {
  try {
    if (h != null) {
      // A test that failed while ingest was stalled would otherwise leave the
      // ingest thread parked inside the handler during shutdown.
      h.stallIngest = false;
    }
    if (cluster != null) {
      cluster.shutDown();
    }
  } finally {
    // Drop the reference so a later tearDown never shuts down a stale cluster.
    cluster = null;
    InjectionHandler.clear();
  }
}
/**
 * Stops ZooKeeper once all tests of this class have run.
 *
 * @throws Exception if ZooKeeper cannot be shut down
 */
@AfterClass
public static void tearDownAfterClass() throws Exception {
  MiniAvatarCluster.shutDownZooKeeper();
}
/**
 * Asserts that the standby is in safe mode, then polls until the standby's
 * total and safe block counts both equal the primary's total block count, or
 * until {@code MAX_WAIT_TIME} milliseconds have elapsed, and finally asserts
 * that the counts match.
 *
 * @throws Exception if the wait is interrupted
 */
private void waitAndVerifyBlocks() throws Exception {
  final FSNamesystem standby = cluster.getStandbyAvatar(0).avatar.namesystem;
  final FSNamesystem primary = cluster.getPrimaryAvatar(0).avatar.namesystem;
  assertTrue(standby.isInSafeMode());

  long expected = primary.getBlocksTotal();
  long ingested = standby.getBlocksTotal();
  final long begin = System.currentTimeMillis();
  long safe = 0;

  // Poll once per second until the standby has caught up or time runs out.
  while (true) {
    final boolean timedOut = System.currentTimeMillis() - begin > MAX_WAIT_TIME;
    final boolean caughtUp = expected == ingested && safe == expected;
    if (timedOut || caughtUp) {
      break;
    }
    expected = primary.getBlocksTotal();
    ingested = standby.getBlocksTotal();
    safe = standby.getSafeBlocks();
    Thread.sleep(1000);
  }

  // The last sampled values must agree, otherwise the wait timed out.
  assertEquals(expected, ingested);
  assertEquals(expected, safe);
}
/**
 * Creates a set of test files under {@code topDir} with {@link DFSTestUtil}
 * and checks them right after creation.
 *
 * @param topDir directory under which the files are created
 * @throws Exception if creating or checking the files fails
 */
private void createTestFiles(String topDir) throws Exception {
  // Presumably 10 files at most 1 level deep - verify against DFSTestUtil.
  final DFSTestUtil files = new DFSTestUtil(topDir, 10, 1, MAX_FILE_SIZE);
  files.createFiles(fs, topDir);
  files.checkFiles(fs, topDir);
}
/**
 * Creates files, kills and restarts the standby, quiesces it up to the
 * primary's last written transaction id and verifies the block counts.
 */
@Test
public void testStandbyFullBlockReport() throws Exception {
  setUp(false);
  h.setIgnoreDatanodes(false);

  // Populate the namespace before the standby is bounced.
  final String dir = "/testStandbySafeMode";
  createTestFiles(dir);

  LOG.info("before restart");
  cluster.killStandby();
  cluster.restartStandby();
  LOG.info("after restart");

  // Checkpointing is disabled, so the standby has to be quiesced in order to
  // ingest edits.new.
  final long primaryTxid =
      cluster.getPrimaryAvatar(0).avatar.getLastWrittenTxId();
  cluster.getStandbyAvatar(0).avatar.quiesceStandby(primaryTxid);

  waitAndVerifyBlocks();
}
/**
 * Creates files, syncs the primary's edit log, quiesces the standby and
 * verifies that the standby's block counts match the primary's.
 */
@Test
public void testStandbySafeMode() throws Exception {
  setUp(false);
  h.setIgnoreDatanodes(false);

  final String dir = "/testStandbySafeMode";
  createTestFiles(dir);

  // syncEditLog() records the last written txid, syncs the edit log and
  // quiesces the standby up to that txid. The quiesce is needed to ingest
  // edits.new since checkpointing is disabled.
  syncEditLog();

  waitAndVerifyBlocks();
}
/**
 * Sums the lengths of the block reports of all datanodes for the primary's
 * namespace.
 *
 * @return total number of blocks reported across the datanodes
 * @throws Exception if a block report cannot be obtained
 */
private long getDatanodeBlocks() throws Exception {
  long total = 0;
  for (AvatarDataNode node : cluster.getDataNodes()) {
    total += node.data.getBlockReport(
        cluster.getPrimaryAvatar(0).avatar.getNamespaceID()).length;
  }
  return total;
}
/**
 * Creates and then deletes a file, and verifies that the datanodes end up
 * reporting zero blocks within {@code MAX_WAIT_TIME} milliseconds.
 */
@Test
public void testActiveDeletes() throws Exception {
  setUp(false);
  h.setIgnoreDatanodes(false);

  // Write a single file and remove it again right away.
  final Path file = new Path("/testActiveDeletes");
  DFSTestUtil.createFile(fs, file, (long) MAX_FILE_SIZE, (short) 3,
      System.currentTimeMillis());
  fs.delete(file, false);

  // Poll once per second until the datanodes hold no blocks or time is up.
  final long begin = System.currentTimeMillis();
  while (true) {
    if (System.currentTimeMillis() - begin > MAX_WAIT_TIME) {
      break;
    }
    if (getDatanodeBlocks() == 0) {
      break;
    }
    Thread.sleep(1000);
  }

  // Every block on the datanodes should be gone by now.
  assertEquals(0, getDatanodeBlocks());
}
/**
 * Creates a file while the handler reports the standby as fallen behind,
 * forces full block reports, waits for the block report metric to advance by
 * three, and then verifies the standby's block counts.
 */
@Test
public void testClearDatanodeRetryList() throws Exception {
  setUp(false);
  cluster.restartAvatarNodes();
  h.setIgnoreDatanodes(true);

  // Write one file while incremental reports are being backed off.
  final Path file = new Path("/testClearDatanodeRetryList");
  DFSTestUtil.createFile(fs, file, (long) MAX_FILE_SIZE, (short) 3,
      System.currentTimeMillis());

  // Force full block reports; incremental reports still get backed off.
  h.forceblockreport = true;
  for (AvatarDataNode datanode : cluster.getDataNodes()) {
    datanode.scheduleNSBlockReport(0);
  }

  final long baseline =
      NameNode.getNameNodeMetrics().numBlockReport.getCurrentIntervalValue();

  // Wait until three more block reports were counted (presumably one per
  // datanode - verify) or the time budget is exhausted.
  final long begin = System.currentTimeMillis();
  while (true) {
    if (System.currentTimeMillis() - begin > MAX_WAIT_TIME) {
      break;
    }
    if (NameNode.getNameNodeMetrics().numBlockReport
        .getCurrentIntervalValue() - 3 >= baseline) {
      break;
    }
    Thread.sleep(500);
  }

  // The full block reports should have been processed, which should also
  // have cleared the incremental list.
  assertTrue(NameNode.getNameNodeMetrics()
      .numBlockReport.getCurrentIntervalValue() - 3 >= baseline);

  // Disable BACKOFF so that incremental reports go through from now on.
  h.setIgnoreDatanodes(false);

  // Checkpointing is disabled, so quiesce the standby to ingest edits.new.
  final long primaryTxid =
      cluster.getPrimaryAvatar(0).avatar.getLastWrittenTxId();
  cluster.getStandbyAvatar(0).avatar.quiesceStandby(primaryTxid);
  waitAndVerifyBlocks();
}
/**
 * Injection handler that lets the tests steer heartbeat and block report
 * scheduling, the "standby fell behind" decision, and stall edit ingestion.
 *
 * <p>All flags are written by the test thread and read from whichever thread
 * raises the injection event, so every one of them is volatile.
 */
class TestStandbySafeModeHandler extends InjectionHandler {
  // Returned for OFFERSERVICE_SCHEDULE_HEARTBEAT.
  volatile boolean forceheartbeat = false;
  // Returned for OFFERSERVICE_SCHEDULE_BR.
  volatile boolean forceblockreport = false;
  // Returned for STANDBY_FELL_BEHIND; volatile like its sibling flags because
  // it is read across threads as well.
  volatile boolean ignoreDatanodes;
  // While true, INGEST_BEFORE_LOAD_EDIT blocks the calling thread.
  public volatile boolean stallIngest = false;

  /**
   * Sets the value returned for the STANDBY_FELL_BEHIND event.
   *
   * @param v new value of the flag
   */
  public void setIgnoreDatanodes(boolean v) {
    ignoreDatanodes = v;
  }

  /**
   * Answers the three condition events steered by this handler.
   *
   * @return the matching flag for a known event, {@code false} otherwise
   */
  @Override
  protected boolean _falseCondition(InjectionEvent event, Object... args) {
    if (event == InjectionEvent.STANDBY_FELL_BEHIND) {
      return ignoreDatanodes;
    }
    if (event == InjectionEvent.OFFERSERVICE_SCHEDULE_HEARTBEAT) {
      return forceheartbeat;
    }
    if (event == InjectionEvent.OFFERSERVICE_SCHEDULE_BR) {
      return forceblockreport;
    }
    return false;
  }

  /**
   * Blocks the calling thread on INGEST_BEFORE_LOAD_EDIT for as long as
   * {@code stallIngest} is set, re-checking the flag once per second.
   */
  @Override
  protected void _processEvent(InjectionEvent event, Object... args) {
    if (event != InjectionEvent.INGEST_BEFORE_LOAD_EDIT) {
      return;
    }
    // The stall must outlast interrupts, so the interrupt is remembered and
    // re-asserted once the stall is over instead of being silently dropped.
    boolean interrupted = false;
    try {
      while (stallIngest) {
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ie) {
          interrupted = true;
        }
      }
    } finally {
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }
}
/**
 * Shuts down one datanode, fails over with a helper thread that leaves
 * standby safe mode once only that datanode's heartbeat is outstanding, and
 * verifies that two live datanodes remain afterwards.
 */
@Test
public void testDeadDatanodeFailover() throws Exception {
  setUp(false);
  h.setIgnoreDatanodes(false);
  // Create test files.
  createTestFiles("/testDeadDatanodeFailover");
  cluster.shutDownDataNode(0);
  FSNamesystem ns = cluster.getStandbyAvatar(0).avatar.namesystem;
  StandbySafeMode safeMode = cluster.getStandbyAvatar(0).avatar.getStandbySafeMode();
  Thread exitSafeMode = new ExitSafeMode(safeMode, ns);
  exitSafeMode.start();
  try {
    cluster.failOver();
    // One datanode should be removed after failover
    assertEquals(2,
        cluster.getPrimaryAvatar(0).avatar.namesystem
            .datanodeReport(DatanodeReportType.LIVE).length);
    assertTrue(pass);
  } finally {
    // The helper loops until its exit condition holds. Give it a moment to
    // finish on its own, then cancel it so it cannot outlive this test and
    // keep polling a cluster that has already been shut down.
    exitSafeMode.join(1000);
    if (exitSafeMode.isAlive()) {
      exitSafeMode.interrupt();
      exitSafeMode.join(MAX_WAIT_TIME);
    }
  }
}
private class ExitSafeMode extends Thread {
private final StandbySafeMode safeMode;
private final FSNamesystem namesystem;
public ExitSafeMode(StandbySafeMode safeMode, FSNamesystem namesystem) {
this.safeMode = safeMode;
this.namesystem = namesystem;
}
public void run() {
try {
while (true) {
if (safeMode.failoverInProgress()
&& safeMode.getOutStandingHeartbeats().size() == 1
&& safeMode.getOutStandingReports().size() == 0
&& namesystem.getBlocksTotal() == namesystem.getSafeBlocks()) {
safeMode.leave(false);
break;
}
safeMode.canLeave();
Thread.sleep(1000);
}
} catch (Exception e) {
LOG.info("SafeMode exit failed", e);
pass = false;
}
}
}
/**
 * Writes to a file, waits two lease periods, fails over, renews the lease,
 * waits up to {@code MAX_WAIT_TIME} milliseconds for the lease to disappear
 * from the lease manager, and then writes to the same stream again.
 */
@Test
public void testLeaseExpiry() throws Exception {
  setUp(true);
  h.setIgnoreDatanodes(false);

  // Use short lease periods on the standby's lease manager.
  final LeaseManager leases =
      cluster.getStandbyAvatar(0).avatar.namesystem.leaseManager;
  leases.setLeasePeriod(LEASE_PERIOD, LEASE_PERIOD);

  // Write and sync some random data.
  final String src = "/testLeaseExpiry";
  // NOTE(review): this stream is never closed in this test; presumably the
  // cluster shutdown in tearDown reclaims it - confirm.
  final FSDataOutputStream out = fs.create(new Path(src));
  final byte[] payload = new byte[BLOCK_SIZE * 2];
  random.nextBytes(payload);
  out.write(payload);
  out.sync();

  // Wait for the hard lease time to expire.
  Thread.sleep(LEASE_PERIOD * 2);

  cluster.failOver();
  LOG.info("Failover done");

  // Renew the lease through the client of the file system in use.
  final DistributedFileSystem dfs = (DistributedFileSystem) fs;
  dfs.getClient().leasechecker.renew();
  LOG.info("Lease renewal done");

  // Wait to see whether the lease expires.
  final long begin = System.currentTimeMillis();
  while (true) {
    if (System.currentTimeMillis() - begin >= MAX_WAIT_TIME) {
      break;
    }
    if (leases.getLeaseByPath(src) == null) {
      break;
    }
    Thread.sleep(1000);
  }
  LOG.info("Wait for lease done");

  // The stream must still accept writes.
  out.write(payload);
  out.sync();
}
/**
 * Waits until the standby reports the same total number of blocks as the
 * primary.
 *
 * <p>The wait is bounded by {@code MAX_WAIT_TIME} milliseconds. If the
 * standby has not caught up by then the test fails instead of hanging
 * forever.
 *
 * @throws Exception if the wait is interrupted
 */
private void blocksEqual() throws Exception {
  final long deadline = System.currentTimeMillis() + MAX_WAIT_TIME;
  long primaryBlocks;
  long standbyBlocks;
  while (true) {
    primaryBlocks = cluster.getPrimaryAvatar(0).avatar.namesystem
        .getBlocksTotal();
    standbyBlocks = cluster.getStandbyAvatar(0).avatar.namesystem
        .getBlocksTotal();
    if (primaryBlocks == standbyBlocks
        || System.currentTimeMillis() > deadline) {
      break;
    }
    Thread.sleep(300);
  }
  assertEquals("Standby block count did not match the primary within "
      + MAX_WAIT_TIME + " ms", primaryBlocks, standbyBlocks);
}
/**
 * Records the primary's last written transaction id, syncs the primary's
 * edit log and quiesces the standby up to that transaction id.
 *
 * @throws Exception if the sync or the quiesce fails
 */
private void syncEditLog() throws Exception {
  // The txid is sampled before the sync, and the standby is quiesced to it.
  final long primaryTxid =
      cluster.getPrimaryAvatar(0).avatar.getLastWrittenTxId();
  cluster.getPrimaryAvatar(0).avatar.getFSImage().getEditLog().logSync();
  cluster.getStandbyAvatar(0).avatar.quiesceStandby(primaryTxid);
}
/**
 * Verifies the standby's block counts after files are deleted and re-created
 * while edit ingestion is stalled.
 *
 * <p>The stall flag is always reset, even when the delete/create section
 * throws, so that a failure cannot leave the ingest thread parked inside the
 * injection handler.
 */
@Test
public void testStandbySafeModeDel() throws Exception {
  setUp(false, false);
  h.setIgnoreDatanodes(false);
  // Create test files.
  String topDir = "/testStandbySafeModeDel";
  DFSTestUtil util = new DFSTestUtil(topDir, 1, 1, MAX_FILE_SIZE);
  util.createFiles(fs, topDir);
  syncEditLog();
  // Force full block reports, incremental reports still get backed off.
  h.forceheartbeat = true;
  Thread.sleep(3000);
  for (AvatarDataNode dn : cluster.getDataNodes()) {
    dn.scheduleNSBlockReport(0);
  }
  waitAndVerifyBlocks();
  h.forceheartbeat = false;
  cluster.restartAvatarNodes();
  fs = cluster.getFileSystem();
  // Wait for ingest.
  blocksEqual();
  // Same forced full block report round against the restarted avatar nodes.
  h.forceheartbeat = true;
  Thread.sleep(3000);
  for (AvatarDataNode dn : cluster.getDataNodes()) {
    dn.scheduleNSBlockReport(0);
  }
  waitAndVerifyBlocks();
  h.forceheartbeat = false;
  // Stall ingest so that incremental block reports are retried.
  h.stallIngest = true;
  try {
    for (String fileName : util.getFileNames(topDir)) {
      fs.delete(new Path(fileName), false);
    }
    DFSTestUtil util1 = new DFSTestUtil(topDir, 1, 1, MAX_FILE_SIZE);
    util1.createFiles(fs, topDir);
    // Wait for incremental block reports.
    Thread.sleep(10000);
  } finally {
    // Restart ingest.
    h.stallIngest = false;
  }
  // Wait for ingest.
  blocksEqual();
  waitAndVerifyBlocks();
}
/**
 * Same scenario as {@code testStandbySafeModeDel}, except that the files are
 * deleted and re-created while the handler reports the standby as fallen
 * behind rather than while ingest is stalled.
 */
@Test
public void testStandbySafeModeDel1() throws Exception {
  setUp(false, false);
  h.setIgnoreDatanodes(false);

  // Initial data set, synced and quiesced onto the standby.
  final String root = "/testStandbySafeModeDel";
  final DFSTestUtil initialFiles = new DFSTestUtil(root, 1, 1, MAX_FILE_SIZE);
  initialFiles.createFiles(fs, root);
  syncEditLog();

  // Force full block reports; incremental reports still get backed off.
  h.forceheartbeat = true;
  Thread.sleep(3000);
  for (AvatarDataNode datanode : cluster.getDataNodes()) {
    datanode.scheduleNSBlockReport(0);
  }
  waitAndVerifyBlocks();
  h.forceheartbeat = false;

  // Bounce the avatar nodes and pick up the new file system handle.
  cluster.restartAvatarNodes();
  fs = cluster.getFileSystem();
  // Wait for ingest.
  blocksEqual();

  // Second round of forced full block reports after the restart.
  h.forceheartbeat = true;
  Thread.sleep(3000);
  for (AvatarDataNode datanode : cluster.getDataNodes()) {
    datanode.scheduleNSBlockReport(0);
  }
  waitAndVerifyBlocks();
  h.forceheartbeat = false;

  // Replace the files while STANDBY_FELL_BEHIND is answered with true.
  h.setIgnoreDatanodes(true);
  for (String name : initialFiles.getFileNames(root)) {
    fs.delete(new Path(name), false);
  }
  final DFSTestUtil replacementFiles =
      new DFSTestUtil(root, 1, 1, MAX_FILE_SIZE);
  replacementFiles.createFiles(fs, root);

  // Wait for ingest, then answer STANDBY_FELL_BEHIND with false again.
  blocksEqual();
  h.setIgnoreDatanodes(false);
  waitAndVerifyBlocks();
}
}