// Build a mini DFS cluster that starts with REPLICATION_FACTOR datanodes.
final MiniDFSCluster cluster =
new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION_FACTOR).build();
try {
// Grab the name-node side handles used throughout the test: the namesystem,
// its block manager, and the heartbeat manager that tracks datanode liveness.
final FSNamesystem namesystem = cluster.getNamesystem();
final BlockManager bm = namesystem.getBlockManager();
final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
final FileSystem fs = cluster.getFileSystem();
// populate the cluster with a one block file
final Path FILE_PATH = new Path("/testfile");
// NOTE(review): the 1L arguments are presumably the file length and the
// random seed -- confirm against DFSTestUtil.createFile.
DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
// wait until the file has reached REPLICATION_FACTOR replicas
DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
// keep a copy of all datanode descriptors; this snapshot is taken before the
// extra nodes are started, so it only contains the original datanodes
final DatanodeDescriptor[] datanodes = hm.getDatanodes();
// start two new nodes
cluster.startDataNodes(conf, 2, true, null, null);
cluster.waitActive();
// Bring down the first datanode; its properties are kept so that the same
// node can be restarted further down.
DatanodeDescriptor datanode = datanodes[0];
DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
// Make sure that the NN detects that the datanode is down. The write lock is
// acquired BEFORE entering the try block so that the finally clause only ever
// releases a lock that was actually obtained.
namesystem.writeLock();
try {
  synchronized (hm) {
    datanode.setLastUpdate(0); // mark it dead
    hm.heartbeatCheck();
  }
} finally {
  namesystem.writeUnlock();
}
// the block will be replicated
DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
// restart the first datanode, using the properties saved when it was stopped
cluster.restartDataNode(dnprop);
cluster.waitActive();
// check if excessive replica is detected (transient): poll the replica
// counts until at least one excess replica is reported for the block.
// NOTE(review): initializeTimeout/checkTimeout are defined outside this view;
// presumably checkTimeout fails the test with the given message once TIMEOUT
// has elapsed -- confirm.
initializeTimeout(TIMEOUT);
while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() == 0) {
checkTimeout("excess replicas not detected");
}
// Find a datanode that holds the block but is NOT listed as holding an
// excess replica of it.
final Iterator<DatanodeDescriptor> iter = bm.blocksMap
    .nodeIterator(block.getLocalBlock());
DatanodeDescriptor nonExcessDN = null;
while (iter.hasNext()) {
  DatanodeDescriptor dn = iter.next();
  Collection<Block> blocks = bm.excessReplicateMap.get(dn.getStorageID());
  // The excess map stores Block instances, so the lookup must use the local
  // Block; passing the ExtendedBlock itself can never match an entry.
  if (blocks == null || !blocks.contains(block.getLocalBlock())) {
    nonExcessDN = dn;
    break;
  }
}
assertTrue("no datanode with a non-excess replica found", nonExcessDN != null);
// Bring down the datanode that holds a non-excess replica.
dnprop = cluster.stopDataNode(nonExcessDN.getName());
// Make sure that the NN detects that the datanode is down. The write lock is
// acquired BEFORE entering the try block so that the finally clause only ever
// releases a lock that was actually obtained.
namesystem.writeLock();
try {
  synchronized (hm) {
    nonExcessDN.setLastUpdate(0); // mark it dead
    hm.heartbeatCheck();
  }
} finally {
  namesystem.writeUnlock();
}