namesystem.getInServiceXceiverAverage(), EPSILON);
// shutdown half the nodes and force a heartbeat check to ensure
// counts are accurate
for (int i=0; i < nodes/2; i++) {
DataNode dn = datanodes.get(i);
DatanodeDescriptor dnd = dnm.getDatanode(dn.getDatanodeId());
dn.shutdown();
dnd.setLastUpdate(0L);
BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
expectedInServiceNodes--;
assertEquals(expectedInServiceNodes, namesystem.getNumLiveDataNodes());
assertEquals(expectedInServiceNodes, namesystem.getNumDatanodesInService());
}
// restart the nodes to verify that counts are correct after
// node re-registration
cluster.restartDataNodes();
cluster.waitActive();
datanodes = cluster.getDataNodes();
expectedInServiceNodes = nodes;
assertEquals(nodes, datanodes.size());
assertEquals(nodes, namesystem.getNumLiveDataNodes());
assertEquals(expectedInServiceNodes, namesystem.getNumDatanodesInService());
assertEquals(expectedTotalLoad, namesystem.getTotalLoad());
assertEquals((double)expectedInServiceLoad/expectedInServiceLoad,
namesystem.getInServiceXceiverAverage(), EPSILON);
// create streams and hsync to force datastreamers to start
DFSOutputStream[] streams = new DFSOutputStream[fileCount];
for (int i=0; i < fileCount; i++) {
streams[i] = (DFSOutputStream)fs.create(new Path("/f"+i), fileRepl)
.getWrappedStream();
streams[i].write("1".getBytes());
streams[i].hsync();
// the load for writers is 2 because both the write xceiver & packet
// responder threads are counted in the load
expectedTotalLoad += 2*fileRepl;
expectedInServiceLoad += 2*fileRepl;
}
// force nodes to send load update
triggerHeartbeats(datanodes);
assertEquals(nodes, namesystem.getNumLiveDataNodes());
assertEquals(expectedInServiceNodes,
namesystem.getNumDatanodesInService());
assertEquals(expectedTotalLoad, namesystem.getTotalLoad());
assertEquals((double)expectedInServiceLoad/expectedInServiceNodes,
namesystem.getInServiceXceiverAverage(), EPSILON);
// decommission a few nodes, subtract their load from the expected
// in-service load, and trigger a heartbeat to force a load update
for (int i=0; i < fileRepl; i++) {
expectedInServiceNodes--;
DatanodeDescriptor dnd =
dnm.getDatanode(datanodes.get(i).getDatanodeId());
expectedInServiceLoad -= dnd.getXceiverCount();
dnm.startDecommission(dnd);
DataNodeTestUtils.triggerHeartbeat(datanodes.get(i));
Thread.sleep(100);
assertEquals(nodes, namesystem.getNumLiveDataNodes());
assertEquals(expectedInServiceNodes,
namesystem.getNumDatanodesInService());
assertEquals(expectedTotalLoad, namesystem.getTotalLoad());
assertEquals((double)expectedInServiceLoad/expectedInServiceNodes,
namesystem.getInServiceXceiverAverage(), EPSILON);
}
// check expected load while closing each stream. recalc expected
// load based on whether the nodes in the pipeline are decomm
for (int i=0; i < fileCount; i++) {
int decomm = 0;
for (DatanodeInfo dni : streams[i].getPipeline()) {
DatanodeDescriptor dnd = dnm.getDatanode(dni);
expectedTotalLoad -= 2;
if (dnd.isDecommissionInProgress() || dnd.isDecommissioned()) {
decomm++;
} else {
expectedInServiceLoad -= 2;
}
}
try {
streams[i].close();
} catch (IOException ioe) {
// nodes will go decommissioned even if there's a UC block whose
// other locations are decommissioned too. we'll ignore that
// bug for now
if (decomm < fileRepl) {
throw ioe;
}
}
triggerHeartbeats(datanodes);
// verify node count and loads
assertEquals(nodes, namesystem.getNumLiveDataNodes());
assertEquals(expectedInServiceNodes,
namesystem.getNumDatanodesInService());
assertEquals(expectedTotalLoad, namesystem.getTotalLoad());
assertEquals((double)expectedInServiceLoad/expectedInServiceNodes,
namesystem.getInServiceXceiverAverage(), EPSILON);
}
// shutdown each node, verify node counts based on decomm state
for (int i=0; i < nodes; i++) {
DataNode dn = datanodes.get(i);
dn.shutdown();
// force it to appear dead so live count decreases
DatanodeDescriptor dnDesc = dnm.getDatanode(dn.getDatanodeId());
dnDesc.setLastUpdate(0L);
BlockManagerTestUtil.checkHeartbeat(namesystem.getBlockManager());
assertEquals(nodes-1-i, namesystem.getNumLiveDataNodes());
// first few nodes are already out of service
if (i >= fileRepl) {