StaticMapping.addNodeToRack(hosts4[0], rack4[0]+nodeGroup4[0]);
}
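/**
 * Verifies nodegroup-aware task placement: depending on where the
 * input blocks live in the four-layer (rack/nodegroup) topology, maps
 * should be scheduled with rack-local or nodegroup-local locality.
 */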
public void testTaskPlacement() throws IOException {
String namenode = null;
MiniDFSClusterWithNodeGroup dfs = null;
MiniMRCluster mr = null;
FileSystem fileSys = null;
String testName = "TestForNodeGroupAwareness";
try {
final int taskTrackers = 1;
/* Start three datanodes: datanode1 in r1/nodegroup1, datanode2 in
* r1/nodegroup2 and datanode3 in r2/nodegroup3. Create three
* files (one split each):
* 1) file1, written just after starting datanode1, with a
* replication factor of 1, so its only replica lives on datanode1, and
* 2) file2 and file3, written after starting datanode2 and datanode3,
* with a replication factor of 3, so every datanode holds one replica
* of each.
* A fourth host (host4, in r1/nodegroup1) only ever runs a
* tasktracker, never a datanode, so no replicas are placed on it.
*/
Configuration conf = new Configuration();
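// Enable nodegroup-aware block placement, network topology and task
// scheduling on both the HDFS and MapReduce sides.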
conf.setBoolean("dfs.replication.considerLoad", false);
conf.set("dfs.block.replicator.classname",
"org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyWithNodeGroup");
conf.set("net.topology.impl",
"org.apache.hadoop.net.NetworkTopologyWithNodeGroup");
conf.setBoolean("net.topology.nodegroup.aware", true);
conf.setBoolean("mapred.jobtracker.nodegroup.aware", true);
conf.setInt("mapred.task.cache.levels", 3);
conf.set("mapred.jobtracker.jobSchedulable",
"org.apache.hadoop.mapred.JobSchedulableWithNodeGroup");
JobConf jobConf = new JobConf(conf);
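// register the nodegroup mapping to use when starting the first datanode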
MiniDFSClusterWithNodeGroup.setNodeGroups(nodeGroup1);
// start the dfs cluster with datanode1 only.
dfs = new MiniDFSClusterWithNodeGroup(0, conf, 1,
true, true, null, rack1, hosts1, null);
dfs.waitActive();
fileSys = dfs.getFileSystem();
if (!fileSys.mkdirs(inDir)) {
throw new IOException("Mkdirs failed to create " + inDir.toString());
}
// write file1 on datanode1 with 1 replica
UtilsForTests.writeFile(
dfs.getNameNode(), conf, new Path(inDir + "/file1"), (short)1);
// start two more datanodes (datanode2 and datanode3)
dfs.startDataNodes(conf, 2, true, null, rack2, nodeGroup2, hosts2, null);
dfs.waitActive();
// write two files with a replication factor of 3, so each datanode
// will hold one replica of file2 and of file3
UtilsForTests.writeFile(
dfs.getNameNode(), conf, new Path(inDir + "/file2"), (short)3);
UtilsForTests.writeFile(
dfs.getNameNode(), conf, new Path(inDir + "/file3"), (short)3);
namenode = (dfs.getFileSystem()).getUri().getHost() + ":" +
(dfs.getFileSystem()).getUri().getPort();
/* Run a job with the (only) tasktracker placed under r2/nodegroup3 and
* check how many data-local, nodegroup-local and rack-local maps it
* runs. The tasktracker's hostname is set to the same value as
* datanode3's.
*/
mr = new MiniMRClusterWithNodeGroup(taskTrackers, namenode, 1, rack3,
nodeGroup3, hosts3, jobConf);
/* The job is configured with three maps since there are three
* (non-splittable) files. file2 and file3 both have a replication
* factor of three, so their blocks are present on all the datanodes,
* in particular on datanode3 in rack r2 where the tasktracker runs.
* file1's only replica lives on rack r1, so its map must be scheduled
* off-rack. The result should therefore be 2 rack-local maps.
*/
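// asserts 3 maps in total, 2 of them rack-local (parameter order
// presumed to be: total, other-local, data-local, nodegroup-local
// and rack-local maps)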
launchJobAndTestCounters(testName, mr, fileSys, inDir, outputPath, 3, 0,
0, 0, 2, jobConf);
mr.shutdown();
/* Run a second job with the (only) tasktracker on host4
* (r1/nodegroup1).
*/
mr = new MiniMRClusterWithNodeGroup(taskTrackers, namenode, 1, rack4,
nodeGroup4, hosts4, jobConf);
/* The job is configured with three maps since there are three
* (non-splittable) files. Because of the way replication was set up
* when the files were created, datanode1 holds a replica of all three
* files, and it is in the same nodegroup as host4, where the only
* tasktracker runs. The result should therefore be 3 nodegroup-local
* maps.
* The MapReduce cluster has only one node, host4, and no datanode runs
* on that host. This verifies that, with compute nodes separated from
* data nodes, the scheduler can still achieve nodegroup-level locality.
*/
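// asserts 3 maps in total, all of them nodegroup-local (same presumed
// parameter order as above)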
launchJobAndTestCounters(testName, mr, fileSys, inDir, outputPath, 3, 0,
0, 3, 0, jobConf);
mr.shutdown();
mr = null; // avoid a second shutdown in the finally block
} finally {
if (dfs != null) {
dfs.shutdown();
}
if (mr != null) {
mr.shutdown();
}
}