conf2.set("mapred.local.dir", ROOT_MAPRED_LOCAL_DIR.toString());
conf2.setLong("local.cache.size", LOCAL_CACHE_LIMIT);
conf2.setLong("mapreduce.tasktracker.distributedcache.checkperiod", CACHE_DELETE_PERIOD_MS);
refreshConf(conf2);
TrackerDistributedCacheManager manager =
new TrackerDistributedCacheManager(conf2, taskController);
manager.startCleanupThread();
try {
FileSystem localfs = FileSystem.getLocal(conf2);
long now = System.currentTimeMillis();
String userName = getJobOwnerName();
conf2.set("user.name", userName);
// We first test the size limit
FileStatus stat = fs.getFileStatus(firstCacheFilePublic);
CacheFile cfile1 = new CacheFile(firstCacheFilePublic.toUri(),
CacheFile.FileType.REGULAR, true,
stat.getModificationTime(),
true);
Path firstLocalCache = manager.getLocalCache(firstCacheFilePublic.toUri(), conf2,
TaskTracker.getPrivateDistributedCacheDir(userName),
fs.getFileStatus(firstCacheFilePublic), false,
fs.getFileStatus(firstCacheFilePublic).getModificationTime(), true,
cfile1);
manager.releaseCache(cfile1.getStatus());
// The code above localized a file of size 4K and then released the cache,
// which allows the cache to be deleted once the size limit is exceeded.
// The code below localizes another cache, which is designed to
// sweep away the first cache.
stat = fs.getFileStatus(secondCacheFilePublic);
CacheFile cfile2 = new CacheFile(secondCacheFilePublic.toUri(),
CacheFile.FileType.REGULAR, true,
stat.getModificationTime(),
true);
assertTrue("DistributedCache currently doesn't have cached file",
localfs.exists(firstLocalCache));
Path secondLocalCache = manager.getLocalCache(secondCacheFilePublic.toUri(), conf2,
TaskTracker.getPrivateDistributedCacheDir(userName),
fs.getFileStatus(secondCacheFilePublic), false,
fs.getFileStatus(secondCacheFilePublic).getModificationTime(), true,
cfile2);
checkCacheDeletion(localfs, firstLocalCache,
"DistributedCache failed deleting old" +
" cache when the cache store is full");
// find the root directory of distributed caches
Path firstCursor = firstLocalCache;
Path secondCursor = secondLocalCache;
while (!firstCursor.equals(secondCursor)) {
// Debug code, to see what these things look like
System.err.println("cursors: " + firstCursor);
System.err.println(" and " + secondCursor);
firstCursor = firstCursor.getParent();
secondCursor = secondCursor.getParent();
}
System.err.println("The final cursor is " + firstCursor);
System.err.println("That directory ends up with "
+ localfs.listStatus(firstCursor).length
+ " subdirectories");
Path cachesBase = firstCursor;
assertFalse
("DistributedCache did not delete the gensym'ed distcache "
+ "directory names when it deleted the files they contained "
+ "because they collectively exceeded the size limit.",
localfs.listStatus(cachesBase).length > 1);
conf2.setLong("local.cache.size", LOCAL_CACHE_LIMIT * 10);
conf2.setLong("mapreduce.tasktracker.local.cache.numberdirectories",
LOCAL_CACHE_SUBDIR_LIMIT);
manager.stopCleanupThread();
manager =
new TrackerDistributedCacheManager(conf2, taskController);
manager.startCleanupThread();
// Now we test the number of sub directories limit
// Create the temporary cache files to be used in the tests.
Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
Path fourthCacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile");
// Add two more small files so that the limit on the number of sub
// directories is triggered, but the file size limit is not.
createPrivateTempFile(thirdCacheFile);
createPrivateTempFile(fourthCacheFile);
DistributedCache.setCacheFiles(new URI[]{thirdCacheFile.toUri()}, conf2);
TrackerDistributedCacheManager.determineTimestampsAndCacheVisibilities(conf2);
stat = fs.getFileStatus(thirdCacheFile);
CacheFile cfile3 = new CacheFile(thirdCacheFile.toUri(),
CacheFile.FileType.REGULAR, false,
stat.getModificationTime(),
true);
Path thirdLocalCache = manager.getLocalCache(thirdCacheFile.toUri(), conf2,
TaskTracker.getPrivateDistributedCacheDir(userName),
fs.getFileStatus(thirdCacheFile), false,
fs.getFileStatus(thirdCacheFile).getModificationTime(),
false, cfile3);
DistributedCache.setLocalFiles(conf2, thirdLocalCache.toString());
JobLocalizer.downloadPrivateCache(conf2);
// Release the third cache so that it can be deleted while sweeping
manager.releaseCache(cfile3.getStatus());
// Getting the fourth cache will make the number of sub directories become
// 3, which is greater than 2, so the released cache will be deleted.
stat = fs.getFileStatus(fourthCacheFile);
CacheFile cfile4 = new CacheFile(fourthCacheFile.toUri(),
CacheFile.FileType.REGULAR, false,
stat.getModificationTime(),
true);
assertTrue("DistributedCache currently doesn't have cached file",
localfs.exists(thirdLocalCache));
DistributedCache.setCacheFiles(new URI[]{fourthCacheFile.toUri()}, conf2);
DistributedCache.setLocalFiles(conf2, thirdCacheFile.toUri().toString());
TrackerDistributedCacheManager.determineTimestampsAndCacheVisibilities(conf2);
Path fourthLocalCache = manager.getLocalCache(fourthCacheFile.toUri(), conf2,
TaskTracker.getPrivateDistributedCacheDir(userName),
fs.getFileStatus(fourthCacheFile), false,
fs.getFileStatus(fourthCacheFile).getModificationTime(), false, cfile4);
checkCacheDeletion(localfs, thirdLocalCache,
"DistributedCache failed deleting old" +
" cache when the cache exceeds the number of sub directories limit.");
assertFalse
("DistributedCache did not delete the gensym'ed distcache "
+ "directory names when it deleted the files they contained "
+ "because there were too many.",
localfs.listStatus(cachesBase).length > LOCAL_CACHE_SUBDIR_LIMIT);
// Clean up the files created in this test
new File(thirdCacheFile.toString()).delete();
new File(fourthCacheFile.toString()).delete();
} finally {
manager.stopCleanupThread();
}
}