Package org.apache.hadoop.filecache

Examples of org.apache.hadoop.filecache.TrackerDistributedCacheManager


    RunningJob rjob = new RunningJob(jobId);
    TaskController taskController = new DefaultTaskController();
    taskController.setConf(trackerFConf);
    rjob.distCacheMgr =
      new TrackerDistributedCacheManager(trackerFConf, taskController).
      newTaskDistributedCacheManager(jobId, trackerFConf);
     
    TaskRunner runner = task.createRunner(tracker, tip, rjob);
    tip.setTaskRunner(runner);
View Full Code Here


      this.localJobFile = new Path(this.localJobDir, id + ".xml");

      // Manage the distributed cache.  If there are files to be copied,
      // this will trigger localFile to be re-written again.
      this.trackerDistributedCacheManager =
        new TrackerDistributedCacheManager(conf, taskController);
      this.taskDistributedCacheManager =
        trackerDistributedCacheManager.newTaskDistributedCacheManager(
            jobid, conf);
      taskDistributedCacheManager.setupCache(conf, "archive", "archive");
     
View Full Code Here

    conf2.set("mapred.local.dir", ROOT_MAPRED_LOCAL_DIR.toString());
    conf2.setLong("local.cache.size", LOCAL_CACHE_LIMIT);
    conf2.setLong("mapreduce.tasktracker.distributedcache.checkperiod", CACHE_DELETE_PERIOD_MS);
   
    refreshConf(conf2);
    TrackerDistributedCacheManager manager =
        new TrackerDistributedCacheManager(conf2, taskController);
    manager.startCleanupThread();
    try {
    FileSystem localfs = FileSystem.getLocal(conf2);
    long now = System.currentTimeMillis();
    String userName = getJobOwnerName();
    conf2.set("user.name", userName);

    // We first test the size limit
    FileStatus stat = fs.getFileStatus(firstCacheFilePublic);
    CacheFile cfile1 = new CacheFile(firstCacheFilePublic.toUri(),
                                 CacheFile.FileType.REGULAR, true,
                                 stat.getModificationTime(),
                                 true);
    Path firstLocalCache = manager.getLocalCache(firstCacheFilePublic.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(firstCacheFilePublic), false,
        fs.getFileStatus(firstCacheFilePublic).getModificationTime(), true,
        cfile1);
    manager.releaseCache(cfile1.getStatus());
    //in above code,localized a file of size 4K and then release the cache
    // which will cause the cache be deleted when the limit goes out.
    // The below code localize another cache which's designed to
    //sweep away the first cache.
    stat = fs.getFileStatus(secondCacheFilePublic);
    CacheFile cfile2 = new CacheFile(secondCacheFilePublic.toUri(),
                                 CacheFile.FileType.REGULAR, true,
                                 stat.getModificationTime(),
                                 true);
    assertTrue("DistributedCache currently doesn't have cached file",
        localfs.exists(firstLocalCache));
    Path secondLocalCache = manager.getLocalCache(secondCacheFilePublic.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(secondCacheFilePublic), false,
        fs.getFileStatus(secondCacheFilePublic).getModificationTime(), true,
        cfile2);
    checkCacheDeletion(localfs, firstLocalCache,
        "DistributedCache failed deleting old" +
        " cache when the cache store is full");

    // find the root directory of distributed caches
    Path firstCursor = firstLocalCache;
    Path secondCursor = secondLocalCache;

    while (!firstCursor.equals(secondCursor)) {
      // Debug code, to see what these things look like
      System.err.println("cursors: " + firstCursor);
      System.err.println(" and " + secondCursor);

      firstCursor = firstCursor.getParent();
      secondCursor = secondCursor.getParent();
    }

    System.err.println("The final cursor is " + firstCursor);

    System.err.println("That directory ends up with "
                       + localfs.listStatus(firstCursor).length
                       + " subdirectories");
    Path cachesBase = firstCursor;

    assertFalse
      ("DistributedCache did not delete the gensym'ed distcache "
           + "directory names when it deleted the files they contained "
           + "because they collectively exceeded the size limit.",
       localfs.listStatus(cachesBase).length > 1);
    conf2.setLong("local.cache.size", LOCAL_CACHE_LIMIT * 10);
    conf2.setLong("mapreduce.tasktracker.local.cache.numberdirectories",
        LOCAL_CACHE_SUBDIR_LIMIT);
    manager.stopCleanupThread();
   
    manager =
      new TrackerDistributedCacheManager(conf2, taskController);
    manager.startCleanupThread();
   
    // Now we test the number of sub directories limit
    // Create the temporary cache files to be used in the tests.
    Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
    Path fourthCacheFile = new Path(TEST_ROOT_DIR, "fourthcachefile");
    // Adding two more small files, so it triggers the number of sub directory
    // limit but does not trigger the file size limit.
    createPrivateTempFile(thirdCacheFile);
    createPrivateTempFile(fourthCacheFile);
    DistributedCache.setCacheFiles(new URI[]{thirdCacheFile.toUri()}, conf2);
    TrackerDistributedCacheManager.determineCacheVisibilities(conf2);
    TrackerDistributedCacheManager.determineTimestamps(conf2);
    stat = fs.getFileStatus(thirdCacheFile);
    CacheFile cfile3 = new CacheFile(thirdCacheFile.toUri(),
            CacheFile.FileType.REGULAR, false,
            stat.getModificationTime(),
            true);
    Path thirdLocalCache = manager.getLocalCache(thirdCacheFile.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(thirdCacheFile), false,
        fs.getFileStatus(thirdCacheFile).getModificationTime(),
        false, cfile3);
    DistributedCache.setLocalFiles(conf2, thirdLocalCache.toString());
    JobLocalizer.downloadPrivateCache(conf2);
    // Release the third cache so that it can be deleted while sweeping
    manager.releaseCache(cfile3.getStatus());
    // Getting the fourth cache will make the number of sub directories becomes
    // 3 which is greater than 2. So the released cache will be deleted.
    stat = fs.getFileStatus(fourthCacheFile);
    CacheFile cfile4 = new CacheFile(fourthCacheFile.toUri(),
            CacheFile.FileType.REGULAR, false,
            stat.getModificationTime(),
            true);
    assertTrue("DistributedCache currently doesn't have cached file",
        localfs.exists(thirdLocalCache));
   
    DistributedCache.setCacheFiles(new URI[]{fourthCacheFile.toUri()}, conf2);
    DistributedCache.setLocalFiles(conf2, thirdCacheFile.toUri().toString());
    TrackerDistributedCacheManager.determineCacheVisibilities(conf2);
    TrackerDistributedCacheManager.determineTimestamps(conf2);
    Path fourthLocalCache = manager.getLocalCache(fourthCacheFile.toUri(), conf2,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(fourthCacheFile), false,
        fs.getFileStatus(fourthCacheFile).getModificationTime(), false, cfile4);
    checkCacheDeletion(localfs, thirdLocalCache,
        "DistributedCache failed deleting old" +
        " cache when the cache exceeds the number of sub directories limit.");

    assertFalse
      ("DistributedCache did not delete the gensym'ed distcache "
           + "directory names when it deleted the files they contained "
           + "because there were too many.",
       localfs.listStatus(cachesBase).length > LOCAL_CACHE_SUBDIR_LIMIT);

    // Clean up the files created in this test
    new File(thirdCacheFile.toString()).delete();
    new File(fourthCacheFile.toString()).delete();
    } finally {
      manager.stopCleanupThread();
    }
  }
View Full Code Here

  public void testFileSystemOtherThanDefault() throws Exception {
    if (!canRun()) {
      return;
    }
    TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(conf, taskController);
    conf.set("fs.fakefile.impl", conf.get("fs.file.impl"));
    String userName = getJobOwnerName();
    conf.set("user.name", userName);
    Path fileToCache = new Path("fakefile:///"
        + firstCacheFile.toUri().getPath());
    CacheFile file = new CacheFile(fileToCache.toUri(),
                               CacheFile.FileType.REGULAR,
                               false, 0, false);
    Path result = manager.getLocalCache(fileToCache.toUri(), conf,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(firstCacheFile), false,
        System.currentTimeMillis(),
        false, file);
    assertNotNull("DistributedCache cached file on non-default filesystem.",
View Full Code Here

    Configuration myConf = new Configuration(conf);
    myConf.set("fs.default.name", "refresh:///");
    myConf.setClass("fs.refresh.impl", FakeFileSystem.class, FileSystem.class);
    String userName = getJobOwnerName();

    TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(myConf, taskController);
    // ****** Imitate JobClient code
    // Configures a task/job with both a regular file and a "classpath" file.
    Configuration subConf = new Configuration(myConf);
    subConf.set("user.name", userName);
    DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf);
    TrackerDistributedCacheManager.determineTimestamps(subConf);
    TrackerDistributedCacheManager.determineCacheVisibilities(subConf);
    // ****** End of imitating JobClient code

    // ****** Imitate TaskRunner code.
    TaskDistributedCacheManager handle =
      manager.newTaskDistributedCacheManager(new JobID("jt", 1), subConf);
    assertNull(null, DistributedCache.getLocalCacheFiles(subConf));
    File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString());
    handle.setupCache(subConf, TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    //TODO this doesn't really happen in the TaskRunner
//    handle.setupPrivateCache(localDirAllocator, TaskTracker
//        .getPrivateDistributedCacheDir(userName));
    // ****** End of imitating TaskRunner code

    Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(subConf);
    assertNotNull(null, localCacheFiles);
    assertEquals(1, localCacheFiles.length);
    Path cachedFirstFile = localCacheFiles[0];
    assertFileLengthEquals(firstCacheFile, cachedFirstFile);
    assertFalse("Paths should be different.",
        firstCacheFile.equals(cachedFirstFile));
    // release
    handle.release();
   
    // change the file timestamp
    FileSystem fs = FileSystem.get(myConf);
    ((FakeFileSystem)fs).advanceClock(1);

    // running a task of the same job
    Throwable th = null;
    try {
      handle.setupCache(subConf, TaskTracker.getPublicDistributedCacheDir(),
          TaskTracker.getPrivateDistributedCacheDir(userName));
//      handle.setupPrivateCache(localDirAllocator, TaskTracker
//          .getPrivateDistributedCacheDir(userName));
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull("Throwable is null", th);
    assertTrue("Exception message does not match",
        th.getMessage().contains("has changed on HDFS since job started"));
    // release
    handle.release();
   
    // submit another job
    Configuration subConf2 = new Configuration(myConf);
    subConf2.set("user.name", userName);
    DistributedCache.addCacheFile(firstCacheFile.toUri(), subConf2);
    TrackerDistributedCacheManager.determineTimestamps(subConf2);
    TrackerDistributedCacheManager.determineCacheVisibilities(subConf2);
   
    handle =
      manager.newTaskDistributedCacheManager(new JobID("jt", 2), subConf2);
    handle.setupCache(subConf2, TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    Path[] localCacheFiles2 = DistributedCache.getLocalCacheFiles(subConf2);
    assertNotNull(null, localCacheFiles2);
    assertEquals(1, localCacheFiles2.length);
View Full Code Here

    if (!canRun()) {
      return;
    }
    String userName = getJobOwnerName();
    conf.set("user.name", userName);
    TrackerDistributedCacheManager manager =
        new TrackerDistributedCacheManager(conf, taskController);
    FileSystem localfs = FileSystem.getLocal(conf);
    long now = System.currentTimeMillis();

    Path[] localCache = new Path[2];
    FileStatus stat = fs.getFileStatus(firstCacheFile);
    CacheFile file = new CacheFile(firstCacheFilePublic.toUri(),
                               CacheFile.FileType.REGULAR, true,
                               stat.getModificationTime(), false);
    localCache[0] = manager.getLocalCache(firstCacheFilePublic.toUri(), conf,
        TaskTracker.getPrivateDistributedCacheDir(userName),
        fs.getFileStatus(firstCacheFilePublic), false,
        fs.getFileStatus(firstCacheFilePublic).getModificationTime(), true,
        file);
    FsPermission myPermission = new FsPermission((short)0600);
    Path myFile = new Path(localCache[0].getParent(), "myfile.txt");
    if (FileSystem.create(localfs, myFile, myPermission) == null) {
      throw new IOException("Could not create " + myFile);
    }
    try {
      stat = fs.getFileStatus(secondCacheFilePublic);
      file = new CacheFile(secondCacheFilePublic.toUri(),
                       CacheFile.FileType.REGULAR,
                       true, stat.getModificationTime(), false);
      localCache[1] = manager.getLocalCache(secondCacheFilePublic.toUri(), conf,
          TaskTracker.getPrivateDistributedCacheDir(userName),
          fs.getFileStatus(secondCacheFilePublic), false,
          fs.getFileStatus(secondCacheFilePublic).getModificationTime(), true,
          file);
      stat = localfs.getFileStatus(myFile);
View Full Code Here

  public void testRemoveTaskDistributedCacheManager() throws Exception {
    if (!canRun()) {
      return;
    }
    TrackerDistributedCacheManager manager = new TrackerDistributedCacheManager(
        conf, taskController);
    JobID jobId = new JobID("jobtracker", 1);
    manager.newTaskDistributedCacheManager(jobId, conf);

    TaskDistributedCacheManager taskDistributedCacheManager = manager
        .getTaskDistributedCacheManager(jobId);
    assertNotNull(taskDistributedCacheManager);

    manager.removeTaskDistributedCacheManager(jobId);

    taskDistributedCacheManager = manager.getTaskDistributedCacheManager(jobId);
    assertNull(taskDistributedCacheManager);
  }
View Full Code Here

    FileOutputStream os = new FileOutputStream(new File(jobFile.toString()));
    subConf.writeXml(os);
    os.close();

    // ****** Imitate TaskRunner code.
    TrackerDistributedCacheManager manager =
      new TrackerDistributedCacheManager(conf, taskController);
    TaskDistributedCacheManager handle =
      manager.newTaskDistributedCacheManager(jobid, subConf);
    assertNull(null, DistributedCache.getLocalCacheFiles(subConf));
    File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString());
    handle.setupCache(subConf, TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    JobLocalizer.downloadPrivateCache(subConf);
    // DOESN'T ACTUALLY HAPPEN IN THE TaskRunner (THIS IS A TODO)
//    handle.setupPrivateCache(localDirAllocator, TaskTracker
//        .getPrivateDistributedCacheDir(userName));
//    // ****** End of imitating TaskRunner code

    Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(subConf);
    assertNotNull(null, localCacheFiles);
    assertEquals(2, localCacheFiles.length);
    Path cachedFirstFile = localCacheFiles[0];
    Path cachedSecondFile = localCacheFiles[1];
    assertFileLengthEquals(firstCacheFile, cachedFirstFile);
    assertFalse("Paths should be different.",
        firstCacheFile.equals(cachedFirstFile));

    assertEquals(1, handle.getClassPaths().size());
    assertEquals(cachedSecondFile.toString(), handle.getClassPaths().get(0));

    checkFilePermissions(localCacheFiles);

    // Cleanup
    handle.release();
    manager.purgeCache();
    assertFalse(pathToFile(cachedFirstFile).exists());
  }
View Full Code Here

  public void testReferenceCount() throws IOException, LoginException,
      URISyntaxException, InterruptedException {
    if (!canRun()) {
      return;
    }
    TrackerDistributedCacheManager manager =
      new FakeTrackerDistributedCacheManager(conf);

    String userName = getJobOwnerName();
    File workDir = new File(new Path(TEST_ROOT_DIR, "workdir").toString());

    // Configures a job with a regular file
    Job job1 = new Job(conf);
    Configuration conf1 = job1.getConfiguration();
    conf1.set("user.name", userName);
    DistributedCache.addCacheFile(secondCacheFile.toUri(), conf1);
   
    TrackerDistributedCacheManager.determineTimestamps(conf1);
    TrackerDistributedCacheManager.determineCacheVisibilities(conf1);

    // Task localizing for first job
    JobID jobId = new JobID("jt", 1);
    TaskDistributedCacheManager handle = manager
        .newTaskDistributedCacheManager(jobId, conf1);
    handle.setupCache(conf1, TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    long[] sizes = JobLocalizer.downloadPrivateCache(conf1);
    if (sizes != null) {
      manager.setArchiveSizes(jobId, sizes);
    }
    handle.release();
    for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
      assertEquals(0, manager.getReferenceCount(c.getStatus()));
      long filesize = FileUtil.getDU(new File(c.getStatus().localizedLoadPath.getParent().toString()));
      assertTrue("filesize is not greater than 0", filesize > 0);
      assertEquals(filesize, c.getStatus().size);
    }

    // Test specifying directories to go into distributed cache and make
    // their sizes are calculated properly.
    Job job2 = new Job(conf);
    Configuration conf2 = job2.getConfiguration();
    conf1.set("user.name", userName);
    DistributedCache.addCacheFile(firstCacheDirPublic.toUri(), conf2);
    DistributedCache.addCacheFile(firstCacheDirPrivate.toUri(), conf2);

    TrackerDistributedCacheManager.determineTimestamps(conf2);
    TrackerDistributedCacheManager.determineCacheVisibilities(conf2);

    // Task localizing for second job
    JobID job2Id = new JobID("jt", 2);
    handle = manager.newTaskDistributedCacheManager(job2Id, conf2);
    handle.setupCache(conf2, TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));
    long[] sizes2 = JobLocalizer.downloadPrivateCache(conf2);
    for (int j=0; j > sizes2.length; j++) {
      LOG.info("size is: " + sizes2[j]);
    }
    if (sizes2 != null) {
      manager.setArchiveSizes(job2Id, sizes2);
    }
    handle.release();
    for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
      assertEquals(0, manager.getReferenceCount(c.getStatus()));
      long filesize = FileUtil.getDU(new File(c.getStatus().localizedLoadPath.getParent().toString()));
      assertTrue("filesize is not greater than 0", filesize > 0);
      assertEquals(filesize, c.getStatus().size);
    }
   
    Path thirdCacheFile = new Path(TEST_ROOT_DIR, "thirdcachefile");
    createPrivateTempFile(thirdCacheFile);
   
    // Configures another job with three regular files.
    Job job3 = new Job(conf);
    Configuration conf3 = job3.getConfiguration();
    conf3.set("user.name", userName);
    // add a file that would get failed to localize
    DistributedCache.addCacheFile(firstCacheFilePublic.toUri(), conf3);
    // add a file that is already localized by different job
    DistributedCache.addCacheFile(secondCacheFile.toUri(), conf3);
    // add a file that is never localized
    DistributedCache.addCacheFile(thirdCacheFile.toUri(), conf3);
   
    TrackerDistributedCacheManager.determineTimestamps(conf3);
    TrackerDistributedCacheManager.determineCacheVisibilities(conf3);

    // Task localizing for third job
    // localization for the "firstCacheFile" will fail.
    handle = manager.newTaskDistributedCacheManager(new JobID("jt", 3), conf3);
    Throwable th = null;
    try {
      handle.setupCache(conf3, TaskTracker.getPublicDistributedCacheDir(),
          TaskTracker.getPrivateDistributedCacheDir(userName));
      JobLocalizer.downloadPrivateCache(conf3);
    } catch (IOException e) {
      th = e;
      LOG.info("Exception during setup", e);
    }
    assertNotNull(th);
    assertTrue(th.getMessage().contains("fake fail"));
    handle.release();
    th = null;
    for (TaskDistributedCacheManager.CacheFile c : handle.getCacheFiles()) {
      try {
        int refcount = manager.getReferenceCount(c.getStatus());
        LOG.info("checking refcount " + c.uri + " of " + refcount);
        assertEquals(0, refcount);
      } catch (NullPointerException ie) {
        th = ie;
        LOG.info("Exception getting reference count for " + c.uri, ie);
View Full Code Here

  public void testSameNameFileArchiveCache() throws IOException,
      InterruptedException {
    if (!canRun()) {
      return;
    }
    TrackerDistributedCacheManager manager = new TrackerDistributedCacheManager(
        conf, taskController);
    String userName = getJobOwnerName();
    File workDir = new File(TEST_ROOT_DIR, "workdir");
    Path cacheFile = new Path(TEST_ROOT_DIR, "fileArchiveCacheFile");

    createPublicTempFile(cacheFile);
    Configuration conf1 = new Configuration(conf);
    conf1.set("user.name", userName);

    DistributedCache.addCacheFile(cacheFile.toUri(), conf1);
    DistributedCache.addCacheArchive(cacheFile.toUri(), conf1);
    TrackerDistributedCacheManager.determineTimestamps(conf1);
    TrackerDistributedCacheManager.determineCacheVisibilities(conf1);
    dumpState(conf1);

    TaskDistributedCacheManager handle = manager
        .newTaskDistributedCacheManager(new JobID("jt", 1), conf1);
    handle.setupCache(conf1, TaskTracker.getPublicDistributedCacheDir(),
        TaskTracker.getPrivateDistributedCacheDir(userName));

    TaskDistributedCacheManager.CacheFile cFile = handle.getCacheFiles().get(0);
    TaskDistributedCacheManager.CacheFile cArchive = handle.getCacheFiles()
        .get(1);

    String distCacheDir = TaskTracker.getPublicDistributedCacheDir();

    Path localizedPathForFile = manager.getLocalCache(cacheFile.toUri(), conf1,
        distCacheDir, fs.getFileStatus(cacheFile), false, cFile.timestamp,
        true, cFile);

    Path localizedPathForArchive = manager.getLocalCache(cacheFile.toUri(),
        conf1, distCacheDir, fs.getFileStatus(cacheFile), true,
        cArchive.timestamp, true, cArchive);
    assertNotSame("File and Archive resolve to the same path: "
        + localizedPathForFile + ". Should differ.", localizedPathForFile,
        localizedPathForArchive);
View Full Code Here

TOP

Related Classes of org.apache.hadoop.filecache.TrackerDistributedCacheManager

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.