Package org.apache.hadoop.fs

Examples of org.apache.hadoop.fs.ContentSummary
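Before the collected snippets below, here is a minimal, self-contained sketch of the basic API; the path and configuration are placeholders, not taken from any of the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Aggregate view (sizes, counts, quotas) of an entire directory tree.
    ContentSummary cs = fs.getContentSummary(new Path("/user/example"));
    System.out.println("length (bytes)  = " + cs.getLength());
    System.out.println("file count      = " + cs.getFileCount());
    System.out.println("directory count = " + cs.getDirectoryCount());
    System.out.println("space consumed  = " + cs.getSpaceConsumed());
    System.out.println("namespace quota = " + cs.getQuota());
    System.out.println("space quota     = " + cs.getSpaceQuota());
  }
}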


          Path unsorted = new Path(logFile.unsortedFileName());
          if (fs.exists(unsorted))
            return fs.getFileStatus(unsorted).getLen();
          return fs.getFileStatus(new Path(logFile.copyTempFileName())).getLen();
        } else {
          ContentSummary contentSummary = fs.getContentSummary(new Path(logFile.recoveryFileName()));
          // map files are bigger than sequence files
          return (long) (contentSummary.getSpaceConsumed() * .8);
        }
      } catch (Exception ex) {
        return 0;
      }
    }
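Note that getSpaceConsumed() counts bytes across all replicas, whereas getLength() is the logical size, which is why the snippet above scales the consumed space rather than the length. A small sketch of the distinction (fs and the path are placeholders; the ~3x relationship only holds for HDFS's default replication factor):

ContentSummary cs = fs.getContentSummary(new Path("/some/dir"));
long logicalBytes = cs.getLength();        // sum of file lengths
long rawBytes     = cs.getSpaceConsumed(); // includes replication, typically ~3x logicalBytes on HDFS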


        || spaceQuota == null) {
      return null;
    }

    try {
      return new ContentSummary(
          Long.parseLong(length),
          Long.parseLong(fileCount),
          Long.parseLong(directoryCount),
          Long.parseLong(quota),
          Long.parseLong(spaceConsumed),
          Long.parseLong(spaceQuota));
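For illustration only, a standalone sketch of how those six constructor arguments surface through the getters (the values are invented):

// length, fileCount, directoryCount, quota, spaceConsumed, spaceQuota
ContentSummary cs = new ContentSummary(1024L, 10L, 2L, 100L, 3072L, 10240L);
System.out.println(cs.getLength());         // 1024
System.out.println(cs.getFileCount());      // 10
System.out.println(cs.getDirectoryCount()); // 2
System.out.println(cs.getQuota());          // 100   (namespace quota)
System.out.println(cs.getSpaceConsumed());  // 3072
System.out.println(cs.getSpaceQuota());     // 10240 (space quota, in bytes)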

  }

  /** {@inheritDoc} */
  public ContentSummary getContentSummary(Path f) throws IOException {
    final String s = makeQualified(f).toUri().getPath();
    final ContentSummary cs = new ContentSummaryParser().getContentSummary(s);
    return cs != null? cs: super.getContentSummary(f);
  }

        System.out.println("sizelimit=" + sizelimit);

        ToolRunner.run(distcp,
            new String[]{"-sizelimit", ""+sizelimit, nnUri+srcrootdir, nnUri+dstrootdir});
       
        ContentSummary summary = fs.getContentSummary(dstrootpath);
        System.out.println("summary=" + summary);
        assertTrue(summary.getLength() <= sizelimit);
        deldir(fs, dstrootdir);
        deldir(fs, srcrootdir);
      }

      {//test update
        final MyFile[] srcs = createFiles(URI.create(nnUri), srcrootdir);
        final long totalsize = fs.getContentSummary(srcrootpath).getLength();
        System.out.println("src.length=" + srcs.length);
        System.out.println("totalsize =" + totalsize);
        fs.mkdirs(dstrootpath);
        final int parts = RAN.nextInt(NFILES/3 - 1) + 2;
        final int filelimit = srcs.length/parts;
        final long sizelimit = totalsize/parts;
        System.out.println("filelimit=" + filelimit);
        System.out.println("sizelimit=" + sizelimit);
        System.out.println("parts    =" + parts);
        final String[] args = {"-filelimit", ""+filelimit, "-sizelimit", ""+sizelimit,
            "-update", nnUri+srcrootdir, nnUri+dstrootdir};

        int dstfilecount = 0;
        long dstsize = 0;
        for(int i = 0; i <= parts; i++) {
          ToolRunner.run(distcp, args);
       
          FileStatus[] dststat = getFileStatus(fs, dstrootdir, srcs, true);
          System.out.println(i + ") dststat.length=" + dststat.length);
          assertTrue(dststat.length - dstfilecount <= filelimit);
          ContentSummary summary = fs.getContentSummary(dstrootpath);
          System.out.println(i + ") summary.getLength()=" + summary.getLength());
          assertTrue(summary.getLength() - dstsize <= sizelimit);
          assertTrue(checkFiles(fs, dstrootdir, srcs, true));
          dstfilecount = dststat.length;
          dstsize = summary.getLength();
        }

        deldir(fs, dstrootdir);
        deldir(fs, srcrootdir);
      }

        if (filter != null && !filter.accept(p)) {
          continue;
        }

        ContentSummary cs = ctx.getCS(path);
        if (cs == null) {
          if (path == null) {
            continue;
          }
          pathNeedProcess.add(path);
        } else {
          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();
        }
      }

      // Process the paths for which a name node call is needed
      final Map<String, ContentSummary> resultMap = new ConcurrentHashMap<String, ContentSummary>();
      ArrayList<Future<?>> results = new ArrayList<Future<?>>();
      final ThreadPoolExecutor executor;
      int maxThreads = ctx.getConf().getInt("mapred.dfsclient.parallelism.max", 0);
      if (pathNeedProcess.size() > 1 && maxThreads > 1) {
        int numExecutors = Math.min(pathNeedProcess.size(), maxThreads);
        LOG.info("Using " + numExecutors + " threads for getContentSummary");
        executor = new ThreadPoolExecutor(numExecutors, numExecutors, 60, TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>());
      } else {
        executor = null;
      }

      HiveInterruptCallback interrup = HiveInterruptUtils.add(new HiveInterruptCallback() {
        @Override
        public void interrupt() {
          if (executor != null) {
            executor.shutdownNow();
          }
        }
      });
      try {
        Configuration conf = ctx.getConf();
        JobConf jobConf = new JobConf(conf);
        for (String path : pathNeedProcess) {
          final Path p = new Path(path);
          final String pathStr = path;
          // All threads share the same Configuration and JobConf, on the
          // assumption that they are thread safe as long as only read
          // operations are performed. Hadoop's javadoc does not state this,
          // but the source code clearly makes an effort to support it, so we
          // believe the assumption holds. Revisit this code if the assumption
          // turns out to be incorrect.
          final Configuration myConf = conf;
          final JobConf myJobConf = jobConf;
          final PartitionDesc partDesc = work.getPathToPartitionInfo().get(
              p.toString());
          Runnable r = new Runnable() {
            public void run() {
              try {
                ContentSummary resultCs;

                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  resultCs = ((ContentSummaryInputFormat) inputFormatObj).getContentSummary(p,
                      myJobConf);
                } else {
                  FileSystem fs = p.getFileSystem(myConf);
                  resultCs = fs.getContentSummary(p);
                }
                resultMap.put(pathStr, resultCs);
              } catch (IOException e) {
                // We safely ignore this exception for summary data.
                // We do not update the cache, so the failure cannot pollute
                // other usages. In the worst case the IOException recurs on
                // the next getInputSummary() call, which is acceptable since
                // IOExceptions are not expected to be common here.
                LOG.info("Cannot get size of " + pathStr + ". Safely ignored.");
              }
            }
          };

          if (executor == null) {
            r.run();
          } else {
            Future<?> result = executor.submit(r);
            results.add(result);
          }
        }

        if (executor != null) {
          for (Future<?> result : results) {
            boolean executorDone = false;
            do {
              try {
                result.get();
                executorDone = true;
              } catch (InterruptedException e) {
                LOG.info("Interrupted when waiting threads: ", e);
                Thread.currentThread().interrupt();
                break;
              } catch (ExecutionException e) {
                throw new IOException(e);
              }
            } while (!executorDone);
          }
          executor.shutdown();
        }
        HiveInterruptUtils.checkInterrupted();
        for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
          ContentSummary cs = entry.getValue();

          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();

          ctx.addCS(entry.getKey(), cs);
          LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength()
              + " file count: "
              + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
        }

        return new ContentSummary(summary[0], summary[1], summary[2]);
      } finally {
        HiveInterruptUtils.remove(interrup);
      }
    }
  }
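As the snippet shows, the name node calls are only issued in parallel when mapred.dfsclient.parallelism.max is greater than 1; with the default of 0 every path is processed serially in the calling thread. A minimal way to enable the pool, assuming you control the Configuration that ends up in ctx:

Configuration conf = new Configuration();
conf.setInt("mapred.dfsclient.parallelism.max", 8); // allow up to 8 concurrent getContentSummary calls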

    }
  }

  public static boolean isEmptyPath(JobConf job, String dirPath, Context ctx)
      throws Exception {
    ContentSummary cs = ctx.getCS(dirPath);
    if (cs != null) {
      LOG.info("Content Summary " + dirPath + " length: " + cs.getLength() + " num files: "
          + cs.getFileCount() + " num directories: " + cs.getDirectoryCount());
      return (cs.getLength() == 0 && cs.getFileCount() == 0 && cs.getDirectoryCount() <= 1);
    } else {
      LOG.info("Content Summary not cached for " + dirPath);
    }
    Path p = new Path(dirPath);
    return isEmptyPath(job, p);
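When nothing is cached, the same emptiness test can be made directly against the FileSystem. This is only a sketch of one way to do it, not necessarily what isEmptyPath(job, p) does internally:

FileSystem fs = p.getFileSystem(job);
ContentSummary cs = fs.getContentSummary(p);
// Empty means no bytes, no files, and at most the directory itself.
boolean empty = cs.getLength() == 0 && cs.getFileCount() == 0 && cs.getDirectoryCount() <= 1;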

        // is chosen as the big table, what is the total size of the remaining
        // tables, which will become the small tables.
        for (Map.Entry<String, ArrayList<String>> entry : pathToAliases.entrySet()) {
          String path = entry.getKey();
          List<String> aliasList = entry.getValue();
          ContentSummary cs = context.getCS(path);
          if (cs != null) {
            long size = cs.getLength();
            for (String alias : aliasList) {
              aliasTotalKnownInputSize += size;
              Long es = aliasToSize.get(alias);
              if(es == null) {
                es = new Long(0);

    // map-reduce jobs will be run locally based on data size
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask: mrtasks) {
      try {
        ContentSummary inputSummary = Utilities.getInputSummary
          (ctx, (MapredWork)mrtask.getWork(), p);
        int numReducers = getNumberOfReducers(mrtask.getWork(), conf);

        long estimatedInput;

        if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
          // If the global limit optimization is triggered, we will
          // estimate input data actually needed based on limit rows.
          // estimated Input = (num_limit * max_size_per_row) * (estimated_map + 2)
          //
          long sizePerRow = HiveConf.getLongVar(conf,
              HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
          estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow;
          long minSplitSize = HiveConf.getLongVar(conf,
              HiveConf.ConfVars.MAPREDMINSPLITSIZE);
          long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
          estimatedInput = estimatedInput * (estimatedNumMap + 1);
        } else {
          estimatedInput = inputSummary.getLength();
        }

        if (LOG.isDebugEnabled()) {
          LOG.debug("Task: " + mrtask.getId() + ", Summary: " +
                   inputSummary.getLength() + "," + inputSummary.getFileCount() + ","
                   + numReducers + ", estimated Input: " + estimatedInput);
        }

        if (MapRedTask.isEligibleForLocalMode(conf, numReducers,
            estimatedInput, inputSummary.getFileCount()) != null) {
          hasNonLocalJob = true;
          break;
        } else {
          mrtask.setLocalMode(true);
        }
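With invented numbers, the limit-based estimate above works out as follows (all values are illustrative, not defaults taken from Hive):

long sizePerRow    = 100000L;             // assumed HIVELIMITMAXROWSIZE
long globalLimit   = 10L;                 // e.g. LIMIT 10
long minSplitSize  = 64L * 1024 * 1024;   // assumed 64 MB minimum split size
long summaryLength = 200L * 1024 * 1024;  // 200 MB reported by the ContentSummary
long estimatedNumMap = summaryLength / minSplitSize + 1;                  // 3 + 1 = 4
long estimatedInput  = globalLimit * sizePerRow * (estimatedNumMap + 1);  // 10 * 100000 * 5 = 5,000,000 bytes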

      assertTrue(dfs.mkdirs(new Path("/nqdir0/qdir1/qdir20/nqdir30")));

      // set the quota of /nqdir0/qdir1 to 4 * fileSpace
      final Path quotaDir1 = new Path("/nqdir0/qdir1");
      dfs.setQuota(quotaDir1, HdfsConstants.QUOTA_DONT_SET, 4 * fileSpace);
      ContentSummary c = dfs.getContentSummary(quotaDir1);
      assertEquals(c.getSpaceQuota(), 4 * fileSpace);
     
      // set the quota of /nqdir0/qdir1/qdir20 to 6 * fileSpace
      final Path quotaDir20 = new Path("/nqdir0/qdir1/qdir20");
      dfs.setQuota(quotaDir20, HdfsConstants.QUOTA_DONT_SET, 6 * fileSpace);
      c = dfs.getContentSummary(quotaDir20);
      assertEquals(c.getSpaceQuota(), 6 * fileSpace);


      // Create /nqdir0/qdir1/qdir21 and set its space quota to 2 * fileSpace
      final Path quotaDir21 = new Path("/nqdir0/qdir1/qdir21");
      assertTrue(dfs.mkdirs(quotaDir21));
      dfs.setQuota(quotaDir21, HdfsConstants.QUOTA_DONT_SET, 2 * fileSpace);
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceQuota(), 2 * fileSpace);

      // 5: Create directory /nqdir0/qdir1/qdir21/nqdir32
      Path tempPath = new Path(quotaDir21, "nqdir32");
      assertTrue(dfs.mkdirs(tempPath));
     
      // create a file under nqdir32/fileDir
      DFSTestUtil.createFile(dfs, new Path(tempPath, "fileDir/file1"), fileLen,
                             replication, 0);
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceConsumed(), fileSpace);
     
      // Create a larger file /nqdir0/qdir1/qdir21/nqdir33/
      boolean hasException = false;
      try {
        DFSTestUtil.createFile(dfs, new Path(quotaDir21, "nqdir33/file2"),
                               2*fileLen, replication, 0);
      } catch (DSQuotaExceededException e) {
        hasException = true;
      }
      assertTrue(hasException);
      // delete nqdir33
      assertTrue(dfs.delete(new Path(quotaDir21, "nqdir33"), true));
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceConsumed(), fileSpace);
      assertEquals(c.getSpaceQuota(), 2*fileSpace);

      // Verify space before the move:
      c = dfs.getContentSummary(quotaDir20);
      assertEquals(c.getSpaceConsumed(), 0);
     
      // Move /nqdir0/qdir1/qdir21/nqdir32 /nqdir0/qdir1/qdir20/nqdir30
      Path dstPath = new Path(quotaDir20, "nqdir30");
      Path srcPath = new Path(quotaDir21, "nqdir32");
      assertTrue(dfs.rename(srcPath, dstPath));
     
      // verify space after the move
      c = dfs.getContentSummary(quotaDir20);
      assertEquals(c.getSpaceConsumed(), fileSpace);
      // verify space for its parent
      c = dfs.getContentSummary(quotaDir1);
      assertEquals(c.getSpaceConsumed(), fileSpace);
      // verify space for source for the move
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceConsumed(), 0);
     
      final Path file2 = new Path(dstPath, "fileDir/file2");
      int file2Len = 2 * fileLen;
      // create a larger file under /nqdir0/qdir1/qdir20/nqdir30
      DFSTestUtil.createFile(dfs, file2, file2Len, replication, 0);
     
      c = dfs.getContentSummary(quotaDir20);
      assertEquals(c.getSpaceConsumed(), 3 * fileSpace);
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceConsumed(), 0);
     
      // Reverse: Move /nqdir0/qdir1/qdir20/nqdir30 to /nqdir0/qdir1/qdir21/
      hasException = false;
      try {
        assertFalse(dfs.rename(dstPath, srcPath));
      } catch (DSQuotaExceededException e) {
        hasException = true;
      }
      assertTrue(hasException);
     
      // make sure no intermediate directories left by failed rename
      assertFalse(dfs.exists(srcPath));
      // directory should exist
      assertTrue(dfs.exists(dstPath));
           
      // verify space after the failed move
      c = dfs.getContentSummary(quotaDir20);
      assertEquals(c.getSpaceConsumed(), 3 * fileSpace);
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceConsumed(), 0);
     
      // Test Append :
     
      // verify space quota
      c = dfs.getContentSummary(quotaDir1);
      assertEquals(c.getSpaceQuota(), 4 * fileSpace);
     
      // verify space before append;
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 3 * fileSpace);
     
      OutputStream out = dfs.append(file2);
      // appending 1 fileLen should succeed
      out.write(new byte[fileLen]);
      out.close();
     
      file2Len += fileLen; // after append
     
      // verify space after append;
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 4 * fileSpace);
     
      // now increase the quota for quotaDir1
      dfs.setQuota(quotaDir1, HdfsConstants.QUOTA_DONT_SET, 5 * fileSpace);
      // Now, appending more than 1 fileLen should result in an error
      out = dfs.append(file2);
      hasException = false;
      try {
        out.write(new byte[fileLen + 1024]);
        out.flush();
        out.close();
      } catch (DSQuotaExceededException e) {
        hasException = true;
        IOUtils.closeStream(out);
      }
      assertTrue(hasException);
     
      file2Len += fileLen; // after partial append
     
      // verify space after partial append
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 5 * fileSpace);
     
      // Test set replication :
     
      // first reduce the replication
      dfs.setReplication(file2, (short)(replication-1));
     
      // verify that space is reduced by file2Len
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 5 * fileSpace - file2Len);
     
      // now try to increase the replication and expect an error.
      hasException = false;
      try {
        dfs.setReplication(file2, (short)(replication+1));
      } catch (DSQuotaExceededException e) {
        hasException = true;
      }
      assertTrue(hasException);

      // verify space consumed remains unchanged.
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 5 * fileSpace - file2Len);
     
      // now increase the quota for quotaDir1 and quotaDir20
      dfs.setQuota(quotaDir1, HdfsConstants.QUOTA_DONT_SET, 10 * fileSpace);
      dfs.setQuota(quotaDir20, HdfsConstants.QUOTA_DONT_SET, 10 * fileSpace);
     
      // then increasing replication should be ok.
      dfs.setReplication(file2, (short)(replication+1));
      // verify increase in space
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 5 * fileSpace + file2Len);

      // Test HDFS-2053 :

      // Create directory /hdfs-2053
      final Path quotaDir2053 = new Path("/hdfs-2053");
      assertTrue(dfs.mkdirs(quotaDir2053));

      // Create subdirectories /hdfs-2053/{A,B,C}
      final Path quotaDir2053_A = new Path(quotaDir2053, "A");
      assertTrue(dfs.mkdirs(quotaDir2053_A));
      final Path quotaDir2053_B = new Path(quotaDir2053, "B");
      assertTrue(dfs.mkdirs(quotaDir2053_B));
      final Path quotaDir2053_C = new Path(quotaDir2053, "C");
      assertTrue(dfs.mkdirs(quotaDir2053_C));

      // Factors to vary the sizes of test files created in each subdir.
      // The actual factors are not really important but they allow us to create
      // identifiable file sizes per subdir, which helps during debugging.
      int sizeFactorA = 1;
      int sizeFactorB = 2;
      int sizeFactorC = 4;

      // Set space quota for subdirectory C
      dfs.setQuota(quotaDir2053_C, HdfsConstants.QUOTA_DONT_SET,
          (sizeFactorC + 1) * fileSpace);
      c = dfs.getContentSummary(quotaDir2053_C);
      assertEquals(c.getSpaceQuota(), (sizeFactorC + 1) * fileSpace);

      // Create a file under subdirectory A
      DFSTestUtil.createFile(dfs, new Path(quotaDir2053_A, "fileA"),
          sizeFactorA * fileLen, replication, 0);
      c = dfs.getContentSummary(quotaDir2053_A);
      assertEquals(c.getSpaceConsumed(), sizeFactorA * fileSpace);

      // Create a file under subdirectory B
      DFSTestUtil.createFile(dfs, new Path(quotaDir2053_B, "fileB"),
          sizeFactorB * fileLen, replication, 0);
      c = dfs.getContentSummary(quotaDir2053_B);
      assertEquals(c.getSpaceConsumed(), sizeFactorB * fileSpace);

      // Create a file under subdirectory C (which has a space quota)
      DFSTestUtil.createFile(dfs, new Path(quotaDir2053_C, "fileC"),
          sizeFactorC * fileLen, replication, 0);
      c = dfs.getContentSummary(quotaDir2053_C);
      assertEquals(c.getSpaceConsumed(), sizeFactorC * fileSpace);

      // Check space consumed for /hdfs-2053
      c = dfs.getContentSummary(quotaDir2053);
      assertEquals(c.getSpaceConsumed(),
          (sizeFactorA + sizeFactorB + sizeFactorC) * fileSpace);

    } finally {
      cluster.shutdown();
    }
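The same quota bookkeeping can be observed outside a test harness. A minimal sketch against an arbitrary directory; the path and the 1 GB quota are placeholders, and dfs is a DistributedFileSystem handle as in the test above:

Path dir = new Path("/example/quota-dir");
dfs.mkdirs(dir);
// leave the namespace quota alone, set the space quota to 1 GB
dfs.setQuota(dir, HdfsConstants.QUOTA_DONT_SET, 1024L * 1024 * 1024);
ContentSummary c = dfs.getContentSummary(dir);
System.out.println("space quota    = " + c.getSpaceQuota());    // 1073741824
System.out.println("space consumed = " + c.getSpaceConsumed()); // grows as replicated blocks are written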

      Path file2 = new Path("/test/test2");
      boolean exceededQuota = false;
      final int QUOTA_SIZE = 3 * BLOCK_SIZE; // total space usage including
                                             // repl.
      final int FILE_SIZE = BLOCK_SIZE / 2;
      ContentSummary c;
     
      // Create the directory and set the quota
      assertTrue(fs.mkdirs(dir));
      runCommand(admin, false, "-setSpaceQuota", Integer.toString(QUOTA_SIZE),
           dir.toString());

      // Creating a file should use half the quota
      DFSTestUtil.createFile(fs, file1, FILE_SIZE, (short) 3, 1L);
      DFSTestUtil.waitReplication(fs, file1, (short) 3);
      c = fs.getContentSummary(dir);
      checkContentSummary(c, webhdfs.getContentSummary(dir));
      assertEquals("Quota is half consumed", QUOTA_SIZE / 2,
                   c.getSpaceConsumed());

      // We cannot create the 2nd file: even though the total space used by
      // the two files (2 * 3 * 512/2) would fit within the quota (3 * 512),
      // the space is charged conservatively when a block is allocated
      // (3 * block size, i.e. it assumes a full block is written)
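checkContentSummary is a helper in this test that presumably compares the DistributedFileSystem and WebHDFS views of the same directory. A hedged sketch of what such a field-by-field comparison could look like:

static void checkContentSummary(ContentSummary expected, ContentSummary actual) {
  // Both file systems should report identical aggregates for the same path.
  assertEquals(expected.getLength(), actual.getLength());
  assertEquals(expected.getFileCount(), actual.getFileCount());
  assertEquals(expected.getDirectoryCount(), actual.getDirectoryCount());
  assertEquals(expected.getSpaceConsumed(), actual.getSpaceConsumed());
}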
