Package org.apache.hadoop.fs

Examples of org.apache.hadoop.fs.ContentSummary

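ContentSummary describes the aggregate content of a directory tree: total length in bytes, file count, directory count and, where quotas apply, the namespace quota, space consumed and space quota. Most of the snippets below obtain it through FileSystem.getContentSummary(Path). A minimal sketch of that call (the path /tmp/data is only a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummarySketch {
  public static void main(String[] args) throws Exception {
    // Placeholder path; any directory visible to the configured FileSystem will do.
    Path dir = new Path("/tmp/data");
    FileSystem fs = dir.getFileSystem(new Configuration());

    ContentSummary cs = fs.getContentSummary(dir);
    System.out.println("length (bytes):  " + cs.getLength());
    System.out.println("file count:      " + cs.getFileCount());
    System.out.println("directory count: " + cs.getDirectoryCount());
    System.out.println("space consumed:  " + cs.getSpaceConsumed());
    System.out.println("space quota:     " + cs.getSpaceQuota());
  }
}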

  /** Collect the blocks in this subtree into v and clear this subtree's references. */
  abstract int collectSubtreeBlocksAndClear(List<Block> v);

  /** Compute {@link ContentSummary}. */
  final ContentSummary computeContentSummary() {
    long[] a = computeContentSummary(new long[]{0,0,0});
    return new ContentSummary(a[0], a[1], a[2], getQuota());
  }
View Full Code Here


          if (taskOutputPath != null) {
            // Get the file-system for the task output directory
            FileSystem fs = taskOutputPath.getFileSystem(conf);
            if (fs.exists(taskOutputPath)) {
              // Get the summary for the folder
              ContentSummary summary = fs.getContentSummary(taskOutputPath);
              // Check if the directory contains data to be promoted,
              // i.e. total files + total directories - 1 (the directory itself) > 0
              if (summary != null
                  && (summary.getFileCount() + summary.getDirectoryCount() - 1)
                      > 0) {
                shouldBePromoted = true;
              }
            } else {
              LOG.info(getTaskID() + ": No outputs to promote from " + taskOutputPath);
            }
          }
View Full Code Here
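The check in the snippet above can be restated as a stand-alone helper; this is only an illustrative sketch (class and method names are hypothetical). getDirectoryCount() includes the directory itself, which is why 1 is subtracted: an empty directory yields 0 files + 1 directory - 1 = 0, so nothing is promoted.

import org.apache.hadoop.fs.ContentSummary;

public class PromotionCheckSketch {
  /** True if the summarized directory holds anything besides itself. */
  static boolean shouldPromote(ContentSummary summary) {
    return summary != null
        && (summary.getFileCount() + summary.getDirectoryCount() - 1) > 0;
  }
}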

    FileSystem fs = getFs(f);
    return fsHandler.deleteDir(fs, f, recursive, conf);
  }

  public boolean isEmpty(Path path) throws IOException, MetaException {
    ContentSummary contents = getFs(path).getContentSummary(path);
    if (contents != null && contents.getFileCount() == 0 && contents.getDirectoryCount() == 1) {
      return true;
    }
    return false;
  }
View Full Code Here

        if (filter != null && !filter.accept(p)) {
          continue;
        }

        ContentSummary cs = ctx.getCS(path);
        if (cs == null) {
          if (path == null) {
            continue;
          }
          pathNeedProcess.add(path);
        } else {
          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();
        }
      }

      // Process the paths for which a name node call is needed
      final Map<String, ContentSummary> resultMap = new ConcurrentHashMap<String, ContentSummary>();
      ArrayList<Future<?>> results = new ArrayList<Future<?>>();
      final ThreadPoolExecutor executor;
      int maxThreads = ctx.getConf().getInt("mapred.dfsclient.parallelism.max", 0);
      if (pathNeedProcess.size() > 1 && maxThreads > 1) {
        int numExecutors = Math.min(pathNeedProcess.size(), maxThreads);
        LOG.info("Using " + numExecutors + " threads for getContentSummary");
        executor = new ThreadPoolExecutor(numExecutors, numExecutors, 60, TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>());
      } else {
        executor = null;
      }

      HiveInterruptCallback interrup = HiveInterruptUtils.add(new HiveInterruptCallback() {
        @Override
        public void interrupt() {
          for (String path : pathNeedProcess) {
            try {
              new Path(path).getFileSystem(ctx.getConf()).close();
            } catch (IOException ignore) {
              LOG.debug(ignore);
            }
          }
          if (executor != null) {
            executor.shutdownNow();
          }
        }
      });
      try {
        Configuration conf = ctx.getConf();
        JobConf jobConf = new JobConf(conf);
        for (String path : pathNeedProcess) {
          final Path p = new Path(path);
          final String pathStr = path;
          // All threads share the same Configuration and JobConf based on the
          // assumption that they are thread safe as long as only read operations
          // are performed. Although this is not stated in Hadoop's javadoc, the
          // source code clearly shows an effort to make them thread safe, so we
          // believe the assumption holds. Revisit this code if the assumption
          // turns out to be incorrect.
          final Configuration myConf = conf;
          final JobConf myJobConf = jobConf;
          final Map<String, Operator<?>> aliasToWork = work.getAliasToWork();
          final Map<String, ArrayList<String>> pathToAlias = work.getPathToAliases();
          final PartitionDesc partDesc = work.getPathToPartitionInfo().get(
              p.toString());
          Runnable r = new Runnable() {
            @Override
            public void run() {
              try {
                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                  resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                  return;
                }
                HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf,
                    partDesc.getOverlayedProperties().getProperty(
                    hive_metastoreConstants.META_TABLE_STORAGE));
                if (handler instanceof InputEstimator) {
                  long total = 0;
                  TableDesc tableDesc = partDesc.getTableDesc();
                  InputEstimator estimator = (InputEstimator) handler;
                  for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
                    JobConf jobConf = new JobConf(myJobConf);
                    TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
                    Utilities.setColumnNameList(jobConf, scanOp, true);
                    Utilities.setColumnTypeList(jobConf, scanOp, true);
                    PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
                    Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
                    total += estimator.estimate(myJobConf, scanOp, -1).getTotalLength();
                  }
                  resultMap.put(pathStr, new ContentSummary(total, -1, -1));
                }
                // todo: should nullify summary for non-native tables,
                // not to be selected as a mapjoin target
                FileSystem fs = p.getFileSystem(myConf);
                resultMap.put(pathStr, fs.getContentSummary(p));
              } catch (Exception e) {
                // We safely ignore this exception for summary data. The cache
                // is not updated, to avoid polluting it for other usages. In
                // the worst case the IOException simply recurs on the next
                // getInputSummary() call, which is acceptable since IOException
                // is not the common case.
                LOG.info("Cannot get size of " + pathStr + ". Safely ignored.");
              }
            }
          };

          if (executor == null) {
            r.run();
          } else {
            Future<?> result = executor.submit(r);
            results.add(result);
          }
        }

        if (executor != null) {
          for (Future<?> result : results) {
            boolean executorDone = false;
            do {
              try {
                result.get();
                executorDone = true;
              } catch (InterruptedException e) {
                LOG.info("Interrupted while waiting for threads: ", e);
                Thread.currentThread().interrupt();
                break;
              } catch (ExecutionException e) {
                throw new IOException(e);
              }
            } while (!executorDone);
          }
          executor.shutdown();
        }
        HiveInterruptUtils.checkInterrupted();
        for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
          ContentSummary cs = entry.getValue();

          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();

          ctx.addCS(entry.getKey(), cs);
          LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength()
              + " file count: "
              + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
        }

        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
        return new ContentSummary(summary[0], summary[1], summary[2]);
      } finally {
        HiveInterruptUtils.remove(interrup);
      }
    }
  }
View Full Code Here
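Stripped of the Hive-specific plumbing, the snippet above fans getContentSummary() calls out to a small thread pool and sums the results. A simplified sketch of that pattern (class and method names are hypothetical; thread count, error handling and caching are deliberately omitted):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ParallelSummarySketch {
  /** Sums length, file count and directory count over the given paths. */
  static ContentSummary summarize(List<Path> paths, final Configuration conf, int threads)
      throws InterruptedException, ExecutionException {
    ExecutorService pool = Executors.newFixedThreadPool(Math.max(1, threads));
    try {
      List<Future<ContentSummary>> futures = new ArrayList<Future<ContentSummary>>();
      for (final Path p : paths) {
        futures.add(pool.submit(new Callable<ContentSummary>() {
          public ContentSummary call() throws Exception {
            // One name node round trip per path.
            return p.getFileSystem(conf).getContentSummary(p);
          }
        }));
      }
      long length = 0, files = 0, dirs = 0;
      for (Future<ContentSummary> f : futures) {
        ContentSummary cs = f.get();
        length += cs.getLength();
        files += cs.getFileCount();
        dirs += cs.getDirectoryCount();
      }
      return new ContentSummary(length, files, dirs);
    } finally {
      pool.shutdown();
    }
  }
}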

    return total;
  }

  public static boolean isEmptyPath(JobConf job, Path dirPath, Context ctx)
      throws Exception {
    ContentSummary cs = ctx.getCS(dirPath);
    if (cs != null) {
      LOG.info("Content Summary " + dirPath + " length: " + cs.getLength() + " num files: "
          + cs.getFileCount() + " num directories: " + cs.getDirectoryCount());
      return (cs.getLength() == 0 && cs.getFileCount() == 0 && cs.getDirectoryCount() <= 1);
    } else {
      LOG.info("Content Summary not cached for " + dirPath);
    }
    return isEmptyPath(job, dirPath);
  }
View Full Code Here

          Path unsorted = new Path(logFile.unsortedFileName());
          if (fs.exists(unsorted))
            return fs.getFileStatus(unsorted).getLen();
          return fs.getFileStatus(new Path(logFile.copyTempFileName())).getLen();
        } else {
          ContentSummary contentSummary = fs.getContentSummary(new Path(logFile.recoveryFileName()));
          // map files are bigger than sequence files, hence the 0.8 scale-down
          return (long) (contentSummary.getSpaceConsumed() * .8);
        }
      } catch (Exception ex) {
        return 0;
      }
    }
View Full Code Here
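The 0.8 factor above is a heuristic specific to that code. The more general point is the difference between the two size accessors: getLength() is the logical size of the data, while getSpaceConsumed() counts every replica, so with replication 3 it is roughly three times getLength(). A small sketch (fs and path are assumed to exist):

import java.io.IOException;

import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SpaceVsLengthSketch {
  /** Ratio of raw disk usage to logical size, i.e. the effective replication. */
  static double effectiveReplication(FileSystem fs, Path path) throws IOException {
    ContentSummary cs = fs.getContentSummary(path);
    long logical = cs.getLength();        // bytes of data, counted once
    long raw = cs.getSpaceConsumed();     // bytes on disk, all replicas
    return logical > 0 ? (double) raw / logical : 0.0;
  }
}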

  public final long getTableSize(String table) throws IOException {
    checkTable(table);
    String tableUri = getTableContext(table).getTablePath().toUri().toString();
    Path tablePath = new Path(tableUri);
    FileSystem fileSystem = FileSystem.get(tablePath.toUri(), _configuration);
    ContentSummary contentSummary = fileSystem.getContentSummary(tablePath);
    return contentSummary.getLength();
  }
View Full Code Here

        || spaceQuota == null) {
      return null;
    }

    try {
      return new ContentSummary(
          Long.parseLong(length),
          Long.parseLong(fileCount),
          Long.parseLong(directoryCount),
          Long.parseLong(quota),
          Long.parseLong(spaceConsumed),
          Long.parseLong(spaceQuota));
View Full Code Here
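For reference, the constructor being assembled above is the long form that also carries the quota fields: ContentSummary(length, fileCount, directoryCount, quota, spaceConsumed, spaceQuota). A sketch with made-up values:

import org.apache.hadoop.fs.ContentSummary;

public class ContentSummaryCtorSketch {
  public static void main(String[] args) {
    // Made-up numbers: 1 MB of data in 3 files and 2 directories, a namespace
    // quota of 100 entries, and 3 MB consumed against a 10 MB space quota.
    ContentSummary cs = new ContentSummary(
        1L << 20,    // length
        3,           // fileCount
        2,           // directoryCount
        100,         // quota (namespace)
        3L << 20,    // spaceConsumed
        10L << 20);  // spaceQuota
    System.out.println("namespace quota: " + cs.getQuota());
    System.out.println("space quota:     " + cs.getSpaceQuota());
  }
}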

  }

  /** {@inheritDoc} */
  public ContentSummary getContentSummary(Path f) throws IOException {
    final String s = makeQualified(f).toUri().getPath();
    final ContentSummary cs = new ContentSummaryParser().getContentSummary(s);
    return cs != null? cs: super.getContentSummary(f);
  }
View Full Code Here

      assertTrue(dfs.mkdirs(new Path("/nqdir0/qdir1/qdir20/nqdir30")));

      // set the quota of /nqdir0/qdir1 to 4 * fileSpace
      final Path quotaDir1 = new Path("/nqdir0/qdir1");
      dfs.setQuota(quotaDir1, FSConstants.QUOTA_DONT_SET, 4 * fileSpace);
      ContentSummary c = dfs.getContentSummary(quotaDir1);
      assertEquals(c.getSpaceQuota(), 4 * fileSpace);
     
      // set the quota of /nqdir0/qdir1/qdir20 to 6 * fileSpace
      final Path quotaDir20 = new Path("/nqdir0/qdir1/qdir20");
      dfs.setQuota(quotaDir20, FSConstants.QUOTA_DONT_SET, 6 * fileSpace);
      c = dfs.getContentSummary(quotaDir20);
      assertEquals(c.getSpaceQuota(), 6 * fileSpace);


      // Create /nqdir0/qdir1/qdir21 and set its space quota to 2 * fileSpace
      final Path quotaDir21 = new Path("/nqdir0/qdir1/qdir21");
      assertTrue(dfs.mkdirs(quotaDir21));
      dfs.setQuota(quotaDir21, FSConstants.QUOTA_DONT_SET, 2 * fileSpace);
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceQuota(), 2 * fileSpace);

      // 5: Create directory /nqdir0/qdir1/qdir21/nqdir32
      Path tempPath = new Path(quotaDir21, "nqdir32");
      assertTrue(dfs.mkdirs(tempPath));
     
      // create a file under nqdir32/fileDir
      DFSTestUtil.createFile(dfs, new Path(tempPath, "fileDir/file1"), fileLen,
                             replication, 0);
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceConsumed(), fileSpace);
     
      // Create a larger file /nqdir0/qdir1/qdir21/nqdir33/file2
      boolean hasException = false;
      try {
        DFSTestUtil.createFile(dfs, new Path(quotaDir21, "nqdir33/file2"),
                               2*fileLen, replication, 0);
      } catch (DSQuotaExceededException e) {
        hasException = true;
      }
      assertTrue(hasException);
      // delete nqdir33
      assertTrue(dfs.delete(new Path(quotaDir21, "nqdir33"), true));
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceConsumed(), fileSpace);
      assertEquals(c.getSpaceQuota(), 2*fileSpace);

      // Verify space before the move:
      c = dfs.getContentSummary(quotaDir20);
      assertEquals(c.getSpaceConsumed(), 0);
     
      // Move /nqdir0/qdir1/qdir21/nqdir32 /nqdir0/qdir1/qdir20/nqdir30
      Path dstPath = new Path(quotaDir20, "nqdir30");
      Path srcPath = new Path(quotaDir21, "nqdir32");
      assertTrue(dfs.rename(srcPath, dstPath));
     
      // verify space after the move
      c = dfs.getContentSummary(quotaDir20);
      assertEquals(c.getSpaceConsumed(), fileSpace);
      // verify space for its parent
      c = dfs.getContentSummary(quotaDir1);
      assertEquals(c.getSpaceConsumed(), fileSpace);
      // verify space for source for the move
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceConsumed(), 0);
     
      final Path file2 = new Path(dstPath, "fileDir/file2");
      int file2Len = 2 * fileLen;
      // create a larger file under /nqdir0/qdir1/qdir20/nqdir30
      DFSTestUtil.createFile(dfs, file2, file2Len, replication, 0);
     
      c = dfs.getContentSummary(quotaDir20);
      assertEquals(c.getSpaceConsumed(), 3 * fileSpace);
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceConsumed(), 0);
     
      // Reverse: Move /nqdir0/qdir1/qdir20/nqdir30 to /nqdir0/qdir1/qdir21/
      hasException = false;
      try {
        assertFalse(dfs.rename(dstPath, srcPath));
      } catch (DSQuotaExceededException e) {
        hasException = true;
      }
      assertTrue(hasException);
     
      // make sure no intermediate directories left by failed rename
      assertFalse(dfs.exists(srcPath));
      // directory should exist
      assertTrue(dfs.exists(dstPath));
           
      // verify space after the failed move
      c = dfs.getContentSummary(quotaDir20);
      assertEquals(c.getSpaceConsumed(), 3 * fileSpace);
      c = dfs.getContentSummary(quotaDir21);
      assertEquals(c.getSpaceConsumed(), 0);
     
      // Test Append :
     
      // verify space quota
      c = dfs.getContentSummary(quotaDir1);
      assertEquals(c.getSpaceQuota(), 4 * fileSpace);
     
      // verify space before append;
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 3 * fileSpace);
     
      OutputStream out = dfs.append(file2);
      // appending 1 fileLen should succeed
      out.write(new byte[fileLen]);
      out.close();
     
      file2Len += fileLen; // after append
     
      // verify space after append;
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 4 * fileSpace);
     
      // now increase the quota for quotaDir1
      dfs.setQuota(quotaDir1, FSConstants.QUOTA_DONT_SET, 5 * fileSpace);
      // Now, appending more than 1 fileLen should result in an error
      out = dfs.append(file2);
      hasException = false;
      try {
        out.write(new byte[fileLen + 1024]);
        out.flush();
        out.close();
      } catch (DSQuotaExceededException e) {
        hasException = true;
        IOUtils.closeStream(out);
      }
      assertTrue(hasException);
     
      file2Len += fileLen; // after partial append
     
      // verify space after partial append
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 5 * fileSpace);
     
      // Test set replication :
     
      // first reduce the replication
      dfs.setReplication(file2, (short)(replication-1));
     
      // verify that space is reduced by file2Len
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 5 * fileSpace - file2Len);
     
      // now try to increase the replication and expect an error.
      hasException = false;
      try {
        dfs.setReplication(file2, (short)(replication+1));
      } catch (DSQuotaExceededException e) {
        hasException = true;
      }
      assertTrue(hasException);

      // verify space consumed remains unchanged.
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 5 * fileSpace - file2Len);
     
      // now increase the quota for quotaDir1 and quotaDir20
      dfs.setQuota(quotaDir1, FSConstants.QUOTA_DONT_SET, 10 * fileSpace);
      dfs.setQuota(quotaDir20, FSConstants.QUOTA_DONT_SET, 10 * fileSpace);
     
      // then increasing replication should be ok.
      dfs.setReplication(file2, (short)(replication+1));
      // verify increase in space
      c = dfs.getContentSummary(dstPath);
      assertEquals(c.getSpaceConsumed(), 5 * fileSpace + file2Len);
     
    } finally {
      cluster.shutdown();
    }
  }
View Full Code Here
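The fileSpace figure used throughout the test is defined outside this excerpt; in quota tests of this style it is the disk space one test file charges against the space quota, i.e. fileLen times the replication factor. An illustration with assumed values:

// Assumed values for illustration; the original test may use different numbers.
final int fileLen = 1024;                      // logical size of each test file
final short replication = 3;                   // replication used by createFile
final long fileSpace = fileLen * replication;  // space charged against the quota
// A space quota of 4 * fileSpace therefore admits four such files (or the
// equivalent in appends or replication increases) before DSQuotaExceededException.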
