Examples of org.apache.hadoop.fs.ContentSummary

org.apache.hadoop.fs.ContentSummary
Store the summary of a content (a directory or a file).

      String diskUsed = "Unknown";
      try {
        Path path = new Path(ServerConfiguration.getSystemConfiguration().get(Property.INSTANCE_DFS_DIR));
        log.debug("Reading the content summary for " + path);
        try {
          ContentSummary acu = fs.getContentSummary(path);
          diskUsed = bytes(acu.getSpaceConsumed());
          ContentSummary rootSummary = fs.getContentSummary(new Path("/"));
          consumed = String.format("%.2f%%", acu.getSpaceConsumed() * 100. / rootSummary.getSpaceConsumed());
        } catch (Exception ex) {
          log.trace("Unable to get disk usage information from hdfs", ex);
        }


        boolean highlight = false;

View Full Code Here

          Path unsorted = new Path(logFile.unsortedFileName());
          if (fs.exists(unsorted))
            return fs.getFileStatus(unsorted).getLen();
          return fs.getFileStatus(new Path(logFile.copyTempFileName())).getLen();
        } else {
          ContentSummary contentSummary = fs.getContentSummary(new Path(logFile.recoveryFileName()));
          // map files are bigger than sequence files
          return (long) (contentSummary.getSpaceConsumed() * .8);
        }
      } catch (Exception ex) {
        return 0;
      }
    }

View Full Code Here


        if (filter != null && !filter.accept(p)) {
          continue;
        }


        ContentSummary cs = ctx.getCS(path);
        if (cs == null) {
          if (path == null) {
            continue;
          }
          pathNeedProcess.add(path);
        } else {
          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();
        }
      }


      // Process the case when name node call is needed
      final Map<String, ContentSummary> resultMap = new ConcurrentHashMap<String, ContentSummary>();
      ArrayList<Future<?>> results = new ArrayList<Future<?>>();
      final ThreadPoolExecutor executor;
      int maxThreads = ctx.getConf().getInt("mapred.dfsclient.parallelism.max", 0);
      if (pathNeedProcess.size() > 1 && maxThreads > 1) {
        int numExecutors = Math.min(pathNeedProcess.size(), maxThreads);
        LOG.info("Using " + numExecutors + " threads for getContentSummary");
        executor = new ThreadPoolExecutor(numExecutors, numExecutors, 60, TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>());
      } else {
        executor = null;
      }


      HiveInterruptCallback interrup = HiveInterruptUtils.add(new HiveInterruptCallback() {
        @Override
        public void interrupt() {
          if (executor != null) {
            executor.shutdownNow();
          }
        }
      });
      try {
        Configuration conf = ctx.getConf();
        JobConf jobConf = new JobConf(conf);
        for (String path : pathNeedProcess) {
          final Path p = new Path(path);
          final String pathStr = path;
          // All threads share the same Configuration and JobConf based on the
          // assumption that they are thread safe if only read operations are
          // executed. It is not stated in Hadoop's javadoc, the sourcce codes
          // clearly showed that they made efforts for it and we believe it is
          // thread safe. Will revisit this piece of codes if we find the assumption
          // is not correct.
          final Configuration myConf = conf;
          final JobConf myJobConf = jobConf;
          final PartitionDesc partDesc = work.getPathToPartitionInfo().get(
              p.toString());
          Runnable r = new Runnable() {
            public void run() {
              try {
                ContentSummary resultCs;


                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  resultCs = ((ContentSummaryInputFormat) inputFormatObj).getContentSummary(p,
                      myJobConf);
                } else {
                  FileSystem fs = p.getFileSystem(myConf);
                  resultCs = fs.getContentSummary(p);
                }
                resultMap.put(pathStr, resultCs);
              } catch (IOException e) {
                // We safely ignore this exception for summary data.
                // We don't update the cache to protect it from polluting other
                // usages. The worst case is that IOException will always be
                // retried for another getInputSummary(), which is fine as
                // IOException is not considered as a common case.
                LOG.info("Cannot get size of " + pathStr + ". Safely ignored.");
              }
            }
          };


          if (executor == null) {
            r.run();
          } else {
            Future<?> result = executor.submit(r);
            results.add(result);
          }
        }


        if (executor != null) {
          for (Future<?> result : results) {
            boolean executorDone = false;
            do {
              try {
                result.get();
                executorDone = true;
              } catch (InterruptedException e) {
                LOG.info("Interrupted when waiting threads: ", e);
                Thread.currentThread().interrupt();
                break;
              } catch (ExecutionException e) {
                throw new IOException(e);
              }
            } while (!executorDone);
    }
          executor.shutdown();
        }
        HiveInterruptUtils.checkInterrupted();
        for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
          ContentSummary cs = entry.getValue();


          summary[0] += cs.getLength();
          summary[1] += cs.getFileCount();
          summary[2] += cs.getDirectoryCount();


          ctx.addCS(entry.getKey(), cs);
          LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength()
              + " file count: "
              + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
        }


        return new ContentSummary(summary[0], summary[1], summary[2]);
      } finally {
        HiveInterruptUtils.remove(interrup);
      }
    }
  }

View Full Code Here

    }
  }


  public static boolean isEmptyPath(JobConf job, String dirPath, Context ctx)
      throws Exception {
    ContentSummary cs = ctx.getCS(dirPath);
    if (cs != null) {
      LOG.info("Content Summary " + dirPath + "length: " + cs.getLength() + " num files: "
          + cs.getFileCount() + " num directories: " + cs.getDirectoryCount());
      return (cs.getLength() == 0 && cs.getFileCount() == 0 && cs.getDirectoryCount() <= 1);
    } else {
      LOG.info("Content Summary not cached for " + dirPath);
    }
    Path p = new Path(dirPath);
    return isEmptyPath(job, p);

View Full Code Here

    if (desc.getPath() != null) {
      try {
        FileSystem fs = desc.getPath().getFileSystem(new Configuration());
        FileStatus status = fs.getFileStatus(desc.getPath());
        if (desc.getMeta().getStat() != null && (status.isDirectory() || status.isFile())) {
          ContentSummary summary = fs.getContentSummary(desc.getPath());
          if (summary != null) {
            long volume = summary.getLength();
            desc.getMeta().getStat().setNumBytes(volume);
          }
        }
      } catch (Throwable t) {
        LOG.warn(t);

View Full Code Here

      }


      TableMeta meta = subQuery.getTableMeta();
      try {
        FileSystem fs = finalOutputDir.getFileSystem(query.systemConf);
        ContentSummary directorySummary = fs.getContentSummary(finalOutputDir);
        if(meta.getStat() == null) meta.setStat(new TableStat());
        meta.getStat().setNumBytes(directorySummary.getLength());
      } catch (IOException e) {
        LOG.error(e.getMessage(), e);
      }
      TableDesc outputTableDesc = new TableDescImpl(outputTableName, meta, finalOutputDir);
      TableDesc finalTableDesc = outputTableDesc;

View Full Code Here

        .getSpaceConsumed();
    long ns = quota.get(Quota.NAMESPACE);
    long ds = quota.get(Quota.DISKSPACE);
    assertEquals(2, ns); // foo and bar
    assertEquals(currentFileLen * REPLICATION, ds);
    ContentSummary c = dfs.getContentSummary(foo);
    assertEquals(c.getSpaceConsumed(), ds);


    // append another block, the previous file length is not at block boundary
    DFSTestUtil.appendFile(dfs, bar, BLOCKSIZE);
    currentFileLen += BLOCKSIZE;


    quota = fooNode.getDirectoryWithQuotaFeature().getSpaceConsumed();
    ns = quota.get(Quota.NAMESPACE);
    ds = quota.get(Quota.DISKSPACE);
    assertEquals(2, ns); // foo and bar
    assertEquals(currentFileLen * REPLICATION, ds);
    c = dfs.getContentSummary(foo);
    assertEquals(c.getSpaceConsumed(), ds);


    // append several blocks
    DFSTestUtil.appendFile(dfs, bar, BLOCKSIZE * 3 + BLOCKSIZE / 8);
    currentFileLen += (BLOCKSIZE * 3 + BLOCKSIZE / 8);


    quota = fooNode.getDirectoryWithQuotaFeature().getSpaceConsumed();
    ns = quota.get(Quota.NAMESPACE);
    ds = quota.get(Quota.DISKSPACE);
    assertEquals(2, ns); // foo and bar
    assertEquals(currentFileLen * REPLICATION, ds);
    c = dfs.getContentSummary(foo);
    assertEquals(c.getSpaceConsumed(), ds);
  }

View Full Code Here

    HttpURLConnection conn =
      getConnection(Operation.GETCONTENTSUMMARY.getMethod(), params, f, true);
    HttpFSUtils.validateResponse(conn, HttpURLConnection.HTTP_OK);
    JSONObject json = (JSONObject) ((JSONObject)
      HttpFSUtils.jsonParse(conn)).get(CONTENT_SUMMARY_JSON);
    return new ContentSummary((Long) json.get(CONTENT_SUMMARY_LENGTH_JSON),
                              (Long) json.get(CONTENT_SUMMARY_FILE_COUNT_JSON),
                              (Long) json.get(CONTENT_SUMMARY_DIRECTORY_COUNT_JSON),
                              (Long) json.get(CONTENT_SUMMARY_QUOTA_JSON),
                              (Long) json.get(CONTENT_SUMMARY_SPACE_CONSUMED_JSON),
                              (Long) json.get(CONTENT_SUMMARY_SPACE_QUOTA_JSON)

View Full Code Here

    FileSystem fs = FileSystem.get(getProxiedFSConf());
    Path path = new Path(getProxiedFSTestDir(), "foo.txt");
    OutputStream os = fs.create(path);
    os.write(1);
    os.close();
    ContentSummary hdfsContentSummary = fs.getContentSummary(path);
    fs.close();
    fs = getHttpFSFileSystem();
    ContentSummary httpContentSummary = fs.getContentSummary(path);
    fs.close();
    Assert.assertEquals(httpContentSummary.getDirectoryCount(), hdfsContentSummary.getDirectoryCount());
    Assert.assertEquals(httpContentSummary.getFileCount(), hdfsContentSummary.getFileCount());
    Assert.assertEquals(httpContentSummary.getLength(), hdfsContentSummary.getLength());
    Assert.assertEquals(httpContentSummary.getQuota(), hdfsContentSummary.getQuota());
    Assert.assertEquals(httpContentSummary.getSpaceConsumed(), hdfsContentSummary.getSpaceConsumed());
    Assert.assertEquals(httpContentSummary.getSpaceQuota(), hdfsContentSummary.getSpaceQuota());
  }

View Full Code Here

     *
     * @throws IOException thrown if an IO error occured.
     */
    @Override
    public Map execute(FileSystem fs) throws IOException {
      ContentSummary contentSummary = fs.getContentSummary(path);
      return contentSummaryToJSON(contentSummary);
    }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.fs.ContentSummary

co.cask.cdap.internal.app.runtime.distributed.DistributedProgramRuntimeService$ClusterResourceReporter

com.cloudera.sqoop.TestTargetDir

com.hdfsTools.impl.checkImpl

com.linkedin.camus.etl.kafka.CamusJob

com.quantcast.qfs.hadoop.QuantcastFileSystem

com.taobao.zeus.jobs.sub.tool.OutputCheckJob

com.twitter.elephantbird.util.HdfsUtils

org.apache.accumulo.monitor.servlets.DefaultServlet

org.apache.accumulo.server.master.CoordinateRecoveryTask$RecoveryJob

org.apache.accumulo.server.monitor.servlets.DefaultServlet

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.