Package org.apache.flink.core.fs

Examples of org.apache.flink.core.fs.FileSystem


    final FileBaseStatistics cachedFileStats = (cachedStats != null && cachedStats instanceof FileBaseStatistics) ?
      (FileBaseStatistics) cachedStats : null;
       
    try {
      final Path path = this.filePath;
      final FileSystem fs = FileSystem.get(path.toUri());
     
      return getFileStats(cachedFileStats, path, fs, new ArrayList<FileStatus>(1));
    } catch (IOException ioex) {
      if (LOG.isWarnEnabled()) {
        LOG.warn("Could not determine statistics for file '" + this.filePath + "' due to an io error: "
View Full Code Here


    // get all the files that are involved in the splits
    List<FileStatus> files = new ArrayList<FileStatus>();
    long totalLength = 0;

    final FileSystem fs = path.getFileSystem();
    final FileStatus pathFile = fs.getFileStatus(path);

    if(!acceptFile(pathFile)) {
      throw new IOException("The given file does not pass the file-filter");
    }
    if (pathFile.isDir()) {
      // input is directory. list all contained files
      final FileStatus[] dir = fs.listStatus(path);
      for (int i = 0; i < dir.length; i++) {
        if (!dir[i].isDir() && acceptFile(dir[i])) {
          files.add(dir[i]);
          totalLength += dir[i].getLen();
          // as soon as there is one deflate file in a directory, we can not split it
          testForUnsplittable(dir[i]);
        }
      }
    } else {
      testForUnsplittable(pathFile);
     
      files.add(pathFile);
      totalLength += pathFile.getLen();
    }
    // returns if unsplittable
    if(unsplittable) {
      int splitNum = 0;
      for (final FileStatus file : files) {
        final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, file.getLen());
        Set<String> hosts = new HashSet<String>();
        for(BlockLocation block : blocks) {
          hosts.addAll(Arrays.asList(block.getHosts()));
        }
        long len = file.getLen();
        if(testForUnsplittable(file)) {
          len = READ_WHOLE_SPLIT_FLAG;
        }
        FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, len,
            hosts.toArray(new String[hosts.size()]));
        inputSplits.add(fis);
      }
      return inputSplits.toArray(new FileInputSplit[inputSplits.size()]);
    }
   

    final long maxSplitSize = (minNumSplits < 1) ? Long.MAX_VALUE : (totalLength / minNumSplits +
          (totalLength % minNumSplits == 0 ? 0 : 1));

    // now that we have the files, generate the splits
    int splitNum = 0;
    for (final FileStatus file : files) {

      final long len = file.getLen();
      final long blockSize = file.getBlockSize();
     
      final long minSplitSize;
      if (this.minSplitSize <= blockSize) {
        minSplitSize = this.minSplitSize;
      }
      else {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Minimal split size of " + this.minSplitSize + " is larger than the block size of " +
            blockSize + ". Decreasing minimal split size to block size.");
        }
        minSplitSize = blockSize;
      }

      final long splitSize = Math.max(minSplitSize, Math.min(maxSplitSize, blockSize));
      final long halfSplit = splitSize >>> 1;

      final long maxBytesForLastSplit = (long) (splitSize * MAX_SPLIT_SIZE_DISCREPANCY);

      if (len > 0) {

        // get the block locations and make sure they are in order with respect to their offset
        final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, len);
        Arrays.sort(blocks);

        long bytesUnassigned = len;
        long position = 0;

        int blockIndex = 0;

        while (bytesUnassigned > maxBytesForLastSplit) {
          // get the block containing the majority of the data
          blockIndex = getBlockIndexForPosition(blocks, position, halfSplit, blockIndex);
          // create a new split
          FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, splitSize,
            blocks[blockIndex].getHosts());
          inputSplits.add(fis);

          // adjust the positions
          position += splitSize;
          bytesUnassigned -= splitSize;
        }

        // assign the last split
        if (bytesUnassigned > 0) {
          blockIndex = getBlockIndexForPosition(blocks, position, halfSplit, blockIndex);
          final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position,
            bytesUnassigned, blocks[blockIndex].getHosts());
          inputSplits.add(fis);
        }
      } else {
        // special case with a file of zero bytes size
        final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, 0);
        String[] hosts;
        if (blocks.length > 0) {
          hosts = blocks[0].getHosts();
        } else {
          hosts = new String[0];
View Full Code Here

    }

    @Override
    public void run() {
      try {
        final FileSystem fs = FileSystem.get(this.split.getPath().toUri());
        this.fdis = fs.open(this.split.getPath());
       
        // check for canceling and close the stream in that case, because no one will obtain it
        if (this.aborted) {
          final FSDataInputStream f = this.fdis;
          this.fdis = null;
View Full Code Here

    inputFormat.setFilePath(normalizedPath);
    inputFormat.setOpenTimeout(0);
    inputFormat.configure(configuration);

    final FileSystem fs = FileSystem.get(normalizedPath.toUri());
    FileStatus fileStatus = fs.getFileStatus(normalizedPath);

    BlockLocation[] blocks = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    inputFormat.open(new FileInputSplit(0, new Path(path), 0, fileStatus.getLen(), blocks[0].getHosts()));
    return inputFormat;
  }
View Full Code Here

   */
  @SuppressWarnings("unchecked")
  public static <T, F extends FileInputFormat<T>> List<F> openAllInputs(
      Class<F> inputFormatClass, String path, Configuration configuration) throws IOException {
    Path nephelePath = new Path(path);
    FileSystem fs = nephelePath.getFileSystem();
    FileStatus fileStatus = fs.getFileStatus(nephelePath);
    if (!fileStatus.isDir()) {
      return Arrays.asList(openInput(inputFormatClass, path, configuration));
    }
    FileStatus[] list = fs.listStatus(nephelePath);
    List<F> formats = new ArrayList<F>();
    for (int index = 0; index < list.length; index++) {
      formats.add(openInput(inputFormatClass, list[index].getPath().toString(), configuration));
    }
    return formats;
View Full Code Here

    final String bucketName = getRandomName();
    final Path bucketPath = new Path(S3_BASE_URI + bucketName + Path.SEPARATOR);

    try {

      final FileSystem fs = bucketPath.getFileSystem();

      // Create directory
      fs.mkdirs(bucketPath);

      // Check if directory is correctly displayed in file system hierarchy
      final FileStatus[] content = fs.listStatus(new Path(S3_BASE_URI));
      boolean entryFound = false;
      for (final FileStatus entry : content) {
        if (bucketPath.equals(entry.getPath())) {
          entryFound = true;
          break;
        }
      }

      if (!entryFound) {
        fail("Cannot find entry " + bucketName + " in directory " + S3_BASE_URI);
      }

      // Check the concrete directory file status
      try {
        final FileStatus directoryFileStatus = fs.getFileStatus(bucketPath);
        assertTrue(directoryFileStatus.isDir());
        assertEquals(0L, directoryFileStatus.getAccessTime());
        assertTrue(directoryFileStatus.getModificationTime() > 0L);

      } catch (FileNotFoundException e) {
        fail(e.getMessage());
      }

      // Delete the bucket
      fs.delete(bucketPath, true);

      // Make sure the bucket no longer exists
      try {
        fs.getFileStatus(bucketPath);
        fail("Expected FileNotFoundException for " + bucketPath.toUri());
      } catch (FileNotFoundException e) {
        // This is an expected exception
      }
View Full Code Here

      + subsubdirName + Path.SEPARATOR);
    final Path file = new Path(S3_BASE_URI + dirName + Path.SEPARATOR + subdirName + Path.SEPARATOR + fileName);

    try {

      final FileSystem fs = dir.getFileSystem();

      fs.mkdirs(subsubdir);

      final OutputStream os = fs.create(file, true);
      generateTestData(os, SMALL_FILE_SIZE);
      os.close();

      // On this directory level there should be only one subdirectory
      FileStatus[] list = fs.listStatus(dir);
      int numberOfDirs = 0;
      int numberOfFiles = 0;
      for (final FileStatus entry : list) {

        if (entry.isDir()) {
          ++numberOfDirs;
          assertEquals(subdir, entry.getPath());
        } else {
          fail(entry.getPath() + " is a file which must not appear on this directory level");
        }
      }

      assertEquals(1, numberOfDirs);
      assertEquals(0, numberOfFiles);

      list = fs.listStatus(subdir);
      numberOfDirs = 0;

      for (final FileStatus entry : list) {
        if (entry.isDir()) {
          assertEquals(subsubdir, entry.getPath());
          ++numberOfDirs;
        } else {
          assertEquals(file, entry.getPath());
          ++numberOfFiles;
        }
      }

      assertEquals(1, numberOfDirs);
      assertEquals(1, numberOfFiles);

      fs.delete(dir, true);

    } catch (IOException ioe) {
      fail(ioe.getMessage());
    }
  }
View Full Code Here

    final Path dir = new Path(S3_BASE_URI + dirName + Path.SEPARATOR);
    final Path file = new Path(S3_BASE_URI + dirName + Path.SEPARATOR + fileName);

    try {

      final FileSystem fs = dir.getFileSystem();

      fs.mkdirs(dir);

      final OutputStream os = fs.create(file, true);
      generateTestData(os, SMALL_FILE_SIZE);
      os.close();

      final FileStatus fileStatus = fs.getFileStatus(file);
      assertNotNull(fileStatus);

      BlockLocation[] blockLocations = fs.getFileBlockLocations(fileStatus, 0, SMALL_FILE_SIZE + 1);
      assertNull(blockLocations);

      blockLocations = fs.getFileBlockLocations(fileStatus, 0, SMALL_FILE_SIZE);
      assertEquals(1, blockLocations.length);

      final BlockLocation bl = blockLocations[0];
      assertNotNull(bl.getHosts());
      assertEquals(1, bl.getHosts().length);
      assertEquals(SMALL_FILE_SIZE, bl.getLength());
      assertEquals(0, bl.getOffset());
      final URI s3Uri = fs.getUri();
      assertNotNull(s3Uri);
      assertEquals(s3Uri.getHost(), bl.getHosts()[0]);

      fs.delete(dir, true);

    } catch (IOException ioe) {
      fail(ioe.getMessage());
    }
  }
View Full Code Here

    final String bucketName = getRandomName();
    final String objectName = getRandomName();
    final Path bucketPath = new Path(S3_BASE_URI + bucketName + Path.SEPARATOR);
    final Path objectPath = new Path(S3_BASE_URI + bucketName + Path.SEPARATOR + objectName);

    FileSystem fs = bucketPath.getFileSystem();

    // Create test bucket
    fs.mkdirs(bucketPath);

    // Write test file to S3
    final FSDataOutputStream outputStream = fs.create(objectPath, false);
    generateTestData(outputStream, fileSize);
    outputStream.close();

    // Now read the same file back from S3
    final FSDataInputStream inputStream = fs.open(objectPath);
    testReceivedData(inputStream, fileSize);
    inputStream.close();

    // Delete test bucket
    fs.delete(bucketPath, true);
  }
View Full Code Here

   
    // get the file info and check whether the cached statistics are still valid.
    for(org.apache.hadoop.fs.Path hadoopPath : hadoopFilePaths) {
     
      final Path filePath = new Path(hadoopPath.toUri());
      final FileSystem fs = FileSystem.get(filePath.toUri());
     
      final FileStatus file = fs.getFileStatus(filePath);
      latestModTime = Math.max(latestModTime, file.getModificationTime());
     
      // enumerate all files and check their modification time stamp.
      if (file.isDir()) {
        FileStatus[] fss = fs.listStatus(filePath);
        files.ensureCapacity(files.size() + fss.length);
       
        for (FileStatus s : fss) {
          if (!s.isDir()) {
            files.add(s);
View Full Code Here

TOP

Related Classes of org.apache.flink.core.fs.FileSystem

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.