Examples of org.apache.hadoop.fs.PathFilter

org.apache.hadoop.fs.PathFilter

   * @throws IOException When scanning the files fails.
   */
  static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
      throws IOException {
    List<Path> res = new ArrayList<Path>();
    PathFilter dirFilter = new FSUtils.DirFilter(fs);
    FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
    for(FileStatus dir : familyDirs) {
      FileStatus[] files = fs.listStatus(dir.getPath());
      for (FileStatus file : files) {
        if (!file.isDir()) {

View Full Code Here

    int dsup = globs.length;
    for(int d=0; d<dsup; d++) {
      String leafName = globs[d].getName();
      LOG.info("StreamInputFormat: globs[" + d + "] leafName = " + leafName);
      Path[] paths; Path dir;
    PathFilter filter = new GlobFilter(fs, leafName);
    dir = new Path(globs[d].getParent().toString());
      if(dir == null) dir = new Path(".");
    paths = fs.listPaths(dir, filter);
      list.addAll(Arrays.asList(paths));
    }

View Full Code Here


    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
      filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);


    for (Path p : dirs) {
      FileSystem fs = p.getFileSystem(job.getConfiguration());


      FileStatus[] matches = null;

View Full Code Here


      // Look for reference files.  Call listStatus with an anonymous
      // instance of PathFilter.


      FileStatus [] ps = master.fs.listStatus(p,
          new PathFilter () {
            public boolean accept(Path path) {
              return HStore.isReference(path);
            }
          }
      );

View Full Code Here

  static Schema getSchemaFromPath(Path path, Configuration conf) {
    DataFileReader reader = null;
    try {
      FileSystem fs = FileSystem.get(conf);
      if (!fs.isFile(path)) {
        FileStatus[] fstat = fs.listStatus(path, new PathFilter() {
          @Override
          public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
          }

View Full Code Here

      totalBytes += it.next().getSize();
    }
    long goalSize = totalBytes / (numSplits < 1 ? 1 : numSplits);
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    PathFilter filter = null;
    List<BasicTable.Reader> realReaders = new ArrayList<BasicTable.Reader>();


    for (int i = 0; i < readers.size(); ++i) {
      BasicTable.Reader reader = readers.get(i);
      /* Get the index of the column group that will be used for row-split.*/
      int splitCGIndex = reader.getRowSplitCGIndex();
      
      /* We can create input splits only if there does exist a valid column group for split.
       * Otherwise, we do not create input splits. */
      if (splitCGIndex >= 0) {        
        realReaders.add(reader);
        if (first)
        {
          // filter is identical across tables
          filter = reader.getPathFilter(conf);
          first = false;
        } else
          sb.append(",");
        sb.append(reader.getPath().toString() + "/" + reader.getName(splitCGIndex));
      }
    }
    
    DummyFileInputFormat helper = new DummyFileInputFormat(minSplitSize, realReaders);


    if (!realReaders.isEmpty())
    {
      DummyFileInputFormat.setInputPaths(conf, sb.toString());
      DummyFileInputFormat.setInputPathFilter(conf, filter.getClass());
      InputSplit[] inputSplits = helper.getSplits(conf, (numSplits < 1 ? 1 : numSplits));


      int batchesPerSplit = inputSplits.length / (numSplits < 1 ? 1 : numSplits);
      if (batchesPerSplit <= 0)
        batchesPerSplit = 1;

View Full Code Here

    {
      totalBytes += it.next().getSize();
    }
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    PathFilter filter = null;
    List<BasicTable.Reader> realReaders = new ArrayList<BasicTable.Reader>();
    int[] realReaderIndices = new int[readers.size()];


    for (int i = 0; i < readers.size(); ++i) {
      BasicTable.Reader reader = readers.get(i);
      /* Get the index of the column group that will be used for row-split.*/
      int splitCGIndex = reader.getRowSplitCGIndex();
      
      /* We can create input splits only if there does exist a valid column group for split.
       * Otherwise, we do not create input splits. */
      if (splitCGIndex >= 0) {
        realReaderIndices[realReaders.size()] = i;
        realReaders.add(reader);
         if (first)
         {
           // filter is identical across tables
           filter = reader.getPathFilter(conf);
           first = false;
         } else
           sb.append(",");
         sb.append(reader.getPath().toString() + "/" + reader.getName(splitCGIndex));
       }
     }
     
     DummyFileInputFormat helper = new DummyFileInputFormat(job,minSplitSize, realReaders);
 
     if (!realReaders.isEmpty())
     {
       DummyFileInputFormat.setInputPaths(job, sb.toString());
       DummyFileInputFormat.setInputPathFilter(job, filter.getClass());
       List<InputSplit> inputSplitList = helper.getSplits(job);
       InputSplit[] inputSplits = inputSplitList.toArray(new InputSplit[0]);
 
       /*
        * Potential file batching optimizations include:

View Full Code Here

      
      // creates a MultiPathFilter with the hiddenFileFilter and the
      // user provided one (if any).
      List<PathFilter> filters = new ArrayList<PathFilter>();
      filters.add(hiddenFileFilter);
      PathFilter jobFilter = getInputPathFilter(jobContext);
      if (jobFilter != null) {
        filters.add(jobFilter);
      }
      PathFilter inputFilter = new MultiPathFilter(filters);


      ArrayList<Integer> fileNumberList  = new ArrayList<Integer>();
      int index = 0;
      for (Path p: dirs) {
        FileSystem fs = p.getFileSystem(job);

View Full Code Here

      
      // creates a MultiPathFilter with the hiddenFileFilter and the
      // user provided one (if any).
      List<PathFilter> filters = new ArrayList<PathFilter>();
      filters.add(hiddenFileFilter);
      PathFilter jobFilter = getInputPathFilter(job);
      if (jobFilter != null) {
        filters.add(jobFilter);
      }
      PathFilter inputFilter = new MultiPathFilter(filters);


      ArrayList<Integer> fileNumberList  = new ArrayList<Integer>();
      int index = 0;
      for (Path p: dirs) {
        FileSystem fs = p.getFileSystem(job);

View Full Code Here

            for(String path: inputPaths) {
                // Implied stuff, but good implied stuff
                if(path.endsWith(LATEST_SUFFIX)) {
                    FileSystem fs = FileSystem.get(conf);


                    PathFilter filter = new PathFilter() {


                        @Override
                        public boolean accept(Path arg0) {
                            return !arg0.getName().startsWith("_")
                                   && !arg0.getName().startsWith(".");

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.fs.PathFilter

com.aamend.hadoop.clustering.job.CanopyDriver

com.alexholmes.hadooputils.test.TextIOJobBuilder

com.bah.geterdun.WriteAheadLogManager

com.linkedin.camus.etl.kafka.CamusJob

com.m6d.filecrush.crush.Crush

com.twitter.elephantbird.mapreduce.output.LuceneIndexOutputFormat

com.twitter.elephantbird.util.HdfsUtils

com.twitter.elephantbird.util.TestPathFilters

org.apache.accumulo.server.upgrade.UpgradeMetadataTable

org.apache.blur.manager.writer.IndexImporter

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.