Examples of org.apache.hadoop.mapreduce.lib.input.CombineFileSplit

Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.CombineFileSplit

org.apache.hadoop.mapreduce.lib.input.CombineFileSplit
A sub-collection of input files. Unlike {@link FileSplit}, the CombineFileSplit class does not represent a split of a file, but a split of input files into smaller sets. A split may contain blocks from different files, but all the blocks in the same split are probably local to some rack.
CombineFileSplit can be used to implement {@link RecordReader}s that read one record per file. @see FileSplit @see CombineFileInputFormat

    Arrays.fill(start, 0L);
    Arrays.fill(len, BLOCK);


    final ByteArrayOutputStream out = fillVerif();
    final FileQueue q =
      new FileQueue(new CombineFileSplit(paths, start, len, loc), conf);
    final byte[] verif = out.toByteArray();
    final byte[] check = new byte[2 * NFILES * BLOCK];
    q.read(check, 0, NFILES * BLOCK);
    assertArrayEquals(verif, Arrays.copyOf(check, NFILES * BLOCK));

View Full Code Here

    for (int i = 0; i < NFILES; i += 2) {
      start[i] += B2;
      len[i] -= B2;
    }
    final FileQueue q =
      new FileQueue(new CombineFileSplit(paths, start, len, loc), conf);
    final ByteArrayOutputStream out = fillVerif();
    final byte[] verif = out.toByteArray();
    final byte[] check = new byte[NFILES / 2 * BLOCK + NFILES / 2 * B2];
    q.read(check, 0, verif.length);
    assertArrayEquals(verif, Arrays.copyOf(check, verif.length));

View Full Code Here


  @Test
  public void testEmpty() throws Exception {
    final Configuration conf = new Configuration();
    // verify OK if unused
    final FileQueue q = new FileQueue(new CombineFileSplit(
          new Path[0], new long[0], new long[0], new String[0]), conf);
  }

View Full Code Here

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException 
    {
      List<CharSequence> inputSplits = new ArrayList<CharSequence>();
      
      CombineFileSplit fileSplit = (CombineFileSplit) context.getInputSplit();
                  
      for (Path path : fileSplit.getPaths())
      {        
        inputSplits.add(path.toString());
      }
      
      String line = value.toString();

View Full Code Here

    Collections.sort(sort, hostRank);
    final String[] hosts = new String[Math.min(nLocs, sort.size())];
    for (int i = 0; i < nLocs && i < sort.size(); ++i) {
      hosts[i] = sort.get(i).getKey();
    }
    return new CombineFileSplit(paths.toArray(new Path[0]),
        toLongArray(start), toLongArray(length), hosts);
  }

View Full Code Here

    writer.close();
    
    compressedFile = compressedFile.suffix(".gz");
    // now read back the data from the compressed stream using FileQueue
    long fileSize = lfs.listStatus(compressedFile)[0].getLen();
    CombineFileSplit split = 
      new CombineFileSplit(new Path[] {compressedFile}, new long[] {fileSize});
    FileQueue queue = new FileQueue(split, conf);
    byte[] bytes = new byte[inputLine.getBytes().length];
    queue.read(bytes);
    queue.close();
    String readLine = new String(bytes);

View Full Code Here

    Path[] files = {new Path("one"), new Path("two")};
    long[] start = {1, 2};
    long[] lengths = {100, 200};
    String[] locations = {"locOne", "loctwo"};


    CombineFileSplit cfSplit = new CombineFileSplit(files, start, lengths,
            locations);
    ResourceUsageMetrics metrics = new ResourceUsageMetrics();
    metrics.setCumulativeCpuUsage(200);


    double[] reduceBytes = {8.1d, 8.2d};

View Full Code Here

    Path[] files = {new Path("one"), new Path("two")};
    long[] start = {1, 2};
    long[] lengths = {100, 200};
    String[] locations = {"locOne", "loctwo"};


    CombineFileSplit cfSplit = new CombineFileSplit(files, start, lengths,
            locations);
    ResourceUsageMetrics metrics = new ResourceUsageMetrics();
    metrics.setCumulativeCpuUsage(200);
    ResourceUsageMetrics[] rMetrics = {metrics};

View Full Code Here

    Path[] paths = {p1, p2};


    long[] start = {0, 0};
    long[] lengths = {1000, 1000};
    String[] locations = {"temp1", "temp2"};
    CombineFileSplit cfsplit = new CombineFileSplit(paths, start, lengths,
            locations);
    double[] reduceBytes = {100, 100};
    double[] reduceRecords = {2, 2};
    long[] reduceOutputBytes = {500, 500};
    long[] reduceOutputRecords = {2, 2};

View Full Code Here

    addAllSplits(arg0, Arrays.asList(getInputPaths(arg0)), splits, fs);


    List<InputSplit> cleanedSplits = new ArrayList<InputSplit>();


    for (int i = 0; i < splits.size(); i++) {
      CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
      String[] locations = oldSplit.getLocations();


      if (locations.length > 10)
        locations = Arrays.copyOf(locations, 10);


      cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(), oldSplit.getLengths(),
          locations));
    }
    return cleanedSplits;
  }

View Full Code Here

0 1 2

TOP

Related Classes of org.apache.hadoop.mapreduce.lib.input.CombineFileSplit

com.alexholmes.hadooputils.combine.common.mapreduce.SplitMetricsCombineInputFormat

com.linkedin.camus.sweeper.mapreduce.AvroKeyCombineFileInputFormat

com.linkedin.whiteelephant.parsing.ParseJobsFromLogs$TheMapper

org.apache.hadoop.fs.Path

org.apache.hadoop.mapred.gridmix.InputStriper

org.apache.hadoop.mapred.gridmix.TestCompressionEmulationUtils

org.apache.hadoop.mapred.gridmix.TestFileQueue

org.apache.hadoop.mapred.gridmix.TestGridMixClasses

org.apache.sqoop.mapreduce.ExportInputFormat

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.