Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.InputSplit
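InputSplit represents the chunk of input a single map task processes. An InputFormat's getSplits() method produces the list of splits for a job, createRecordReader() turns a split into a stream of key/value records, and inside a Mapper the current split is available via context.getInputSplit(). The excerpts below show each of these uses in turn.

As a reference point, here is a minimal, self-contained sketch of the pattern most of the excerpts follow: enumerate the splits of an input format, then open, initialize, and drain a RecordReader for each split. It assumes Hadoop 2.x, where TaskAttemptContext is an interface implemented by TaskAttemptContextImpl (several excerpts below use the older API, in which TaskAttemptContext is a concrete class); the class name SplitCounter is only illustrative.

    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.TaskAttemptID;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

    public class SplitCounter {
      public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path(args[0]));

        TextInputFormat format = new TextInputFormat();
        // the framework calls getSplits() the same way when scheduling map tasks
        List<InputSplit> splits = format.getSplits(job);

        for (InputSplit split : splits) {
          TaskAttemptContext ctx =
              new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
          RecordReader<LongWritable, Text> reader = format.createRecordReader(split, ctx);
          reader.initialize(split, ctx);
          long records = 0;
          while (reader.nextKeyValue()) {
            records++;
          }
          reader.close();
          System.out.println(split + ": " + records + " records");
        }
      }
    }

Run against a directory of text files, this prints the record count of every split, roughly what each map task of a wordcount-style job would consume.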


   
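This driver replays a job's map tasks locally. The splits are walked in file order so that the computed first IDs line up with Hadoop's own partition numbering, and a RecordReader is opened on each split exactly as a map task would: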
    // to compute firstIds, process the splits in file order
    long slowest = 0; // duration of the slowest map
    int firstId = 0;
    for (int p = 0; p < nbSplits; p++) {
      InputSplit split = splits.get(p);
      int hp = ArrayUtils.indexOf(sorted, split); // Hadoop's partition number

      // open a reader on the split, as a real map task would
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
      reader.initialize(split, task);
      // ...


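The second pass of the same kind of driver (the MockContext, Step2Mapper, and TreeID types suggest Mahout's partial decision-forest code): for each partition it opens a reader on the split and sizes a key array for the portion of the first step's output that concerns this partition: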
    secondOutput = new MockContext(new Step2Mapper(), conf, task.getTaskAttemptID(), numTrees);
    long slowest = 0; // duration of the slowest map

    for (int partition = 0; partition < nbSplits; partition++) {
      InputSplit split = sorted[partition];
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);

      // load the output of the 1st step that concerns this partition
      int nbConcerned = Step2Mapper.nbConcerned(nbSplits, numTrees, partition);
      TreeID[] fsKeys = new TreeID[nbConcerned];
      // ...

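The same replay pattern with a custom mapper context; one reader is created and initialized per sorted split: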
    Step0Context context = new Step0Context(new Step0Mapper(), job.getConfiguration(),
                                            new TaskAttemptID(), numMaps);

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];

      // one reader per split, initialized with the shared context
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, context);
      reader.initialize(split, context);
      // ...

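Here a stock TaskAttemptContext stands in for a real task (this is the pre-Hadoop-2 API, where TaskAttemptContext is a concrete class); the loop then tracks the first key seen in each split: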
    TaskAttemptContext context = new TaskAttemptContext(job.getConfiguration(),
        new TaskAttemptID());

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, context);
      reader.initialize(split, context);

      Long firstKey = null; // first key read from this split, set once known
      // ...

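InputSplit is also the base type for custom splits. This HBase getSplits() implementation creates one TableSplit per region, clamping each region's start and end keys to the scan's startRow/stopRow range: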
        // clamp the region's key range to the scan's [startRow, stopRow) range
        byte[] splitStart = startRow.length == 0 ||
          Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
            keys.getFirst()[i] : startRow;
        byte[] splitStop = (stopRow.length == 0 ||
          Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
          keys.getSecond()[i].length > 0 ?
            keys.getSecond()[i] : stopRow;
        InputSplit split = new TableSplit(table.getTableName(),
          splitStart, splitStop, regionLocation);
        splits.add(split);
        if (LOG.isDebugEnabled()) {
          LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
        }
      }

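Splits can be reordered before use. This sampler shuffles the split list with pairwise random swaps, logging the seed so the shuffle is reproducible: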
      long seed = r.nextLong();
      r.setSeed(seed);
      LOG.debug("seed: " + seed);
      // shuffle the splits by swapping each position with a random one
      for (int i = 0; i < splits.size(); ++i) {
        InputSplit tmp = splits.get(i);
        int j = r.nextInt(splits.size());
        splits.set(i, splits.get(j));
        splits.set(j, tmp);
      }
      // our target rate is in terms of the maximum number of sample splits,
      // ...

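A benchmark-style driver that counts rows across every split returned by a Hive input format, printing progress every splitPrintPeriod splits (again using the pre-Hadoop-2 concrete TaskAttemptContext):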
    List<InputSplit> splits = defaultInputFormat.getSplits(hiveConf, client);
    System.err.println("getSplits returned " + splits.size() + " splits");

    long numRows = 0;
    for (int i = 0; i < splits.size(); ++i) {
      InputSplit split = splits.get(i);
      // a fresh attempt ID and context for each split, as in a real task
      TaskAttemptID taskID = new TaskAttemptID();
      TaskAttemptContext taskContext = new TaskAttemptContext(hiveConf, taskID);
      if (i % args.splitPrintPeriod == 0) {
        System.err.println("Handling split " + i + " of " + splits.size());
      }
      // ...

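Draining a queue of splits: each split is read in turn, and a failure on one split is logged without aborting the rest: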
  private static long readSplits(Context context)
  {
    long totalRows = 0;
    while (!context.splitsQueue.isEmpty()) {
      InputSplit split = context.splitsQueue.poll();
      try {
        totalRows += readSplit(split, context);
      } catch (Exception e) {
        // report the bad split but keep reading the rest
        System.err.println("Failed to read split " + split);
        e.printStackTrace();
      }
    }
    return totalRows;
  }

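Inside a Mapper, the current split is available from the task context. This setup() method casts it to FileSplit to recover the file path, which becomes the key for every record in the file: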
    private Text filenameKey;

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      // the split this task is processing; file-based input
      // formats hand the mapper a FileSplit
      InputSplit split = context.getInputSplit();
      Path path = ((FileSplit) split).getPath();
      filenameKey = new Text(path.toString());
    }

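A void variant of the queue-draining loop above, using parameterized SLF4J-style logging for failures: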
  private static void readSplits(Context context)
  {
    while (context.hasMoreSplitsToRead()) {
      InputSplit split = context.splitsQueue.poll();
      try {
        readSplit(split, context);
      } catch (Exception e) {
        LOG.error("Failed to read split {}", split, e);
      }
    }
  }


