Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.InputSplit
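For orientation before the snippets: a concrete InputSplit only has to report its size (getLength) and its preferred hosts (getLocations), and in the mapreduce API it is usually also made Writable so the framework can serialize it to task attempts. The sketch below is illustrative only; RangeSplit and its fields are hypothetical names, not taken from any of the projects quoted here.

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;

    import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.mapreduce.InputSplit;

    // Hypothetical split describing a byte range of one resource.
    public class RangeSplit extends InputSplit implements Writable {
      private String location; // preferred host, used for locality scheduling
      private long start;
      private long length;

      public RangeSplit() {} // no-arg constructor required for deserialization

      public RangeSplit(String location, long start, long length) {
        this.location = location;
        this.start = start;
        this.length = length;
      }

      @Override
      public long getLength() { // size in bytes; used when sorting splits
        return length;
      }

      @Override
      public String[] getLocations() {
        return new String[] { location };
      }

      @Override
      public void write(DataOutput out) throws IOException {
        out.writeUTF(location);
        out.writeLong(start);
        out.writeLong(length);
      }

      @Override
      public void readFields(DataInput in) throws IOException {
        location = in.readUTF();
        start = in.readLong();
        length = in.readLong();
      }
    }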


    // Build a mapper context once, then open and initialize a RecordReader
    // for each of the sorted input splits.
    context = new Step0Context(new Step0Mapper(), job.getConfiguration(),
        new TaskAttemptID(), numMaps);

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];

      RecordReader<LongWritable, Text> reader = input.createRecordReader(split,
          context);
      reader.initialize(split, context);


    // The same pattern with a plain TaskAttemptContext: one RecordReader per
    // sorted split, initialized before any records are consumed.
    TaskAttemptContext context = new TaskAttemptContext(job.getConfiguration(),
        new TaskAttemptID());

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split,
          context);
      reader.initialize(split, context);

      Long firstKey = null;
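Both snippets above stop right after reader.initialize(split, context). What typically follows is the standard RecordReader iteration protocol; here is a minimal sketch, assuming a reader with the same LongWritable/Text types as above (the helper class and method names are hypothetical):

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.RecordReader;

    // Hypothetical helper: drain an already-initialized reader.
    public final class SplitReadLoop {
      public static long drain(RecordReader<LongWritable, Text> reader)
          throws IOException, InterruptedException {
        long records = 0;
        try {
          // Advance first, then fetch the current key/value pair.
          while (reader.nextKeyValue()) {
            LongWritable key = reader.getCurrentKey();
            Text value = reader.getCurrentValue();
            records++; // process (key, value) here
          }
        } finally {
          reader.close();
        }
        return records;
      }
    }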

          inputDesc.getNumSplits(), partitionNum+1, Iterables.size(partitions),
          inputPartition.getInputSplitData().getPartitionValues(), baseSplits.length,
          baseInputFormat.getClass().getCanonicalName());

      // Wrap each mapred-API base split in an HInputSplit that carries the
      // table schema, the projected column ids, and the partition's split data.
      for (org.apache.hadoop.mapred.InputSplit baseSplit : baseSplits) {
        InputSplit split = new HInputSplit(baseInputFormat, baseSplit,
            tableSchema, columnIds, inputPartition.getInputSplitData(), conf);
        splits.add(split);
      }

      partitionNum++;

        // Combine raw splits into batches, tracking each batch's total byte
        // length; zero-length splits are only counted and then skipped.
        List<List<InputSplit>> result = new ArrayList<List<InputSplit>>();
        List<Long> resultLengths = new ArrayList<Long>();
        long comparableSplitId = 0;

        int size = 0, nSplits = oneInputSplits.size();
        InputSplit lastSplit = null;
        int emptyCnt = 0;
        for (InputSplit split : oneInputSplits) {
            if (split.getLength() == 0) {
                emptyCnt++;
                continue;

            keys.getFirst()[i] : startRow;
        // Clip the region's key range to the requested scan range: use the
        // region's end key unless the scan's stopRow falls inside the region.
        byte[] splitStop = (stopRow.length == 0 ||
          Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
          keys.getSecond()[i].length > 0 ?
            keys.getSecond()[i] : stopRow;
        // One TableSplit per region, pinned to that region's host.
        InputSplit split = new TableSplit(table.getTableName(),
          splitStart, splitStop, regionLocation);
        splits.add(split);
        if (LOG.isDebugEnabled())
          LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
      }

                    .getFirst()[i] : startRow;
            // The same boundary logic against a newer HBase API; here the
            // TableSplit also records the Scan it will serve.
            byte[] splitStop =
                (stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i],
                    stopRow) <= 0) && keys.getSecond()[i].length > 0 ? keys
                    .getSecond()[i] : stopRow;
            InputSplit split =
                new TableSplit(table.getName(),
                    scan, splitStart, splitStop, regionLocation);
            splits.add(split);
            if (LOG.isDebugEnabled())
              LOG.debug("getSplits: split -> " + (count++) + " -> " + split);

      // Draw and log a fresh seed so the sampling run can be reproduced,
      // then re-seed the generator with it.
      long seed = r.nextLong();
      r.setSeed(seed);
      LOG.debug("seed: " + seed);
      // Shuffle the splits by swapping each position with a random one.
      for (int i = 0; i < splits.size(); ++i) {
        InputSplit tmp = splits.get(i);
        int j = r.nextInt(splits.size());
        splits.set(i, splits.get(j));
        splits.set(j, tmp);
      }
      // our target rate is in terms of the maximum number of sample splits,
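A side note on the swap loop above: picking j uniformly over the whole list on every iteration is a subtly biased shuffle. The JDK's Collections.shuffle performs a textbook Fisher-Yates pass and accepts a seeded Random, so an equivalent, reproducible version could look like this sketch (the class and method names are hypothetical):

    import java.util.Collections;
    import java.util.List;
    import java.util.Random;

    import org.apache.hadoop.mapreduce.InputSplit;

    // Hypothetical variant: unbiased Fisher-Yates shuffle with a logged seed.
    public final class SplitShuffle {
      public static long shuffle(List<InputSplit> splits, Random r) {
        long seed = r.nextLong(); // draw a seed so the run can be reproduced
        Collections.shuffle(splits, new Random(seed));
        return seed; // caller logs this, mirroring LOG.debug("seed: " + seed)
      }
    }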

   
    // Read only the split indices named on the command line; records from
    // each chosen split go to their own output file.
    String[] inputSplitsToRead = args[1].split(",");
    List<InputSplit> splits = cntxt.getSplits();

    for (int i = 0; i < inputSplitsToRead.length; i++) {
      InputSplit split = splits.get(Integer.parseInt(inputSplitsToRead[i]));
      HCatReader reader = DataTransferFactory.getHCatReader(split, cntxt.getConf());
      Iterator<HCatRecord> itr = reader.read();
      File f = new File(args[2] + "-" + i);
      f.delete(); // drop stale output from an earlier run
      BufferedWriter outFile = new BufferedWriter(new FileWriter(f));

    // With a mocked JobContext supplying the Configuration, getSplits()
    // should yield eight splits whose first preferred location is "host<i>".
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(conf);
    List<InputSplit> splits = fif.getSplits(jobContext);
    assertEquals(8, splits.size());
    for (int i = 0; i < splits.size(); i++) {
      InputSplit split = splits.get(i);
      assertEquals("host" + i, split.getLocations()[0]);
    }
  }



