Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.InputSplit
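InputSplit represents the chunk of input a single map task processes. An InputFormat's getSplits() method produces the list of splits for a job, createRecordReader() turns a split into a stream of key/value records, and inside a Mapper the current split is available via context.getInputSplit(). The excerpts below show each of these uses in turn.

As a reference point, here is a minimal, self-contained sketch of the pattern most of the excerpts follow: enumerate the splits of an input format, then open, initialize, and drain a RecordReader for each split. It assumes Hadoop 2.x, where TaskAttemptContext is an interface implemented by TaskAttemptContextImpl (several excerpts below use the older API, in which TaskAttemptContext is a concrete class); the class name SplitCounter is only illustrative.

    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.TaskAttemptID;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

    public class SplitCounter {
      public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path(args[0]));

        TextInputFormat format = new TextInputFormat();
        // the framework calls getSplits() the same way when scheduling map tasks
        List<InputSplit> splits = format.getSplits(job);

        for (InputSplit split : splits) {
          TaskAttemptContext ctx =
              new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
          RecordReader<LongWritable, Text> reader = format.createRecordReader(split, ctx);
          reader.initialize(split, ctx);
          long records = 0;
          while (reader.nextKeyValue()) {
            records++;
          }
          reader.close();
          System.out.println(split + ": " + records + " records");
        }
      }
    }

Run against a directory of text files, this prints the record count of every split, roughly what each map task of a wordcount-style job would consume.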


   
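This driver replays a job's map tasks locally. The splits are walked in file order so that the computed first IDs line up with Hadoop's own partition numbering, and a RecordReader is opened on each split exactly as a map task would: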
    // to compute firstIds, process the splits in file order
    long slowest = 0; // duration of the slowest map
    int firstId = 0;
    for (int p = 0; p < nbSplits; p++) {
      InputSplit split = splits.get(p);
      int hp = ArrayUtils.indexOf(sorted, split); // Hadoop's partition number

      // open a reader on the split, as a real map task would
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);
      reader.initialize(split, task);
      // ...


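The second pass of the same kind of driver (the MockContext, Step2Mapper, and TreeID types suggest Mahout's partial decision-forest code): for each partition it opens a reader on the split and sizes a key array for the portion of the first step's output that concerns this partition: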
    secondOutput = new MockContext(new Step2Mapper(), conf, task.getTaskAttemptID(), numTrees);
    long slowest = 0; // duration of the slowest map

    for (int partition = 0; partition < nbSplits; partition++) {
      InputSplit split = sorted[partition];
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, task);

      // load the output of the 1st step that concerns this partition
      int nbConcerned = Step2Mapper.nbConcerned(nbSplits, numTrees, partition);
      TreeID[] fsKeys = new TreeID[nbConcerned];
      // ...

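The same replay pattern with a custom mapper context; one reader is created and initialized per sorted split: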
    Step0Context context = new Step0Context(new Step0Mapper(), job.getConfiguration(),
                                            new TaskAttemptID(), numMaps);

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];

      // one reader per split, initialized with the shared context
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, context);
      reader.initialize(split, context);
      // ...

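Here a stock TaskAttemptContext stands in for a real task (this is the pre-Hadoop-2 API, where TaskAttemptContext is a concrete class); the loop then tracks the first key seen in each split: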
    TaskAttemptContext context = new TaskAttemptContext(job.getConfiguration(),
        new TaskAttemptID());

    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.createRecordReader(split, context);
      reader.initialize(split, context);

      Long firstKey = null; // first key read from this split, set once known
      // ...

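InputSplit is also the base type for custom splits. This HBase getSplits() implementation creates one TableSplit per region, clamping each region's start and end keys to the scan's startRow/stopRow range: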
        // clamp the region's key range to the scan's [startRow, stopRow) range
        byte[] splitStart = startRow.length == 0 ||
          Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
            keys.getFirst()[i] : startRow;
        byte[] splitStop = (stopRow.length == 0 ||
          Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
          keys.getSecond()[i].length > 0 ?
            keys.getSecond()[i] : stopRow;
        InputSplit split = new TableSplit(table.getTableName(),
          splitStart, splitStop, regionLocation);
        splits.add(split);
        if (LOG.isDebugEnabled()) {
          LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
        }
      }

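Splits can be reordered before use. This sampler shuffles the split list with pairwise random swaps, logging the seed so the shuffle is reproducible: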
      long seed = r.nextLong();
      r.setSeed(seed);
      LOG.debug("seed: " + seed);
      // shuffle the splits by swapping each position with a random one
      for (int i = 0; i < splits.size(); ++i) {
        InputSplit tmp = splits.get(i);
        int j = r.nextInt(splits.size());
        splits.set(i, splits.get(j));
        splits.set(j, tmp);
      }
      // our target rate is in terms of the maximum number of sample splits,
      // ...

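A benchmark-style driver that counts rows across every split returned by a Hive input format, printing progress every splitPrintPeriod splits (again using the pre-Hadoop-2 concrete TaskAttemptContext):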
    List<InputSplit> splits = defaultInputFormat.getSplits(hiveConf, client);
    System.err.println("getSplits returned " + splits.size() + " splits");

    long numRows = 0;
    for (int i = 0; i < splits.size(); ++i) {
      InputSplit split = splits.get(i);
      // a fresh attempt ID and context for each split, as in a real task
      TaskAttemptID taskID = new TaskAttemptID();
      TaskAttemptContext taskContext = new TaskAttemptContext(hiveConf, taskID);
      if (i % args.splitPrintPeriod == 0) {
        System.err.println("Handling split " + i + " of " + splits.size());
      }
      // ...

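Draining a queue of splits: each split is read in turn, and a failure on one split is logged without aborting the rest: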
  private static long readSplits(Context context)
  {
    long totalRows = 0;
    while (!context.splitsQueue.isEmpty()) {
      InputSplit split = context.splitsQueue.poll();
      try {
        totalRows += readSplit(split, context);
      } catch (Exception e) {
        // report the bad split but keep reading the rest
        System.err.println("Failed to read split " + split);
        e.printStackTrace();
      }
    }
    return totalRows;
  }

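Inside a Mapper, the current split is available from the task context. This setup() method casts it to FileSplit to recover the file path, which becomes the key for every record in the file: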
    private Text filenameKey;

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      // the split this task is processing; file-based input
      // formats hand the mapper a FileSplit
      InputSplit split = context.getInputSplit();
      Path path = ((FileSplit) split).getPath();
      filenameKey = new Text(path.toString());
    }

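A void variant of the queue-draining loop above, using parameterized SLF4J-style logging for failures: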
  private static void readSplits(Context context)
  {
    while (context.hasMoreSplitsToRead()) {
      InputSplit split = context.splitsQueue.poll();
      try {
        readSplit(split, context);
      } catch (Exception e) {
        LOG.error("Failed to read split {}", split, e);
      }
    }
  }


