Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit
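A FileSplit describes the slice of a single file handled by one map task: a path, a byte offset, a length, and an optional list of hosts where the data is local. The snippets below show how real projects construct and consume FileSplits. First, a minimal sketch of building one split over a whole file (the path and the bare Configuration are illustrative assumptions, not taken from any snippet below):

    // A minimal sketch: build a FileSplit covering one whole file.
    // The path and bare Configuration are illustrative assumptions.
    Configuration conf = new Configuration();
    Path input = new Path("/tmp/data.seq");
    FileStatus status = FileSystem.get(conf).getFileStatus(input);
    // Arguments: path, start offset, length, preferred hosts
    // (null means "no locality hint").
    FileSplit split = new FileSplit(input, 0, status.getLen(), null);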


    // From DistCp's dynamic-strategy input format: create one map-task
    // split per chunk file, assigning each chunk to a task.
    List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);

    for (int i = 0; i < nSplits; ++i) {
      TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, i);
      chunks.get(i).assignTo(taskId);
      splits.add(new FileSplit(chunks.get(i).getPath(), 0,
          // Setting non-zero length for FileSplit size, to avoid a possible
          // future when 0-sized file-splits are considered "empty" and skipped
          // over.
          getMinRecordsPerChunk(jobContext.getConfiguration()),
          null));
    }



  // From DistCp: open a chunk file for reading by wrapping the whole file
  // (offset 0 through its full size) in a single FileSplit.
  private void openForRead(TaskAttemptContext taskAttemptContext)
          throws IOException, InterruptedException {
    reader = new SequenceFileRecordReader<K, V>();
    reader.initialize(new FileSplit(chunkFilePath, 0,
            DistCpUtils.getFileSize(chunkFilePath, configuration), null),
            taskAttemptContext);
  }
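Here the entire chunk file, from offset 0 to its full size, is wrapped in one FileSplit so a SequenceFileRecordReader can be initialized directly, outside the usual InputFormat.getSplits() flow.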

    long lastEnd = 0;

    // Verify that each split's start matches the previous split's end,
    // so no byte range of the input file is missed.
    for (InputSplit split : splits) {
      FileSplit fileSplit = (FileSplit) split;
      long start = fileSplit.getStart();
      Assert.assertEquals(lastEnd, start);
      lastEnd = start + fileSplit.getLength();
    }

    // Verify there is nothing more to read from the input file past lastEnd.
    // (The original snippet is truncated here; a plausible completion opens
    // the listing file, whose variable name below is an assumption.)
    SequenceFile.Reader reader
            = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
                SequenceFile.Reader.file(listFile));

      reader = getListingFileReader(configuration);
      while (reader.next(srcRelPath, srcFileStatus)) {
        // If adding the current file would push this split past the
        // per-map byte limit, close the current split and start a new one.
        if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) {
          FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
              lastPosition - lastSplitStart, null);
          if (LOG.isDebugEnabled()) {
            LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
          }
          splits.add(split);
          lastSplitStart = lastPosition;
          currentSplitSize = 0;
        }
        currentSplitSize += srcFileStatus.getLen();
        lastPosition = reader.getPosition();
      }
      // Emit the final split for whatever remains of the listing file.
      if (lastPosition > lastSplitStart) {
        FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
            lastPosition - lastSplitStart, null);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
        }
        splits.add(split);
      }
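Note that these FileSplits cover byte ranges of the listing SequenceFile itself, not of the source files: each split spans [lastSplitStart, lastPosition) of the listing, and grouping is greedy. For example, with nBytesPerSplit = 100 and three 60-byte source files, the first split closes after one file (60 + 60 > 100), and each remaining file likewise ends up in its own split.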

    public KryoRecordReader() {
    }

    @Override
    public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
        final FileSplit split = (FileSplit) genericSplit;
        final Configuration job = context.getConfiguration();
        long start = split.getStart();
        final Path file = split.getPath();
        if (null != new CompressionCodecFactory(job).getCodec(file)) {
            throw new IllegalStateException("Compression is not supported for the (binary) Gremlin Kryo format");
        }
        // open the file and seek to the start of the split
        this.inputStream = file.getFileSystem(job).open(split.getPath());
        this.inputStream.seek(start);
        final long newStart = seekToHeader(this.inputStream, start);
        this.vertexStreamIterator = new VertexStreamIterator(this.inputStream, split.getLength() - (newStart - start), KryoReader.build().create());
    }
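In this reader, seekToHeader() advances the stream to the first record boundary at or after the split start; the length handed to VertexStreamIterator is then reduced by the bytes skipped (newStart - start), so the iterator still stops at the original split boundary instead of reading into the next split.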

      return progress;
    }

    @Override
    public void initialize(InputSplit is, TaskAttemptContext context) throws IOException, InterruptedException {
      FileSplit fileSplit = (FileSplit) is;

      // Use the job's configuration rather than a bare new Configuration(),
      // so site settings carried by the task context are honored.
      Configuration conf = context.getConfiguration();
      FileSystem fs = FileSystem.get(conf);

      key = new LogFileKey();
      value = new LogFileValue();

      fsdis = fs.open(fileSplit.getPath());
      FileStatus status = fs.getFileStatus(fileSplit.getPath());
      length = status.getLen();
    }
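A minimal sketch of a matching nextKeyValue(), assuming the key and value are Writables laid out back-to-back in the file (the original reader's loop is not shown in the snippet):

    @Override
    public boolean nextKeyValue() throws IOException {
      if (fsdis.getPos() >= length) {
        return false;                // reached the end of the file
      }
      key.readFields(fsdis);         // Writable deserialization, in order
      value.readFields(fsdis);
      return true;
    }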

    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
      if (first) {
        // On the first record only, emit the name of the file backing this split.
        FileSplit split = (FileSplit) context.getInputSplit();
        Path path = split.getPath(); // current split path
        lvalue.set(path.getName());
        lkey.set(key.get());
        context.write(lkey, lvalue);

        first = false;
      }
    }

      // Wraps one file of a CombineFileSplit as a plain FileSplit and
      // delegates to a standard SequenceFile record reader. The constructor
      // is invoked reflectively, hence the "unused" suppression.
      private final FileSplit fileSplit;

      @SuppressWarnings("unused")
      public SequenceFileRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index)
          throws IOException {
        fileSplit = new FileSplit(split.getPath(index), split.getOffset(index), split.getLength(index),
            split.getLocations());
        delegate = new SequenceFileInputFormat<Writable, Text>().createRecordReader(fileSplit, context);
      }
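The wrapper above is never constructed directly: CombineFileRecordReader instantiates it reflectively once per file in the combined split. A hedged sketch of the wiring, where the format class name is an assumption:

    public class CombinedSequenceFileInputFormat extends CombineFileInputFormat<Writable, Text> {
      @Override
      public RecordReader<Writable, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
          throws IOException {
        // CombineFileRecordReader calls the (CombineFileSplit,
        // TaskAttemptContext, Integer) constructor of the wrapper
        // for each file index in the combined split.
        return new CombineFileRecordReader<Writable, Text>((CombineFileSplit) split,
            context, SequenceFileRecordReaderWrapper.class);
      }
    }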

  }

  protected Path getCurrentFile(Context context) throws IOException {
    InputSplit split = context.getInputSplit();
    // instanceof already rejects null, so a separate null check is redundant
    if (split instanceof FileSplit) {
      FileSplit inputSplit = (FileSplit) split;
      Path path = inputSplit.getPath();
      return path.makeQualified(path.getFileSystem(context.getConfiguration()));
    }
    return null;
  }
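An illustrative use of this helper, assuming it lives in a Mapper subclass with a LOG field (both are assumptions here):

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      // May return null when the input split is not file-backed
      // (e.g. a CombineFileSplit or a custom InputSplit).
      Path currentFile = getCurrentFile(context);
      if (currentFile != null) {
        LOG.info("Mapper reading from " + currentFile);
      }
    }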

