Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit
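A FileSplit describes the slice of a single file handled by one map task: a path, a byte offset, a length, and an optional list of hosts where the data is local. The snippets below show how real projects construct and consume FileSplits. First, a minimal sketch of building one split over a whole file (the path and the bare Configuration are illustrative assumptions, not taken from any snippet below):

    // A minimal sketch: build a FileSplit covering one whole file.
    // The path and bare Configuration are illustrative assumptions.
    Configuration conf = new Configuration();
    Path input = new Path("/tmp/data.seq");
    FileStatus status = FileSystem.get(conf).getFileStatus(input);
    // Arguments: path, start offset, length, preferred hosts
    // (null means "no locality hint").
    FileSplit split = new FileSplit(input, 0, status.getLen(), null);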


    // From DistCp's dynamic-strategy input format: create one map-task
    // split per chunk file, assigning each chunk to a task.
    List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);

    for (int i = 0; i < nSplits; ++i) {
      TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, i);
      chunks.get(i).assignTo(taskId);
      splits.add(new FileSplit(chunks.get(i).getPath(), 0,
          // Setting non-zero length for FileSplit size, to avoid a possible
          // future when 0-sized file-splits are considered "empty" and skipped
          // over.
          getMinRecordsPerChunk(jobContext.getConfiguration()),
          null));
    }



  // From DistCp: open a chunk file for reading by wrapping the whole file
  // (offset 0 through its full size) in a single FileSplit.
  private void openForRead(TaskAttemptContext taskAttemptContext)
          throws IOException, InterruptedException {
    reader = new SequenceFileRecordReader<K, V>();
    reader.initialize(new FileSplit(chunkFilePath, 0,
            DistCpUtils.getFileSize(chunkFilePath, configuration), null),
            taskAttemptContext);
  }
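Here the entire chunk file, from offset 0 to its full size, is wrapped in one FileSplit so a SequenceFileRecordReader can be initialized directly, outside the usual InputFormat.getSplits() flow.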

    long lastEnd = 0;

    // Verify that each split's start matches the previous split's end,
    // so no byte range of the input file is missed.
    for (InputSplit split : splits) {
      FileSplit fileSplit = (FileSplit) split;
      long start = fileSplit.getStart();
      Assert.assertEquals(lastEnd, start);
      lastEnd = start + fileSplit.getLength();
    }

    // Verify there is nothing more to read from the input file past lastEnd.
    // (The original snippet is truncated here; a plausible completion opens
    // the listing file, whose variable name below is an assumption.)
    SequenceFile.Reader reader
            = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
                SequenceFile.Reader.file(listFile));

      reader = getListingFileReader(configuration);
      while (reader.next(srcRelPath, srcFileStatus)) {
        // If adding the current file would push this split past the
        // per-map byte limit, close the current split and start a new one.
        if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) {
          FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
              lastPosition - lastSplitStart, null);
          if (LOG.isDebugEnabled()) {
            LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
          }
          splits.add(split);
          lastSplitStart = lastPosition;
          currentSplitSize = 0;
        }
        currentSplitSize += srcFileStatus.getLen();
        lastPosition = reader.getPosition();
      }
      // Emit the final split for whatever remains of the listing file.
      if (lastPosition > lastSplitStart) {
        FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
            lastPosition - lastSplitStart, null);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
        }
        splits.add(split);
      }
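Note that these FileSplits cover byte ranges of the listing SequenceFile itself, not of the source files: each split spans [lastSplitStart, lastPosition) of the listing, and grouping is greedy. For example, with nBytesPerSplit = 100 and three 60-byte source files, the first split closes after one file (60 + 60 > 100), and each remaining file likewise ends up in its own split.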

    public KryoRecordReader() {
    }

    @Override
    public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
        final FileSplit split = (FileSplit) genericSplit;
        final Configuration job = context.getConfiguration();
        long start = split.getStart();
        final Path file = split.getPath();
        if (null != new CompressionCodecFactory(job).getCodec(file)) {
            throw new IllegalStateException("Compression is not supported for the (binary) Gremlin Kryo format");
        }
        // open the file and seek to the start of the split
        this.inputStream = file.getFileSystem(job).open(split.getPath());
        this.inputStream.seek(start);
        final long newStart = seekToHeader(this.inputStream, start);
        this.vertexStreamIterator = new VertexStreamIterator(this.inputStream, split.getLength() - (newStart - start), KryoReader.build().create());
    }
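In this reader, seekToHeader() advances the stream to the first record boundary at or after the split start; the length handed to VertexStreamIterator is then reduced by the bytes skipped (newStart - start), so the iterator still stops at the original split boundary instead of reading into the next split.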

      return progress;
    }

    @Override
    public void initialize(InputSplit is, TaskAttemptContext context) throws IOException, InterruptedException {
      FileSplit fileSplit = (FileSplit) is;

      // Use the job's configuration rather than a bare new Configuration(),
      // so site settings carried by the task context are honored.
      Configuration conf = context.getConfiguration();
      FileSystem fs = FileSystem.get(conf);

      key = new LogFileKey();
      value = new LogFileValue();

      fsdis = fs.open(fileSplit.getPath());
      FileStatus status = fs.getFileStatus(fileSplit.getPath());
      length = status.getLen();
    }
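A minimal sketch of a matching nextKeyValue(), assuming the key and value are Writables laid out back-to-back in the file (the original reader's loop is not shown in the snippet):

    @Override
    public boolean nextKeyValue() throws IOException {
      if (fsdis.getPos() >= length) {
        return false;                // reached the end of the file
      }
      key.readFields(fsdis);         // Writable deserialization, in order
      value.readFields(fsdis);
      return true;
    }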

    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
      if (first) {
        // On the first record only, emit the name of the file backing this split.
        FileSplit split = (FileSplit) context.getInputSplit();
        Path path = split.getPath(); // current split path
        lvalue.set(path.getName());
        lkey.set(key.get());
        context.write(lkey, lvalue);

        first = false;
      }
    }

      // Wraps one file of a CombineFileSplit as a plain FileSplit and
      // delegates to a standard SequenceFile record reader. The constructor
      // is invoked reflectively, hence the "unused" suppression.
      private final FileSplit fileSplit;

      @SuppressWarnings("unused")
      public SequenceFileRecordReaderWrapper(CombineFileSplit split, TaskAttemptContext context, Integer index)
          throws IOException {
        fileSplit = new FileSplit(split.getPath(index), split.getOffset(index), split.getLength(index),
            split.getLocations());
        delegate = new SequenceFileInputFormat<Writable, Text>().createRecordReader(fileSplit, context);
      }
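The wrapper above is never constructed directly: CombineFileRecordReader instantiates it reflectively once per file in the combined split. A hedged sketch of the wiring, where the format class name is an assumption:

    public class CombinedSequenceFileInputFormat extends CombineFileInputFormat<Writable, Text> {
      @Override
      public RecordReader<Writable, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
          throws IOException {
        // CombineFileRecordReader calls the (CombineFileSplit,
        // TaskAttemptContext, Integer) constructor of the wrapper
        // for each file index in the combined split.
        return new CombineFileRecordReader<Writable, Text>((CombineFileSplit) split,
            context, SequenceFileRecordReaderWrapper.class);
      }
    }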

  }

  protected Path getCurrentFile(Context context) throws IOException {
    InputSplit split = context.getInputSplit();
    // instanceof already rejects null, so a separate null check is redundant
    if (split instanceof FileSplit) {
      FileSplit inputSplit = (FileSplit) split;
      Path path = inputSplit.getPath();
      return path.makeQualified(path.getFileSystem(context.getConfiguration()));
    }
    return null;
  }
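An illustrative use of this helper, assuming it lives in a Mapper subclass with a LOG field (both are assumptions here):

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      // May return null when the input split is not file-backed
      // (e.g. a CombineFileSplit or a custom InputSplit).
      Path currentFile = getCurrentFile(context);
      if (currentFile != null) {
        LOG.info("Mapper reading from " + currentFile);
      }
    }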

