Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit
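A FileSplit describes the portion of one file that a single map task processes: a path, a starting byte offset, a length, and the hosts that store the data. A minimal sketch of constructing and inspecting one (the path and host names below are hypothetical):

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;

    // Cover the first 128 MB of a file (hypothetical values).
    FileSplit split = new FileSplit(new Path("/data/input.txt"), 0L,
        128L * 1024 * 1024, new String[] { "datanode1", "datanode2" });

    System.out.println(split.getPath());    // file backing this split
    System.out.println(split.getStart());   // byte offset where the split begins
    System.out.println(split.getLength());  // number of bytes in the split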


    @Override
    public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

      FileSplit split = (FileSplit) genericSplit;
      Configuration job = context.getConfiguration();
      this.maxLineLength = job.getInt(MRConfiguration.LINERECORDREADER_MAXLENGTH, Integer.MAX_VALUE);
      start = split.getStart();
      end = start + split.getLength();
      final Path file = split.getPath();

      FileSystem fs = file.getFileSystem(job);
      FSDataInputStream fileIn = fs.open(split.getPath());
      boolean skipFirstLine = false;
      if (start != 0) {
        // The split begins mid-file, so the first (possibly partial) line
        // belongs to the previous split: back up one byte and skip it.
        skipFirstLine = true;
        --start;
        fileIn.seek(start);
      }
      // ... (remainder elided)
    }
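When a split starts mid-file, the reader backs up one byte and then discards everything up to the first newline, since that partial line belongs to the previous split. A minimal sketch of the skip that typically follows the seek above, assuming a LineReader field named in and a position field pos (both names are assumptions, mirroring Hadoop's own LineRecordReader):

    in = new LineReader(fileIn, job);
    if (skipFirstLine) {
      // Consume and discard the partial first line; the previous
      // split's reader reads that line to completion.
      start += in.readLine(new Text(), 0,
          (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;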


    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      key = new LongWritable();
      value = new Text();
      if (split instanceof FileSplit) {
        FileSplit fsplit = (FileSplit) split;
        originalEnd = fsplit.getStart() + fsplit.getLength();
        Path path = fsplit.getPath();
        long fileEnd = path.getFileSystem(context.getConfiguration()).getFileStatus(path).getLen();
        // Extend the split to up to 10x its original length, bounded by the
        // end of the file, so the wrapped reader can read past the nominal
        // split boundary.
        FileSplit extendedSplit = new FileSplit(path, fsplit.getStart(),
            Math.min(fsplit.getLength() * 10, fileEnd - fsplit.getStart()), fsplit.getLocations());
        this.wrapped.initialize(extendedSplit, context);
      } else {
        throw new RuntimeException("Cannot override a split of type '" +
            split.getClass() + "'");
      }
    }

      for (; sl.next(key, value); last = sl.getPosition()) {
        // if adding this split would put this split past the target size,
        // cut the last split and put this next file in the next split.
        if (acc + value.getLen() > targetsize && acc != 0) {
          long splitsize = last - pos;
          FileSplit fileSplit = new FileSplit(listFile, pos, splitsize, null);
          LOG.info("Creating split : " + fileSplit + ", bytes in split: " + splitsize);
          splits.add(fileSplit);
          cbrem -= splitsize;
          pos = last;
          acc = 0L;
        }
        acc += value.getLen();
      }
    }
    finally {
      IOUtils.closeStream(sl);
    }
    if (cbrem != 0) {
      FileSplit fileSplit = new FileSplit(listFile, pos, cbrem, null);
      LOG.info("Creating split : " + fileSplit + ", bytes in split: " + cbrem);
      splits.add(fileSplit);
    }

    return splits;
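The loop above carves a SequenceFile listing into byte ranges of roughly targetsize bytes each, with cbrem tracking the bytes still unassigned. A minimal sketch of how those values are typically initialized before the loop (fs, listFile, and numMaps are assumed names here):

    long totalBytes = fs.getFileStatus(listFile).getLen(); // bytes in the listing
    int numSplits = Math.max(numMaps, 1);                  // avoid dividing by zero
    long targetsize = totalBytes / numSplits;              // per-split byte budget
    long cbrem = totalBytes;                               // bytes not yet assigned
    long pos = 0L, last = 0L, acc = 0L;                    // cursors used by the loop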

    long lastEnd = 0;

    //Verify if each split's start is matching with the previous end and
    //we are not missing anything
    for (InputSplit split : splits) {
      FileSplit fileSplit = (FileSplit) split;
      long start = fileSplit.getStart();
      Assert.assertEquals(lastEnd, start);
      lastEnd = start + fileSplit.getLength();
    }

    //Verify there is nothing more to read from the input file
    FileSystem fs = cluster.getFileSystem();
    SequenceFile.Reader reader; // ... (remainder elided)
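A contiguity check like this needs a split list to iterate over, typically produced by an InputFormat. A minimal sketch of obtaining one (the input path is hypothetical):

    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

    Job job = Job.getInstance(new Configuration());
    FileInputFormat.addInputPath(job, new Path("/data/input.txt"));

    // Each element is a FileSplit covering a contiguous byte range of the input.
    List<InputSplit> splits = new TextInputFormat().getSplits(job);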

                                  List<InputSplit> legacySplits)
      throws IOException, InterruptedException {

    Assert.assertEquals(legacySplits.size(), splits.size());
    for (int index = 0; index < splits.size(); index++) {
      FileSplit fileSplit = (FileSplit) splits.get(index);
      FileSplit legacyFileSplit = (FileSplit) legacySplits.get(index);
      Assert.assertEquals(fileSplit.getStart(), legacyFileSplit.getStart());
    }
  }

    List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);

    for (int i = 0; i < nSplits; ++i) {
      TaskID taskId = new TaskID(jobContext.getJobID(), true, i);
      chunks.get(i).assignTo(taskId);
      splits.add(new FileSplit(chunks.get(i).getPath(), 0,
          // Set a non-zero length for the FileSplit, to avoid a possible
          // future where 0-sized file-splits are considered "empty" and
          // skipped over.
          MIN_RECORDS_PER_CHUNK,
          null));
    }
    // ... (remainder elided)

  private void openForRead(TaskAttemptContext taskAttemptContext)
          throws IOException, InterruptedException {
    reader = new SequenceFileRecordReader<K, V>();
    // Read the chunk file end to end: from offset 0 through its full size.
    reader.initialize(new FileSplit(chunkFilePath, 0,
            DistCpUtils.getFileSize(chunkFilePath, configuration), null),
            taskAttemptContext);
  }

  @SuppressWarnings({ "rawtypes" })
  private void setInputPath(PartitionQuery<K, T> partitionQuery,
      TaskAttemptContext context) throws IOException {
    // If the data store is file-based...
    if (partitionQuery instanceof FileSplitPartitionQuery) {
      FileSplit split = ((FileSplitPartitionQuery<K, T>) partitionQuery).getSplit();
      // ...set the input path to the FileSplit's path.
      ((FileBackedDataStore) partitionQuery.getDataStore()).setInputPath(
          split.getPath().toString());
    }
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    if (split == null) {
      split = new FileSplit(null, 0, 0, null); // change to new FileSplit() once hadoop-core.jar is updated
    }
    split.readFields(in);
  }
