Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit
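
A FileSplit describes the slice of a file handed to a single map task: the file path, the byte offset where the slice starts, the slice length, and the hosts that store the underlying blocks. As a minimal, self-contained sketch (the path, size, and host names are made up for illustration), a split can be constructed and inspected like this:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;

    public class FileSplitExample {
      public static void main(String[] args) {
        // Hypothetical 128 MB slice at the start of /data/input.csv, with a
        // locality hint toward the two hosts holding its blocks.
        FileSplit split = new FileSplit(new Path("/data/input.csv"),
            0, 128L * 1024 * 1024, new String[] { "host1", "host2" });

        System.out.println(split.getPath());    // /data/input.csv
        System.out.println(split.getStart());   // 0
        System.out.println(split.getLength());  // 134217728
      }
    }

The examples below show how FileSplit instances are built while computing input splits and how record readers and mappers consume them.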


        // splitSize comes from the block size clamped by the configured
        // min/max split sizes; SPLIT_SLOP keeps the loop from leaving a
        // tiny trailing split.
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);

        long bytesRemaining = length;
        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
          int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
          splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
              blkLocations[blkIndex].getHosts()));
          bytesRemaining -= splitSize;
        }

        if (bytesRemaining != 0) {
          // Whatever is left over becomes the final split, placed on the
          // hosts of the file's last block.
          splits.add(new FileSplit(path, length - bytesRemaining,
              bytesRemaining,
              blkLocations[blkLocations.length - 1].getHosts()));
        }
      } else if (length != 0) {
        splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
      } else {
        // Create an empty hosts array for zero-length files
        splits.add(new FileSplit(path, 0, length, new String[0]));
      }
    }
    return splits;
  }
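
The computeSplitSize helper is not shown in the fragment above; in Hadoop's FileInputFormat it amounts to using the block size bounded by the configured minimum and maximum split sizes, and SPLIT_SLOP is 1.1, so the loop only emits a full-size split while the remainder exceeds splitSize by more than 10%. A sketch of the helper for context:

      // Effectively what FileInputFormat.computeSplitSize does: take the
      // block size, bounded below by minSize and above by maxSize.
      protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
        return Math.max(minSize, Math.min(maxSize, blockSize));
      }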


        // One FileSplit per line: each line read (n bytes starting at pos)
        // becomes its own split, located on the store directory's hosts.
        long pos = 0;
        int n;
        try {
          while ((n = reader.readLine(key)) > 0) {
            String[] hosts = getStoreDirHosts(fs, path);
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }

  public static class InputSplitDetailMapper
    extends Mapper<NullWritable, NullWritable, Text, LongWritable> {
    @Override
    protected void map(NullWritable key, NullWritable value, Context context)
        throws IOException, InterruptedException {
      // Report which file this task's split came from and how long it is.
      FileSplit split = (FileSplit)context.getInputSplit();
      context.write(new Text(split.getPath().toString()),
          new LongWritable(split.getLength()));
    }

      // The input stream will freak out if we try to seek past the EOF
      if (currentPosition >= fileSize) {
        currentPosition = fileSize;
        endOfFile = true;
        final FileSplit fileSplit = new FileSplit(fileName, splitStart, currentPosition - splitStart, new String[] {});
        splitsList.add(fileSplit);
        break;
      }

      // Every time we seek to the new approximate split point,
      // we need to create a new CSVLineReader around the stream.
      inputStream.seek(currentPosition);
      final CSVLineReader csvLineReader = new CSVLineReader(inputStream, this.bufferSize, this.inputFileEncoding,
          this.openQuoteChar, this.closeQuoteChar, this.escapeChar);

      // This line is potentially garbage because we most likely just sought to
      // the middle of a line. Read the rest of the line and leave it for the
      // previous split. Then reset the multi-line CSV record boolean, because
      // the partial line will have a very high chance of falsely triggering the
      // class wide multi-line logic.
      currentPosition += csvLineReader.readFileLine(new Text());
      csvLineReader.resetMultiLine();

      // Now, we may still be in the middle of a multi-line CSV record.
      currentPosition += csvLineReader.readFileLine(new Text());

      // If we are, read until we are not.
      while (csvLineReader.isInMultiLine()) {
        final int bytesRead = csvLineReader.readFileLine(new Text());
        // End of file
        if (bytesRead <= 0) {
          break;
        }
        currentPosition += bytesRead;
      }

      // We're out of the multi-line CSV record, so it's safe to end the
      // previous split.
      splitsList.add(new FileSplit(fileName, splitStart, currentPosition - splitStart, new String[] {}));
    }

    return splitsList;
  }

   * @throws IOException
   *           if an IOException occurs while handling the file to be read
   */
  @Override
  public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    final FileSplit split = (FileSplit) genericSplit;
    final Configuration job = context.getConfiguration();

    start = split.getStart();
    end = start + split.getLength();
    this.pos = start;

    final Path file = split.getPath();
    LOGGER.info("Initializing processing of split for file: " + file);
    LOGGER.info("File size is: " + file.getFileSystem(job).getFileStatus(file).getLen());
    LOGGER.info("Split starts at: " + start);
    LOGGER.info("Split will end at: " + end);


  static class RandomInputFormat extends InputFormat<Text, LongWritable> {
    public List<InputSplit> getSplits(JobContext job) throws IOException {
      List<InputSplit> result = new ArrayList<InputSplit>();
      int numSplits = job.getConfiguration().getInt(NUM_MAPS_KEY, NUM_MAPS);
      // Fabricate dummy one-byte splits; the paths never need to exist and
      // the null hosts array means no locality preference.
      for (int i = 0; i < numSplits; ++i) {
        result.add(new FileSplit(new Path("/tmp", "dummy-split-" + i), 0, 1, null));
      }
      return result;
    }

                    tuple.set(0, key);
                    writer.write(null, tuple);
                }
                writer.close(null);
                int size = (int) tFile.length();
                FileSplit split = new FileSplit(basicTFile, 0, size, null);
                TFileRecordReader reader = new TFileRecordReader();
                reader.initialize(split,
                    HadoopShims.createTaskAttemptContext(
                        conf,
                        HadoopShims.createTaskAttemptID("jt", 1, true, 1, 1)));

            StringBuilder sb = new StringBuilder();
            for(int i = 0; i < numPaths; i++) {
              InputSplit wrappedSplit = pigSplit.getWrappedSplit(i);
              if (wrappedSplit instanceof FileSplit) {
                  FileSplit mapInputFileSplit = (FileSplit)wrappedSplit;
                  sb.append("\nInput-split: file=");
                  sb.append(mapInputFileSplit.getPath());
                  sb.append(" start-offset=");
                  sb.append(Long.toString(mapInputFileSplit.getStart()));
                  sb.append(" length=");
                  sb.append(Long.toString(mapInputFileSplit.getLength()));
                  processError(sb.toString());
                  sb.setLength(0);
              }
            }
        }

        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            FileSplit fSplit = (FileSplit) split;
            Path p = fSplit.getPath();
            location = p.toString();
            LOG.info("location: " + location);
            conf = context.getConfiguration();
        }

            this.readers = new IndexedStorageRecordReader[fileSplits.size()];

            int idx = 0;
            Iterator<FileSplit> it = fileSplits.iterator();
            // Create and initialize one record reader per index file split.
            while (it.hasNext()) {
                FileSplit fileSplit = it.next();
                TaskAttemptContext context = HadoopShims.createTaskAttemptContext(conf, id);
                IndexedStorageRecordReader r = (IndexedStorageRecordReader) inputFormat.createRecordReader(fileSplit, context);
                r.initialize(fileSplit, context);
                this.readers[idx] = r;
                idx++;
