Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit


        // Start position handed to each FileSplit; advanced by n after every split is added.
        long pos = 0;
        // Value returned by the most recent readLine call.
        int n;
        try {
          // Add one FileSplit per successful readLine call; stop once readLine returns 0 or less.
          // NOTE(review): presumably n is the number of bytes consumed for the line
          // (so pos tracks a byte offset) — confirm against the reader's contract.
          while ((n = reader.readLine(key)) > 0) {
            // Hosts are looked up on every iteration with the same fs/path arguments.
            String[] hosts = getStoreDirHosts(fs, path);
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          // Close the reader whether the loop finished normally or threw.
          reader.close();
        }
View Full Code Here


    private FSDataInputStream fileIn = null;
    private static InterSedes sedes = InterSedesFactory.getInterSedesInstance();

    public void initialize(InputSplit genericSplit, TaskAttemptContext context)
                    throws IOException {
        FileSplit split = (FileSplit) genericSplit;
        Configuration job = context.getConfiguration();
        start = split.getStart();
        end = start + split.getLength();
        final Path file = split.getPath();

        // open the file and seek to the start of the split
        FileSystem fs = file.getFileSystem(job);
        fileIn = fs.open(split.getPath());
        reader = new Reader(fileIn, fs.getFileStatus(file).getLen(), job);
        scanner = reader.createScannerByByteRange(start, split.getLength());
    }
View Full Code Here

  public static final int RECORD_3 = 0x03;
  private DataInputStream inData = null;

  public void initialize(InputSplit genericSplit,
                         TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    if (start != 0) {
        fileIn.seek(start);
    }
    in = new BufferedPositionedInputStream(fileIn, start);
    inData = new DataInputStream(in);
View Full Code Here

  private DataInputStream inData = null;
  private static InterSedes sedes = InterSedesFactory.getInterSedesInstance();

  public void initialize(InputSplit genericSplit,
                         TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    if (start != 0) {
        fileIn.seek(start);
    }
    in = new BufferedPositionedInputStream(fileIn, start);
    inData = new DataInputStream(in);
View Full Code Here

     
      ByteRange ret = new ByteRange(val);
     
      InputSplit inSplit = context.getInputSplit();
      if(inSplit instanceof FileSplit) {
        FileSplit fs = (FileSplit) inSplit;
        fname = fs.getPath().getName();
      }
      ret.split = fname;
     
      if(!valid) {
        context.getCounter("app", "badchunks").increment(1);
View Full Code Here

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
      InterruptedException {

    FileSplit fSplit = (FileSplit)split;
    Path path = fSplit.getPath();
    Configuration conf = context.getConfiguration();
    this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
    this.end = fSplit.getStart() + fSplit.getLength();

    if(fSplit.getStart() > in.getPosition()) {
      in.sync(fSplit.getStart());
    }

    this.start = in.getPosition();
    more = start < end;
View Full Code Here

    @SuppressWarnings("deprecation")
    @Override
    public void initialize(InputSplit split, TaskAttemptContext ctx)
    throws IOException, InterruptedException {

        FileSplit fileSplit = (FileSplit)split;
        Configuration conf = ctx.getConfiguration();
        splitPath = fileSplit.getPath();

        rcFileRecordReader = new RCFileRecordReader<LongWritable, BytesRefArrayWritable>(conf,
                new org.apache.hadoop.mapred.FileSplit(splitPath, fileSplit.getStart(), fileSplit.getLength(),
                        new org.apache.hadoop.mapred.JobConf(conf)) );


        key = rcFileRecordReader.createKey();
        value = rcFileRecordReader.createValue();
View Full Code Here

      @Override
      public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
        if (filePath == null) {
          FileSplit split = (FileSplit) context.getInputSplit();
          filePath = split.getPath().toString();
        }
        String line = value.toString();
        StringTokenizer st = new StringTokenizer(line, " ");
        while (st.hasMoreElements()) {
          byte[] word = st.nextToken().getBytes();
View Full Code Here

public abstract class FileInputLoadFunc extends LoadFunc implements OrderedLoadFunc  {
   
    @Override
    public WritableComparable<?> getSplitComparable(InputSplit split)
    throws IOException{
        FileSplit fileSplit = null;
        if(split instanceof FileSplit){
            fileSplit = (FileSplit)split;
        }else{
            throw new RuntimeException("LoadFunc expected split of type FileSplit");
        }
       
        return new FileSplitComparable(
                fileSplit.getPath().toString(),
                fileSplit.getStart()
        );
    }
View Full Code Here

    private long count;
    private boolean seeked = false;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
      FileSplit fileSplit = (FileSplit) split;
      conf = context.getConfiguration();
      Path path = fileSplit.getPath();
      FileSystem fs = path.getFileSystem(conf);
      LOG.info("Initialize HFileRecordReader for " + path);
      this.in = HFile.createReader(fs, path, new CacheConfig(conf), conf);

      // The file info must be loaded before the scanner can be used.
View Full Code Here

TOP

Related Classes of org.apache.hadoop.mapreduce.lib.input.FileSplit

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.