Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit


        // Two blocks: one of exactly BLOCK_SIZE bytes, one of BLOCK_SIZE + 10 bytes.
        BlockMap blocks = blocks("testing", TemporaryFile.BLOCK_SIZE, TemporaryFile.BLOCK_SIZE + 10);
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(new Path("testing"), blocks, m(64));

        // Each block yields its own split.
        assertThat(splits, hasSize(2));

        FileSplit s0 = find(splits, 0);
        assertThat(s0.getLength(), is((long) TemporaryFile.BLOCK_SIZE));

        FileSplit s1 = find(splits, TemporaryFile.BLOCK_SIZE);
        assertThat(s1.getLength(), is((long) TemporaryFile.BLOCK_SIZE + 10));
    }
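The blocks(...), m(...), and find(...) helpers above belong to the surrounding test harness and are not shown on this page. As a hedged sketch, find() plausibly locates a split by its start offset (a hypothetical reconstruction, not the harness's actual code):

    // Hypothetical reconstruction of the harness's find() helper:
    // return the split that begins at the given byte offset.
    static FileSplit find(List<FileSplit> splits, long start) {
        for (FileSplit split : splits) {
            if (split.getStart() == start) {
                return split;
            }
        }
        throw new AssertionError("no split starting at offset " + start);
    }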


        // A requested split size just over one block rounds up to whole blocks,
        // so the ten-block file (created off-snippet) yields five two-block splits.
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(
                new Path("testing"), blocks, TemporaryFile.BLOCK_SIZE + 1);

        assertThat(splits, hasSize(5));

        FileSplit s0 = find(splits, TemporaryFile.BLOCK_SIZE * 0);
        assertThat(s0.getLength(), is((long) TemporaryFile.BLOCK_SIZE * 2));
        FileSplit s1 = find(splits, TemporaryFile.BLOCK_SIZE * 2);
        assertThat(s1.getLength(), is((long) TemporaryFile.BLOCK_SIZE * 2));
        FileSplit s2 = find(splits, TemporaryFile.BLOCK_SIZE * 4);
        assertThat(s2.getLength(), is((long) TemporaryFile.BLOCK_SIZE * 2));
        FileSplit s3 = find(splits, TemporaryFile.BLOCK_SIZE * 6);
        assertThat(s3.getLength(), is((long) TemporaryFile.BLOCK_SIZE * 2));
        FileSplit s4 = find(splits, TemporaryFile.BLOCK_SIZE * 8);
        assertThat(s4.getLength(), is((long) TemporaryFile.BLOCK_SIZE * 2));
    }

    public void splits_suppress() {
        // A split size of 0 suppresses splitting: the whole ten-block file
        // comes back as a single split.
        BlockMap blocks = blocks("testing", TemporaryFile.BLOCK_SIZE * 10);
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(new Path("testing"), blocks, 0);

        assertThat(splits, hasSize(1));
        FileSplit s0 = find(splits, 0);
        assertThat(s0.getLength(), is((long) TemporaryFile.BLOCK_SIZE * 10));
    }

        Configuration conf = new ConfigurationProvider().newInstance();
        FileStatus stat = write(conf, 1);
        RecordReader<NullWritable, Text> reader = TemporaryInputFormat.createRecordReader();
        try {
            // A single split spanning the whole file, with no location hints.
            reader.initialize(
                    new FileSplit(stat.getPath(), 0, stat.getLen(), null),
                    JobCompatibility.newTaskAttemptContext(conf, id()));

            assertThat(reader.nextKeyValue(), is(true));
            assertThat(reader.getCurrentValue(), is(new Text("Hello, world!")));
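The test is cut off inside the try block. A minimal sketch of how such a reader is typically drained and released afterwards, assuming the same RecordReader<NullWritable, Text> shape (not the original test's actual continuation):

        // Sketch: the usual drain-and-close pattern for a RecordReader.
        try {
            while (reader.nextKeyValue()) {
                Text value = reader.getCurrentValue();
                // ... inspect each record as needed ...
            }
        } finally {
            reader.close();
        }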

  // Emits one (path, length) record for each input split it processes.
  public static class InputSplitDetailMapper
    extends Mapper<NullWritable, NullWritable, Text, LongWritable> {
    @Override
    protected void map(NullWritable key, NullWritable value, Context context)
        throws IOException, InterruptedException {
      FileSplit split = (FileSplit) context.getInputSplit();
      context.write(new Text(split.getPath().toString()),
          new LongWritable(split.getLength()));
    }
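A mapper like this pairs naturally with a reducer that aggregates by path; a hypothetical companion (not part of the original source) that totals the reported split lengths per file:

  // Hypothetical companion reducer: sums the split lengths emitted per path.
  public static class InputSplitDetailReducer
      extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text path, Iterable<LongWritable> lengths, Context context)
        throws IOException, InterruptedException {
      long total = 0;
      for (LongWritable length : lengths) {
        total += length.get();
      }
      context.write(path, new LongWritable(total));
    }
  }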

        @Override
        public void initialize(InputSplit genericSplit, TaskAttemptContext context)
                throws IOException, InterruptedException {
            FileSplit split = (FileSplit) genericSplit;
            Configuration job = context.getConfiguration();
            // This reader owns the byte range [start, end) of the file.
            start = split.getStart();
            end = start + split.getLength();
            final Path file = split.getPath();
            // open the file and seek to the start of the split
            FileSystem fs = file.getFileSystem(job);
            FSDataInputStream fileIn = fs.open(file);
            if (start != 0) {
                fileIn.seek(start);
            }

            this.xmlLoaderBPIS = new XMLLoaderBufferedPositionedInputStream(fileIn);
        }
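Splits partition a file into half-open byte ranges, so the end field computed above marks where this reader must stop. A minimal sketch of the usual bounds check, shown against a plain FSDataInputStream (getPos() is part of its real API) rather than the loader's wrapping stream:

            // Sketch: consume records only while inside this split's range.
            while (fileIn.getPos() < end) {
                // ... read and emit one record from the stream ...
            }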

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            // Remember which file this reader is consuming.
            FileSplit fSplit = (FileSplit) split;
            Path p = fSplit.getPath();
            location = p.toString();
            LOG.info("location: " + location);
            conf = context.getConfiguration();
        }

      @Override
      public void map(LongWritable key, Text value, Context context)
          throws IOException, InterruptedException {
        // Resolve the input file's path once and cache it for later records.
        if (filePath == null) {
          FileSplit split = (FileSplit) context.getInputSplit();
          filePath = split.getPath().toString();
        }
        String line = value.toString();
        StringTokenizer st = new StringTokenizer(line, " ");
        while (st.hasMoreElements()) {
          byte[] word = st.nextToken().getBytes();
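Lazily caching the split's path on the first map() call, as above, is one common pattern; an equivalent sketch that resolves it once in setup() instead (class name and output types hypothetical):

      // Sketch: resolve the input file path once per task in setup().
      public static class PathAwareMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private String filePath;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
          InputSplit split = context.getInputSplit();
          if (split instanceof FileSplit) {
            filePath = ((FileSplit) split).getPath().toString();
          }
        }
      }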

  static class RandomInputFormat extends InputFormat<Text, LongWritable> {
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
      // Fabricate the configured number of dummy single-byte splits;
      // the paths need not exist on any filesystem.
      List<InputSplit> result = new ArrayList<InputSplit>();
      int numSplits = job.getConfiguration().getInt(NUM_MAPS_KEY, NUM_MAPS);
      for (int i = 0; i < numSplits; ++i) {
        result.add(new FileSplit(new Path("/tmp", "dummy-split-" + i), 0, 1, null));
      }
      return result;
    }
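Passing null as the final constructor argument, as above, simply means "no locality hints": getLocations() on such a split returns an empty array rather than null. A small standalone check (class name hypothetical):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class NullHostsCheck {
  public static void main(String[] args) throws Exception {
    // A split built with null hosts reports no preferred locations.
    FileSplit split = new FileSplit(new Path("/tmp", "dummy-split-0"), 0, 1, null);
    System.out.println(split.getLocations().length); // prints 0
  }
}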

  private Path getCurrentFile(Context context) throws IOException {
    InputSplit split = context.getInputSplit();
    if (split != null) {
      FileSplit inputSplit = (FileSplit) split;
      Path path = inputSplit.getPath();
      // Qualify the path with its scheme and authority (e.g. hdfs://namenode/...).
      return path.makeQualified(path.getFileSystem(context.getConfiguration()));
    }
    return null;
  }
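The unguarded cast in getCurrentFile assumes a file-based input format; under a wrapper such as CombineFileInputFormat the split is a CombineFileSplit and the cast would throw. A slightly more defensive sketch of the same helper:

  // Sketch: guard the cast so non-FileSplit inputs return null instead of throwing.
  private Path getCurrentFile(Context context) throws IOException {
    InputSplit split = context.getInputSplit();
    if (split instanceof FileSplit) {
      Path path = ((FileSplit) split).getPath();
      return path.makeQualified(path.getFileSystem(context.getConfiguration()));
    }
    return null;
  }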


