Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit


        m.setup(c1);

        Mapper.Context c2=mock(Mapper.Context.class);
        stub(c2.getInputSplit()).toReturn(
                new FileSplit(
                    new Path("s3n://basekb-now/2013-11-10/sieved/links/links-m-00328.nt.gz")
                    ,0
                    ,0
                    ,null
                )
        );
        m.map(new LongWritable(666),new Text("Ganbaru!"),c2);
        verify(c2).getInputSplit();
        verify(c2).write(
                new TaggedTextItem(new Text("Ganbaru!"),new VIntWritable(75)), new VIntWritable(75)
        );
        verifyNoMoreInteractions(c2);

        Mapper.Context c3=mock(Mapper.Context.class);
        stub(c3.getInputSplit()).toReturn(
                new FileSplit(
                        new Path("s3n://basekb-now/2013-11-10/sieved/description/description-m-00099.nt.gz")
                        ,0
                        ,0
                        ,null
                )
        );
        m.map(new LongWritable(667),new Text("He was some kind of recording genius"),c3);
        verify(c3).getInputSplit();
        verify(c3).write(
                new TaggedTextItem(new Text("He was some kind of recording genius"),new VIntWritable(0)), new VIntWritable(0)
        );
        verifyNoMoreInteractions(c3);

        Mapper.Context c4=mock(Mapper.Context.class);
        stub(c4.getInputSplit()).toReturn(
                new FileSplit(
                        new Path("s3n://basekb-now/2013-11-10/sieved/a/a-m-21345.nt.gz")
                        ,0
                        ,0
                        ,null
                )
View Full Code Here


    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration that=context.getConfiguration();
        mapping=getPathMapping(that);
        FileSplit split=(FileSplit) context.getInputSplit();
        String thePath=split.getPath().toString();
        currentTag = determineTag(mapping,thePath);
    }
View Full Code Here

        return Iterables.getLast(dotSplitter.split(input));
    }

    @Override
    protected void map(LongWritable key, T value, Context context) throws IOException, InterruptedException {
        FileSplit split=(FileSplit) context.getInputSplit();
        String thePath=split.getPath().toString();
        VIntWritable currentTag = determineTag(mapping,thePath);
        context.write(newTaggedKey(value,currentTag),currentTag);
    }
View Full Code Here

            Path path, BlockMap blockMap, long start, long end, long splitSize) {
        if (start >= end) {
            return Collections.emptyList();
        }
        if (splitSize <= 0) {
            FileSplit split = getSplit(blockMap, path, start, end);
            return Collections.singletonList(split);
        }
        long threashold = (long) (splitSize * 1.2);
        List<FileSplit> results = new ArrayList<FileSplit>();
        long current = start;
        while (current < end) {
            long next;
            if (end - current < threashold) {
                next = end;
            } else {
                next = current + splitSize;
            }
            FileSplit split = getSplit(blockMap, path, current, next);
            results.add(split);
            current = next;
        }
        return results;
    }
View Full Code Here

    }

    private static FileSplit getSplit(BlockMap blockMap, Path path, long start, long end) {
        DirectInputFragment f = blockMap.get(start, end);
        List<String> owners = f.getOwnerNodeNames();
        FileSplit split = new FileSplit(
                path, start, end - start,
                owners.toArray(new String[owners.size()]));
        return split;
    }
View Full Code Here

    @Override
    public RecordReader<NullWritable, T> createRecordReader(
            InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        FileSplit s = (FileSplit) split;
        assert s.getStart() % TemporaryFile.BLOCK_SIZE == 0;
        assert s.getStart() > 0 || s.getLength() > 0;
        return createRecordReader();
    }
View Full Code Here

        }

        @SuppressWarnings("unchecked")
        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            FileSplit s = (FileSplit) split;
            this.size = s.getLength();
            Path path = s.getPath();
            FileSystem fs = path.getFileSystem(context.getConfiguration());
            int blocks = computeBlocks(s);
            FSDataInputStream stream = fs.open(path);
            boolean succeed = false;
            try {
                if (s.getStart() != 0) {
                    assert s.getStart() % TemporaryFile.BLOCK_SIZE == 0;
                    stream.seek(s.getStart());
                }
                this.input = (TemporaryFileInput<T>) new TemporaryFileInput<Writable>(stream, blocks);
                Class<?> aClass = context.getConfiguration().getClassByName(input.getDataTypeName());
                this.value = (T) ReflectionUtils.newInstance(aClass, context.getConfiguration());
                succeed = true;
View Full Code Here

    public void splits_simple() {
        BlockMap blocks = blocks("testing", m(10));
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(new Path("testing"), blocks, m(64));

        assertThat(splits, hasSize(1));
        FileSplit s0 = find(splits, 0);
        assertThat(s0.getLength(), is(m(10)));
    }
View Full Code Here

        BlockMap blocks = blocks("testing", TemporaryFile.BLOCK_SIZE, TemporaryFile.BLOCK_SIZE);
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(new Path("testing"), blocks, m(64));

        assertThat(splits, hasSize(2));

        FileSplit s0 = find(splits, 0);
        assertThat(s0.getLength(), is((long) TemporaryFile.BLOCK_SIZE));

        FileSplit s1 = find(splits, TemporaryFile.BLOCK_SIZE);
        assertThat(s1.getLength(), is((long) TemporaryFile.BLOCK_SIZE));
    }
View Full Code Here

        BlockMap blocks = blocks("testing", TemporaryFile.BLOCK_SIZE - 10, TemporaryFile.BLOCK_SIZE);
        List<FileSplit> splits = TemporaryInputFormat.computeSplits(new Path("testing"), blocks, m(128));

        assertThat(splits, hasSize(2));

        FileSplit s0 = find(splits, 0);
        assertThat(s0.getLength(), is((long) TemporaryFile.BLOCK_SIZE));

        FileSplit s1 = find(splits, TemporaryFile.BLOCK_SIZE);
        assertThat(s1.getLength(), is((long) TemporaryFile.BLOCK_SIZE - 10));
    }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.mapreduce.lib.input.FileSplit

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.