Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit
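
FileSplit is the InputSplit implementation used by file-based input formats: it names a file, a byte range within that file (start and length), and the hosts that store the range. A minimal sketch, with purely illustrative paths and sizes, of constructing a split by hand and reading its fields:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class FileSplitSketch {
  public static void main(String[] args) {
    // Illustrative values only: a split covering the first 1024 bytes of a file.
    FileSplit split = new FileSplit(
        new Path("hdfs:///data/input.txt"),
        0L,                             // byte offset where the split starts
        1024L,                          // number of bytes in the split
        new String[] { "datanode1" }aN); // hosts holding the data locally

    System.out.println(split.getPath());   // the file this split belongs to
    System.out.println(split.getStart());
    System.out.println(split.getLength());
  }
}

The snippets below show the patterns that come up in practice: casting context.getInputSplit() to FileSplit, wrapping FileSplits in custom split types, and serializing them.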


  /**
   * Returns the fully qualified path of the file backing the current split,
   * or null when the task's split is not a FileSplit.
   */
  protected Path getCurrentFile(Context context) throws IOException {
    InputSplit split = context.getInputSplit();
    if (split instanceof FileSplit) { // instanceof is already null-safe
      FileSplit inputSplit = (FileSplit) split;
      Path path = inputSplit.getPath();
      FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
      return path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
    }
    return null;
  }
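
A minimal usage sketch of this helper; the map() signature and output below are assumptions, not part of the original class:

  // Hypothetical usage inside the same mapper class: tag every record
  // with the name of the file its split came from.
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    Path currentFile = getCurrentFile(context); // null for non-file splits
    if (currentFile != null) {
      context.write(new Text(currentFile.getName()), value);
    }
  }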


  public static class InputSplitDetailMapper
      extends Mapper<NullWritable, NullWritable, Text, LongWritable> {
    @Override
    protected void map(NullWritable key, NullWritable value, Context context)
        throws IOException, InterruptedException {
      // Emit one (file path, split length) record per input split.
      FileSplit split = (FileSplit) context.getInputSplit();
      context.write(new Text(split.getPath().toString()),
          new LongWritable(split.getLength()));
    }
  }
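
A hedged driver sketch for wiring this mapper into a map-only job. It assumes an InputFormat that produces (NullWritable, NullWritable) records, which is what the mapper expects; the job name and paths are placeholders:

    // Hypothetical driver: input format and paths are assumptions.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "input-split-details");
    job.setMapperClass(InputSplitDetailMapper.class);
    job.setNumReduceTasks(0); // map-only: one (path, length) record per split
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path("/placeholder/input"));
    FileOutputFormat.setOutputPath(job, new Path("/placeholder/output"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);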

    // Wrap every delegate FileSplit once per group, so numGroups different
    // tasks can each process a distinct partition of the same file.
    for (int group = 0; group < numGroups; group++) {
      for (InputSplit split : superSplits) {
        FileSplit fileSplit = (FileSplit) split;
        splits.add(new WikipediaInputSplit(fileSplit, group));
      }
    }
    return splits;
  }

        // Tail of readFields(DataInput in): rebuild the optional host list,
        // then the wrapped FileSplit and the partition number.
        int numHosts = in.readInt();
        hosts = new String[numHosts];
        for (int i = 0; i < numHosts; i++) {
          hosts[i] = in.readUTF();
        }
      }
      fileSplit = new FileSplit(file, start, length, hosts);
      partition = in.readInt();
    }
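
The readFields() fragment above implies a matching write(). A hedged sketch of what the serialization side presumably looks like, inferring the field order from the reads shown (path, start, length, optional host list, partition):

  // Hypothetical counterpart to readFields() above; the field order is an
  // assumption inferred from the reads shown, not the project's actual code.
  @Override
  public void write(DataOutput out) throws IOException {
    out.writeUTF(fileSplit.getPath().toString());
    out.writeLong(fileSplit.getStart());
    out.writeLong(fileSplit.getLength());
    String[] hosts = fileSplit.getLocations();
    out.writeBoolean(hosts != null);
    if (hosts != null) {
      out.writeInt(hosts.length);
      for (String host : hosts) {
        out.writeUTF(host);
      }
    }
    out.writeInt(partition);
  }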

   
    WikipediaInputSplit wiSplit = (WikipediaInputSplit) context.getInputSplit();
    myGroup = wiSplit.getPartition();
    numGroups = WikipediaConfiguration.getNumGroups(conf);

    // Derive the ingest language from the wrapped split's file name.
    FileSplit split = wiSplit.getFileSplit();
    String fileName = split.getPath().getName();
    Matcher matcher = languagePattern.matcher(fileName);
    if (matcher.matches()) {
      language = matcher.group(1).replace('_', '-').toLowerCase();
    } else {
      throw new RuntimeException("Unknown ingest language! " + fileName);
    }
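
languagePattern itself is defined outside this excerpt; a purely illustrative pattern with the right shape (one capturing group for a language prefix that may contain underscores) would be:

  // Purely illustrative, not the project's actual pattern: capture a
  // language prefix (possibly with underscores) from the dump file name.
  private static final Pattern languagePattern =
      Pattern.compile("([a-zA-Z_]+)-.*\\.xml(\\.bz2)?");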

  private LongWritable key = null;
  private Text value = null;

  @Override
  public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // Open the file and seek to the start of the split.
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(file);
    boolean skipFirstLine = false;
    if (codec != null) {
      // Compressed streams cannot be split: read the whole file as one split.
      in = new LfLineReader(codec.createInputStream(fileIn), job);
      end = Long.MAX_VALUE;
    } else {
      if (start != 0) {
        // Not at the file start: back up one byte and skip the partial first
        // line, which belongs to the previous split's reader.
        skipFirstLine = true;
        --start;
        fileIn.seek(start);
      }
      in = new LfLineReader(fileIn, job);
    }
    if (skipFirstLine) {
      // Consume the partial first line and advance "start" past it.
      start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    this.pos = start; // pos is a field of the enclosing reader
  }
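
A hedged sketch of driving such a RecordReader by hand, e.g. in a test; the reader class name and file are stand-ins for whichever reader declares this initialize():

    // Hypothetical harness: MyLineRecordReader and the path are stand-ins.
    FileSplit split = new FileSplit(new Path("file:///tmp/sample.txt"), 0, 100, null);
    MyLineRecordReader reader = new MyLineRecordReader();
    reader.initialize(split, context); // context: a TaskAttemptContext
    while (reader.nextKeyValue()) {
      LongWritable k = reader.getCurrentKey(); // byte offset of the line
      Text v = reader.getCurrentValue();       // line contents
    }
    reader.close();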

   

  public void testIncorrectArgs() throws Exception {
    File f = createFile(xml1);

    // Create a FileSplit covering the whole file (no host hints).
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split =
        new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
    AggregatingRecordReader reader = new AggregatingRecordReader();
    try {
      // Clear the START and END token values.
      conf.set(AggregatingRecordReader.START_TOKEN, null);
      conf.set(AggregatingRecordReader.END_TOKEN, null);

  public void testCorrectXML() throws Exception {
    File f = createFile(xml1);

    // Create a FileSplit covering the whole file.
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split =
        new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);

    // Initialize the RecordReader and confirm it finds a record.
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());

  public void testPartialXML() throws Exception {
    File f = createFile(xml2);

    // Create a FileSplit covering the whole file.
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split =
        new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);

    // Initialize the RecordReader and confirm it finds a record.
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());
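
The tests above pass a pre-built ctx to initialize(). A hedged sketch of how such a TaskAttemptContext can be constructed in a test harness on Hadoop 2.x; this setup is an assumption, not the project's actual fixture:

    // Hypothetical test fixture: builds the "ctx" used by the tests above.
    // Uses org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.
    Configuration conf = new Configuration();
    TaskAttemptContext ctx =
        new TaskAttemptContextImpl(conf, new TaskAttemptID());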


