Package org.apache.hadoop.mapreduce.lib.input

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit
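
FileSplit is the InputSplit implementation used by file-based input formats: it names a file, a byte range within that file (start and length), and the hosts that store the range. A minimal sketch, with purely illustrative paths and sizes, of constructing a split by hand and reading its fields:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class FileSplitSketch {
  public static void main(String[] args) {
    // Illustrative values only: a split covering the first 1024 bytes of a file.
    FileSplit split = new FileSplit(
        new Path("hdfs:///data/input.txt"),
        0L,                             // byte offset where the split starts
        1024L,                          // number of bytes in the split
        new String[] { "datanode1" }aN); // hosts holding the data locally

    System.out.println(split.getPath());   // the file this split belongs to
    System.out.println(split.getStart());
    System.out.println(split.getLength());
  }
}

The snippets below show the patterns that come up in practice: casting context.getInputSplit() to FileSplit, wrapping FileSplits in custom split types, and serializing them.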


  /**
   * Returns the fully qualified path of the file backing the current split,
   * or null when the task's split is not a FileSplit.
   */
  protected Path getCurrentFile(Context context) throws IOException {
    InputSplit split = context.getInputSplit();
    if (split instanceof FileSplit) { // instanceof is already null-safe
      FileSplit inputSplit = (FileSplit) split;
      Path path = inputSplit.getPath();
      FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
      return path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
    }
    return null;
  }
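
A minimal usage sketch of this helper; the map() signature and output below are assumptions, not part of the original class:

  // Hypothetical usage inside the same mapper class: tag every record
  // with the name of the file its split came from.
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    Path currentFile = getCurrentFile(context); // null for non-file splits
    if (currentFile != null) {
      context.write(new Text(currentFile.getName()), value);
    }
  }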


  public static class InputSplitDetailMapper
      extends Mapper<NullWritable, NullWritable, Text, LongWritable> {
    @Override
    protected void map(NullWritable key, NullWritable value, Context context)
        throws IOException, InterruptedException {
      // Emit one (file path, split length) record per input split.
      FileSplit split = (FileSplit) context.getInputSplit();
      context.write(new Text(split.getPath().toString()),
          new LongWritable(split.getLength()));
    }
  }
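
A hedged driver sketch for wiring this mapper into a map-only job. It assumes an InputFormat that produces (NullWritable, NullWritable) records, which is what the mapper expects; the job name and paths are placeholders:

    // Hypothetical driver: input format and paths are assumptions.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "input-split-details");
    job.setMapperClass(InputSplitDetailMapper.class);
    job.setNumReduceTasks(0); // map-only: one (path, length) record per split
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path("/placeholder/input"));
    FileOutputFormat.setOutputPath(job, new Path("/placeholder/output"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);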

    // Wrap every delegate FileSplit once per group, so numGroups different
    // tasks can each process a distinct partition of the same file.
    for (int group = 0; group < numGroups; group++) {
      for (InputSplit split : superSplits) {
        FileSplit fileSplit = (FileSplit) split;
        splits.add(new WikipediaInputSplit(fileSplit, group));
      }
    }
    return splits;
  }

        // Tail of readFields(DataInput in): rebuild the optional host list,
        // then the wrapped FileSplit and the partition number.
        int numHosts = in.readInt();
        hosts = new String[numHosts];
        for (int i = 0; i < numHosts; i++) {
          hosts[i] = in.readUTF();
        }
      }
      fileSplit = new FileSplit(file, start, length, hosts);
      partition = in.readInt();
    }
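
The readFields() fragment above implies a matching write(). A hedged sketch of what the serialization side presumably looks like, inferring the field order from the reads shown (path, start, length, optional host list, partition):

  // Hypothetical counterpart to readFields() above; the field order is an
  // assumption inferred from the reads shown, not the project's actual code.
  @Override
  public void write(DataOutput out) throws IOException {
    out.writeUTF(fileSplit.getPath().toString());
    out.writeLong(fileSplit.getStart());
    out.writeLong(fileSplit.getLength());
    String[] hosts = fileSplit.getLocations();
    out.writeBoolean(hosts != null);
    if (hosts != null) {
      out.writeInt(hosts.length);
      for (String host : hosts) {
        out.writeUTF(host);
      }
    }
    out.writeInt(partition);
  }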

   
    WikipediaInputSplit wiSplit = (WikipediaInputSplit) context.getInputSplit();
    myGroup = wiSplit.getPartition();
    numGroups = WikipediaConfiguration.getNumGroups(conf);

    // Derive the ingest language from the wrapped split's file name.
    FileSplit split = wiSplit.getFileSplit();
    String fileName = split.getPath().getName();
    Matcher matcher = languagePattern.matcher(fileName);
    if (matcher.matches()) {
      language = matcher.group(1).replace('_', '-').toLowerCase();
    } else {
      throw new RuntimeException("Unknown ingest language! " + fileName);
    }
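
languagePattern itself is defined outside this excerpt; a purely illustrative pattern with the right shape (one capturing group for a language prefix that may contain underscores) would be:

  // Purely illustrative, not the project's actual pattern: capture a
  // language prefix (possibly with underscores) from the dump file name.
  private static final Pattern languagePattern =
      Pattern.compile("([a-zA-Z_]+)-.*\\.xml(\\.bz2)?");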

  private LongWritable key = null;
  private Text value = null;

  @Override
  public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // Open the file and seek to the start of the split.
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(file);
    boolean skipFirstLine = false;
    if (codec != null) {
      // Compressed streams cannot be split: read the whole file as one split.
      in = new LfLineReader(codec.createInputStream(fileIn), job);
      end = Long.MAX_VALUE;
    } else {
      if (start != 0) {
        // Not at the file start: back up one byte and skip the partial first
        // line, which belongs to the previous split's reader.
        skipFirstLine = true;
        --start;
        fileIn.seek(start);
      }
      in = new LfLineReader(fileIn, job);
    }
    if (skipFirstLine) {
      // Consume the partial first line and advance "start" past it.
      start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    this.pos = start; // pos is a field of the enclosing reader
  }
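
A hedged sketch of driving such a RecordReader by hand, e.g. in a test; the reader class name and file are stand-ins for whichever reader declares this initialize():

    // Hypothetical harness: MyLineRecordReader and the path are stand-ins.
    FileSplit split = new FileSplit(new Path("file:///tmp/sample.txt"), 0, 100, null);
    MyLineRecordReader reader = new MyLineRecordReader();
    reader.initialize(split, context); // context: a TaskAttemptContext
    while (reader.nextKeyValue()) {
      LongWritable k = reader.getCurrentKey(); // byte offset of the line
      Text v = reader.getCurrentValue();       // line contents
    }
    reader.close();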

   

  public void testIncorrectArgs() throws Exception {
    File f = createFile(xml1);

    // Create a FileSplit covering the whole file (no host hints).
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split =
        new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);
    AggregatingRecordReader reader = new AggregatingRecordReader();
    try {
      // Clear the START and END token values.
      conf.set(AggregatingRecordReader.START_TOKEN, null);
      conf.set(AggregatingRecordReader.END_TOKEN, null);

  public void testCorrectXML() throws Exception {
    File f = createFile(xml1);

    // Create a FileSplit covering the whole file.
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split =
        new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);

    // Initialize the RecordReader and confirm it finds a record.
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());

  public void testPartialXML() throws Exception {
    File f = createFile(xml2);

    // Create a FileSplit covering the whole file.
    Path p = new Path(f.toURI().toString());
    WikipediaInputSplit split =
        new WikipediaInputSplit(new FileSplit(p, 0, f.length(), null), 0);

    // Initialize the RecordReader and confirm it finds a record.
    AggregatingRecordReader reader = new AggregatingRecordReader();
    reader.initialize(split, ctx);
    assertTrue(reader.nextKeyValue());
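
The tests above pass a pre-built ctx to initialize(). A hedged sketch of how such a TaskAttemptContext can be constructed in a test harness on Hadoop 2.x; this setup is an assumption, not the project's actual fixture:

    // Hypothetical test fixture: builds the "ctx" used by the tests above.
    // Uses org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.
    Configuration conf = new Configuration();
    TaskAttemptContext ctx =
        new TaskAttemptContextImpl(conf, new TaskAttemptID());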


