// Source code of gov.sandia.dfgleic.NullInputFormat (includes the nested class NullInputFormat$RandomRecordReader)

/**
 * @author David F. Gleich
 */ 


package gov.sandia.dfgleic;


import java.util.ArrayList;
import java.util.List;
import java.lang.reflect.Field;


import java.io.IOException;


import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.BytesWritable;




import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.FileOutputFormat;


import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobContext;




  /**
   * A custom input format that creates virtual inputs of a single string
   * for each map.
   */
  public class NullInputFormat implements InputFormat<Text, BytesWritable> {


    /** 
     * Generate the requested number of file splits, with the filename
     * set to the filename of the output file.
     */
    public InputSplit[] getSplits(JobConf job, int inputSplits) throws IOException {
      List<InputSplit> result = new ArrayList<InputSplit>();
      Path outDir = FileOutputFormat.getOutputPath(job);
      String numMapsConfig = null;
      try {
          Class c = Class.forName("org.apache.hadoop.mapreduce.MRJobConfig");
          Field f = c.getField("NUM_MAPS");
          numMapsConfig = (String)f.get(null);
      } catch (Throwable e) {
          numMapsConfig = "mapred.map.tasks";
      }
      int numSplits = job.getInt(numMapsConfig, 1);
      for(int i=0; i < numSplits; ++i) {
        result.add(new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, 
                                  (String[])null));
      }
      InputSplit[] rval = {result.get(0)};
      rval = result.toArray(rval); 
      return rval;
    }


    /**
     * Return a single record (filename, "") where the filename is taken from
     * the file split.
     */
    static class RandomRecordReader implements RecordReader<Text, BytesWritable> {
      Path name;
      BytesWritable value = new BytesWritable();
      
      public RandomRecordReader(Path p, JobConf job) {
        name = p;
      }


      public Text createKey() {
        return new Text();
      }


      public BytesWritable createValue() {
        return new BytesWritable();
      }
      
      public boolean next(Text key, BytesWritable value) {
        if (name != null) {
          key.set(name.getName());
          value.set(this.value);
          name = null;
          return true;
        } else {
          return false;
        }
      }
        
      
      public void close() {}


      public float getProgress() {
        return 0.0f;
      }
      public long getPos() {
        return 0;
      }
    }


    @Override
    public RecordReader<Text, BytesWritable> getRecordReader(
          InputSplit split, JobConf job, Reporter reporter) throws IOException {
        return new RandomRecordReader(((FileSplit) split).getPath(), job);
    }
  }
// Source code of gov.sandia.dfgleic.NullInputFormat$RandomRecordReader
//
// Related classes of gov.sandia.dfgleic.NullInputFormat$RandomRecordReader