Examples of org.apache.hadoop.mapred.TextInputFormat

org.apache.hadoop.mapred.TextInputFormat
An {@link InputFormat} for plain text files. Files are broken into lines. Either a linefeed or a carriage return is used to signal the end of a line. Keys are the position in the file, and values are the line of text.

  }


  @Override
  protected void runJob(JobConf job) throws IOException {
    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, job.getNumMapTasks());
    log.debug("Nb splits : {}", splits.length);


    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);


    int numTrees = Builder.getNbTrees(job); // total number of trees


    firstOutput = new PartialOutputCollector(numTrees);
    Reporter reporter = Reporter.NULL;


    firstIds = new int[splits.length];
    sizes = new int[splits.length];
    
    // to compute firstIds, process the splits in file order
    int firstId = 0;
    long slowest = 0; // duration of slowest map
    for (InputSplit split : splits) {
      int hp = ArrayUtils.indexOf(sorted, split); // hadoop's partition


      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);


      LongWritable key = reader.createKey();
      Text value = reader.createValue();


      Step1Mapper mapper = new MockStep1Mapper(getTreeBuilder(), dataset, getSeed(),

View Full Code Here

   * 
   */
  void secondStep(JobConf job, Path forestPath,
      PredictionCallback callback) throws IOException {
    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, job.getNumMapTasks());
    log.debug("Nb splits : {}", splits.length);


    Builder.sortSplits(splits);


    int numTrees = Builder.getNbTrees(job); // total number of trees


    // compute the expected number of outputs
    int total = 0;
    for (int p = 0; p < splits.length; p++) {
      total += Step2Mapper.nbConcerned(splits.length, numTrees, p);
    }


    secondOutput = new PartialOutputCollector(total);
    Reporter reporter = Reporter.NULL;
    long slowest = 0; // duration of slowest map


    for (int partition = 0; partition < splits.length; partition++) {
      InputSplit split = splits[partition];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split,
          job, reporter);


      LongWritable key = reader.createKey();
      Text value = reader.createValue();

View Full Code Here

    job.setNumMapTasks(numMaps);


    FileInputFormat.setInputPaths(job, dataPath);


    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);


    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);


    Step0OutputCollector collector = new Step0OutputCollector(numMaps);
    Reporter reporter = Reporter.NULL;


    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);


      LongWritable key = reader.createKey();
      Text value = reader.createValue();


      Step0Mapper mapper = new Step0Mapper();

View Full Code Here

    JobConf job = new JobConf();
    job.setNumMapTasks(numMaps);
    FileInputFormat.setInputPaths(job, dataPath);


    // retrieve the splits
    TextInputFormat input = (TextInputFormat) job.getInputFormat();
    InputSplit[] splits = input.getSplits(job, numMaps);


    InputSplit[] sorted = Arrays.copyOf(splits, splits.length);
    Builder.sortSplits(sorted);


    Reporter reporter = Reporter.NULL;


    int[] keys = new int[numMaps];
    Step0Output[] values = new Step0Output[numMaps];
    
    int[] expectedIds = new int[numMaps];
    
    for (int p = 0; p < numMaps; p++) {
      InputSplit split = sorted[p];
      RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter);


      LongWritable key = reader.createKey();
      Text value = reader.createValue();


      Long firstKey = null;

View Full Code Here

  public RecordReader<LongWritable, Text> getRecordReader(
      InputSplit split, JobConf job, Reporter reporter) throws IOException {
    InputSplit targetSplit = ((SymlinkTextInputSplit)split).getTargetSplit();


    // The target data is in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job);
    return inputFormat.getRecordReader(targetSplit, job, reporter);
  }

View Full Code Here

    if (targetPaths.size() == 0) {
      return new InputSplit[0];
    }


    // The input should be in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    JobConf newjob = new JobConf(job);
    newjob.setInputFormat(TextInputFormat.class);
    inputFormat.configure(newjob);


    List<InputSplit> result = new ArrayList<InputSplit>();


    // ceil(numSplits / numPaths), so we can get at least numSplits splits.
    int numPaths = targetPaths.size();
    int numSubSplits = (numSplits + numPaths - 1) / numPaths;


    // For each path, do getSplits().
    for (int i = 0; i < numPaths; ++i) {
      Path targetPath = targetPaths.get(i);
      Path symlinkPath = symlinkPaths.get(i);


      FileInputFormat.setInputPaths(newjob, targetPath);


      InputSplit[] iss = inputFormat.getSplits(newjob, numSubSplits);
      for (InputSplit is : iss) {
        result.add(new SymlinkTextInputSplit(symlinkPath, (FileSplit)is));
      }
    }
    return result.toArray(new InputSplit[result.size()]);

View Full Code Here


  TextInputFormat format;
  JobConf job;


  public Base64TextInputFormat() {
    format = new TextInputFormat();
  }

View Full Code Here

  
  TextInputFormat format;
  JobConf job;
  
  public Base64TextInputFormat() {
    format = new TextInputFormat();
  }

View Full Code Here


  TextInputFormat format;
  JobConf job;


  public Base64TextInputFormat() {
    format = new TextInputFormat();
  }

View Full Code Here

  public RecordReader<LongWritable, Text> getRecordReader(
      InputSplit split, JobConf job, Reporter reporter) throws IOException {
    InputSplit targetSplit = ((SymlinkTextInputSplit)split).getTargetSplit();


    // The target data is in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job);
    RecordReader innerReader = null;
    try {
      innerReader = inputFormat.getRecordReader(targetSplit, job,
          reporter);
    } catch (Exception e) {
      innerReader = HiveIOExceptionHandlerUtil
          .handleRecordReaderCreationException(e, job);
    }

View Full Code Here

0 1 2 3 4

TOP

Related Classes of org.apache.hadoop.mapred.TextInputFormat

com.cloudera.iterativereduce.io.TextRecordParser

com.cloudera.iterativereduce.irunit.IRUnitDriver

com.m6d.filecrush.crush.KeyValuePreservingTextInputFormat

eu.stratosphere.hadoopcompatibility.example.WordCount

eu.stratosphere.hadoopcompatibility.example.WordCountWithHadoopOutputFormat

org.apache.drill.exec.store.text.DrillTextRecordReader

org.apache.flink.hadoopcompatibility.mapred.example.HadoopMapredCompatWordCount

org.apache.flink.hadoopcompatibility.mapred.record.example.WordCount

org.apache.flink.hadoopcompatibility.mapred.record.example.WordCountWithOutputFormat

org.apache.hadoop.fs.FSDataInputStream

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.