Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.InputFormat
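The snippets below are drawn from Hadoop and Hive code that implements or
drives org.apache.hadoop.mapred.InputFormat, the old-API contract describing
a job's input. In the API generation these examples target, the contract
boils down to two methods; some earlier releases also carried a
validateInput(JobConf) hook and FileSystem-taking overloads, which the first
snippets still use. A rough sketch of the contract:

  public interface InputFormat<K, V> {
    // Partition the job's input into chunks, each handed to one map task.
    InputSplit[] getSplits(JobConf job, int numSplits) throws IOException;

    // Open a reader over a single split.
    RecordReader<K, V> getRecordReader(InputSplit split, JobConf job,
                                       Reporter reporter) throws IOException;
  }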


      } finally {
        writer.close();
      }

      // try splitting the file in a variety of sizes
      InputFormat format = new SequenceFileInputFormat();
      RecInt key = new RecInt();
      RecBuffer value = new RecBuffer();
      for (int i = 0; i < 3; i++) {
        int numSplits =
          random.nextInt(MAX_LENGTH/(SequenceFile.SYNC_INTERVAL/20))+1;
        //LOG.info("splitting: requesting = " + numSplits);
        FileSplit[] splits = format.getSplits(fs, job, numSplits);
        //LOG.info("splitting: got =        " + splits.length);

        // check each split
        BitSet bits = new BitSet(length);
        for (int j = 0; j < splits.length; j++) {
          RecordReader reader =
            format.getRecordReader(fs, splits[j], job, reporter);
          try {
            int count = 0;
            while (reader.next(key, value)) {
              // if (bits.get(key.get())) {
              //   LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
              // }
              // mark this key as seen so duplicate reads can be detected
              bits.set(key.get());
              count++;
            }
          } finally {
            reader.close();
          }
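The loop above is a round-trip check: a SequenceFile of length records is
written, split into a random number of pieces, and every split is read back
while a BitSet tracks which keys were seen. The excerpt stops short of the
final check, but the natural way to finish such a test (a hypothetical
completion, not the original source) is to assert full coverage once all
splits have been consumed:

        // every one of the `length` keys must have been seen exactly once
        assertEquals("Some keys were in no partition.",
            length, bits.cardinality());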


  /**
   * Obscures the InputFormat and location information to simulate maps
   * reading input from arbitrary locations ("indirect" reads).
   */
  static class IndirectInputFormat implements InputFormat {
    public void validateInput(JobConf job) throws IOException {
      InputFormat indirIF = (InputFormat)ReflectionUtils.newInstance(
          job.getClass("mapred.indirect.input.format",
            SequenceFileInputFormat.class), job);
      indirIF.validateInput(job);
    }

      return splits.toArray(new IndirectSplit[splits.size()]);
    }

    public RecordReader getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
      InputFormat indirIF = (InputFormat)ReflectionUtils.newInstance(
          job.getClass("mapred.indirect.input.format",
            SequenceFileInputFormat.class), job);
      IndirectSplit is = ((IndirectSplit)split);
      return indirIF.getRecordReader(new FileSplit(is.getPath(), 0,
            is.getLength(), (String[])null),
          job, reporter);
    }
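IndirectInputFormat defers all real work: the delegate format is instantiated
reflectively from the mapred.indirect.input.format job property (defaulting
to SequenceFileInputFormat), and each IndirectSplit is expanded into a
FileSplit covering the whole referenced file with no host locations, so the
scheduler gets no locality hints; that is exactly the "indirect read"
behavior the class comment describes. A job would wire it up roughly like
this (a sketch based on the property name in the code above):

  JobConf job = new JobConf();
  // the wrapper is what the framework sees...
  job.setInputFormat(IndirectInputFormat.class);
  // ...while the format that actually reads bytes rides along as a property
  job.setClass("mapred.indirect.input.format",
      SequenceFileInputFormat.class, InputFormat.class);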

          .getPartitionDescFromPathRecursively(pathToPartitionInfo, dir,
              IOPrepareCache.get().allocatePartitionDescMap(), true);
      // create a new InputFormat instance if this is the first time this
      // class has been seen
      Class inputFormatClass = part.getInputFileFormatClass();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
      for (InputSplit is : iss) {
        result.add(new HiveInputSplit(is, inputFormatClass.getName()));
      }
    }
    return result.toArray(new HiveInputSplit[result.size()]);
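Several of the Hive fragments go through getInputFormatFromCache rather than
constructing formats directly, so that one InputFormat instance is reused per
format class instead of being rebuilt for every split or directory. A minimal
illustration of such a cache (not Hive's exact code) might look like:

  import java.util.Map;
  import java.util.concurrent.ConcurrentHashMap;
  import org.apache.hadoop.mapred.InputFormat;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.util.ReflectionUtils;

  public final class InputFormatCache {
    private static final Map<Class<?>, InputFormat> formatCache =
        new ConcurrentHashMap<Class<?>, InputFormat>();

    public static InputFormat getInputFormatFromCache(
        Class<? extends InputFormat> cls, JobConf job) {
      InputFormat format = formatCache.get(cls);
      if (format == null) {
        // ReflectionUtils.newInstance also injects the JobConf into
        // JobConfigurable/Configurable instances
        format = ReflectionUtils.newInstance(cls, job);
        formatCache.put(cls, format);
      }
      return format;
    }
  }

Keying only on the class means per-job configuration differences are ignored,
which is fine for stateless formats but worth keeping in mind otherwise.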

    // from Utilities.getInputSummary()
    private long getFileLength(JobConf conf, Path path, Class<? extends InputFormat> clazz)
        throws IOException {
      ContentSummary summary;
      if (ContentSummaryInputFormat.class.isAssignableFrom(clazz)) {
        InputFormat input = HiveInputFormat.getInputFormatFromCache(clazz, conf);
        summary = ((ContentSummaryInputFormat)input).getContentSummary(path, conf);
      } else {
        summary = path.getFileSystem(conf).getContentSummary(path);
      }
      return summary.getLength();
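getFileLength distinguishes two cases: a format implementing
ContentSummaryInputFormat reports its own logical size (useful when the bytes
on disk do not match what will actually be read, for example with indexed or
wrapped storage), while everything else falls back to the FileSystem's
summary. Judging purely from the call sites in these excerpts, opting in
looks roughly like this (interface shape inferred from usage here, so treat
it as an assumption):

  import java.io.IOException;
  import org.apache.hadoop.fs.ContentSummary;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapred.SequenceFileInputFormat;

  public class MyIndexedInputFormat extends SequenceFileInputFormat
      implements ContentSummaryInputFormat {
    @Override
    public ContentSummary getContentSummary(Path p, JobConf job)
        throws IOException {
      // a real implementation would report the logical size (for example,
      // after decompression or index expansion) instead of the raw size
      return p.getFileSystem(job).getContentSummary(p);
    }
  }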

            @Override
            public void run() {
              try {
                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                  resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                  return;
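The run() body above is one task in a parallel scan: each path's content
summary is computed independently and dropped into a shared resultMap. The
surrounding machinery is elided by the excerpt, but the shape is a small
thread pool with one task per path, along these lines (hypothetical wiring;
resultMap must be a concurrent map, and numThreads, paths, and
computeSummary stand in for names not shown in the excerpt):

  ExecutorService pool = Executors.newFixedThreadPool(numThreads);
  for (final Path p : paths) {
    pool.submit(new Runnable() {
      @Override
      public void run() {
        resultMap.put(p.toString(), computeSummary(p));
      }
    });
  }
  pool.shutdown();
  // awaitTermination declares InterruptedException; handle it in real code
  pool.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);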

      inputFormatClass = Class.forName(inputFormatClassName);
    } catch (ClassNotFoundException e) {
      throw new IOException("CombineHiveRecordReader: class not found "
          + inputFormatClassName);
    }
    InputFormat inputFormat = HiveInputFormat.getInputFormatFromCache(
        inputFormatClass, jobConf);

    // create a split for the given partition
    FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition], hsplit
        .getStartOffsets()[partition], hsplit.getLengths()[partition], hsplit
        .getLocations());

    this.setRecordReader(inputFormat.getRecordReader(fsplit, jobConf, reporter));

    this.initIOContext(fsplit, jobConf, inputFormatClass, this.recordReader);
  }
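CombineHiveRecordReader is rehydrating an ordinary FileSplit from one slot of
a combined split: hsplit carries parallel arrays of paths, start offsets, and
lengths, and the partition index selects the chunk this particular reader
owns. Walking an entire combined split follows the same pattern (a sketch
reusing the accessors from the excerpt):

  for (int i = 0; i < hsplit.getPaths().length; i++) {
    FileSplit chunk = new FileSplit(hsplit.getPaths()[i],
        hsplit.getStartOffsets()[i], hsplit.getLengths()[i],
        hsplit.getLocations());
    // each chunk is then handed to the underlying format's getRecordReader()
  }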

      }

      // Use HiveInputFormat if any of the paths is not splittable
      Class inputFormatClass = part.getInputFileFormatClass();
      String inputFormatClassName = inputFormatClass.getName();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      String deserializerClassName = null;
      try {
        deserializerClassName = part.getDeserializer(job).getClass().getName();
      } catch (Exception e) {
        // ignore: continue with a null deserializer class name
      }
