Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.InputFormat


      return splits.toArray(new IndirectSplit[splits.size()]);
    }

    public RecordReader getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
      InputFormat indirIF = (InputFormat)ReflectionUtils.newInstance(
          job.getClass("mapred.indirect.input.format",
            SequenceFileInputFormat.class), job);
      IndirectSplit is = ((IndirectSplit)split);
      return indirIF.getRecordReader(new FileSplit(is.getPath(), 0,
            is.getLength(), job),
          job, reporter);
    }
View Full Code Here


   * Obscures the InputFormat and location information to simulate maps
   * reading input from arbitrary locations ("indirect" reads).
   */
  static class IndirectInputFormat implements InputFormat {
    public void validateInput(JobConf job) throws IOException {
      InputFormat indirIF = (InputFormat)ReflectionUtils.newInstance(
          job.getClass("mapred.indirect.input.format",
            SequenceFileInputFormat.class), job);
      indirIF.validateInput(job);
    }
View Full Code Here

      return splits.toArray(new IndirectSplit[splits.size()]);
    }

    public RecordReader getRecordReader(InputSplit split, JobConf job,
        Reporter reporter) throws IOException {
      InputFormat indirIF = (InputFormat)ReflectionUtils.newInstance(
          job.getClass("mapred.indirect.input.format",
            SequenceFileInputFormat.class), job);
      IndirectSplit is = ((IndirectSplit)split);
      return indirIF.getRecordReader(new FileSplit(is.getPath(), 0,
            is.getLength(), job),
          job, reporter);
    }
View Full Code Here

              try {
                ContentSummary resultCs;

                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  resultCs = ((ContentSummaryInputFormat) inputFormatObj).getContentSummary(p,
                      myJobConf);
                } else {
View Full Code Here

      }

      // Use HiveInputFormat if any of the paths is not splittable
      Class inputFormatClass = part.getInputFileFormatClass();
      String inputFormatClassName = inputFormatClass.getName();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      String deserializerClassName = part.getDeserializerClass() == null ? null
          : part.getDeserializerClass().getName();

      // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not,
      // we use a configuration variable for the same
View Full Code Here

    }

    pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
      .toString(), hsplit.getPath().toUri().getPath(), nonNative);

    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
        cloneJobConf);
    RecordReader innerReader = null;
    try {
      innerReader = inputFormat.getRecordReader(inputSplit,
        cloneJobConf, reporter);
    } catch (Exception e) {
      innerReader = HiveIOExceptionHandlerUtil
          .handleRecordReaderCreationException(e, cloneJobConf);
    }
View Full Code Here

    for (Path dir : dirs) {
      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      // create a new InputFormat instance if this is the first time to see this
      // class
      Class inputFormatClass = part.getInputFileFormatClass();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);

      // Make filter pushdown information available to getSplits.
      ArrayList<String> aliases =
        mrwork.getPathToAliases().get(dir.toUri().toString());
      if ((aliases != null) && (aliases.size() == 1)) {
        Operator op = mrwork.getAliasToWork().get(aliases.get(0));
        if ((op != null) && (op instanceof TableScanOperator)) {
          TableScanOperator tableScan = (TableScanOperator) op;
          pushFilters(newjob, tableScan);
        }
      }

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
      for (InputSplit is : iss) {
        result.add(new HiveInputSplit(is, inputFormatClass.getName()));
      }
    }
View Full Code Here

    // for each dir, get the InputFormat, and do validateInput.
    for (Path dir : dirs) {
      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      // create a new InputFormat instance if this is the first time to see this
      // class
      InputFormat inputFormat = getInputFormatFromCache(part
          .getInputFileFormatClass(), job);

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      ShimLoader.getHadoopShims().inputFormatValidateInput(inputFormat, newjob);
    }
  }
View Full Code Here

    for (Entry<Schema, List<Path>> schemaEntry :
        schemaPaths.entrySet()) {
      Schema schema = schemaEntry.getKey();
      System.out.println(schema);
      InputFormat format = (InputFormat) ReflectionUtils.newInstance(
         AvroInputFormat.class, conf);
      List<Path> paths = schemaEntry.getValue();

      Map<Class<? extends AvroMapper>, List<Path>> mapperPaths
          = new HashMap<Class<? extends AvroMapper>, List<Path>>();

      // Now, for each set of paths that have a common Schema, build
      // a map of Mappers to the paths they're used for
      for (Path path : paths) {
       Class<? extends AvroMapper> mapperClass = mapperMap.get(path);
       if (!mapperPaths.containsKey(mapperClass)) {
         mapperPaths.put(mapperClass, new LinkedList<Path>());
       }

       mapperPaths.get(mapperClass).add(path);
      }

      // Now each set of paths that has a common InputFormat and Mapper can
      // be added to the same job, and split together.
      for (Entry<Class<? extends AvroMapper>, List<Path>> mapEntry : mapperPaths
         .entrySet()) {
       paths = mapEntry.getValue();
       Class<? extends AvroMapper> mapperClass = mapEntry.getKey();

       if (mapperClass == null) {
         mapperClass = (Class<? extends AvroMapper>) conf.getMapperClass();
       }

       FileInputFormat.setInputPaths(confCopy, paths.toArray(new Path[paths
           .size()]));

       // Get splits for each input path and tag with InputFormat
       // and Mapper types by wrapping in a TaggedInputSplit.
       InputSplit[] pathSplits = format.getSplits(confCopy, numSplits);
       for (InputSplit pathSplit : pathSplits) {
         splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(),
             mapperClass, schema));
       }
      }
    }
View Full Code Here

          .getPartitionDescFromPathRecursively(pathToPartitionInfo, dir,
              IOPrepareCache.get().allocatePartitionDescMap(), true);
      // create a new InputFormat instance if this is the first time to see this
      // class
      Class inputFormatClass = part.getInputFileFormatClass();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
      for (InputSplit is : iss) {
        result.add(new HiveInputSplit(is, inputFormatClass.getName()));
      }
    }
    return result.toArray(new HiveInputSplit[result.size()]);
View Full Code Here

TOP

Related Classes of org.apache.hadoop.mapred.InputFormat

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.