Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.InputFormat

The excerpts below show how different projects drive an InputFormat: they instantiate the job's configured format (typically via ReflectionUtils.newInstance), call getSplits(JobContext) to plan the input, and hand the format to helpers such as key samplers that write a partition file.


                        inputSpecificJob);
                // The above setLocation call could write to the conf within
                // the inputSpecificJob - use this updated conf
               
                // get the InputFormat from it and ask for splits
                InputFormat inpFormat = loadFunc.getInputFormat();
                List<InputSplit> oneInputSplits = inpFormat.getSplits(
                        HadoopShims.createJobContext(inputSpecificJob.getConfiguration(),
                                jobcontext.getJobID()));
                List<InputSplit> oneInputPigSplits = getPigSplits(
                        oneInputSplits, i, inpTargets.get(i),
                        HadoopShims.getDefaultBlockSize(fs, isFsPath ? path : fs.getWorkingDirectory()),
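The excerpt above is taken from the middle of a composite getSplits() implementation that delegates to per-input load functions, so it is not runnable on its own. As a self-contained sketch of the same core pattern (instantiate the job's configured InputFormat and ask it for splits), here is a minimal driver, assuming the Hadoop 2 Job API; the TextInputFormat choice, the command-line input path, and the ListSplits class name are illustrative assumptions.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.ReflectionUtils;

public class ListSplits {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "list-splits");
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));

    // Instantiate the configured InputFormat and ask it for splits,
    // much as the framework (or the Pig code above) does at planning time.
    InputFormat<?, ?> inputFormat = ReflectionUtils.newInstance(
        job.getInputFormatClass(), job.getConfiguration());
    List<InputSplit> splits = inputFormat.getSplits(job);
    for (InputSplit split : splits) {
      System.out.println(split + "  length=" + split.getLength());
    }
  }
}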


    Map<Path, String> formatMap = PangoolMultipleInputs.getInputFormatMap(job);
    Map<Path, String> mapperMap = PangoolMultipleInputs.getInputProcessorFileMap(job);

    // For each input path, deserialize the InputFormat registered for it, compute
    // that path's splits, and tag every split with the processor to run on it.
    for(Map.Entry<Path, String> entry : formatMap.entrySet()) {
      FileInputFormat.setInputPaths(jobCopy, entry.getKey());
      InputFormat inputFormat = DCUtils.loadSerializedObjectInDC(conf, InputFormat.class,
          entry.getValue(), true);
      List<InputSplit> pathSplits = inputFormat.getSplits(jobCopy);
      for(InputSplit pathSplit : pathSplits) {
        splits.add(new TaggedInputSplit(pathSplit, conf, entry.getValue(), mapperMap
            .get(entry.getKey())));
      }
    }
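The Pangool code above rebuilds, for every input path, the serialized InputFormat that was registered for that path and wraps each of its splits in a TaggedInputSplit together with the processor to run. Hadoop ships a similar per-path facility as org.apache.hadoop.mapreduce.lib.input.MultipleInputs; the sketch below shows that stock API under assumptions: the three paths and the LogMapper/UserMapper classes are made up for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MultipleInputsExample {

  // Hypothetical mappers: each input path is processed by its own mapper class.
  public static class LogMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context ctx)
        throws IOException, InterruptedException {
      ctx.write(new Text("log\t" + value), NullWritable.get());
    }
  }

  public static class UserMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context ctx)
        throws IOException, InterruptedException {
      ctx.write(new Text("user\t" + value), NullWritable.get());
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "multiple-inputs");
    job.setJarByClass(MultipleInputsExample.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // Each call records the InputFormat and Mapper to use for one path; at split
    // time the framework tags every split with that pair, much like the
    // TaggedInputSplit wrapping in the snippet above.
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, LogMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, UserMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}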

    @SuppressWarnings("unchecked")
    public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
      throws IOException, ClassNotFoundException, InterruptedException {
      LinkedList<K> splits = new LinkedList<K>();
      Configuration conf = job.getConfiguration();
      final InputFormat inf =
        ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
      int numPartitions = job.getNumReduceTasks();
      K[] samples = sampler.getSample(inf, job);
      LOG.info("Using " + samples.length + " samples");
      RawComparator<K> comparator = (RawComparator<K>) job.getGroupingComparator();
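writePartitionFile() itself only drives the Sampler it is given; the direct InputFormat work (getSplits() plus createRecordReader()) happens inside Sampler.getSample(). Below is a rough sketch of such a sampler in the spirit of InputSampler.SplitSampler; the FirstKeysSampler name and its two constructor parameters are invented for illustration.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.util.ReflectionUtils;

/** Collects the first few keys of the first few splits. Hypothetical, for illustration only. */
public class FirstKeysSampler<K, V> implements InputSampler.Sampler<K, V> {

  private final int keysPerSplit;
  private final int maxSplitsSampled;

  public FirstKeysSampler(int keysPerSplit, int maxSplitsSampled) {
    this.keysPerSplit = keysPerSplit;
    this.maxSplitsSampled = maxSplitsSampled;
  }

  @SuppressWarnings("unchecked")
  @Override
  public K[] getSample(InputFormat<K, V> inf, Job job)
      throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    List<InputSplit> splits = inf.getSplits(job);
    List<K> samples = new ArrayList<K>();
    int splitsToSample = Math.min(maxSplitsSampled, splits.size());
    for (int i = 0; i < splitsToSample; i++) {
      TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());
      RecordReader<K, V> reader = inf.createRecordReader(splits.get(i), ctx);
      reader.initialize(splits.get(i), ctx);
      int taken = 0;
      while (taken < keysPerSplit && reader.nextKeyValue()) {
        // RecordReaders typically reuse the key object, so copy it through
        // the configured serialization before storing it.
        samples.add(ReflectionUtils.copy(conf, reader.getCurrentKey(), null));
        taken++;
      }
      reader.close();
    }
    return (K[]) samples.toArray();
  }
}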

  @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
  public static <K,V> void writePartitionFile(Job job, Sampler<K,V> sampler)
      throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf =
        ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[])sampler.getSample(inf, job);
    LOG.info("Using " + samples.length + " samples");
    RawComparator<K> comparator =

  @SuppressWarnings("unchecked")
  // getInputFormat, getOutputKeyComparator
  public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler) throws IOException, ClassNotFoundException, InterruptedException {
    log.debug("writePartitionFile({},{})", job, sampler);
    Configuration conf = job.getConfiguration();
    @SuppressWarnings("rawtypes")
    final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks() / 9;
    log.debug("Number of partitions is {} for each index", numPartitions);
    K[] samples = sampler.getSample(inf, job);
    log.info("Using " + samples.length + " samples");
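All three writePartitionFile() variants above follow the same recipe: instantiate the job's InputFormat reflectively, sample keys from it, and write split points to a partition file for a total-order sort. A minimal sketch of driving this with the stock InputSampler and TotalOrderPartitioner follows; the KeyValueTextInputFormat, the four reducers, the sampler parameters, and the partition-file path next to the output directory are all assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSort {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "total-order-sort");
    job.setJarByClass(TotalOrderSort.class);

    // Identity map and reduce over Text keys: the sampled *input* keys must
    // have the same type and distribution as the map output keys.
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(4);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Tell TotalOrderPartitioner where the partition file will live, then let
    // writePartitionFile() sample keys through the job's InputFormat (as in the
    // snippets above) and write the split points there.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
        new Path(args[1] + "_partitions"));
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.RandomSampler<Text, Text>(0.01, 1000, 10);
    InputSampler.writePartitionFile(job, sampler);

    job.setPartitionerClass(TotalOrderPartitioner.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}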

