Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.InputFormat
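The excerpts below, gathered from Apache Hive and from Hadoop's own tests, all exercise the old-style org.apache.hadoop.mapred.InputFormat contract: getSplits() carves the job input into InputSplits, and getRecordReader() turns one split into a stream of key/value records. Each excerpt is a fragment and cuts off where the original listing did. For orientation, here is a minimal, hypothetical sketch of that contract being driven directly; the input path is a made-up example:

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class InputFormatDemo {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(InputFormatDemo.class);
    FileInputFormat.setInputPaths(job, new Path("/tmp/demo-input")); // example path

    TextInputFormat format = new TextInputFormat();
    format.configure(job); // TextInputFormat is JobConfigurable

    // The numSplits argument is only a hint; formats may return more or fewer.
    InputSplit[] splits = format.getSplits(job, 4);

    for (InputSplit split : splits) {
      RecordReader<LongWritable, Text> reader =
          format.getRecordReader(split, job, Reporter.NULL);
      LongWritable key = reader.createKey();
      Text value = reader.createValue();
      try {
        while (reader.next(key, value)) {
          // process one record; for TextInputFormat the key is the byte offset
        }
      } finally {
        reader.close();
      }
    }
  }
}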


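From what appears to be Hive's CombineHiveRecordReader: the partition's InputFormat class is resolved by name, fetched from a per-class cache, and asked for a record reader over one chunk of a combined split.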
    try {
      inputFormatClass = Class.forName(inputFormatClassName);
    } catch (ClassNotFoundException e) {
      throw new IOException("CombineHiveRecordReader: class not found "
          + inputFormatClassName);
    }
    InputFormat inputFormat = CombineHiveInputFormat.getInputFormatFromCache(inputFormatClass, job);
   
    // create a split for the given partition
    FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition],
                                     hsplit.getStartOffsets()[partition],
                                     hsplit.getLengths()[partition],
                                     hsplit.getLocations());
   
    this.recordReader = inputFormat.getRecordReader(fsplit, job, reporter);
  }
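Several of these excerpts go through getInputFormatFromCache(), which hands back one shared InputFormat instance per class rather than reflectively constructing a new one for every split. A minimal sketch of such a cache, assuming Hadoop's ReflectionUtils for instantiation (a hypothetical stand-in, not the actual Hive code):

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

public class InputFormatCache {
  // One instance per InputFormat class, shared across calls.
  private static final Map<Class<?>, InputFormat> cache =
      new HashMap<Class<?>, InputFormat>();

  public static synchronized InputFormat getInputFormatFromCache(
      Class<? extends InputFormat> cls, JobConf job) {
    InputFormat format = cache.get(cls);
    if (format == null) {
      // newInstance() also calls configure(job) when the class is JobConfigurable.
      format = ReflectionUtils.newInstance(cls, job);
      cache.put(cls, format);
    }
    return format;
  }
}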


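From what looks like HiveInputFormat.getRecordReader(): the JobConf is cloned first, so the column-projection settings apply only to this reader.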
    // Clone the JobConf so the columns needed for reading can be set on the copy
    JobConf cloneJobConf = new JobConf(job);
    initColumnsNeeded(cloneJobConf, inputFormatClass, hsplit.getPath().toString(),
                      hsplit.getPath().toUri().getPath());

    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, cloneJobConf);
    return new HiveRecordReader(inputFormat.getRecordReader(inputSplit,
        cloneJobConf, reporter));
  }

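Split computation over partitioned input: each directory's partition descriptor names the InputFormat to use, and every resulting split is wrapped in a HiveInputSplit that records the format's class name.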
    // for each dir, get the InputFormat, and do getSplits.
    for(Path dir: dirs) {
      partitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      // create a new InputFormat instance if this is the first time this class is seen
      Class inputFormatClass = part.getInputFileFormatClass();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      InputSplit[] iss = inputFormat.getSplits(newjob, numSplits/dirs.length);
      for(InputSplit is: iss) {
        result.add(new HiveInputSplit(is, inputFormatClass.getName()));
      }
    }
    return result.toArray(new HiveInputSplit[result.size()]);
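Two details worth noting: the per-directory hint is numSplits/dirs.length, so each directory receives an equal share of the requested splits, and storing inputFormatClass.getName() in the HiveInputSplit is what later lets getRecordReader() resolve the matching format for that split, as in the first excerpt above.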

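The same per-directory pattern applied to input validation; the actual validateInput() call goes through the shim layer to absorb Hadoop version differences.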
    // for each dir, get the InputFormat, and do validateInput.
    for (Path dir: dirs) {
      partitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      // create a new InputFormat instance if this is the first time this class is seen
      InputFormat inputFormat = getInputFormatFromCache(part.getInputFileFormatClass(), job);

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      ShimLoader.getHadoopShims().inputFormatValidateInput(inputFormat, newjob);
    }
  }

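A fragment of split generation, apparently from CombineHiveInputFormat: it falls back to plain HiveInputFormat when a path is not splittable, gated on a configuration flag because the presence of MAPREDUCE-1597 cannot be detected at runtime.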
      // Use HiveInputFormat if any of the paths is not splittable
      Class inputFormatClass = part.getInputFileFormatClass();
      String inputFormatClassName = inputFormatClass.getName();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);

      // Since there is no easy way of knowing whether MAPREDUCE-1597 is present
      // in the tree or not, we rely on a configuration variable instead.
      if (this.mrwork != null && !this.mrwork.getHadoopSupportsSplittable()) {
        // The following code should be removed, once

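Input-size estimation: when the partition's InputFormat implements ContentSummaryInputFormat, the content summary is obtained from the format itself rather than from the filesystem.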
              try {
                ContentSummary resultCs;

                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  resultCs = ((ContentSummaryInputFormat) inputFormatObj).getContentSummary(p,
                      myJobConf);
                } else {

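From a Hadoop test of SequenceFileInputFormat: the freshly written file is split at several randomly chosen granularities, and a BitSet records each key so the assertions can verify that every record lands in exactly one split.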
      } finally {
        writer.close();
      }

      // try splitting the file in a variety of sizes
      InputFormat format = new SequenceFileInputFormat();
      RecInt key = new RecInt();
      RecBuffer value = new RecBuffer();
      for (int i = 0; i < 3; i++) {
        int numSplits =
          random.nextInt(MAX_LENGTH/(SequenceFile.SYNC_INTERVAL/20))+1;
        InputSplit[] splits = format.getSplits(job, numSplits);

        // check each split
        BitSet bits = new BitSet(length);
        for (int j = 0; j < splits.length; j++) {
          RecordReader reader =
            format.getRecordReader(splits[j], job, reporter);
          try {
            int count = 0;
            while (reader.next(key, value)) {
              assertFalse("Key in multiple partitions.", bits.get(key.getData()));
              bits.set(key.getData());

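An older variant of HiveInputFormat.getRecordReader(): the split's path, both with and without the URI scheme, is matched against the path-to-alias map, and the needed column IDs of every matching TableScanOperator are pushed into the JobConf before the underlying reader is created.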
      inputFormatClass = job.getClassByName(inputFormatClassName);
    } catch (Exception e) {
      throw new IOException("cannot find class " + inputFormatClassName);
    }

    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);

   
    if (this.mrwork == null)
      init(job);
    JobConf jobConf = new JobConf(job);
    ArrayList<String> aliases = new ArrayList<String>();
    Iterator<Entry<String, ArrayList<String>>> iterator = this.mrwork
        .getPathToAliases().entrySet().iterator();
    String splitPath = hsplit.getPath().toString();
    String splitPathWithNoSchema = hsplit.getPath().toUri().getPath();
    while (iterator.hasNext()) {
      Entry<String, ArrayList<String>> entry = iterator.next();
      String key = entry.getKey();
      if (splitPath.startsWith(key) || splitPathWithNoSchema.startsWith(key)) {
        ArrayList<String> list = entry.getValue();
        for (String val : list)
          aliases.add(val);
      }
    }
    for (String alias : aliases) {
      Operator<? extends Serializable> op = this.mrwork.getAliasToWork().get(
          alias);
      if (op instanceof TableScanOperator) {
        TableScanOperator tableScan = (TableScanOperator) op;
        ArrayList<Integer> list = tableScan.getNeededColumnIDs();
        if (list != null)
          HiveFileFormatUtils.setReadColumnIDs(jobConf, list);
        else
          HiveFileFormatUtils.setFullyReadColumns(jobConf);
      }
    }
    return new HiveRecordReader(inputFormat.getRecordReader(inputSplit,
        jobConf, reporter));
  }

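Finally, the non-partitioned variant of split computation: the InputFormat comes from the table descriptor rather than from a partition descriptor, but the wrapping is the same.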
    // for each dir, get the InputFormat, and do getSplits.
    for(Path dir: dirs) {
      tableDesc table = getTableDescFromPath(dir);
      // create a new InputFormat instance if this is the first time this class is seen
      Class inputFormatClass = table.getInputFileFormatClass();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      InputSplit[] iss = inputFormat.getSplits(newjob, numSplits/dirs.length);
      for(InputSplit is: iss) {
        result.add(new HiveInputSplit(is, inputFormatClass.getName()));
      }
    }
    return result.toArray(new HiveInputSplit[result.size()]);
