Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.InputFormat
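
All of these snippets program against the two methods that make up the old-API org.apache.hadoop.mapred.InputFormat contract: getSplits(JobConf, int) and getRecordReader(InputSplit, JobConf, Reporter). As a point of reference, here is a minimal sketch of an implementation that simply delegates both calls to TextInputFormat; the class name and the delegation target are illustrative, not taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

// Illustrative only: a thin wrapper that shows the shape of the old-API contract
// by delegating both methods to TextInputFormat.
public class DelegatingTextInputFormat implements InputFormat<LongWritable, Text> {

  private final TextInputFormat delegate = new TextInputFormat();

  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    delegate.configure(job); // TextInputFormat is JobConfigurable
    return delegate.getSplits(job, numSplits);
  }

  @Override
  public RecordReader<LongWritable, Text> getRecordReader(InputSplit split,
      JobConf job, Reporter reporter) throws IOException {
    delegate.configure(job);
    return delegate.getRecordReader(split, job, reporter);
  }
}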


    // for each dir, get the InputFormat, and do validateInput.
    for (Path dir: dirs) {
      tableDesc table = getTableDescFromPath(dir);
      // create a new InputFormat instance if this is the first time this class has been seen
      InputFormat inputFormat = getInputFormatFromCache(table.getInputFileFormatClass(), job);

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      ShimLoader.getHadoopShims().inputFormatValidateInput(inputFormat, newjob);
    }
  }
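
Several of the examples here go through getInputFormatFromCache, which Hive uses to avoid reflectively instantiating the same InputFormat class for every path. A minimal sketch of that kind of per-class instance cache, assuming Hadoop's ReflectionUtils for instantiation (the class and field names are illustrative, not Hive's actual implementation):

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

// Illustrative sketch of a per-class InputFormat instance cache.
public final class InputFormatCache {

  private static final Map<Class<?>, InputFormat<?, ?>> CACHE =
      new HashMap<Class<?>, InputFormat<?, ?>>();

  private InputFormatCache() {
  }

  // Returns the cached instance for the class, creating one on first use.
  public static synchronized InputFormat<?, ?> getInputFormatFromCache(
      Class<? extends InputFormat<?, ?>> clazz, JobConf job) {
    InputFormat<?, ?> instance = CACHE.get(clazz);
    if (instance == null) {
      // ReflectionUtils.newInstance also wires in the JobConf when the
      // format implements Configurable or JobConfigurable.
      instance = ReflectionUtils.newInstance(clazz, job);
      CACHE.put(clazz, instance);
    }
    return instance;
  }
}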


              try {
                ContentSummary resultCs;

                Class<? extends InputFormat> inputFormatCls = partDesc
                    .getInputFileFormatClass();
                InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(
                    inputFormatCls, myJobConf);
                if (inputFormatObj instanceof ContentSummaryInputFormat) {
                  resultCs = ((ContentSummaryInputFormat) inputFormatObj).getContentSummary(p,
                      myJobConf);
                } else {

      inputFormatClass = Class.forName(inputFormatClassName);
    } catch (ClassNotFoundException e) {
      throw new IOException("CombineHiveRecordReader: class not found "
          + inputFormatClassName, e);
    }
    InputFormat inputFormat = HiveInputFormat.getInputFormatFromCache(
        inputFormatClass, jobConf);

    // create a split for the given partition
    FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition],
        hsplit.getStartOffsets()[partition], hsplit.getLengths()[partition],
        hsplit.getLocations());

    this.setRecordReader(inputFormat.getRecordReader(fsplit, jobConf, reporter));

    this.initIOContext(fsplit, jobConf, inputFormatClass, this.recordReader);
  }

    }

    pushProjectionsAndFilters(cloneJobConf, inputFormatClass,
        hsplit.getPath().toString(), hsplit.getPath().toUri().getPath(), nonNative);

    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
        cloneJobConf);
    RecordReader innerReader = null;
    try {
      innerReader = inputFormat.getRecordReader(inputSplit,
        cloneJobConf, reporter);
    } catch (Exception e) {
      innerReader = HiveIOExceptionHandlerUtil
          .handleRecordReaderCreationException(e, cloneJobConf);
    }

    for (Path dir : dirs) {
      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      // create a new InputFormat instance if this is the first time this class
      // has been seen
      Class inputFormatClass = part.getInputFileFormatClass();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);

      // Make filter pushdown information available to getSplits.
      ArrayList<String> aliases =
        mrwork.getPathToAliases().get(dir.toUri().toString());
      if ((aliases != null) && (aliases.size() == 1)) {
        Operator op = mrwork.getAliasToWork().get(aliases.get(0));
        if ((op != null) && (op instanceof TableScanOperator)) {
          TableScanOperator tableScan = (TableScanOperator) op;
          pushFilters(newjob, tableScan);
        }
      }

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
      for (InputSplit is : iss) {
        result.add(new HiveInputSplit(is, inputFormatClass.getName()));
      }
    }
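
The splits returned above are wrapped in HiveInputSplit together with the name of the InputFormat class that produced them, so the matching reader can be re-created on the task side. A minimal sketch of that wrapping idea, assuming only the stock org.apache.hadoop.mapred APIs (the TaggedInputSplit class here is illustrative, not Hive's HiveInputSplit):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.util.ReflectionUtils;

// Illustrative wrapper: carries the real split plus the name of the
// InputFormat class that should be used to read it.
public class TaggedInputSplit implements InputSplit {

  private InputSplit inner;
  private String inputFormatClassName;

  public TaggedInputSplit() {
    // no-arg constructor required for Writable deserialization
  }

  public TaggedInputSplit(InputSplit inner, String inputFormatClassName) {
    this.inner = inner;
    this.inputFormatClassName = inputFormatClassName;
  }

  public InputSplit getInner() {
    return inner;
  }

  public String getInputFormatClassName() {
    return inputFormatClassName;
  }

  @Override
  public long getLength() throws IOException {
    return inner.getLength();
  }

  @Override
  public String[] getLocations() throws IOException {
    return inner.getLocations();
  }

  @Override
  public void write(DataOutput out) throws IOException {
    Text.writeString(out, inputFormatClassName);
    Text.writeString(out, inner.getClass().getName());
    inner.write(out);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    inputFormatClassName = Text.readString(in);
    String splitClassName = Text.readString(in);
    try {
      // assumes the concrete split type has a no-arg constructor (FileSplit does)
      inner = (InputSplit) ReflectionUtils.newInstance(Class.forName(splitClassName), null);
    } catch (ClassNotFoundException e) {
      throw new IOException("Split class not found: " + splitClassName, e);
    }
    inner.readFields(in);
  }
}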

    // for each dir, get the InputFormat, and do validateInput.
    for (Path dir : dirs) {
      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      // create a new InputFormat instance if this is the first time this class
      // has been seen
      InputFormat inputFormat = getInputFormatFromCache(
          part.getInputFileFormatClass(), job);

      FileInputFormat.setInputPaths(newjob, dir);
      newjob.setInputFormat(inputFormat.getClass());
      ShimLoader.getHadoopShims().inputFormatValidateInput(inputFormat, newjob);
    }
  }

    // clone a jobConf for setting needed columns for reading
    JobConf cloneJobConf = new JobConf(job);
    pushProjectionsAndFilters(cloneJobConf, inputFormatClass,
        hsplit.getPath().toString(), hsplit.getPath().toUri().getPath());

    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
        cloneJobConf);
    BucketizedHiveRecordReader<K, V> rr = new BucketizedHiveRecordReader<K, V>(
        inputFormat, hsplit, cloneJobConf, reporter);
    rr.initIOContext(hsplit, cloneJobConf, inputFormatClass);
    return rr;

    for (Path dir : dirs) {
      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
      // create a new InputFormat instance if this is the first time this class
      // has been seen
      Class inputFormatClass = part.getInputFileFormatClass();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
      newjob.setInputFormat(inputFormat.getClass());

      FileStatus[] listStatus = listStatus(newjob, dir);

      for (FileStatus status : listStatus) {
        LOG.info("block size: " + status.getBlockSize());
        LOG.info("file length: " + status.getLen());
        FileInputFormat.setInputPaths(newjob, status.getPath());
        InputSplit[] iss = inputFormat.getSplits(newjob, 0);
        if (iss != null && iss.length > 0) {
          numOrigSplits += iss.length;
          result.add(new BucketizedHiveInputSplit(iss, inputFormatClass.getName()));
        }

      }

      // Use HiveInputFormat if any of the paths is not splittable
      Class inputFormatClass = part.getInputFileFormatClass();
      String inputFormatClassName = inputFormatClass.getName();
      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);

      // Since there is no easy way of knowing whether MAPREDUCE-1597 is present
      // in the tree or not, we use a configuration variable instead.
      if (this.mrwork != null && !this.mrwork.getHadoopSupportsSplittable()) {
        // The following code should be removed, once

 
  @SuppressWarnings({ "rawtypes", "unchecked" })
  @Test(timeout=10000)
  public void testGroupedSplitSize() throws IOException {
    JobConf job = new JobConf(defaultConf);
    InputFormat mockWrappedFormat = mock(InputFormat.class);
    TezGroupedSplitsInputFormat<LongWritable , Text> format =
        new TezGroupedSplitsInputFormat<LongWritable, Text>();
    format.setConf(job);
    format.setInputFormat(mockWrappedFormat);
   
    job = (JobConf) TezMapReduceSplitsGrouper.createConfigBuilder(job)
        .setGroupingSplitSize(50 * 1000 * 1000L, 500 * 1000 * 1000L)
        .build();
    InputSplit mockSplit1 = mock(InputSplit.class);
    when(mockSplit1.getLength()).thenReturn(10 * 1000 * 1000L);
    when(mockSplit1.getLocations()).thenReturn(null);
    int numSplits = 100;
    InputSplit[] mockSplits = new InputSplit[numSplits];
    for (int i=0; i<numSplits; i++) {
      mockSplits[i] = mockSplit1;
    }
    when(mockWrappedFormat.getSplits((JobConf)anyObject(), anyInt())).thenReturn(mockSplits);
   
    // Desired split count is not set, so min/max split sizes are chosen based on
    // the total data size and the number of original splits; here the min size limit kicks in.
    InputSplit[] splits = format.getSplits(job, 0);
    Assert.assertEquals(25, splits.length);


