Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.InputSplit
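
All of the snippets below, drawn from projects across the Hadoop ecosystem, lean on the same small contract: a split reports how long it is (getLength) and where its data lives (getLocations), and concrete splits also implement Writable so the framework, or application code such as the Giraph example further down, can serialize them. A minimal sketch of a custom split, with illustrative names (FileRangeSplit is not a real Hadoop class):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;

// A minimal custom split: a byte range of one file plus its preferred hosts.
public class FileRangeSplit extends InputSplit implements Writable {
  private String path;     // file the range belongs to
  private long start;      // first byte of the range
  private long length;     // number of bytes in the range
  private String[] hosts;  // nodes holding a local replica

  public FileRangeSplit() {}  // no-arg constructor required for deserialization

  public FileRangeSplit(String path, long start, long length, String[] hosts) {
    this.path = path;
    this.start = start;
    this.length = length;
    this.hosts = hosts;
  }

  @Override
  public long getLength() { return length; }        // used to sort/schedule splits

  @Override
  public String[] getLocations() { return hosts; }  // locality hints only

  @Override
  public void write(DataOutput out) throws IOException {
    Text.writeString(out, path);
    out.writeLong(start);
    out.writeLong(length);
    out.writeInt(hosts.length);
    for (String h : hosts) Text.writeString(out, h);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    path = Text.readString(in);
    start = in.readLong();
    length = in.readLong();
    hosts = new String[in.readInt()];
    for (int i = 0; i < hosts.length; i++) hosts[i] = Text.readString(in);
  }
}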


        // HBase-style TableInputFormatBase.getSplits(): clamp each region's
        // key range to the scan's [startRow, stopRow) range and emit one
        // TableSplit per region, tagged with its region server location.
        // (The splitStart expression is reconstructed; the excerpt began
        // mid-statement.)
        byte[] splitStart = startRow.length == 0 ||
          Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
            keys.getFirst()[i] : startRow;
        byte[] splitStop = (stopRow.length == 0 ||
          Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
          keys.getSecond()[i].length > 0 ?
            keys.getSecond()[i] : stopRow;
        InputSplit split = new TableSplit(table.getTableName(),
          splitStart, splitStop, regionLocation);
        splits.add(split);
        if (LOG.isDebugEnabled())
          LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
      }


    // HCatalog-style RCFileInputDriver setup: point the driver at an RCFile,
    // take the first split, and read with the full Hive schema (original and
    // output schema identical, so no column pruning).
    HCatSchema schema = buildHiveSchema();
    RCFileInputDriver sd = new RCFileInputDriver();
    JobContext jc = new JobContext(conf, new JobID());
    sd.setInputPath(jc, file.toString());
    InputFormat<?,?> iF = sd.getInputFormat(null);
    InputSplit split = iF.getSplits(jc).get(0);
    sd.setOriginalSchema(jc, schema);
    sd.setOutputSchema(jc, schema);
    sd.initialize(jc, getProps());

    TaskAttemptContext tac = new TaskAttemptContext(conf, new TaskAttemptID());

    // Same driver setup, but with a pruned output schema: the driver records
    // which column ids to read, and the test copies that projection back into
    // its own conf.
    RCFileInputDriver sd = new RCFileInputDriver();
    JobContext jc = new JobContext(conf, new JobID());
    sd.setInputPath(jc, file.toString());
    InputFormat<?,?> iF = sd.getInputFormat(null);
    InputSplit split = iF.getSplits(jc).get(0);
    sd.setOriginalSchema(jc, buildHiveSchema());
    sd.setOutputSchema(jc, buildPrunedSchema());

    sd.initialize(jc, getProps());
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,
        jc.getConfiguration().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));

    // Same setup again, this time with a reordered output schema to check
    // that columns can be projected in a different order than stored.
    RCFileInputDriver sd = new RCFileInputDriver();
    JobContext jc = new JobContext(conf, new JobID());
    sd.setInputPath(jc, file.toString());
    InputFormat<?,?> iF = sd.getInputFormat(null);
    InputSplit split = iF.getSplits(jc).get(0);
    sd.setOriginalSchema(jc, buildHiveSchema());
    sd.setOutputSchema(jc, buildReorderedSchema());

    sd.initialize(jc, getProps());
    Map<String,String> map = new HashMap<String,String>(1);

   
    // With total input length totLength and a per-split cap of maxSize, we
    // expect totLength/maxSize splits, each exactly maxSize long, pinned to a
    // single location, and spread evenly (3 and 3) across the first two nodes.
    int expectedSplitCount = (int) (totLength / maxSize);
    Assert.assertEquals(expectedSplitCount, splits.size());
    HashMultiset<String> nodeSplits = HashMultiset.create();
    for (int i = 0; i < expectedSplitCount; ++i) {
      InputSplit inSplit = splits.get(i);
      Assert.assertEquals(maxSize, inSplit.getLength());
      Assert.assertEquals(1, inSplit.getLocations().length);
      nodeSplits.add(inSplit.getLocations()[0]);
    }
    Assert.assertEquals(3, nodeSplits.count(locations[0]));
    Assert.assertEquals(3, nodeSplits.count(locations[1]));
  }

      LOG.info("splitting: got =        " + splits.size());

      // we should have a single split as the length is comfortably smaller than
      // the block size
      assertEquals("We got more than one splits!", 1, splits.size());
      InputSplit split = splits.get(0);
      assertEquals("It should be CombineFileSplit",
        CombineFileSplit.class, split.getClass());

      // check the split
      BitSet bits = new BitSet(length);
      LOG.debug("split= " + split);
      TaskAttemptContext context = MapReduceTestUtil.
View Full Code Here

      LOG.info("splitting: got =        " + splits.size());

      // we should have a single split as the length is comfortably smaller than
      // the block size
      assertEquals("We got more than one splits!", 1, splits.size());
      InputSplit split = splits.get(0);
      assertEquals("It should be CombineFileSplit",
        CombineFileSplit.class, split.getClass());

      // check the split
      BitSet bits = new BitSet(length);
      RecordReader<IntWritable,BytesWritable> reader =
        format.createRecordReader(split, context);
View Full Code Here

        // Pig-style split combining: walk the raw splits for one input,
        // skipping zero-length splits and grouping the rest into combined
        // splits (result) with their total lengths (resultLengths).
        List<List<InputSplit>> result = new ArrayList<List<InputSplit>>();
        List<Long> resultLengths = new ArrayList<Long>();
        long comparableSplitId = 0;

        int size = 0, nSplits = oneInputSplits.size();
        InputSplit lastSplit = null;
        int emptyCnt = 0;
        for (InputSplit split : oneInputSplits) {
            if (split.getLength() == 0) {
                emptyCnt++;
                continue;
            }
            // ... (grouping of the remaining non-empty splits continues)
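
The grouping logic itself is cut off above. As a minimal sketch of the idea, under the assumption that the goal is simply to pack non-empty splits into groups whose combined length stays under a size budget (the class and method names here are illustrative, not Pig's actual API):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.mapreduce.InputSplit;

public class SplitCombiner {
    // Pack non-empty splits into groups whose combined length stays under a
    // size budget; zero-length splits are dropped, mirroring the emptyCnt
    // handling above.
    public static List<List<InputSplit>> combine(List<InputSplit> splits,
            long budget) throws IOException, InterruptedException {
        List<List<InputSplit>> groups = new ArrayList<List<InputSplit>>();
        List<InputSplit> current = new ArrayList<InputSplit>();
        long size = 0;
        for (InputSplit split : splits) {
            long len = split.getLength();
            if (len == 0) {
                continue;               // skip empty splits
            }
            if (size + len > budget && !current.isEmpty()) {
                groups.add(current);    // current group is full; start a new one
                current = new ArrayList<InputSplit>();
                size = 0;
            }
            current.add(split);
            size += len;
        }
        if (!current.isEmpty()) {
            groups.add(current);        // flush the last partial group
        }
        return groups;
    }
}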

            try {
                // Giraph-style split distribution: serialize each split as its
                // class name followed by its Writable form, and publish the
                // bytes to a per-split ZooKeeper znode. (The trailing createExt
                // arguments are reconstructed: a persistent znode, creating
                // missing parents.)
                ByteArrayOutputStream byteArrayOutputStream =
                    new ByteArrayOutputStream();
                DataOutput outputStream =
                    new DataOutputStream(byteArrayOutputStream);
                InputSplit inputSplit = splitList.get(i);
                Text.writeString(outputStream,
                                 inputSplit.getClass().getName());
                ((Writable) inputSplit).write(outputStream);
                inputSplitPath = INPUT_SPLIT_PATH + "/" + i;
                getZkExt().createExt(inputSplitPath,
                                     byteArrayOutputStream.toByteArray(),
                                     Ids.OPEN_ACL_UNSAFE,
                                     CreateMode.PERSISTENT,
                                     true);
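
The read path is implied by that wire format. As a hedged sketch (the helper name is hypothetical; it assumes the bytes were written exactly as above, class name first via Text.writeString, then the split's own Writable fields):

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.util.ReflectionUtils;

public class SplitReader {
    // Rebuild a split from bytes written as: class name (Text.writeString)
    // followed by the split's own Writable fields.
    public static InputSplit readSplit(byte[] zkData, Configuration conf)
            throws IOException, ClassNotFoundException {
        DataInputStream input =
            new DataInputStream(new ByteArrayInputStream(zkData));
        String splitClass = Text.readString(input);
        InputSplit split = (InputSplit)
            ReflectionUtils.newInstance(conf.getClassByName(splitClass), conf);
        ((Writable) split).readFields(input);
        return split;
    }
}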

    // Accumulo-style check: the input format should produce exactly one
    // RangeInputSplit carrying the connection details (principal, table)
    // that were configured on the job.
    List<InputSplit> splits = aif.getSplits(job);

    Assert.assertEquals(1, splits.size());

    InputSplit split = splits.get(0);

    Assert.assertEquals(RangeInputSplit.class, split.getClass());

    RangeInputSplit risplit = (RangeInputSplit) split;

    Assert.assertEquals(username, risplit.getPrincipal());
    Assert.assertEquals(table, risplit.getTableName());
