Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.InputSplit
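The snippets below come from Giraph, HBase, Hadoop, and Accumulo code that creates, serializes, and consumes InputSplit instances. As background: a custom split extends the abstract InputSplit class (getLength() and getLocations()) and typically also implements Writable so it can be serialized, which is what the (Writable) casts in the snippets rely on. The following minimal sketch is illustrative only; the class and field names are not taken from any of the projects quoted below.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;

// Illustrative only: a split covering a byte range of one file on a set of hosts.
public class FileRangeSplit extends InputSplit implements Writable {
  private String path;      // file the range belongs to
  private long start;       // first byte of the range
  private long length;      // number of bytes in the range
  private String[] hosts;   // preferred locations (not serialized below)

  public FileRangeSplit() { }  // no-arg constructor for reflective instantiation

  public FileRangeSplit(String path, long start, long length, String[] hosts) {
    this.path = path;
    this.start = start;
    this.length = length;
    this.hosts = hosts;
  }

  @Override
  public long getLength() { return length; }

  @Override
  public String[] getLocations() {
    return hosts == null ? new String[0] : hosts;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    Text.writeString(out, path);
    out.writeLong(start);
    out.writeLong(length);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    path = Text.readString(in);
    start = in.readLong();
    length = in.readLong();
    hosts = null;  // locations are only a scheduling hint, so they are not shipped
  }
}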


    /**
     * Load vertices from a reserved input split.
     *
     * @param inputSplitPath ZooKeeper path of the input split to load
     * @return Count of vertices and edges loaded
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     * @throws InstantiationException
     * @throws IllegalAccessException
     */
    private VertexEdgeCount loadVerticesFromInputSplit(String inputSplitPath)
        throws IOException, ClassNotFoundException, InterruptedException,
               InstantiationException, IllegalAccessException {
        InputSplit inputSplit = getInputSplitForVertices(inputSplitPath);
        VertexEdgeCount vertexEdgeCount =
            readVerticesFromInputSplit(inputSplit);
        if (LOG.isInfoEnabled()) {
            LOG.info("loadVerticesFromInputSplit: Finished loading " +
                     inputSplitPath + " " + vertexEdgeCount);
        }
        return vertexEdgeCount;
    }


        getContext().progress();

        DataInputStream inputStream =
            new DataInputStream(new ByteArrayInputStream(splitList));
        String inputSplitClass = Text.readString(inputStream);
        InputSplit inputSplit = (InputSplit)
            ReflectionUtils.newInstance(
                getConfiguration().getClassByName(inputSplitClass),
                getConfiguration());
        ((Writable) inputSplit).readFields(inputStream);

        if (LOG.isInfoEnabled()) {
            LOG.info("getInputSplitForVertices: Reserved " + inputSplitPath +
                 " from ZooKeeper and got input split '" +
                 inputSplit.toString() + "'");
        }
        return inputSplit;
    }
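The method above reconstructs an InputSplit from bytes held in ZooKeeper: it reads the split's class name, instantiates that class reflectively, and lets the split deserialize its own fields through Writable.readFields. A matching write side would emit the class name first and then the Writable fields. The helper below is a hedged sketch of that, not Giraph's actual writer (which, as a later snippet shows, may also prepend locality data).

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;

// Illustrative helper: serialize a split in the layout the reader above expects,
// class name first, then the split's Writable fields.
public final class InputSplitBytes {
  private InputSplitBytes() { }

  public static byte[] toBytes(InputSplit inputSplit) throws IOException {
    ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
    DataOutputStream outputStream = new DataOutputStream(byteStream);
    Text.writeString(outputStream, inputSplit.getClass().getName());
    ((Writable) inputSplit).write(outputStream);  // split must implement Writable
    outputStream.flush();
    return byteStream.toByteArray();
  }
}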

    }
    ExecutorService taskExecutor =
        Executors.newFixedThreadPool(inputSplitThreadCount);
    boolean writeLocations = USE_INPUT_SPLIT_LOCALITY.get(conf);
    for (int i = 0; i < splitList.size(); ++i) {
      InputSplit inputSplit = splitList.get(i);
      taskExecutor.submit(new LogStacktraceCallable<Void>(
          new WriteInputSplit(inputSplit, inputSplitsPath, i, writeLocations)));
    }
    taskExecutor.shutdown();
    ProgressableUtils.awaitExecutorTermination(taskExecutor, getContext());
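Here the splits are written out in parallel: one WriteInputSplit task per split on a fixed-size pool, followed by shutdown and a Giraph-specific wait (ProgressableUtils.awaitExecutorTermination) that keeps reporting progress while blocking. With plain java.util.concurrent the same submit/shutdown/await pattern looks roughly like the sketch below; writeTasks stands in for whatever Callable performs the actual write.

import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

// Illustrative helper, not Giraph code: run one write task per split on a
// bounded pool, then block until every task has finished.
public final class ParallelSplitWriter {
  private ParallelSplitWriter() { }

  public static void writeAll(List<Callable<Void>> writeTasks, int threadCount)
      throws InterruptedException {
    ExecutorService taskExecutor = Executors.newFixedThreadPool(threadCount);
    for (Callable<Void> task : writeTasks) {
      taskExecutor.submit(task);
    }
    taskExecutor.shutdown();  // stop accepting new tasks
    // Wait, effectively without limit, for the queued writes to complete.
    taskExecutor.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
  }
}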

      HRegionLocation regLoc = table.getRegionLocation(HConstants.EMPTY_BYTE_ARRAY, false);
      if (null == regLoc) {
        throw new IOException("Expecting at least one region.");
      }
      List<InputSplit> splits = new ArrayList<InputSplit>(1);
      InputSplit split = new TableSplit(table.getName(),
          HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, regLoc
              .getHostnamePort().split(Addressing.HOSTNAME_PORT_SEPARATOR)[0]);
      splits.add(split);
      return splits;
    }
    List<InputSplit> splits = new ArrayList<InputSplit>(keys.getFirst().length);
    for (int i = 0; i < keys.getFirst().length; i++) {
      if ( !includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
        continue;
      }
      HRegionLocation location = table.getRegionLocation(keys.getFirst()[i], false);
      // The below InetSocketAddress creation does a name resolution.
      InetSocketAddress isa = new InetSocketAddress(location.getHostname(), location.getPort());
      if (isa.isUnresolved()) {
        LOG.warn("Failed resolve " + isa);
      }
      InetAddress regionAddress = isa.getAddress();
      String regionLocation;
      try {
        regionLocation = reverseDNS(regionAddress);
      } catch (NamingException e) {
        LOG.error("Cannot resolve the host name for " + regionAddress + " because of " + e);
        regionLocation = location.getHostname();
      }

      byte[] startRow = scan.getStartRow();
      byte[] stopRow = scan.getStopRow();
      // determine if the given start and stop keys fall into the region
      if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
          Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
          (stopRow.length == 0 ||
           Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
        byte[] splitStart = startRow.length == 0 ||
          Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
            keys.getFirst()[i] : startRow;
        byte[] splitStop = (stopRow.length == 0 ||
          Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
          keys.getSecond()[i].length > 0 ?
            keys.getSecond()[i] : stopRow;
        InputSplit split = new TableSplit(table.getName(),
          splitStart, splitStop, regionLocation);
        splits.add(split);
        if (LOG.isDebugEnabled()) {
          LOG.debug("getSplits: split -> " + i + " -> " + split);
        }
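The region loop above intersects the scan's [startRow, stopRow) range with each region's [start key, end key) range, where a zero-length key means an unbounded boundary, and only emits a TableSplit for regions the scan actually touches. The sketch below restates just that intersection rule with illustrative names, using a plain unsigned lexicographic compare in place of Bytes.compareTo.

// Illustrative only: the scan/region intersection rule used by getSplits above.
public final class RangeIntersection {
  private RangeIntersection() { }

  // Unsigned lexicographic compare, the same ordering Bytes.compareTo applies.
  static int compare(byte[] a, byte[] b) {
    for (int i = 0; i < Math.min(a.length, b.length); i++) {
      int d = (a[i] & 0xff) - (b[i] & 0xff);
      if (d != 0) {
        return d;
      }
    }
    return a.length - b.length;
  }

  /** Returns {splitStart, splitStop}, or null if the scan misses the region. */
  static byte[][] intersect(byte[] startRow, byte[] stopRow,
                            byte[] regionStart, byte[] regionEnd) {
    boolean overlaps =
        (startRow.length == 0 || regionEnd.length == 0 ||
            compare(startRow, regionEnd) < 0) &&
        (stopRow.length == 0 || compare(stopRow, regionStart) > 0);
    if (!overlaps) {
      return null;
    }
    byte[] splitStart =
        startRow.length == 0 || compare(regionStart, startRow) >= 0
            ? regionStart : startRow;
    byte[] splitStop =
        (stopRow.length == 0 || compare(regionEnd, stopRow) <= 0) &&
            regionEnd.length > 0
            ? regionEnd : stopRow;
    return new byte[][] { splitStart, splitStop };
  }
}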

    List<InputSplit> splits = aif.getSplits(job);

    Assert.assertEquals(1, splits.size());

    InputSplit split = splits.get(0);

    Assert.assertEquals(RangeInputSplit.class, split.getClass());

    RangeInputSplit risplit = (RangeInputSplit) split;

    Assert.assertEquals(username, risplit.getPrincipal());
    Assert.assertEquals(table, risplit.getTable());

      long seed = r.nextLong();
      r.setSeed(seed);
      LOG.debug("seed: " + seed);
      // shuffle splits
      for (int i = 0; i < splits.size(); ++i) {
        InputSplit tmp = splits.get(i);
        int j = r.nextInt(splits.size());
        splits.set(i, splits.get(j));
        splits.set(j, tmp);
      }
      // our target rate is in terms of the maximum number of sample splits,
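This excerpt from Hadoop's InputSampler swaps entries of the split list in place using a random generator whose seed is logged, so a sampling run can be reproduced. As a sketch of an alternative, not the InputSampler code itself, the same reproducible shuffle can be written with Collections.shuffle and a seeded Random:

import java.util.Collections;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.mapreduce.InputSplit;

// Illustrative, not InputSampler code: reproducible shuffle of the split list.
public final class SplitShuffle {
  private SplitShuffle() { }

  /** Shuffles splits in place and returns the seed used, so a run can be replayed. */
  public static long shuffle(List<InputSplit> splits, Random r) {
    long seed = r.nextLong();
    Collections.shuffle(splits, new Random(seed));
    return seed;
  }
}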

            keys.getFirst()[i] : startRow;
        byte[] splitStop = (stopRow.length == 0 ||
          Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
          keys.getSecond()[i].length > 0 ?
            keys.getSecond()[i] : stopRow;
        InputSplit split = new TableSplit(table.getTableName(),
          splitStart, splitStop, regionLocation);
        splits.add(split);
        if (LOG.isDebugEnabled())
          LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
      }

                    .getFirst()[i] : startRow;
            byte[] splitStop =
                (stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i],
                    stopRow) <= 0) && keys.getSecond()[i].length > 0 ? keys
                    .getSecond()[i] : stopRow;
            InputSplit split =
                new TableSplit(table.getName(),
                    scan, splitStart, splitStop, regionLocation);
            splits.add(split);
            if (LOG.isDebugEnabled())
              LOG.debug("getSplits: split -> " + (count++) + " -> " + split);

  private VertexEdgeCount loadInputSplit(
      String inputSplitPath,
      GraphState<I, V, E, M> graphState)
    throws IOException, ClassNotFoundException, InterruptedException,
      InstantiationException, IllegalAccessException {
    InputSplit inputSplit = getInputSplit(inputSplitPath);
    VertexEdgeCount vertexEdgeCount =
        readInputSplit(inputSplit, graphState);
    if (LOG.isInfoEnabled()) {
      LOG.info("loadFromInputSplit: Finished loading " +
          inputSplitPath + " " + vertexEdgeCount);
    }
    return vertexEdgeCount;
  }

    DataInputStream inputStream =
        new DataInputStream(new ByteArrayInputStream(splitList));
    if (useLocality) {
      Text.readString(inputStream); // location data unused here, skip
    }
    String inputSplitClass = Text.readString(inputStream);
    InputSplit inputSplit = (InputSplit)
        ReflectionUtils.newInstance(
            configuration.getClassByName(inputSplitClass),
            configuration);
    ((Writable) inputSplit).readFields(inputStream);

    if (LOG.isInfoEnabled()) {
      LOG.info("getInputSplit: Reserved " + inputSplitPath +
          " from ZooKeeper and got input split '" +
          inputSplit.toString() + "'");
    }
    return inputSplit;
  }
