Package org.apache.tez.mapreduce.protos.MRRuntimeProtos

Examples of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto


  // This class should not be used by more than one test in a single run, since
  // the path it writes to is not dynamic.
  private static String RELOCALIZATION_TEST_CLASS_NAME = "AMClassloadTestDummyClass";
  public static class MRInputAMSplitGeneratorRelocalizationTest extends MRInputAMSplitGenerator {
    public List<Event> initialize(TezRootInputInitializerContext rootInputContextthrows Exception {
      MRInputUserPayloadProto userPayloadProto = MRHelpers
          .parseMRInputPayload(rootInputContext.getUserPayload());
      Configuration conf = MRHelpers.createConfFromByteString(userPayloadProto
          .getConfigurationBytes());

      try {
        RuntimeUtils.getClazz(RELOCALIZATION_TEST_CLASS_NAME);
        LOG.info("Class found");
View Full Code Here


      throws Exception {
    Stopwatch sw = null;
    if (LOG.isDebugEnabled()) {
      sw = new Stopwatch().start();
    }
    MRInputUserPayloadProto userPayloadProto = MRHelpers
        .parseMRInputPayload(rootInputContext.getUserPayload());
    if (LOG.isDebugEnabled()) {
      sw.stop();
      LOG.debug("Time to parse MRInput payload into prot: "
          + sw.elapsedMillis());
    }
    if (LOG.isDebugEnabled()) {
      sw.reset().start();
    }
    Configuration conf = MRHelpers.createConfFromByteString(userPayloadProto
        .getConfigurationBytes());
   
    sendSerializedEvents = conf.getBoolean(
        MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLAOD,
        MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLAOD_DEFAULT);
    LOG.info("Emitting serialized splits: " + sendSerializedEvents);
    if (LOG.isDebugEnabled()) {
      sw.stop();
      LOG.debug("Time converting ByteString to configuration: " + sw.elapsedMillis());
    }

    if (LOG.isDebugEnabled()) {
      sw.reset().start();
    }

    int totalResource = rootInputContext.getTotalAvailableResource().getMemory();
    int taskResource = rootInputContext.getVertexTaskResource().getMemory();
    float waves = conf.getFloat(
        TezConfiguration.TEZ_AM_GROUPING_SPLIT_WAVES,
        TezConfiguration.TEZ_AM_GROUPING_SPLIT_WAVES_DEFAULT);

    int numTasks = (int)((totalResource*waves)/taskResource);

    LOG.info("Input " + rootInputContext.getInputName() + " asking for " + numTasks
        + " tasks. Headroom: " + totalResource + " Task Resource: "
        + taskResource + " waves: " + waves);

    // Read all credentials into the credentials instance stored in JobConf.
    JobConf jobConf = new JobConf(conf);
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

    InputSplitInfoMem inputSplitInfo = null;
    String realInputFormatName = userPayloadProto.getInputFormatName();
    if ( realInputFormatName != null && !realInputFormatName.isEmpty()) {
      // split grouping on the AM
      if (jobConf.getUseNewMapper()) {
        LOG.info("Grouping mapreduce api input splits");
        Job job = Job.getInstance(jobConf);
View Full Code Here

  @Override
  public List<Event> initialize(TezInputContext inputContext) throws IOException {
    this.inputContext = inputContext;
    this.inputContext.requestInitialMemory(0l, null); //mandatory call
    this.inputContext.inputIsReady();
    MRInputUserPayloadProto mrUserPayload =
      MRHelpers.parseMRInputPayload(inputContext.getUserPayload());
    Preconditions.checkArgument(mrUserPayload.hasSplits() == false,
        "Split information not expected in MRInput");
    Configuration conf =
      MRHelpers.createConfFromByteString(mrUserPayload.getConfigurationBytes());
    this.jobConf = new JobConf(conf);
    // Add tokens to the jobConf - in case they are accessed within the RR / IF
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

    TaskAttemptID taskAttemptId = new TaskAttemptID(
View Full Code Here

    rootVertexInitialized = true;
    try {
      // This is using the payload from the RootVertexInitializer corresponding
      // to InputName. Ideally it should be using it's own configuration class - but that
      // means serializing another instance.
      MRInputUserPayloadProto protoPayload =
          MRHelpers.parseMRInputPayload(inputDescriptor.getUserPayload());
      this.conf = MRHelpers.createConfFromByteString(protoPayload.getConfigurationBytes());

      /*
       * Currently in tez, the flow of events is thus: "Generate Splits -> Initialize Vertex"
       * (with parallelism info obtained from the generate splits phase). The generate splits
       * phase groups splits using the TezGroupedSplitsInputFormat. However, for bucket map joins
       * the grouping done by this input format results in incorrect results as the grouper has no
       * knowledge of buckets. So, we initially set the input format to be HiveInputFormat
       * (in DagUtils) for the case of bucket map joins so as to obtain un-grouped splits.
       * We then group the splits corresponding to buckets using the tez grouper which returns
       * TezGroupedSplits.
       */

      if (conf.getBoolean(GROUP_SPLITS, true)) {
        // Changing the InputFormat - so that the correct one is initialized in MRInput.
        this.conf.set("mapred.input.format.class", TezGroupedSplitsInputFormat.class.getName());
        MRInputUserPayloadProto updatedPayload = MRInputUserPayloadProto
            .newBuilder(protoPayload)
            .setConfigurationBytes(MRHelpers.createByteStringFromConf(conf))
            .build();
        inputDescriptor.setUserPayload(updatedPayload.toByteArray());
      }
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }
View Full Code Here

  public List<Event> initialize() throws Exception {
    InputInitializerContext rootInputContext = getContext();

    context = rootInputContext;

    MRInputUserPayloadProto userPayloadProto =
        MRInputHelpers.parseMRInputPayload(rootInputContext.getInputUserPayload());

    Configuration conf =
        TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());

    boolean sendSerializedEvents =
        conf.getBoolean("mapreduce.tez.input.initializer.serialize.event.payload", true);

    // Read all credentials into the credentials instance stored in JobConf.
    JobConf jobConf = new JobConf(conf);
    ShimLoader.getHadoopShims().getMergedCredentials(jobConf);

    MapWork work = Utilities.getMapWork(jobConf);

    // perform dynamic partition pruning
    pruner.prune(work, jobConf, context);

    InputSplitInfoMem inputSplitInfo = null;
    String realInputFormatName = conf.get("mapred.input.format.class");
    boolean groupingEnabled = userPayloadProto.getGroupingEnabled();
    if (groupingEnabled) {
      // Need to instantiate the realInputFormat
      InputFormat<?, ?> inputFormat =
          (InputFormat<?, ?>) ReflectionUtils.newInstance(Class.forName(realInputFormatName),
              jobConf);
View Full Code Here

    try {
      // This is using the payload from the RootVertexInitializer corresponding
      // to InputName. Ideally it should be using it's own configuration class -
      // but that
      // means serializing another instance.
      MRInputUserPayloadProto protoPayload =
          MRInputHelpers.parseMRInputPayload(inputDescriptor.getUserPayload());
      this.conf = TezUtils.createConfFromByteString(protoPayload.getConfigurationBytes());

      /*
       * Currently in tez, the flow of events is thus:
       * "Generate Splits -> Initialize Vertex" (with parallelism info obtained
       * from the generate splits phase). The generate splits phase groups
       * splits using the TezGroupedSplitsInputFormat. However, for bucket map
       * joins the grouping done by this input format results in incorrect
       * results as the grouper has no knowledge of buckets. So, we initially
       * set the input format to be HiveInputFormat (in DagUtils) for the case
       * of bucket map joins so as to obtain un-grouped splits. We then group
       * the splits corresponding to buckets using the tez grouper which returns
       * TezGroupedSplits.
       */

      // This assumes that Grouping will always be used.
      // Enabling grouping on the payload.
      MRInputUserPayloadProto updatedPayload =
          MRInputUserPayloadProto.newBuilder(protoPayload).setGroupingEnabled(true).build();
      inputDescriptor.setUserPayload(UserPayload.create(updatedPayload.toByteString().asReadOnlyByteBuffer()));
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }

View Full Code Here

    rootVertexInitialized = true;
    try {
      // This is using the payload from the RootVertexInitializer corresponding
      // to InputName. Ideally it should be using it's own configuration class - but that
      // means serializing another instance.
      MRInputUserPayloadProto protoPayload =
          MRHelpers.parseMRInputPayload(inputDescriptor.getUserPayload());
      this.conf = MRHelpers.createConfFromByteString(protoPayload.getConfigurationBytes());

      /*
       * Currently in tez, the flow of events is thus: "Generate Splits -> Initialize Vertex"
       * (with parallelism info obtained from the generate splits phase). The generate splits
       * phase groups splits using the TezGroupedSplitsInputFormat. However, for bucket map joins
       * the grouping done by this input format results in incorrect results as the grouper has no
       * knowledge of buckets. So, we initially set the input format to be HiveInputFormat
       * (in DagUtils) for the case of bucket map joins so as to obtain un-grouped splits.
       * We then group the splits corresponding to buckets using the tez grouper which returns
       * TezGroupedSplits.
       */

      if (conf.getBoolean(GROUP_SPLITS, true)) {
        // Changing the InputFormat - so that the correct one is initialized in MRInput.
        this.conf.set("mapred.input.format.class", TezGroupedSplitsInputFormat.class.getName());
        MRInputUserPayloadProto updatedPayload = MRInputUserPayloadProto
            .newBuilder(protoPayload)
            .setConfigurationBytes(MRHelpers.createByteStringFromConf(conf))
            .build();
        inputDescriptor.setUserPayload(updatedPayload.toByteArray());
      }
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }
View Full Code Here

    rootVertexInitialized = true;
    try {
      // This is using the payload from the RootVertexInitializer corresponding
      // to InputName. Ideally it should be using it's own configuration class - but that
      // means serializing another instance.
      MRInputUserPayloadProto protoPayload =
          MRHelpers.parseMRInputPayload(inputDescriptor.getUserPayload());
      this.conf = MRHelpers.createConfFromByteString(protoPayload.getConfigurationBytes());

      /*
       * Currently in tez, the flow of events is thus: "Generate Splits -> Initialize Vertex"
       * (with parallelism info obtained from the generate splits phase). The generate splits
       * phase groups splits using the TezGroupedSplitsInputFormat. However, for bucket map joins
       * the grouping done by this input format results in incorrect results as the grouper has no
       * knowledge of buckets. So, we initially set the input format to be HiveInputFormat
       * (in DagUtils) for the case of bucket map joins so as to obtain un-grouped splits.
       * We then group the splits corresponding to buckets using the tez grouper which returns
       * TezGroupedSplits.
       */

      // This assumes that Grouping will always be used.
      // Changing the InputFormat - so that the correct one is initialized in MRInput.
      this.conf.set("mapred.input.format.class", TezGroupedSplitsInputFormat.class.getName());
      MRInputUserPayloadProto updatedPayload = MRInputUserPayloadProto
          .newBuilder(protoPayload)
          .setConfigurationBytes(MRHelpers.createByteStringFromConf(conf))
          .build();
      inputDescriptor.setUserPayload(updatedPayload.toByteArray());
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }
    boolean dataInformationEventSeen = false;
View Full Code Here

  // This class should not be used by more than one test in a single run, since
  // the path it writes to is not dynamic.
  private static String RELOCALIZATION_TEST_CLASS_NAME = "AMClassloadTestDummyClass";
  public static class MRInputAMSplitGeneratorRelocalizationTest extends MRInputAMSplitGenerator {
    public List<Event> initialize(TezRootInputInitializerContext rootInputContextthrows Exception {
      MRInputUserPayloadProto userPayloadProto = MRHelpers
          .parseMRInputPayload(rootInputContext.getUserPayload());
      Configuration conf = MRHelpers.createConfFromByteString(userPayloadProto
          .getConfigurationBytes());

      try {
        RuntimeUtils.getClazz(RELOCALIZATION_TEST_CLASS_NAME);
        LOG.info("Class found");
View Full Code Here

TOP

Related Classes of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.