Examples of MRInputUserPayloadProto

org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

Examples of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

  // This class should not be used by more than one test in a single run, since
  // the path it writes to is not dynamic.
  private static String RELOCALIZATION_TEST_CLASS_NAME = "AMClassloadTestDummyClass";
  public static class MRInputAMSplitGeneratorRelocalizationTest extends MRInputAMSplitGenerator {
    public List<Event> initialize(TezRootInputInitializerContext rootInputContext)  throws Exception {
      MRInputUserPayloadProto userPayloadProto = MRHelpers
          .parseMRInputPayload(rootInputContext.getUserPayload());
      Configuration conf = MRHelpers.createConfFromByteString(userPayloadProto
          .getConfigurationBytes());


      try {
        RuntimeUtils.getClazz(RELOCALIZATION_TEST_CLASS_NAME);
        LOG.info("Class found");

View Full Code Here

Examples of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

      throws Exception {
    Stopwatch sw = null;
    if (LOG.isDebugEnabled()) {
      sw = new Stopwatch().start();
    }
    MRInputUserPayloadProto userPayloadProto = MRHelpers
        .parseMRInputPayload(rootInputContext.getUserPayload());
    if (LOG.isDebugEnabled()) {
      sw.stop();
      LOG.debug("Time to parse MRInput payload into prot: "
          + sw.elapsedMillis());
    }
    if (LOG.isDebugEnabled()) {
      sw.reset().start();
    }
    Configuration conf = MRHelpers.createConfFromByteString(userPayloadProto
        .getConfigurationBytes());
    
    sendSerializedEvents = conf.getBoolean(
        MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLAOD,
        MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLAOD_DEFAULT);
    LOG.info("Emitting serialized splits: " + sendSerializedEvents);
    if (LOG.isDebugEnabled()) {
      sw.stop();
      LOG.debug("Time converting ByteString to configuration: " + sw.elapsedMillis());
    }


    if (LOG.isDebugEnabled()) {
      sw.reset().start();
    }


    int totalResource = rootInputContext.getTotalAvailableResource().getMemory();
    int taskResource = rootInputContext.getVertexTaskResource().getMemory();
    float waves = conf.getFloat(
        TezConfiguration.TEZ_AM_GROUPING_SPLIT_WAVES,
        TezConfiguration.TEZ_AM_GROUPING_SPLIT_WAVES_DEFAULT);


    int numTasks = (int)((totalResource*waves)/taskResource);


    LOG.info("Input " + rootInputContext.getInputName() + " asking for " + numTasks
        + " tasks. Headroom: " + totalResource + " Task Resource: "
        + taskResource + " waves: " + waves);


    // Read all credentials into the credentials instance stored in JobConf.
    JobConf jobConf = new JobConf(conf);
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());


    InputSplitInfoMem inputSplitInfo = null;
    String realInputFormatName = userPayloadProto.getInputFormatName(); 
    if ( realInputFormatName != null && !realInputFormatName.isEmpty()) {
      // split grouping on the AM
      if (jobConf.getUseNewMapper()) {
        LOG.info("Grouping mapreduce api input splits");
        Job job = Job.getInstance(jobConf);

View Full Code Here

Examples of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

  @Override
  public List<Event> initialize(TezInputContext inputContext) throws IOException {
    this.inputContext = inputContext;
    this.inputContext.requestInitialMemory(0l, null); //mandatory call
    this.inputContext.inputIsReady();
    MRInputUserPayloadProto mrUserPayload =
      MRHelpers.parseMRInputPayload(inputContext.getUserPayload());
    Preconditions.checkArgument(mrUserPayload.hasSplits() == false,
        "Split information not expected in MRInput");
    Configuration conf =
      MRHelpers.createConfFromByteString(mrUserPayload.getConfigurationBytes());
    this.jobConf = new JobConf(conf);
    // Add tokens to the jobConf - in case they are accessed within the RR / IF
    jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());


    TaskAttemptID taskAttemptId = new TaskAttemptID(

View Full Code Here

Examples of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

    rootVertexInitialized = true;
    try {
      // This is using the payload from the RootVertexInitializer corresponding
      // to InputName. Ideally it should be using its own configuration class - but that
      // means serializing another instance.
      MRInputUserPayloadProto protoPayload = 
          MRHelpers.parseMRInputPayload(inputDescriptor.getUserPayload());
      this.conf = MRHelpers.createConfFromByteString(protoPayload.getConfigurationBytes());


      /*
       * Currently in tez, the flow of events is thus: "Generate Splits -> Initialize Vertex"
       * (with parallelism info obtained from the generate splits phase). The generate splits
       * phase groups splits using the TezGroupedSplitsInputFormat. However, for bucket map joins
       * the grouping done by this input format results in incorrect results as the grouper has no
       * knowledge of buckets. So, we initially set the input format to be HiveInputFormat
       * (in DagUtils) for the case of bucket map joins so as to obtain un-grouped splits.
       * We then group the splits corresponding to buckets using the tez grouper which returns
       * TezGroupedSplits.
       */


      if (conf.getBoolean(GROUP_SPLITS, true)) {
        // Changing the InputFormat - so that the correct one is initialized in MRInput.
        this.conf.set("mapred.input.format.class", TezGroupedSplitsInputFormat.class.getName());
        MRInputUserPayloadProto updatedPayload = MRInputUserPayloadProto
            .newBuilder(protoPayload)
            .setConfigurationBytes(MRHelpers.createByteStringFromConf(conf))
            .build();
        inputDescriptor.setUserPayload(updatedPayload.toByteArray());
      }
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }

View Full Code Here

Examples of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

  public List<Event> initialize() throws Exception {
    InputInitializerContext rootInputContext = getContext();


    context = rootInputContext;


    MRInputUserPayloadProto userPayloadProto =
        MRInputHelpers.parseMRInputPayload(rootInputContext.getInputUserPayload());


    Configuration conf =
        TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());


    boolean sendSerializedEvents =
        conf.getBoolean("mapreduce.tez.input.initializer.serialize.event.payload", true);


    // Read all credentials into the credentials instance stored in JobConf.
    JobConf jobConf = new JobConf(conf);
    ShimLoader.getHadoopShims().getMergedCredentials(jobConf);


    MapWork work = Utilities.getMapWork(jobConf);


    // perform dynamic partition pruning
    pruner.prune(work, jobConf, context);


    InputSplitInfoMem inputSplitInfo = null;
    String realInputFormatName = conf.get("mapred.input.format.class");
    boolean groupingEnabled = userPayloadProto.getGroupingEnabled();
    if (groupingEnabled) {
      // Need to instantiate the realInputFormat
      InputFormat<?, ?> inputFormat =
          (InputFormat<?, ?>) ReflectionUtils.newInstance(Class.forName(realInputFormatName),
              jobConf);

View Full Code Here

Examples of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

    try {
      // This is using the payload from the RootVertexInitializer corresponding
      // to InputName. Ideally it should be using its own configuration class -
      // but that
      // means serializing another instance.
      MRInputUserPayloadProto protoPayload =
          MRInputHelpers.parseMRInputPayload(inputDescriptor.getUserPayload());
      this.conf = TezUtils.createConfFromByteString(protoPayload.getConfigurationBytes());


      /*
       * Currently in tez, the flow of events is thus:
       * "Generate Splits -> Initialize Vertex" (with parallelism info obtained
       * from the generate splits phase). The generate splits phase groups
       * splits using the TezGroupedSplitsInputFormat. However, for bucket map
       * joins the grouping done by this input format results in incorrect
       * results as the grouper has no knowledge of buckets. So, we initially
       * set the input format to be HiveInputFormat (in DagUtils) for the case
       * of bucket map joins so as to obtain un-grouped splits. We then group
       * the splits corresponding to buckets using the tez grouper which returns
       * TezGroupedSplits.
       */


      // This assumes that Grouping will always be used.
      // Enabling grouping on the payload.
      MRInputUserPayloadProto updatedPayload =
          MRInputUserPayloadProto.newBuilder(protoPayload).setGroupingEnabled(true).build();
      inputDescriptor.setUserPayload(UserPayload.create(updatedPayload.toByteString().asReadOnlyByteBuffer()));
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }

View Full Code Here

Examples of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

    rootVertexInitialized = true;
    try {
      // This is using the payload from the RootVertexInitializer corresponding
      // to InputName. Ideally it should be using its own configuration class - but that
      // means serializing another instance.
      MRInputUserPayloadProto protoPayload = 
          MRHelpers.parseMRInputPayload(inputDescriptor.getUserPayload());
      this.conf = MRHelpers.createConfFromByteString(protoPayload.getConfigurationBytes());


      /*
       * Currently in tez, the flow of events is thus: "Generate Splits -> Initialize Vertex"
       * (with parallelism info obtained from the generate splits phase). The generate splits
       * phase groups splits using the TezGroupedSplitsInputFormat. However, for bucket map joins
       * the grouping done by this input format results in incorrect results as the grouper has no
       * knowledge of buckets. So, we initially set the input format to be HiveInputFormat
       * (in DagUtils) for the case of bucket map joins so as to obtain un-grouped splits.
       * We then group the splits corresponding to buckets using the tez grouper which returns
       * TezGroupedSplits.
       */


      if (conf.getBoolean(GROUP_SPLITS, true)) {
        // Changing the InputFormat - so that the correct one is initialized in MRInput.
        this.conf.set("mapred.input.format.class", TezGroupedSplitsInputFormat.class.getName());
        MRInputUserPayloadProto updatedPayload = MRInputUserPayloadProto
            .newBuilder(protoPayload)
            .setConfigurationBytes(MRHelpers.createByteStringFromConf(conf))
            .build();
        inputDescriptor.setUserPayload(updatedPayload.toByteArray());
      }
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }

View Full Code Here

Examples of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

    rootVertexInitialized = true;
    try {
      // This is using the payload from the RootVertexInitializer corresponding
      // to InputName. Ideally it should be using its own configuration class - but that
      // means serializing another instance.
      MRInputUserPayloadProto protoPayload = 
          MRHelpers.parseMRInputPayload(inputDescriptor.getUserPayload());
      this.conf = MRHelpers.createConfFromByteString(protoPayload.getConfigurationBytes());


      /*
       * Currently in tez, the flow of events is thus: "Generate Splits -> Initialize Vertex"
       * (with parallelism info obtained from the generate splits phase). The generate splits
       * phase groups splits using the TezGroupedSplitsInputFormat. However, for bucket map joins
       * the grouping done by this input format results in incorrect results as the grouper has no
       * knowledge of buckets. So, we initially set the input format to be HiveInputFormat
       * (in DagUtils) for the case of bucket map joins so as to obtain un-grouped splits.
       * We then group the splits corresponding to buckets using the tez grouper which returns
       * TezGroupedSplits.
       */


      // This assumes that Grouping will always be used. 
      // Changing the InputFormat - so that the correct one is initialized in MRInput.
      this.conf.set("mapred.input.format.class", TezGroupedSplitsInputFormat.class.getName());
      MRInputUserPayloadProto updatedPayload = MRInputUserPayloadProto
          .newBuilder(protoPayload)
          .setConfigurationBytes(MRHelpers.createByteStringFromConf(conf))
          .build();
      inputDescriptor.setUserPayload(updatedPayload.toByteArray());
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }
    boolean dataInformationEventSeen = false;

View Full Code Here

Examples of org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto

  // This class should not be used by more than one test in a single run, since
  // the path it writes to is not dynamic.
  private static String RELOCALIZATION_TEST_CLASS_NAME = "AMClassloadTestDummyClass";
  public static class MRInputAMSplitGeneratorRelocalizationTest extends MRInputAMSplitGenerator {
    public List<Event> initialize(TezRootInputInitializerContext rootInputContext)  throws Exception {
      MRInputUserPayloadProto userPayloadProto = MRHelpers
          .parseMRInputPayload(rootInputContext.getUserPayload());
      Configuration conf = MRHelpers.createConfFromByteString(userPayloadProto
          .getConfigurationBytes());


      try {
        RuntimeUtils.getClazz(RELOCALIZATION_TEST_CLASS_NAME);
        LOG.info("Class found");

View Full Code Here

0 1

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.