Package org.apache.hadoop.yarn.server.resourcemanager

Source Code of org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary$SummaryBuilder

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.LinkedList;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputByteBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.ipc.RPCUtil;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;

/**
* This class manages the list of applications for the resource manager.
*/
public class RMAppManager implements EventHandler<RMAppManagerEvent>,
                                        Recoverable {

  // Logger for this class.
  private static final Log LOG = LogFactory.getLog(RMAppManager.class);

  // Upper bound on the number of completed application ids kept in
  // completedApps; starts at the YarnConfiguration default and is overwritten
  // from the configuration by the constructor via setCompletedAppsMax().
  private int completedAppsMax = YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS;
  // RM-wide limit on application attempts, read from
  // YarnConfiguration.RM_AM_MAX_ATTEMPTS in the constructor and used by
  // recover() as the upper bound for per-application limits.
  private int globalMaxAppAttempts;
  // Ids of completed applications: finishApplication() appends to the tail and
  // checkAppNumCompletedLimit() removes from the head.
  private LinkedList<ApplicationId> completedApps = new LinkedList<ApplicationId>();

  // Collaborators and configuration handed to the constructor.
  private final RMContext rmContext;
  private final ApplicationMasterService masterService;
  private final YarnScheduler scheduler;
  private final ApplicationACLsManager applicationACLsManager;
  private Configuration conf;

  public RMAppManager(RMContext context,
      YarnScheduler scheduler, ApplicationMasterService masterService,
      ApplicationACLsManager applicationACLsManager, Configuration conf) {
    this.rmContext = context;
    this.scheduler = scheduler;
    this.masterService = masterService;
    this.applicationACLsManager = applicationACLsManager;
    this.conf = conf;
    setCompletedAppsMax(conf.getInt(
        YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
        YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS));
    globalMaxAppAttempts = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
        YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
  }

  /**
   *  This class is for logging the application summary.
   */
  static class ApplicationSummary {
    static final Log LOG = LogFactory.getLog(ApplicationSummary.class);

    // Escape sequences
    static final char EQUALS = '=';
    static final char[] charsToEscape =
      {StringUtils.COMMA, EQUALS, StringUtils.ESCAPE_CHAR};

    static class SummaryBuilder {
      final StringBuilder buffer = new StringBuilder();

      // A little optimization for a very common case
      SummaryBuilder add(String key, long value) {
        return _add(key, Long.toString(value));
      }

      <T> SummaryBuilder add(String key, T value) {
        return _add(key, StringUtils.escapeString(String.valueOf(value),
                    StringUtils.ESCAPE_CHAR, charsToEscape));
      }

      SummaryBuilder add(SummaryBuilder summary) {
        if (buffer.length() > 0) buffer.append(StringUtils.COMMA);
        buffer.append(summary.buffer);
        return this;
      }

      SummaryBuilder _add(String key, String value) {
        if (buffer.length() > 0) buffer.append(StringUtils.COMMA);
        buffer.append(key).append(EQUALS).append(value);
        return this;
      }

      @Override public String toString() {
        return buffer.toString();
      }
    }

    /**
     * create a summary of the application's runtime.
     *
     * @param app {@link RMApp} whose summary is to be created, cannot
     *            be <code>null</code>.
     */
    public static SummaryBuilder createAppSummary(RMApp app) {
      String trackingUrl = "N/A";
      String host = "N/A";
      RMAppAttempt attempt = app.getCurrentAppAttempt();
      if (attempt != null) {
        trackingUrl = attempt.getTrackingUrl();
        host = attempt.getHost();
      }
      SummaryBuilder summary = new SummaryBuilder()
          .add("appId", app.getApplicationId())
          .add("name", app.getName())
          .add("user", app.getUser())
          .add("queue", app.getQueue())
          .add("state", app.getState())
          .add("trackingUrl", trackingUrl)
          .add("appMasterHost", host)
          .add("startTime", app.getStartTime())
          .add("finishTime", app.getFinishTime())
          .add("finalStatus", app.getFinalApplicationStatus());
      return summary;
    }

    /**
     * Log a summary of the application's runtime.
     *
     * @param app {@link RMApp} whose summary is to be logged
     */
    public static void logAppSummary(RMApp app) {
      if (app != null) {
        LOG.info(createAppSummary(app));
      }
    }
  }

  protected synchronized void setCompletedAppsMax(int max) {
    this.completedAppsMax = max;
  }

  protected synchronized int getCompletedAppsListSize() {
    return this.completedApps.size();
  }

  protected synchronized void finishApplication(ApplicationId applicationId) {
    if (applicationId == null) {
      LOG.error("RMAppManager received completed appId of null, skipping");
    } else {
      // Inform the DelegationTokenRenewer
      if (UserGroupInformation.isSecurityEnabled()) {
        rmContext.getDelegationTokenRenewer().applicationFinished(applicationId);
      }
     
      completedApps.add(applicationId)
      writeAuditLog(applicationId);
    }
  }

  protected void writeAuditLog(ApplicationId appId) {
    RMApp app = rmContext.getRMApps().get(appId);
    String operation = "UNKONWN";
    boolean success = false;
    switch (app.getState()) {
      case FAILED:
        operation = AuditConstants.FINISH_FAILED_APP;
        break;
      case FINISHED:
        operation = AuditConstants.FINISH_SUCCESS_APP;
        success = true;
        break;
      case KILLED:
        operation = AuditConstants.FINISH_KILLED_APP;
        success = true;
        break;
      default:
    }
   
    if (success) {
      RMAuditLogger.logSuccess(app.getUser(), operation,
          "RMAppManager", app.getApplicationId());
    } else {
      StringBuilder diag = app.getDiagnostics();
      String msg = diag == null ? null : diag.toString();
      RMAuditLogger.logFailure(app.getUser(), operation, msg, "RMAppManager",
          "App failed with state: " + app.getState(), appId);
    }
  }

  /*
   * check to see if hit the limit for max # completed apps kept
   */
  protected synchronized void checkAppNumCompletedLimit() {
    while (completedApps.size() > this.completedAppsMax) {
      ApplicationId removeId = completedApps.remove()
      LOG.info("Application should be expired, max # apps"
          + " met. Removing app: " + removeId);
      rmContext.getRMApps().remove(removeId);
      this.applicationACLsManager.removeApplication(removeId);
    }
  }

  @SuppressWarnings("unchecked")
  protected void submitApplication(
      ApplicationSubmissionContext submissionContext, long submitTime,
      boolean isRecovered, String user) throws YarnException {
    ApplicationId applicationId = submissionContext.getApplicationId();

    // Validation of the ApplicationSubmissionContext needs to be completed
    // here. Only those fields that are dependent on RM's configuration are
    // checked here as they have to be validated whether they are part of new
    // submission or just being recovered.

    // Check whether AM resource requirements are within required limits
    if (!submissionContext.getUnmanagedAM()) {
      ResourceRequest amReq = BuilderUtils.newResourceRequest(
          RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY,
          submissionContext.getResource(), 1);
      try {
        SchedulerUtils.validateResourceRequest(amReq,
            scheduler.getMaximumResourceCapability());
      } catch (InvalidResourceRequestException e) {
        LOG.warn("RM app submission failed in validating AM resource request"
            + " for application " + applicationId, e);
        throw e;
      }
    }

    // Create RMApp
    RMApp application =
        new RMAppImpl(applicationId, rmContext, this.conf,
            submissionContext.getApplicationName(), user,
            submissionContext.getQueue(),
            submissionContext, this.scheduler, this.masterService,
            submitTime, submissionContext.getApplicationType());

    // Concurrent app submissions with same applicationId will fail here
    // Concurrent app submissions with different applicationIds will not
    // influence each other
    if (rmContext.getRMApps().putIfAbsent(applicationId, application) !=
        null) {
      String message = "Application with id " + applicationId
          + " is already present! Cannot add a duplicate!";
      LOG.warn(message);
      throw RPCUtil.getRemoteException(message);
    }

    // Inform the ACLs Manager
    this.applicationACLsManager.addApplication(applicationId,
        submissionContext.getAMContainerSpec().getApplicationACLs());

    try {
      // Setup tokens for renewal
      if (UserGroupInformation.isSecurityEnabled()) {
        this.rmContext.getDelegationTokenRenewer().addApplication(
            applicationId,parseCredentials(submissionContext),
            submissionContext.getCancelTokensWhenComplete()
            );
      }
    } catch (IOException ie) {
      LOG.warn(
          "Unable to add the application to the delegation token renewer.",
          ie);
      // Sending APP_REJECTED is fine, since we assume that the
      // RMApp is in NEW state and thus we havne't yet informed the
      // Scheduler about the existence of the application
      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMAppRejectedEvent(applicationId, ie.getMessage()));
      throw RPCUtil.getRemoteException(ie);
    }

    // All done, start the RMApp
    this.rmContext.getDispatcher().getEventHandler().handle(
        new RMAppEvent(applicationId, isRecovered ? RMAppEventType.RECOVER:
            RMAppEventType.START));
  }
 
  private Credentials parseCredentials(ApplicationSubmissionContext application)
      throws IOException {
    Credentials credentials = new Credentials();
    DataInputByteBuffer dibb = new DataInputByteBuffer();
    ByteBuffer tokens = application.getAMContainerSpec().getTokens();
    if (tokens != null) {
      dibb.reset(tokens);
      credentials.readTokenStorageStream(dibb);
      tokens.rewind();
    }
    return credentials;
  }
 
  @Override
  public void recover(RMState state) throws Exception {
    RMStateStore store = rmContext.getStateStore();
    assert store != null;
    // recover applications
    Map<ApplicationId, ApplicationState> appStates = state.getApplicationState();
    LOG.info("Recovering " + appStates.size() + " applications");
    for(ApplicationState appState : appStates.values()) {
      boolean shouldRecover = true;
      if(appState.getApplicationSubmissionContext().getUnmanagedAM()) {
        // do not recover unmanaged applications since current recovery
        // mechanism of restarting attempts does not work for them.
        // This will need to be changed in work preserving recovery in which
        // RM will re-connect with the running AM's instead of restarting them
        LOG.info("Not recovering unmanaged application " + appState.getAppId());
        shouldRecover = false;
      }
      int individualMaxAppAttempts = appState.getApplicationSubmissionContext()
          .getMaxAppAttempts();
      int maxAppAttempts;
      if (individualMaxAppAttempts <= 0 ||
          individualMaxAppAttempts > globalMaxAppAttempts) {
        maxAppAttempts = globalMaxAppAttempts;
        LOG.warn("The specific max attempts: " + individualMaxAppAttempts
            + " for application: " + appState.getAppId()
            + " is invalid, because it is out of the range [1, "
            + globalMaxAppAttempts + "]. Use the global max attempts instead.");
      } else {
        maxAppAttempts = individualMaxAppAttempts;
      }
      // In work-preserve restart, if attemptCount == maxAttempts, the job still
      // needs to be recovered because the last attempt may still be running.
      if(appState.getAttemptCount() >= maxAppAttempts) {
        LOG.info("Not recovering application " + appState.getAppId() +
            " due to recovering attempt is beyond maxAppAttempt limit");
        shouldRecover = false;
      }

      // re-submit the application
      // this is going to send an app start event but since the async dispatcher
      // has not started that event will be queued until we have completed re
      // populating the state
      if(shouldRecover) {
        LOG.info("Recovering application " + appState.getAppId());
        submitApplication(appState.getApplicationSubmissionContext(),
                        appState.getSubmitTime(), true, appState.getUser());
        // re-populate attempt information in application
        RMAppImpl appImpl = (RMAppImpl) rmContext.getRMApps().get(
                                                        appState.getAppId());
        appImpl.recover(state);
      }
      else {
        store.removeApplication(appState);
      }
    }
  }

  @Override
  public void handle(RMAppManagerEvent event) {
    ApplicationId applicationId = event.getApplicationId();
    LOG.debug("RMAppManager processing event for "
        + applicationId + " of type " + event.getType());
    switch(event.getType()) {
      case APP_COMPLETED:
      {
        finishApplication(applicationId);
        ApplicationSummary.logAppSummary(
            rmContext.getRMApps().get(applicationId));
        checkAppNumCompletedLimit();
      }
      break;
      default:
        LOG.error("Invalid eventtype " + event.getType() + ". Ignoring!");
      }
  }
}
TOP

Related Classes of org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary$SummaryBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle. Contact coftware#gmail.com.