Package org.apache.twill.internal.appmaster

Source Code of org.apache.twill.internal.appmaster.ApplicationMasterService$ServiceDelegate

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.twill.internal.appmaster;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multiset;
import com.google.common.collect.Sets;
import com.google.common.io.CharStreams;
import com.google.common.io.Files;
import com.google.common.io.InputSupplier;
import com.google.common.reflect.TypeToken;
import com.google.common.util.concurrent.AbstractExecutionThreadService;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.Service;
import com.google.common.util.concurrent.SettableFuture;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonElement;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.Records;
import org.apache.twill.api.Command;
import org.apache.twill.api.EventHandler;
import org.apache.twill.api.EventHandlerSpecification;
import org.apache.twill.api.LocalFile;
import org.apache.twill.api.ResourceSpecification;
import org.apache.twill.api.RunId;
import org.apache.twill.api.RuntimeSpecification;
import org.apache.twill.api.TwillRunResources;
import org.apache.twill.api.TwillSpecification;
import org.apache.twill.common.Threads;
import org.apache.twill.filesystem.Location;
import org.apache.twill.internal.AbstractTwillService;
import org.apache.twill.internal.Configs;
import org.apache.twill.internal.Constants;
import org.apache.twill.internal.DefaultTwillRunResources;
import org.apache.twill.internal.EnvKeys;
import org.apache.twill.internal.ProcessLauncher;
import org.apache.twill.internal.TwillContainerLauncher;
import org.apache.twill.internal.ZKServiceDecorator;
import org.apache.twill.internal.json.LocalFileCodec;
import org.apache.twill.internal.json.TwillSpecificationAdapter;
import org.apache.twill.internal.kafka.EmbeddedKafkaServer;
import org.apache.twill.internal.logging.Loggings;
import org.apache.twill.internal.state.Message;
import org.apache.twill.internal.state.MessageCallback;
import org.apache.twill.internal.utils.Instances;
import org.apache.twill.internal.utils.Networks;
import org.apache.twill.internal.yarn.YarnAMClient;
import org.apache.twill.internal.yarn.YarnAMClientFactory;
import org.apache.twill.internal.yarn.YarnContainerInfo;
import org.apache.twill.internal.yarn.YarnContainerStatus;
import org.apache.twill.internal.yarn.YarnUtils;
import org.apache.twill.zookeeper.ZKClient;
import org.apache.twill.zookeeper.ZKClients;
import org.apache.zookeeper.CreateMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.net.URI;
import java.net.URL;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
*
*/
public final class ApplicationMasterService extends AbstractTwillService {

  private static final Logger LOG = LoggerFactory.getLogger(ApplicationMasterService.class);

  // Copied from org.apache.hadoop.yarn.security.AMRMTokenIdentifier.KIND_NAME since it's missing in Hadoop-2.0
  private static final Text AMRM_TOKEN_KIND_NAME = new Text("YARN_AM_RM_TOKEN");

  private final RunId runId;
  private final ZKClient zkClient;
  private final TwillSpecification twillSpec;
  private final ApplicationMasterLiveNodeData amLiveNode;
  private final ZKServiceDecorator serviceDelegate;
  private final RunningContainers runningContainers;
  private final ExpectedContainers expectedContainers;
  private final TrackerService trackerService;
  private final YarnAMClient amClient;
  private final String jvmOpts;
  private final int reservedMemory;
  private final EventHandler eventHandler;
  private final Location applicationLocation;

  private EmbeddedKafkaServer kafkaServer;
  private Queue<RunnableContainerRequest> runnableContainerRequests;
  private ExecutorService instanceChangeExecutor;

  public ApplicationMasterService(RunId runId, ZKClient zkClient, File twillSpecFile,
                                  YarnAMClientFactory amClientFactory, Location applicationLocation) throws Exception {
    super(applicationLocation);

    this.runId = runId;
    this.twillSpec = TwillSpecificationAdapter.create().fromJson(twillSpecFile);
    this.zkClient = zkClient;
    this.applicationLocation = applicationLocation;
    this.amClient = amClientFactory.create();
    this.credentials = createCredentials();
    this.jvmOpts = loadJvmOptions();
    this.reservedMemory = getReservedMemory();

    amLiveNode = new ApplicationMasterLiveNodeData(Integer.parseInt(System.getenv(EnvKeys.YARN_APP_ID)),
                                                   Long.parseLong(System.getenv(EnvKeys.YARN_APP_ID_CLUSTER_TIME)),
                                                   amClient.getContainerId().toString());

    serviceDelegate = new ZKServiceDecorator(zkClient, runId, createLiveNodeDataSupplier(),
                                             new ServiceDelegate(), new Runnable() {
      @Override
      public void run() {
        amClient.stopAndWait();
      }
    });
    expectedContainers = initExpectedContainers(twillSpec);
    runningContainers = initRunningContainers(amClient.getContainerId(), amClient.getHost());
    trackerService = new TrackerService(runningContainers.getResourceReport(), amClient.getHost());
    eventHandler = createEventHandler(twillSpec);
  }

  private String loadJvmOptions() throws IOException {
    final File jvmOptsFile = new File(Constants.Files.JVM_OPTIONS);
    if (!jvmOptsFile.exists()) {
      return "";
    }

    return CharStreams.toString(new InputSupplier<Reader>() {
      @Override
      public Reader getInput() throws IOException {
        return new FileReader(jvmOptsFile);
      }
    });
  }

  private int getReservedMemory() {
    String value = System.getenv(EnvKeys.TWILL_RESERVED_MEMORY_MB);
    if (value == null) {
      return Configs.Defaults.JAVA_RESERVED_MEMORY_MB;
    }
    try {
      return Integer.parseInt(value);
    } catch (Exception e) {
      return Configs.Defaults.JAVA_RESERVED_MEMORY_MB;
    }
  }

  private EventHandler createEventHandler(TwillSpecification twillSpec) {
    try {
      // Should be able to load by this class ClassLoader, as they packaged in the same jar.
      EventHandlerSpecification handlerSpec = twillSpec.getEventHandler();

      Class<?> handlerClass = getClass().getClassLoader().loadClass(handlerSpec.getClassName());
      Preconditions.checkArgument(EventHandler.class.isAssignableFrom(handlerClass),
                                  "Class {} does not implements {}",
                                  handlerClass, EventHandler.class.getName());
      return Instances.newInstance((Class<? extends EventHandler>) handlerClass);
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
  }

  private Supplier<? extends JsonElement> createLiveNodeDataSupplier() {
    return new Supplier<JsonElement>() {
      @Override
      public JsonElement get() {
        return new Gson().toJsonTree(amLiveNode);
      }
    };
  }

  private RunningContainers initRunningContainers(ContainerId appMasterContainerId,
                                                  String appMasterHost) throws Exception {
    TwillRunResources appMasterResources = new DefaultTwillRunResources(
      0,
      appMasterContainerId.toString(),
      Integer.parseInt(System.getenv(EnvKeys.YARN_CONTAINER_VIRTUAL_CORES)),
      Integer.parseInt(System.getenv(EnvKeys.YARN_CONTAINER_MEMORY_MB)),
      appMasterHost);
    String appId = appMasterContainerId.getApplicationAttemptId().getApplicationId().toString();
    return new RunningContainers(appId, appMasterResources);
  }

  private ExpectedContainers initExpectedContainers(TwillSpecification twillSpec) {
    Map<String, Integer> expectedCounts = Maps.newHashMap();
    for (RuntimeSpecification runtimeSpec : twillSpec.getRunnables().values()) {
      expectedCounts.put(runtimeSpec.getName(), runtimeSpec.getResourceSpecification().getInstances());
    }
    return new ExpectedContainers(expectedCounts);
  }

  private void doStart() throws Exception {
    LOG.info("Start application master with spec: " + TwillSpecificationAdapter.create().toJson(twillSpec));

    // initialize the event handler, if it fails, it will fail the application.
    eventHandler.initialize(new BasicEventHandlerContext(twillSpec.getEventHandler()));

    instanceChangeExecutor = Executors.newSingleThreadExecutor(Threads.createDaemonThreadFactory("instanceChanger"));

    kafkaServer = new EmbeddedKafkaServer(generateKafkaConfig());

    // Must start tracker before start AMClient
    LOG.info("Starting application master tracker server");
    trackerService.startAndWait();
    URL trackerUrl = trackerService.getUrl();
    LOG.info("Started application master tracker server on " + trackerUrl);

    amClient.setTracker(trackerService.getBindAddress(), trackerUrl);
    amClient.startAndWait();

    // Creates ZK path for runnable and kafka logging service
    Futures.allAsList(ImmutableList.of(
      zkClient.create("/" + runId.getId() + "/runnables", null, CreateMode.PERSISTENT),
      zkClient.create("/" + runId.getId() + "/kafka", null, CreateMode.PERSISTENT))
    ).get();

    // Starts kafka server
    LOG.info("Starting kafka server");

    kafkaServer.startAndWait();
    LOG.info("Kafka server started");

    runnableContainerRequests = initContainerRequests();
  }

  private void doStop() throws Exception {
    Thread.interrupted();     // This is just to clear the interrupt flag

    LOG.info("Stop application master with spec: {}", TwillSpecificationAdapter.create().toJson(twillSpec));

    try {
      // call event handler destroy. If there is error, only log and not affected stop sequence.
      eventHandler.destroy();
    } catch (Throwable t) {
      LOG.warn("Exception when calling {}.destroy()", twillSpec.getEventHandler().getClassName(), t);
    }

    instanceChangeExecutor.shutdownNow();

    // For checking if all containers are stopped.
    final Set<String> ids = Sets.newHashSet(runningContainers.getContainerIds());
    YarnAMClient.AllocateHandler handler = new YarnAMClient.AllocateHandler() {
      @Override
      public void acquired(List<ProcessLauncher<YarnContainerInfo>> launchers) {
        // no-op
      }

      @Override
      public void completed(List<YarnContainerStatus> completed) {
        for (YarnContainerStatus status : completed) {
          ids.remove(status.getContainerId());
        }
      }
    };

    runningContainers.stopAll();

    // Poll for 5 seconds to wait for containers to stop.
    int count = 0;
    while (!ids.isEmpty() && count++ < 5) {
      amClient.allocate(0.0f, handler);
      TimeUnit.SECONDS.sleep(1);
    }

    LOG.info("Stopping application master tracker server");
    try {
      trackerService.stopAndWait();
      LOG.info("Stopped application master tracker server");
    } catch (Exception e) {
      LOG.error("Failed to stop tracker service.", e);
    } finally {
      try {
        // App location cleanup
        cleanupDir(URI.create(System.getenv(EnvKeys.TWILL_APP_DIR)));
        Loggings.forceFlush();
        // Sleep a short while to let kafka clients to have chance to fetch the log
        TimeUnit.SECONDS.sleep(1);
      } finally {
        kafkaServer.stopAndWait();
        LOG.info("Kafka server stopped");
      }
    }
  }

  private void cleanupDir(URI appDir) {
    try {
      if (applicationLocation.delete(true)) {
        LOG.info("Application directory deleted: {}", appDir);
      } else {
        LOG.warn("Failed to cleanup directory {}.", appDir);
      }
    } catch (Exception e) {
      LOG.warn("Exception while cleanup directory {}.", appDir, e);
    }
  }


  private void doRun() throws Exception {
    // The main loop
    Map.Entry<Resource, ? extends Collection<RuntimeSpecification>> currentRequest = null;
    final Queue<ProvisionRequest> provisioning = Lists.newLinkedList();

    YarnAMClient.AllocateHandler allocateHandler = new YarnAMClient.AllocateHandler() {
      @Override
      public void acquired(List<ProcessLauncher<YarnContainerInfo>> launchers) {
        launchRunnable(launchers, provisioning);
      }

      @Override
      public void completed(List<YarnContainerStatus> completed) {
        handleCompleted(completed);
      }
    };

    long nextTimeoutCheck = System.currentTimeMillis() + Constants.PROVISION_TIMEOUT;
    while (isRunning()) {
      // Call allocate. It has to be made at first in order to be able to get cluster resource availability.
      amClient.allocate(0.0f, allocateHandler);

      // Looks for containers requests.
      if (provisioning.isEmpty() && runnableContainerRequests.isEmpty() && runningContainers.isEmpty()) {
        LOG.info("All containers completed. Shutting down application master.");
        break;
      }

      // If nothing is in provisioning, and no pending request, move to next one
      while (provisioning.isEmpty() && currentRequest == null && !runnableContainerRequests.isEmpty()) {
        currentRequest = runnableContainerRequests.peek().takeRequest();
        if (currentRequest == null) {
          // All different types of resource request from current order is done, move to next one
          // TODO: Need to handle order type as well
          runnableContainerRequests.poll();
        }
      }
      // Nothing in provision, makes the next batch of provision request
      if (provisioning.isEmpty() && currentRequest != null) {
        addContainerRequests(currentRequest.getKey(), currentRequest.getValue(), provisioning);
        currentRequest = null;
      }

      nextTimeoutCheck = checkProvisionTimeout(nextTimeoutCheck);

      if (isRunning()) {
        TimeUnit.SECONDS.sleep(1);
      }
    }
  }

  /**
   * Handling containers that are completed.
   */
  private void handleCompleted(List<YarnContainerStatus> completedContainersStatuses) {
    Multiset<String> restartRunnables = HashMultiset.create();
    for (YarnContainerStatus status : completedContainersStatuses) {
      LOG.info("Container {} completed with {}:{}.",
               status.getContainerId(), status.getState(), status.getDiagnostics());
      runningContainers.handleCompleted(status, restartRunnables);
    }

    for (Multiset.Entry<String> entry : restartRunnables.entrySet()) {
      LOG.info("Re-request container for {} with {} instances.", entry.getElement(), entry.getCount());
      for (int i = 0; i < entry.getCount(); i++) {
        runnableContainerRequests.add(createRunnableContainerRequest(entry.getElement()));
      }
    }

    // For all runnables that needs to re-request for containers, update the expected count timestamp
    // so that the EventHandler would triggered with the right expiration timestamp.
    expectedContainers.updateRequestTime(restartRunnables.elementSet());
  }

  /**
   * Check for containers provision timeout and invoke eventHandler if necessary.
   *
   * @return the timestamp for the next time this method needs to be called.
   */
  private long checkProvisionTimeout(long nextTimeoutCheck) {
    if (System.currentTimeMillis() < nextTimeoutCheck) {
      return nextTimeoutCheck;
    }

    // Invoke event handler for provision request timeout
    Map<String, ExpectedContainers.ExpectedCount> expiredRequests = expectedContainers.getAll();
    Map<String, Integer> runningCounts = runningContainers.countAll();

    List<EventHandler.TimeoutEvent> timeoutEvents = Lists.newArrayList();
    for (Map.Entry<String, ExpectedContainers.ExpectedCount> entry : expiredRequests.entrySet()) {
      String runnableName = entry.getKey();
      ExpectedContainers.ExpectedCount expectedCount = entry.getValue();
      int runningCount = runningCounts.containsKey(runnableName) ? runningCounts.get(runnableName) : 0;
      if (expectedCount.getCount() != runningCount) {
        timeoutEvents.add(new EventHandler.TimeoutEvent(runnableName, expectedCount.getCount(),
                                                                   runningCount, expectedCount.getTimestamp()));
      }
    }

    if (!timeoutEvents.isEmpty()) {
      try {
        EventHandler.TimeoutAction action = eventHandler.launchTimeout(timeoutEvents);
        if (action.getTimeout() < 0) {
          // Abort application
          stop();
        } else {
          return nextTimeoutCheck + action.getTimeout();
        }
      } catch (Throwable t) {
        LOG.warn("Exception when calling EventHandler {}. Ignore the result.", t);
      }
    }
    return nextTimeoutCheck + Constants.PROVISION_TIMEOUT;
  }

  private Credentials createCredentials() {
    Credentials credentials = new Credentials();
    if (!UserGroupInformation.isSecurityEnabled()) {
      return credentials;
    }

    try {
      credentials.addAll(UserGroupInformation.getCurrentUser().getCredentials());

      // Remove the AM->RM tokens
      Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
      while (iter.hasNext()) {
        Token<?> token = iter.next();
        if (token.getKind().equals(AMRM_TOKEN_KIND_NAME)) {
          iter.remove();
        }
      }
    } catch (IOException e) {
      LOG.warn("Failed to get current user. No credentials will be provided to containers.", e);
    }

    return credentials;
  }

  private Queue<RunnableContainerRequest> initContainerRequests() {
    // Orderly stores container requests.
    Queue<RunnableContainerRequest> requests = Lists.newLinkedList();
    // For each order in the twillSpec, create container request for each runnable.
    for (TwillSpecification.Order order : twillSpec.getOrders()) {
      // Group container requests based on resource requirement.
      ImmutableMultimap.Builder<Resource, RuntimeSpecification> builder = ImmutableMultimap.builder();
      for (String runnableName : order.getNames()) {
        RuntimeSpecification runtimeSpec = twillSpec.getRunnables().get(runnableName);
        Resource capability = createCapability(runtimeSpec.getResourceSpecification());
        builder.put(capability, runtimeSpec);
      }
      requests.add(new RunnableContainerRequest(order.getType(), builder.build()));
    }
    return requests;
  }

  /**
   * Adds container requests with the given resource capability for each runtime.
   */
  private void addContainerRequests(Resource capability,
                                    Collection<RuntimeSpecification> runtimeSpecs,
                                    Queue<ProvisionRequest> provisioning) {
    for (RuntimeSpecification runtimeSpec : runtimeSpecs) {
      String name = runtimeSpec.getName();
      int newContainers = expectedContainers.getExpected(name) - runningContainers.count(name);
      if (newContainers > 0) {
        // TODO: Allow user to set priority?
        LOG.info("Request {} container with capability {}", newContainers, capability);
        String requestId = amClient.addContainerRequest(capability, newContainers).setPriority(0).apply();
        provisioning.add(new ProvisionRequest(runtimeSpec, requestId, newContainers));
      }
    }
  }

  /**
   * Launches runnables in the provisioned containers.
   */
  private void launchRunnable(List<ProcessLauncher<YarnContainerInfo>> launchers,
                              Queue<ProvisionRequest> provisioning) {
    for (ProcessLauncher<YarnContainerInfo> processLauncher : launchers) {
      LOG.info("Got container {}", processLauncher.getContainerInfo().getId());
      ProvisionRequest provisionRequest = provisioning.peek();
      if (provisionRequest == null) {
        continue;
      }

      String runnableName = provisionRequest.getRuntimeSpec().getName();
      LOG.info("Starting runnable {} with {}", runnableName, processLauncher);

      int containerCount = expectedContainers.getExpected(runnableName);

      ProcessLauncher.PrepareLaunchContext launchContext = processLauncher.prepareLaunch(
        ImmutableMap.<String, String>builder()
          .put(EnvKeys.TWILL_APP_DIR, System.getenv(EnvKeys.TWILL_APP_DIR))
          .put(EnvKeys.TWILL_FS_USER, System.getenv(EnvKeys.TWILL_FS_USER))
          .put(EnvKeys.TWILL_APP_RUN_ID, runId.getId())
          .put(EnvKeys.TWILL_APP_NAME, twillSpec.getName())
          .put(EnvKeys.TWILL_ZK_CONNECT, zkClient.getConnectString())
          .put(EnvKeys.TWILL_LOG_KAFKA_ZK, getKafkaZKConnect())
          .build()
        , getLocalizeFiles(), credentials
      );

      TwillContainerLauncher launcher = new TwillContainerLauncher(
        twillSpec.getRunnables().get(runnableName), launchContext,
        ZKClients.namespace(zkClient, getZKNamespace(runnableName)),
        containerCount, jvmOpts, reservedMemory, getSecureStoreLocation());

      runningContainers.start(runnableName, processLauncher.getContainerInfo(), launcher);

      // Need to call complete to workaround bug in YARN AMRMClient
      if (provisionRequest.containerAcquired()) {
        amClient.completeContainerRequest(provisionRequest.getRequestId());
      }

      if (expectedContainers.getExpected(runnableName) == runningContainers.count(runnableName)) {
        LOG.info("Runnable " + runnableName + " fully provisioned with " + containerCount + " instances.");
        provisioning.poll();
      }
    }
  }

  private List<LocalFile> getLocalizeFiles() {
    try {
      Reader reader = Files.newReader(new File(Constants.Files.LOCALIZE_FILES), Charsets.UTF_8);
      try {
        return new GsonBuilder().registerTypeAdapter(LocalFile.class, new LocalFileCodec())
                                .create().fromJson(reader, new TypeToken<List<LocalFile>>() { }.getType());
      } finally {
        reader.close();
      }
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  private String getZKNamespace(String runnableName) {
    return String.format("/%s/runnables/%s", runId.getId(), runnableName);
  }

  private String getKafkaZKConnect() {
    return String.format("%s/%s/kafka", zkClient.getConnectString(), runId.getId());
  }

  private Properties generateKafkaConfig() {
    int port = Networks.getRandomPort();
    Preconditions.checkState(port > 0, "Failed to get random port.");

    Properties prop = new Properties();
    prop.setProperty("log.dir", new File("kafka-logs").getAbsolutePath());
    prop.setProperty("port", Integer.toString(port));
    prop.setProperty("broker.id", "1");
    prop.setProperty("socket.send.buffer.bytes", "1048576");
    prop.setProperty("socket.receive.buffer.bytes", "1048576");
    prop.setProperty("socket.request.max.bytes", "104857600");
    prop.setProperty("num.partitions", "1");
    prop.setProperty("log.retention.hours", "24");
    prop.setProperty("log.flush.interval.messages", "10000");
    prop.setProperty("log.flush.interval.ms", "1000");
    prop.setProperty("log.segment.bytes", "536870912");
    prop.setProperty("zookeeper.connect", getKafkaZKConnect());
    prop.setProperty("zookeeper.connection.timeout.ms", "1000000");
    prop.setProperty("default.replication.factor", "1");
    return prop;
  }

  private ListenableFuture<String> processMessage(final String messageId, Message message) {
    LOG.debug("Message received: {} {}.", messageId, message);

    SettableFuture<String> result = SettableFuture.create();
    Runnable completion = getMessageCompletion(messageId, result);

    if (handleSecureStoreUpdate(message)) {
      runningContainers.sendToAll(message, completion);
      return result;
    }

    if (handleSetInstances(message, completion)) {
      return result;
    }

    // Replicate messages to all runnables
    if (message.getScope() == Message.Scope.ALL_RUNNABLE) {
      runningContainers.sendToAll(message, completion);
      return result;
    }

    // Replicate message to a particular runnable.
    if (message.getScope() == Message.Scope.RUNNABLE) {
      runningContainers.sendToRunnable(message.getRunnableName(), message, completion);
      return result;
    }

    LOG.info("Message ignored. {}", message);
    return Futures.immediateFuture(messageId);
  }

  /**
   * Attempts to change the number of running instances.
   * @return {@code true} if the message does requests for changes in number of running instances of a runnable,
   *         {@code false} otherwise.
   */
  private boolean handleSetInstances(final Message message, final Runnable completion) {
    if (message.getType() != Message.Type.SYSTEM || message.getScope() != Message.Scope.RUNNABLE) {
      return false;
    }

    Command command = message.getCommand();
    Map<String, String> options = command.getOptions();
    if (!"instances".equals(command.getCommand()) || !options.containsKey("count")) {
      return false;
    }

    final String runnableName = message.getRunnableName();
    if (runnableName == null || runnableName.isEmpty() || !twillSpec.getRunnables().containsKey(runnableName)) {
      LOG.info("Unknown runnable {}", runnableName);
      return false;
    }

    final int newCount = Integer.parseInt(options.get("count"));
    final int oldCount = expectedContainers.getExpected(runnableName);

    LOG.info("Received change instances request for {}, from {} to {}.", runnableName, oldCount, newCount);

    if (newCount == oldCount) {   // Nothing to do, simply complete the request.
      completion.run();
      return true;
    }

    instanceChangeExecutor.execute(createSetInstanceRunnable(message, completion, oldCount, newCount));
    return true;
  }

  /**
   * Creates a Runnable for execution of change instance request.
   */
  private Runnable createSetInstanceRunnable(final Message message, final Runnable completion,
                                             final int oldCount, final int newCount) {
    return new Runnable() {
      @Override
      public void run() {
        final String runnableName = message.getRunnableName();

        LOG.info("Processing change instance request for {}, from {} to {}.", runnableName, oldCount, newCount);
        try {
          // Wait until running container count is the same as old count
          runningContainers.waitForCount(runnableName, oldCount);
          LOG.info("Confirmed {} containers running for {}.", oldCount, runnableName);

          expectedContainers.setExpected(runnableName, newCount);

          try {
            if (newCount < oldCount) {
              // Shutdown some running containers
              for (int i = 0; i < oldCount - newCount; i++) {
                runningContainers.removeLast(runnableName);
              }
            } else {
              // Increase the number of instances
              runnableContainerRequests.add(createRunnableContainerRequest(runnableName));
            }
          } finally {
            runningContainers.sendToRunnable(runnableName, message, completion);
            LOG.info("Change instances request completed. From {} to {}.", oldCount, newCount);
          }
        } catch (InterruptedException e) {
          // If the wait is being interrupted, discard the message.
          completion.run();
        }
      }
    };
  }

  private RunnableContainerRequest createRunnableContainerRequest(final String runnableName) {
    // Find the current order of the given runnable in order to create a RunnableContainerRequest.
    TwillSpecification.Order order = Iterables.find(twillSpec.getOrders(), new Predicate<TwillSpecification.Order>() {
      @Override
      public boolean apply(TwillSpecification.Order input) {
        return (input.getNames().contains(runnableName));
      }
    });

    RuntimeSpecification runtimeSpec = twillSpec.getRunnables().get(runnableName);
    Resource capability = createCapability(runtimeSpec.getResourceSpecification());
    return new RunnableContainerRequest(order.getType(), ImmutableMultimap.of(capability, runtimeSpec));
  }

  private Runnable getMessageCompletion(final String messageId, final SettableFuture<String> future) {
    return new Runnable() {
      @Override
      public void run() {
        future.set(messageId);
      }
    };
  }

  private Resource createCapability(ResourceSpecification resourceSpec) {
    Resource capability = Records.newRecord(Resource.class);

    if (!YarnUtils.setVirtualCores(capability, resourceSpec.getVirtualCores())) {
      LOG.debug("Virtual cores limit not supported.");
    }

    capability.setMemory(resourceSpec.getMemorySize());
    return capability;
  }

  @Override
  protected Service getServiceDelegate() {
    return serviceDelegate;
  }

  /**
   * A private class for service lifecycle. It's done this way so that we can have {@link ZKServiceDecorator} to
   * wrap around this to reflect status in ZK.
   */
  private final class ServiceDelegate extends AbstractExecutionThreadService implements MessageCallback {

    private volatile Thread runThread;

    @Override
    protected void run() throws Exception {
      runThread = Thread.currentThread();
      try {
        doRun();
      } catch (InterruptedException e) {
        // It's ok to get interrupted exception, as it's a signal to stop
        Thread.currentThread().interrupt();
      }
    }

    @Override
    protected void startUp() throws Exception {
      doStart();
    }

    @Override
    protected void shutDown() throws Exception {
      doStop();
    }

    @Override
    protected void triggerShutdown() {
      Thread runThread = this.runThread;
      if (runThread != null) {
        runThread.interrupt();
      }
    }

    @Override
    public ListenableFuture<String> onReceived(String messageId, Message message) {
      return processMessage(messageId, message);
    }
  }
}
TOP

Related Classes of org.apache.twill.internal.appmaster.ApplicationMasterService$ServiceDelegate

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.