Package org.apache.aurora.scheduler.mesos

Source Code of org.apache.aurora.scheduler.mesos.MesosSchedulerImpl

/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.aurora.scheduler.mesos;

import java.lang.annotation.Retention;
import java.lang.annotation.Target;
import java.util.List;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.inject.Inject;
import javax.inject.Qualifier;

import com.google.common.base.Preconditions;
import com.twitter.common.application.Lifecycle;
import com.twitter.common.inject.TimedInterceptor.Timed;
import com.twitter.common.stats.Stats;

import org.apache.aurora.GuiceUtils.AllowUnchecked;
import org.apache.aurora.scheduler.HostOffer;
import org.apache.aurora.scheduler.TaskLauncher;
import org.apache.aurora.scheduler.base.SchedulerException;
import org.apache.aurora.scheduler.events.EventSink;
import org.apache.aurora.scheduler.events.PubsubEvent.DriverDisconnected;
import org.apache.aurora.scheduler.events.PubsubEvent.DriverRegistered;
import org.apache.aurora.scheduler.storage.AttributeStore;
import org.apache.aurora.scheduler.storage.Storage;
import org.apache.aurora.scheduler.storage.Storage.MutableStoreProvider;
import org.apache.aurora.scheduler.storage.Storage.MutateWork;
import org.apache.aurora.scheduler.storage.entities.IHostAttributes;
import org.apache.mesos.Protos.ExecutorID;
import org.apache.mesos.Protos.FrameworkID;
import org.apache.mesos.Protos.MasterInfo;
import org.apache.mesos.Protos.OfferID;
import org.apache.mesos.Protos.SlaveID;
import org.apache.mesos.Protos.TaskStatus;
import org.apache.mesos.Scheduler;
import org.apache.mesos.SchedulerDriver;

import static java.lang.annotation.ElementType.FIELD;
import static java.lang.annotation.ElementType.METHOD;
import static java.lang.annotation.ElementType.PARAMETER;
import static java.lang.annotation.RetentionPolicy.RUNTIME;
import static java.util.Objects.requireNonNull;

import static org.apache.mesos.Protos.Offer;

/**
* Location for communication with mesos.
*/
class MesosSchedulerImpl implements Scheduler {
  private static final Logger LOG = Logger.getLogger(MesosSchedulerImpl.class.getName());

  private final AtomicLong totalResourceOffers = Stats.exportLong("scheduler_resource_offers");
  private final AtomicLong totalFailedStatusUpdates = Stats.exportLong("scheduler_status_updates");
  private final AtomicLong totalFrameworkDisconnects =
      Stats.exportLong("scheduler_framework_disconnects");
  private final AtomicLong totalFrameworkReregisters =
      Stats.exportLong("scheduler_framework_reregisters");
  private final AtomicLong totalLostExecutors = Stats.exportLong("scheduler_lost_executors");

  private final List<TaskLauncher> taskLaunchers;

  private final Storage storage;
  private final Lifecycle lifecycle;
  private final EventSink eventSink;
  private final Executor executor;
  private volatile boolean isRegistered = false;

  /**
   * Binding annotation for the executor the incoming Mesos message handler uses.
   */
  @Qualifier
  @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME)
  @interface SchedulerExecutor { }

  /**
   * Creates a new scheduler.
   *
   * @param storage Store to save host attributes into.
   * @param lifecycle Application lifecycle manager.
   * @param taskLaunchers Task launchers, which will be used in order.  Calls to
   *                      {@link TaskLauncher#willUse(HostOffer)} and
   *                      {@link TaskLauncher#statusUpdate(TaskStatus)} are propagated to provided
   *                      launchers, ceasing after the first match (based on a return value of
   *                      {@code true}.
   * @param eventSink Pubsub sink to send driver status changes to.
   * @param executor Executor for async work
   */
  @Inject
  public MesosSchedulerImpl(
      Storage storage,
      final Lifecycle lifecycle,
      List<TaskLauncher> taskLaunchers,
      EventSink eventSink,
      @SchedulerExecutor Executor executor) {

    this.storage = requireNonNull(storage);
    this.lifecycle = requireNonNull(lifecycle);
    this.taskLaunchers = requireNonNull(taskLaunchers);
    this.eventSink = requireNonNull(eventSink);
    this.executor = requireNonNull(executor);
  }

  @Override
  public void slaveLost(SchedulerDriver schedulerDriver, SlaveID slaveId) {
    LOG.info("Received notification of lost slave: " + slaveId);
  }

  @Override
  public void registered(
      SchedulerDriver driver,
      final FrameworkID frameworkId,
      MasterInfo masterInfo) {

    LOG.info("Registered with ID " + frameworkId + ", master: " + masterInfo);

    storage.write(new MutateWork.NoResult.Quiet() {
      @Override
      protected void execute(MutableStoreProvider storeProvider) {
        storeProvider.getSchedulerStore().saveFrameworkId(frameworkId.getValue());
      }
    });
    isRegistered = true;
    eventSink.post(new DriverRegistered());
  }

  @Override
  public void disconnected(SchedulerDriver schedulerDriver) {
    LOG.warning("Framework disconnected.");
    totalFrameworkDisconnects.incrementAndGet();
    eventSink.post(new DriverDisconnected());
  }

  @Override
  public void reregistered(SchedulerDriver schedulerDriver, MasterInfo masterInfo) {
    LOG.info("Framework re-registered with master " + masterInfo);
    totalFrameworkReregisters.incrementAndGet();
  }

  @Timed("scheduler_resource_offers")
  @Override
  public void resourceOffers(SchedulerDriver driver, final List<Offer> offers) {
    Preconditions.checkState(isRegistered, "Must be registered before receiving offers.");

    executor.execute(new Runnable() {
      @Override
      public void run() {
        // TODO(wfarner): Reconsider the requirements here, augment the task scheduler to skip over
        //                offers when the host attributes cannot be found. (AURORA-137)
        storage.write(new MutateWork.NoResult.Quiet() {
          @Override
          protected void execute(MutableStoreProvider storeProvider) {
            for (Offer offer : offers) {
              IHostAttributes attributes =
                  AttributeStore.Util.mergeOffer(storeProvider.getAttributeStore(), offer);
              storeProvider.getAttributeStore().saveHostAttributes(attributes);
              if (LOG.isLoggable(Level.FINE)) {
                LOG.log(Level.FINE, String.format("Received offer: %s", offer));
              }
              totalResourceOffers.incrementAndGet();
              for (TaskLauncher launcher : taskLaunchers) {
                if (launcher.willUse(new HostOffer(offer, attributes))) {
                  break;
                }
              }
            }
          }
        });
      }
    });
  }

  @Override
  public void offerRescinded(SchedulerDriver schedulerDriver, OfferID offerId) {
    LOG.info("Offer rescinded: " + offerId);
    for (TaskLauncher launcher : taskLaunchers) {
      launcher.cancelOffer(offerId);
    }
  }

  @AllowUnchecked
  @Timed("scheduler_status_update")
  @Override
  public void statusUpdate(SchedulerDriver driver, TaskStatus status) {
    String info = status.hasData() ? status.getData().toStringUtf8() : null;
    String infoMsg = info == null ? "" : " with info " + info;
    String coreMsg = status.hasMessage() ? " with core message " + status.getMessage() : "";
    LOG.info("Received status update for task " + status.getTaskId().getValue()
        + " in state " + status.getState() + infoMsg + coreMsg);

    try {
      for (TaskLauncher launcher : taskLaunchers) {
        if (launcher.statusUpdate(status)) {
          return;
        }
      }
    } catch (SchedulerException e) {
      LOG.log(Level.SEVERE, "Status update failed due to scheduler exception: " + e, e);
      // We re-throw the exception here in an effort to NACK the status update and trigger an
      // abort of the driver.  Previously we directly issued driver.abort(), but the re-entrancy
      // guarantees of that are uncertain (and we believe it was not working).  However, this
      // was difficult to discern since logging is unreliable during JVM shutdown and we would not
      // see the above log message.
      throw e;
    }

    LOG.warning("Unhandled status update " + status);
    totalFailedStatusUpdates.incrementAndGet();
  }

  @Override
  public void error(SchedulerDriver driver, String message) {
    LOG.severe("Received error message: " + message);
    lifecycle.shutdown();
  }

  @Override
  public void executorLost(SchedulerDriver schedulerDriver, ExecutorID executorID, SlaveID slaveID,
      int status) {

    LOG.info("Lost executor " + executorID);
    totalLostExecutors.incrementAndGet();
  }

  @Timed("scheduler_framework_message")
  @Override
  public void frameworkMessage(
      SchedulerDriver driver,
      ExecutorID executorID,
      SlaveID slave,
      byte[] data) {

    LOG.warning("Ignoring framework message.");
  }
}
TOP

Related Classes of org.apache.aurora.scheduler.mesos.MesosSchedulerImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.