Package org.apache.aurora.scheduler.async

Source Code of org.apache.aurora.scheduler.async.AsyncModule$RegisterGauges

/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.aurora.scheduler.async;

import java.lang.annotation.Retention;
import java.lang.annotation.Target;
import java.util.concurrent.Executor;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.logging.Logger;

import javax.inject.Inject;
import javax.inject.Qualifier;
import javax.inject.Singleton;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Supplier;
import com.google.common.util.concurrent.RateLimiter;
import com.google.inject.AbstractModule;
import com.google.inject.Binder;
import com.google.inject.Key;
import com.google.inject.PrivateModule;
import com.google.inject.TypeLiteral;
import com.twitter.common.application.modules.LifecycleModule;
import com.twitter.common.args.Arg;
import com.twitter.common.args.CmdLine;
import com.twitter.common.args.constraints.NotNegative;
import com.twitter.common.args.constraints.Positive;
import com.twitter.common.base.Command;
import com.twitter.common.quantity.Amount;
import com.twitter.common.quantity.Time;
import com.twitter.common.stats.StatsProvider;
import com.twitter.common.util.BackoffStrategy;
import com.twitter.common.util.Random;
import com.twitter.common.util.TruncatedBinaryBackoff;

import org.apache.aurora.scheduler.SchedulerModule;
import org.apache.aurora.scheduler.async.GcExecutorLauncher.GcExecutorSettings;
import org.apache.aurora.scheduler.async.GcExecutorLauncher.RandomGcExecutorSettings;
import org.apache.aurora.scheduler.async.OfferQueue.OfferQueueImpl;
import org.apache.aurora.scheduler.async.OfferQueue.OfferReturnDelay;
import org.apache.aurora.scheduler.async.RescheduleCalculator.RescheduleCalculatorImpl;
import org.apache.aurora.scheduler.async.TaskGroups.TaskGroupsSettings;
import org.apache.aurora.scheduler.async.TaskHistoryPruner.HistoryPrunnerSettings;
import org.apache.aurora.scheduler.async.TaskScheduler.TaskSchedulerImpl;
import org.apache.aurora.scheduler.base.AsyncUtil;
import org.apache.aurora.scheduler.events.PubsubEventModule;
import org.apache.aurora.scheduler.filter.AttributeAggregate;

import static java.lang.annotation.ElementType.FIELD;
import static java.lang.annotation.ElementType.METHOD;
import static java.lang.annotation.ElementType.PARAMETER;
import static java.lang.annotation.RetentionPolicy.RUNTIME;
import static java.util.Objects.requireNonNull;

import static org.apache.aurora.scheduler.async.Preemptor.PreemptorImpl;
import static org.apache.aurora.scheduler.async.Preemptor.PreemptorImpl.PreemptionDelay;
import static org.apache.aurora.scheduler.async.TaskScheduler.TaskSchedulerImpl.ReservationDuration;

/**
* Binding module for async task management.
*/
public class AsyncModule extends AbstractModule {

  private static final Logger LOG = Logger.getLogger(AsyncModule.class.getName());

  // Command-line flags. Each @CmdLine-annotated Arg carries its default in Arg.create(...); the
  // values are read with Arg.get() from the constructor and from configure().

  // Thread count of the shared "AsyncProcessor-%d" scheduled executor created in configure().
  @CmdLine(name = "async_worker_threads",
      help = "The number of worker threads to process async task operations with.")
  private static final Arg<Integer> ASYNC_WORKER_THREADS = Arg.create(1);

  // Bound for TaskTimeout, and also passed as the second argument of the KillRetry backoff.
  @CmdLine(name = "transient_task_state_timeout",
      help = "The amount of time after which to treat a task stuck in a transient state as LOST.")
  private static final Arg<Amount<Long, Time>> TRANSIENT_TASK_STATE_TIMEOUT =
      Arg.create(Amount.of(5L, Time.MINUTES));

  // The next three flags (together with max_schedule_attempts_per_sec further down) are combined
  // into TaskGroupsSettings in configure().
  @Positive
  @CmdLine(name = "first_schedule_delay",
      help = "Initial amount of time to wait before first attempting to schedule a PENDING task.")
  private static final Arg<Amount<Long, Time>> FIRST_SCHEDULE_DELAY =
      Arg.create(Amount.of(1L, Time.MILLISECONDS));

  @Positive
  @CmdLine(name = "initial_schedule_penalty",
      help = "Initial amount of time to wait before attempting to schedule a task that has failed"
          + " to schedule.")
  private static final Arg<Amount<Long, Time>> INITIAL_SCHEDULE_PENALTY =
      Arg.create(Amount.of(1L, Time.SECONDS));

  @CmdLine(name = "max_schedule_penalty",
      help = "Maximum delay between attempts to schedule a PENDING tasks.")
  private static final Arg<Amount<Long, Time>> MAX_SCHEDULE_PENALTY =
      Arg.create(Amount.of(1L, Time.MINUTES));

  // Offer hold time and jitter are converted to milliseconds and handed to
  // RandomJitterReturnDelay in configure().
  @CmdLine(name = "min_offer_hold_time",
      help = "Minimum amount of time to hold a resource offer before declining.")
  @NotNegative
  private static final Arg<Amount<Integer, Time>> MIN_OFFER_HOLD_TIME =
      Arg.create(Amount.of(5, Time.MINUTES));

  @CmdLine(name = "offer_hold_jitter_window",
      help = "Maximum amount of random jitter to add to the offer hold time window.")
  @NotNegative
  private static final Arg<Amount<Integer, Time>> OFFER_HOLD_JITTER_WINDOW =
      Arg.create(Amount.of(1, Time.MINUTES));

  // The three history_* flags are combined into TaskHistoryPruner's HistoryPrunnerSettings.
  @CmdLine(name = "history_prune_threshold",
      help = "Time after which the scheduler will prune terminated task history.")
  private static final Arg<Amount<Long, Time>> HISTORY_PRUNE_THRESHOLD =
      Arg.create(Amount.of(2L, Time.DAYS));

  @CmdLine(name = "history_max_per_job_threshold",
      help = "Maximum number of terminated tasks to retain in a job history.")
  private static final Arg<Integer> HISTORY_MAX_PER_JOB_THRESHOLD = Arg.create(100);

  @CmdLine(name = "history_min_retention_threshold",
      help = "Minimum guaranteed time for task history retention before any pruning is attempted.")
  private static final Arg<Amount<Long, Time>> HISTORY_MIN_RETENTION_THRESHOLD =
      Arg.create(Amount.of(1L, Time.HOURS));

  // Feeds RateLimiter.create(...) for TaskGroupsSettings.
  @CmdLine(name = "max_schedule_attempts_per_sec",
      help = "Maximum number of scheduling attempts to make per second.")
  private static final Arg<Double> MAX_SCHEDULE_ATTEMPTS_PER_SEC = Arg.create(20D);

  // The flapping flags and max_reschedule_task_delay_on_startup are combined into
  // RescheduleCalculatorSettings in configure().
  @CmdLine(name = "flapping_task_threshold",
      help = "A task that repeatedly runs for less than this time is considered to be flapping.")
  private static final Arg<Amount<Long, Time>> FLAPPING_THRESHOLD =
      Arg.create(Amount.of(5L, Time.MINUTES));

  @CmdLine(name = "initial_flapping_task_delay",
      help = "Initial amount of time to wait before attempting to schedule a flapping task.")
  private static final Arg<Amount<Long, Time>> INITIAL_FLAPPING_DELAY =
      Arg.create(Amount.of(30L, Time.SECONDS));

  @CmdLine(name = "max_flapping_task_delay",
      help = "Maximum delay between attempts to schedule a flapping task.")
  private static final Arg<Amount<Long, Time>> MAX_FLAPPING_DELAY =
      Arg.create(Amount.of(5L, Time.MINUTES));

  @CmdLine(name = "max_reschedule_task_delay_on_startup",
      help = "Upper bound of random delay for pending task rescheduling on scheduler startup.")
  private static final Arg<Amount<Integer, Time>> MAX_RESCHEDULING_DELAY =
      Arg.create(Amount.of(30, Time.SECONDS));

  // Bound as the @PreemptionDelay-qualified Amount in configure().
  @CmdLine(name = "preemption_delay",
      help = "Time interval after which a pending task becomes eligible to preempt other tasks")
  private static final Arg<Amount<Long, Time>> PREEMPTION_DELAY =
      Arg.create(Amount.of(10L, Time.MINUTES));

  // Read by the public no-arg constructor to choose between PreemptorImpl and NULL_PREEMPTOR.
  @CmdLine(name = "enable_preemptor",
      help = "Enable the preemptor and preemption")
  private static final Arg<Boolean> ENABLE_PREEMPTOR = Arg.create(true);

  // The three job_update_history_* flags are combined into
  // JobUpdateHistoryPruner.HistoryPrunerSettings in configure().
  @CmdLine(name = "job_update_history_per_job_threshold",
      help = "Maximum number of completed job updates to retain in a job update history.")
  private static final Arg<Integer> JOB_UPDATE_HISTORY_PER_JOB_THRESHOLD = Arg.create(10);

  @CmdLine(name = "job_update_history_pruning_interval",
      help = "Job update history pruning interval.")
  private static final Arg<Amount<Long, Time>> JOB_UPDATE_HISTORY_PRUNING_INTERVAL =
      Arg.create(Amount.of(15L, Time.MINUTES));

  @CmdLine(name = "job_update_history_pruning_threshold",
      help = "Time after which the scheduler will prune completed job update history.")
  private static final Arg<Amount<Long, Time>> JOB_UPDATE_HISTORY_PRUNING_THRESHOLD =
      Arg.create(Amount.of(30L, Time.DAYS));

  // First argument of the TruncatedBinaryBackoff bound for KillRetry in configure().
  @CmdLine(name = "initial_task_kill_retry_interval",
      help = "When killing a task, retry after this delay if mesos has not responded,"
          + " backing off up to transient_task_state_timeout")
  private static final Arg<Amount<Long, Time>> INITIAL_TASK_KILL_RETRY_INTERVAL =
      Arg.create(Amount.of(5L, Time.SECONDS));

  /**
   * Preemptor bound under {@link #PREEMPTOR_KEY} when preemption is disabled: it always answers
   * with an absent slot.
   */
  private static final Preemptor NULL_PREEMPTOR = new Preemptor() {
    @Override
    public Optional<String> findPreemptionSlotFor(
        String taskId,
        AttributeAggregate attributeAggregate) {

      return Optional.absent();
    }
  };

  // Passed to bindTaskScheduler(), where it is bound as the @ReservationDuration-qualified Amount.
  @CmdLine(name = "offer_reservation_duration", help = "Time to reserve a slave's offers while "
      + "trying to satisfy a task preempting another.")
  private static final Arg<Amount<Long, Time>> RESERVATION_DURATION =
      Arg.create(Amount.of(3L, Time.MINUTES));

  /** Binding annotation that qualifies the {@link Preemptor} binding exposed by this module. */
  @Qualifier
  @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME)
  private @interface PreemptionBinding { }

  /**
   * Key under which the preemptor (real or null implementation) is bound and exposed; it is
   * handed to {@code bindTaskScheduler} so the task scheduler can link {@link Preemptor} to it.
   */
  @VisibleForTesting
  static final Key<Preemptor> PREEMPTOR_KEY = Key.get(Preemptor.class, PreemptionBinding.class);

  // The two GC executor flags are combined into RandomGcExecutorSettings in configure().
  @CmdLine(name = "executor_gc_interval",
      help = "Max interval on which to run the GC executor on a host to clean up dead tasks.")
  private static final Arg<Amount<Long, Time>> EXECUTOR_GC_INTERVAL =
      Arg.create(Amount.of(1L, Time.HOURS));

  // Defaults to null; configure() wraps the value with Optional.fromNullable().
  @CmdLine(name = "gc_executor_path", help = "Path to the gc executor launch script.")
  private static final Arg<String> GC_EXECUTOR_PATH = Arg.create(null);

  /**
   * Binding annotation that qualifies the shared {@link ScheduledThreadPoolExecutor} bound in
   * {@code configure()} and injected into {@code RegisterGauges}.
   */
  @Qualifier
  @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME)
  private @interface AsyncExecutor { }

  /** Whether {@code PreemptorImpl} (true) or {@code NULL_PREEMPTOR} (false) gets bound. */
  private final boolean enablePreemptor;

  /**
   * Creates a module with an explicit preemptor setting, bypassing the command-line flag.
   *
   * @param enablePreemptor {@code true} to bind the real preemptor, {@code false} to bind the
   *     null implementation.
   */
  @VisibleForTesting
  AsyncModule(boolean enablePreemptor) {
    this.enablePreemptor = enablePreemptor;
  }

  /**
   * Creates a module whose preemptor setting is taken from the {@code enable_preemptor} flag.
   */
  public AsyncModule() {
    this(ENABLE_PREEMPTOR.get());
  }

  /** Name of the gauge that reports the size of the async executor's work queue. */
  @VisibleForTesting
  static final String TIMEOUT_QUEUE_GAUGE = "timeout_queue_size";

  /** Name of the gauge that reports the async executor's completed task count. */
  @VisibleForTesting
  static final String ASYNC_TASKS_GAUGE = "async_tasks_completed";

  @Override
  protected void configure() {
    // Don't worry about clean shutdown, these can be daemon and cleanup-free.
    final ScheduledThreadPoolExecutor executor =
        AsyncUtil.loggingScheduledExecutor(ASYNC_WORKER_THREADS.get(), "AsyncProcessor-%d", LOG);
    bind(ScheduledThreadPoolExecutor.class).annotatedWith(AsyncExecutor.class).toInstance(executor);
    LifecycleModule.bindStartupAction(binder(), RegisterGauges.class);

    // AsyncModule itself is not a subclass of PrivateModule because TaskEventModule internally uses
    // a MultiBinder, which cannot span multiple injectors.
    install(new PrivateModule() {
      @Override
      protected void configure() {
        bind(new TypeLiteral<Amount<Long, Time>>() { })
            .toInstance(TRANSIENT_TASK_STATE_TIMEOUT.get());
        bind(ScheduledExecutorService.class).toInstance(executor);

        bind(TaskTimeout.class).in(Singleton.class);
        expose(TaskTimeout.class);
      }
    });
    PubsubEventModule.bindSubscriber(binder(), TaskTimeout.class);
    SchedulerModule.addSchedulerActiveServiceBinding(binder()).to(TaskTimeout.class);

    install(new PrivateModule() {
      @Override
      protected void configure() {
        bind(TaskGroupsSettings.class).toInstance(new TaskGroupsSettings(
            FIRST_SCHEDULE_DELAY.get(),
            new TruncatedBinaryBackoff(
                INITIAL_SCHEDULE_PENALTY.get(),
                MAX_SCHEDULE_PENALTY.get()),
            RateLimiter.create(MAX_SCHEDULE_ATTEMPTS_PER_SEC.get())));

        bind(RescheduleCalculatorImpl.RescheduleCalculatorSettings.class)
            .toInstance(new RescheduleCalculatorImpl.RescheduleCalculatorSettings(
                new TruncatedBinaryBackoff(INITIAL_FLAPPING_DELAY.get(), MAX_FLAPPING_DELAY.get()),
                FLAPPING_THRESHOLD.get(),
                MAX_RESCHEDULING_DELAY.get()));

        bind(RescheduleCalculator.class).to(RescheduleCalculatorImpl.class).in(Singleton.class);
        expose(RescheduleCalculator.class);
        if (enablePreemptor) {
          bind(PREEMPTOR_KEY).to(PreemptorImpl.class);
          bind(PreemptorImpl.class).in(Singleton.class);
          LOG.info("Preemptor Enabled.");
        } else {
          bind(PREEMPTOR_KEY).toInstance(NULL_PREEMPTOR);
          LOG.warning("Preemptor Disabled.");
        }
        expose(PREEMPTOR_KEY);
        bind(new TypeLiteral<Amount<Long, Time>>() { }).annotatedWith(PreemptionDelay.class)
            .toInstance(PREEMPTION_DELAY.get());
        bind(TaskGroups.class).in(Singleton.class);
        expose(TaskGroups.class);
      }
    });
    bindTaskScheduler(binder(), PREEMPTOR_KEY, RESERVATION_DURATION.get());
    PubsubEventModule.bindSubscriber(binder(), TaskGroups.class);

    install(new PrivateModule() {
      @Override
      protected void configure() {
        bind(OfferReturnDelay.class).toInstance(
            new RandomJitterReturnDelay(
                MIN_OFFER_HOLD_TIME.get().as(Time.MILLISECONDS),
                OFFER_HOLD_JITTER_WINDOW.get().as(Time.MILLISECONDS),
                new Random.SystemRandom(new java.util.Random())));
        bind(ScheduledExecutorService.class).toInstance(executor);
        bind(OfferQueue.class).to(OfferQueueImpl.class);
        bind(OfferQueueImpl.class).in(Singleton.class);
        expose(OfferQueue.class);
      }
    });
    PubsubEventModule.bindSubscriber(binder(), OfferQueue.class);

    install(new PrivateModule() {
      @Override
      protected void configure() {
        // TODO(ksweeney): Create a configuration validator module so this can be injected.
        // TODO(William Farner): Revert this once large task counts is cheap ala hierarchichal store
        bind(HistoryPrunnerSettings.class).toInstance(new HistoryPrunnerSettings(
            HISTORY_PRUNE_THRESHOLD.get(),
            HISTORY_MIN_RETENTION_THRESHOLD.get(),
            HISTORY_MAX_PER_JOB_THRESHOLD.get()
        ));
        bind(ScheduledExecutorService.class).toInstance(executor);

        bind(TaskHistoryPruner.class).in(Singleton.class);
        expose(TaskHistoryPruner.class);
      }
    });
    PubsubEventModule.bindSubscriber(binder(), TaskHistoryPruner.class);

    install(new PrivateModule() {
      @Override
      protected void configure() {
        bind(ScheduledExecutorService.class).toInstance(executor);
        bind(TaskThrottler.class).in(Singleton.class);
        expose(TaskThrottler.class);
      }
    });
    PubsubEventModule.bindSubscriber(binder(), TaskThrottler.class);

    install(new PrivateModule() {
      @Override
      protected void configure() {
        bind(GcExecutorSettings.class).toInstance(new RandomGcExecutorSettings(
            EXECUTOR_GC_INTERVAL.get(),
            Optional.fromNullable(GC_EXECUTOR_PATH.get())));
        bind(Executor.class).toInstance(executor);

        bind(GcExecutorLauncher.class).in(Singleton.class);
        expose(GcExecutorLauncher.class);
      }
    });

    install(new PrivateModule() {
      @Override
      protected void configure() {
        bind(JobUpdateHistoryPruner.HistoryPrunerSettings.class).toInstance(
            new JobUpdateHistoryPruner.HistoryPrunerSettings(
                JOB_UPDATE_HISTORY_PRUNING_INTERVAL.get(),
                JOB_UPDATE_HISTORY_PRUNING_THRESHOLD.get(),
                JOB_UPDATE_HISTORY_PER_JOB_THRESHOLD.get()));

        bind(ScheduledExecutorService.class).toInstance(
            AsyncUtil.singleThreadLoggingScheduledExecutor("JobUpdatePruner-%d", LOG));

        bind(JobUpdateHistoryPruner.class).in(Singleton.class);
        expose(JobUpdateHistoryPruner.class);
      }
    });
    LifecycleModule.bindStartupAction(binder(), JobUpdateHistoryPruner.class);

    install(new PrivateModule() {
      @Override
      protected void configure() {
        bind(ScheduledExecutorService.class).toInstance(executor);
        bind(BackoffStrategy.class).toInstance(
            new TruncatedBinaryBackoff(
                INITIAL_TASK_KILL_RETRY_INTERVAL.get(),
                TRANSIENT_TASK_STATE_TIMEOUT.get()));
        bind(KillRetry.class).in(Singleton.class);
        expose(KillRetry.class);
      }
    });
    PubsubEventModule.bindSubscriber(binder(), KillRetry.class);
  }

  /**
   * This method exists because we want to test the wiring up of TaskSchedulerImpl class to the
   * PubSub system in the TaskSchedulerImplTest class. The method has a complex signature because
   * the binding of the TaskScheduler and friends occurs in a PrivateModule which does not interact
   * well with the MultiBinder that backs the PubSub system.
   */
  @VisibleForTesting
  static void bindTaskScheduler(
      Binder binder,
      final Key<Preemptor> preemptorKey,
      final Amount<Long, Time> reservationDuration) {

    binder.install(new PrivateModule() {
      @Override
      protected void configure() {
        bind(Preemptor.class).to(preemptorKey);
        bind(new TypeLiteral<Amount<Long, Time>>() { }).annotatedWith(ReservationDuration.class)
            .toInstance(reservationDuration);
        bind(TaskScheduler.class).to(TaskSchedulerImpl.class);
        bind(TaskSchedulerImpl.class).in(Singleton.class);
        expose(TaskScheduler.class);
      }
    });
    PubsubEventModule.bindSubscriber(binder, TaskScheduler.class);
  }

  static class RegisterGauges implements Command {
    private final StatsProvider statsProvider;
    private final ScheduledThreadPoolExecutor executor;

    @Inject
    RegisterGauges(
        StatsProvider statsProvider,
        @AsyncExecutor ScheduledThreadPoolExecutor executor) {

      this.statsProvider = requireNonNull(statsProvider);
      this.executor = requireNonNull(executor);
    }

    @Override
    public void execute() throws RuntimeException {
      statsProvider.makeGauge(
          TIMEOUT_QUEUE_GAUGE,
          new Supplier<Integer>() {
            @Override
            public Integer get() {
              return executor.getQueue().size();
            }
          });
      statsProvider.makeGauge(
          ASYNC_TASKS_GAUGE,
          new Supplier<Long>() {
            @Override
            public Long get() {
              return executor.getCompletedTaskCount();
            }
          }
      );
    }
  }
}
TOP

Related Classes of org.apache.aurora.scheduler.async.AsyncModule$RegisterGauges

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.