Source Code of co.cask.cdap.data.runtime.main.MasterServiceMain

/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.cdap.data.runtime.main;

import co.cask.cdap.app.guice.AppFabricServiceRuntimeModule;
import co.cask.cdap.app.guice.ProgramRunnerRuntimeModule;
import co.cask.cdap.app.guice.ServiceStoreModules;
import co.cask.cdap.app.store.ServiceStore;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.guice.ConfigModule;
import co.cask.cdap.common.guice.DiscoveryRuntimeModule;
import co.cask.cdap.common.guice.IOModule;
import co.cask.cdap.common.guice.KafkaClientModule;
import co.cask.cdap.common.guice.LocationRuntimeModule;
import co.cask.cdap.common.guice.TwillModule;
import co.cask.cdap.common.guice.ZKClientModule;
import co.cask.cdap.common.metrics.MetricsCollectionService;
import co.cask.cdap.common.runtime.DaemonMain;
import co.cask.cdap.data.runtime.DataFabricModules;
import co.cask.cdap.data.runtime.DataSetServiceModules;
import co.cask.cdap.data.runtime.DataSetsModules;
import co.cask.cdap.data.security.HBaseSecureStoreUpdater;
import co.cask.cdap.data2.datafabric.dataset.service.DatasetService;
import co.cask.cdap.data2.util.hbase.ConfigurationTable;
import co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory;
import co.cask.cdap.explore.client.ExploreClient;
import co.cask.cdap.explore.guice.ExploreClientModule;
import co.cask.cdap.explore.service.ExploreServiceUtils;
import co.cask.cdap.gateway.auth.AuthModule;
import co.cask.cdap.internal.app.services.AppFabricServer;
import co.cask.cdap.logging.appender.LogAppenderInitializer;
import co.cask.cdap.logging.guice.LoggingModules;
import co.cask.cdap.metrics.guice.MetricsClientRuntimeModule;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.io.Files;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.Service;
import com.google.inject.Guice;
import com.google.inject.Injector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.security.User;
import org.apache.twill.api.ElectionHandler;
import org.apache.twill.api.TwillApplication;
import org.apache.twill.api.TwillController;
import org.apache.twill.api.TwillPreparer;
import org.apache.twill.api.TwillRunner;
import org.apache.twill.api.TwillRunnerService;
import org.apache.twill.api.logging.PrinterLogHandler;
import org.apache.twill.common.ServiceListenerAdapter;
import org.apache.twill.common.Services;
import org.apache.twill.internal.zookeeper.LeaderElection;
import org.apache.twill.kafka.client.KafkaClientService;
import org.apache.twill.zookeeper.ZKClientService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import java.net.InetAddress;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Driver class for starting all master services.
 * Runs AppFabricHttpService in this process and launches the following as TwillRunnables:
 * MetricsProcessor, MetricsHttp, LogSaver, TransactionService, StreamHandler.
 */
public class MasterServiceMain extends DaemonMain {
  private static final Logger LOG = LoggerFactory.getLogger(MasterServiceMain.class);

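  // Restart backoff parameters for the launched Twill application: cap the backoff at
  // 10 minutes, and treat a run that survives 20 minutes as successful (see backOffRun()).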
  private static final long MAX_BACKOFF_TIME_MS = TimeUnit.MILLISECONDS.convert(10, TimeUnit.MINUTES);
  private static final long SUCCESSFUL_RUN_DURATION_MS = TimeUnit.MILLISECONDS.convert(20, TimeUnit.MINUTES);

  protected final CConfiguration cConf;
  protected final Configuration hConf;

  // Written from stop() and read from Twill listener threads, hence volatile.
  private volatile boolean stopFlag = false;

  public MasterServiceMain(CConfiguration cConf, Configuration hConf) {
    this.cConf = cConf;
    this.hConf = hConf;
  }

  private final AtomicBoolean isLeader = new AtomicBoolean(false);

  private Injector baseInjector;
  private ZKClientService zkClientService;
  private LeaderElection leaderElection;
  private volatile TwillRunnerService twillRunnerService;
  private volatile TwillController twillController;
  private AppFabricServer appFabricServer;
  private KafkaClientService kafkaClientService;
  private MetricsCollectionService metricsCollectionService;
  private DatasetService dsService;
  private ServiceStore serviceStore;
  private HBaseSecureStoreUpdater secureStoreUpdater;
  private ExploreClient exploreClient;

  private String serviceName;
  private TwillApplication twillApplication;
  private long lastRunTimeMs = System.currentTimeMillis();
  private int currentRun = 0;
  private boolean isExploreEnabled;

  public static void main(final String[] args) throws Exception {
    LOG.info("Starting {}", MasterServiceMain.class.getSimpleName());
    new MasterServiceMain(CConfiguration.create(), HBaseConfiguration.create()).doMain(args);
  }

  @Override
  public void init(String[] args) {
    isExploreEnabled = cConf.getBoolean(Constants.Explore.EXPLORE_ENABLED);
    serviceName = Constants.Service.MASTER_SERVICES;
    cConf.set(Constants.Dataset.Manager.ADDRESS, getLocalHost().getCanonicalHostName());

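    // The base injector binds the distributed implementations of each subsystem; the services
    // that run only on the elected leader are created from a child injector in start().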
    baseInjector = Guice.createInjector(
      new ConfigModule(cConf, hConf),
      new ZKClientModule(),
      new LocationRuntimeModule().getDistributedModules(),
      new LoggingModules().getDistributedModules(),
      new IOModule(),
      new AuthModule(),
      new KafkaClientModule(),
      new TwillModule(),
      new DiscoveryRuntimeModule().getDistributedModules(),
      new AppFabricServiceRuntimeModule().getDistributedModules(),
      new ProgramRunnerRuntimeModule().getDistributedModules(),
      new DataSetServiceModules().getDistributedModule(),
      new DataFabricModules().getDistributedModules(),
      new DataSetsModules().getDistributedModule(),
      new MetricsClientRuntimeModule().getDistributedModules(),
      new ServiceStoreModules().getDistributedModule(),
      new ExploreClientModule()
    );

    // Obtain the client services and the services this process manages from the base injector
    zkClientService = baseInjector.getInstance(ZKClientService.class);
    kafkaClientService = baseInjector.getInstance(KafkaClientService.class);
    metricsCollectionService = baseInjector.getInstance(MetricsCollectionService.class);
    dsService = baseInjector.getInstance(DatasetService.class);
    serviceStore = baseInjector.getInstance(ServiceStore.class);
    exploreClient = baseInjector.getInstance(ExploreClient.class);

    secureStoreUpdater = baseInjector.getInstance(HBaseSecureStoreUpdater.class);

    checkTransactionRequirements();
    checkExploreRequirements();
  }

  /**
   * The transaction coprocessors (0.94 and 0.96 versions of {@code DefaultTransactionProcessor}) need access
   * to CConfiguration values in order to load transaction snapshots for data cleanup.
   */
  private void checkTransactionRequirements() {
    try {
      new ConfigurationTable(hConf).write(ConfigurationTable.Type.DEFAULT, cConf);
    } catch (IOException ioe) {
      throw Throwables.propagate(ioe);
    }
  }

  /**
   * Checks that, if Explore is enabled, the correct jars are present on the master node
   * and that the installed distribution of Hive is supported.
   */
  private void checkExploreRequirements() {
    if (!isExploreEnabled) {
      return;
    }

    // This check throws an exception if Hive is not present or if its distribution is unsupported
    ExploreServiceUtils.checkHiveSupportWithSecurity(hConf);
  }

  @Override
  public void start() {
    LogAppenderInitializer logAppenderInitializer = baseInjector.getInstance(LogAppenderInitializer.class);
    logAppenderInitializer.initialize();

    Services.chainStart(zkClientService, kafkaClientService, metricsCollectionService);

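    // ZooKeeper-based leader election: among all master processes, only the elected leader
    // launches the Twill application and runs the AppFabric server; followers stop those
    // services and stand by until leadership changes.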
    leaderElection = new LeaderElection(zkClientService, "/election/" + serviceName, new ElectionHandler() {
      @Override
      public void leader() {
        Map<String, Integer> instanceCount = getSystemServiceInstances();
        twillApplication = createTwillApplication(instanceCount);
        if (twillApplication == null) {
          throw new IllegalArgumentException("TwillApplication cannot be null");
        }

        LOG.info("Became leader");
        Injector injector = baseInjector.createChildInjector();

        twillRunnerService = injector.getInstance(TwillRunnerService.class);
        twillRunnerService.startAndWait();
        // AppFabric uses twillRunnerService for reporting some AM container metrics and for
        // getting live info for apps, so make sure it is started after the Twill runner.
        appFabricServer = injector.getInstance(AppFabricServer.class);
        appFabricServer.startAndWait();
        scheduleSecureStoreUpdate(twillRunnerService);
        runTwillApps();
        isLeader.set(true);
      }

      @Override
      public void follower() {
        LOG.info("Became follower");

        dsService.stopAndWait();
        if (twillRunnerService != null) {
          twillRunnerService.stopAndWait();
        }
        if (appFabricServer != null) {
          appFabricServer.stopAndWait();
        }
        isLeader.set(false);
      }
    });
    leaderElection.start();
  }

  @Override
  public void stop() {
    LOG.info("Stopping {}", serviceName);
    stopFlag = true;

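    // Shut down in reverse start order: the Dataset service and the launched Twill application
    // first (leader only), then leader election, then the shared client services.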
    dsService.stopAndWait();
    if (isLeader.get() && twillController != null) {
      twillController.stopAndWait();
    }

    if (leaderElection != null) {
      leaderElection.stopAndWait();
    }
    Services.chainStop(metricsCollectionService, kafkaClientService, zkClientService);

    try {
      exploreClient.close();
    } catch (IOException e) {
      LOG.error("Could not close Explore client", e);
      throw Throwables.propagate(e);
    }
  }

  @Override
  public void destroy() {
  }

  private InetAddress getLocalHost() {
    try {
      return InetAddress.getLocalHost();
    } catch (UnknownHostException e) {
      LOG.error("Error obtaining localhost address", e);
      throw Throwables.propagate(e);
    }
  }

  /**
   * Returns a map from system service name to a map from property to configuration key.
   */
  private Map<String, Map<String, String>> getConfigKeys() {
    Map<String, Map<String, String>> configKeys = Maps.newHashMap();

    configKeys.put(Constants.Service.LOGSAVER,
                   ImmutableMap.of("default", Constants.LogSaver.NUM_INSTANCES,
                                   "max", Constants.LogSaver.MAX_INSTANCES));
    configKeys.put(Constants.Service.TRANSACTION,
                   ImmutableMap.of("default", Constants.Transaction.Container.NUM_INSTANCES,
                                   "max", Constants.Transaction.Container.MAX_INSTANCES));
    configKeys.put(Constants.Service.METRICS_PROCESSOR,
                   ImmutableMap.of("default", Constants.MetricsProcessor.NUM_INSTANCES,
                                   "max", Constants.MetricsProcessor.MAX_INSTANCES));
    configKeys.put(Constants.Service.METRICS,
                   ImmutableMap.of("default", Constants.Metrics.NUM_INSTANCES,
                                   "max", Constants.Metrics.MAX_INSTANCES));
    configKeys.put(Constants.Service.STREAMS,
                   ImmutableMap.of("default", Constants.Stream.CONTAINER_INSTANCES,
                                   "max", Constants.Stream.MAX_INSTANCES));
    configKeys.put(Constants.Service.DATASET_EXECUTOR,
                   ImmutableMap.of("default", Constants.Dataset.Executor.CONTAINER_INSTANCES,
                                   "max", Constants.Dataset.Executor.MAX_INSTANCES));
    configKeys.put(Constants.Service.EXPLORE_HTTP_USER_SERVICE,
                   ImmutableMap.of("default", Constants.Explore.CONTAINER_INSTANCES,
                                   "max", Constants.Explore.MAX_INSTANCES));
    return configKeys;
  }

  private Map<String, Integer> getSystemServiceInstances() {
    Map<String, Integer> instanceCountMap = Maps.newHashMap();
    for (Map.Entry<String, Map<String, String>> entry : getConfigKeys().entrySet()) {
      String service = entry.getKey();
      Map<String, String> configKeys = entry.getValue();
      try {
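        // Reconcile the persisted instance count with configuration: fall back to the
        // configured default (capped at max) when nothing is saved, and clamp to max otherwise.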
        int maxCount = cConf.getInt(configKeys.get("max"));

        Integer savedCount = serviceStore.getServiceInstance(service);
        if (savedCount == null || savedCount == 0) {
          savedCount = Math.min(maxCount, cConf.getInt(configKeys.get("default")));
        } else {
          // If the max value is smaller than the saved instance count, update the store to the max value.
          if (savedCount > maxCount) {
            savedCount = maxCount;
          }
        }

        serviceStore.setServiceInstance(service, savedCount);
        instanceCountMap.put(service, savedCount);
        LOG.info("Setting instance count of {} Service to {}", service, savedCount);
      } catch (Exception e) {
        LOG.error("Couldn't retrieve instance count {}: {}", service, e.getMessage(), e);
      }
    }
    return instanceCountMap;
  }

  private TwillApplication createTwillApplication(final Map<String, Integer> instanceCountMap) {
    try {
      return new MasterTwillApplication(cConf, getSavedCConf(), getSavedHConf(), isExploreEnabled, instanceCountMap);
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
  }

  private void scheduleSecureStoreUpdate(TwillRunner twillRunner) {
    if (User.isHBaseSecurityEnabled(hConf)) {
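      // Keep the HBase delegation tokens of the launched containers fresh: first update after
      // 30 seconds, then at the updater's own interval.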
      twillRunner.scheduleSecureStoreUpdate(secureStoreUpdater, 30000L, secureStoreUpdater.getUpdateInterval(),
                                            TimeUnit.MILLISECONDS);
    }
  }

  private TwillPreparer prepare(TwillPreparer preparer) {
    return preparer.withDependencies(new HBaseTableUtilFactory().get().getClass())
      .addSecureStore(secureStoreUpdater.update(null, null)); // HBaseSecureStoreUpdater.update() ignores parameters
  }

  private void runTwillApps() {
    // If the application is already running, keep a handle to that instance
    Iterable<TwillController> twillControllers = lookupService();
    Iterator<TwillController> iterator = twillControllers.iterator();

    if (iterator.hasNext()) {
      LOG.info("{} application is already running", serviceName);
      twillController = iterator.next();

      if (iterator.hasNext()) {
        LOG.warn("Found more than one instance of {} running; stopping the others", serviceName);
        while (iterator.hasNext()) {
          TwillController controller = iterator.next();
          LOG.warn("Stopping one extra instance of {}", serviceName);
          controller.stopAndWait();
        }
        LOG.warn("Stopped extra instances of {}", serviceName);
      }
    } else {
      LOG.info("Starting {} application", serviceName);
      TwillPreparer twillPreparer = getPreparer();
      twillController = twillPreparer.start();

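      // Start the Dataset service once the application reaches the running state, and restart
      // the application with exponential backoff if it fails or terminates unexpectedly.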
      twillController.addListener(new ServiceListenerAdapter() {

        @Override
        public void running() {
          if (!dsService.isRunning()) {
            LOG.info("Starting Dataset service");
            dsService.startAndWait();
          }
        }

        @Override
        public void failed(Service.State from, Throwable failure) {
          LOG.error("{} failed with exception; restarting with back-off", serviceName, failure);
          backOffRun();
        }

        @Override
        public void terminated(Service.State from) {
          LOG.warn("{} was terminated; restarting with back-off", serviceName);
          backOffRun();
        }
      }, MoreExecutors.sameThreadExecutor());
    }
  }

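  // Snapshots of hConf and cConf are written to temporary XML files so that
  // MasterTwillApplication can make them available to the launched containers.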
  private File getSavedHConf() throws IOException {
    File hConfFile = saveHConf(hConf, File.createTempFile("hConf", ".xml"));
    hConfFile.deleteOnExit();
    return hConfFile;
  }

  private File getSavedCConf() throws IOException {
    File cConfFile = saveCConf(cConf, File.createTempFile("cConf", ".xml"));
    cConfFile.deleteOnExit();
    return cConfFile;
  }

  /**
   * Waits for some time while looking up the service in Twill.
   */
  private Iterable<TwillController> lookupService() {
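    // Poll up to 100 times at 20 ms intervals (about two seconds in total) for a controller
    // of the running application to appear.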
    int count = 100;
    Iterable<TwillController> iterable = twillRunnerService.lookup(serviceName);

    try {
      for (int i = 0; i < count; ++i) {
        if (iterable.iterator().hasNext()) {
          return iterable;
        }

        TimeUnit.MILLISECONDS.sleep(20);
      }
    } catch (InterruptedException e) {
      LOG.warn("Caught interrupted exception", e);
      Thread.currentThread().interrupt();
    }

    return iterable;
  }

  /**
   * Prepares the specs of the Twill application for the Explore Twill runnable:
   * adds the jars needed by the Explore module to the classpath of the containers, and
   * adds conf files (hive-site.xml, etc.) as resources available to the Explore Twill
   * runnable.
   */
  private TwillPreparer prepareExploreContainer(TwillPreparer preparer) {
    if (!isExploreEnabled) {
      return preparer;
    }

    try {
      // Put the jars needed by Hive in the containers' classpath. Those jars are localized in the
      // Explore container by MasterTwillApplication, so they are available to ExploreServiceTwillRunnable
      Set<File> jars = ExploreServiceUtils.traceExploreDependencies();
      for (File jarFile : jars) {
        LOG.trace("Adding jar file to classpath: {}", jarFile.getName());
        preparer = preparer.withClassPaths(jarFile.getName());
      }
    } catch (IOException e) {
      throw new RuntimeException("Unable to trace Explore dependencies", e);
    }

    // EXPLORE_CONF_FILES will be defined in startup scripts if Hive is installed.
    String hiveConfFiles = System.getProperty(Constants.Explore.EXPLORE_CONF_FILES);
    LOG.debug("Hive conf files = {}", hiveConfFiles);
    if (hiveConfFiles == null) {
      throw new RuntimeException("System property " + Constants.Explore.EXPLORE_CONF_FILES + " is not set");
    }

    // Add all the conf files needed by Hive as resources available to containers
    Iterable<File> hiveConfFileList = ExploreServiceUtils.getClassPathJarsFiles(hiveConfFiles);
    for (File file : hiveConfFileList) {
      if (file.getName().endsWith(".xml")) {
        preparer = preparer.withResources(ExploreServiceUtils.hijackHiveConfFile(file).toURI());
      }
    }

    return preparer;
  }

  private TwillPreparer getPreparer() {
    TwillPreparer preparer = twillRunnerService.prepare(twillApplication)
      .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out)));

    // Add system logback file to the preparer
    URL logbackUrl = getClass().getResource("/logback.xml");
    if (logbackUrl == null) {
      LOG.warn("Cannot find logback.xml to pass onto Twill Runnables!");
    } else {
      try {
        preparer.withResources(logbackUrl.toURI());
      } catch (URISyntaxException e) {
        LOG.error("Got exception while getting URI for logback.xml - {}", logbackUrl);
      }
    }
    preparer = prepareExploreContainer(preparer);
    return prepare(preparer);
  }

  private void backOffRun() {
    if (stopFlag) {
      LOG.warn("Not starting a new run when stopFlag is true");
      return;
    }

    if (System.currentTimeMillis() - lastRunTimeMs > SUCCESSFUL_RUN_DURATION_MS) {
      currentRun = 0;
    }

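    // Exponential backoff: sleep 500 ms * 2^currentRun (500 ms, 1 s, 2 s, ...), capped
    // at MAX_BACKOFF_TIME_MS.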
    try {
      long sleepMs = Math.min(500 * (long) Math.pow(2, currentRun), MAX_BACKOFF_TIME_MS);
      LOG.info("Current restart run = {}; backing off for {} ms", currentRun, sleepMs);
      TimeUnit.MILLISECONDS.sleep(sleepMs);
    } catch (InterruptedException e) {
      LOG.warn("Caught interrupted exception", e);
      Thread.currentThread().interrupt();
    }

    runTwillApps();
    ++currentRun;
    lastRunTimeMs = System.currentTimeMillis();
  }

  private static File saveHConf(Configuration conf, File file) throws IOException {
    Writer writer = Files.newWriter(file, Charsets.UTF_8);
    try {
      conf.writeXml(writer);
    } finally {
      writer.close();
    }
    return file;
  }

  private static File saveCConf(CConfiguration conf, File file) throws IOException {
    Writer writer = Files.newWriter(file, Charsets.UTF_8);
    try {
      conf.writeXml(writer);
    } finally {
      writer.close();
    }
    return file;
  }
}