Package org.apache.helix.manager.zk

Source Code of org.apache.helix.manager.zk.ZKHelixManager

package org.apache.helix.manager.zk;

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import static org.apache.helix.HelixConstants.ChangeType.CONFIG;
import static org.apache.helix.HelixConstants.ChangeType.CURRENT_STATE;
import static org.apache.helix.HelixConstants.ChangeType.EXTERNAL_VIEW;
import static org.apache.helix.HelixConstants.ChangeType.HEALTH;
import static org.apache.helix.HelixConstants.ChangeType.IDEAL_STATE;
import static org.apache.helix.HelixConstants.ChangeType.LIVE_INSTANCE;
import static org.apache.helix.HelixConstants.ChangeType.MESSAGE;
import static org.apache.helix.HelixConstants.ChangeType.MESSAGES_CONTROLLER;

import java.lang.management.ManagementFactory;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Timer;
import java.util.concurrent.TimeUnit;

import org.I0Itec.zkclient.ZkConnection;
import org.apache.helix.BaseDataAccessor;
import org.apache.helix.ClusterMessagingService;
import org.apache.helix.ConfigAccessor;
import org.apache.helix.ConfigChangeListener;
import org.apache.helix.ConfigScope.ConfigScopeProperty;
import org.apache.helix.ControllerChangeListener;
import org.apache.helix.CurrentStateChangeListener;
import org.apache.helix.ExternalViewChangeListener;
import org.apache.helix.HealthStateChangeListener;
import org.apache.helix.HelixAdmin;
import org.apache.helix.HelixConstants.ChangeType;
import org.apache.helix.HelixDataAccessor;
import org.apache.helix.HelixException;
import org.apache.helix.HelixManager;
import org.apache.helix.HelixTimerTask;
import org.apache.helix.IdealStateChangeListener;
import org.apache.helix.InstanceType;
import org.apache.helix.LiveInstanceChangeListener;
import org.apache.helix.MessageListener;
import org.apache.helix.PreConnectCallback;
import org.apache.helix.PropertyKey.Builder;
import org.apache.helix.PropertyPathConfig;
import org.apache.helix.PropertyType;
import org.apache.helix.ZNRecord;
import org.apache.helix.controller.restlet.ZKPropertyTransferServer;
import org.apache.helix.healthcheck.HealthStatsAggregationTask;
import org.apache.helix.healthcheck.ParticipantHealthReportCollector;
import org.apache.helix.healthcheck.ParticipantHealthReportCollectorImpl;
import org.apache.helix.messaging.DefaultMessagingService;
import org.apache.helix.messaging.handling.MessageHandlerFactory;
import org.apache.helix.model.CurrentState;
import org.apache.helix.model.LiveInstance;
import org.apache.helix.model.Message.MessageType;
import org.apache.helix.model.StateModelDefinition;
import org.apache.helix.monitoring.ZKPathDataDumpTask;
import org.apache.helix.participant.DistClusterControllerElection;
import org.apache.helix.participant.HelixStateMachineEngine;
import org.apache.helix.participant.StateMachineEngine;
import org.apache.helix.store.ZNRecordJsonSerializer;
import org.apache.helix.store.zk.ZkHelixPropertyStore;
import org.apache.helix.tools.PropertiesReader;
import org.apache.log4j.Logger;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.Watcher.Event.KeeperState;


public class ZKHelixManager implements HelixManager
{
  private static Logger                        logger                  =
                                                                           Logger.getLogger(ZKHelixManager.class);
  private static final int                     RETRY_LIMIT             = 3;
  private static final int                     CONNECTIONTIMEOUT       = 60 * 1000;
  private final String                         _clusterName;
  private final String                         _instanceName;
  private final String                         _zkConnectString;
  private static final int                     DEFAULT_SESSION_TIMEOUT = 30 * 1000;
  private ZKHelixDataAccessor                  _helixAccessor;
  private ConfigAccessor                       _configAccessor;
  protected ZkClient                           _zkClient;
  private final List<CallbackHandler>          _handlers;
  private final ZkStateChangeListener          _zkStateChangeListener;
  private final InstanceType                   _instanceType;
  volatile String                              _sessionId;
  private Timer                                _timer;
  private CallbackHandler                      _leaderElectionHandler;
  private ParticipantHealthReportCollectorImpl _participantHealthCheckInfoCollector;
  private final DefaultMessagingService        _messagingService;
  private ZKHelixAdmin                         _managementTool;
  private final String                         _version;
  private final StateMachineEngine             _stateMachEngine;
  private int                                  _sessionTimeout;
  private ZkHelixPropertyStore<ZNRecord>       _helixPropertyStore;
  private final List<HelixTimerTask>           _controllerTimerTasks;
  private BaseDataAccessor<ZNRecord>           _baseDataAccessor;
  List<PreConnectCallback>                     _preConnectCallbacks    =
                                                                           new LinkedList<PreConnectCallback>();
  ZKPropertyTransferServer                     _transferServer         = null;

  public ZKHelixManager(String clusterName,
                        String instanceName,
                        InstanceType instanceType,
                        String zkConnectString) throws Exception
  {
    logger.info("Create a zk-based cluster manager. clusterName:" + clusterName
        + ", instanceName:" + instanceName + ", type:" + instanceType + ", zkSvr:"
        + zkConnectString);
    int sessionTimeoutInt = -1;
    try
    {
      sessionTimeoutInt =
          Integer.parseInt(System.getProperty("zk.session.timeout", ""
              + DEFAULT_SESSION_TIMEOUT));
    }
    catch (NumberFormatException e)
    {
      logger.warn("Exception while parsing session timeout: "
          + System.getProperty("zk.session.timeout", "" + DEFAULT_SESSION_TIMEOUT));
    }
    if (sessionTimeoutInt > 0)
    {
      _sessionTimeout = sessionTimeoutInt;
    }
    else
    {
      _sessionTimeout = DEFAULT_SESSION_TIMEOUT;
    }
    if (instanceName == null)
    {
      try
      {
        instanceName =
            InetAddress.getLocalHost().getCanonicalHostName() + "-"
                + instanceType.toString();
      }
      catch (UnknownHostException e)
      {
        // can ignore it
        logger.info("Unable to get host name. Will set it to UNKNOWN, mostly ignorable",
                    e);
        instanceName = "UNKNOWN";
      }
    }

    _clusterName = clusterName;
    _instanceName = instanceName;
    _instanceType = instanceType;
    _zkConnectString = zkConnectString;
    _zkStateChangeListener = new ZkStateChangeListener(this);
    _timer = null;

    _handlers = new ArrayList<CallbackHandler>();

    _messagingService = new DefaultMessagingService(this);

    _version =
        new PropertiesReader("cluster-manager-version.properties").getProperty("clustermanager.version");

    _stateMachEngine = new HelixStateMachineEngine(this);

    // add all timer tasks
    _controllerTimerTasks = new ArrayList<HelixTimerTask>();
    if (_instanceType == InstanceType.CONTROLLER)
    {
      _controllerTimerTasks.add(new HealthStatsAggregationTask(this));
    }
  }

  private boolean isInstanceSetup()
  {
    if (_instanceType == InstanceType.PARTICIPANT
        || _instanceType == InstanceType.CONTROLLER_PARTICIPANT)
    {
      boolean isValid =
          _zkClient.exists(PropertyPathConfig.getPath(PropertyType.CONFIGS,
                                                      _clusterName,
                                                      ConfigScopeProperty.PARTICIPANT.toString(),
                                                      _instanceName))
              && _zkClient.exists(PropertyPathConfig.getPath(PropertyType.MESSAGES,
                                                             _clusterName,
                                                             _instanceName))
              && _zkClient.exists(PropertyPathConfig.getPath(PropertyType.CURRENTSTATES,
                                                             _clusterName,
                                                             _instanceName))
              && _zkClient.exists(PropertyPathConfig.getPath(PropertyType.STATUSUPDATES,
                                                             _clusterName,
                                                             _instanceName))
              && _zkClient.exists(PropertyPathConfig.getPath(PropertyType.ERRORS,
                                                             _clusterName,
                                                             _instanceName));

      return isValid;
    }
    return true;
  }

  @Override
  public void addIdealStateChangeListener(final IdealStateChangeListener listener) throws Exception
  {
    logger.info("ClusterManager.addIdealStateChangeListener()");
    checkConnected();
    final String path =
        PropertyPathConfig.getPath(PropertyType.IDEALSTATES, _clusterName);
    CallbackHandler callbackHandler =
        createCallBackHandler(path,
                              listener,
                              new EventType[] { EventType.NodeDataChanged,
                                  EventType.NodeDeleted, EventType.NodeCreated },
                              IDEAL_STATE);
    addListener(callbackHandler);
  }

  @Override
  public void addLiveInstanceChangeListener(LiveInstanceChangeListener listener) throws Exception
  {
    logger.info("ClusterManager.addLiveInstanceChangeListener()");
    checkConnected();
    final String path = _helixAccessor.keyBuilder().liveInstances().getPath();
    CallbackHandler callbackHandler =
        createCallBackHandler(path,
                              listener,
                              new EventType[] { EventType.NodeDataChanged, EventType.NodeChildrenChanged,
                                  EventType.NodeDeleted, EventType.NodeCreated },
                              LIVE_INSTANCE);
    addListener(callbackHandler);
  }

  @Override
  public void addConfigChangeListener(ConfigChangeListener listener)
  {
    logger.info("ClusterManager.addConfigChangeListener()");
    checkConnected();
    final String path =
        PropertyPathConfig.getPath(PropertyType.CONFIGS,
                                   _clusterName,
                                   ConfigScopeProperty.PARTICIPANT.toString());

    CallbackHandler callbackHandler =
        createCallBackHandler(path,
                              listener,
                              new EventType[] { EventType.NodeChildrenChanged },
                              CONFIG);
    addListener(callbackHandler);

  }

  // TODO: Decide if do we still need this since we are exposing
  // ClusterMessagingService
  @Override
  public void addMessageListener(MessageListener listener, String instanceName)
  {
    logger.info("ClusterManager.addMessageListener() " + instanceName);
    checkConnected();
    final String path = _helixAccessor.keyBuilder().messages(instanceName).getPath();
    CallbackHandler callbackHandler =
        createCallBackHandler(path,
                              listener,
                              new EventType[] { EventType.NodeChildrenChanged,
                                  EventType.NodeDeleted, EventType.NodeCreated },
                              MESSAGE);
    addListener(callbackHandler);
  }

  void addControllerMessageListener(MessageListener listener)
  {
    logger.info("ClusterManager.addControllerMessageListener()");
    checkConnected();
    final String path = _helixAccessor.keyBuilder().controllerMessages().getPath();

    CallbackHandler callbackHandler =
        createCallBackHandler(path,
                              listener,
                              new EventType[] { EventType.NodeChildrenChanged,
                                  EventType.NodeDeleted, EventType.NodeCreated },
                              MESSAGES_CONTROLLER);
    addListener(callbackHandler);
  }

  @Override
  public void addCurrentStateChangeListener(CurrentStateChangeListener listener,
                                            String instanceName,
                                            String sessionId)
  {
    logger.info("ClusterManager.addCurrentStateChangeListener() " + instanceName + " "
        + sessionId);
    checkConnected();
    final String path =
        _helixAccessor.keyBuilder().currentStates(instanceName, sessionId).getPath();

    CallbackHandler callbackHandler =
        createCallBackHandler(path,
                              listener,
                              new EventType[] { EventType.NodeChildrenChanged,
                                  EventType.NodeDeleted, EventType.NodeCreated },
                              CURRENT_STATE);
    addListener(callbackHandler);
  }

  @Override
  public void addHealthStateChangeListener(HealthStateChangeListener listener,
                                           String instanceName)
  {
    // System.out.println("ZKClusterManager.addHealthStateChangeListener()");
    // TODO: re-form this for stats checking
    logger.info("ClusterManager.addHealthStateChangeListener()" + instanceName);
    checkConnected();
    final String path = _helixAccessor.keyBuilder().healthReports(instanceName).getPath();

    CallbackHandler callbackHandler =
        createCallBackHandler(path, listener, new EventType[] {
            EventType.NodeChildrenChanged, EventType.NodeDataChanged,
            EventType.NodeDeleted, EventType.NodeCreated }, HEALTH);
    addListener(callbackHandler);
  }

  @Override
  public void addExternalViewChangeListener(ExternalViewChangeListener listener)
  {
    logger.info("ClusterManager.addExternalViewChangeListener()");
    checkConnected();
    final String path = _helixAccessor.keyBuilder().externalViews().getPath();

    CallbackHandler callbackHandler =
        createCallBackHandler(path,
                              listener,
                              new EventType[] { EventType.NodeDataChanged,
                                  EventType.NodeDeleted, EventType.NodeCreated },
                              EXTERNAL_VIEW);
    addListener(callbackHandler);
  }

  @Override
  public HelixDataAccessor getHelixDataAccessor()
  {
    checkConnected();
    return _helixAccessor;
  }

  @Override
  public ConfigAccessor getConfigAccessor()
  {
    checkConnected();
    return _configAccessor;
  }

  @Override
  public String getClusterName()
  {
    return _clusterName;
  }

  @Override
  public String getInstanceName()
  {
    return _instanceName;
  }

  @Override
  public void connect() throws Exception
  {
    logger.info("ClusterManager.connect()");
    if (_zkStateChangeListener.isConnected())
    {
      logger.warn("Cluster manager " + _clusterName + " " + _instanceName
          + " already connected");
      return;
    }

    try
    {
      createClient(_zkConnectString);
      _messagingService.onConnected();
    }
    catch (Exception e)
    {
      logger.error(e);
      disconnect();
      throw e;
    }
  }

  @Override
  public void disconnect()
  {

    if (!isConnected())
    {
      logger.warn("ClusterManager " + _instanceName + " already disconnected");
      return;
    }

    logger.info("disconnect " + _instanceName + "(" + _instanceType + ") from "
        + _clusterName);

    /**
     * shutdown thread pool first to avoid reset() being invoked in the middle of state
     * transition
     */
    _messagingService.getExecutor().shutDown();
    resetHandlers();

    _helixAccessor.shutdown();

    if (_leaderElectionHandler != null)
    {
      _leaderElectionHandler.reset();
    }

    if (_participantHealthCheckInfoCollector != null)
    {
      _participantHealthCheckInfoCollector.stop();
    }

    if (_timer != null)
    {
      _timer.cancel();
      _timer = null;
    }

    if (_instanceType == InstanceType.CONTROLLER)
    {
      stopTimerTasks();
    }

    // unsubscribe accessor from controllerChange
    _zkClient.unsubscribeAll();

    _zkClient.close();

    // HACK seems that zkClient is not sending DISCONNECT event
    _zkStateChangeListener.disconnect();
    logger.info("Cluster manager: " + _instanceName + " disconnected");
  }

  @Override
  public String getSessionId()
  {
    checkConnected();
    return _sessionId;
  }

  @Override
  public boolean isConnected()
  {
    return _zkStateChangeListener.isConnected();
  }

  @Override
  public long getLastNotificationTime()
  {
    return -1;
  }

  @Override
  public void addControllerListener(ControllerChangeListener listener)
  {
    checkConnected();
    final String path = _helixAccessor.keyBuilder().controller().getPath();
    logger.info("Add controller listener at: " + path);
    CallbackHandler callbackHandler =
        createCallBackHandler(path,
                              listener,
                              new EventType[] { EventType.NodeChildrenChanged,
                                  EventType.NodeDeleted, EventType.NodeCreated },
                              ChangeType.CONTROLLER);

    // System.out.println("add controller listeners to " + _instanceName +
    // " for " + _clusterName);
    // _handlers.add(callbackHandler);
    addListener(callbackHandler);
  }

  @Override
  public boolean removeListener(Object listener)
  {
    logger.info("remove listener: " + listener + " from " + _instanceName);

    synchronized (this)
    {
      Iterator<CallbackHandler> iterator = _handlers.iterator();
      while (iterator.hasNext())
      {
        CallbackHandler handler = iterator.next();
        // simply compare reference
        if (handler.getListener().equals(listener))
        {
          handler.reset();
          iterator.remove();
        }
      }
    }

    return true;
  }

  private void addLiveInstance()
  {
    LiveInstance liveInstance = new LiveInstance(_instanceName);
    liveInstance.setSessionId(_sessionId);
    liveInstance.setHelixVersion(_version);
    liveInstance.setLiveInstance(ManagementFactory.getRuntimeMXBean().getName());

    logger.info("Add live instance: InstanceName: " + _instanceName + " Session id:"
        + _sessionId);
    Builder keyBuilder = _helixAccessor.keyBuilder();
    if (!_helixAccessor.createProperty(keyBuilder.liveInstance(_instanceName),
                                       liveInstance))
    {
      String errorMsg =
          "Fail to create live instance node after waiting, so quit. instance:"
              + _instanceName;
      logger.warn(errorMsg);
      throw new HelixException(errorMsg);

    }
    String currentStatePathParent =
        PropertyPathConfig.getPath(PropertyType.CURRENTSTATES,
                                   _clusterName,
                                   _instanceName,
                                   getSessionId());

    if (!_zkClient.exists(currentStatePathParent))
    {
      _zkClient.createPersistent(currentStatePathParent);
      logger.info("Creating current state path " + currentStatePathParent);
    }
  }

  private void startStatusUpdatedumpTask()
  {
    long initialDelay = 30 * 60 * 1000;
    long period = 120 * 60 * 1000;
    int timeThresholdNoChange = 180 * 60 * 1000;

    if (_timer == null)
    {
      _timer = new Timer(true);
      _timer.scheduleAtFixedRate(new ZKPathDataDumpTask(this,
                                                        _zkClient,
                                                        timeThresholdNoChange),
                                 initialDelay,
                                 period);
    }
  }

  private void createClient(String zkServers) throws Exception
  {
    String propertyStorePath =
        PropertyPathConfig.getPath(PropertyType.PROPERTYSTORE, _clusterName);

    // by default use ZNRecordStreamingSerializer except for paths within the property
    // store which expects raw byte[] serialization/deserialization
    PathBasedZkSerializer zkSerializer =
        ChainedPathZkSerializer.builder(new ZNRecordStreamingSerializer())
                               .serialize(propertyStorePath, new ByteArraySerializer())
                               .build();

    _zkClient = new ZkClient(zkServers, _sessionTimeout, CONNECTIONTIMEOUT, zkSerializer);

    ZkBaseDataAccessor<ZNRecord> baseDataAccessor =
        new ZkBaseDataAccessor<ZNRecord>(_zkClient);
    if (_instanceType == InstanceType.PARTICIPANT)
    {
      String curStatePath =
          PropertyPathConfig.getPath(PropertyType.CURRENTSTATES,
                                     _clusterName,
                                     _instanceName);
      _baseDataAccessor =
          new ZkCacheBaseDataAccessor<ZNRecord>(baseDataAccessor,
                                                Arrays.asList(curStatePath));
    }
    else if (_instanceType == InstanceType.CONTROLLER)
    {
      String extViewPath = PropertyPathConfig.getPath(PropertyType.EXTERNALVIEW,

      _clusterName);
      _baseDataAccessor =
          new ZkCacheBaseDataAccessor<ZNRecord>(baseDataAccessor,
                                                Arrays.asList(extViewPath));

    }
    else
    {
      _baseDataAccessor = baseDataAccessor;
    }

    _helixAccessor =
        new ZKHelixDataAccessor(_clusterName, _instanceType, _baseDataAccessor);
    _configAccessor = new ConfigAccessor(_zkClient);
    int retryCount = 0;

    _zkClient.subscribeStateChanges(_zkStateChangeListener);
    while (retryCount < RETRY_LIMIT)
    {
      try
      {
        _zkClient.waitUntilConnected(_sessionTimeout, TimeUnit.MILLISECONDS);
        _zkStateChangeListener.handleStateChanged(KeeperState.SyncConnected);
        _zkStateChangeListener.handleNewSession();
        break;
      }
      catch (HelixException e)
      {
        logger.error("fail to createClient.", e);
        throw e;
      }
      catch (Exception e)
      {
        retryCount++;

        logger.error("fail to createClient. retry " + retryCount, e);
        if (retryCount == RETRY_LIMIT)
        {
          throw e;
        }
      }
    }
  }

  private CallbackHandler createCallBackHandler(String path,
                                                Object listener,
                                                EventType[] eventTypes,
                                                ChangeType changeType)
  {
    if (listener == null)
    {
      throw new HelixException("Listener cannot be null");
    }
    return new CallbackHandler(this, _zkClient, path, listener, eventTypes, changeType);
  }

  /**
   * This will be invoked when ever a new session is created<br/>
   *
   * case 1: the cluster manager was a participant carry over current state, add live
   * instance, and invoke message listener; case 2: the cluster manager was controller and
   * was a leader before do leader election, and if it becomes leader again, invoke ideal
   * state listener, current state listener, etc. if it fails to become leader in the new
   * session, then becomes standby; case 3: the cluster manager was controller and was NOT
   * a leader before do leader election, and if it becomes leader, instantiate and invoke
   * ideal state listener, current state listener, etc. if if fails to become leader in
   * the new session, stay as standby
   */

  protected void handleNewSession()
  {
    boolean isConnected = _zkClient.waitUntilConnected(CONNECTIONTIMEOUT, TimeUnit.MILLISECONDS);
    while (!isConnected)
    {
      logger.error("Could NOT connect to zk server in " + CONNECTIONTIMEOUT + "ms. zkServer: "
          + _zkConnectString + ", expiredSessionId: " + _sessionId + ", clusterName: "
          + _clusterName);
      isConnected = _zkClient.waitUntilConnected(CONNECTIONTIMEOUT, TimeUnit.MILLISECONDS);
    }

    ZkConnection zkConnection = ((ZkConnection) _zkClient.getConnection());
   
    synchronized (this)
    {
      _sessionId = Long.toHexString(zkConnection.getZookeeper().getSessionId());
    }
    _baseDataAccessor.reset();

    resetHandlers();

    logger.info("Handling new session, session id:" + _sessionId + ", instance:"
        + _instanceName + ", instanceTye: " + _instanceType + ", cluster: " + _clusterName);

    logger.info(zkConnection.getZookeeper());

    if (!ZKUtil.isClusterSetup(_clusterName, _zkClient))
    {
      throw new HelixException("Initial cluster structure is not set up for cluster:"
          + _clusterName);
    }
    if (!isInstanceSetup())
    {
      throw new HelixException("Initial cluster structure is not set up for instance:"
          + _instanceName + " instanceType:" + _instanceType);
    }

    if (_instanceType == InstanceType.PARTICIPANT
        || _instanceType == InstanceType.CONTROLLER_PARTICIPANT)
    {
      handleNewSessionAsParticipant();
    }

    if (_instanceType == InstanceType.CONTROLLER
        || _instanceType == InstanceType.CONTROLLER_PARTICIPANT)
    {
      addControllerMessageListener(_messagingService.getExecutor());
      MessageHandlerFactory defaultControllerMsgHandlerFactory =
          new DefaultControllerMessageHandlerFactory();
      _messagingService.getExecutor()
                       .registerMessageHandlerFactory(defaultControllerMsgHandlerFactory.getMessageType(),
                                                      defaultControllerMsgHandlerFactory);
      MessageHandlerFactory defaultSchedulerMsgHandlerFactory =
          new DefaultSchedulerMessageHandlerFactory(this);
      _messagingService.getExecutor()
                       .registerMessageHandlerFactory(defaultSchedulerMsgHandlerFactory.getMessageType(),
                                                      defaultSchedulerMsgHandlerFactory);
      MessageHandlerFactory defaultParticipantErrorMessageHandlerFactory =
          new DefaultParticipantErrorMessageHandlerFactory(this);
      _messagingService.getExecutor()
                       .registerMessageHandlerFactory(defaultParticipantErrorMessageHandlerFactory.getMessageType(),
                                                      defaultParticipantErrorMessageHandlerFactory);

      if (_leaderElectionHandler == null)
      {
        final String path =
            PropertyPathConfig.getPath(PropertyType.CONTROLLER, _clusterName);

        _leaderElectionHandler =
            createCallBackHandler(path,
                                  new DistClusterControllerElection(_zkConnectString),
                                  new EventType[] { EventType.NodeChildrenChanged,
                                      EventType.NodeDeleted, EventType.NodeCreated },
                                  ChangeType.CONTROLLER);
      }
      else
      {
        _leaderElectionHandler.init();
      }
    }

    if (_instanceType == InstanceType.PARTICIPANT
        || _instanceType == InstanceType.CONTROLLER_PARTICIPANT
        || (_instanceType == InstanceType.CONTROLLER && isLeader()))
    {
      initHandlers();
    }
  }

  private void handleNewSessionAsParticipant()
  {
    // In case there is a live instance record on zookeeper
    Builder keyBuilder = _helixAccessor.keyBuilder();

    if (_helixAccessor.getProperty(keyBuilder.liveInstance(_instanceName)) != null)
    {
      logger.warn("Found another instance with same instanceName: " + _instanceName
          + " in cluster " + _clusterName);
      // Wait for a while, in case previous storage node exits unexpectedly
      // and its liveinstance
      // still hangs around until session timeout happens
      try
      {
        Thread.sleep(_sessionTimeout + 5000);
      }
      catch (InterruptedException e)
      {
        logger.warn("Sleep interrupted while waiting for previous liveinstance to go away.",
                    e);
      }

      if (_helixAccessor.getProperty(keyBuilder.liveInstance(_instanceName)) != null)
      {
        String errorMessage =
            "instance " + _instanceName + " already has a liveinstance in cluster "
                + _clusterName;
        logger.error(errorMessage);
        throw new HelixException(errorMessage);
      }
    }
    // Invoke the PreConnectCallbacks
    for (PreConnectCallback callback : _preConnectCallbacks)
    {
      callback.onPreConnect();
    }
    addLiveInstance();
    carryOverPreviousCurrentState();

    // In case the cluster manager is running as a participant, setup message
    // listener
    _messagingService.registerMessageHandlerFactory(MessageType.STATE_TRANSITION.toString(),
                                                    _stateMachEngine);
    addMessageListener(_messagingService.getExecutor(), _instanceName);
    addControllerListener(_helixAccessor);

    if (_participantHealthCheckInfoCollector == null)
    {
      _participantHealthCheckInfoCollector =
          new ParticipantHealthReportCollectorImpl(this, _instanceName);
      _participantHealthCheckInfoCollector.start();
    }
    // start the participant health check timer, also create zk path for health
    // check info
    String healthCheckInfoPath =
        _helixAccessor.keyBuilder().healthReports(_instanceName).getPath();
    if (!_zkClient.exists(healthCheckInfoPath))
    {
      _zkClient.createPersistent(healthCheckInfoPath, true);
      logger.info("Creating healthcheck info path " + healthCheckInfoPath);
    }
  }

  @Override
  public void addPreConnectCallback(PreConnectCallback callback)
  {
    logger.info("Adding preconnect callback");
    _preConnectCallbacks.add(callback);
  }

  private void resetHandlers()
  {
    synchronized (this)
    {
      // get a copy of the list and iterate over the copy list
      // in case handler.reset() will modify the original handler list
      List<CallbackHandler> handlers = new ArrayList<CallbackHandler>();
      handlers.addAll(_handlers);

      for (CallbackHandler handler : handlers)
      {
        handler.reset();
        logger.info("reset handler: " + handler.getPath() + " by "
            + Thread.currentThread().getName());
      }
    }
  }

  private void initHandlers()
  {
    // may add new currentState and message listeners during init()
    // so make a copy and iterate over the copy
    synchronized (this)
    {
      List<CallbackHandler> handlers = new ArrayList<CallbackHandler>();
      handlers.addAll(_handlers);
      for (CallbackHandler handler : handlers)
      {
        handler.init();
      }
    }
  }

  private void addListener(CallbackHandler handler)
  {
    synchronized (this)
    {
      _handlers.add(handler);
      logger.info("add handler: " + handler.getPath() + " by "
          + Thread.currentThread().getName());
    }
  }

  @Override
  public boolean isLeader()
  {
    if (!isConnected())
    {
      return false;
    }

    if (_instanceType != InstanceType.CONTROLLER)
    {
      return false;
    }

    Builder keyBuilder = _helixAccessor.keyBuilder();
    LiveInstance leader = _helixAccessor.getProperty(keyBuilder.controllerLeader());
    if (leader == null)
    {
      return false;
    }
    else
    {
      String leaderName = leader.getInstanceName();
      // TODO need check sessionId also, but in distributed mode, leader's
      // sessionId is
      // not equal to
      // the leader znode's sessionId field which is the sessionId of the
      // controller_participant that
      // successfully creates the leader node
      if (leaderName == null || !leaderName.equals(_instanceName))
      {
        return false;
      }
    }
    return true;
  }

  private void carryOverPreviousCurrentState()
  {
    Builder keyBuilder = _helixAccessor.keyBuilder();

    List<String> subPaths =
        _helixAccessor.getChildNames(keyBuilder.sessions(_instanceName));
    for (String previousSessionId : subPaths)
    {
      List<CurrentState> previousCurrentStates =
          _helixAccessor.getChildValues(keyBuilder.currentStates(_instanceName,
                                                                 previousSessionId));

      for (CurrentState previousCurrentState : previousCurrentStates)
      {
        if (!previousSessionId.equalsIgnoreCase(_sessionId))
        {
          logger.info("Carrying over old session:" + previousSessionId + " resource "
              + previousCurrentState.getId() + " to new session:" + _sessionId);
          String stateModelDefRef = previousCurrentState.getStateModelDefRef();
          if (stateModelDefRef == null)
          {
            logger.error("pervious current state doesn't have a state model def. skip it. prevCS: "
                + previousCurrentState);
            continue;
          }
          StateModelDefinition stateModel =
              _helixAccessor.getProperty(keyBuilder.stateModelDef(stateModelDefRef));
          for (String partitionName : previousCurrentState.getPartitionStateMap()
                                                          .keySet())
          {

            previousCurrentState.setState(partitionName, stateModel.getInitialState());
          }
          previousCurrentState.setSessionId(_sessionId);
          _helixAccessor.setProperty(keyBuilder.currentState(_instanceName,
                                                             _sessionId,
                                                             previousCurrentState.getId()),
                                     previousCurrentState);
        }
      }
    }
    // Deleted old current state
    for (String previousSessionId : subPaths)
    {
      if (!previousSessionId.equalsIgnoreCase(_sessionId))
      {
        String path =
            _helixAccessor.keyBuilder()
                          .currentStates(_instanceName, previousSessionId)
                          .getPath();
        logger.info("Deleting previous current state. path: " + path + "/"
            + previousSessionId);
        _zkClient.deleteRecursive(path);

      }
    }
  }

  @Override
  public synchronized ZkHelixPropertyStore<ZNRecord> getHelixPropertyStore()
  {
    checkConnected();

    if (_helixPropertyStore == null)
    {
      String path =
          PropertyPathConfig.getPath(PropertyType.HELIX_PROPERTYSTORE, _clusterName);

      _helixPropertyStore =
          new ZkHelixPropertyStore<ZNRecord>(new ZkBaseDataAccessor<ZNRecord>(_zkClient),
                                             path,
                                             null);
    }

    return _helixPropertyStore;
  }

  @Override
  public synchronized HelixAdmin getClusterManagmentTool()
  {
    checkConnected();
    if (_zkClient != null)
    {
      _managementTool = new ZKHelixAdmin(_zkClient);
    }
    else
    {
      logger.error("Couldn't get ZKClusterManagementTool because zkClient is null");
    }

    return _managementTool;
  }

  @Override
  public ClusterMessagingService getMessagingService()
  {
    // The caller can register message handler factories on messaging service before the
    // helix manager is connected. Thus we do not do connected check here.
    return _messagingService;
  }

  @Override
  public ParticipantHealthReportCollector getHealthReportCollector()
  {
    checkConnected();
    return _participantHealthCheckInfoCollector;
  }

  @Override
  public InstanceType getInstanceType()
  {
    return _instanceType;
  }

  private void checkConnected()
  {
    if (!isConnected())
    {
      throw new HelixException("ClusterManager not connected. Call clusterManager.connect()");
    }
  }

  @Override
  public String getVersion()
  {
    return _version;
  }

  @Override
  public StateMachineEngine getStateMachineEngine()
  {
    return _stateMachEngine;
  }

  protected List<CallbackHandler> getHandlers()
  {
    return _handlers;
  }

  // TODO: rename this and not expose this function as part of interface
  @Override
  public void startTimerTasks()
  {
    for (HelixTimerTask task : _controllerTimerTasks)
    {
      task.start();
    }
    startStatusUpdatedumpTask();
  }

  @Override
  public void stopTimerTasks()
  {
    for (HelixTimerTask task : _controllerTimerTasks)
    {
      task.stop();
    }
  }
}
TOP

Related Classes of org.apache.helix.manager.zk.ZKHelixManager

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.