Package org.hornetq.core.server.impl

Source Code of org.hornetq.core.server.impl.QuorumManager$QuorumVoteServerConnect

package org.hornetq.core.server.impl;

import java.util.Collection;
import java.util.LinkedList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import org.hornetq.api.core.HornetQException;
import org.hornetq.api.core.HornetQExceptionType;
import org.hornetq.api.core.Pair;
import org.hornetq.api.core.TransportConfiguration;
import org.hornetq.api.core.client.ClientSession;
import org.hornetq.api.core.client.ClientSessionFactory;
import org.hornetq.api.core.client.ClusterTopologyListener;
import org.hornetq.api.core.client.HornetQClient;
import org.hornetq.api.core.client.ServerLocator;
import org.hornetq.api.core.client.SessionFailureListener;
import org.hornetq.api.core.client.TopologyMember;
import org.hornetq.core.client.impl.ClientSessionFactoryInternal;
import org.hornetq.core.client.impl.ServerLocatorImpl;
import org.hornetq.core.client.impl.Topology;
import org.hornetq.core.client.impl.TopologyMemberImpl;
import org.hornetq.core.protocol.core.CoreRemotingConnection;
import org.hornetq.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
import org.hornetq.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage.LiveStopping;
import org.hornetq.core.remoting.FailureListener;
import org.hornetq.core.server.HornetQServerLogger;
import org.hornetq.core.server.NodeManager;

/**
* Manages a quorum of servers used to determine whether a given server is running or not.
* <p>
* The use case scenario is an eventual connection loss between the live and the backup, where the
* quorum will help a remote backup deciding whether to replace its 'live' server or to keep trying
* to reconnect.
* <p>
* If the live server does an orderly shutdown, it will explicitly notify the backup through the
* replication channel. In this scenario, no voting takes place and the backup fails-over. See
* {@link #failOver(boolean)}.
* <p>
* In case of
* <ol>
* <li>broken connection, which would trigger a
* {@link FailureListener#connectionFailed(HornetQException, boolean)}. This is affected by the TTL
* configuration.</li>
* <li>getting a "nodeDown" topology notification. This would happen if a given server failed to
* PING our live</li>
* </ol>
* The backup will try to connect to all the topology members, if it manages to reach at least half
* of the servers, it concludes the network is operating normally and fail-over. If the backup finds
* less than half of its peers, then it assumes a network failure and attempts to reconnect with the
* live.
*/
public final class QuorumManager implements SessionFailureListener, ClusterTopologyListener
{
   private String targetServerID = "";
   private final ExecutorService executor;
   private final String serverIdentity;
   private CountDownLatch latch;
   private volatile BACKUP_ACTIVATION signal;
   private ClientSessionFactoryInternal sessionFactory;
   private final Topology topology;
   private CoreRemotingConnection connection;

   /** safety parameter to make _sure_ we get out of await() */
   private static final int LATCH_TIMEOUT = 30;
   private static final int RECONNECT_ATTEMPTS = 5;

   private final Object decisionGuard = new Object();
   private final NodeManager nodeManager;
   private final ServerLocator serverLocator;
   private final ScheduledExecutorService scheduledPool;

   /**
    * This is a safety net in case the live sends the first {@link ReplicationLiveIsStoppingMessage}
    * with code {@link LiveStopping#STOP_CALLED} and crashes before sending the second with
    * {@link LiveStopping#FAIL_OVER}.
    * <p>
    * If the second message does come within this dead line, we fail over anyway.
    */
   private static final int WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG = 60;

   public QuorumManager(ServerLocator serverLocator, ExecutorService executor, ScheduledExecutorService scheduledPool,
                        String identity, NodeManager nodeManager)
   {
      this.serverIdentity = identity;
      this.executor = executor;
      this.scheduledPool = scheduledPool;
      this.latch = new CountDownLatch(1);
      this.nodeManager = nodeManager;
      this.serverLocator = serverLocator;
      topology = serverLocator.getTopology();
   }

   @Override
   public void nodeUP(TopologyMember topologyMember, boolean last)
   {
      //noop
   }

   @Override
   public void nodeDown(long eventUID, String nodeID)
   {
      if (targetServerID.equals(nodeID))
      {
         decideOnAction();
      }
   }

   public void setLiveID(String liveID)
   {
      targetServerID = liveID;
      nodeManager.setNodeID(liveID);
      //now we are replicating we can start waiting for disconnect notifications so we can fail over
      sessionFactory.addFailureListener(this);
   }

   private boolean isLiveDown()
   {
      Collection<TopologyMemberImpl> nodes = topology.getMembers();
      Collection<ServerLocator> locatorsList = new LinkedList<ServerLocator>();
      AtomicInteger pingCount = new AtomicInteger(0);
      int total = 0;
      for (TopologyMemberImpl tm : nodes)
         if (useIt(tm))
            total++;

      if (total < 1)
         return true;

      final CountDownLatch voteLatch = new CountDownLatch(total);
      try
      {
         for (TopologyMemberImpl tm : nodes)
         {
            Pair<TransportConfiguration, TransportConfiguration> pair = tm.getConnector();

            TransportConfiguration serverTC = pair.getA();
            if (useIt(tm))
            {
               ServerLocatorImpl locator = (ServerLocatorImpl)HornetQClient.createServerLocatorWithoutHA(serverTC);
               locatorsList.add(locator);
               executor.submit(new QuorumVoteServerConnect(voteLatch, total, pingCount, locator, serverTC));
            }
         }

         try
         {
            voteLatch.await(LATCH_TIMEOUT, TimeUnit.SECONDS);
         }
         catch (InterruptedException interruption)
         {
            // No-op. The best the quorum can do now is to return the latest number it has
         }
         // -1: because the live server is not being filtered out.
         boolean vote = nodeIsDown(total, pingCount.get());
         HornetQServerLogger.LOGGER.trace("quorum vote is liveIsDown=" + vote + ", count=" + pingCount);
         return vote;
      }
      finally
      {
         for (ServerLocator locator : locatorsList)
         {
            try
            {
               locator.close();
            }
            catch (Exception e)
            {
               // no-op
            }
         }
      }
   }

   /**
    * @param tm
    * @return
    */
   private boolean useIt(TopologyMemberImpl tm)
   {
      return tm.getLive() != null && !targetServerID.equals(tm.getLive().getName());
   }

   @Override
   public String toString()
   {
      return QuorumManager.class.getSimpleName() + "(server=" + serverIdentity + ")";
   }

   /**
    * Decides whether the server is to be considered down or not.
    * @param totalServers
    * @param reachedServers
    * @return
    */
   private static boolean nodeIsDown(int totalServers, int reachedServers)
   { // at least half of the servers were reached
      return reachedServers * 2 >= totalServers - 1;
   }

   public void notifyRegistrationFailed()
   {
      signal = BACKUP_ACTIVATION.FAILURE_REPLICATING;
      latch.countDown();
   }

   public void notifyAlreadyReplicating()
   {
      signal = BACKUP_ACTIVATION.ALREADY_REPLICATING;
      latch.countDown();
   }

   /**
    * Attempts to connect to a given server.
    */
   private static class QuorumVoteServerConnect implements Runnable
   {
      private final ServerLocatorImpl locator;
      private final CountDownLatch latch;
      private final AtomicInteger count;
      private final TransportConfiguration tc;
      private final int total;

      public QuorumVoteServerConnect(CountDownLatch latch, int total, AtomicInteger count,
                           ServerLocatorImpl serverLocator,
                           TransportConfiguration serverTC)
      {
         this.total = total;
         this.locator = serverLocator;
         this.latch=latch;
         this.count = count;
         this.tc = serverTC;
      }


      @Override
      public void run()
      {
         locator.setReconnectAttempts(0);

         final ClientSessionFactory sessionFactory;
         ClientSession session;
         try
         {
            sessionFactory = locator.createSessionFactory(tc);
            if (sessionFactory != null)
            {
               session = sessionFactory.createSession();
               if (session != null)
               {
                  if (nodeIsDown(total, count.incrementAndGet()))
                  {
                     while (latch.getCount() > 0)
                        latch.countDown();
                  }
                  session.close();
                  sessionFactory.close();
               }
            }
         }
         catch (Exception e)
         {
            // no-op
         }
         finally
         {
            latch.countDown();
            locator.close();
         }
      }
   }

   @Override
   public void beforeReconnect(HornetQException exception)
   {
      //noop
   }

   @Override
   public void connectionFailed(HornetQException exception, boolean failedOver)
   {
      decideOnAction();
   }

   private void decideOnAction()
   {
      //we may get called via multiple paths so need to guard
      synchronized (decisionGuard)
      {
         if (signal == BACKUP_ACTIVATION.FAIL_OVER)
         {
            return;
         }
         if (!isLiveDown())
         {
            try
            {
               // no point in repeating all the reconnection logic
               sessionFactory.connect(RECONNECT_ATTEMPTS, false);
               return;
            }
            catch (HornetQException e)
            {
               if (e.getType() != HornetQExceptionType.NOT_CONNECTED)
                  HornetQServerLogger.LOGGER.errorReConnecting(e);
            }
         }
         // live is assumed to be down, backup fails-over
         signal = BACKUP_ACTIVATION.FAIL_OVER;
      }
      latch.countDown();
   }

   enum BACKUP_ACTIVATION
   {
      FAIL_OVER, FAILURE_REPLICATING, ALREADY_REPLICATING, STOP;
   }

   /**
    * Called by the replicating backup (i.e. "SharedNothing" backup) to wait for the signal to
    * fail-over or to stop.
    * @return signal, indicating whether to stop or to fail-over
    */
   public final BACKUP_ACTIVATION waitForStatusChange()
   {
      try
      {
         latch.await();
      }
      catch (InterruptedException e)
      {
         return BACKUP_ACTIVATION.STOP;
      }
      return signal;
   }

   /**
    * Cause the Activation thread to exit and the server to be stopped.
    * @param explicitSignal the state we want to set the quorum manager to return
    */
   public synchronized void causeExit(BACKUP_ACTIVATION explicitSignal)
   {
      removeListener();
      this.signal = explicitSignal;
      latch.countDown();
   }

   private void removeListener()
   {
      serverLocator.removeClusterTopologyListener(this);
      if (connection != null)
         connection.removeFailureListener(this);
   }

   /**
    * Releases the latch, causing the backup activation thread to fail-over.
    * <p>
    * The use case is for when the 'live' has an orderly shutdown, in which case it informs the
    * backup that it should fail-over.
    */
   public synchronized void failOver(LiveStopping finalMessage)
   {
      removeListener();
      signal = BACKUP_ACTIVATION.FAIL_OVER;
      if (finalMessage == LiveStopping.FAIL_OVER)
      {
         latch.countDown();
      }
      if (finalMessage == LiveStopping.STOP_CALLED)
      {
         final CountDownLatch localLatch = latch;
         scheduledPool.schedule(new Runnable()
         {
            @Override
            public void run()
            {
               localLatch.countDown();
            }

         }, WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG, TimeUnit.SECONDS);
      }
   }

   /**
    * @param sessionFactory the session factory used to connect to the live server
    */
   public void setSessionFactory(ClientSessionFactoryInternal sessionFactory)
   {
      this.sessionFactory = sessionFactory;
   }

   /**
    * @param liveConnection
    */
   public void addAsFailureListenerOf(CoreRemotingConnection liveConnection)
   {
      this.connection = liveConnection;
      connection.addFailureListener(this);
      //connection.addCloseListener(this);
   }

   public synchronized void reset()
   {
      latch = new CountDownLatch(1);
   }
}
TOP

Related Classes of org.hornetq.core.server.impl.QuorumManager$QuorumVoteServerConnect

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.