Package com.nokia.dempsy.cluster.zookeeper

Source Code of com.nokia.dempsy.cluster.zookeeper.TestZookeeperClusterResilience

/*
* Copyright 2012 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nokia.dempsy.cluster.zookeeper;

import static com.nokia.dempsy.TestUtils.createClusterLevel;
import static com.nokia.dempsy.TestUtils.poll;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.zookeeper.ZooKeeper;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.nokia.dempsy.Dempsy;
import com.nokia.dempsy.TestUtils;
import com.nokia.dempsy.TestUtils.Condition;
import com.nokia.dempsy.cluster.ClusterInfoException;
import com.nokia.dempsy.cluster.ClusterInfoSession;
import com.nokia.dempsy.cluster.ClusterInfoSessionFactory;
import com.nokia.dempsy.cluster.ClusterInfoWatcher;
import com.nokia.dempsy.cluster.DirMode;
import com.nokia.dempsy.config.ApplicationDefinition;
import com.nokia.dempsy.config.ClusterId;
import com.nokia.dempsy.messagetransport.tcp.TcpTransport;
import com.nokia.dempsy.monitoring.coda.StatsCollectorFactoryCoda;
import com.nokia.dempsy.router.DecentralizedRoutingStrategy;
import com.nokia.dempsy.router.SpecificClusterCheck;
import com.nokia.dempsy.serialization.java.JavaSerializer;

/**
* The goal here is to make sure the cluster is always consistent even if it loses
* the zookeeper session connection or doesn't have it to begin with.
*/
public class TestZookeeperClusterResilience
{
   public static final String appname = TestZookeeperClusterResilience.class.getSimpleName();
   private static Logger logger = LoggerFactory.getLogger(TestZookeeperClusterResilience.class);
   static private final long baseTimeoutMillis = 20000;
  
   private int port;
  
   @Before
   public void setup() throws IOException
   {
      port = ZookeeperTestServer.findNextPort();
      logger.debug("Running zookeeper test server on port " + port);
   }
  
   public static abstract class TestWatcher implements ClusterInfoWatcher
   {
      AtomicBoolean called = new AtomicBoolean(false);
      ZookeeperSession session;
     
      public TestWatcher(ZookeeperSession session) { this.session = session; }
     
   }
  
   // NOTE(review): none of the tests visible in this class reads or writes this flag --
   // confirm whether anything else still depends on it before removing it.
   volatile boolean connected = false;
  
   @Test
   public void testBouncingServer() throws Throwable
   {
      ZookeeperTestServer server = new ZookeeperTestServer();
      ZookeeperSession session = null;
      final ClusterId clusterId = new ClusterId(appname,"testBouncingServer");
     
      try
      {
         server.start();

         ZookeeperSessionFactory factory = new ZookeeperSessionFactory("127.0.0.1:" + port,5000);
         session = (ZookeeperSession)factory.createSession();
         final ZookeeperSession cluster = session;
         createClusterLevel(clusterId, session);
         TestWatcher callback = new TestWatcher(cluster)
         {
           
            @Override
            public void process()
            {
               boolean done = false;
               while (!done)
               {
                  done = true;
                 
                  try
                  {
                     if (session.getSubdirs(clusterId.asPath(), this).size() == 0)
                        session.mkdir(clusterId.asPath() + "/slot1",null,DirMode.EPHEMERAL);
                     called.set(true);
                  }
                  catch(ClusterInfoException.NoNodeException e)
                  {
                     try
                     {
                        createClusterLevel(clusterId,session);
                        done = false;
                     }
                     catch (ClusterInfoException e1)
                     {
                        throw new RuntimeException(e1);
                     }
                  }
                  catch(ClusterInfoException e)
                  {
                     // this will fail when the connection is severed... that's ok.
                  }
               }
            }

         };

         cluster.exists(clusterId.asPath(), callback);
         callback.process();
        
         // create another session and look
         ZookeeperSession session2 = (ZookeeperSession)factory.createSession();
         assertEquals(1,session2.getSubdirs(new ClusterId(appname,"testBouncingServer").asPath(), null).size());
         session2.stop();

         // kill the server.
         server.shutdown(false);

         // reset the flags
         callback.called.set(false);

         // restart the server
         server.start(false);
        
         // wait for the call
         assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new TestUtils.Condition<TestWatcher>()
         {
            @Override
            public boolean conditionMet(TestWatcher o) { return o.called.get(); }
         }));

         // get the view from a new session.
         session2 = (ZookeeperSession)factory.createSession();
         assertEquals(1,session2.getSubdirs(new ClusterId(appname,"testBouncingServer").asPath(), null).size());
         session2.stop();
      }
      finally
      {
         if (server != null)
            server.shutdown();
        
         if (session != null)
            session.stop();
      }
   }

   @Test
   public void testBouncingServerWithCleanDataDir() throws Throwable
   {
      ZookeeperTestServer server = new ZookeeperTestServer();
      ZookeeperSession session = null;
      final ClusterId clusterId = new ClusterId(appname,"testBouncingServerWithCleanDataDir");
     
      try
      {
         server.start();

         ZookeeperSessionFactory factory = new ZookeeperSessionFactory("127.0.0.1:" + port,5000);
         session = (ZookeeperSession)factory.createSession();
         final ZookeeperSession cluster = session;
         createClusterLevel(clusterId, session);
         TestWatcher callback = new TestWatcher(cluster)
         {
           
            @Override
            public void process()
            {
               boolean done = false;
               while (!done)
               {
                  done = true;
                 
                  try
                  {
                     if (session.getSubdirs(clusterId.asPath(), this).size() == 0)
                        session.mkdir(clusterId.asPath() + "/slot1",null,DirMode.EPHEMERAL);
                     called.set(true);
                  }
                  catch(ClusterInfoException.NoNodeException e)
                  {
                     try
                     {
                        createClusterLevel(clusterId,session);
                        done = false;
                     }
                     catch (ClusterInfoException e1)
                     {
                        throw new RuntimeException(e1);
                     }
                  }
                  catch(ClusterInfoException e)
                  {
                     // this will fail when the connection is severed... that's ok.
                  }
               }
            }

         };

         cluster.exists(clusterId.asPath(), callback);
         callback.process();
        
         // create another session and look
         ZookeeperSession session2 = (ZookeeperSession)factory.createSession();
         assertEquals(1,session2.getSubdirs(new ClusterId(appname,"testBouncingServerWithCleanDataDir").asPath(), null).size());
         session2.stop();

         // kill the server.
         server.shutdown(true);

         // reset the flags
         callback.called.set(false);

         // restart the server
         server.start(true);
        
         // wait for the call
         assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new TestUtils.Condition<TestWatcher>()
         {
            @Override
            public boolean conditionMet(TestWatcher o) { return o.called.get(); }
         }));

         // get the view from a new session.
         session2 = (ZookeeperSession)factory.createSession();
         assertEquals(1,session2.getSubdirs(new ClusterId(appname,"testBouncingServerWithCleanDataDir").asPath(), null).size());
         session2.stop();
      }
      finally
      {
         if (server != null)
            server.shutdown();
        
         if (session != null)
            session.stop();
      }
   }

   @Test
   public void testNoServerOnStartup() throws Throwable
   {
      // create a session factory
      ZookeeperSessionFactory factory = new ZookeeperSessionFactory("127.0.0.1:" + port,5000);
     
      // create a session from the session factory
      ZookeeperSession session = (ZookeeperSession)factory.createSession();
     
      ClusterId clusterId = new ClusterId(appname,"testNoServerOnStartup");
     
      // hook a test watch to make sure that callbacks work correctly
      TestWatcher callback = new TestWatcher(session)
      {
         @Override public void process() { called.set(true); }
      };
     
      // now accessing the cluster should get us an error.
      boolean gotCorrectError = false;
      try { session.getSubdirs(clusterId.asPath(), callback); } catch (ClusterInfoException e) { gotCorrectError = true; }
      assertTrue(gotCorrectError);
     
      // now lets startup the server.
      ZookeeperTestServer server = null;
      try
      {
         server = new ZookeeperTestServer();
         server.start();
        
         // create a cluster from the session
         TestUtils.createClusterLevel(clusterId,session);
        
         // wait until this works.
         assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new Condition<TestWatcher>() {
            @Override public boolean conditionMet(TestWatcher o){  return o.called.get(); }
         }));
        
         callback.called.set(false); // reset the callbacker ...
        
         // now see if the cluster works.
         assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new Condition<TestWatcher>() {
            @Override public boolean conditionMet(TestWatcher o){  return !o.called.get(); }
         }));

         session.getSubdirs(clusterId.asPath(), callback);
        
         ZooKeeper origZk = session.zkref.get();
         ZookeeperTestServer.forceSessionExpiration(origZk);
        
         // wait for the callback
         assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new Condition<TestWatcher>() {
            @Override public boolean conditionMet(TestWatcher o){  return o.called.get(); }
         }));
        
         // unfortunately I cannot check the getActiveSlots for failure because there's a race condition I can't fix.
         //  No matter how fast I check it's possible that it's okay again OR that allSlots hasn't been cleared.
         //
         // however, they should eventually recover.
         gotCorrectError = true;
         for (long endTime = System.currentTimeMillis() + baseTimeoutMillis; endTime > System.currentTimeMillis() && gotCorrectError;)
         {
            Thread.sleep(1);
            try { session.getSubdirs(clusterId.asPath(), callback); gotCorrectError = false; } catch (ClusterInfoException e) {  }
         }

         session.getSubdirs(clusterId.asPath(), callback);

         // And join should work
         gotCorrectError = true;
         for (long endTime = System.currentTimeMillis() + baseTimeoutMillis; endTime > System.currentTimeMillis() && gotCorrectError;)
         {
            Thread.sleep(1);
            try { session.mkdir(clusterId.asPath() + "/join-1", null, DirMode.EPHEMERAL); gotCorrectError = false; } catch (ClusterInfoException e) {  }
         }
        
         assertFalse(gotCorrectError);
      }
      finally
      {
         if (server != null)
            server.shutdown();
        
         if (session != null)
            session.stop();
      }
   }
  
   @Test
   public void testSessionExpired() throws Throwable
   {
      // now lets startup the server.
      ZookeeperTestServer server = null;
      ZookeeperSession session = null;
     
      try
      {
         server = new ZookeeperTestServer();
         server.start();

         // the createExpireSessionClient actually results in a Disconnected/SyncConnected rotating events.
         // ... so we need to filter those out since it will result in a callback.
         session =  new ZookeeperSession("127.0.0.1:" + port,5000);

         final ClusterId clusterId = new ClusterId(appname,"testSessionExpired");
         createClusterLevel(clusterId,session);
         TestWatcher callback = new TestWatcher(session)
         {
            @Override
            public void process()
            {
               try
               {
                  called.set(true);
                  logger.trace("process called on TestWatcher.");
                  session.exists(clusterId.asPath(), this);
                  session.getSubdirs(clusterId.asPath(), this);
               }
               catch (ClusterInfoException cie)
               {
                  throw new RuntimeException(cie);
               }
            }
         };
        
         // now see if the cluster works.
         callback.process(); // this registers the session with the callback as the Watcher
        
         // now reset the condition
         callback.called.set(false);
        
         ZookeeperTestServer.forceSessionExpiration(session.zkref.get());

         // we should see the session expiration in a callback
         assertTrue(poll(5000,callback,new Condition<TestWatcher>() {  @Override public boolean conditionMet(TestWatcher o) {  return o.called.get(); } }));
        
         // and eventually a reconnect
         assertTrue(poll(5000,callback,new Condition<TestWatcher>()
         { 
            @Override public boolean conditionMet(TestWatcher o)
            {
               try
               {
                  o.process();
                  return true;
               }
               catch (Throwable th)
               {
                  return false;
               }
            }
         }));
        
         createClusterLevel(clusterId,session);
         assertTrue(session.exists(clusterId.asPath(), callback));
      }
      finally
      {
         if (server != null)
            server.shutdown();
        
         if (session != null)
            session.stop();
      }
   }
  
   private static Dempsy getDempsyFor(ClusterId clusterId, ApplicationDefinition ad) throws Throwable
   {
      //------------------------------------------------------------------------------
      // here is a complete non-spring, non-DI Dempsy instantiation
      //------------------------------------------------------------------------------
      List<ApplicationDefinition> ads = new ArrayList<ApplicationDefinition>();
      ads.add(ad);
     
      Dempsy dempsy = new Dempsy();
      dempsy.setApplicationDefinitions(ads);
      dempsy.setClusterCheck(new SpecificClusterCheck(clusterId));
      dempsy.setDefaultRoutingStrategy(new DecentralizedRoutingStrategy(20, 1));
      dempsy.setDefaultSerializer(new JavaSerializer<Object>());
      dempsy.setDefaultStatsCollectorFactory(new StatsCollectorFactoryCoda());
      dempsy.setDefaultTransport(new TcpTransport());
      //------------------------------------------------------------------------------

      return dempsy;
   }
  
   @Test
   public void testSessionExpiredWithFullApp() throws Throwable
   {
      // now lets startup the server.
      ZookeeperTestServer server = null;
      final AtomicReference<ZookeeperSession> sessionRef = new AtomicReference<ZookeeperSession>();
      ZookeeperSession session = null;
      final AtomicLong processCount = new AtomicLong(0);
     
      Dempsy[] dempsy = new Dempsy[3];
      try
      {
         server = new ZookeeperTestServer();
         server.start();

         session = new ZookeeperSession("127.0.0.1:" + port,5000) {
            @Override
            public WatcherProxy makeWatcherProxy(ClusterInfoWatcher w)
            {
                     processCount.incrementAndGet();
                     return super.makeWatcherProxy(w);
            };
         };
         sessionRef.set(session);

         final FullApplication app = new FullApplication();
         ApplicationDefinition ad = app.getTopology();

         assertEquals(0,processCount.intValue()); // no calls yet

         dempsy[0] = getDempsyFor(new ClusterId(FullApplication.class.getSimpleName(),FullApplication.MyAdaptor.class.getSimpleName()),ad);
         dempsy[0].setClusterSessionFactory(new ZookeeperSessionFactory("127.0.0.1:" + port,5000));

         dempsy[1] = getDempsyFor(new ClusterId(FullApplication.class.getSimpleName(),FullApplication.MyMp.class.getSimpleName()),ad);
         dempsy[1].setClusterSessionFactory(new ZookeeperSessionFactory("127.0.0.1:" + port,5000));

         dempsy[2] = getDempsyFor(new ClusterId(FullApplication.class.getSimpleName(),FullApplication.MyRankMp.class.getSimpleName()),ad);
//         dempsy[2].setClusterSessionFactory(new ZookeeperSessionFactory<ClusterInformation, SlotInformation>("127.0.0.1:" + port,5000));
        
         dempsy[2].setClusterSessionFactory(new ClusterInfoSessionFactory()
         {
           
            @Override
            public ClusterInfoSession createSession() throws ClusterInfoException
            {
               return sessionRef.get();
            }
         });

         // start everything in reverse order
         for (int i = 2; i >= 0; i--)
            dempsy[i].start();
        
         // make sure the final count is incrementing
         long curCount = app.finalMessageCount.get();
         assertTrue(poll(30000,curCount,new Condition<Long>(){

            @Override
            public boolean conditionMet(Long o)
            {
               return app.finalMessageCount.get() > (o + 100L);
            }
           
         }));

         logger.trace("Killing zookeeper");
         ZooKeeper origZk = session.zkref.get();
         ZookeeperTestServer.forceSessionExpiration(origZk);
         logger.trace("Killed zookeeper");
        
         // wait for the current session to go invalid
         assertTrue(poll(baseTimeoutMillis, origZk, new Condition<ZooKeeper>()
         {
            @Override
            public boolean conditionMet(ZooKeeper o) { return !o.getState().isAlive(); }
         }));
        
         // make sure the final count is STILL incrementing
         curCount = app.finalMessageCount.get();
         assertTrue(poll(30000,curCount,new Condition<Long>(){

            @Override
            public boolean conditionMet(Long o)
            {
               return app.finalMessageCount.get() > (o + 100L);
            }
           
         }));

      }
      finally
      {
         if (server != null)
            server.shutdown();
        
         if (session != null)
            session.stop();
        
         for (int i = 0; i < dempsy.length; i++)
            if (dempsy[i] != null)
               dempsy[i].stop();
        
         for (int i = 0; i < dempsy.length; i++)
            if (dempsy[i] != null)
               assertTrue(dempsy[i].waitToBeStopped(baseTimeoutMillis));
      }
   }

   private AtomicBoolean forceIOException = new AtomicBoolean(false);
   private CountDownLatch forceIOExceptionLatch = new CountDownLatch(5);
  
   @Test
   public void testRecoverWithIOException() throws Throwable
   {
      // now lets startup the server.
      ZookeeperTestServer server = null;
      ZookeeperSession sessiong = null;
      try
      {
         server = new ZookeeperTestServer();
         server.start();

         final ZookeeperSession session = new ZookeeperSession("127.0.0.1:" + port,5000) {
            @Override
            protected ZooKeeper makeZooKeeperClient(String connectString, int sessionTimeout) throws IOException
            {
               if (forceIOException.get())
               {
                  forceIOExceptionLatch.countDown();
                  throw new IOException("Fake IO Problem.");
               }
               return super.makeZooKeeperClient(connectString, sessionTimeout);
            }
         };
         sessiong = session;
        
         final ClusterId clusterId = new ClusterId(appname,"testRecoverWithIOException");
         TestUtils.createClusterLevel(clusterId, session);
         TestWatcher callback = new TestWatcher(session)
         {
            @Override public void process()
            {
               try {
                  session.getSubdirs(clusterId.asPath(),this);
                  called.set(true);
               }
               catch (ClusterInfoException cie) { throw new RuntimeException(cie); }
            }
         };
        
         callback.process();
        
         // force the ioexception to happen
         forceIOException.set(true);
        
         ZookeeperTestServer.forceSessionExpiration(session.zkref.get());
        
         // now in the background it should be retrying but hosed.
         assertTrue(forceIOExceptionLatch.await(baseTimeoutMillis * 3, TimeUnit.MILLISECONDS));

         // now the getActiveSlots call should fail since i'm preventing the recovery by throwing IOExceptions
         assertTrue(TestUtils.poll(baseTimeoutMillis, clusterId, new Condition<ClusterId>()
         {
            @Override
            public boolean conditionMet(ClusterId o) throws Throwable {
               try { session.mkdir(o.asPath() + "/join-1", null, DirMode.EPHEMERAL); return false; } catch (ClusterInfoException e) { return true; }
            }
         }));
        
         callback.called.set(false); // reset the callbacker ...
        
         // now we should allow the code to proceed.
         forceIOException.set(false);
        
         // wait for the callback
         assertTrue(poll(baseTimeoutMillis,callback,new Condition<TestWatcher>() { @Override public boolean conditionMet(TestWatcher o) { return o.called.get(); } }));
        
         // this should eventually recover.
         assertTrue(TestUtils.poll(baseTimeoutMillis, clusterId, new Condition<ClusterId>()
         {
            @Override
            public boolean conditionMet(ClusterId o) throws Throwable {
               try { TestUtils.createClusterLevel(o, session); session.mkdir(o.asPath() + "/join-1", null, DirMode.EPHEMERAL); return true; } catch (ClusterInfoException e) { return false; }
            }
         }));
        
         session.getSubdirs(clusterId.asPath(),callback);
        
         // And join should work
         // And join should work
         assertTrue(TestUtils.poll(baseTimeoutMillis,clusterId , new Condition<ClusterId>()
         {
            @Override
            public boolean conditionMet(ClusterId o) throws Throwable {
               try { session.mkdir(o.asPath() + "/join-1", null, DirMode.EPHEMERAL); return true; } catch (ClusterInfoException e) { }
               return false;
            }
         }));

      }
      finally
      {
         if (server != null)
            server.shutdown();
        
         if (sessiong != null)
            sessiong.stop();
      }
   }
}
TOP

Related Classes of com.nokia.dempsy.cluster.zookeeper.TestZookeeperClusterResilience

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.