/*
* JBoss, the OpenSource J2EE webOS
*
* Distributable under LGPL license.
* See terms of license at gnu.org.
*/
package org.jboss.cache.statetransfer;
import java.util.Map;
import org.jboss.cache.Cache;
import org.jboss.cache.CacheException;
import org.jboss.cache.CacheSPI;
import org.jboss.cache.UnitTestCacheFactory;
import org.jboss.cache.Fqn;
import org.jboss.cache.Node;
import org.jboss.cache.Region;
import org.jboss.cache.RegionImpl;
import org.jboss.cache.config.Configuration;
import org.jboss.cache.config.Configuration.CacheMode;
import org.jboss.cache.config.Configuration.NodeLockingScheme;
import org.jboss.cache.factories.UnitTestCacheConfigurationFactory;
import org.jboss.cache.marshall.InactiveRegionException;
import org.jboss.cache.util.TestingUtil;
import org.jboss.cache.util.internals.EvictionController;
import org.jboss.cache.util.internals.ReplicationQueueNotifier;
import static org.testng.AssertJUnit.*;
import org.testng.annotations.Test;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
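/**
* Tests of CacheSPI's state transfer capability under concurrent region
* activation, concurrent cache use and eviction. This is a concrete test
* class: it declares its own replication version through
* <code>getReplicationVersion()</code>.
* <p/>
*
* @author <a href="mailto://brian.stansberry@jboss.com">Brian Stansberry</a>
*/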
@Test(groups = "functional", enabled = false)
public class StateTransferConcurrencyTest extends StateTransferTestBase
{
/**
 * Reports the replication version string used by this test class.
 *
 * @return the literal version string "3.0.0.GA"
 */
protected String getReplicationVersion()
{
   final String version = "3.0.0.GA";
   return version;
}
/**
 * Runs the concurrent-activation scenario with the <code>sync</code> flag
 * set, i.e. in a REPL_SYNC environment: several caches activate the same
 * subtree at the same time, as could happen with a farmed deployment.
 * The scenario itself lives in <code>concurrentActivationTest</code>.
 *
 * @throws Exception declared for the test framework
 */
public void testConcurrentActivationSync() throws Exception
{
   final boolean useSyncReplication = true;
   concurrentActivationTest(useSyncReplication);
}
/**
 * Runs the concurrent-activation scenario with the <code>sync</code> flag
 * cleared, i.e. in a REPL_ASYNC environment: several caches activate the
 * same subtree at the same time, as could happen with a farmed deployment.
 * The scenario itself lives in <code>concurrentActivationTest</code>.
 *
 * @throws Exception declared for the test framework
 */
public void testConcurrentActivationAsync() throws Exception
{
   final boolean useSyncReplication = false;
   concurrentActivationTest(useSyncReplication);
}
/**
 * Starts 5 caches and then concurrently activates the same region under
 * all 5, causing each to attempt a partial state transfer from the others.
 * As soon as each cache has activated its region, it does a put to a node
 * in the region, thus complicating the lives of the other caches trying
 * to get partial state.
 * <p/>
 * Failure condition is if any node sees an exception (other than an
 * <code>InactiveRegionException</code>, directly or as the cause) or if
 * the final state of all caches is not consistent. Any exception raised
 * by the scenario itself is printed and converted into a test failure,
 * so this method declares no checked exceptions.
 *
 * @param sync use REPL_SYNC or REPL_ASYNC
 */
private void concurrentActivationTest(boolean sync)
{
   String[] names = {"A", "B", "C", "D", "E"};
   int count = names.length;
   CacheActivator[] activators = new CacheActivator[count];
   try
   {
      // Create a semaphore and take all its tickets
      Semaphore semaphore = new Semaphore(count);
      semaphore.acquire(count);

      // Create activation threads that will block on the semaphore
      CacheSPI[] caches = new CacheSPI[count];
      for (int i = 0; i < count; i++)
      {
         activators[i] = new CacheActivator(semaphore, names[i], sync, caches);
         caches[i] = activators[i].getCacheSPI();
         activators[i].start();
      }

      // Make sure everyone is in sync
      TestingUtil.blockUntilViewsReceived(caches, 60000);

      // Release the semaphore to allow the threads to start work
      semaphore.release(count);

      // Sleep to ensure the threads get all the semaphore tickets
      while (semaphore.availablePermits() != 0) TestingUtil.sleepThread(100);

      // Reacquire the semaphore tickets; when we have them all
      // we know the threads are done
      for (int i = 0; i < count; i++)
      {
         boolean acquired = semaphore.tryAcquire(60, TimeUnit.SECONDS);
         if (!acquired) fail("failed to acquire semaphore " + i);
      }

      // allow any async calls to clear
      if (!sync)
      {
         waitTillAllReplicationsFinish(count, caches);
      }

      // Ensure the caches held by the activators see all the values
      for (int i = 0; i < count; i++)
      {
         // An InactiveRegionException (direct or as cause) is tolerated;
         // anything else recorded by the activator fails the test.
         Exception aException = activators[i].getException();
         boolean gotUnexpectedException = aException != null
               && !(aException instanceof InactiveRegionException ||
               aException.getCause() instanceof InactiveRegionException);
         if (gotUnexpectedException)
         {
            fail("Activator " + names[i] + " caught an exception " + aException);
         }

         // Every activator must see the value written by every other activator
         for (int j = 0; j < count; j++)
         {
            Fqn fqn = Fqn.fromRelativeElements(A_B, names[j]);
            assertEquals("Incorrect value for " + fqn + " on activator " + names[i],
                  "VALUE", activators[i].getCacheValue(fqn));
         }
      }
   }
   catch (Exception ex)
   {
      ex.printStackTrace();
      fail(ex.getLocalizedMessage());
   }
   finally
   {
      // A slot is still null if that activator's constructor threw; skip it
      // so the cleanup does not raise a NullPointerException that would
      // mask the original failure.
      for (CacheActivator activator : activators)
      {
         if (activator != null)
         {
            activator.cleanup();
         }
      }
   }
}
/**
 * Waits, one cache at a time, for the first <code>count</code> entries of
 * <code>caches</code> to finish replicating, using a
 * <code>ReplicationQueueNotifier</code> with a 5000 timeout argument
 * (presumably milliseconds; the notifier is not defined in this file).
 *
 * @param count  number of leading entries of <code>caches</code> to wait on
 * @param caches the caches whose replication should be drained
 * @throws Exception if waiting on any cache fails
 */
private void waitTillAllReplicationsFinish(int count, CacheSPI[] caches)
      throws Exception
{
   int index = 0;
   while (index < count)
   {
      ReplicationQueueNotifier notifier = new ReplicationQueueNotifier(caches[index]);
      notifier.waitUntillAllReplicated(5000);
      index++;
   }
}
/**
 * Starts two caches where each cache has N regions (N = 15 here). We put
 * some data in each of the regions. We run two threads where each thread
 * uses its own cache and goes into a loop where it activates the N regions,
 * with a 1 sec pause between activations.
 * <p/>
 * Threads are started with 10 sec difference.
 * <p/>
 * This test simulates a 10 sec staggered start of 2 servers in a cluster, with each server
 * then deploying webapps.
 * <p/>
 * Failure condition is if any node sees an exception (other than an
 * <code>InactiveRegionException</code>, directly or as the cause) or if an
 * activator cannot read back the value it wrote into each activated region.
 * Any exception raised by the scenario itself is converted into a test
 * failure, so this method declares no checked exceptions.
 *
 * @param sync use REPL_SYNC or REPL_ASYNC
 */
private void concurrentActivationTest2(boolean sync)
{
   String[] names = {"A", "B"};
   int count = names.length;
   int regionsToActivate = 15;
   int sleepTimeBetweenNodeStarts = 10000;
   StaggeredWebDeployerActivator[] activators = new StaggeredWebDeployerActivator[count];
   try
   {
      // Create a semaphore and take all its tickets
      Semaphore semaphore = new Semaphore(count);
      semaphore.acquire(count);

      // Create activation threads that will block on the semaphore
      CacheSPI[] caches = new CacheSPI[count];
      for (int i = 0; i < count; i++)
      {
         activators[i] = new StaggeredWebDeployerActivator(semaphore, names[i], sync, regionsToActivate);
         caches[i] = activators[i].getCacheSPI();

         // Release the semaphore to allow the thread to start working
         semaphore.release(1);
         activators[i].start();

         // Stagger the start of the next node
         TestingUtil.sleepThread(sleepTimeBetweenNodeStarts);
      }

      // Make sure everyone is in sync
      TestingUtil.blockUntilViewsReceived(caches, 60000);

      // Sleep to ensure the threads get all the semaphore tickets
      TestingUtil.sleepThread(1000);

      // Reacquire the semaphore tickets; when we have them all
      // we know the threads are done
      for (int i = 0; i < count; i++)
      {
         boolean acquired = semaphore.tryAcquire(60, TimeUnit.SECONDS);
         if (!acquired)
         {
            fail("failed to acquire semaphore " + i);
         }
      }

      // Allow any async calls to clear
      if (!sync)
      {
         waitTillAllReplicationsFinish(count, caches);
      }

      // Ensure the caches held by the activators see all the values
      for (int i = 0; i < count; i++)
      {
         // An InactiveRegionException (direct or as cause) is tolerated;
         // anything else recorded by the activator fails the test.
         Exception aException = activators[i].getException();
         boolean gotUnexpectedException = aException != null
               && !(aException instanceof InactiveRegionException ||
               aException.getCause() instanceof InactiveRegionException);
         if (gotUnexpectedException)
         {
            fail("Activator " + names[i] + " caught an exception " + aException);
         }

         // Each activator wrote to /a/<region>/<name> for every region, so
         // the region index j (not the activator index i) selects the node.
         for (int j = 0; j < regionsToActivate; j++)
         {
            Fqn fqn = Fqn.fromString("/a/" + j + "/" + names[i]);
            assertEquals("Incorrect value for " + fqn + " on activator " + names[i],
                  "VALUE", activators[i].getCacheValue(fqn));
         }
      }
   }
   catch (Exception ex)
   {
      fail(ex.getLocalizedMessage());
   }
   finally
   {
      // A slot is still null if that activator's constructor threw; skip it
      // so the cleanup does not raise a NullPointerException that would
      // mask the original failure.
      for (StaggeredWebDeployerActivator activator : activators)
      {
         if (activator != null)
         {
            activator.cleanup();
         }
      }
   }
}
/**
 * Staggered-startup scenario with the <code>sync</code> flag cleared
 * (REPL_ASYNC).
 * <p/>
 * Two caches are used, each by its own thread; every thread activates a
 * series of regions and writes a value into each, pausing 1 sec between
 * activations. The threads are started 10 sec apart, simulating a staggered
 * start of 2 servers in a cluster where each server then deploys webapps.
 * <p/>
 * Failure condition is if any node sees an exception or if the final state
 * of all caches is not consistent. The scenario itself lives in
 * <code>concurrentActivationTest2</code>.
 *
 * @throws Exception declared for the test framework
 */
public void testConcurrentStartupActivationAsync() throws Exception
{
   final boolean useSyncReplication = false;
   concurrentActivationTest2(useSyncReplication);
}
/**
 * Staggered-startup scenario with the <code>sync</code> flag set
 * (REPL_SYNC).
 * <p/>
 * Two caches are used, each by its own thread; every thread activates a
 * series of regions and writes a value into each, pausing 1 sec between
 * activations. The threads are started 10 sec apart, simulating a staggered
 * start of 2 servers in a cluster where each server then deploys webapps.
 * <p/>
 * Failure condition is if any node sees an exception or if the final state
 * of all caches is not consistent. The scenario itself lives in
 * <code>concurrentActivationTest2</code>.
 *
 * @throws Exception declared for the test framework
 */
public void testConcurrentStartupActivationSync() throws Exception
{
   final boolean useSyncReplication = true;
   concurrentActivationTest2(useSyncReplication);
}
/**
 * Exercises partial state transfer while other caches are writing
 * concurrently, with the <code>sync</code> flag set (REPL_SYNC).
 * The scenario itself lives in <code>concurrentUseTest</code>.
 *
 * @throws Exception if the underlying scenario throws
 */
public void testConcurrentUseSync() throws Exception
{
   final boolean useSyncReplication = true;
   concurrentUseTest(useSyncReplication);
}
/**
 * Exercises partial state transfer while other caches are writing
 * concurrently, with the <code>sync</code> flag cleared (REPL_ASYNC).
 * The scenario itself lives in <code>concurrentUseTest</code>.
 *
 * @throws Exception if the underlying scenario throws
 */
public void testConcurrentUseAsync() throws Exception
{
   final boolean useSyncReplication = false;
   concurrentUseTest(useSyncReplication);
}
/**
 * Initiates 5 caches: cacheA plus 4 stressor caches (B, C, D, E).
 * Each of the stressor caches begins rapidly generating puts against nodes
 * in a subtree for which it is responsible. cacheA deactivates and then
 * re-activates each of those subtrees while the puts are in progress, and
 * at the end confirms no stressor saw any exception (other than an
 * <code>InactiveRegionException</code>) and that cacheA and each stressor
 * hold the same value for every node of the stressor's subtree.
 *
 * @param sync whether to use REPL_SYNC or REPL_ASYNC
 * @throws Exception if cache creation, semaphore handling or waiting for
 *                   replication fails
 */
private void concurrentUseTest(boolean sync) throws Exception
{
   String[] names = {"B", "C", "D", "E"};
   int count = names.length;
   CacheStressor[] stressors = new CacheStressor[count];
   try
   {
      // The first cache we create is inactivated.
      CacheSPI<Object, Object> cacheA = createCache("cacheA", sync, true, false);
      CacheSPI[] caches = new CacheSPI[count + 1];
      caches[0] = cacheA;

      // Create a semaphore and take all its tickets
      Semaphore semaphore = new Semaphore(count);
      semaphore.acquire(count);

      // Create stressor threads that will block on the semaphore
      for (int i = 0; i < count; i++)
      {
         stressors[i] = new CacheStressor(semaphore, names[i], sync);
         caches[i + 1] = stressors[i].getCacheSPI();
         stressors[i].start();
      }

      // Make sure everyone's views are in sync
      TestingUtil.blockUntilViewsReceived(caches, 60000);

      // Repeat the basic test four times
      //for (int x = 0; x < 4; x++)
      for (int x = 0; x < 1; x++)
      {
         // Reset things by inactivating the region
         // and enabling the stressors
         for (int i = 0; i < count; i++)
         {
            Region r = cacheA.getRegion(Fqn.fromString("/" + names[i]), true);
            r.registerContextClassLoader(getClass().getClassLoader());
            r.deactivate();
            System.out.println("Run " + x + "-- /" + names[i] + " deactivated on A");
            stressors[i].startPuts();
         }

         // Release the semaphore to allow the threads to start work
         semaphore.release(count);

         // Sleep to ensure the threads get all the semaphore tickets
         // and to ensure puts are actively in progress
         TestingUtil.sleepThread((long) 1000);

         // Activate cacheA
         for (CacheStressor stressor : stressors)
         {
            System.out.println("Activating /" + stressor.getName() + " on A");
            cacheA.getRegion(Fqn.fromString("/" + stressor.getName()), true).activate();
            stressor.stopPuts();
            System.out.println("Run " + x + "-- /" + stressor.getName() + " activated on A");

            // Reacquire one semaphore ticket
            boolean acquired = semaphore.tryAcquire(60, TimeUnit.SECONDS);
            if (!acquired)
            {
               fail("failed to acquire semaphore " + stressor.getName());
            }

            // Pause to allow other work to proceed
            TestingUtil.sleepThread(100);
         }

         // Allow any async calls to clear. The caches array holds cacheA
         // plus one cache per stressor (count + 1 entries), so wait on all
         // of them rather than only the first 'count'.
         if (!sync)
         {
            waitTillAllReplicationsFinish(caches.length, caches);
         }

         // Ensure the stressors saw no exceptions
         for (int i = 0; i < count; i++)
         {
            if (stressors[i].getException() != null && !(stressors[i].getException() instanceof InactiveRegionException))
            {
               fail("Stressor " + names[i] + " caught an exception " + stressors[i].getException());
            }
         }

         // Compare cache contents
         for (int i = 0; i < count; i++)
         {
            for (int j = 0; j < SUBTREE_SIZE; j++)
            {
               Fqn fqn = Fqn.fromString("/" + names[i] + "/" + j);
               assertEquals("/A/" + j + " matches " + fqn,
                     cacheA.get(fqn, "KEY"),
                     stressors[i].getCacheSPI().get(fqn, "KEY"));
            }
         }
      }

      for (int i = 0; i < count; i++)
      {
         stressors[i].stopThread();
      }
   }
   finally
   {
      // Slots are null for stressors that were never constructed
      for (int i = 0; i < count; i++)
      {
         if (stressors[i] != null)
         {
            stressors[i].cleanup();
         }
      }
   }
}
/**
 * Test for JBCACHE-913.
 * <p/>
 * Creates one REPL_SYNC cache holding a single entry under /a/b/c, then
 * creates a second REPL_SYNC cache and inspects the eviction event queue
 * of the second cache's root region. The queue is expected to hold
 * 5 events when the node locking scheme is MVCC and 3 otherwise.
 *
 * @throws Exception if cache creation or queue polling fails
 */
public void testEvictionSeesStateTransfer() throws Exception
{
   Map<String, Cache> registry = cachesTL.get();

   // First cache: holds the data the second cache should observe
   Configuration firstConfig = UnitTestCacheConfigurationFactory.createConfiguration(CacheMode.REPL_SYNC, true);
   additionalConfiguration(firstConfig);
   Cache<Object, Object> source = new UnitTestCacheFactory<Object, Object>().createCache(firstConfig);
   registry.put("evict1", source);
   source.put(Fqn.fromString("/a/b/c"), "key", "value");

   // Second cache: created after the data exists
   Configuration secondConfig = UnitTestCacheConfigurationFactory.createConfiguration(CacheMode.REPL_SYNC, true);
   additionalConfiguration(secondConfig);
   Cache<Object, Object> receiver = new UnitTestCacheFactory<Object, Object>().createCache(secondConfig);
   registry.put("evict2", receiver);

   RegionImpl rootRegion = (RegionImpl) receiver.getRegion(Fqn.ROOT, false);

   // Original author's note: a VISIT event for / and ADD events for
   // /a, /a/b and /a/b/c are expected here.
   final int initialQueueSize = rootRegion.getEvictionEventQueue().size();

   // Drain the queue, printing each event with a running number
   int printed = 0;
   for (int remaining = initialQueueSize; remaining > 0; remaining = rootRegion.getEvictionEventQueue().size())
   {
      printed++;
      System.out.println(printed + ") Queue contains : " + rootRegion.getEvictionEventQueue().poll(0, TimeUnit.MILLISECONDS));
   }

   int expectedEvents;
   if (receiver.getConfiguration().getNodeLockingScheme() == NodeLockingScheme.MVCC)
   {
      expectedEvents = 5;
   }
   else
   {
      expectedEvents = 3;
   }
   assertEquals("Saw the expected number of node events", expectedEvents, initialQueueSize);
}
/**
 * Further test for JBCACHE-913.
 * <p/>
 * Fills a first cache with 25000 nodes under /org/jboss/data and 5 nodes
 * under /org/jboss/test/data, runs eviction on it, then creates a second
 * cache and checks that at least 4999 children of /org/jboss/data are
 * present on it. Two writer threads then keep adding nodes to both caches
 * while this thread polls the second cache for up to 10 seconds, looking
 * for a decrease in the child counts of both subtrees. Finally eviction is
 * triggered explicitly on the second cache and the resulting child counts
 * are checked.
 *
 * @throws Exception if cache creation, eviction or thread joining fails
 */
public void testEvictionAfterStateTransfer() throws Exception
{
   Configuration c = UnitTestCacheConfigurationFactory.createConfiguration(CacheMode.REPL_SYNC, true);
   additionalConfiguration(c);
   Cache<Object, Object> cache1 = new UnitTestCacheFactory<Object, Object>().createCache(c);
   Map<String, Cache> caches = cachesTL.get();
   caches.put("evict1", cache1);

   for (int i = 0; i < 25000; i++)
   {
      cache1.put(Fqn.fromString("/org/jboss/data/" + i), "key", "base" + i);
      if (i < 5)
      {
         cache1.put(Fqn.fromString("/org/jboss/test/data/" + i), "key", "data" + i);
         if (i == 0)
         {
            cache1.getRoot().getChild(Fqn.fromString("/org/jboss/data")).setResident(true); //so that it won't be counted for eviction
         }
      }
   }

   EvictionController ec1 = new EvictionController(cache1);
   ec1.startEviction();

   // NOTE(review): the expected 5000 presumably comes from the eviction
   // configuration, which is not visible in this file - confirm.
   int childrenSize = cache1.getRoot().getChild(Fqn.fromString("/org/jboss/data")).getChildren().size();
   assert childrenSize == 5000 : "Expected 5000, saw " + childrenSize;

   c = UnitTestCacheConfigurationFactory.createConfiguration(CacheMode.REPL_SYNC, true);
   additionalConfiguration(c);
   final Cache<Object, Object> cache2 = new UnitTestCacheFactory<Object, Object>().createCache(c);
   caches.put("evict2", cache2);

   Node<Object, Object> parent;// = cache2.getRoot().getChild(Fqn.fromString("/org/jboss/test/data"));
   parent = cache2.getRoot().getChild(Fqn.fromString("/org/jboss/data"));
   Set children = parent.getChildren();
   //4999 because the root of the region will also be counted, as it is not resident
   assertTrue("Minimum number of base children transferred", children.size() >= 4999);

   // Sleep 2.5 secs so the nodes we are about to create in data won't
   // exceed the 4 sec TTL when eviction thread runs
   TestingUtil.sleepThread(2500);

   // Writer thread that keeps adding nodes to both subtrees until stopped.
   class Putter extends Thread
   {
      Cache<Object, Object> cache = null;
      // Written by the test thread and polled by the writer thread, so it
      // must be volatile for the stop request to be reliably observed.
      volatile boolean stopped = false;
      // Written by the writer thread and read by the test thread.
      volatile Exception ex = null;

      public void run()
      {
         int i = 25000;
         while (!stopped)
         {
            try
            {
               cache.put(Fqn.fromString("/org/jboss/data/" + i), "key", "base" + i);
               cache.put(Fqn.fromString("/org/jboss/test/data/" + i), "key", "data" + i);
               i++;
            }
            catch (Exception e)
            {
               // Remember the most recent failure; it is asserted on below
               ex = e;
            }
         }
      }
   }

   Putter p1 = new Putter();
   p1.cache = cache1;
   p1.start();
   Putter p2 = new Putter();
   p2.cache = cache2;
   p2.start();

   Random rnd = new Random();
   TestingUtil.sleepThread(rnd.nextInt(200));

   int maxCountBase = 0;
   int maxCountData = 0;
   boolean sawBaseDecrease = false;
   boolean sawDataDecrease = false;
   long start = System.currentTimeMillis();
   Node root = cache2.getRoot();

   // Poll for up to 10 seconds, until both subtrees have been seen to shrink
   while ((System.currentTimeMillis() - start) < 10000)
   {
      parent = root.getChild(Fqn.fromString("/org/jboss/test/data"));
      children = parent.getChildren();
      if (children != null)
      {
         int dataCount = children.size();
         if (dataCount < maxCountData)
         {
            System.out.println("data " + dataCount + " < " + maxCountData + " elapsed = " + (System.currentTimeMillis() - start));
            sawDataDecrease = true;
         }
         else
         {
            maxCountData = dataCount;
         }
      }

      parent = cache2.getRoot().getChild(Fqn.fromString("/org/jboss/data"));
      children = parent.getChildren();
      if (children != null)
      {
         int baseCount = children.size();
         if (baseCount < maxCountBase)
         {
            System.out.println("base " + baseCount + " < " + maxCountBase + " elapsed = " + (System.currentTimeMillis() - start));
            sawBaseDecrease = true;
         }
         else
         {
            maxCountBase = baseCount;
         }
      }

      if (sawDataDecrease && sawBaseDecrease)
      {
         break;
      }

      TestingUtil.sleepThread(50);
   }

   p1.stopped = true;
   p2.stopped = true;
   p1.join(1000);
   p2.join(1000);

   assertTrue("Saw data decrease", sawDataDecrease);
   assertTrue("Saw base decrease", sawBaseDecrease);
   assertNull("No exceptions in p1", p1.ex);
   assertNull("No exceptions in p2", p2.ex);

   EvictionController ec2 = new EvictionController(cache2);
   ec2.startEviction();

   parent = cache2.getRoot().getChild(Fqn.fromString("/org/jboss/test/data"));
   children = parent.getChildren();
   if (children != null)
   {
      System.out.println(children.size());
      assertTrue("Excess children evicted", children.size() <= 5);
   }

   parent = cache2.getRoot().getChild(Fqn.fromString("/org/jboss/data"));
   children = parent.getChildren();
   if (children != null)
   {
      System.out.println(children.size());
      assertTrue("Excess children evicted", children.size() <= 25000);
   }

   // Explicitly trigger eviction for the test data region; the original
   // author's note says this evicts all data nodes due to their ttl of 4 secs
   ec2.evictRegionWithTimeToLive("/org/jboss/test/data");

   parent = cache2.getRoot().getChild(Fqn.fromString("/org/jboss/test/data"));
   if (parent != null)
   {
      children = parent.getChildren();
      if (children != null)
      {
         assertEquals("All data children evicted", 0, children.size());
      }
   }
}
/**
 * Cache user that, after a random delay, activates the shared
 * <code>A_B</code> region on its cache and writes one entry into a child
 * node named after itself.
 */
private class CacheActivator extends CacheUser
{
   // Array of all participating caches as handed in by the test; only
   // stored here, not read anywhere in this class.
   private CacheSPI[] caches;

   /**
    * @param semaphore semaphore passed through to the superclass
    * @param name      name of this activator, also used as its child node name
    * @param sync      replication mode flag passed through to the superclass
    * @param caches    array of all participating caches
    * @throws Exception if the superclass constructor fails
    */
   CacheActivator(Semaphore semaphore, String name, boolean sync, CacheSPI[] caches)
         throws Exception
   {
      // NOTE(review): the meaning of 'false' and 120000 is defined by
      // CacheUser, which is not part of this file.
      super(semaphore, name, sync, false, 120000);
      this.caches = caches;
   }

   /**
    * Sleeps for a random time, activates region <code>A_B</code>, then puts
    * "VALUE" under "KEY" in the child node of <code>A_B</code> named after
    * this activator.
    */
   @SuppressWarnings("unchecked")
   void useCache() throws Exception
   {
      String banner = "---- Cache" + name + " = " + cache.getLocalAddress() + " being used";
      System.out.println(banner);

      TestingUtil.sleepRandom(5000);
      createAndActivateRegion(cache, A_B);

      long activatedAt = System.currentTimeMillis();
      System.out.println(name + " activated region" + " " + activatedAt);

      Fqn ownNode = Fqn.fromRelativeElements(A_B, name);
      cache.put(ownNode, "KEY", "VALUE");
   }

   /**
    * @param fqn node to read from
    * @return the value stored under "KEY" at <code>fqn</code> in this activator's cache
    * @throws CacheException if the read fails
    */
   public Object getCacheValue(Fqn fqn) throws CacheException
   {
      Object value = cache.get(fqn, "KEY");
      return value;
   }
}
/**
 * Cache user that activates a series of regions /a/0, /a/1, ... one after
 * another, writing one entry into each and pausing between activations.
 */
private class StaggeredWebDeployerActivator extends CacheUser
{
   // Number of regions to activate; the constructor replaces this default
   int regionCount = 15;

   /**
    * @param semaphore   semaphore passed through to the superclass
    * @param name        name of this activator, also used as its child node name
    * @param sync        replication mode flag passed through to the superclass
    * @param regionCount number of regions to activate
    * @throws Exception if the superclass constructor fails
    */
   StaggeredWebDeployerActivator(Semaphore semaphore, String name, boolean sync, int regionCount)
         throws Exception
   {
      super(semaphore, name, sync, false);
      this.regionCount = regionCount;
   }

   /**
    * For each region index, activates /a/&lt;index&gt;, puts "VALUE" under
    * "KEY" in /a/&lt;index&gt;/&lt;name&gt;, then sleeps for 1000 (sleepThread
    * argument) before moving on to the next region.
    */
   void useCache() throws Exception
   {
      int region = 0;
      while (region < regionCount)
      {
         String regionPath = "/a/" + region;
         createAndActivateRegion(cache, Fqn.fromString(regionPath));

         Fqn ownNode = Fqn.fromString(regionPath + "/" + name);
         cache.put(ownNode, "KEY", "VALUE");

         TestingUtil.sleepThread(1000);
         region++;
      }
   }

   /**
    * @param fqn node to read from
    * @return the value stored under "KEY" at <code>fqn</code> in this activator's cache
    * @throws CacheException if the read fails
    */
   public Object getCacheValue(Fqn fqn) throws CacheException
   {
      Object value = cache.get(fqn, "KEY");
      return value;
   }
}
/**
 * Cache user that performs continuous puts into its own subtree
 * (/&lt;name&gt;/0 .. /&lt;name&gt;/SUBTREE_SIZE-1) until told to pause or stop.
 * <p/>
 * The control flags are written by the test thread through
 * {@link #startPuts()}, {@link #stopPuts()} and {@link #stopThread()} and
 * polled by the stressor's own thread in {@link #useCache()}, so they are
 * declared volatile to make those writes visible to the polling loops.
 */
private class CacheStressor extends CacheUser
{
   private final Random random = new Random(System.currentTimeMillis());
   // true while puts are paused; toggled by the test thread
   private volatile boolean putsStopped = false;
   // true once the stressor has been asked to terminate
   private volatile boolean stopped = false;

   /**
    * @param semaphore semaphore passed through to the superclass and used in useCache()
    * @param name      name of this stressor, also the root of its subtree
    * @param sync      replication mode flag passed through to the superclass
    * @throws Exception if the superclass constructor fails
    */
   CacheStressor(Semaphore semaphore,
                 String name,
                 boolean sync)
         throws Exception
   {
      super(semaphore, name, sync, true);
   }

   /**
    * Runs put cycles until stopped. Within a cycle, random values are put
    * into this stressor's subtree until puts are paused; a semaphore permit
    * is then released and the thread idles until puts are resumed or the
    * stressor is stopped. Once at least one put has been made, each new
    * cycle first acquires a semaphore permit (waiting up to 60 seconds).
    *
    * @throws Exception if a permit cannot be acquired in time or a put fails
    */
   void useCache() throws Exception
   {
      // Do continuous puts into the cache. Use our own nodes,
      // as we're not testing conflicts between writer nodes,
      // just whether activation causes problems
      int factor = 0;
      int i = 0; // total number of puts so far
      Fqn fqn = null;
      boolean acquired;
      while (!stopped)
      {
         if (i > 0)
         {
            acquired = semaphore.tryAcquire(60, TimeUnit.SECONDS);
            if (!acquired)
            {
               throw new Exception(name + " cannot acquire semaphore");
            }
         }

         while (!putsStopped)
         {
            // factor selects both the target node and the value, and is
            // also used as the pause length after the put
            factor = random.nextInt(50);
            fqn = Fqn.fromString("/" + name + "/" + String.valueOf(factor % SUBTREE_SIZE));
            Integer value = factor / SUBTREE_SIZE;
            cache.put(fqn, "KEY", value);
            TestingUtil.sleepThread((long) factor);
            i++;
         }

         System.out.println(name + ": last put [#" + i + "] -- " + fqn + " = " + (factor / SUBTREE_SIZE));
         semaphore.release();

         // Go to sleep until directed otherwise
         while (!stopped && putsStopped)
         {
            TestingUtil.sleepThread((long) 100);
         }
      }
   }

   /** Pauses the put loop. */
   public void stopPuts()
   {
      putsStopped = true;
   }

   /** Resumes (or enables) the put loop. */
   public void startPuts()
   {
      putsStopped = false;
   }

   /** Requests termination and interrupts the worker thread if it is still alive. */
   public void stopThread()
   {
      stopped = true;
      if (thread.isAlive())
      {
         thread.interrupt();
      }
   }
}
}