package org.jboss.cache.transaction;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.jboss.cache.Fqn;
import org.jboss.cache.PropertyConfigurator;
import org.jboss.cache.TreeCache;
import javax.naming.Context;
import javax.naming.InitialContext;
import javax.transaction.UserTransaction;
import java.util.Properties;
/**
* A test created to simulate an unexpected TimeoutException in JBossCache
* 1.3.0SP1 (not relevant for 1.2.4 and earlier releases). The error has been
* initially observed in a production environment, the test has been created to
* simplify the analysis without the complexity of hundreds of concurrent
* transactions.<br>
* This test is relevant for REPL_SYNC mode, (default) isolation level
* REPEATABLE_READ and SyncCommitPhase.<br>
* The error scenario is:
* <ul>
* <li>Two caches: SrcCache is the one where we put the modifications, DstCache
* only receives the replicated modifications.</li>
* <li>Two threads (resp. transactions) A and B modifying the same node X
* concurrently on SrcCache.</li>
* <li>Let's assume the transaction A is faster in acquiring the lock on X.</li>
* <li>Transaction A modifies the node X and starts committing: local prepare,
* remote prepare, local commit. The remote prepare will lock the X on DstCache
* and it will stay locked until the remote commit releases it later. Note that
* in JBossCache 1.3.0SP1 the transaction will release the lock on SrcCache
* immediately after the local commit step and before the remote (sync.) commit.
* It seems that this has changed between 1.2.4 and 1.3.0 releases.</li>
* <li>As soon as the lock on SrcCache is released by A, transaction B acquires
* the lock immediately and starts local modifications.<b>Note that in some
* cases B is fast enough to do the local prepare and send a remote prepare
* message before the remote commit message of A. </b>B is able to do this
* because the lock is released by A before its remote commit call.</li>
* <li>Now, we have the X locked by A on DstCache waiting for the remote commit
* of A to release it. The next messages from SrcCache are in the following
* order coming up the JGROUPS stack of the DstCache: remote prepare for B,
* remote commit for A.</li>
* <li>The remote prepare of B blocks on DstCache, trying to acquire the lock
* still held by A.</li>
* <li>The remote commit of A waits in the UP queue of the STATE_TRANSFER,
* waiting for the previous message (which is the remote prepare of B) to be
* processed.</li>
* <li>So A cannot be committed because it's blocked by B, which cannot be
* prepared, because it's blocked by A, which cannot be committed. Of course the
* result is a TimeoutException and too many rolled back transactions.</li>
* </ul>
*
* @author Marian Nikolov
* @author $Author: msurtani $
* @version $RCSfile$
* @version $Revision: 1842 $
* @version $Date: 2006-05-05 10:37:45 -0400 (Fri, 05 May 2006) $
*/
public class ReplicatedTransactionDeadlockTest extends TestCase {

    /** The number of worker threads to start concurrently. */
    private static final int NUM_WORKERS = 2;

    /** The number of test runs to perform. */
    private static final int NUM_RUNS = 100;

    /** The initial context factory properties. */
    private static final Properties PROPERTIES;

    /** The context factory to be used for the test. */
    private static final String CONTEXT_FACTORY =
            "org.jboss.cache.transaction.DummyContextFactory";

    /** The configuration resource both caches are configured from. */
    private static final String CACHE_CONFIG = "META-INF/replSync-service.xml";

    /** The transaction manager lookup class set on both caches. */
    private static final String TM_LOOKUP_CLASS =
            "org.jboss.cache.DummyTransactionManagerLookup";

    /** The original context factory to be restored after the test. */
    private String m_contextFactory = null;

    /**
     * Whether {@link #setUp()} has replaced the context factory system
     * property. Needed because {@code m_contextFactory == null} alone cannot
     * distinguish "nothing saved yet" from "the property was originally unset".
     */
    private boolean m_contextFactoryReplaced = false;

    /** Exception recorded if any of the worker threads fails. */
    private static volatile Exception mcl_exception = null;

    /** The source cache where we put modifications. */
    private TreeCache m_srcCache = null;

    /** The target cache where we replicate modifications. */
    private TreeCache m_dstCache = null;

    static {
        PROPERTIES = new Properties();
        // CONTEXT_FACTORY is a compile-time constant, so it is safe to use here
        PROPERTIES.put(Context.INITIAL_CONTEXT_FACTORY, CONTEXT_FACTORY);
    }

    /**
     * Constructor.
     *
     * @param name The test name.
     */
    public ReplicatedTransactionDeadlockTest(String name) {
        super(name);
    }

    /**
     * Installs the dummy context factory and starts the source and the
     * destination cache.
     *
     * @throws Exception Any exception thrown while configuring or starting
     *                   the caches.
     */
    protected void setUp() throws Exception {
        super.setUp();
        mcl_exception = null;

        // remember the current factory (possibly null) so tearDown can undo this
        m_contextFactory = System.getProperty(Context.INITIAL_CONTEXT_FACTORY);
        System.setProperty(Context.INITIAL_CONTEXT_FACTORY, CONTEXT_FACTORY);
        m_contextFactoryReplaced = true;

        DummyTransactionManager.getInstance();
        PropertyConfigurator config = new PropertyConfigurator();

        // The fields are assigned before the caches are started so that a
        // partially started cache is still reachable for cleanup.
        m_srcCache = new TreeCache();
        startCache(m_srcCache, config);

        m_dstCache = new TreeCache();
        startCache(m_dstCache, config);
    }

    /**
     * Configures the given cache for synchronous replication with a
     * synchronous commit phase, then creates and starts its service.
     *
     * @param cache  The cache to configure and start.
     * @param config The configurator used to apply the XML configuration.
     * @throws Exception Any exception thrown while configuring or starting
     *                   the cache.
     */
    private void startCache(TreeCache cache, PropertyConfigurator config)
            throws Exception {
        config.configure(cache, CACHE_CONFIG);
        cache.setTransactionManagerLookupClass(TM_LOOKUP_CLASS);
        cache.setCacheMode(TreeCache.REPL_SYNC);
        cache.setSyncCommitPhase(true);
        cache.createService();
        cache.startService();
    }

    /**
     * Destroys the dummy transaction manager, stops both caches and restores
     * the original context factory system property. Every cleanup step is
     * attempted even if an earlier one throws.
     *
     * @throws Exception Any exception thrown by one of the cleanup steps.
     */
    protected void tearDown() throws Exception {
        try {
            super.tearDown();
            DummyTransactionManager.destroy();
        } finally {
            try {
                if (m_srcCache != null) {
                    m_srcCache.stopService();
                }
            } finally {
                m_srcCache = null;
                try {
                    if (m_dstCache != null) {
                        m_dstCache.stopService();
                    }
                } finally {
                    m_dstCache = null;
                    restoreContextFactory();
                }
            }
        }
    }

    /**
     * Puts the context factory system property back into the state it had
     * before {@link #setUp()} ran. If the property was not set originally it
     * is removed again instead of being left pointing at the dummy factory.
     */
    private void restoreContextFactory() {
        if (!m_contextFactoryReplaced) {
            // setUp never touched the property, so there is nothing to undo
            return;
        }
        if (m_contextFactory != null) {
            System.setProperty(Context.INITIAL_CONTEXT_FACTORY,
                    m_contextFactory);
        } else {
            // Properties.remove is used rather than System.clearProperty to
            // stay compatible with pre-Java-5 runtimes.
            System.getProperties().remove(Context.INITIAL_CONTEXT_FACTORY);
        }
        m_contextFactory = null;
        m_contextFactoryReplaced = false;
    }

    /**
     * Test for a synchronously replicated cache with concurrent transactions on
     * the same node.<br>
     * This test fails very often with a TimeoutException.
     *
     * @throws Exception Any exception if thrown by the cache.
     */
    public void testConcurrentReplicatedTransaction() throws Exception {
        performTest();
    }

    /**
     * Perform the test using the pre-configured caches: repeatedly start
     * {@link #NUM_WORKERS} concurrent workers on the same FQN and fail as soon
     * as one of them has recorded an exception.
     *
     * @throws Exception Any exception if thrown by the cache.
     */
    private void performTest() throws Exception {
        // repeat the test several times since it's not always reproducible
        for (int run = 0; run < NUM_RUNS; run++) {
            // start several worker threads to work with the same FQN
            Worker[] workers = new Worker[NUM_WORKERS];
            for (int j = 0; j < workers.length; j++) {
                workers[j] = new Worker("worker " + run + ":" + j);
                workers[j].start();
            }
            // wait for all workers to complete before repeating the test
            for (int j = 0; j < workers.length; j++) {
                workers[j].join();
            }
            // Check after the workers have finished (not before the next run
            // starts), so that a failure in the very last run is reported too.
            Exception failure = mcl_exception;
            if (failure != null) {
                // terminate the test on the first failed worker
                fail("Due to an exception: " + failure);
            }
        }
    }

    /**
     * Records a worker failure. Only the first failure is kept so that a
     * later exception cannot hide the one that happened first.
     *
     * @param e The exception caught by a worker thread.
     */
    private static synchronized void recordFailure(Exception e) {
        if (mcl_exception == null) {
            mcl_exception = e;
        }
    }

    /**
     * Returns a user transaction to be associated with the calling thread.
     *
     * @return A user transaction.
     * @throws Exception Any exception thrown by the context lookup.
     */
    private UserTransaction getTransaction() throws Exception {
        return (UserTransaction) new InitialContext(PROPERTIES)
                .lookup("UserTransaction");
    }

    /**
     * Log a message to standard output, prefixed with the current time in
     * milliseconds and the calling thread.
     *
     * @param msg The message to be logged.
     */
    private void log(String msg) {
        System.out.println(System.currentTimeMillis() + " "
                + Thread.currentThread() + " " + msg);
    }

    /**
     * A worker thread that applies the concurrent modifications: it puts a
     * value into the node "Node" of the source cache inside its own user
     * transaction and commits it.
     *
     * @author Marian Nikolov
     * @author $Author: msurtani $
     * @version $RCSfile$
     * @version $Revision: 1842 $
     * @version $Date: 2006-05-05 10:37:45 -0400 (Fri, 05 May 2006) $
     */
    private class Worker extends Thread {

        /**
         * Constructor.
         *
         * @param name The thread name.
         */
        public Worker(String name) {
            super(name);
        }

        /**
         * Runs one transaction against the source cache. Any exception is
         * logged and recorded for the test thread instead of being thrown.
         */
        public void run() {
            try {
                UserTransaction tx = getTransaction();
                log("begin");
                tx.begin();
                log("put");
                m_srcCache.put(new Fqn("Node"), Boolean.FALSE, Boolean.TRUE);
                log("commit");
                tx.commit();
                log("leave");
            } catch (Exception e) {
                log("caught exception " + e);
                recordFailure(e);
            }
        }
    }

    /**
     * Builds the JUnit suite containing all tests of this class.
     *
     * @return The test suite.
     */
    public static Test suite() {
        return new TestSuite(ReplicatedTransactionDeadlockTest.class);
    }

    /**
     * Runs the suite with the JUnit text runner.
     *
     * @param args Ignored.
     * @throws Exception Any exception thrown by the test runner.
     */
    public static void main(String[] args) throws Exception {
        junit.textui.TestRunner.run(suite());
    }
}