/*
* JBoss, Home of Professional Open Source
*
* Distributable under LGPL license.
* See terms of license at gnu.org.
*/
package org.jboss.cache;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jboss.cache.config.Configuration;
import org.jboss.cache.config.RuntimeConfig;
import org.jboss.cache.factories.annotations.ComponentName;
import org.jboss.cache.factories.annotations.Inject;
import org.jboss.cache.factories.annotations.Stop;
import org.jboss.cache.invocation.RemoteCacheInvocationDelegate;
import org.jboss.cache.lock.LockUtil;
import org.jboss.cache.lock.NodeLock;
import org.jboss.cache.lock.TimeoutException;
import org.jboss.cache.marshall.InactiveRegionAwareRpcDispatcher;
import org.jboss.cache.marshall.Marshaller;
import org.jboss.cache.marshall.MethodCall;
import org.jboss.cache.marshall.MethodDeclarations;
import org.jboss.cache.notifications.Notifier;
import org.jboss.cache.remoting.jgroups.CacheMessageListener;
import org.jboss.cache.statetransfer.StateTransferManager;
import org.jboss.cache.transaction.GlobalTransaction;
import org.jboss.cache.transaction.TransactionTable;
import org.jboss.cache.util.ThreadGate;
import org.jboss.cache.util.reflect.ReflectionUtil;
import org.jgroups.Address;
import org.jgroups.Channel;
import org.jgroups.ChannelException;
import org.jgroups.ChannelFactory;
import org.jgroups.ExtendedMembershipListener;
import org.jgroups.JChannel;
import org.jgroups.StateTransferException;
import org.jgroups.View;
import org.jgroups.blocks.GroupRequest;
import org.jgroups.blocks.RpcDispatcher;
import org.jgroups.blocks.RspFilter;
import org.jgroups.util.Rsp;
import org.jgroups.util.RspList;
import javax.transaction.TransactionManager;
import java.io.NotSerializableException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.Vector;
/**
* Manager that handles all RPC calls between JBoss Cache instances
*
* @author <a href="mailto:manik@jboss.org">Manik Surtani (manik@jboss.org)</a>
*/
public class RPCManagerImpl implements RPCManager
{
/**
* JGroups channel used to talk to the rest of the cluster. Assigned when the channel and dispatcher are
* initialised during start() and reset to null in stop().
*/
private Channel channel;
private Log log = LogFactory.getLog(RPCManagerImpl.class);
/**
* Members of the most recently accepted cluster view. All reads and writes in this class synchronize on the
* list itself, and threads waiting for the first view wait()/notifyAll() on it.
*/
private List<Address> members = new LinkedList<Address>();
/**
* True if this Cache is the coordinator, i.e. the first member of the current view is the local address.
* Recomputed on every accepted view and cleared in stop().
*/
private volatile boolean coordinator = false;
/**
* Thread gate used to block Dispatcher during JGroups FLUSH protocol. Closed on block(), opened on unblock(),
* and awaited before outgoing RPC calls when the channel supports flush.
*/
private final ThreadGate flushBlockGate = new ThreadGate();
/**
* JGroups RpcDispatcher in use. Null until the channel is initialised and again after stop().
*/
private RpcDispatcher disp = null;
/**
* JGroups message listener.
*/
private CacheMessageListener messageListener;
// Injected collaborators, set in setupDependencies().
private Configuration configuration;
private Notifier notifier;
private CacheSPI spi;
// Trace flag sampled once when this instance is created; later log level changes are not picked up.
private boolean trace = log.isTraceEnabled();
private RemoteCacheInvocationDelegate remoteDelegate;
private Marshaller marshaller;
private TransactionManager txManager;
private TransactionTable txTable;
// Both flags below are derived from the configuration in start().
private boolean isUsingBuddyReplication;
private boolean isInLocalMode;
/**
* Injection point for the components this RPC manager collaborates with. Simply stores each
* argument in the field of the same name.
*/
@Inject
private void setupDependencies(CacheMessageListener messageListener, Configuration configuration,
                               Notifier notifier, CacheSPI spi, Marshaller marshaller,
                               @ComponentName("remoteDelegate")RemoteCacheInvocationDelegate remoteDelegate,
                               TransactionTable txTable, TransactionManager txManager)
{
   // cluster-facing collaborators
   this.messageListener = messageListener;
   this.marshaller = marshaller;
   this.remoteDelegate = remoteDelegate;

   // cache-facing collaborators
   this.configuration = configuration;
   this.notifier = notifier;
   this.spi = spi;

   // transaction bookkeeping, used when breaking locks held by dead members
   this.txTable = txTable;
   this.txManager = txManager;
}
// ------------ START: Lifecycle methods ------------
// TODO: This needs to be started manually for now, rather than by @Start. See CacheImpl.internalStart()
/**
* Starts the RPC manager. In LOCAL mode no channel is created. In any clustered mode the channel and
* RPC dispatcher are initialised and the channel is connected, optionally fetching state as part of
* the connect.
*
* @throws CacheException if the channel cannot be connected or state cannot be fetched
*/
public void start()
{
   switch (configuration.getCacheMode())
   {
      case LOCAL:
         log.debug("cache mode is local, will not create the channel");
         isInLocalMode = true;
         break;
      case REPL_SYNC:
      case REPL_ASYNC:
      case INVALIDATION_ASYNC:
      case INVALIDATION_SYNC:
         isInLocalMode = false;
         // must be known before shouldFetchStateOnStartup() is consulted below
         isUsingBuddyReplication = configuration.getBuddyReplicationConfig() != null && configuration.getBuddyReplicationConfig().isEnabled();
         if (log.isDebugEnabled()) log.debug("Cache mode is " + configuration.getCacheMode());
         initialiseChannelAndRpcDispatcher();

         if (!shouldFetchStateOnStartup())
         {
            // no state transfer wanted: a plain connect is enough
            try
            {
               channel.connect(configuration.getClusterName());
            }
            catch (ChannelException connectFailure)
            {
               throw new CacheException("Unable to connect to JGroups channel", connectFailure);
            }
         }
         else
         {
            try
            {
               long begin = System.currentTimeMillis();
               // connect and state transfer in one step
               channel.connect(configuration.getClusterName(), null, null, configuration.getStateRetrievalTimeout());
               // if I am not the only (and first) member then wait for a state to arrive
               if (getMembers().size() > 1) messageListener.waitForState();
               if (log.isDebugEnabled())
               {
                  log.debug("connected, state was retrieved successfully (in " + (System.currentTimeMillis() - begin) + " milliseconds)");
               }
            }
            catch (StateTransferException stateFailure)
            {
               // make sure we disconnect from the channel before we throw this exception!
               // JBCACHE-761
               disconnect();
               throw new CacheException("Unable to fetch state on startup", stateFailure);
            }
            catch (ChannelException connectFailure)
            {
               throw new CacheException("Unable to connect to JGroups channel", connectFailure);
            }
            catch (Exception other)
            {
               throw new CacheException("Unable to fetch state on startup", other);
            }
         }

         if (log.isInfoEnabled()) log.info("Cache local address is " + getLocalAddress());
   }
   // also reached for LOCAL mode, so the flag is always derived from the configuration
   isUsingBuddyReplication = configuration.getBuddyReplicationConfig() != null && configuration.getBuddyReplicationConfig().isEnabled();
}
/**
* Disconnects from the cluster and closes the channel. Does nothing if there is no channel or the
* channel is not open.
*/
public void disconnect()
{
   if (channel == null || !channel.isOpen())
   {
      return;
   }
   log.info("Disconnecting and closing the Channel");
   channel.disconnect();
   channel.close();
}
/**
* Stops the RPC manager: disconnects and discards the channel, stops the dispatcher, forgets the
* known members and resets the coordinator flag.
*/
@Stop
public void stop()
{
   // a failure to close the channel must not prevent the rest of the shutdown
   try
   {
      disconnect();
   }
   catch (Exception closeFailure)
   {
      log.error("Problem closing channel; setting it to null", closeFailure);
   }

   // drop both our own reference and the one held by the runtime configuration
   channel = null;
   configuration.getRuntimeConfig().setChannel(null);

   if (disp != null)
   {
      log.info("Stopping the RpcDispatcher");
      disp.stop();
   }

   if (members != null)
   {
      synchronized (members)
      {
         members.clear();
      }
   }

   coordinator = false;
   disp = null;
}
/**
* Decides whether a state transfer should be initiated while connecting.
*
* @return true if the cache is active on startup, buddy replication is not in use, and either
*         in-memory state or persistent (cache loader) state is configured to be fetched.
*/
private boolean shouldFetchStateOnStartup()
{
   boolean fetchPersistentState = false;
   if (configuration.getCacheLoaderConfig() != null)
   {
      fetchPersistentState = configuration.getCacheLoaderConfig().isFetchPersistentState();
   }

   if (configuration.isInactiveOnStartup())
   {
      return false;
   }
   if (isUsingBuddyReplication)
   {
      return false;
   }
   return configuration.isFetchInMemoryState() || fetchPersistentState;
}
/**
* Obtains the JGroups channel and builds the RPC dispatcher on top of it.
* <p/>
* The channel is taken from the runtime configuration if one was supplied. Otherwise a multiplexer
* channel is attempted, and failing that a plain JChannel is created from the configured (or default)
* cluster properties. A newly created channel is registered with the runtime configuration.
*
* @throws CacheException if no channel could be created
*/
private void initialiseChannelAndRpcDispatcher() throws CacheException
{
   channel = configuration.getRuntimeConfig().getChannel();
   if (channel == null)
   {
      // Try to create a multiplexer channel
      channel = getMultiplexerChannel();
      if (channel != null)
      {
         ReflectionUtil.setValue(configuration, "accessible", true);
         configuration.setUsingMultiplexer(true);
         if (log.isDebugEnabled())
            log.debug("Created Multiplexer Channel for cache cluster " + configuration.getClusterName() + " using stack " + configuration.getMultiplexerStack());
      }
      else
      {
         try
         {
            if (configuration.getClusterConfig() == null)
            {
               log.debug("setting cluster properties to default value");
               channel = new JChannel(configuration.getDefaultClusterConfig());
            }
            else
            {
               if (trace)
               {
                  log.trace("Cache cluster properties: " + configuration.getClusterConfig());
               }
               channel = new JChannel(configuration.getClusterConfig());
            }
         }
         catch (ChannelException e)
         {
            // Without a channel nothing below can work; fail here with the real cause rather than
            // with a NullPointerException on the first use of the channel.
            throw new CacheException("Unable to create JGroups channel", e);
         }
      }
      configuration.getRuntimeConfig().setChannel(channel);
   }
   channel.setOpt(Channel.AUTO_RECONNECT, true);
   channel.setOpt(Channel.AUTO_GETSTATE, true);
   channel.setOpt(Channel.BLOCK, true);
   // always use the InactiveRegionAwareRpcDispatcher - exceptions due to regions not being active should not propagate to remote
   // nodes as errors. - Manik
   // but only if we are using region based marshalling?!??
   if (configuration.isUseRegionBasedMarshalling())
   {
      disp = new InactiveRegionAwareRpcDispatcher(channel, messageListener, new MembershipListenerAdaptor(), remoteDelegate);
   }
   else
   {
      disp = new RpcDispatcher(channel, messageListener, new MembershipListenerAdaptor(), remoteDelegate);
   }
   disp.setRequestMarshaller(marshaller);
   disp.setResponseMarshaller(marshaller);
}
/**
* Attempts to create a multiplexed channel from the channel factory held by the runtime configuration.
*
* @return the multiplexer channel, or null if no channel factory has been configured
* @throws CacheException if the factory fails to create the channel
*/
private JChannel getMultiplexerChannel() throws CacheException
{
   String stackName = configuration.getMultiplexerStack();
   ChannelFactory factory = configuration.getRuntimeConfig().getMuxChannelFactory();
   if (factory == null)
   {
      return null;
   }
   try
   {
      return (JChannel) factory.createMultiplexerChannel(stackName, configuration.getClusterName());
   }
   catch (Exception creationFailure)
   {
      throw new CacheException("Failed to create multiplexed channel using stack " + stackName, creationFailure);
   }
}
/**
* Breaks every lock on the given node that is owned by a transaction originating from one of the
* dead members, then does the same for all of the node's direct children, recursively.
*
* @param node        node whose lock (and subtree) is inspected
* @param deadMembers addresses of the members that have left the cluster
*/
private void removeLocksForDeadMembers(NodeSPI node, List deadMembers)
{
   NodeLock nodeLock = node.getLock();

   // gather the owners first; a Set so that a transaction holding several locks is only handled once
   Set<GlobalTransaction> orphanedOwners = new HashSet<GlobalTransaction>();
   Object writer = nodeLock.getWriterOwner();
   if (isLockOwnerDead(writer, deadMembers)) orphanedOwners.add((GlobalTransaction) writer);
   for (Object reader : nodeLock.getReaderOwners())
   {
      if (isLockOwnerDead(reader, deadMembers)) orphanedOwners.add((GlobalTransaction) reader);
   }

   for (GlobalTransaction gtx : orphanedOwners)
   {
      boolean originatedLocally = gtx.getAddress().equals(getLocalAddress());
      boolean lockBroken = LockUtil.breakTransactionLock(nodeLock, gtx, originatedLocally, txTable, txManager);
      if (lockBroken && trace) log.trace("Broke lock for node " + node.getFqn() + " held by " + gtx);
   }

   // Recursively unlock children
   for (Object childNode : node.getChildrenDirect())
   {
      removeLocksForDeadMembers((NodeSPI) childNode, deadMembers);
   }
}
/**
* Tests whether a lock owner is a global transaction whose address is among the dead members.
*
* @param owner       lock owner; may be null or something other than a GlobalTransaction
* @param deadMembers addresses of the members that have left the cluster
* @return true only if owner is a GlobalTransaction whose address is contained in deadMembers
*/
private boolean isLockOwnerDead(Object owner, List deadMembers)
{
   // instanceof is false for null, so this also covers a missing owner
   if (!(owner instanceof GlobalTransaction))
   {
      return false;
   }
   Object ownerAddress = ((GlobalTransaction) owner).getAddress();
   return deadMembers.contains(ownerAddress);
}
// ------------ END: Lifecycle methods ------------
// ------------ START: RPC call methods ------------
/**
* Invokes a method call on remote members without applying any response filter. Delegates to the
* variant taking an RspFilter, passing null for the filter.
*/
public List<Object> callRemoteMethods(List<Address> recipients, MethodCall methodCall, int mode, boolean excludeSelf, long timeout, boolean useOutOfBandMessage) throws Exception
{
   final RspFilter noFilter = null;
   return callRemoteMethods(recipients, methodCall, mode, excludeSelf, timeout, noFilter, useOutOfBandMessage);
}
/**
* Convenience variant that expresses the request mode as a boolean: a synchronous call waits for all
* responses (GET_ALL), an asynchronous one waits for none (GET_NONE).
*/
public List<Object> callRemoteMethods(List<Address> recipients, MethodCall methodCall, boolean synchronous, boolean excludeSelf, int timeout, boolean useOutOfBandMessage) throws Exception
{
   int requestMode;
   if (synchronous)
   {
      requestMode = GroupRequest.GET_ALL;
   }
   else
   {
      requestMode = GroupRequest.GET_NONE;
   }
   return callRemoteMethods(recipients, methodCall, requestMode, excludeSelf, timeout, useOutOfBandMessage);
}
/**
* Invokes a method call on a set of remote members and collects their responses.
*
* @param recipients          members to call; if null, all currently known members are used
* @param methodCall          the call to invoke remotely
* @param mode                JGroups group request mode; overridden by the group request mode set in the
*                            current invocation context's option overrides, if that is greater than -1
* @param excludeSelf         if true the local address is removed from the recipients
* @param timeout             timeout passed on to the dispatcher
* @param responseFilter      optional filter passed on to the dispatcher; may be null
* @param useOutOfBandMessage passed on to the dispatcher
* @return null if there is no dispatcher or no recipient remains; an empty list if the effective mode is
*         GET_NONE; otherwise one entry per response, where members that were suspected or did not reply
*         are represented by a ReplicationException
* @throws NotSerializableException if the dispatcher returns null
* @throws TimeoutException         if waiting for a FLUSH unblock times out
* @throws Exception                any exception (other than a ReplicationException) returned by a remote member
*/
public List<Object> callRemoteMethods(List<Address> recipients, MethodCall methodCall, int mode, boolean excludeSelf, long timeout, RspFilter responseFilter, boolean useOutOfBandMessage) throws Exception
{
   int modeToUse = mode;
   int preferredMode;
   if ((preferredMode = spi.getInvocationContext().getOptionOverrides().getGroupRequestMode()) > -1)
      modeToUse = preferredMode;
   RspList rsps;
   List<Object> retval;
   Vector<Address> validMembers;
   if (disp == null)
   {
      return null;
   }
   if (recipients != null)
      validMembers = new Vector<Address>(recipients);
   else
   {
      synchronized (members)
      {
         validMembers = new Vector<Address>(members);
      }
   }
   if (excludeSelf && !validMembers.isEmpty())
   {
      Address local_addr = getLocalAddress();
      if (local_addr != null) validMembers.remove(local_addr);
   }
   if (validMembers.isEmpty())
   {
      if (trace) log.trace("destination list is empty, discarding call");
      return null;
   }
   if (trace)
      log.trace("callRemoteMethods(): valid members are " + validMembers + " methods: " + methodCall + " Using OOB? " + useOutOfBandMessage);
   if (channel.flushSupported())
   {
      // hold outgoing calls while a FLUSH is in progress
      if (!flushBlockGate.await(configuration.getStateRetrievalTimeout()))
         throw new TimeoutException("State retrieval timed out waiting for flush unblock.");
   }
   rsps = responseFilter == null
         ? disp.callRemoteMethods(validMembers, methodCall, modeToUse, timeout, isUsingBuddyReplication, useOutOfBandMessage)
         : disp.callRemoteMethods(validMembers, methodCall, modeToUse, timeout, isUsingBuddyReplication, useOutOfBandMessage, responseFilter);
   // a null response is 99% likely to be due to a marshalling problem - we throw a NSE, this needs to be changed when
   // JGroups supports http://jira.jboss.com/jira/browse/JGRP-193
   if (rsps == null)
   {
      throw new NotSerializableException("RpcDispatcher returned a null. This is most often caused by args for " + methodCall.getName() + " not being serializable.");
   }
   // Decide on the mode that was actually used for the call, not the one the caller passed in: an
   // option override may have turned an asynchronous call into a synchronous one, in which case the
   // responses (and any remote exceptions) must still be processed.
   if (modeToUse == GroupRequest.GET_NONE) return Collections.emptyList();// async case
   if (trace) log.trace("(" + getLocalAddress() + "): responses for method " + methodCall.getName() + ":\n" + rsps);
   retval = new ArrayList<Object>(rsps.size());
   for (Rsp rsp : rsps.values())
   {
      if (rsp.wasSuspected() || !rsp.wasReceived())
      {
         CacheException ex;
         if (rsp.wasSuspected())
         {
            ex = new SuspectException("Suspected member: " + rsp.getSender());
         }
         else
         {
            ex = new TimeoutException("Replication timeout for " + rsp.getSender());
         }
         retval.add(new ReplicationException("rsp=" + rsp, ex));
      }
      else
      {
         Object value = rsp.getValue();
         if (value instanceof Exception && !(value instanceof ReplicationException))
         {
            // if we have any application-level exceptions make sure we throw them!!
            if (trace) log.trace("Recieved exception'" + value + "' from " + rsp.getSender());
            throw (Exception) value;
         }
         retval.add(value);
      }
   }
   return retval;
}
/**
* Tells whether a method call is a commit, either directly or wrapped (possibly several levels deep)
* as the first argument of a replicate call.
*/
private boolean isCommitMethod(MethodCall call)
{
   if (call.getMethodId() == MethodDeclarations.commitMethod_id)
   {
      return true;
   }
   if (call.getMethodId() != MethodDeclarations.replicateMethod_id)
   {
      return false;
   }
   // a replicate call carries the real call as its first argument
   MethodCall wrapped = (MethodCall) call.getArgs()[0];
   return isCommitMethod(wrapped);
}
// ------------ END: RPC call methods ------------
// ------------ START: Partial state transfer methods ------------
/**
* Fetches partial state rooted at sourceTarget on a remote member, to be integrated at
* integrationTarget. Both Fqns are encoded into a single state id, separated by
* StateTransferManager.PARTIAL_STATE_DELIMITER.
*/
public void fetchPartialState(List<Address> sources, Fqn sourceTarget, Fqn integrationTarget) throws Exception
{
   StringBuilder stateId = new StringBuilder();
   stateId.append(sourceTarget);
   stateId.append(StateTransferManager.PARTIAL_STATE_DELIMITER);
   stateId.append(integrationTarget);
   fetchPartialState(sources, stateId.toString());
}
/**
* Fetches the partial state for a subtree, using the subtree's string form as the state id.
*
* @throws IllegalArgumentException if subtree is null
*/
public void fetchPartialState(List<Address> sources, Fqn subtree) throws Exception
{
   if (subtree != null)
   {
      String stateId = subtree.toString();
      fetchPartialState(sources, stateId);
      return;
   }
   throw new IllegalArgumentException("Cannot fetch partial state. Null subtree.");
}
/**
* Tries each source in turn, other than this node, until one of them successfully provides the partial
* state identified by stateId. Missing arguments or an absence of usable sources are logged and
* otherwise ignored; so is the case where no member could provide the state.
*
* @param sources candidate members to fetch the state from
* @param stateId identifier of the partial state to fetch
*/
private void fetchPartialState(List<Address> sources, String stateId) throws Exception
{
   if (sources == null || sources.isEmpty() || stateId == null)
   {
      // should this really be throwing an exception? Are there valid use cases where partial state may not be available? - Manik
      // Yes -- cache is configured LOCAL but app doesn't know it -- Brian
      //throw new IllegalArgumentException("Cannot fetch partial state, targets are " + sources + " and stateId is " + stateId);
      if (log.isWarnEnabled())
         log.warn("Cannot fetch partial state, targets are " + sources + " and stateId is " + stateId);
      return;
   }
   List<Address> targets = new LinkedList<Address>(sources);
   //skip *this* node as a target
   targets.remove(getLocalAddress());
   if (targets.isEmpty())
   {
      // Definitely no exception here -- this happens every time the 1st node in the
      // cluster activates a region!! -- Brian
      if (log.isDebugEnabled()) log.debug("Cannot fetch partial state. There are no target members specified");
      return;
   }
   if (log.isDebugEnabled())
      log.debug("Node " + getLocalAddress() + " fetching partial state " + stateId + " from members " + targets);
   boolean successfulTransfer = false;
   for (Address target : targets)
   {
      if (log.isDebugEnabled())
         log.debug("Node " + getLocalAddress() + " fetching partial state " + stateId + " from member " + target);
      messageListener.setStateSet(false);
      successfulTransfer = channel.getState(target, stateId, configuration.getStateRetrievalTimeout());
      if (successfulTransfer)
      {
         try
         {
            messageListener.waitForState();
         }
         catch (Exception transferFailed)
         {
            // Treated as a failed attempt so the next member is tried, but keep the cause visible
            // instead of discarding it.
            if (log.isDebugEnabled())
               log.debug("Node " + getLocalAddress() + " failed waiting for partial state " + stateId + " from member " + target, transferFailed);
            successfulTransfer = false;
         }
      }
      if (log.isDebugEnabled())
         log.debug("Node " + getLocalAddress() + " fetching partial state " + stateId + " from member " + target + (successfulTransfer ? " successful" : " failed"));
      if (successfulTransfer) break;
   }
   if (!successfulTransfer)
   {
      if (log.isDebugEnabled())
         log.debug("Node " + getLocalAddress() + " could not fetch partial state " + stateId + " from any member " + targets);
   }
}
// ------------ END: Partial state transfer methods ------------
// ------------ START: Informational methods ------------
/**
* @return the local address reported by the channel, or null if there is no channel
*/
public Address getLocalAddress()
{
   if (channel == null)
   {
      return null;
   }
   return channel.getLocalAddress();
}
/**
* @return a copy of the current member list, or null when running in local mode
*/
public List<Address> getMembers()
{
   if (isInLocalMode)
   {
      return null;
   }
   List<Address> snapshot;
   synchronized (members)
   {
      snapshot = new ArrayList<Address>(members);
   }
   return snapshot;
}
/**
* @return the coordinator flag as computed from the most recently accepted view
*/
public boolean isCoordinator()
{
   return this.coordinator;
}
/**
* Returns the coordinator, i.e. the first member of the current view. If no view has been accepted yet
* this blocks until one arrives or the calling thread is interrupted.
*
* @return the coordinator's address, or null if there is no channel or the wait was interrupted before
*         any member became known
*/
public Address getCoordinator()
{
   if (channel == null)
   {
      return null;
   }
   synchronized (members)
   {
      while (members.isEmpty())
      {
         log.debug("getCoordinator(): waiting on viewAccepted()");
         try
         {
            members.wait();
         }
         catch (InterruptedException e)
         {
            log.error("getCoordinator(): Interrupted while waiting for members to be set", e);
            // Restore the interrupt status so code further up the call stack can still see
            // that this thread was asked to stop.
            Thread.currentThread().interrupt();
            break;
         }
      }
      return members.size() > 0 ? members.get(0) : null;
   }
}
// ------------ END: Informational methods ------------
/*----------------------- MembershipListener ------------------------*/
/**
* Receives membership and FLUSH callbacks from JGroups on behalf of the enclosing RPC manager.
*/
protected class MembershipListenerAdaptor implements ExtendedMembershipListener
{
   /**
   * Installs a new cluster view: breaks locks held by members that have left, replaces the member
   * list, recomputes the coordinator flag, notifies listeners and wakes up threads waiting for a view.
   */
   public void viewAccepted(View new_view)
   {
      Vector<Address> new_mbrs = new_view.getMembers();
      if (log.isInfoEnabled()) log.info("Received new cluster view: " + new_view);
      synchronized (members)
      {
         boolean needNotification = false;
         if (new_mbrs != null)
         {
            // Determine what members have been removed
            // and roll back any tx and break any locks
            Vector<Address> removed = new Vector<Address>(members);
            removed.removeAll(new_mbrs);
            // Only walk the cache tree when somebody actually left; with nobody removed there is
            // no lock to break, and the walk would just hold the members lock for longer.
            if (!removed.isEmpty())
            {
               removeLocksForDeadMembers(spi.getRoot(), removed);
            }
            members.clear();
            members.addAll(new_mbrs);
            needNotification = true;
         }
         // Now that we have a view, figure out if we are the coordinator
         coordinator = (members.size() != 0 && members.get(0).equals(getLocalAddress()));
         // now notify listeners - *after* updating the coordinator. - JBCACHE-662
         if (needNotification && notifier != null)
         {
            InvocationContext ctx = spi.getInvocationContext();
            notifier.notifyViewChange(new_view, ctx);
         }
         // Wake up any threads that are waiting to know who the members
         // are so they can figure out who the coordinator is
         members.notifyAll();
      }
   }

   /**
   * Called when a member is suspected. Nothing is done here.
   */
   public void suspect(Address suspected_mbr)
   {
   }

   /**
   * Indicates that a channel has received a BLOCK event from FLUSH protocol. The flush gate is
   * closed before the remote delegate is told to block.
   */
   public void block()
   {
      flushBlockGate.close();
      if (log.isDebugEnabled()) log.debug("Block received at " + getLocalAddress());
      remoteDelegate.block();
      if (log.isDebugEnabled()) log.debug("Block processed at " + getLocalAddress());
   }

   /**
   * Indicates that a channel has received a UNBLOCK event from FLUSH protocol. The flush gate is
   * only reopened after the remote delegate has been unblocked.
   */
   public void unblock()
   {
      if (log.isDebugEnabled()) log.debug("UnBlock received at " + getLocalAddress());
      remoteDelegate.unblock();
      if (log.isDebugEnabled()) log.debug("UnBlock processed at " + getLocalAddress());
      flushBlockGate.open();
   }
}
/*------------------- End of MembershipListener ----------------------*/
}