private void performFailover(Integer failedNodeID) throws Exception
{
log.info("JBoss Messaging is failing over for failed node " + failedNodeID +
". If there are many messages to reload this may take some time...");
ClusterNotification notification = new ClusterNotification(ClusterNotification.TYPE_FAILOVER_START, failedNodeID.intValue(), null);
clusterNotifier.sendNotification(notification);
log.debug(this + " announced it is starting failover procedure");
pm.mergeTransactions(failedNodeID.intValue(), thisNodeID);
// Need to lock
lock.writeLock().acquire();
try
{
Map nameMap = (Map)nameMaps.get(failedNodeID);
List toRemove = new ArrayList();
if (nameMap != null)
{
Iterator iter = nameMap.values().iterator();
while (iter.hasNext())
{
Binding binding = (Binding)iter.next();
Queue queue = binding.queue;
if (queue.isRecoverable() && queue.getNodeID() == failedNodeID.intValue())
{
toRemove.add(binding);
}
}
}
Iterator iter = toRemove.iterator();
while (iter.hasNext())
{
Binding binding = (Binding)iter.next();
Condition condition = binding.condition;
Queue queue = binding.queue;
// Sanity check
if (!queue.isRecoverable())
{
throw new IllegalStateException("Found non recoverable queue " +
queue.getName() + " in map, these should have been removed!");
}
// Sanity check
if (!queue.isClustered())
{
throw new IllegalStateException("Queue " + queue.getName() + " is not clustered!");
}
//Remove from the in-memory map - no need to broadcast anything - they will get removed from other nodes in memory
//maps when the other nodes detect failure
removeBindingInMemory(binding.queue.getNodeID(), binding.queue.getName());
//Find if there is a local queue with the same name
Queue localQueue = null;
if (localNameMap != null)
{
Binding b = (Binding)localNameMap.get(queue.getName());
if (b != null)
{
localQueue = b.queue;
}
}
if (localQueue != null)
{
//need to merge the queues
log.debug(this + " has already a queue: " + queue.getName() + " queue so merging queues");
localQueue.mergeIn(queue.getChannelID(), failedNodeID.intValue());
log.debug("Merged queue");
}
else
{
//Cannot failover if there is no queue deployed.
throw new IllegalStateException("Cannot failover " + queue.getName() + " since it does not exist on this node. " +
"You must deploy your clustered destinations on ALL nodes of the cluster");
}
//Delete from storage
//Note we must do this *after* we have done any merge.
//This is because if we did it first and the merge then failed, we'd be left with the old channel deleted
//but the messages would still be in the old channel
//meaning they would have disappeared from the users point of view and it would involve manual
//database intervention to correct it
//See http://jira.jboss.com/jira/browse/JBMESSAGING-1113
deleteBindingFromStorage(queue);
log.debug(this + " deleted binding for " + queue.getName());
// Note we do not need to send an unbind request across the cluster - this is because
// when the node crashes a view change will hit the other nodes and that will cause
// all binding data for that node to be removed anyway.
}
log.debug(this + ": server side fail over is now complete");
}
finally
{
lock.writeLock().release();
}
//Now clean the data for the failed node
//TODO - does this need to be inside the lock above?
cleanDataForNode(failedNodeID);
log.debug(this + " announcing that failover procedure is complete");
notification = new ClusterNotification(ClusterNotification.TYPE_FAILOVER_END, failedNodeID.intValue(), null);
clusterNotifier.sendNotification(notification);
//for testing only
sendJMXNotification(FAILOVER_COMPLETED_NOTIFICATION);