// Because of the majority check in onAbruptLeave, we assume that at most one partition was able to evolve
// and install new cache topologies. The other(s) would have entered degraded mode, and they would keep
// the original topology.
// One scenario not covered is if two partitions started separately and they have completely different topologies.
// In that case, there is no way to prevent the two partitions from having inconsistent data.
CacheTopology maxActiveTopology = null;
CacheTopology maxDegradedTopology = null;
CacheTopology maxUnavailableTopology = null;
CacheTopology maxStableTopology = null;
for (CacheStatusResponse response : statusResponses) {
CacheTopology partitionStableTopology = response.getStableTopology();
if (maxStableTopology == null || !maxStableTopology.equals(partitionStableTopology)) {
log.tracef("Found stable partition topology: %s", maxStableTopology);
}
if (partitionStableTopology == null) {
// The node hasn't properly joined yet.
continue;
}
if (maxStableTopology == null || maxStableTopology.getTopologyId() < partitionStableTopology.getTopologyId()) {
maxStableTopology = partitionStableTopology;
}
CacheTopology partitionTopology = response.getCacheTopology();
if (partitionTopology == null) {
// The node hasn't properly joined yet.
continue;
}
if (response.getAvailabilityMode() == AvailabilityMode.AVAILABLE) {
if (maxActiveTopology == null || !maxActiveTopology.equals(partitionTopology)) {
log.tracef("Found active partition topology: %s", maxActiveTopology);
}
if (maxActiveTopology == null || maxActiveTopology.getTopologyId() < partitionTopology.getTopologyId()) {
maxActiveTopology = partitionTopology;
}
} else if (response.getAvailabilityMode() == AvailabilityMode.DEGRADED_MODE) {
if (maxDegradedTopology == null || !maxDegradedTopology.equals(partitionTopology)) {
log.tracef("Found degraded partition topology: %s", maxDegradedTopology);
}
if (maxDegradedTopology == null || maxDegradedTopology.getTopologyId() < partitionTopology.getTopologyId()) {
maxDegradedTopology = partitionTopology;
}
} else if (response.getAvailabilityMode() == AvailabilityMode.UNAVAILABLE) {
if (maxUnavailableTopology == null || !maxUnavailableTopology.equals(partitionTopology)) {
log.tracef("Found unavailable partition topology: %s", maxUnavailableTopology);
}
if (maxUnavailableTopology == null || maxUnavailableTopology.getTopologyId() < partitionTopology.getTopologyId()) {
maxUnavailableTopology = partitionTopology;
}
} else {
log.unexpectedAvailabilityMode(context.getAvailabilityMode(), context.getCacheName(),
response.getCacheTopology());
}
}
CacheTopology mergedTopology;
AvailabilityMode mergedAvailabilityMode;
if (maxUnavailableTopology != null) {
log.debugf("One of the partitions is unavailable, using that partition's topology and staying in unavailable mode");
mergedAvailabilityMode = AvailabilityMode.UNAVAILABLE;
mergedTopology = maxUnavailableTopology;
} else if (maxActiveTopology != null) {
log.debugf("One of the partitions is available, using that partition's topology and staying in available mode");
mergedAvailabilityMode = AvailabilityMode.AVAILABLE;
mergedTopology = maxActiveTopology;
} else if (maxDegradedTopology != null) {
log.debugf("No active or unavailable partitions, so all the partitions must be in degraded mode.");
mergedAvailabilityMode = AvailabilityMode.DEGRADED_MODE;
mergedTopology = maxDegradedTopology;
} else {
log.debugf("No current topology, recovered only joiners for cache %s", context.getCacheName());
mergedAvailabilityMode = AvailabilityMode.AVAILABLE;
mergedTopology = null;
}
// Cancel any pending rebalance by removing the pending CH.
// Needed because we don't recover the rebalance confirmation status (yet).
// By definition, the stable topology doesn't have a rebalance in progress.
if (mergedTopology != null && mergedTopology.getPendingCH() != null) {
mergedTopology = new CacheTopology(mergedTopology.getTopologyId() + 1, mergedTopology.getRebalanceId(),
mergedTopology.getCurrentCH(), null);
}
log.debugf("Updating topologies after merge for cache %s, current topology = %s, stable topology = %s, availability mode = %s",
context.getCacheName(), mergedTopology, maxStableTopology, mergedAvailabilityMode);
context.updateTopologiesAfterMerge(mergedTopology, maxStableTopology, mergedAvailabilityMode);
// It shouldn't be possible to recover from unavailable mode without user action
if (mergedAvailabilityMode == AvailabilityMode.UNAVAILABLE) {
log.debugf("After merge, cache %s is staying in unavailable mode", context.getCacheName());
context.updateAvailabilityMode(AvailabilityMode.UNAVAILABLE);
return;
}
List<Address> newMembers = new ArrayList<>(mergedTopology.getMembers());
newMembers.retainAll(context.getExpectedMembers());
if (maxStableTopology != null) {
List<Address> stableMembers = maxStableTopology.getMembers();
List<Address> lostMembers = new ArrayList<>(stableMembers);
lostMembers.removeAll(context.getExpectedMembers());