int pointsPerWeight = config.getPointsPerWeight();
DegraderLoadBalancerState newState;
for (TrackerClientUpdater clientUpdater : trackerClientUpdaters)
{
TrackerClient client = clientUpdater.getTrackerClient();
double averageLatency = client.getDegraderControl(DEFAULT_PARTITION_ID).getLatency();
long callCount = client.getDegraderControl(DEFAULT_PARTITION_ID).getCallCount();
oldState.getPreviousMaxDropRate().put(client, clientUpdater.getMaxDropRate());
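// accumulate cluster-wide totals: latency weighted by call count, and each client's
// computed drop rate weighted by its partition weight.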
sumOfClusterLatencies += averageLatency * callCount;
totalClusterCallCount += callCount;
double clientDropRate = client.getDegraderControl(DEFAULT_PARTITION_ID).getCurrentComputedDropRate();
computedClusterDropSum += client.getPartitionWeight(DEFAULT_PARTITION_ID) * clientDropRate;
computedClusterWeight += client.getPartitionWeight(DEFAULT_PARTITION_ID);
boolean recoveryMapContainsClient = newRecoveryMap.containsKey(client);
// The following block of code calculates and updates the maxDropRate if the client had been
// fully degraded in the past and has not received any requests since being fully degraded.
// To increase the chances of the client receiving a request, we change the maxDropRate, which
// influences the maximum value of computedDropRate, which is used to compute the number of
// points in the hash ring for the clients.
if (callCount == 0)
{
// if this client is enrolled in the program, decrease the maxDropRate
// it is important to note that this excludes clients that haven't gotten traffic
// due solely to low volume.
if (recoveryMapContainsClient)
{
// if it's the hash ring's turn to adjust, then adjust the maxDropRate.
// Otherwise, we let the call dropping strategy take its turn, even if
// it may do nothing.
if (strategy == DegraderLoadBalancerState.Strategy.LOAD_BALANCE)
{
double oldMaxDropRate = clientUpdater.getMaxDropRate();
double transmissionRate = 1.0 - oldMaxDropRate;
if (transmissionRate <= 0.0)
{
// We use the initialRecoveryLevel to indicate how many points to initially set
// the tracker client to when traffic has stopped flowing to this node.
transmissionRate = initialRecoveryLevel;
}
else
{
transmissionRate *= ringRampFactor;
transmissionRate = Math.min(transmissionRate, 1.0);
}
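// Illustrative example (hypothetical values, not the configured defaults): with
// initialRecoveryLevel = 0.01 and ringRampFactor = 2.0, an idle, fully degraded client's
// transmission rate ramps 0.01 -> 0.02 -> 0.04 -> ... on successive LOAD_BALANCE passes,
// so its maxDropRate falls 0.99 -> 0.98 -> 0.96 -> ... until traffic reaches it again.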
double newMaxDropRate = 1.0 - transmissionRate;
clientUpdater.setMaxDropRate(newMaxDropRate);
}
recoveryMapChanges = true;
}
} // else we don't really need to change the client maxDropRate.
else if (recoveryMapContainsClient)
{
// else if the recovery map contains the client and the call count was > 0:
// tough love here. Once the rehab clients start taking traffic, we
// restore their maxDropRate to its original value and unenroll them
// from the program.
// This is safe because the hash ring points are controlled by the
// computedDropRate variable, and the call dropping rate is controlled by
// the overrideDropRate. The maxDropRate only serves to cap the computedDropRate and
// overrideDropRate.
// We store the maxDropRate and restore it here because the initialRecoveryLevel could
// potentially be higher than what the default maxDropRate allowed. (the maxDropRate doesn't
// necessarily have to be 1.0). For instance, if the maxDropRate was 0.99, and the
// initialRecoveryLevel was 0.05 then we need to store the old maxDropRate.
clientUpdater.setMaxDropRate(newRecoveryMap.get(client));
newRecoveryMap.remove(client);
recoveryMapChanges = true;
}
}
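// the cluster-wide computed drop rate is the partition-weight-weighted average of the
// individual clients' computed drop rates accumulated above.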
double computedClusterDropRate = computedClusterDropSum / computedClusterWeight;
debug(_log, "total cluster call count: ", totalClusterCallCount);
debug(_log,
"computed cluster drop rate for ",
trackerClientUpdaters.size(),
" nodes: ",
computedClusterDropRate);
if (oldState.getClusterGenerationId() == clusterGenerationId
&& totalClusterCallCount <= 0 && !recoveryMapChanges)
{
// if the cluster has not been called recently (total cluster call count is <= 0)
// and we already have a state with the same set of URIs (same cluster generation),
// and no clients are in rehab, then don't change anything.
debug(_log, "New state is the same as the old state so we're not changing anything. Old state = ", oldState,
", config=", config);
return new DegraderLoadBalancerState(oldState, clusterGenerationId, config.getUpdateIntervalMs(),
config.getClock().currentTimeMillis());
}
// update our overrides.
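// recompute the call-count-weighted average latency across the cluster; it stays at -1
// when the cluster saw no calls, meaning no average is available for this interval.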
double newCurrentAvgClusterLatency = -1;
if (totalClusterCallCount > 0)
{
newCurrentAvgClusterLatency = sumOfClusterLatencies / totalClusterCallCount;
}
debug(_log, "average cluster latency: ", newCurrentAvgClusterLatency);
// This points map stores how many hash ring points to allocate for each tracker client.
Map<URI, Integer> points = new HashMap<URI, Integer>();
Map<URI, Integer> oldPointsMap = oldState.getPointsMap();
for (TrackerClientUpdater clientUpdater : trackerClientUpdaters)
{
TrackerClient client = clientUpdater.getTrackerClient();
double successfulTransmissionWeight;
URI clientUri = client.getUri();
// Don't take into account cluster health when calculating the number of points
// for each client. This is because the individual clients already take into account
// latency, and a successfulTransmissionWeight can and should be made
// independent of other nodes in the cluster. Otherwise, one unhealthy client in a small
// cluster can take down the entire cluster if the avg latency is too high.
// The global drop rate will take into account the cluster latency. High cluster-wide error
// rates are not something d2 can address.
//
// this client's maxDropRate and currentComputedDropRate may have been adjusted if it's in the
// rehab program (to gradually send traffic its way).
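// the effective drop rate used for the weight below is the degrader's computed drop rate,
// capped by the (possibly rehab-adjusted) maxDropRate.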
double dropRate = Math.min(client.getDegraderControl(DEFAULT_PARTITION_ID).getCurrentComputedDropRate(),
clientUpdater.getMaxDropRate());
// calculate the weight from the probability of a successful transmission to this
// node only; per the note above, it is deliberately independent of the health of
// the rest of the cluster.
successfulTransmissionWeight = client.getPartitionWeight(DEFAULT_PARTITION_ID) * (1.0 - dropRate);
// calculate the weight as the probability of a successful transmission to this node
// multiplied by the client's self-defined weight. thus, the node's final weight
// takes into account both the self defined weight (to account for different
// hardware in the same cluster) and the performance of the node (as defined by the