PartitionDegraderLoadBalancerState newState;
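// First pass over the clients: accumulate cluster-wide latency, call count, and
// drop-rate totals, and adjust the maxDropRate of clients enrolled in the recovery map.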
for (TrackerClientUpdater clientUpdater : trackerClientUpdaters)
{
TrackerClient client = clientUpdater.getTrackerClient();
DegraderControl degraderControl = client.getDegraderControl(partitionId);
double averageLatency = degraderControl.getLatency();
long callCount = degraderControl.getCallCount();
oldState.getPreviousMaxDropRate().put(client, clientUpdater.getMaxDropRate());
double clientWeight = client.getPartitionWeight(partitionId);
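// Accumulate cluster-wide totals: latency is weighted by call count, and the
// drop rate is weighted by the client's partition weight.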
sumOfClusterLatencies += averageLatency * callCount;
totalClusterCallCount += callCount;
clientDropRate = degraderControl.getCurrentComputedDropRate();
computedClusterDropSum += clientWeight * clientDropRate;
computedClusterWeight += clientWeight;
boolean recoveryMapContainsClient = newRecoveryMap.containsKey(client);
// The following block of code calculates and updates the maxDropRate if the client had been
// fully degraded in the past and has not received any requests since being fully degraded.
// To increase the chances of the client receiving a request, we change the maxDropRate, which
// influences the maximum value of computedDropRate, which is used to compute the number of
// points in the hash ring for the clients.
if (callCount == 0)
{
// if this client is enrolled in the program, decrease the maxDropRate
// it is important to note that this excludes clients that haven't gotten traffic
// due solely to low volume.
if (recoveryMapContainsClient)
{
double oldMaxDropRate = clientUpdater.getMaxDropRate();
double transmissionRate = 1.0 - oldMaxDropRate;
if (transmissionRate <= 0.0)
{
// We use the initialRecoveryLevel to indicate how many points to initially set
// the tracker client to when traffic has stopped flowing to this node.
transmissionRate = initialRecoveryLevel;
}
else
{
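// Otherwise grow the transmission rate geometrically by the ring ramp factor,
// capped at full transmission (1.0).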
transmissionRate *= ringRampFactor;
transmissionRate = Math.min(transmissionRate, 1.0);
}
newMaxDropRate = 1.0 - transmissionRate;
if (strategy == PartitionDegraderLoadBalancerState.Strategy.LOAD_BALANCE)
{
// if it's the hash ring's turn to adjust, then adjust the maxDropRate.
// Otherwise, we let the call dropping strategy take its turn, even if
// it may do nothing.
clientUpdater.setMaxDropRate(newMaxDropRate);
}
recoveryMapChanges = true;
}
}
else if (recoveryMapContainsClient)
{
// else if the recovery map contains the client and the call count was > 0
// tough love here, once the rehab clients start taking traffic, we
// restore their maxDropRate to its original value, and unenroll them
// from the program.
// This is safe because the hash ring points are controlled by the
// computedDropRate variable, and the call dropping rate is controlled by
// the overrideDropRate. The maxDropRate only serves to cap the computedDropRate and
// overrideDropRate.
// We store the maxDropRate and restore it here because the initialRecoveryLevel could
// potentially be higher than what the default maxDropRate allowed. (the maxDropRate doesn't
// necessarily have to be 1.0). For instance, if the maxDropRate was 0.99, and the
// initialRecoveryLevel was 0.05, then we need to store the old maxDropRate.
clientUpdater.setMaxDropRate(newRecoveryMap.get(client));
newRecoveryMap.remove(client);
recoveryMapChanges = true;
}
}
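// The cluster drop rate is the weight-averaged drop rate across all clients.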
computedClusterDropRate = computedClusterDropSum / computedClusterWeight;
debug(_log, "total cluster call count: ", totalClusterCallCount);
debug(_log,
"computed cluster drop rate for ",
trackerClientUpdaters.size(),
" nodes: ",
computedClusterDropRate);
if (oldState.getClusterGenerationId() == clusterGenerationId
&& totalClusterCallCount <= 0 && !recoveryMapChanges)
{
// if the cluster has not been called recently (total cluster call count is <= 0)
// and we already have a state with the same set of URIs (same cluster generation),
// and no clients are in rehab, then don't change anything.
debug(_log, "New state is the same as the old state so we're not changing anything. Old state = ", oldState
,", config= ", config);
return new PartitionDegraderLoadBalancerState(oldState, clusterGenerationId,
config.getClock().currentTimeMillis());
}
// update our overrides.
double newCurrentAvgClusterLatency = -1;
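// The average cluster latency can only be computed if the cluster received calls;
// otherwise it stays at -1.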
if (totalClusterCallCount > 0)
{
newCurrentAvgClusterLatency = sumOfClusterLatencies / totalClusterCallCount;
}
debug(_log, "average cluster latency: ", newCurrentAvgClusterLatency);
// compute points for every node in the cluster
double computedClusterSuccessRate = computedClusterWeight - computedClusterDropRate;
// This points map stores how many hash ring points to allocate for each tracker client.
Map<URI, Integer> points = new HashMap<URI, Integer>();
Map<URI, Integer> oldPointsMap = oldState.getPointsMap();
for (TrackerClientUpdater clientUpdater : trackerClientUpdaters)
{
TrackerClient client = clientUpdater.getTrackerClient();
double successfulTransmissionWeight;
URI clientUri = client.getUri();
// Don't take into account cluster health when calculating the number of points
// for each client. This is because the individual clients already take into account
// latency and errors, and a successfulTransmissionWeight can and should be made
// independent of other nodes in the cluster. Otherwise, one unhealthy client in a small
// cluster can take down the entire cluster if the avg latency is too high.
// The global drop rate will take into account the cluster latency. High cluster-wide error
// rates are not something d2 can address.
//
// this client's maxDropRate and currentComputedDropRate may have been adjusted if it's in the
// rehab program (to gradually send traffic its way).
DegraderControl degraderControl = client.getDegraderControl(partitionId);
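// The effective drop rate is the degrader's computed drop rate, capped by the
// (possibly recovery-adjusted) maxDropRate.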
double dropRate = Math.min(degraderControl.getCurrentComputedDropRate(),
clientUpdater.getMaxDropRate());
// calculate the weight as the probability of successful transmission to this
// node divided by the probability of successful transmission to the entire
// cluster