PartitionDegraderLoadBalancerState newState;
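// First pass over the clients: accumulate cluster-wide latency, call count, and
// drop-rate totals, and adjust the maxDropRate of clients enrolled in the recovery map.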
for (TrackerClientUpdater clientUpdater : trackerClientUpdaters)
{
TrackerClient client = clientUpdater.getTrackerClient();
DegraderControl degraderControl = client.getDegraderControl(partitionId);
double averageLatency = degraderControl.getLatency();
long callCount = degraderControl.getCallCount();
oldState.getPreviousMaxDropRate().put(client, clientUpdater.getMaxDropRate());
double clientWeight = client.getPartitionWeight(partitionId);
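// Accumulate cluster-wide totals: latency is weighted by call count, and the
// drop rate is weighted by the client's partition weight.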
sumOfClusterLatencies += averageLatency * callCount;
totalClusterCallCount += callCount;
clientDropRate = degraderControl.getCurrentComputedDropRate();
computedClusterDropSum += clientWeight * clientDropRate;
computedClusterWeight += clientWeight;
boolean recoveryMapContainsClient = newRecoveryMap.containsKey(client);
// The following block of code calculates and updates the maxDropRate if the client had been
// fully degraded in the past and has not received any requests since being fully degraded.
// To increase the chances of the client receiving a request, we change the maxDropRate, which
// influences the maximum value of computedDropRate, which is used to compute the number of
// points in the hash ring for the clients.
if (callCount == 0)
{
// if this client is enrolled in the program, decrease the maxDropRate
// it is important to note that this excludes clients that haven't gotten traffic
// due solely to low volume.
if (recoveryMapContainsClient)
{
double oldMaxDropRate = clientUpdater.getMaxDropRate();
double transmissionRate = 1.0 - oldMaxDropRate;
if (transmissionRate <= 0.0)
{
// We use the initialRecoveryLevel to indicate how many points to initially set
// the tracker client to when traffic has stopped flowing to this node.
transmissionRate = initialRecoveryLevel;
}
else
{
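// Otherwise grow the transmission rate geometrically by the ring ramp factor,
// capped at full transmission (1.0).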
transmissionRate *= ringRampFactor;
transmissionRate = Math.min(transmissionRate, 1.0);
}
newMaxDropRate = 1.0 - transmissionRate;
if (strategy == PartitionDegraderLoadBalancerState.Strategy.LOAD_BALANCE)
{
// if it's the hash ring's turn to adjust, then adjust the maxDropRate.
// Otherwise, we let the call dropping strategy take its turn, even if
// it may do nothing.
clientUpdater.setMaxDropRate(newMaxDropRate);
}
recoveryMapChanges = true;
}
}
else if (recoveryMapContainsClient)
{
// else if the recovery map contains the client and the call count was > 0
// tough love here, once the rehab clients start taking traffic, we
// restore their maxDropRate to its original value, and unenroll them
// from the program.
// This is safe because the hash ring points are controlled by the
// computedDropRate variable, and the call dropping rate is controlled by
// the overrideDropRate. The maxDropRate only serves to cap the computedDropRate and
// overrideDropRate.
// We store the maxDropRate and restore it here because the initialRecoveryLevel could
// potentially be higher than what the default maxDropRate allowed. (the maxDropRate doesn't
// necessarily have to be 1.0). For instance, if the maxDropRate was 0.99, and the
// initialRecoveryLevel was 0.05, then we need to store the old maxDropRate.
clientUpdater.setMaxDropRate(newRecoveryMap.get(client));
newRecoveryMap.remove(client);
recoveryMapChanges = true;
}
}
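// The cluster drop rate is the weight-averaged drop rate across all clients.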
computedClusterDropRate = computedClusterDropSum / computedClusterWeight;
debug(_log, "total cluster call count: ", totalClusterCallCount);
debug(_log,
"computed cluster drop rate for ",
trackerClientUpdaters.size(),
" nodes: ",
computedClusterDropRate);
if (oldState.getClusterGenerationId() == clusterGenerationId
&& totalClusterCallCount <= 0 && !recoveryMapChanges)
{
// if the cluster has not been called recently (total cluster call count is <= 0)
// and we already have a state with the same set of URIs (same cluster generation),
// and no clients are in rehab, then don't change anything.
debug(_log, "New state is the same as the old state so we're not changing anything. Old state = ", oldState
,", config= ", config);
return new PartitionDegraderLoadBalancerState(oldState, clusterGenerationId,
config.getClock().currentTimeMillis());
}
// update our overrides.
double newCurrentAvgClusterLatency = -1;
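// The average cluster latency can only be computed if the cluster received calls;
// otherwise it stays at -1.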
if (totalClusterCallCount > 0)
{
newCurrentAvgClusterLatency = sumOfClusterLatencies / totalClusterCallCount;
}
debug(_log, "average cluster latency: ", newCurrentAvgClusterLatency);
// compute points for every node in the cluster
double computedClusterSuccessRate = computedClusterWeight - computedClusterDropRate;
// This points map stores how many hash ring points to allocate for each tracker client.
Map<URI, Integer> points = new HashMap<URI, Integer>();
Map<URI, Integer> oldPointsMap = oldState.getPointsMap();
for (TrackerClientUpdater clientUpdater : trackerClientUpdaters)
{
TrackerClient client = clientUpdater.getTrackerClient();
double successfulTransmissionWeight;
URI clientUri = client.getUri();
// Don't take into account cluster health when calculating the number of points
// for each client. This is because the individual clients already take into account
// latency and errors, and a successfulTransmissionWeight can and should be made
// independent of other nodes in the cluster. Otherwise, one unhealthy client in a small
// cluster can take down the entire cluster if the avg latency is too high.
// The global drop rate will take into account the cluster latency. High cluster-wide error
// rates are not something d2 can address.
//
// this client's maxDropRate and currentComputedDropRate may have been adjusted if it's in the
// rehab program (to gradually send traffic its way).
DegraderControl degraderControl = client.getDegraderControl(partitionId);
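// The effective drop rate is the degrader's computed drop rate, capped by the
// (possibly recovery-adjusted) maxDropRate.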
double dropRate = Math.min(degraderControl.getCurrentComputedDropRate(),
clientUpdater.getMaxDropRate());
// calculate the weight as the probability of successful transmission to this
// node divided by the probability of successful transmission to the entire
// cluster