diff --git a/concurrency-limits-core/src/main/java/com/netflix/concurrency/limits/limit/Gradient2Limit.java b/concurrency-limits-core/src/main/java/com/netflix/concurrency/limits/limit/Gradient2Limit.java index 11ec7d57..f3b53e9b 100644 --- a/concurrency-limits-core/src/main/java/com/netflix/concurrency/limits/limit/Gradient2Limit.java +++ b/concurrency-limits-core/src/main/java/com/netflix/concurrency/limits/limit/Gradient2Limit.java @@ -39,11 +39,14 @@ * applied to the base RTT so that the value is kept stable yet is allowed to adapt to long term changes in latency * characteristics. * - * The core algorithm re-calculates the limit every sampling window (ex. 1 second) using the formula + * The core algorithm re-calculates the limit on each request * * // Calculate the gradient limiting to the range [0.5, 1.0] to filter outliers * gradient = max(0.5, min(1.0, longtermRtt / currentRtt)); * + * // If gradient is 1.0 and in-flight requests are less than half of the currentLimit, skip + * // adjusting the limit. + * * // Calculate the new limit by applying the gradient and allowing for some queuing * newLimit = gradient * currentLimit + queueSize; * @@ -60,7 +63,7 @@ * 2. Transition from steady state to load * * In this state either the RPS to latency has spiked. The gradient is {@literal <} 1.0 due to a growing request queue that - * cannot be handled by the system. Excessive requests and rejected due to the low limit. The baseline RTT grows using + * cannot be handled by the system. Excessive requests are rejected due to the low limit. The baseline RTT grows using * exponential decay but lags the current measurement, which keeps the gradient {@literal <} 1.0 and limit low. * * 3. Transition from load to steady state @@ -258,15 +261,12 @@ private Gradient2Limit(Builder builder) { @Override public int _update(final long startTime, final long rtt, final int inflight, final boolean didDrop) { - final double queueSize = this.queueSize.apply((int)this.estimatedLimit); - this.lastRtt = rtt; final double shortRtt = (double)rtt; final double longRtt = this.longRtt.add(rtt).doubleValue(); shortRttSampleListener.addSample(shortRtt); longRttSampleListener.addSample(longRtt); - queueSizeSampleListener.addSample(queueSize); // If the long RTT is substantially larger than the short RTT then reduce the long RTT measurement. // This can happen when latency returns to normal after a prolonged prior of excessive load. Reducing the @@ -275,16 +275,24 @@ public int _update(final long startTime, final long rtt, final int inflight, fin this.longRtt.update(current -> current.doubleValue() * 0.95); } - // Don't grow the limit if we are app limited - if (inflight < estimatedLimit / 2) { - return (int) estimatedLimit; + // Rtt could be higher than longRtt because of smoothing longRtt updates + // so set to 1.0 to indicate no queuing. Otherwise calculate the slope and don't + // allow it to be reduced by more than half to avoid aggressive load-shedding due to + // outliers. If a request is dropped, choose lowest gradient value to initiate + // load-shedding. + final double gradient = didDrop ? 0.5 + : Math.max(0.5, Math.min(1.0, tolerance * longRtt / shortRtt)); + + double queueSize = 0; + if (gradient == 1.0) { + if (inflight < estimatedLimit / 2) { + // Don't grow the limit if not necessary + return (int) estimatedLimit; + } + queueSize = this.queueSize.apply((int)this.estimatedLimit); + queueSizeSampleListener.addSample(queueSize); } - // Rtt could be higher than rtt_noload because of smoothing rtt noload updates - // so set to 1.0 to indicate no queuing. Otherwise calculate the slope and don't - // allow it to be reduced by more than half to avoid aggressive load-shedding due to - // outliers. - final double gradient = Math.max(0.5, Math.min(1.0, tolerance * longRtt / shortRtt)); double newLimit = estimatedLimit * gradient + queueSize; newLimit = estimatedLimit * (1 - smoothing) + newLimit * smoothing; newLimit = Math.max(minLimit, Math.min(maxLimit, newLimit));