Skip to content

Commit fd9529f

Browse files
authored
Stabilize performance test SLAs with tolerance thresholds (#16371)
* Stabilize performance test SLAs with tolerance thresholds

  Fixes #14793

  This change adds tolerance thresholds to SLA checks that previously used strict equality for request counts/rates. Due to timing variations in test execution, actual counts can vary slightly from expected values, causing false SLA failures.

  Changes:
  - scale-from-zero SLA 3: Allow 1 request tolerance
  - dataplane-probe SLA 2: Allow 0.1% tolerance (min 1 request)
  - rollout-probe SLA 2: Allow 1% rate tolerance

  This approach follows the pattern already established in load-test.

  Signed-off-by: aviralgarg05 <gargaviral99@gmail.com>

* Fix performance SLA edge cases

* Fix style check failures

---------

Signed-off-by: aviralgarg05 <gargaviral99@gmail.com>
1 parent c745b31 commit fd9529f

File tree

5 files changed

+125
-11
lines changed

5 files changed

+125
-11
lines changed

test/performance/benchmarks/dataplane-probe/main.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,11 +175,24 @@ func checkSLA(results *vegeta.Metrics, slaMin time.Duration, slaMax time.Duratio
175175
return fmt.Errorf("SLA 1 failed. P95 latency is not in %d-%dms time range: %s", slaMin, slaMax, results.Latencies.P95)
176176
}
177177

178-
// SLA 2: making sure the defined total request is met
179-
if results.Requests == uint64(rate.Rate(time.Second)*duration.Seconds()) {
180-
log.Printf("SLA 2 passed. vegeta total request is %d", results.Requests)
178+
// SLA 2: making sure the defined total request count is met (within a 0.1% tolerance, minimum 1 request)
179+
expectedRequests := uint64(rate.Rate(time.Second) * duration.Seconds())
180+
threshold := expectedRequests / 1000 // 0.1% tolerance
181+
if threshold == 0 {
182+
threshold = 1 // Minimum tolerance of 1 request
183+
}
184+
185+
var difference uint64
186+
if results.Requests >= expectedRequests {
187+
difference = results.Requests - expectedRequests
188+
} else {
189+
difference = expectedRequests - results.Requests
190+
}
191+
192+
if difference <= threshold {
193+
log.Printf("SLA 2 passed. vegeta total request is %d, expected requests is %d (tolerance: %d)", results.Requests, expectedRequests, threshold)
181194
} else {
182-
return fmt.Errorf("SLA 2 failed. vegeta total request is %d, expected total request is %f", results.Requests, rate.Rate(time.Second)*duration.Seconds())
195+
return fmt.Errorf("SLA 2 failed. vegeta total request is %d, expected requests is %d (tolerance: %d)", results.Requests, expectedRequests, threshold)
183196
}
184197

185198
return nil
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
Copyright 2026 The Knative Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"testing"
21+
"time"
22+
23+
vegeta "github.com/tsenart/vegeta/v12/lib"
24+
)
25+
26+
func TestCheckSLATotalRequestsTolerance(t *testing.T) {
27+
rate := vegeta.Rate{Freq: 1000, Per: time.Second}
28+
29+
tests := []struct {
30+
name string
31+
requests uint64
32+
wantErr bool
33+
}{
34+
{name: "within tolerance below expected", requests: 999},
35+
{name: "within tolerance above expected", requests: 1001},
36+
{name: "outside tolerance below expected", requests: 998, wantErr: true},
37+
{name: "outside tolerance above expected", requests: 1002, wantErr: true},
38+
}
39+
40+
for _, tt := range tests {
41+
t.Run(tt.name, func(t *testing.T) {
42+
results := &vegeta.Metrics{
43+
Requests: tt.requests,
44+
Latencies: vegeta.LatencyMetrics{
45+
P95: 10 * time.Millisecond,
46+
},
47+
}
48+
49+
err := checkSLA(results, 0, 20*time.Millisecond, rate, time.Second)
50+
if (err != nil) != tt.wantErr {
51+
t.Fatalf("checkSLA() error = %v, wantErr %v", err, tt.wantErr)
52+
}
53+
})
54+
}
55+
}

test/performance/benchmarks/rollout-probe/main.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -228,11 +228,13 @@ func checkSLA(results *vegeta.Metrics, rate vegeta.ConstantPacer) error {
228228
return fmt.Errorf("SLA 1 failed. P95 latency is not in 100-110ms time range: %s", results.Latencies.P95)
229229
}
230230

231-
// SLA 2: making sure the defined vegeta rates is met
232-
if math.Round(results.Rate) == rate.Rate(time.Second) {
233-
log.Printf("SLA 2 passed. vegeta rate is %f", rate.Rate(time.Second))
231+
// SLA 2: making sure the defined vegeta rate is met (within a 1% tolerance)
232+
expectedRate := rate.Rate(time.Second)
233+
tolerance := expectedRate * 0.01 // 1% tolerance
234+
if math.Abs(results.Rate-expectedRate) <= tolerance {
235+
log.Printf("SLA 2 passed. vegeta rate is %f, expected rate is %f (tolerance: 1%%)", results.Rate, expectedRate)
234236
} else {
235-
return fmt.Errorf("SLA 2 failed. vegeta rate is %f, expected Rate is %f", results.Rate, rate.Rate(time.Second))
237+
return fmt.Errorf("SLA 2 failed. vegeta rate is %f, expected rate is %f (tolerance: 1%%)", results.Rate, expectedRate)
236238
}
237239

238240
return nil

test/performance/benchmarks/scale-from-zero/main.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -360,10 +360,16 @@ func checkSLA(results *vegeta.Metrics, p95min time.Duration, p95max time.Duratio
360360
}
361361

362362
// SLA 3: making sure the defined vegeta total request count is met; it should equal the number of ksvcs we want to scale from zero in parallel
363-
if results.Requests == uint64(parallel) {
364-
log.Printf("SLA 3 passed. total requests is %d", results.Requests)
363+
// Allow up to 1 request fewer than expected (never more) to account for timing variations
364+
expectedRequests := uint64(parallel)
365+
lowerBound := expectedRequests
366+
if lowerBound > 0 {
367+
lowerBound--
368+
}
369+
if results.Requests >= lowerBound && results.Requests <= expectedRequests {
370+
log.Printf("SLA 3 passed. total requests is %d, expected requests is %d (tolerance: 1)", results.Requests, expectedRequests)
365371
} else {
366-
return fmt.Errorf("SLA 3 failed. total requests is %d, expected total requests is %d", results.Requests, uint64(parallel))
372+
return fmt.Errorf("SLA 3 failed. total requests is %d, expected requests is %d (tolerance: 1)", results.Requests, expectedRequests)
367373
}
368374

369375
return nil
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
Copyright 2026 The Knative Authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"testing"
21+
"time"
22+
23+
vegeta "github.com/tsenart/vegeta/v12/lib"
24+
)
25+
26+
func TestCheckSLAZeroParallelDoesNotUnderflow(t *testing.T) {
27+
results := &vegeta.Metrics{
28+
Requests: 0,
29+
Latencies: vegeta.LatencyMetrics{
30+
P95: 0,
31+
Max: 0,
32+
},
33+
}
34+
35+
if err := checkSLA(results, 0, time.Millisecond, time.Millisecond, 0); err != nil {
36+
t.Fatalf("checkSLA() error = %v, want nil", err)
37+
}
38+
}

0 commit comments

Comments
 (0)