Skip to content

Commit ec0d5df

Browse files
committed
Improving output and adjusting failure threshold for benchmarks
1 parent 6fcd542 commit ec0d5df

File tree

1 file changed

+27
-15
lines changed

1 file changed

+27
-15
lines changed

.github/workflows/benchmark-pr.yml

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ jobs:
7878
import sys
7979
import os
8080
81-
THRESHOLD = 20.0 # 20% regression threshold
81+
THRESHOLD = 40.0 # 40% regression threshold
82+
MIN_REGRESSIONS = 5 # Minimum number of regressions to fail
8283
8384
def parse_benchmarks(file_path):
8485
"""Parse concatenated BenchmarkDotNet JSON files."""
@@ -154,16 +155,22 @@ jobs:
154155
print(f"Improvements (<-{THRESHOLD}%): {len(improvements)}")
155156
156157
if regressions:
157-
print("\n::error::Performance regressions detected!")
158+
print(f"\nPerformance regressions detected:")
158159
for r in sorted(regressions, key=lambda x: -x['change']):
159160
print(f" - {r['name']}: +{r['change']:.1f}% slower ({r['baseline']:.2f}ns -> {r['current']:.2f}ns)")
160161
162+
# Only fail if we have at least MIN_REGRESSIONS benchmarks regressing
163+
if len(regressions) >= MIN_REGRESSIONS:
164+
print(f"\n::error::{len(regressions)} benchmarks regressed >{THRESHOLD}% (threshold: {MIN_REGRESSIONS})")
161165
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
162166
f.write("has_regressions=true\n")
163167
f.write(f"regression_count={len(regressions)}\n")
164168
sys.exit(1)
165169
else:
166-
print("\nNo significant performance regressions detected.")
170+
if regressions:
171+
print(f"\n{len(regressions)} regression(s) detected but below threshold of {MIN_REGRESSIONS} required to fail.")
172+
else:
173+
print("\nNo significant performance regressions detected.")
167174
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
168175
f.write("has_regressions=false\n")
169176
EOF
@@ -183,12 +190,19 @@ jobs:
183190
// Sort by change percentage (worst regressions first)
184191
results.sort((a, b) => b.change - a.change);
185192
186-
const regressions = results.filter(r => r.change > 20);
187-
const warnings = results.filter(r => r.change > 10 && r.change <= 20);
188-
const improvements = results.filter(r => r.change < -10);
193+
const regressions = results.filter(r => r.change > 40);
194+
const warnings = results.filter(r => r.change > 20 && r.change <= 40);
195+
const improvements = results.filter(r => r.change < -20);
196+
197+
if (regressions.length >= 5) {
198+
body += `:x: **${regressions.length} benchmarks regressed >40% (PR blocked)**\n\n`;
199+
} else if (regressions.length > 0) {
200+
body += `:warning: **${regressions.length} benchmark(s) regressed >40% (below threshold of 5 to block)**\n\n`;
201+
} else {
202+
body += ':white_check_mark: **No significant performance regressions detected**\n\n';
203+
}
189204
190205
if (regressions.length > 0) {
191-
body += ':x: **Performance regressions detected (>20%)**\n\n';
192206
body += '| Benchmark | Baseline | Current | Change |\n';
193207
body += '|-----------|----------|---------|--------|\n';
194208
for (const r of regressions.slice(0, 10)) {
@@ -198,27 +212,25 @@ jobs:
198212
if (regressions.length > 10) {
199213
body += `\n*...and ${regressions.length - 10} more regressions*\n`;
200214
}
201-
} else {
202-
body += ':white_check_mark: **No significant performance regressions detected**\n\n';
203215
}
204216
205217
if (warnings.length > 0) {
206-
body += `\n### :warning: Minor regressions (10-20%)\n`;
218+
body += `\n### :warning: Minor performance degradation (20-40%)\n`;
207219
body += `${warnings.length} benchmarks showed minor slowdown\n`;
208220
}
209221
210222
if (improvements.length > 0) {
211223
body += `\n### :rocket: Improvements\n`;
212-
body += `${improvements.length} benchmarks showed improvement (>10% faster)\n`;
224+
body += `${improvements.length} benchmarks showed improvement (>20% faster)\n`;
213225
}
214226
215227
body += `\n<details><summary>All results (${results.length} benchmarks)</summary>\n\n`;
216-
body += '| Benchmark | Change |\n|-----------|--------|\n';
228+
body += '| Benchmark | Baseline | Current | Change |\n|-----------|----------|---------|--------|\n';
217229
for (const r of results) {
218230
const shortName = r.name.split('.').slice(-2).join('.');
219-
const emoji = r.change > 20 ? ':x:' : r.change > 10 ? ':warning:' : r.change < -10 ? ':rocket:' : ':white_check_mark:';
231+
const emoji = r.change > 40 ? ':x:' : r.change > 20 ? ':warning:' : r.change < -20 ? ':rocket:' : ':white_check_mark:';
220232
const sign = r.change > 0 ? '+' : '';
221-
body += `| ${shortName} | ${emoji} ${sign}${r.change.toFixed(1)}% |\n`;
233+
body += `| ${shortName} | ${r.baseline.toFixed(2)}ns | ${r.current.toFixed(2)}ns | ${emoji} ${sign}${r.change.toFixed(1)}% |\n`;
222234
}
223235
body += '</details>\n';
224236
@@ -267,5 +279,5 @@ jobs:
267279
- name: Fail if regressions detected
268280
if: steps.compare.outputs.has_regressions == 'true'
269281
run: |
270-
echo "::error::PR blocked due to performance regressions exceeding 20% threshold"
282+
echo "::error::PR blocked: 5+ benchmarks regressed more than 40%"
271283
exit 1

0 commit comments

Comments
 (0)