Skip to content

Commit 2b31f08

Browse files
iduartgomez and claude authored
ci: add transport performance regression detection (#2262)
Co-authored-by: Claude <noreply@anthropic.com>
1 parent 2a67e72 commit 2b31f08

File tree

1 file changed

+206
-0
lines changed

1 file changed

+206
-0
lines changed

.github/workflows/benchmarks.yml

Lines changed: 206 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,206 @@
name: Performance Benchmarks

# Triggers: pushes to main and pull requests that touch the transport code,
# the benchmarks, or this workflow file, plus a manual dispatch.
on:
  push:
    branches:
      - main
    paths:
      - 'crates/core/src/transport/**'
      - 'crates/core/benches/**'
      - '.github/workflows/benchmarks.yml'
  pull_request:
    paths:
      - 'crates/core/src/transport/**'
      - 'crates/core/benches/**'
      - '.github/workflows/benchmarks.yml'
  # Allow manual trigger
  workflow_dispatch:
    inputs:
      benchmark_level:
        description: 'Benchmark level to run (level0, level1, all)'
        required: false
        # A choice input restricts the value to the levels the job conditions
        # actually test for, so a typo cannot silently fall back to level0.
        type: choice
        default: 'level0'
        options:
          - level0
          - level1
          - all

# Cancel in-progress runs when a new commit is pushed to the same ref, except
# on main: in-progress runs there are allowed to finish so the step that saves
# the criterion baseline for that commit is not interrupted.
concurrency:
  group: benchmarks-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
  # Runs the criterion benchmarks, compares them against the most recent
  # baseline cached from main, and reports regressions (non-blocking).
  benchmark:
    name: Performance Benchmarks
    # NOTE(review): this job runs on a self-hosted runner and is triggered by
    # pull_request; confirm that untrusted fork PRs cannot execute code on it.
    runs-on: self-hosted
    # Don't fail the whole workflow if the benchmark job fails
    continue-on-error: true

    # Least-privilege token: checkout needs read access to the repository and
    # the "Comment on PR" step needs to write PR comments.
    permissions:
      contents: read
      pull-requests: write

    # Exposed so the summary job can report regressions directly rather than
    # inferring them from the job result (a regression never fails this job).
    outputs:
      regression_detected: ${{ steps.bench_level0.outputs.regression_detected == 'true' || steps.bench_level1.outputs.regression_detected == 'true' }}

    env:
      CARGO_TARGET_DIR: ${{ github.workspace }}/target
      # Reduce noise from logging during benchmarks
      FREENET_LOG: error

    steps:
      - uses: actions/checkout@v6

      - uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: stable

      - uses: Swatinem/rust-cache@v2
        with:
          # Cache benchmarks separately from test builds
          prefix-key: bench
          save-if: ${{ github.ref == 'refs/heads/main' }}

      # Download baseline from main branch for comparison
      # PRs compare against main's baseline; main branch updates the baseline
      # Note: Cache keys include SHA so each main commit creates a new baseline
      # restore-keys finds the most recent baseline from main
      - name: Download main branch baseline
        id: baseline-cache
        uses: actions/cache/restore@v4
        with:
          path: target/criterion
          # Try to find an exact match first (won't happen for PRs)
          key: criterion-baseline-main-${{ runner.os }}-${{ github.sha }}
          # Fall back to most recent main branch baseline
          restore-keys: |
            criterion-baseline-main-${{ runner.os }}-

      # Record in the step summary which kind of baseline (if any) was restored.
      - name: Report baseline status
        env:
          BASELINE_EXACT_HIT: ${{ steps.baseline-cache.outputs.cache-hit }}
        run: |
          if [ "$BASELINE_EXACT_HIT" = "true" ]; then
            echo "✅ Loaded exact baseline match" >> "$GITHUB_STEP_SUMMARY"
          elif [ -d "target/criterion" ]; then
            echo "✅ Loaded baseline from main branch (via restore-keys)" >> "$GITHUB_STEP_SUMMARY"
          else
            echo "⚠️ No baseline found - this run will establish the baseline" >> "$GITHUB_STEP_SUMMARY"
          fi

      # Run Level 0 benchmarks (pure logic, deterministic)
      # Sets the step output `regression_detected` to true/false.
      - name: Run Level 0 Benchmarks (Pure Logic)
        id: bench_level0
        run: |
          # Without an explicit `shell:`, run steps use `bash -e` with no
          # pipefail, so a failing `cargo bench` would be hidden by `tee` and
          # then reported as "No Regressions Detected". Fail the step instead.
          set -o pipefail

          {
            echo "## Level 0: Pure Logic Benchmarks"
            echo ""
            echo "These benchmarks measure pure computation without I/O:"
            echo "- AES-GCM encryption/decryption"
            echo "- Serialization"
            echo "- Nonce generation"
            echo ""
          } >> "$GITHUB_STEP_SUMMARY"

          # Run benchmarks and capture output
          cargo bench --bench transport_perf -- level0 2>&1 | tee bench_output.txt

          # Parse for regressions (criterion outputs "regressed" for performance decreases)
          if grep -q "regressed" bench_output.txt; then
            echo "regression_detected=true" >> "$GITHUB_OUTPUT"
            {
              echo "### ⚠️ Performance Regressions Detected"
              echo ""
              echo '```'
              grep -A2 "regressed" bench_output.txt || true
              echo '```'
            } >> "$GITHUB_STEP_SUMMARY"
          else
            echo "regression_detected=false" >> "$GITHUB_OUTPUT"
            echo "### ✅ No Regressions Detected" >> "$GITHUB_STEP_SUMMARY"
          fi

          # Also capture any improvements
          if grep -q "improved" bench_output.txt; then
            {
              echo ""
              echo "### 🚀 Performance Improvements"
              echo '```'
              grep -A2 "improved" bench_output.txt || true
              echo '```'
            } >> "$GITHUB_STEP_SUMMARY"
          fi

      # Run Level 1 benchmarks (mock I/O) - only on main or manual trigger
      # Sets the step output `regression_detected` to true/false.
      - name: Run Level 1 Benchmarks (Mock I/O)
        id: bench_level1
        if: github.ref == 'refs/heads/main' || github.event.inputs.benchmark_level == 'level1' || github.event.inputs.benchmark_level == 'all'
        run: |
          # Surface a failing `cargo bench` instead of letting `tee` mask it.
          set -o pipefail

          {
            echo ""
            echo "## Level 1: Mock I/O Benchmarks"
            echo ""
            echo "These benchmarks measure protocol logic with mock sockets:"
            echo "- Channel throughput"
            echo "- Packet routing"
            echo ""
          } >> "$GITHUB_STEP_SUMMARY"

          # Keep the Level 1 output in its own file so the regression scan below
          # does not re-report Level 0 results, while still appending it to the
          # combined log that is uploaded as an artifact.
          level1_log="${RUNNER_TEMP}/bench_level1_output.txt"
          cargo bench --bench transport_perf -- level1 2>&1 | tee "$level1_log" | tee -a bench_output.txt

          if grep -q "regressed" "$level1_log"; then
            echo "regression_detected=true" >> "$GITHUB_OUTPUT"
            {
              echo '```'
              grep -A2 "regressed" "$level1_log" | tail -20 || true
              echo '```'
            } >> "$GITHUB_STEP_SUMMARY"
          else
            echo "regression_detected=false" >> "$GITHUB_OUTPUT"
          fi

      # Save baseline for future comparisons (only on main branch)
      # Each main branch commit creates a new baseline with SHA in the key
      # PRs use restore-keys to find the most recent one
      - name: Save Baseline to Main
        if: github.ref == 'refs/heads/main'
        uses: actions/cache/save@v4
        with:
          path: target/criterion
          # Include SHA so each main commit has its own baseline
          key: criterion-baseline-main-${{ runner.os }}-${{ github.sha }}

      # Post comment on PR with regression summary
      # Skipped for PRs from forks, where the token cannot write comments.
      - name: Comment on PR
        if: >-
          github.event_name == 'pull_request' &&
          github.event.pull_request.head.repo.full_name == github.repository &&
          steps.bench_level0.outputs.regression_detected == 'true'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const output = fs.readFileSync('bench_output.txt', 'utf8');

            // Extract regression lines
            // NOTE(review): 'change:' also matches benchmarks that did not
            // regress; confirm whether those lines are wanted in the comment.
            const regressions = output
              .split('\n')
              .filter((line) => line.includes('regressed') || line.includes('change:'))
              .slice(0, 20) // Limit to 20 lines
              .join('\n');

            const runUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`;

            // Assemble the Markdown line by line so the comment text does not
            // depend on how this script is indented inside the YAML file.
            const fence = '```';
            const body = [
              '## ⚠️ Performance Benchmark Regressions Detected',
              '',
              'The following benchmarks show performance regression compared to the baseline:',
              '',
              fence,
              regressions || 'See workflow summary for details',
              fence,
              '',
              '> **Note:** This is informational only and does not block the PR. Please review if the regression is expected or needs investigation.',
              '',
              `[View full benchmark results](${runUrl})`,
            ].join('\n');

            // Await the request so an API error is reported by this step.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body,
            });

      # Upload benchmark results as artifact
      # Runs even when an earlier step failed, since that is when the log is
      # most useful.
      - name: Upload Benchmark Results
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: |
            bench_output.txt
            target/criterion/**/report/index.html
          retention-days: 30

  # Summary job that always succeeds (so PR can merge)
  benchmark-summary:
    name: Benchmark Summary
    runs-on: ubuntu-latest
    needs: benchmark
    # Always run, even if the benchmark job failed
    if: always()

    steps:
      - name: Check Benchmark Status
        env:
          BENCH_RESULT: ${{ needs.benchmark.result }}
          REGRESSION_DETECTED: ${{ needs.benchmark.outputs.regression_detected }}
        run: |
          # Regressions are reported through the job output; the job result only
          # says whether the benchmark job itself ran to completion.
          if [ "$REGRESSION_DETECTED" = "true" ]; then
            echo "⚠️ Benchmarks detected performance regressions, but this is non-blocking."
            echo "Please review the benchmark results in the workflow summary."
          elif [ "$BENCH_RESULT" = "success" ]; then
            echo "✅ Benchmarks completed successfully."
          else
            echo "⚠️ Benchmark job finished with result '$BENCH_RESULT'; this is non-blocking."
            echo "Please review the benchmark job logs."
          fi

0 commit comments

Comments
 (0)