diff --git a/crates/bashkit/benches/parallel_execution.rs b/crates/bashkit/benches/parallel_execution.rs index 7587a1b1d..b464afd61 100644 --- a/crates/bashkit/benches/parallel_execution.rs +++ b/crates/bashkit/benches/parallel_execution.rs @@ -14,8 +14,10 @@ use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_m use std::sync::Arc; use tokio::runtime::Runtime; -/// Number of parallel sessions to benchmark -const SESSION_COUNTS: &[usize] = &[10, 50, 100, 200]; +/// Number of parallel sessions to benchmark. +/// Goes up to 1000 to confirm large fan-outs stay healthy (no per-session +/// thread/process; sessions are heap objects + tokio tasks). +const SESSION_COUNTS: &[usize] = &[10, 50, 100, 200, 500, 1000]; /// Heavy workload: file creation, text processing with grep/awk/sed const HEAVY_SCRIPT: &str = r#" diff --git a/crates/bashkit/benches/results/criterion-parallel-vm-linux-x86_64-1782162173.md b/crates/bashkit/benches/results/criterion-parallel-vm-linux-x86_64-1782162173.md new file mode 100644 index 000000000..92ce7ae85 --- /dev/null +++ b/crates/bashkit/benches/results/criterion-parallel-vm-linux-x86_64-1782162173.md @@ -0,0 +1,67 @@ +# Criterion Parallel Execution Benchmark + +## System Information + +- **Moniker**: `vm-linux-x86_64` +- **Hostname**: vm +- **OS**: linux +- **Architecture**: x86_64 +- **CPUs**: 4 +- **Timestamp**: 1782162173 + +## Workload Comparison (50 sessions) + +| Benchmark | Time | +|-----------|------| +| workload_types/light_sequential | 2.9735 ms | +| workload_types/light_parallel | 982.07 µs | +| workload_types/medium_sequential | 14.484 ms | +| workload_types/medium_parallel | 4.1479 ms | +| workload_types/heavy_sequential | 47.101 ms | +| workload_types/heavy_parallel | 12.260 ms | + +## Parallel Scaling (medium workload) + +| Benchmark | Time | +|-----------|------| +| parallel_scaling/medium_seq/10 | 2.8015 ms | +| parallel_scaling/medium_par/10 | 1.0280 ms | +| parallel_scaling/shared_fs/10 | 661.04 µs 
| +| parallel_scaling/medium_seq/50 | 14.232 ms | +| parallel_scaling/medium_par/50 | 4.0529 ms | +| parallel_scaling/shared_fs/50 | 2.6310 ms | +| parallel_scaling/medium_seq/100 | 27.965 ms | +| parallel_scaling/medium_par/100 | 7.9751 ms | +| parallel_scaling/shared_fs/100 | 5.5717 ms | +| parallel_scaling/medium_seq/200 | 57.317 ms | +| parallel_scaling/medium_par/200 | 15.607 ms | +| parallel_scaling/shared_fs/200 | 14.397 ms | + +## Single Operations + +| Benchmark | Time | +|-----------|------| +| single_bash_new | 31.669 µs | +| single_echo | 38.924 µs | +| single_file_write_read | 60.190 µs | +| single_grep | 58.610 µs | +| single_awk | 62.195 µs | +| single_sed | 153.72 µs | +| single_light_script | 65.103 µs | +| single_medium_script | 297.61 µs | +| single_heavy_script | 943.32 µs | + +## Speedup Summary + +| Workload | Sequential | Parallel | Speedup | +|----------|-----------|----------|---------| +| light | 2.974 ms | 0.982 ms | **3.03x** | +| medium | 14.484 ms | 4.148 ms | **3.49x** | +| heavy | 47.101 ms | 12.260 ms | **3.84x** | + +| Sessions | Sequential | Parallel | Shared FS | Par Speedup | +|----------|-----------|----------|-----------|-------------| +| 10 | 2.801 ms | 1.028 ms | 0.661 ms | **2.73x** | +| 50 | 14.232 ms | 4.053 ms | 2.631 ms | **3.51x** | +| 100 | 27.965 ms | 7.975 ms | 5.572 ms | **3.51x** | +| 200 | 57.317 ms | 15.607 ms | 14.397 ms | **3.67x** | diff --git a/crates/bashkit/benches/results/criterion-parallel-vm-linux-x86_64-1782168239.md b/crates/bashkit/benches/results/criterion-parallel-vm-linux-x86_64-1782168239.md new file mode 100644 index 000000000..eef9b2f91 --- /dev/null +++ b/crates/bashkit/benches/results/criterion-parallel-vm-linux-x86_64-1782168239.md @@ -0,0 +1,75 @@ +# Criterion Parallel Execution Benchmark + +## System Information + +- **Moniker**: `vm-linux-x86_64` +- **Hostname**: vm +- **OS**: linux +- **Architecture**: x86_64 +- **CPUs**: 4 +- **Timestamp**: 1782168239 + +## Workload Comparison (50 
sessions) + +| Benchmark | Time | +|-----------|------| +| workload_types/light_sequential | 3.8932 ms | +| workload_types/light_parallel | 1.3279 ms | +| workload_types/medium_sequential | 18.656 ms | +| workload_types/medium_parallel | 5.1439 ms | +| workload_types/heavy_sequential | 56.175 ms | +| workload_types/heavy_parallel | 14.620 ms | + +## Parallel Scaling (medium workload) + +| Benchmark | Time | +|-----------|------| +| parallel_scaling/medium_seq/10 | 3.7044 ms | +| parallel_scaling/medium_par/10 | 1.2981 ms | +| parallel_scaling/shared_fs/10 | 807.42 µs | +| parallel_scaling/medium_seq/50 | 18.635 ms | +| parallel_scaling/medium_par/50 | 5.2968 ms | +| parallel_scaling/shared_fs/50 | 3.5919 ms | +| parallel_scaling/medium_seq/100 | 37.804 ms | +| parallel_scaling/medium_par/100 | 10.336 ms | +| parallel_scaling/shared_fs/100 | 6.8304 ms | +| parallel_scaling/medium_seq/200 | 74.215 ms | +| parallel_scaling/medium_par/200 | 20.338 ms | +| parallel_scaling/shared_fs/200 | 16.870 ms | +| parallel_scaling/medium_seq/500 | 182.29 ms | +| parallel_scaling/medium_par/500 | 50.491 ms | +| parallel_scaling/shared_fs/500 | 47.912 ms | +| parallel_scaling/medium_seq/1000 | 371.62 ms | +| parallel_scaling/medium_par/1000 | 97.672 ms | +| parallel_scaling/shared_fs/1000 | 140.56 ms | + +## Single Operations + +| Benchmark | Time | +|-----------|------| +| single_bash_new | 39.904 µs | +| single_echo | 48.200 µs | +| single_file_write_read | 81.555 µs | +| single_grep | 76.044 µs | +| single_awk | 72.376 µs | +| single_sed | 194.96 µs | +| single_light_script | 75.035 µs | +| single_medium_script | 376.66 µs | +| single_heavy_script | 1.0537 ms | + +## Speedup Summary + +| Workload | Sequential | Parallel | Speedup | +|----------|-----------|----------|---------| +| light | 3.893 ms | 1.328 ms | **2.93x** | +| medium | 18.656 ms | 5.144 ms | **3.63x** | +| heavy | 56.175 ms | 14.620 ms | **3.84x** | + +| Sessions | Sequential | Parallel | Shared FS | Par Speedup | 
+|----------|-----------|----------|-----------|-------------| +| 10 | 3.704 ms | 1.298 ms | 0.807 ms | **2.85x** | +| 50 | 18.635 ms | 5.297 ms | 3.592 ms | **3.52x** | +| 100 | 37.804 ms | 10.336 ms | 6.830 ms | **3.66x** | +| 200 | 74.215 ms | 20.338 ms | 16.870 ms | **3.65x** | +| 500 | 182.290 ms | 50.491 ms | 47.912 ms | **3.61x** | +| 1000 | 371.620 ms | 97.672 ms | 140.560 ms | **3.80x** | diff --git a/crates/bashkit/tests/integration/main.rs b/crates/bashkit/tests/integration/main.rs index 880327783..c562128cd 100644 --- a/crates/bashkit/tests/integration/main.rs +++ b/crates/bashkit/tests/integration/main.rs @@ -68,6 +68,7 @@ pub mod mkfifo_tests; pub mod nested_subscript_tests; pub mod network_security_tests; pub mod output_truncation_tests; +pub mod parallel_sessions_tests; pub mod proptest_differential; pub mod python_integration_tests; pub mod python_security_tests; diff --git a/crates/bashkit/tests/integration/parallel_sessions_tests.rs b/crates/bashkit/tests/integration/parallel_sessions_tests.rs new file mode 100644 index 000000000..37c37a080 --- /dev/null +++ b/crates/bashkit/tests/integration/parallel_sessions_tests.rs @@ -0,0 +1,74 @@ +//! Large parallel fan-out tests. +//! +//! A bashkit session is a plain heap object + tokio task — no per-session OS +//! process or thread (see `specs/parallel-execution.md`, L-PROC-003). These +//! tests confirm a large fan-out (1000 sessions) actually does real work and +//! produces correct output, rather than spawning and returning instantly +//! because every session errored out (e.g. hit a limit). The timing of this +//! fan-out is benchmarked separately in `benches/parallel_execution.rs`. + +use bashkit::{Bash, FileSystem, InMemoryFs}; +use std::sync::Arc; + +/// 1000 parallel sessions, each with its own `Bash` instance but sharing one +/// `Arc<dyn FileSystem>`. Each session must succeed and compute the right sum.
+#[tokio::test(flavor = "multi_thread")]
+async fn thousand_parallel_sessions_do_real_work() {
+    const N: usize = 1000;
+    let fs: Arc<dyn FileSystem> = Arc::new(InMemoryFs::new());
+
+    let handles: Vec<_> = (0..N)
+        .map(|i| {
+            let fs = Arc::clone(&fs);
+            tokio::spawn(async move {
+                // Write a unique file, then sum its values.
+                // Expected sum = (1+2+...+10) * i = 55 * i.
+                let script = format!(
+                    r#"
+for j in 1 2 3 4 5 6 7 8 9 10; do
+    echo "value=$((j * {i}))"
+done > /tmp/session_{i}.txt
+awk -F= '{{s+=$2}} END {{print s}}' /tmp/session_{i}.txt
+"#
+                );
+                let mut bash = Bash::builder().fs(fs).build();
+                let result = bash.exec(&script).await.expect("session must succeed");
+                (i, result.exit_code, result.stdout.trim().to_string())
+            })
+        })
+        .collect();
+
+    let mut completed = 0;
+    for handle in handles {
+        let (i, exit_code, stdout) = handle.await.expect("task must not panic");
+        assert_eq!(exit_code, 0, "session {i} should exit 0");
+        assert_eq!(stdout, (55 * i).to_string(), "session {i} wrong sum");
+        completed += 1;
+    }
+    assert_eq!(completed, N, "all {N} sessions must complete");
+}
+
+/// Sessions sharing one filesystem must not corrupt each other's files: each
+/// writes to a distinct path and reads back exactly what it wrote.
+#[tokio::test(flavor = "multi_thread")]
+async fn parallel_sessions_shared_fs_no_cross_contamination() {
+    const N: usize = 500;
+    let fs: Arc<dyn FileSystem> = Arc::new(InMemoryFs::new());
+
+    let handles: Vec<_> = (0..N)
+        .map(|i| {
+            let fs = Arc::clone(&fs);
+            tokio::spawn(async move {
+                let script = format!("echo marker-{i} > /tmp/f_{i}.txt; cat /tmp/f_{i}.txt");
+                let mut bash = Bash::builder().fs(fs).build();
+                let out = bash.exec(&script).await.expect("session must succeed");
+                (i, out.stdout.trim().to_string())
+            })
+        })
+        .collect();
+
+    for handle in handles {
+        let (i, stdout) = handle.await.expect("task must not panic");
+        assert_eq!(stdout, format!("marker-{i}"), "session {i} saw wrong file");
+    }
+}
diff --git a/scripts/bench-parallel.sh b/scripts/bench-parallel.sh index bca188b53..69b5ab3a2 100755 --- a/scripts/bench-parallel.sh +++ b/scripts/bench-parallel.sh @@ -146,7 +146,12 @@ for w in ['light', 'medium', 'heavy']: print() print('| Sessions | Sequential | Parallel | Shared FS | Par Speedup |') print('|----------|-----------|----------|-----------|-------------|') -for n in [10, 50, 100, 200]: +scaling_counts = sorted({ + int(k.rsplit('/', 1)[1]) + for k in results + if k.startswith('parallel_scaling/medium_seq/') +}) +for n in scaling_counts: seq = results.get(f'parallel_scaling/medium_seq/{n}') par = results.get(f'parallel_scaling/medium_par/{n}') sfs = results.get(f'parallel_scaling/shared_fs/{n}') diff --git a/specs/parallel-execution.md b/specs/parallel-execution.md index a66743558..42d18b1cf 100644 --- a/specs/parallel-execution.md +++ b/specs/parallel-execution.md @@ -20,9 +20,18 @@ Run `cargo bench --bench parallel_execution` when changes touch: | Benchmark | What it measures | |-----------|------------------| | `workload_types/*` | Parallel vs sequential speedup | -| `parallel_scaling/*` | Scaling with session count | +| `parallel_scaling/*` | Scaling with session count (10–1000 sessions) | | `single_*` | Individual operation overhead | +### 
Correctness at Scale + +Throughput numbers are meaningless if sessions silently error out. The +`parallel_sessions_tests` integration suite asserts that a 1000-session +fan-out (each its own `Bash`, sharing one `Arc<dyn FileSystem>`) actually +produces correct per-session output, and that concurrent sessions sharing a +filesystem don't cross-contaminate. Run via `just test` (no extra features). + + ### Expected Results - Light workload: ~2x parallel speedup