everruns · chaliy · Jun 23, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 23, 2026
diff --git a/crates/bashkit/benches/parallel_execution.rs b/crates/bashkit/benches/parallel_execution.rs
@@ -14,8 +14,10 @@ use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_m
 use std::sync::Arc;
 use tokio::runtime::Runtime;
 
-/// Number of parallel sessions to benchmark
-const SESSION_COUNTS: &[usize] = &[10, 50, 100, 200];
+/// Parallel session counts to benchmark.
+/// Goes up to 1000 to confirm large fan-outs stay healthy (no per-session
+/// thread/process; sessions are heap objects + tokio tasks).
+const SESSION_COUNTS: &[usize] = &[10, 50, 100, 200, 500, 1000];
 
 /// Heavy workload: file creation, text processing with grep/awk/sed
 const HEAVY_SCRIPT: &str = r#"

diff --git a/crates/bashkit/benches/results/criterion-parallel-vm-linux-x86_64-1782162173.md b/crates/bashkit/benches/results/criterion-parallel-vm-linux-x86_64-1782162173.md
@@ -0,0 +1,67 @@
+# Criterion Parallel Execution Benchmark
+
+## System Information
+
+- **Moniker**: `vm-linux-x86_64`
+- **Hostname**: vm
+- **OS**: linux
+- **Architecture**: x86_64
+- **CPUs**: 4
+- **Timestamp**: 1782162173
+
+## Workload Comparison (50 sessions)
+
+| Benchmark | Time |
+|-----------|------|
+| workload_types/light_sequential | 2.9735 ms |
+| workload_types/light_parallel | 982.07 µs |
+| workload_types/medium_sequential | 14.484 ms |
+| workload_types/medium_parallel | 4.1479 ms |
+| workload_types/heavy_sequential | 47.101 ms |
+| workload_types/heavy_parallel | 12.260 ms |
+
+## Parallel Scaling (medium workload)
+
+| Benchmark | Time |
+|-----------|------|
+| parallel_scaling/medium_seq/10 | 2.8015 ms |
+| parallel_scaling/medium_par/10 | 1.0280 ms |
+| parallel_scaling/shared_fs/10 | 661.04 µs |
+| parallel_scaling/medium_seq/50 | 14.232 ms |
+| parallel_scaling/medium_par/50 | 4.0529 ms |
+| parallel_scaling/shared_fs/50 | 2.6310 ms |
+| parallel_scaling/medium_seq/100 | 27.965 ms |
+| parallel_scaling/medium_par/100 | 7.9751 ms |
+| parallel_scaling/shared_fs/100 | 5.5717 ms |
+| parallel_scaling/medium_seq/200 | 57.317 ms |
+| parallel_scaling/medium_par/200 | 15.607 ms |
+| parallel_scaling/shared_fs/200 | 14.397 ms |
+
+## Single Operations
+
+| Benchmark | Time |
+|-----------|------|
+| single_bash_new | 31.669 µs |
+| single_echo | 38.924 µs |
+| single_file_write_read | 60.190 µs |
+| single_grep | 58.610 µs |
+| single_awk | 62.195 µs |
+| single_sed | 153.72 µs |
+| single_light_script | 65.103 µs |
+| single_medium_script | 297.61 µs |
+| single_heavy_script | 943.32 µs |
+
+## Speedup Summary
+
+| Workload | Sequential | Parallel | Speedup |
+|----------|-----------|----------|---------|
+| light | 2.974 ms | 0.982 ms | **3.03x** |
+| medium | 14.484 ms | 4.148 ms | **3.49x** |
+| heavy | 47.101 ms | 12.260 ms | **3.84x** |
+
+| Sessions | Sequential | Parallel | Shared FS | Par Speedup |
+|----------|-----------|----------|-----------|-------------|
+| 10 | 2.801 ms | 1.028 ms | 0.661 ms | **2.73x** |
+| 50 | 14.232 ms | 4.053 ms | 2.631 ms | **3.51x** |
+| 100 | 27.965 ms | 7.975 ms | 5.572 ms | **3.51x** |
+| 200 | 57.317 ms | 15.607 ms | 14.397 ms | **3.67x** |
diff --git a/crates/bashkit/benches/results/criterion-parallel-vm-linux-x86_64-1782168239.md b/crates/bashkit/benches/results/criterion-parallel-vm-linux-x86_64-1782168239.md
@@ -0,0 +1,75 @@
+# Criterion Parallel Execution Benchmark
+
+## System Information
+
+- **Moniker**: `vm-linux-x86_64`
+- **Hostname**: vm
+- **OS**: linux
+- **Architecture**: x86_64
+- **CPUs**: 4
+- **Timestamp**: 1782168239
+
+## Workload Comparison (50 sessions)
+
+| Benchmark | Time |
+|-----------|------|
+| workload_types/light_sequential | 3.8932 ms |
+| workload_types/light_parallel | 1.3279 ms |
+| workload_types/medium_sequential | 18.656 ms |
+| workload_types/medium_parallel | 5.1439 ms |
+| workload_types/heavy_sequential | 56.175 ms |
+| workload_types/heavy_parallel | 14.620 ms |
+
+## Parallel Scaling (medium workload)
+
+| Benchmark | Time |
+|-----------|------|
+| parallel_scaling/medium_seq/10 | 3.7044 ms |
+| parallel_scaling/medium_par/10 | 1.2981 ms |
+| parallel_scaling/shared_fs/10 | 807.42 µs |
+| parallel_scaling/medium_seq/50 | 18.635 ms |
+| parallel_scaling/medium_par/50 | 5.2968 ms |
+| parallel_scaling/shared_fs/50 | 3.5919 ms |
+| parallel_scaling/medium_seq/100 | 37.804 ms |
+| parallel_scaling/medium_par/100 | 10.336 ms |
+| parallel_scaling/shared_fs/100 | 6.8304 ms |
+| parallel_scaling/medium_seq/200 | 74.215 ms |
+| parallel_scaling/medium_par/200 | 20.338 ms |
+| parallel_scaling/shared_fs/200 | 16.870 ms |
+| parallel_scaling/medium_seq/500 | 182.29 ms |
+| parallel_scaling/medium_par/500 | 50.491 ms |
+| parallel_scaling/shared_fs/500 | 47.912 ms |
+| parallel_scaling/medium_seq/1000 | 371.62 ms |
+| parallel_scaling/medium_par/1000 | 97.672 ms |
+| parallel_scaling/shared_fs/1000 | 140.56 ms |
+
+## Single Operations
+
+| Benchmark | Time |
+|-----------|------|
+| single_bash_new | 39.904 µs |
+| single_echo | 48.200 µs |
+| single_file_write_read | 81.555 µs |
+| single_grep | 76.044 µs |
+| single_awk | 72.376 µs |
+| single_sed | 194.96 µs |
+| single_light_script | 75.035 µs |
+| single_medium_script | 376.66 µs |
+| single_heavy_script | 1.0537 ms |
+
+## Speedup Summary
+
+| Workload | Sequential | Parallel | Speedup |
+|----------|-----------|----------|---------|
+| light | 3.893 ms | 1.328 ms | **2.93x** |
+| medium | 18.656 ms | 5.144 ms | **3.63x** |
+| heavy | 56.175 ms | 14.620 ms | **3.84x** |
+
+| Sessions | Sequential | Parallel | Shared FS | Par Speedup |
+|----------|-----------|----------|-----------|-------------|
+| 10 | 3.704 ms | 1.298 ms | 0.807 ms | **2.85x** |
+| 50 | 18.635 ms | 5.297 ms | 3.592 ms | **3.52x** |
+| 100 | 37.804 ms | 10.336 ms | 6.830 ms | **3.66x** |
+| 200 | 74.215 ms | 20.338 ms | 16.870 ms | **3.65x** |
+| 500 | 182.290 ms | 50.491 ms | 47.912 ms | **3.61x** |
+| 1000 | 371.620 ms | 97.672 ms | 140.560 ms | **3.80x** |
diff --git a/crates/bashkit/tests/integration/main.rs b/crates/bashkit/tests/integration/main.rs
@@ -68,6 +68,7 @@ pub mod mkfifo_tests;
 pub mod nested_subscript_tests;
 pub mod network_security_tests;
 pub mod output_truncation_tests;
+pub mod parallel_sessions_tests;
 pub mod proptest_differential;
 pub mod python_integration_tests;
 pub mod python_security_tests;

diff --git a/crates/bashkit/tests/integration/parallel_sessions_tests.rs b/crates/bashkit/tests/integration/parallel_sessions_tests.rs
@@ -0,0 +1,74 @@
+//! Large parallel fan-out tests.
+//!
+//! A bashkit session is a plain heap object + tokio task — no per-session OS
+//! process or thread (see `specs/parallel-execution.md`, L-PROC-003). These
+//! tests confirm a large fan-out (up to 1000 sessions) does real work and
+//! produces correct output, rather than spawning and returning instantly
+//! because every session errored out (e.g. hit a limit). The timing of this
+//! fan-out is benchmarked separately in `benches/parallel_execution.rs`.
+
+use bashkit::{Bash, FileSystem, InMemoryFs};
+use std::sync::Arc;
+
+/// Fans out 1000 tokio tasks, each building its own `Bash` over one shared
+/// `Arc<dyn FileSystem>`; every session must exit 0 and print the sum `55 * i`.
+#[tokio::test(flavor = "multi_thread")]
+async fn thousand_parallel_sessions_do_real_work() {
+    const N: usize = 1000;
+    let fs: Arc<dyn FileSystem> = Arc::new(InMemoryFs::new());
+
+    let handles: Vec<_> = (0..N)
+        .map(|i| {
+            let fs = Arc::clone(&fs);
+            tokio::spawn(async move {
+                // Write ten `value=<j*i>` lines (j = 1..=10) to a per-session file, then
+                // sum them with awk. Expected sum = (1+2+...+10) * i = 55 * i.
+                let script = format!(
+                    r#"
+for j in 1 2 3 4 5 6 7 8 9 10; do
+    echo "value=$((j * {i}))"
+done > /tmp/session_{i}.txt
+awk -F= '{{s+=$2}} END {{print s}}' /tmp/session_{i}.txt
+"#
+                );
+                let mut bash = Bash::builder().fs(fs).build();
+                let result = bash.exec(&script).await.expect("session must succeed");
+                (i, result.exit_code, result.stdout.trim().to_string())
+            })
+        })
+        .collect();
+
+    let mut completed = 0;
+    for handle in handles {
+        let (i, exit_code, stdout) = handle.await.expect("task must not panic");
+        assert_eq!(exit_code, 0, "session {i} should exit 0");
+        assert_eq!(stdout, (55 * i).to_string(), "session {i} wrong sum");
+        completed += 1;
+    }
+    assert_eq!(completed, N, "all {N} sessions must complete");
+}
+
+/// 500 sessions on one shared filesystem: each writes `marker-{i}` to its own
+/// path and must `cat` back exactly that text (only stdout is asserted).
+#[tokio::test(flavor = "multi_thread")]
+async fn parallel_sessions_shared_fs_no_cross_contamination() {
+    const N: usize = 500;
+    let fs: Arc<dyn FileSystem> = Arc::new(InMemoryFs::new());
+
+    let handles: Vec<_> = (0..N)
+        .map(|i| {
+            let fs = Arc::clone(&fs);
+            tokio::spawn(async move {
+                let script = format!("echo marker-{i} > /tmp/f_{i}.txt; cat /tmp/f_{i}.txt");
+                let mut bash = Bash::builder().fs(fs).build();
+                let out = bash.exec(&script).await.expect("session must succeed");
+                (i, out.stdout.trim().to_string())
+            })
+        })
+        .collect();
+
+    for handle in handles {
+        let (i, stdout) = handle.await.expect("task must not panic");
+        assert_eq!(stdout, format!("marker-{i}"), "session {i} saw wrong file");
+    }
+}
diff --git a/scripts/bench-parallel.sh b/scripts/bench-parallel.sh
@@ -146,7 +146,12 @@ for w in ['light', 'medium', 'heavy']:
 print()
 print('| Sessions | Sequential | Parallel | Shared FS | Par Speedup |')
 print('|----------|-----------|----------|-----------|-------------|')
-for n in [10, 50, 100, 200]:
+scaling_counts = sorted({
+    int(k.rsplit('/', 1)[1])
+    for k in results
+    if k.startswith('parallel_scaling/medium_seq/')
+})
+for n in scaling_counts:
     seq = results.get(f'parallel_scaling/medium_seq/{n}')
     par = results.get(f'parallel_scaling/medium_par/{n}')
     sfs = results.get(f'parallel_scaling/shared_fs/{n}')

diff --git a/specs/parallel-execution.md b/specs/parallel-execution.md
@@ -20,9 +20,18 @@ Run `cargo bench --bench parallel_execution` when changes touch:
 | Benchmark | What it measures |
 |-----------|------------------|
 | `workload_types/*` | Parallel vs sequential speedup |
-| `parallel_scaling/*` | Scaling with session count |
+| `parallel_scaling/*` | Scaling with session count (10–1000 sessions) |
 | `single_*` | Individual operation overhead |
 
+### Correctness at Scale
+
+Throughput numbers are meaningless if sessions silently error out. The
+`parallel_sessions_tests` integration suite asserts that a 1000-session
+fan-out (each session with its own `Bash`, sharing one `Arc<dyn FileSystem>`) actually
+produces correct per-session output, and that concurrent sessions sharing a
+filesystem don't cross-contaminate. Run via `just test` (no extra features).
+
+
 ### Expected Results
 
 - Light workload: ~2x parallel speedup