From 15d4266ed2d903b0c0bfbef23bfb175434f0fa5e Mon Sep 17 00:00:00 2001
From: Robbie McKinstry <robbie@wack.run>
Date: Wed, 1 Jul 2026 18:07:04 -0400
Subject: [PATCH] feat(check): opt-in session-trace archive for `multi check`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add `--trace-archive <PATH>` (off by default; also `MULTI_CHECKS_TRACE_ARCHIVE`
/ `[checks].trace_archive`) to capture every check agent session and bundle
them into a single in-memory `.tar.gz` for debugging failed runs. Captures all
executions, not just failures.

Capture is via `Agent::on_event`, not cersei's `.memory()` session
persistence: the executor cancels its agent the instant a verdict lands and
wraps the run in a drop-on-timeout, and cersei only flushes its transcript on
the agentic loop's normal exit — so a memory-backed transcript would miss
exactly the cancelled/timed-out/errored runs this feature exists to debug.
`emit()` fires the event handler for every event before those early returns,
so an event-sourced trace survives them.

- executor: a coalescing `TraceRecorder` (text/thinking deltas joined into
  per-turn blocks, tool results bounded) serializes each execution into a
  self-contained NDJSON doc — header, records, outcome footer — returned in
  `AgentOutcome::trace_jsonl`.
- execution actor: pushes each attempt (retries included) into a shared
  `TraceCollector` before signalling completion downstream, so retry traces
  are captured and collection is race-free with run finalization.
- run: builds the `.tar.gz` with pure-Rust `tar` + `flate2` (no temp dir, no
  subprocess), laid out as `{NN}-{requirement}/{check}.attempt-{k}.jsonl` —
  one directory per requirement, one file per check execution.

Trace archiving is best-effort: a failure logs but never fails an otherwise
successful check run.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 BUGS.md                       |  22 ++
 Cargo.lock                    |  23 ++
 Cargo.toml                    |   5 +
 src/checks/config/mod.rs      |  13 +-
 src/checks/config/schema.rs   |  12 +
 src/checks/e2e.rs             |   2 +
 src/checks/execution.rs       |  30 +-
 src/checks/executor/cersei.rs |  45 ++-
 src/checks/executor/claude.rs |   2 +
 src/checks/executor/fake.rs   |   3 +
 src/checks/executor/mod.rs    |   7 +
 src/checks/executor/trace.rs  | 625 ++++++++++++++++++++++++++++++++++
 src/checks/mod.rs             |  72 +++-
 src/checks/trace_archive.rs   | 238 +++++++++++++
 src/config/check/mod.rs       |   8 +
 15 files changed, 1099 insertions(+), 8 deletions(-)
 create mode 100644 src/checks/executor/trace.rs
 create mode 100644 src/checks/trace_archive.rs
diff --git a/BUGS.md b/BUGS.md
index 423bd33..67fad6b 100644
--- a/BUGS.md
+++ b/BUGS.md
@@ -2,6 +2,15 @@
 
 These are bugs (or missing features) I've observed while working with `multi checks`.
 
+- [ ] Output is now hanging. I suspect this is recent (within the last few commits) and it started
+happening after implement the changes to the `Presenter` actor to fix writing text off-screen without wrapping.
+
+- [ ] No limit on max turns.
+
+- [ ] Remove the `Claude -p` executor.
+
+- [ ] Logs no longer report the id of the check that failed (or the number of attempted retries)
+
 - [ ] No use of Cersei workflows to chain multiple prompts together.
 
 - [ ] No support for Fireworks AI.
@@ -22,6 +31,19 @@ These are bugs (or missing features) I've observed while working with `multi che
 
 - CERSEI: `append_system_prompt()` function is dead unless routed through the separate build_system_prompt() composer.
 
+- [ ] `Ctrl-C` (shutdown signals) needs to be handled gracefully and cross-platform.
+The `multi check` presenter installs a raw `libc::signal(SIGINT, …)` handler
+(`install_terminal_guards`, src/checks/presenter/inline.rs:224) that is Unix-only
+(won't build/run on Windows) and hard-`_exit(130)`s: it restores the cursor but
+skips graceful teardown, so in-flight agent sessions, the MCP result server, and
+spawned Cersei subprocesses are killed without cleanup and no partial
+results/traces get flushed. Contrast `multi run`, which already does this
+correctly and cross-platform via `tokio_graceful_shutdown::Toplevel::catch_signals()`
++ `handle_shutdown_requests()` (src/cmd/run/canary_mode.rs:63). `check` should adopt
+the same graceful-shutdown path (or an equivalent `tokio::signal::ctrl_c` + coordinated
+cancellation covering SIGINT/SIGTERM and Windows Ctrl-C/Ctrl-Break) while still
+guaranteeing the terminal is restored on the way out.
+
 ## Fixes
 
 - [x] No loading of CLAUDE.md files
diff --git a/Cargo.lock b/Cargo.lock
index 67c8cc0..feacf67 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3468,6 +3468,7 @@ dependencies = [
  "directories",
  "failsafe",
  "figment",
+ "flate2",
  "futures-core",
  "futures-util",
  "ignore",
@@ -3489,6 +3490,7 @@ dependencies = [
  "serde_json5",
  "serde_with",
  "static_assertions",
+ "tar",
  "tempfile",
  "textwrap",
  "thiserror 2.0.18",
@@ -5868,6 +5870,17 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "tar"
+version = "0.4.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f6221d9a6003c78398e3b239969f352578258df48c8eb051caadae0015bc840"
+dependencies = [
+ "filetime",
+ "libc",
+ "xattr",
+]
+
 [[package]]
 name = "tempfile"
 version = "3.27.0"
@@ -7165,6 +7178,16 @@ version = "0.6.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
 
+[[package]]
+name = "xattr"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156"
+dependencies = [
+ "libc",
+ "rustix 1.1.4",
+]
+
 [[package]]
 name = "xmlparser"
 version = "0.13.6"
diff --git a/Cargo.toml b/Cargo.toml
index f6abe84..9cc1ea0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,6 +38,9 @@ derive-getters = "0.5.0"
 dialoguer = "0.11.0"
 directories = "6.0"
 figment = { version = "0.10", default-features = false, features = ["env"] }
+# Pure-Rust gzip (miniz_oxide backend, the default). Used with `tar` to build the
+# opt-in `multi check --trace-archive` bundle entirely in memory.
+flate2 = "1.0"
 failsafe = "1.3.0"
 futures-core = "0.3.31"
 futures-util = "0.3.31"
@@ -60,6 +63,8 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 serde_json5 = "0.2.1"
 serde_with = { version = "3.12", features = ["chrono"] }
+# Pure-Rust tar writer for the in-memory `multi check --trace-archive` bundle.
+tar = "0.4"
 thiserror.workspace = true
 tokio = { workspace = true, features = ["full"] }
 tokio-graceful-shutdown = "0.16.0"
diff --git a/src/checks/config/mod.rs b/src/checks/config/mod.rs
index cea1930..9b9aded 100644
--- a/src/checks/config/mod.rs
+++ b/src/checks/config/mod.rs
@@ -18,6 +18,7 @@ mod providers;
 mod schema;
 
 use std::num::NonZeroUsize;
+use std::path::PathBuf;
 use std::time::Duration;
 
 use figment::{
@@ -74,6 +75,9 @@ pub struct Config {
     /// calling the judge tool; a fresh attempt usually succeeds. A check only
     /// resolves as errored after all attempts are exhausted.
     pub max_attempts: usize,
+    /// Where to bundle the opt-in session-trace archive, or `None` (default) to
+    /// disable trace capture. See [`crate::checks::trace_archive`].
+    pub trace_archive: Option<PathBuf>,
 }
 
 impl Config {
@@ -135,6 +139,9 @@ impl Resolved {
                 cfg.model.clone(),
                 cfg.effort,
                 cfg.agent_timeout,
+                // The archive path lives at the orchestration layer; the executor
+                // only needs to know whether to capture a per-execution trace.
+                cfg.trace_archive.is_some(),
             )),
             ExecutorKind::Claude => cfg.build_claude_executor(),
         };
@@ -198,6 +205,7 @@ pub fn load(overrides: CliOverrides) -> Result<Resolved> {
         concurrency,
         agent_timeout: DEFAULT_AGENT_TIMEOUT,
         max_attempts: DEFAULT_MAX_ATTEMPTS,
+        trace_archive: checks.trace_archive,
     };
 
     Ok(Resolved {
@@ -224,6 +232,7 @@ pub fn configuration() -> Config {
         concurrency: default_concurrency(),
         agent_timeout: DEFAULT_AGENT_TIMEOUT,
         max_attempts: DEFAULT_MAX_ATTEMPTS,
+        trace_archive: None,
     }
 }
 
@@ -245,6 +254,7 @@ mod tests {
                 effort: Some(Effort::Low),
                 executor: None,
                 concurrency: None,
+                trace_archive: None,
                 providers: ProvidersSection::default(),
             },
         }
@@ -273,6 +283,7 @@ mod tests {
                 None,
                 None,
                 None,
+                None,
             );
             let checks = resolve_layers(file, overrides).unwrap();
             assert_eq!(checks.provider, Some(ProviderKind::OpenAi));
@@ -307,7 +318,7 @@ mod tests {
 
             // ...and a flag outranks env.
             let overrides =
-                CliOverrides::new(None, Some("claude-opus-4-8".into()), None, None, None);
+                CliOverrides::new(None, Some("claude-opus-4-8".into()), None, None, None, None);
             let checks = resolve_layers(file, overrides).unwrap();
             assert_eq!(checks.model.as_deref(), Some("claude-opus-4-8"));
             Ok(())
diff --git a/src/checks/config/schema.rs b/src/checks/config/schema.rs
index 73acafc..7a67bfb 100644
--- a/src/checks/config/schema.rs
+++ b/src/checks/config/schema.rs
@@ -5,6 +5,8 @@
 //! overrides, so figment can merge them by key path with `flag > env > file`
 //! precedence. See [`super::load`].
 
+use std::path::PathBuf;
+
 use clap::ValueEnum;
 use serde::{Deserialize, Serialize};
 
@@ -81,6 +83,12 @@ pub struct ChecksSection {
     /// available CPU cores).
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub concurrency: Option<usize>,
+    /// Where to write the opt-in session-trace archive. When set, every check
+    /// execution's agent session is captured and bundled into this `.tar.gz`
+    /// (see [`crate::checks::trace_archive`]); unset (the default) disables
+    /// capture entirely.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub trace_archive: Option<PathBuf>,
     /// Optional, non-secret per-provider base-URL overrides.
     #[serde(default)]
     pub providers: ProvidersSection,
@@ -140,6 +148,8 @@ pub struct CliChecksOverrides {
     pub executor: Option<ExecutorKind>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub concurrency: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub trace_archive: Option<PathBuf>,
 }
 
 impl CliOverrides {
@@ -150,6 +160,7 @@ impl CliOverrides {
         effort: Option<Effort>,
         executor: Option<ExecutorKind>,
         concurrency: Option<usize>,
+        trace_archive: Option<PathBuf>,
     ) -> Self {
         Self {
             checks: CliChecksOverrides {
@@ -158,6 +169,7 @@ impl CliOverrides {
                 effort,
                 executor,
                 concurrency,
+                trace_archive,
             },
         }
     }
diff --git a/src/checks/e2e.rs b/src/checks/e2e.rs
index 7d66aff..acd3b2b 100644
--- a/src/checks/e2e.rs
+++ b/src/checks/e2e.rs
@@ -201,6 +201,7 @@ impl CheckExecutor for InterleavingExecutor {
             stop_reason: Some("interleaving probe".into()),
             turns: 1,
             error: None,
+            trace_jsonl: None,
         })
     }
 }
@@ -267,6 +268,7 @@ async fn invalid_suite_aborts_run_without_spawning_agents() {
         Arc::new(NoopSandbox),
         dir.path(),
         null_backend(),
+        None,
     )
     .await;
 
diff --git a/src/checks/execution.rs b/src/checks/execution.rs
index af84c91..0fc424c 100644
--- a/src/checks/execution.rs
+++ b/src/checks/execution.rs
@@ -31,6 +31,7 @@ use crate::checks::model::{Check, CheckId, CheckOutcome, Verdict};
 use crate::checks::presenter::{PresenterActor, UiEvent};
 use crate::checks::reporting::ReportingActor;
 use crate::checks::sandbox::Sandbox;
+use crate::checks::trace_archive::{TraceCollector, TraceEntry};
 
 /// The execution actor: turns a stream of discovered checks into a stream of
 /// completed checks, fanning agent runs out onto bounded background tasks.
@@ -46,6 +47,11 @@ pub(crate) struct ExecutionActor {
     reporting: ActorRef<ReportingActor>,
     /// The display-only presenter, told of each check's lifecycle milestones.
     presenter: ActorRef<PresenterActor>,
+    /// Opt-in sink for per-execution session traces (`multi check
+    /// --trace-archive`). `None` disables capture. Shared across the spawned
+    /// per-attempt tasks; every attempt (including retries) pushes its trace
+    /// here before signalling completion downstream.
+    trace_collector: Option<Arc<TraceCollector>>,
 }
 
 impl Actor for ExecutionActor {
@@ -62,6 +68,7 @@ impl Actor for ExecutionActor {
 
 impl ExecutionActor {
     /// Build the actor. `concurrency` and `max_attempts` are clamped to ≥1.
+    #[allow(clippy::too_many_arguments)]
     pub(crate) fn new(
         executor: Arc<dyn CheckExecutor + Send + Sync>,
         sandbox: Arc<dyn Sandbox + Send + Sync>,
@@ -70,6 +77,7 @@ impl ExecutionActor {
         max_attempts: usize,
         reporting: ActorRef<ReportingActor>,
         presenter: ActorRef<PresenterActor>,
+        trace_collector: Option<Arc<TraceCollector>>,
     ) -> Self {
         Self {
             executor,
@@ -79,6 +87,7 @@ impl ExecutionActor {
             max_attempts: max_attempts.max(1),
             reporting,
             presenter,
+            trace_collector,
         }
     }
 
@@ -92,6 +101,7 @@ impl ExecutionActor {
         let working_dir = self.working_dir.clone();
         let reporting = self.reporting.clone();
         let presenter = self.presenter.clone();
+        let trace_collector = self.trace_collector.clone();
         let me = ctx.actor_ref().clone();
         let id = job.id;
 
@@ -105,7 +115,25 @@ impl ExecutionActor {
             // Permit acquired ⇒ the agent is about to run: mark the check Running.
             let _ = presenter.tell(UiEvent::CheckStarted { id }).await;
 
-            let result = run_one(executor, sandbox, job.id, job.check.clone(), &working_dir).await;
+            let mut result =
+                run_one(executor, sandbox, job.id, job.check.clone(), &working_dir).await;
+
+            // Harvest this attempt's trace *before* signalling completion, so it
+            // is collected even for retried attempts (whose outcome never reaches
+            // reporting) and is race-free with run finalization (the collector is
+            // populated before the `tell` that could trigger it).
+            if let Some(collector) = &trace_collector
+                && let Some(bytes) = result.as_mut().ok().and_then(|o| o.trace_jsonl.take())
+            {
+                collector.push(TraceEntry {
+                    req_index: job.req_index,
+                    req_title: job.req_title.clone(),
+                    check_id: job.id,
+                    check_title: job.check.title.clone(),
+                    attempt,
+                    bytes,
+                });
+            }
 
             if has_verdict(Some(&result)) {
                 // The agent reported: reconcile, surface the verdict to the
diff --git a/src/checks/executor/cersei.rs b/src/checks/executor/cersei.rs
index 8e9a4db..5672858 100644
--- a/src/checks/executor/cersei.rs
+++ b/src/checks/executor/cersei.rs
@@ -3,6 +3,7 @@
 //! per-check judge tool — no `claude -p` subprocess, no MCP endpoints.
 
 use std::path::Path;
+use std::sync::Arc;
 use std::time::Duration;
 
 use async_trait::async_trait;
@@ -15,6 +16,7 @@ use miette::{Result, miette};
 use tokio_util::sync::CancellationToken;
 
 use super::judge::{JudgeTool, VerdictSink};
+use super::trace::{TraceHeader, TraceRecorder, serialize_trace};
 use super::{
     AgentOutcome, AgentRunRequest, CheckExecutor, assemble_instructions, judge_tool_directive,
 };
@@ -42,15 +44,25 @@ pub struct CerseiExecutor {
     /// Per-agent wall-clock timeout; on expiry the run is dropped (which stops
     /// the in-process agent) and the check resolves as errored.
     timeout: Duration,
+    /// Whether to capture each execution's agent session as a trace (returned in
+    /// [`AgentOutcome::trace_jsonl`]). Driven by `multi check --trace-archive`.
+    capture_traces: bool,
 }
 
 impl CerseiExecutor {
-    pub fn new(factory: ProviderFactory, model: String, effort: Effort, timeout: Duration) -> Self {
+    pub fn new(
+        factory: ProviderFactory,
+        model: String,
+        effort: Effort,
+        timeout: Duration,
+        capture_traces: bool,
+    ) -> Self {
         Self {
             factory,
             model,
             effort,
             timeout,
+            capture_traces,
         }
     }
 }
@@ -151,6 +163,17 @@ impl CheckExecutor for CerseiExecutor {
             agent_builder = agent_builder.system_prompt(project_prompt);
         }
 
+        // When trace capture is on, tee every agent event into a recorder.
+        // `emit` invokes this synchronously for each event *before* the loop's
+        // early returns, so the trace survives post-verdict cancellation and the
+        // drop-on-timeout below (which cersei's own session persistence would
+        // miss). The executor owns a clone, so a dropped agent doesn't lose it.
+        let recorder = self.capture_traces.then(|| Arc::new(TraceRecorder::new()));
+        if let Some(recorder) = &recorder {
+            let recorder = Arc::clone(recorder);
+            agent_builder = agent_builder.on_event(move |event| recorder.record(event));
+        }
+
         let agent = agent_builder
             .build()
             .map_err(|e| miette!("building check agent: {e}"))?;
@@ -166,12 +189,13 @@ impl CheckExecutor for CerseiExecutor {
         // which surfaces as `CerseiError::Cancelled`).
         let verdict = sink.verdict();
 
-        let outcome = match result {
+        let mut outcome = match result {
             Ok(Ok(output)) => AgentOutcome {
                 verdict,
                 stop_reason: Some(format!("{:?}", output.stop_reason)),
                 turns: output.turns,
                 error: None,
+                trace_jsonl: None,
             },
             Ok(Err(err)) => {
                 let reported = verdict.is_some();
@@ -182,6 +206,7 @@ impl CheckExecutor for CerseiExecutor {
                         .then(|| "cancelled".to_string()),
                     turns: 0,
                     error: (!reported).then(|| err.to_string()),
+                    trace_jsonl: None,
                 }
             }
             Err(_elapsed) => AgentOutcome {
@@ -190,9 +215,25 @@ impl CheckExecutor for CerseiExecutor {
                 stop_reason: None,
                 turns: 0,
                 error: Some(format!("agent timed out after {:?}", self.timeout)),
+                trace_jsonl: None,
             },
         };
 
+        // Render the captured session (if any) now that the outcome is known, so
+        // the trace's footer carries the authoritative verdict/stop/error.
+        if let Some(recorder) = &recorder {
+            let header = TraceHeader {
+                check_id: req.check_id,
+                check_title: &req.check.title,
+                model: &self.model,
+                effort: self.effort,
+                working_dir: &req.working_dir,
+                session_id: &session_id,
+            };
+            let bytes = serialize_trace(recorder, &header, &outcome);
+            outcome.trace_jsonl = Some(bytes);
+        }
+
         Ok(outcome)
     }
 }
diff --git a/src/checks/executor/claude.rs b/src/checks/executor/claude.rs
index 39fdbfa..c32c042 100644
--- a/src/checks/executor/claude.rs
+++ b/src/checks/executor/claude.rs
@@ -138,6 +138,7 @@ impl CheckExecutor for ClaudeExecutor {
                     stop_reason: Some(format!("exit {:?}", output.status.code())),
                     turns: 0,
                     error,
+                    trace_jsonl: None,
                 })
             }
             Err(_elapsed) => {
@@ -148,6 +149,7 @@ impl CheckExecutor for ClaudeExecutor {
                     stop_reason: None,
                     turns: 0,
                     error: Some(format!("agent timed out after {:?}", self.timeout)),
+                    trace_jsonl: None,
                 })
             }
         }
diff --git a/src/checks/executor/fake.rs b/src/checks/executor/fake.rs
index 7eaa68f..570dec8 100644
--- a/src/checks/executor/fake.rs
+++ b/src/checks/executor/fake.rs
@@ -91,6 +91,7 @@ impl CheckExecutor for FakeExecutor {
                 stop_reason: Some("fake: finished without reporting".into()),
                 turns: 1,
                 error: None,
+                trace_jsonl: None,
             });
         }
 
@@ -102,6 +103,7 @@ impl CheckExecutor for FakeExecutor {
                 stop_reason: Some("fake: silent-until".into()),
                 turns: 1,
                 error: None,
+                trace_jsonl: None,
             });
         }
 
@@ -110,6 +112,7 @@ impl CheckExecutor for FakeExecutor {
             stop_reason: Some("fake: reported".into()),
             turns: 1,
             error: None,
+            trace_jsonl: None,
         })
     }
 }
diff --git a/src/checks/executor/mod.rs b/src/checks/executor/mod.rs
index 8468caf..b14918c 100644
--- a/src/checks/executor/mod.rs
+++ b/src/checks/executor/mod.rs
@@ -14,6 +14,7 @@ pub mod claude;
 #[cfg(test)]
 mod fake;
 pub mod judge;
+mod trace;
 
 use std::path::PathBuf;
 
@@ -58,6 +59,12 @@ pub struct AgentOutcome {
     /// An execution-level error distinct from a check merely *failing* (stream
     /// error, agent-build error, or timeout). `None` on a clean finish.
     pub error: Option<String>,
+    /// The self-contained NDJSON session trace for this one execution, when
+    /// trace capture is enabled (`multi check --trace-archive`). `None` when
+    /// capture is off and for executors that don't produce traces (the `claude`
+    /// fallback and the test fake). The execution layer moves these into the
+    /// per-run [`crate::checks::trace_archive`] bundle.
+    pub trace_jsonl: Option<Vec<u8>>,
 }
 
 impl AgentOutcome {
diff --git a/src/checks/executor/trace.rs b/src/checks/executor/trace.rs
new file mode 100644
index 0000000..4224c01
--- /dev/null
+++ b/src/checks/executor/trace.rs
@@ -0,0 +1,625 @@
+//! Opt-in session-trace capture for the in-process cersei executor.
+//!
+//! When `multi check --trace-archive` is set, each check execution attaches a
+//! [`TraceRecorder`] as the agent's `on_event` sink. The recorder coalesces the
+//! streamed [`AgentEvent`]s into a compact ordered list of [`TraceRecord`]s
+//! (token-level text/thinking deltas are joined into per-turn blocks), and once
+//! the run resolves [`serialize_trace`] renders them as a self-contained NDJSON
+//! document: a `header` line, the records, then an `outcome` footer. Those bytes
+//! travel back in [`AgentOutcome::trace_jsonl`] and are bundled by
+//! [`crate::checks::trace_archive`].
+//!
+//! ## Why `on_event`, not cersei's `.memory()` session persistence
+//!
+//! cersei can persist a Claude-Code-compatible transcript via a `Memory`
+//! backend, but it flushes `memory.store()` only on the agentic loop's *normal*
+//! exit. This executor cancels its agent the instant a verdict lands and wraps
+//! the run in a drop-on-timeout, so the store call is skipped for exactly the
+//! cancelled / timed-out / errored runs this feature exists to debug. `emit()`
+//! fires the `on_event` handler for every event *before* those early returns, so
+//! an event-sourced trace survives them. (Two events — `ModelRequestStart` /
+//! `ModelResponseStart` — are emitted only on the streaming channel, which the
+//! blocking `run()` path drops, so they never reach us; everything else does.)
+
+use std::path::Path;
+use std::sync::Mutex;
+use std::time::Instant;
+
+use cersei_agent::events::AgentEvent;
+use cersei_types::{StopReason, Usage};
+use chrono::{DateTime, Utc};
+use serde::Serialize;
+use serde_json::{Value, json};
+
+use super::AgentOutcome;
+use crate::checks::config::Effort;
+
+/// Cap on a single captured tool result. File reads and greps can be enormous;
+/// past this we truncate (with a marker) to keep the archive bounded.
+const MAX_TOOL_RESULT_BYTES: usize = 16 * 1024;
+
+/// The on-disk trace format version, stamped in the header so a reader can adapt.
+const TRACE_FORMAT_VERSION: u32 = 1;
+
+/// A recorder attached to one agent run via `Agent::on_event`. Interior-mutable
+/// so it can live behind an `Arc` shared with the (synchronous) event callback.
+pub(super) struct TraceRecorder {
+    inner: Mutex<Coalescer>,
+    /// Monotonic start, for the relative `elapsed_ms` stamp on each record.
+    started: Instant,
+    /// Wall-clock start, recorded in the trace header.
+    started_at: DateTime<Utc>,
+}
+
+impl TraceRecorder {
+    pub(super) fn new() -> Self {
+        Self {
+            inner: Mutex::new(Coalescer::default()),
+            started: Instant::now(),
+            started_at: Utc::now(),
+        }
+    }
+
+    /// Record one event. Called synchronously from `Agent::emit` for every event
+    /// on the run, including on the cancel/timeout/error paths.
+    pub(super) fn record(&self, event: &AgentEvent) {
+        let elapsed_ms = self.started.elapsed().as_millis() as u64;
+        // A poisoned lock only means a prior panic while recording; recover the
+        // guard and keep capturing rather than panicking the agent's event path.
+        let mut c = self.inner.lock().unwrap_or_else(|e| e.into_inner());
+        match event {
+            AgentEvent::TextDelta(t) => {
+                c.flush_thinking(elapsed_ms);
+                c.pending_text.push_str(t);
+            }
+            AgentEvent::ThinkingDelta(t) => {
+                c.flush_text(elapsed_ms);
+                c.pending_thinking.push_str(t);
+            }
+            AgentEvent::ToolStart { name, id, input } => c.push(
+                elapsed_ms,
+                TraceRecord::ToolStart {
+                    name: name.clone(),
+                    id: id.clone(),
+                    input: input.clone(),
+                },
+            ),
+            AgentEvent::ToolEnd {
+                name,
+                id,
+                result,
+                is_error,
+                duration,
+                ..
+            } => c.push(
+                elapsed_ms,
+                TraceRecord::ToolEnd {
+                    name: name.clone(),
+                    id: id.clone(),
+                    is_error: *is_error,
+                    duration_ms: duration.as_millis() as u64,
+                    result: truncate_result(result),
+                },
+            ),
+            AgentEvent::ToolPermissionCheck { name, id, level } => c.push(
+                elapsed_ms,
+                TraceRecord::ToolPermissionCheck {
+                    name: name.clone(),
+                    id: id.clone(),
+                    level: format!("{level:?}"),
+                },
+            ),
+            AgentEvent::PermissionRequired(req) => c.push(
+                elapsed_ms,
+                TraceRecord::PermissionRequired {
+                    detail: format!("{req:?}"),
+                },
+            ),
+            AgentEvent::TurnStart { turn } => {
+                c.push(elapsed_ms, TraceRecord::TurnStart { turn: *turn })
+            }
+            AgentEvent::TurnComplete {
+                turn,
+                stop_reason,
+                usage,
+            } => c.push(
+                elapsed_ms,
+                TraceRecord::TurnComplete {
+                    turn: *turn,
+                    stop_reason: stop_reason.clone(),
+                    usage: usage.clone(),
+                },
+            ),
+            // Emitted only on the streaming channel, which the blocking `run()`
+            // path drops — recorded here for completeness should that change.
+            AgentEvent::ModelRequestStart {
+                turn,
+                message_count,
+                token_estimate,
+            } => c.push(
+                elapsed_ms,
+                TraceRecord::ModelRequestStart {
+                    turn: *turn,
+                    message_count: *message_count,
+                    token_estimate: *token_estimate,
+                },
+            ),
+            AgentEvent::ModelResponseStart { turn, model } => c.push(
+                elapsed_ms,
+                TraceRecord::ModelResponseStart {
+                    turn: *turn,
+                    model: model.clone(),
+                },
+            ),
+            AgentEvent::TokenWarning { pct_used, state } => c.push(
+                elapsed_ms,
+                TraceRecord::TokenWarning {
+                    pct_used: *pct_used,
+                    state: format!("{state:?}"),
+                },
+            ),
+            AgentEvent::CompactStart {
+                reason,
+                messages_before,
+            } => c.push(
+                elapsed_ms,
+                TraceRecord::CompactStart {
+                    reason: format!("{reason:?}"),
+                    messages_before: *messages_before,
+                },
+            ),
+            AgentEvent::CompactEnd {
+                messages_after,
+                tokens_freed,
+            } => c.push(
+                elapsed_ms,
+                TraceRecord::CompactEnd {
+                    messages_after: *messages_after,
+                    tokens_freed: *tokens_freed,
+                },
+            ),
+            AgentEvent::SessionLoaded {
+                session_id,
+                message_count,
+            } => c.push(
+                elapsed_ms,
+                TraceRecord::SessionLoaded {
+                    session_id: session_id.clone(),
+                    message_count: *message_count,
+                },
+            ),
+            AgentEvent::SessionSaved { session_id } => c.push(
+                elapsed_ms,
+                TraceRecord::SessionSaved {
+                    session_id: session_id.clone(),
+                },
+            ),
+            AgentEvent::CostUpdate {
+                turn_cost,
+                cumulative_cost,
+                input_tokens,
+                output_tokens,
+            } => c.push(
+                elapsed_ms,
+                TraceRecord::CostUpdate {
+                    turn_cost: *turn_cost,
+                    cumulative_cost: *cumulative_cost,
+                    input_tokens: *input_tokens,
+                    output_tokens: *output_tokens,
+                },
+            ),
+            AgentEvent::SubAgentSpawned { agent_id, prompt } => c.push(
+                elapsed_ms,
+                TraceRecord::SubAgentSpawned {
+                    agent_id: agent_id.clone(),
+                    prompt: prompt.clone(),
+                },
+            ),
+            AgentEvent::SubAgentComplete { agent_id, result } => c.push(
+                elapsed_ms,
+                TraceRecord::SubAgentComplete {
+                    agent_id: agent_id.clone(),
+                    turns: result.turns,
+                },
+            ),
+            AgentEvent::HookFired { event, hook_name } => c.push(
+                elapsed_ms,
+                TraceRecord::HookFired {
+                    hook_event: format!("{event:?}"),
+                    hook_name: hook_name.clone(),
+                },
+            ),
+            AgentEvent::HookBlocked {
+                event,
+                hook_name,
+                reason,
+            } => c.push(
+                elapsed_ms,
+                TraceRecord::HookBlocked {
+                    hook_event: format!("{event:?}"),
+                    hook_name: hook_name.clone(),
+                    reason: reason.clone(),
+                },
+            ),
+            AgentEvent::Status(s) => c.push(elapsed_ms, TraceRecord::Status { message: s.clone() }),
+            AgentEvent::Error(s) => c.push(elapsed_ms, TraceRecord::Error { message: s.clone() }),
+            AgentEvent::Complete(output) => c.push(
+                elapsed_ms,
+                TraceRecord::Complete {
+                    turns: output.turns,
+                    stop_reason: output.stop_reason.clone(),
+                },
+            ),
+        }
+    }
+
+    /// Drain the coalesced records, flushing any pending delta buffers first.
+    fn drain(&self) -> Vec<StampedRecord> {
+        let elapsed_ms = self.started.elapsed().as_millis() as u64;
+        let mut c = self.inner.lock().unwrap_or_else(|e| e.into_inner());
+        c.flush_deltas(elapsed_ms);
+        std::mem::take(&mut c.records)
+    }
+}
+
+/// Accumulates records, coalescing contiguous runs of text/thinking deltas.
+#[derive(Default)]
+struct Coalescer {
+    records: Vec<StampedRecord>,
+    pending_text: String,
+    pending_thinking: String,
+}
+
+impl Coalescer {
+    fn flush_thinking(&mut self, elapsed_ms: u64) {
+        if !self.pending_thinking.is_empty() {
+            let text = std::mem::take(&mut self.pending_thinking);
+            self.records.push(StampedRecord {
+                elapsed_ms,
+                record: TraceRecord::Thinking { text },
+            });
+        }
+    }
+
+    fn flush_text(&mut self, elapsed_ms: u64) {
+        if !self.pending_text.is_empty() {
+            let text = std::mem::take(&mut self.pending_text);
+            self.records.push(StampedRecord {
+                elapsed_ms,
+                record: TraceRecord::Text { text },
+            });
+        }
+    }
+
+    fn flush_deltas(&mut self, elapsed_ms: u64) {
+        self.flush_thinking(elapsed_ms);
+        self.flush_text(elapsed_ms);
+    }
+
+    /// Flush any pending deltas (to preserve ordering), then push `record`.
+    fn push(&mut self, elapsed_ms: u64, record: TraceRecord) {
+        self.flush_deltas(elapsed_ms);
+        self.records.push(StampedRecord { elapsed_ms, record });
+    }
+}
+
+/// A record plus the milliseconds since the run started.
+struct StampedRecord {
+    elapsed_ms: u64,
+    record: TraceRecord,
+}
+
+impl StampedRecord {
+    /// Render as a single NDJSON object, splicing `elapsed_ms` alongside the
+    /// record's own fields. Built via `to_value` + insert rather than
+    /// `#[serde(flatten)]` to sidestep flatten's internally-tagged-enum quirks.
+    fn to_json_line(&self) -> String {
+        let mut value = serde_json::to_value(&self.record).unwrap_or_else(
+            |e| json!({ "event": "trace_serialize_error", "detail": e.to_string() }),
+        );
+        if let Value::Object(map) = &mut value {
+            map.insert("elapsed_ms".to_string(), Value::from(self.elapsed_ms));
+        }
+        value.to_string()
+    }
+}
+
+/// One coalesced moment in the agent session. Serialized as an internally-tagged
+/// object (`{"event": "...", ...}`); non-serializable cersei payloads (permission
+/// requests, hook events, warning/compaction enums) are captured as `Debug`
+/// strings, which is plenty for post-hoc debugging.
+#[derive(Serialize)]
+#[serde(tag = "event", rename_all = "snake_case")]
+enum TraceRecord {
+    Text {
+        text: String,
+    },
+    Thinking {
+        text: String,
+    },
+    ToolStart {
+        name: String,
+        id: String,
+        input: Value,
+    },
+    ToolEnd {
+        name: String,
+        id: String,
+        is_error: bool,
+        duration_ms: u64,
+        result: String,
+    },
+    ToolPermissionCheck {
+        name: String,
+        id: String,
+        level: String,
+    },
+    PermissionRequired {
+        detail: String,
+    },
+    TurnStart {
+        turn: u32,
+    },
+    TurnComplete {
+        turn: u32,
+        stop_reason: StopReason,
+        usage: Usage,
+    },
+    ModelRequestStart {
+        turn: u32,
+        message_count: usize,
+        token_estimate: u64,
+    },
+    ModelResponseStart {
+        turn: u32,
+        model: String,
+    },
+    TokenWarning {
+        pct_used: f64,
+        state: String,
+    },
+    CompactStart {
+        reason: String,
+        messages_before: usize,
+    },
+    CompactEnd {
+        messages_after: usize,
+        tokens_freed: u64,
+    },
+    SessionLoaded {
+        session_id: String,
+        message_count: usize,
+    },
+    SessionSaved {
+        session_id: String,
+    },
+    CostUpdate {
+        turn_cost: f64,
+        cumulative_cost: f64,
+        input_tokens: u64,
+        output_tokens: u64,
+    },
+    SubAgentSpawned {
+        agent_id: String,
+        prompt: String,
+    },
+    SubAgentComplete {
+        agent_id: String,
+        turns: u32,
+    },
+    HookFired {
+        hook_event: String,
+        hook_name: String,
+    },
+    HookBlocked {
+        hook_event: String,
+        hook_name: String,
+        reason: String,
+    },
+    Status {
+        message: String,
+    },
+    Error {
+        message: String,
+    },
+    Complete {
+        turns: u32,
+        stop_reason: StopReason,
+    },
+}
+
+/// Truncate a tool result to [`MAX_TOOL_RESULT_BYTES`] on a char boundary,
+/// appending a marker noting how many bytes were dropped.
+fn truncate_result(s: &str) -> String {
+    if s.len() <= MAX_TOOL_RESULT_BYTES {
+        return s.to_string();
+    }
+    let mut end = MAX_TOOL_RESULT_BYTES;
+    while end > 0 && !s.is_char_boundary(end) {
+        end -= 1;
+    }
+    format!("{}…[truncated {} bytes]", &s[..end], s.len() - end)
+}
+
+/// Fields the executor knows about the execution, stamped into the trace header.
+/// (Requirement grouping and the retry/attempt number are added by the archive
+/// layer as the file's path, since the executor doesn't know them.)
+pub(super) struct TraceHeader<'a> {
+    pub check_id: usize,
+    pub check_title: &'a str,
+    pub model: &'a str,
+    pub effort: Effort,
+    pub working_dir: &'a Path,
+    pub session_id: &'a str,
+}
+
+/// Render the recorded session as a self-contained NDJSON document: a `header`
+/// line, one line per coalesced record, then an `outcome` footer summarizing the
+/// authoritative [`AgentOutcome`] (which is meaningful even when no terminal
+/// `Complete`/`Error` event arrived, e.g. on a drop-on-timeout).
+pub(super) fn serialize_trace(
+    recorder: &TraceRecorder,
+    header: &TraceHeader<'_>,
+    outcome: &AgentOutcome,
+) -> Vec<u8> {
+    let records = recorder.drain();
+
+    let mut out = String::new();
+
+    let header_line = json!({
+        "event": "header",
+        "trace_format": TRACE_FORMAT_VERSION,
+        "check_id": header.check_id,
+        "check_title": header.check_title,
+        "model": header.model,
+        "effort": format!("{:?}", header.effort).to_lowercase(),
+        "working_dir": header.working_dir.display().to_string(),
+        "session_id": header.session_id,
+        "started_at": recorder.started_at.to_rfc3339(),
+    });
+    out.push_str(&header_line.to_string());
+    out.push('\n');
+
+    for rec in &records {
+        out.push_str(&rec.to_json_line());
+        out.push('\n');
+    }
+
+    let verdict = outcome
+        .verdict
+        .as_ref()
+        .map(|v| json!({ "success": v.success, "evidence": v.evidence }));
+    let footer_line = json!({
+        "event": "outcome",
+        "verdict": verdict,
+        "stop_reason": outcome.stop_reason,
+        "turns": outcome.turns,
+        "error": outcome.error,
+        "finished_at": Utc::now().to_rfc3339(),
+    });
+    out.push_str(&footer_line.to_string());
+    out.push('\n');
+
+    out.into_bytes()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::Duration;
+
+    use crate::checks::executor::CheckReport;
+
+    fn parse_lines(bytes: &[u8]) -> Vec<Value> {
+        String::from_utf8(bytes.to_vec())
+            .unwrap()
+            .lines()
+            .map(|l| serde_json::from_str::<Value>(l).unwrap())
+            .collect()
+    }
+
+    #[test]
+    fn coalesces_deltas_and_frames_header_and_outcome() {
+        let rec = TraceRecorder::new();
+        // Two contiguous text deltas must coalesce into one record.
+        rec.record(&AgentEvent::TextDelta("Hello ".into()));
+        rec.record(&AgentEvent::TextDelta("world".into()));
+        rec.record(&AgentEvent::ToolStart {
+            name: "grep_tool".into(),
+            id: "t1".into(),
+            input: json!({ "pattern": "yellow" }),
+        });
+        rec.record(&AgentEvent::ToolEnd {
+            name: "grep_tool".into(),
+            id: "t1".into(),
+            result: "no matches".into(),
+            is_error: false,
+            duration: Duration::from_millis(5),
+            compression: None,
+        });
+        rec.record(&AgentEvent::TurnComplete {
+            turn: 1,
+            stop_reason: StopReason::EndTurn,
+            usage: Usage::default(),
+        });
+
+        let outcome = AgentOutcome {
+            verdict: Some(CheckReport {
+                success: true,
+                evidence: Some("ok".into()),
+            }),
+            stop_reason: Some("EndTurn".into()),
+            turns: 1,
+            error: None,
+            trace_jsonl: None,
+        };
+        let header = TraceHeader {
+            check_id: 3,
+            check_title: "No yellow",
+            model: "claude-sonnet-4-6",
+            effort: Effort::Low,
+            working_dir: Path::new("/tmp/sandbox"),
+            session_id: "multi-check-3",
+        };
+
+        let events = parse_lines(&serialize_trace(&rec, &header, &outcome));
+
+        // First line is the header, last is the authoritative outcome.
+        assert_eq!(events.first().unwrap()["event"], "header");
+        assert_eq!(events.first().unwrap()["check_id"], 3);
+        assert_eq!(events.first().unwrap()["effort"], "low");
+        let last = events.last().unwrap();
+        assert_eq!(last["event"], "outcome");
+        assert_eq!(last["verdict"]["success"], true);
+        assert_eq!(last["turns"], 1);
+
+        // The two text deltas became a single coalesced record.
+        let texts: Vec<_> = events.iter().filter(|e| e["event"] == "text").collect();
+        assert_eq!(texts.len(), 1);
+        assert_eq!(texts[0]["text"], "Hello world");
+
+        assert!(events.iter().any(|e| e["event"] == "tool_start"));
+        assert!(
+            events
+                .iter()
+                .any(|e| e["event"] == "tool_end" && e["result"] == "no matches")
+        );
+        // Every record line (between the header and outcome frames) carries a
+        // relative timestamp; the frames themselves don't.
+        assert!(
+            events
+                .iter()
+                .filter(|e| e["event"] != "header" && e["event"] != "outcome")
+                .all(|e| e.get("elapsed_ms").is_some())
+        );
+    }
+
+    #[test]
+    fn truncates_oversized_tool_results() {
+        let rec = TraceRecorder::new();
+        rec.record(&AgentEvent::ToolEnd {
+            name: "file_read".into(),
+            id: "t1".into(),
+            result: "x".repeat(MAX_TOOL_RESULT_BYTES + 100),
+            is_error: false,
+            duration: Duration::from_millis(1),
+            compression: None,
+        });
+        let outcome = AgentOutcome::default();
+        let header = TraceHeader {
+            check_id: 0,
+            check_title: "t",
+            model: "m",
+            effort: Effort::Low,
+            working_dir: Path::new("."),
+            session_id: "s",
+        };
+
+        let events = parse_lines(&serialize_trace(&rec, &header, &outcome));
+        let tool_end = events.iter().find(|e| e["event"] == "tool_end").unwrap();
+        let result = tool_end["result"].as_str().unwrap();
+        assert!(result.contains("truncated"), "expected truncation marker");
+        // Bounded to the cap plus the short marker.
+        assert!(result.len() <= MAX_TOOL_RESULT_BYTES + 40);
+    }
+}
diff --git a/src/checks/mod.rs b/src/checks/mod.rs
index b0f8a61..bfbec47 100644
--- a/src/checks/mod.rs
+++ b/src/checks/mod.rs
@@ -33,10 +33,12 @@ pub mod model;
 mod presenter;
 mod reporting;
 pub mod sandbox;
+mod trace_archive;
 
 #[cfg(test)]
 mod e2e;
 
+use std::io::Write;
 use std::path::Path;
 use std::sync::Arc;
 
@@ -44,6 +46,8 @@ use kameo::actor::{ActorRef, Spawn};
 use miette::{IntoDiagnostic, Result, miette};
 use tokio::sync::oneshot;
 
+use crate::checks::trace_archive::TraceCollector;
+
 use crate::Terminal;
 use crate::checks::config::{CliOverrides, Config};
 use crate::checks::discovery::DiscoveryActor;
@@ -88,9 +92,35 @@ pub async fn run(terminal: &Terminal, working_dir: &Path, overrides: CliOverride
         owns_record,
     } = select_backend(terminal.stdout_allows_color());
 
+    // If trace capture is enabled, create the shared collector now and hand a
+    // clone to the pipeline: the executor fills each execution's trace and the
+    // execution actor routes every attempt (retries included) here.
+    let trace_collector = resolved
+        .config
+        .trace_archive
+        .as_ref()
+        .map(|_| Arc::new(TraceCollector::new()));
+
     // Phases 2–5: drive the actor pipeline (with the presenter) to its terminal
     // result, then render the record.
-    let outcomes = run_pipeline(&resolved.config, executor, sandbox, working_dir, backend).await?;
+    let outcomes = run_pipeline(
+        &resolved.config,
+        executor,
+        sandbox,
+        working_dir,
+        backend,
+        trace_collector.clone(),
+    )
+    .await?;
+
+    // Bundle the captured traces. Best-effort: a trace-archiving failure must not
+    // fail an otherwise-successful check run. `run_pipeline` has already torn the
+    // presenter down, so stderr is free for the notice.
+    if let (Some(collector), Some(path)) =
+        (&trace_collector, resolved.config.trace_archive.as_deref())
+    {
+        write_trace_archive(collector, path);
+    }
 
     if owns_record {
         // The inline TUI was the sole terminal writer and has already flushed the
@@ -117,6 +147,7 @@ fn spawn_core(
     sandbox: Arc<dyn Sandbox + Send + Sync>,
     working_dir: &Path,
     backend: Box<dyn RenderBackend>,
+    trace_collector: Option<Arc<TraceCollector>>,
 ) -> (
     ActorRef<ExecutionActor>,
     ActorRef<ReportingActor>,
@@ -134,6 +165,7 @@ fn spawn_core(
         cfg.max_attempts,
         reporting.clone(),
         presenter.clone(),
+        trace_collector,
     ));
     (execution, reporting, presenter, rx)
 }
@@ -202,6 +234,31 @@ async fn stream_requirements(
     Ok(())
 }
 
+/// Build and write the opt-in session-trace archive. Best-effort: on failure it
+/// logs and returns rather than failing an otherwise-successful check run. Only
+/// called when `--trace-archive` is set.
+fn write_trace_archive(collector: &TraceCollector, path: &Path) {
+    if collector.is_empty() {
+        tracing::info!("trace capture enabled but no check executions ran; no archive written");
+        return;
+    }
+    match trace_archive::write_archive(collector, path) {
+        Ok(count) => {
+            tracing::info!(count, path = %path.display(), "wrote check session-trace archive");
+            let _ = writeln!(
+                std::io::stderr(),
+                "Wrote {count} check session trace(s) to {}",
+                path.display()
+            );
+        }
+        Err(e) => tracing::error!(
+            error = %e,
+            path = %path.display(),
+            "failed to write check session-trace archive",
+        ),
+    }
+}
+
 /// Await the pipeline's terminal result, mapping a dropped channel (a dead
 /// reporting actor) to a diagnostic so a crashed actor fails the run rather than
 /// hanging it.
@@ -223,9 +280,16 @@ async fn run_pipeline(
     sandbox: Arc<dyn Sandbox + Send + Sync>,
     working_dir: &Path,
     backend: Box<dyn RenderBackend>,
+    trace_collector: Option<Arc<TraceCollector>>,
 ) -> Result<Vec<RequirementOutcome>> {
-    let (execution, reporting, presenter, rx) =
-        spawn_core(cfg, executor, sandbox, working_dir, backend);
+    let (execution, reporting, presenter, rx) = spawn_core(
+        cfg,
+        executor,
+        sandbox,
+        working_dir,
+        backend,
+        trace_collector,
+    );
     let discovery = DiscoveryActor::spawn(DiscoveryActor::new(
         working_dir.to_path_buf(),
         execution.clone(),
@@ -262,7 +326,7 @@ async fn run_to_outcomes(
     backend: Box<dyn RenderBackend>,
 ) -> Result<Vec<RequirementOutcome>> {
     let (execution, reporting, presenter, rx) =
-        spawn_core(cfg, executor, sandbox, working_dir, backend);
+        spawn_core(cfg, executor, sandbox, working_dir, backend, None);
     stream_requirements(&execution, &presenter, requirements).await?;
     let outcomes = await_result(rx).await;
     shutdown_presenter(&presenter).await;
diff --git a/src/checks/trace_archive.rs b/src/checks/trace_archive.rs
new file mode 100644
index 0000000..47cb0d8
--- /dev/null
+++ b/src/checks/trace_archive.rs
@@ -0,0 +1,238 @@
+//! In-memory assembly of the opt-in session-trace archive
+//! (`multi check --trace-archive <PATH>`).
+//!
+//! Each check *execution* — including every retry — contributes one NDJSON trace
+//! produced by the executor (see [`crate::checks::executor::trace`]) and carried
+//! back in [`AgentOutcome::trace_jsonl`]. The execution actor pushes each one
+//! into a shared [`TraceCollector`], tagged with its requirement, check title,
+//! and 1-based attempt number. When the run finishes, [`TraceCollector::into_tar_gz`]
+//! lays the traces out as
+//!
+//! ```text
+//! {NN}-{requirement-slug}/{check-slug}.attempt-{k}.jsonl
+//! ```
+//!
+//! — one directory per requirement, one file per (check, attempt) — and streams
+//! them through a `tar` builder wrapped in a `flate2` gzip encoder, entirely in
+//! memory: pure Rust, no temp directory, no `tar`/`gzip` subprocess.
+//!
+//! [`AgentOutcome::trace_jsonl`]: crate::checks::executor::AgentOutcome::trace_jsonl
+
+use std::collections::HashSet;
+use std::io::Write;
+use std::sync::Mutex;
+
+use flate2::Compression;
+use flate2::write::GzEncoder;
+use miette::{IntoDiagnostic, Result};
+use tar::{Builder, Header};
+
+use crate::checks::model::CheckId;
+
+/// One captured execution trace plus the metadata that places it in the archive.
+pub(crate) struct TraceEntry {
+    /// The requirement's declaration-order index (its archive directory).
+    pub req_index: usize,
+    /// The requirement title (slugged into the directory name).
+    pub req_title: String,
+    /// The check's run-unique id, used only to disambiguate a same-title clash.
+    pub check_id: CheckId,
+    /// The check title (slugged into the file name).
+    pub check_title: String,
+    /// 1-based attempt number; retries increment it.
+    pub attempt: usize,
+    /// The self-contained NDJSON trace document for this one execution.
+    pub bytes: Vec<u8>,
+}
+
+/// Thread-safe sink the execution tasks push into as each attempt completes.
+/// Shared behind an `Arc` across the concurrent per-check tasks.
+#[derive(Default)]
+pub(crate) struct TraceCollector {
+    entries: Mutex<Vec<TraceEntry>>,
+}
+
+impl TraceCollector {
+    pub(crate) fn new() -> Self {
+        Self::default()
+    }
+
+    /// Record one execution's trace. Never blocks meaningfully — a brief lock on
+    /// a `Vec` push.
+    pub(crate) fn push(&self, entry: TraceEntry) {
+        self.lock().push(entry);
+    }
+
+    /// How many execution traces have been collected.
+    pub(crate) fn len(&self) -> usize {
+        self.lock().len()
+    }
+
+    /// Whether no execution traces have been collected.
+    pub(crate) fn is_empty(&self) -> bool {
+        self.lock().is_empty()
+    }
+
+    /// Consume the collected traces into a gzip-compressed tar archive, built
+    /// entirely in memory.
+    pub(crate) fn into_tar_gz(&self) -> Result<Vec<u8>> {
+        let mut entries = std::mem::take(&mut *self.lock());
+        // Deterministic layout: group by requirement, then check, then attempt.
+        entries.sort_by(|a, b| {
+            a.req_index
+                .cmp(&b.req_index)
+                .then(a.check_id.cmp(&b.check_id))
+                .then(a.attempt.cmp(&b.attempt))
+        });
+
+        let mut used_paths: HashSet<String> = HashSet::new();
+        let mut tar = Builder::new(GzEncoder::new(Vec::new(), Compression::default()));
+
+        for entry in &entries {
+            let dir = format!("{:02}-{}", entry.req_index + 1, slug(&entry.req_title));
+            let base = slug(&entry.check_title);
+            let mut path = format!("{dir}/{base}.attempt-{}.jsonl", entry.attempt);
+            // Two distinct checks under one requirement can share a title (e.g.
+            // both inherited it from the requirement), colliding on the same
+            // (title, attempt) path. Disambiguate the loser with its check id.
+            if !used_paths.insert(path.clone()) {
+                path = format!(
+                    "{dir}/{base}.check-{}.attempt-{}.jsonl",
+                    entry.check_id, entry.attempt
+                );
+                used_paths.insert(path.clone());
+            }
+
+            let mut header = Header::new_gnu();
+            header.set_size(entry.bytes.len() as u64);
+            header.set_mode(0o644);
+            // Fixed mtime keeps the archive byte-reproducible for a given input.
+            header.set_mtime(0);
+            header.set_cksum();
+            tar.append_data(&mut header, &path, entry.bytes.as_slice())
+                .into_diagnostic()?;
+        }
+
+        // `into_inner` finishes the tar (writing its trailer), then `finish`
+        // flushes the gzip trailer and hands back the compressed bytes.
+        let gz = tar.into_inner().into_diagnostic()?;
+        gz.finish().into_diagnostic()
+    }
+
+    fn lock(&self) -> std::sync::MutexGuard<'_, Vec<TraceEntry>> {
+        // A poisoned lock only means a task panicked mid-push; recover the guard
+        // rather than propagating the panic into the archive step.
+        self.entries.lock().unwrap_or_else(|e| e.into_inner())
+    }
+}
+
+/// Slugify a title into one filesystem-safe path segment: lowercase alphanumerics
+/// kept, every other run collapsed to a single `-`, ends trimmed. Empty input (or
+/// all-punctuation) becomes `untitled`.
+fn slug(title: &str) -> String {
+    let mut out = String::with_capacity(title.len());
+    let mut prev_dash = false;
+    for ch in title.chars() {
+        if ch.is_ascii_alphanumeric() {
+            out.push(ch.to_ascii_lowercase());
+            prev_dash = false;
+        } else if !prev_dash {
+            out.push('-');
+            prev_dash = true;
+        }
+    }
+    let trimmed = out.trim_matches('-');
+    if trimmed.is_empty() {
+        "untitled".to_string()
+    } else {
+        trimmed.to_string()
+    }
+}
+
+/// Build the archive from `collector` and write it to `path`, creating parent
+/// directories as needed. A single self-contained step so the orchestrator can
+/// treat trace archiving as best-effort: failures are surfaced as `Err` for the
+/// caller to log without failing the check run.
+pub(crate) fn write_archive(collector: &TraceCollector, path: &std::path::Path) -> Result<usize> {
+    let count = collector.len();
+    let bytes = collector.into_tar_gz()?;
+    if let Some(parent) = path.parent()
+        && !parent.as_os_str().is_empty()
+    {
+        std::fs::create_dir_all(parent).into_diagnostic()?;
+    }
+    let mut file = std::fs::File::create(path).into_diagnostic()?;
+    file.write_all(&bytes).into_diagnostic()?;
+    Ok(count)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn entry(
+        req_index: usize,
+        req_title: &str,
+        check_id: CheckId,
+        check_title: &str,
+        attempt: usize,
+    ) -> TraceEntry {
+        TraceEntry {
+            req_index,
+            req_title: req_title.to_string(),
+            check_id,
+            check_title: check_title.to_string(),
+            attempt,
+            bytes: format!("{{\"event\":\"header\",\"check_id\":{check_id}}}\n").into_bytes(),
+        }
+    }
+
+    #[test]
+    fn slug_is_filesystem_safe() {
+        assert_eq!(slug("No YELLOW text!"), "no-yellow-text");
+        assert_eq!(slug("  spaces  "), "spaces");
+        assert_eq!(slug("a/b\\c"), "a-b-c");
+        assert_eq!(slug("***"), "untitled");
+        assert_eq!(slug(""), "untitled");
+    }
+
+    #[test]
+    fn archive_round_trips_expected_paths() {
+        use flate2::read::GzDecoder;
+        use std::io::Read;
+
+        let collector = TraceCollector::new();
+        // Requirement 0 has one check run twice (attempt 1 then 2).
+        collector.push(entry(0, "First req", 0, "Check A", 1));
+        collector.push(entry(0, "First req", 0, "Check A", 2));
+        // Requirement 1 has two checks that share a title (collision path).
+        collector.push(entry(1, "Second req", 1, "Same", 1));
+        collector.push(entry(1, "Second req", 2, "Same", 1));
+
+        assert_eq!(collector.len(), 4);
+        let gz = collector.into_tar_gz().unwrap();
+        // Draining left the collector empty.
+        assert_eq!(collector.len(), 0);
+
+        let mut tar_bytes = Vec::new();
+        GzDecoder::new(&gz[..]).read_to_end(&mut tar_bytes).unwrap();
+        let mut archive = tar::Archive::new(&tar_bytes[..]);
+        let mut paths: Vec<String> = archive
+            .entries()
+            .unwrap()
+            .map(|e| e.unwrap().path().unwrap().to_string_lossy().into_owned())
+            .collect();
+        paths.sort();
+
+        assert_eq!(
+            paths,
+            vec![
+                "01-first-req/check-a.attempt-1.jsonl".to_string(),
+                "01-first-req/check-a.attempt-2.jsonl".to_string(),
+                // The second check with the same title is disambiguated by id.
+                "02-second-req/same.attempt-1.jsonl".to_string(),
+                "02-second-req/same.check-2.attempt-1.jsonl".to_string(),
+            ]
+        );
+    }
+}
diff --git a/src/config/check/mod.rs b/src/config/check/mod.rs
index f6f1ff3..169b3fd 100644
--- a/src/config/check/mod.rs
+++ b/src/config/check/mod.rs
@@ -39,6 +39,13 @@ pub struct CheckSubcommand {
     /// available CPU cores.
     #[arg(long)]
     concurrency: Option<NonZeroUsize>,
+
+    /// Capture every agent check session and bundle the traces into this
+    /// `.tar.gz` archive (off by default). One directory per requirement, one
+    /// file per check execution named by check title and numbered by retry.
+    /// Overrides `checks.trace_archive` from env/file.
+    #[arg(long, value_name = "PATH")]
+    trace_archive: Option<PathBuf>,
 }
 
 impl CheckSubcommand {
@@ -56,6 +63,7 @@ impl CheckSubcommand {
             self.effort,
             self.executor,
             self.concurrency.map(NonZeroUsize::get),
+            self.trace_archive.clone(),
         )
     }
 }