wack · RobbieMcKinstry · Jul 2, 2026
diff --git a/src/checks/execution.rs b/src/checks/execution.rs
@@ -115,8 +115,15 @@ impl ExecutionActor {
             // Permit acquired ⇒ the agent is about to run: mark the check Running.
             let _ = presenter.tell(UiEvent::CheckStarted { id }).await;
 
-            let mut result =
-                run_one(executor, sandbox, job.id, job.check.clone(), &working_dir).await;
+            let mut result = run_one(
+                executor,
+                sandbox,
+                job.id,
+                job.check.clone(),
+                &working_dir,
+                attempt,
+            )
+            .await;
 
             // Harvest this attempt's trace *before* signalling completion, so it
             // is collected even for retried attempts (whose outcome never reaches
@@ -249,13 +256,15 @@ async fn run_one(
     id: CheckId,
     check: Check,
     working_dir: &Path,
+    attempt: usize,
 ) -> Result<AgentOutcome> {
     let handle = sandbox.create(working_dir).await?;
 
     let request = crate::checks::executor::AgentRunRequest {
         check_id: id,
         check,
         working_dir: handle.path().to_path_buf(),
+        attempt: u32::try_from(attempt).unwrap_or(u32::MAX),
     };
 
     let outcome = executor.run_check(request).await;
@@ -422,7 +431,9 @@ mod tests {
         .await
         .unwrap();
         assert!(out[0].satisfied);
-        // Ran twice: one silent attempt, then one reporting attempt.
-        assert_eq!(executor.seen().len(), 2);
+        // Ran twice: one silent attempt, then one reporting attempt — and the
+        // executor was told which attempt each was (retries must be able to
+        // vary temperature/instructions rather than replaying attempt 1).
+        assert_eq!(executor.seen_attempts(), vec![(0, 1), (0, 2)]);
     }
 }
diff --git a/src/checks/executor/cersei.rs b/src/checks/executor/cersei.rs
@@ -15,6 +15,7 @@ use cersei_types::CerseiError;
 use miette::{Result, miette};
 use tokio_util::sync::CancellationToken;
 
+use super::jail::Jailed;
 use super::judge::{JudgeTool, VerdictSink};
 use super::trace::{TraceHeader, TraceRecorder, serialize_trace};
 use super::{
@@ -70,11 +71,27 @@ impl CerseiExecutor {
 /// The read-only tool set a verification agent gets by default: observe, do not
 /// mutate. Execution-requiring checks (which would need Bash/Write) are gated
 /// separately and are future work — the default is least privilege.
+///
+/// Each tool is [`Jailed`] to the agent's working directory: "read-only" alone
+/// still allowed reading anywhere the user can, which let lost agents launch
+/// unbounded globs over the host filesystem (timeouts) and grade the live
+/// repository instead of the sandbox (postmortem C5). The jail turns an
+/// out-of-sandbox path into an immediate tool error that steers the agent back.
 fn read_only_tools() -> Vec<Box<dyn Tool>> {
     vec![
-        Box::new(cersei_tools::file_read::FileReadTool),
-        Box::new(cersei_tools::grep_tool::GrepTool),
-        Box::new(cersei_tools::glob_tool::GlobTool),
+        Box::new(Jailed::path_keys(
+            cersei_tools::file_read::FileReadTool,
+            &["file_path"],
+        )),
+        Box::new(Jailed::path_keys(
+            cersei_tools::grep_tool::GrepTool,
+            &["path"],
+        )),
+        Box::new(Jailed::glob(
+            cersei_tools::glob_tool::GlobTool,
+            &["path"],
+            "pattern",
+        )),
     ]
 }
 
@@ -111,6 +128,16 @@ fn effort_temperature(effort: Effort) -> f32 {
     }
 }
 
+/// Sampling temperature for a 1-based `attempt`: the effort's base value plus
+/// 0.5 for every retry, never exceeding 1.0. With effort=low the base is 0.0,
+/// so an unraised retry would be a replay: the 2026-07-01 postmortem caught one
+/// check reproducing its fatal trajectory near-verbatim on all three attempts.
+/// Attempt 0 is treated like attempt 1; a retry must sample differently to pay off.
+fn attempt_temperature(effort: Effort, attempt: u32) -> f32 {
+    let retries = attempt.saturating_sub(1) as f32;
+    f32::min(effort_temperature(effort) + 0.5 * retries, 1.0)
+}
+
 #[async_trait]
 impl CheckExecutor for CerseiExecutor {
     async fn run_check(&self, req: AgentRunRequest) -> Result<AgentOutcome> {
@@ -136,7 +163,12 @@ impl CheckExecutor for CerseiExecutor {
         let cancel = CancellationToken::new();
         let judge = JudgeTool::new(sink.clone(), cancel.clone());
 
-        let instructions = assemble_instructions(&req.check, &judge_tool_directive());
+        let instructions = assemble_instructions(
+            &req.check,
+            &judge_tool_directive(),
+            &req.working_dir,
+            req.attempt,
+        );
 
         let mut agent_builder = Agent::builder()
             .provider_boxed(provider)
@@ -149,7 +181,7 @@ impl CheckExecutor for CerseiExecutor {
             .tools(read_only_tools())
             .tool(judge)
             // Thinking is intentionally left disabled (see `effort_temperature`).
-            .temperature(effort_temperature(self.effort))
+            .temperature(attempt_temperature(self.effort, req.attempt))
             .max_turns(MAX_TURNS)
             .cancel_token(cancel.clone());
 
@@ -237,3 +269,18 @@ impl CheckExecutor for CerseiExecutor {
         Ok(outcome)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn retries_raise_the_temperature_up_to_the_cap() {
+        // At effort=low each retry adds 0.5 on top of the 0.0 base.
+        let low = |attempt| attempt_temperature(Effort::Low, attempt);
+        assert_eq!([low(1), low(2), low(3)], [0.0, 0.5, 1.0]);
+        assert_eq!(attempt_temperature(Effort::Medium, 2), 1.0);
+        // Already at the cap: retries must not push past valid API range.
+        assert_eq!(attempt_temperature(Effort::High, 3), 1.0);
+    }
+}
diff --git a/src/checks/executor/claude.rs b/src/checks/executor/claude.rs
@@ -97,7 +97,12 @@ impl CheckExecutor for ClaudeExecutor {
             "dispatching claude -p check (fallback)",
         );
 
-        let instructions = assemble_instructions(&req.check, &file_report_directive());
+        let instructions = assemble_instructions(
+            &req.check,
+            &file_report_directive(),
+            &req.working_dir,
+            req.attempt,
+        );
 
         let mut cmd = Command::new(&self.program);
         cmd.arg("-p")

diff --git a/src/checks/executor/fake.rs b/src/checks/executor/fake.rs
@@ -22,7 +22,8 @@ pub struct FakeExecutor {
     /// id to `(report_on_attempt, report)`. Exercises the retry path: the same
     /// `CheckId` is re-run, so the fake counts attempts per id.
     silent_until: HashMap<CheckId, (usize, CheckReport)>,
-    seen: Mutex<Vec<CheckId>>,
+    /// Every `(check_id, attempt)` the fake was asked to run, in call order.
+    seen: Mutex<Vec<(CheckId, u32)>>,
 }
 
 impl FakeExecutor {
@@ -72,6 +73,18 @@ impl FakeExecutor {
 
     /// The check ids the fake was asked to run, in call order.
     pub fn seen(&self) -> Vec<CheckId> {
+        self.seen
+            .lock()
+            .unwrap()
+            .iter()
+            .map(|(id, _)| *id)
+            .collect()
+    }
+
+    /// The `(check_id, attempt)` pairs the fake was asked to run, in call
+    /// order — lets tests assert the retry plumbing threads attempt numbers
+    /// through to the executor.
+    pub fn seen_attempts(&self) -> Vec<(CheckId, u32)> {
         self.seen.lock().unwrap().clone()
     }
 }
@@ -81,8 +94,8 @@ impl CheckExecutor for FakeExecutor {
     async fn run_check(&self, req: AgentRunRequest) -> Result<AgentOutcome> {
         let attempt = {
             let mut seen = self.seen.lock().unwrap();
-            seen.push(req.check_id);
-            seen.iter().filter(|id| **id == req.check_id).count()
+            seen.push((req.check_id, req.attempt));
+            seen.iter().filter(|(id, _)| *id == req.check_id).count()
         };
 
         if self.silent.contains(&req.check_id) {