diff --git a/BUGS.md b/BUGS.md new file mode 100644 index 0000000..423bd33 --- /dev/null +++ b/BUGS.md @@ -0,0 +1,43 @@ +# Bugs + +These are bugs (or missing features) I've observed while working with `multi checks`. + +- [ ] No use of Cersei workflows to chain multiple prompts together. + +- [ ] No support for Fireworks AI. + +- [ ] No GitHub Action available. + +- [ ] No loading of skills. + +- [ ] No loading of RULES.md files from the .claude directory. + +- [ ] Assemble_instructions is hard-coded: src/checks/executor/mod.rs:98 (definition), called from src/checks/executor/cersei.rs:110 + +- [ ] No system prompt provided. + +- [ ] Not sure if prompt caching is enabled at all. + +- [ ] No trace capture. We need a way to record all session traces so that we can analyze why they failed. + +- CERSEI: `append_system_prompt()` function is dead unless routed through the separate build_system_prompt() composer. + +## Fixes + +- [x] No loading of CLAUDE.md files + +- [x] Concurrency still not respected. + +- [x] Full error text got cut off at the end of the terminal screen instead of wrapping. Turned out to +live in the *presenter* (`src/checks/presenter/inline.rs`), not the `Reporter` — the inline TUI is the +sole terminal writer for the whole run (see `owns_record` in `src/checks/mod.rs`), so `Reporter::report()` +never even runs in a TTY session. The presenter renders into a fixed-size `ratatui::Buffer` via +`insert_before`, which clips instead of wrapping; fixed by word-wrapping every flushed line to the +terminal width before building it. + +- [x] A `tracing::info!`/`debug!` log line fired mid-run (e.g. the "retrying check whose agent did not +report" line) wrote raw bytes straight to stdout, corrupting the inline TUI's cursor-managed viewport. +Fixed by giving the presenter full ownership of log output: `PresenterActor` now registers itself as +`tracing`'s active sink (`src/terminal/logging.rs::route_logs`) for the run's duration and folds each +line in as a `UiEvent::Log`. The inline TUI flushes each line to permanent scrollback (same mechanism as +a completed requirement) and also keeps the last few in a small live pane, separate from the tree. diff --git a/Cargo.lock b/Cargo.lock index e68796c..67c8cc0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3455,6 +3455,7 @@ dependencies = [ "bigdecimal", "bon", "cersei-agent", + "cersei-memory", "cersei-provider", "cersei-tools", "cersei-types", @@ -3489,6 +3490,7 @@ dependencies = [ "serde_with", "static_assertions", "tempfile", + "textwrap", "thiserror 2.0.18", "tokio", "tokio-graceful-shutdown", @@ -5499,6 +5501,12 @@ version = "1.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" +[[package]] +name = "smawk" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8e2fb0f499abb4d162f2bedad68f5ef91a1682b5a03596ddb67efd37768d100" + [[package]] name = "socket2" version = "0.5.10" @@ -5915,6 +5923,7 @@ version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" dependencies = [ + "smawk", "unicode-linebreak", "unicode-width 0.2.0", ] diff --git a/Cargo.toml b/Cargo.toml index 84bd9a8..f6abe84 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,8 +84,10 @@ comrak = { version = "0.52", default-features = false } libc = "0.2.186" tempfile = "3" cersei-agent = { git = "https://github.com/wack/cersei.git", branch = "trunk" } +cersei-memory = { git = "https://github.com/wack/cersei.git", branch = "trunk" } cersei-tools = { git = "https://github.com/wack/cersei.git", branch = "trunk" } cersei-types = { git = "https://github.com/wack/cersei.git", branch = "trunk" } +textwrap = "0.16" [workspace.dependencies] pretty_assertions = "1.4" diff --git a/src/checks/executor/cersei.rs b/src/checks/executor/cersei.rs index 4895247..8e9a4db 100644 --- a/src/checks/executor/cersei.rs +++ b/src/checks/executor/cersei.rs @@ -2,10 +2,12 @@ //! `cersei_agent::Agent` in its CoW sandbox, capturing the verdict through a //! per-check judge tool — no `claude -p` subprocess, no MCP endpoints. +use std::path::Path; use std::time::Duration; use async_trait::async_trait; use cersei_agent::Agent; +use cersei_memory::claudemd; use cersei_tools::permissions::AllowReadOnly; use cersei_tools::{Tool, clear_session_shell_state}; use cersei_types::CerseiError; @@ -64,6 +66,21 @@ fn read_only_tools() -> Vec> { ] } +/// Load the project's `CLAUDE.md` hierarchy (managed `~/.claude/rules/*.md`, +/// user `~/.claude/CLAUDE.md`, project `{root}/CLAUDE.md`, local +/// `{root}/.claude/CLAUDE.md`, with `@include` expansion) as a single system +/// prompt string, or `None` if no instruction files were found. +/// +/// Calls `claudemd` directly rather than going through +/// `cersei_memory::manager::MemoryManager` so this stays a stateless +/// filesystem read — no session storage, no graph memory, no multi-session +/// persistence gets pulled in as a side effect. +fn project_instructions(project_root: &Path) -> Option { + let files = claudemd::load_all_memory_files(project_root); + let prompt = claudemd::build_memory_prompt(&files); + (!prompt.trim().is_empty()).then_some(prompt) +} + /// Map our coarse [`Effort`] onto a sampling temperature. /// /// Extended thinking would be the natural effort vehicle, but cersei-provider @@ -109,7 +126,7 @@ impl CheckExecutor for CerseiExecutor { let instructions = assemble_instructions(&req.check, &judge_tool_directive()); - let agent = Agent::builder() + let mut agent_builder = Agent::builder() .provider_boxed(provider) .model(self.model.clone()) .working_dir(req.working_dir.clone()) @@ -122,7 +139,19 @@ impl CheckExecutor for CerseiExecutor { // Thinking is intentionally left disabled (see `effort_temperature`). .temperature(effort_temperature(self.effort)) .max_turns(MAX_TURNS) - .cancel_token(cancel.clone()) + .cancel_token(cancel.clone()); + + // `.system_prompt()`, not `.append_system_prompt()`: cersei's agent + // runner only ever reads `Agent.system_prompt` when building each + // completion request (`append_system_prompt` is exclusively consumed + // by the separate `cersei_agent::system_prompt::build_system_prompt` + // composer, which this executor doesn't use), and we don't set a base + // system prompt anywhere else here. + if let Some(project_prompt) = project_instructions(&req.working_dir) { + agent_builder = agent_builder.system_prompt(project_prompt); + } + + let agent = agent_builder .build() .map_err(|e| miette!("building check agent: {e}"))?; diff --git a/src/checks/presenter/heartbeat.rs b/src/checks/presenter/heartbeat.rs index bf90114..536fd40 100644 --- a/src/checks/presenter/heartbeat.rs +++ b/src/checks/presenter/heartbeat.rs @@ -73,8 +73,18 @@ impl HeartbeatBackend { } impl RenderBackend for HeartbeatBackend { - fn apply(&mut self, _state: &PresenterState, _event: &UiEvent) { - // Heartbeat is purely time-driven; events only update shared state. + fn apply(&mut self, _state: &PresenterState, event: &UiEvent) { + // Heartbeat is otherwise purely time-driven; events only update shared + // state. Routed log lines are the exception: the presenter is now + // `tracing`'s sole sink for the run (see the module docs), so if we + // don't re-emit them here they simply vanish. Stderr, not stdout, to + // honor this backend's own invariant that stdout stays reserved for the + // reporting actor's byte-for-byte report. + if let UiEvent::Log(line) = event { + let mut err = std::io::stderr().lock(); + let _ = writeln!(err, "{line}"); + let _ = err.flush(); + } } fn tick(&mut self, state: &PresenterState) { diff --git a/src/checks/presenter/inline.rs b/src/checks/presenter/inline.rs index 5ce42ea..61b1347 100644 --- a/src/checks/presenter/inline.rs +++ b/src/checks/presenter/inline.rs @@ -13,7 +13,25 @@ //! The flushed record reuses the reporting actor's exact text (the requirement //! line + [`failing_check_text`]) and the same AND-aggregation, so the TTY //! scrollback and the non-TTY stdout report differ only in inter-requirement -//! order (completion vs declaration), never in per-requirement content. +//! order (completion vs declaration), never in per-requirement content. Unlike +//! [`report`], which prints through the real terminal and so gets its line +//! wrapping for free, a `ratatui` [`Buffer`] is a fixed-size grid: any row wider +//! than the buffer is silently clipped rather than wrapped. Because +//! `insert_before` renders into such a buffer, long evidence text (which +//! [`report`] never truncates) is word-wrapped to the terminal width *before* +//! it becomes [`Line`]s, so every wrapped row is reserved height and nothing is +//! lost. +//! +//! [`report`]: crate::checks::reporting::report +//! [`Buffer`]: ratatui::buffer::Buffer +//! +//! [`UiEvent::Log`]s get the same durable treatment: each one is flushed to +//! scrollback the instant it arrives ([`InlineTuiBackend::flush_log_line`]), via +//! the identical `insert_before` mechanism, so a `tracing::info!` mid-run can +//! never write raw bytes over the live region. The live region *also* keeps the +//! last few lines in a small pane below the tree — separate from it, never +//! interleaved into a check's row — purely as an ephemeral "what just happened" +//! glance; the scrollback copy is the permanent record. //! //! [`Terminal::insert_before`]: ratatui::Terminal::insert_before //! [`failing_check_text`]: crate::checks::reporting::failing_check_text @@ -38,9 +56,10 @@ use super::backend::RenderBackend; use super::format::{clock_elapsed, gauge_bar, human_elapsed, spinner_frame}; use super::state::{CheckState, PresenterState}; -/// Reserved height for the live region. Completed requirements flush out, so this -/// only ever needs to hold the in-flight tree + the gauge header. -const VIEWPORT_HEIGHT: u16 = 12; +/// Reserved height for the live region. Completed requirements flush out, so +/// this only ever needs to hold the in-flight tree + the gauge header, plus a +/// few rows for the recent-log pane (header + up to `RECENT_LOGS_CAP` lines). +const VIEWPORT_HEIGHT: u16 = 16; /// Width of the textual gauge bar. const GAUGE_WIDTH: usize = 12; /// Redraw cadence: ~20fps keeps the spinner and elapsed timers fluid. @@ -82,6 +101,14 @@ impl InlineTuiBackend { }) } + /// The terminal's current column count — the exact width `insert_before` + /// renders into (see the module docs), so this is what callers wrap text to + /// before reserving height for it. Falls back to a sane default if the + /// terminal can't report its size (never observed in practice). + fn terminal_width(&self) -> u16 { + self.terminal.size().map(|s| s.width).unwrap_or(80) + } + /// Flush every requirement that has fully settled (and isn't already flushed) /// into scrollback, in `req_index` order. Only runs once discovery is complete /// so a requirement's check count is final. @@ -96,7 +123,11 @@ impl InlineTuiBackend { let Some(outcome) = state.requirement_outcome(req_index) else { continue; }; - let lines = requirement_record_lines(&outcome, self.color); + // Word-wrap to the render width now: each wrapped row becomes its own + // `Line`, which makes `lines.len()` the correct height to reserve — + // no row gets clipped. + let width = self.terminal_width(); + let lines = requirement_record_lines(&outcome, self.color, width); let height = lines.len() as u16; let _ = self.terminal.insert_before(height, move |buf| { let area = buf.area; @@ -106,6 +137,23 @@ impl InlineTuiBackend { } } + /// Flush a single routed log line into scrollback immediately, the same way + /// a completed requirement is flushed — so it never fights the live region + /// for control of the cursor, and (per the module docs) is word-wrapped + /// rather than clipped. This is the *permanent* record of the line; the live + /// region separately keeps the last few for at-a-glance context (see + /// [`live_lines`]). + fn flush_log_line(&mut self, line: &str) { + let width = self.terminal_width(); + let style = self.color.then(|| Style::new().add_modifier(Modifier::DIM)); + let lines = styled_wrapped_lines(line, width, style); + let height = lines.len() as u16; + let _ = self.terminal.insert_before(height, move |buf| { + let area = buf.area; + Paragraph::new(lines).render(area, buf); + }); + } + /// Redraw the live region (gauge header + in-flight tree). fn draw_live(&mut self, state: &PresenterState) { let lines = live_lines(state, &self.flushed, self.frame, GAUGE_WIDTH); @@ -121,13 +169,15 @@ impl RenderBackend for InlineTuiBackend { if self.torn_down { return; } - // A settle (or the discovery-complete gate opening) can complete a - // requirement; flush it to scrollback immediately so the record is durable. - if matches!( - event, - UiEvent::CheckSettled { .. } | UiEvent::DiscoveryComplete { .. } - ) { - self.flush_completed(state); + match event { + // A settle (or the discovery-complete gate opening) can complete a + // requirement; flush it to scrollback immediately so the record is + // durable. + UiEvent::CheckSettled { .. } | UiEvent::DiscoveryComplete { .. } => { + self.flush_completed(state); + } + UiEvent::Log(line) => self.flush_log_line(line), + _ => {} } } @@ -210,36 +260,65 @@ extern "C" fn restore_on_sigint(_sig: libc::c_int) { } } -/// The persistent record for one completed requirement — byte-identical in text -/// to the reporting actor's output, only styled for the terminal. -fn requirement_record_lines(outcome: &RequirementOutcome, color: bool) -> Vec> { +/// The persistent record for one completed requirement — same text content as +/// the reporting actor's output (just spread across more rows when a line is +/// word-wrapped to `width`), styled for the terminal. +fn requirement_record_lines( + outcome: &RequirementOutcome, + color: bool, + width: u16, +) -> Vec> { let mut lines = Vec::new(); - let title = outcome.title.clone(); - lines.push(if color { - let mut style = Style::new().add_modifier(Modifier::BOLD); - style = style.fg(if outcome.satisfied { - Color::Green - } else { - Color::Red - }); - Line::styled(title, style) + let title_style = color.then(|| { + Style::new() + .add_modifier(Modifier::BOLD) + .fg(if outcome.satisfied { + Color::Green + } else { + Color::Red + }) + }); + let title_text = if color { + outcome.title.clone() } else { let mark = if outcome.satisfied { "PASS" } else { "FAIL" }; - Line::raw(format!("[{mark}] {title}")) - }); + format!("[{mark}] {}", outcome.title) + }; + lines.extend(styled_wrapped_lines(&title_text, width, title_style)); + if !outcome.satisfied { + let evidence_style = color.then(|| Style::new().fg(Color::Red)); for check in outcome.failing_checks() { let text = failing_check_text(check); - lines.push(if color { - Line::styled(text, Style::new().fg(Color::Red)) - } else { - Line::raw(text) - }); + lines.extend(styled_wrapped_lines(&text, width, evidence_style)); } } lines } +/// Word-wrap `text` to `width` columns and render each resulting row as its own +/// `Line`, uniformly styled. Wrapping (rather than clipping) is what keeps long +/// evidence text from being lost off the edge of the fixed-size `Buffer` that +/// [`InlineTuiBackend::flush_completed`] renders into. +fn styled_wrapped_lines(text: &str, width: u16, style: Option