From a89705074227ccb28b245eec3b8db815c76863f6 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Sat, 16 May 2026 16:30:31 +0200 Subject: [PATCH] feat(fleet-launcher): introduce kitty spawn + heartbeat crate Ports the kitty-spawn path from ~/Documents/hcom (terminal.rs) into a small standalone API for codex-fleet supervisors and tooling. - lib.rs: CLI spawn surface (codex / claude / gemini / claw) - heartbeat.rs: presence/heartbeat file + sysinfo-based liveness checks - bin/spawn.rs: fleet-spawn CLI Workspace registers it automatically via the existing `fleet-*` glob. This lands the previously-staged crate so the dependent CliConvention refactor (agent/refactor-cli-convention-trait) can be rebased onto main and merged as a follow-up. --- rust/Cargo.lock | 204 ++++++++++++ rust/fleet-launcher/Cargo.toml | 26 ++ rust/fleet-launcher/src/bin/spawn.rs | 155 +++++++++ rust/fleet-launcher/src/heartbeat.rs | 436 +++++++++++++++++++++++++ rust/fleet-launcher/src/lib.rs | 471 +++++++++++++++++++++++++++ 5 files changed, 1292 insertions(+) create mode 100644 rust/fleet-launcher/Cargo.toml create mode 100644 rust/fleet-launcher/src/bin/spawn.rs create mode 100644 rust/fleet-launcher/src/heartbeat.rs create mode 100644 rust/fleet-launcher/src/lib.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index e7ab941..569b6ad 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -147,6 +147,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "cpufeatures" version = "0.2.17" @@ -400,6 +406,15 @@ dependencies = [ "crossterm", ] +[[package]] +name = "fleet-launcher" +version = "0.0.1" +dependencies = [ + "jiff", + "libc", + "sysinfo", +] + [[package]] name = "fleet-layout" version = "0.0.1" @@ -692,6 +707,47 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" +[[package]] +name = "jiff" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f00b5dbd620d61dfdcb6007c9c1f6054ebd75319f163d886a9055cec1155073d" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys", +] + +[[package]] +name = "jiff-static" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e000de030ff8022ea1da3f466fbb0f3a809f5e51ed31f6dd931c35181ad8e6d7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + [[package]] name = "js-sys" version = "0.3.98" @@ -879,6 +935,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "ntapi" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + [[package]] name = "num-conv" version = "0.2.1" @@ -1059,6 +1124,15 @@ version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" +[[package]] +name = "portable-atomic-util" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" +dependencies = [ + "portable-atomic", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -1453,6 +1527,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sysinfo" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c33cd241af0f2e9e3b5c32163b873b29956890b5342e6745b917ce9d490f4af" +dependencies = [ + "core-foundation-sys", + "libc", + "memchr", + "ntapi", + "windows", +] + [[package]] name = "tempfile" version = "3.27.0" @@ -1902,12 +1989,65 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143" +dependencies = [ + "windows-core", + "windows-targets", +] + +[[package]] +name = "windows-core" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-result", + "windows-targets", +] + +[[package]] +name = "windows-implement" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-result" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -1917,6 +2057,70 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/rust/fleet-launcher/Cargo.toml b/rust/fleet-launcher/Cargo.toml new file mode 100644 index 0000000..fad970a --- /dev/null +++ b/rust/fleet-launcher/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "fleet-launcher" +version = "0.0.1" +edition = "2021" +publish = false +description = "Spawn codex / claude / gemini sessions in kitty windows. Ports the kitty-spawn path from ~/Documents/hcom (terminal.rs) into a small standalone API for codex-fleet supervisors and tooling." + +[lib] +path = "src/lib.rs" + +[[bin]] +name = "fleet-spawn" +path = "src/bin/spawn.rs" + +[dependencies] +libc = "0.2" + +# Cross-platform process-liveness checks for the presence/heartbeat file +# work. Disable default features so we don't pay for component/network/disk +# probes we never read — `is_alive(pid)` only needs the process refresh. +sysinfo = { version = "0.32", default-features = false, features = ["system"] } + +# Modern date-time crate for heartbeat timestamps. Picked over `chrono` for +# correctness (Jiff's tz model + arithmetic don't carry chrono's leap-second +# / DST footguns) and over `time` for ergonomics. +jiff = "0.2" diff --git a/rust/fleet-launcher/src/bin/spawn.rs b/rust/fleet-launcher/src/bin/spawn.rs new file mode 100644 index 0000000..c7d79cf --- /dev/null +++ b/rust/fleet-launcher/src/bin/spawn.rs @@ -0,0 +1,155 @@ +//! `fleet-spawn` — open a kitty window running codex / claude / gemini. +//! +//! Thin argv parser around `fleet_launcher::spawn_in_kitty`. Designed so a +//! supervisor (Rust binary, bash script, or another codex/claude pane) can +//! simply call: +//! +//! ```bash +//! fleet-spawn --cli codex \ +//! --account alpha@example.com \ +//! --home /tmp/codex-fleet/alpha-example \ +//! --cwd /home/user/repo \ +//! --env CODEX_GUARD_BYPASS=1 \ +//! --env CODEX_FLEET_AGENT_NAME=codex-alpha \ +//! --prompt "claim a ready task" +//! ``` +//! +//! and get back a kitty window, detached, running the right CLI under the +//! right per-account home dir. Exit code is 0 + the kitty PID printed on +//! stdout on success; non-zero with a stderr message otherwise. + +use std::path::PathBuf; +use std::process::ExitCode; + +use fleet_launcher::{spawn_in_kitty, CliKind, LaunchSpec}; + +fn main() -> ExitCode { + let args: Vec = std::env::args().skip(1).collect(); + if args.iter().any(|a| a == "-h" || a == "--help") { + print_help(); + return ExitCode::SUCCESS; + } + + let spec = match parse_args(&args) { + Ok(s) => s, + Err(e) => { + eprintln!("fleet-spawn: {e}"); + eprintln!("run with --help for usage."); + return ExitCode::from(2); + } + }; + + match spawn_in_kitty(&spec) { + Ok(pid) => { + println!("{pid}"); + ExitCode::SUCCESS + } + Err(e) => { + eprintln!("fleet-spawn: failed to spawn kitty window: {e}"); + ExitCode::FAILURE + } + } +} + +fn parse_args(args: &[String]) -> Result { + let mut cli: Option = None; + let mut title: Option = None; + let mut home: Option = None; + let mut cwd: Option = None; + let mut prompt: Option = None; + let mut extra_args: Vec = Vec::new(); + let mut extra_env: Vec<(String, String)> = Vec::new(); + + let mut i = 0; + while i < args.len() { + let a = &args[i]; + match a.as_str() { + "--cli" => { + let v = take(args, &mut i, "--cli")?; + cli = Some( + CliKind::parse(&v) + .ok_or_else(|| format!("unknown --cli value: {v} (codex|claude|gemini)"))?, + ); + } + "--account" | "--title" => { + title = Some(take(args, &mut i, a)?); + } + "--home" => { + home = Some(PathBuf::from(take(args, &mut i, "--home")?)); + } + "--cwd" => { + cwd = Some(PathBuf::from(take(args, &mut i, "--cwd")?)); + } + "--prompt" => { + prompt = Some(take(args, &mut i, "--prompt")?); + } + "--env" => { + let v = take(args, &mut i, "--env")?; + let (k, val) = v + .split_once('=') + .ok_or_else(|| format!("--env expects KEY=VALUE, got: {v}"))?; + extra_env.push((k.to_string(), val.to_string())); + } + "--arg" => { + extra_args.push(take(args, &mut i, "--arg")?); + } + other if other.starts_with("--") => { + return Err(format!("unknown flag: {other}")); + } + other => { + return Err(format!("unexpected positional arg: {other}")); + } + } + i += 1; + } + + let cli = cli.ok_or_else(|| "--cli is required".to_string())?; + let home = home.ok_or_else(|| { + format!("--home is required (target dir for {})", cli.home_env()) + })?; + let title = title.unwrap_or_else(|| format!("fleet-{}", cli.binary())); + + Ok(LaunchSpec { + cli, + title, + home_dir: home, + cwd, + prompt, + extra_args, + extra_env, + }) +} + +fn take(args: &[String], i: &mut usize, flag: &str) -> Result { + *i += 1; + args.get(*i) + .cloned() + .ok_or_else(|| format!("{flag} requires a value")) +} + +fn print_help() { + let me = std::env::args().next().unwrap_or_else(|| "fleet-spawn".to_string()); + println!( + "{me} — spawn a kitty window running codex / claude / gemini.\n\ +\n\ +USAGE\n \ +{me} --cli --home [options]\n\ +\n\ +REQUIRED\n \ +--cli codex|claude|gemini which CLI to launch\n \ +--home DIR per-account home directory (becomes\n CODEX_HOME / CLAUDE_CONFIG_DIR /\n GEMINI_CLI_HOME depending on --cli)\n\ +\n\ +OPTIONAL\n \ +--account NAME kitty window title (default: fleet-)\n \ +--title NAME alias for --account\n \ +--cwd DIR working directory inside the window\n \ +--prompt TEXT initial prompt (codex: positional arg;\n claude: positional after\n --dangerously-skip-permissions;\n gemini: piped over stdin)\n \ +--env KEY=VALUE extra env to export (repeatable)\n \ +--arg STRING extra CLI arg (repeatable, appended verbatim)\n \ +--help, -h show this help\n\ +\n\ +OUTPUT\n \ +On success: prints the kitty process PID to stdout, exits 0.\n \ +On failure: prints reason to stderr, exits non-zero.\n" + ); +} diff --git a/rust/fleet-launcher/src/heartbeat.rs b/rust/fleet-launcher/src/heartbeat.rs new file mode 100644 index 0000000..2b578b9 --- /dev/null +++ b/rust/fleet-launcher/src/heartbeat.rs @@ -0,0 +1,436 @@ +//! Presence + heartbeat tracking for spawned kitty sessions. +//! +//! When `spawn_in_kitty` returns a kitty PID, we drop a tiny session file +//! at `/tmp/fleet-launcher/sessions/.session`. That file lets a +//! supervisor (or the next `fleet-spawn` invocation) answer: +//! +//! - "is this PID still alive?" — via [`is_alive`] (cross-platform +//! process-table check from [`sysinfo`]). +//! - "when was this session last seen?" — via [`SessionRecord::last_heartbeat`] +//! (a [`jiff::Timestamp`] the spawned process can update by calling +//! [`touch_heartbeat`] periodically). +//! - "which PIDs have died but their session file is still around?" — +//! via [`prune_stale`]. +//! +//! ## Why a flat file per session? +//! +//! Three reasons: +//! +//! 1. No DB / no IPC. The fleet's existing coordination layer is Colony +//! + tmux; this module stays out of it. +//! 2. Atomic-by-rename writes. Each `record_session` writes to a `.tmp` +//! sibling and `rename(2)`s into place — no half-written records even +//! if multiple supervisors race. +//! 3. `ls /tmp/fleet-launcher/sessions/` already shows the live set; an +//! operator can `cat` one without learning a new tool. +//! +//! ## File format +//! +//! Plain `KEY=VALUE` lines, one per attribute. Values are *not* shell- +//! quoted (they're raw text); newlines and `=` in values are escaped as +//! `\n` / `\=`. We don't pull `serde` for one struct. + +use std::collections::HashMap; +use std::fs; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; + +use jiff::{Span, Timestamp}; +use sysinfo::{Pid, ProcessRefreshKind, ProcessesToUpdate, System}; + +use crate::{CliKind, LaunchSpec}; + +/// Where session files live on disk. Overridable via env so the cargo +/// tests can sandbox themselves to a per-test directory. +fn sessions_dir() -> PathBuf { + if let Ok(v) = std::env::var("FLEET_LAUNCHER_SESSIONS_DIR") { + return PathBuf::from(v); + } + std::env::temp_dir() + .join("fleet-launcher") + .join("sessions") +} + +/// One tracked spawn. Persisted at `/.session`. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct SessionRecord { + /// The kitty process PID (returned by [`crate::spawn_in_kitty`]). + pub pid: u32, + /// Which CLI runs inside the kitty window. + pub cli: CliKind, + /// Human label — usually the account email or worker name. + pub title: String, + /// `*_HOME` directory the spawned CLI was bound to. + pub home_dir: PathBuf, + /// Wall-clock time of the spawn call. + pub spawned_at: Timestamp, + /// Wall-clock time of the most recent heartbeat. Equals + /// [`Self::spawned_at`] until the spawned process calls + /// [`touch_heartbeat`]. + pub last_heartbeat: Timestamp, +} + +impl SessionRecord { + fn path(&self) -> PathBuf { + sessions_dir().join(format!("{}.session", self.pid)) + } +} + +/// Drop a session file recording `(spec, pid)` with `spawned_at` = +/// `last_heartbeat` = now. +/// +/// Atomic-by-rename: writes `.session.tmp` first, then +/// `rename(2)`s into place. Concurrent supervisors can call this for the +/// same PID without producing a torn file. +pub fn record_session(spec: &LaunchSpec, pid: u32) -> io::Result { + let now = Timestamp::now(); + let rec = SessionRecord { + pid, + cli: spec.cli, + title: spec.title.clone(), + home_dir: spec.home_dir.clone(), + spawned_at: now, + last_heartbeat: now, + }; + write_record(&rec)?; + Ok(rec) +} + +/// Update the `last_heartbeat` timestamp for `pid`. Used by long-running +/// spawned processes that want to advertise "I'm still healthy" without +/// round-tripping through Colony. +/// +/// Returns `Ok(false)` if no session file exists for that PID (caller can +/// decide whether to record a fresh one). +pub fn touch_heartbeat(pid: u32) -> io::Result { + let path = sessions_dir().join(format!("{pid}.session")); + let mut rec = match read_record(&path) { + Ok(r) => r, + Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(false), + Err(e) => return Err(e), + }; + rec.last_heartbeat = Timestamp::now(); + write_record(&rec)?; + Ok(true) +} + +/// Cross-platform "is this PID alive right now" check, backed by +/// [`sysinfo`]. Unlike `kill(pid, 0)` this works on Windows too, and +/// distinguishes "PID exists" from "PID exists but is a zombie owned by a +/// different uid" cleanly. +pub fn is_alive(pid: u32) -> bool { + let mut sys = System::new(); + let target = Pid::from_u32(pid); + sys.refresh_processes_specifics( + ProcessesToUpdate::Some(&[target]), + true, + // We only need the process-table presence check, not cmdline / + // cpu / mem fields. `new()` opts out of every per-process probe. + ProcessRefreshKind::new(), + ); + sys.process(target).is_some() +} + +/// Read every session file under [`sessions_dir`]. +/// +/// Files that fail to parse are silently skipped — a corrupt record +/// shouldn't blind a supervisor to its healthy siblings. Caller can +/// `prune_stale` to clean up the corrupt ones. +pub fn list_sessions() -> io::Result> { + let dir = sessions_dir(); + let entries = match fs::read_dir(&dir) { + Ok(e) => e, + Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(vec![]), + Err(e) => return Err(e), + }; + let mut out: Vec = Vec::new(); + for entry in entries.flatten() { + let p = entry.path(); + if p.extension().is_some_and(|e| e == "session") { + if let Ok(rec) = read_record(&p) { + out.push(rec); + } + } + } + out.sort_by_key(|r| r.pid); + Ok(out) +} + +/// Sweep stale records and return the PIDs we deleted. +/// +/// A record is stale when **either**: +/// +/// - The PID is no longer alive (per [`is_alive`]) — kitty exited, the +/// user closed the window, the process crashed. +/// - The PID is alive but `last_heartbeat` is older than `max_idle` — +/// meaningful when the spawned CLI is supposed to call +/// [`touch_heartbeat`] on a known cadence and has gone silent. +/// +/// Pass `Span::default()` (zero) for `max_idle` to disable the idle path +/// and only prune dead PIDs. +pub fn prune_stale(max_idle: Span) -> io::Result> { + let now = Timestamp::now(); + let sessions = list_sessions()?; + let mut pruned: Vec = Vec::new(); + + for rec in sessions { + let alive = is_alive(rec.pid); + let idle_too_long = if max_idle.is_zero() { + false + } else { + // jiff Span comparison: build the threshold timestamp and + // check whether last_heartbeat predates it. + match now.checked_sub(max_idle) { + Ok(threshold) => rec.last_heartbeat < threshold, + Err(_) => false, + } + }; + + if !alive || idle_too_long { + let path = rec.path(); + if fs::remove_file(&path).is_ok() { + pruned.push(rec.pid); + } + } + } + Ok(pruned) +} + +// ---- file I/O internals --------------------------------------------------- + +fn write_record(rec: &SessionRecord) -> io::Result<()> { + let dir = sessions_dir(); + fs::create_dir_all(&dir)?; + let final_path = dir.join(format!("{}.session", rec.pid)); + let tmp_path = dir.join(format!("{}.session.tmp", rec.pid)); + + let body = format!( + "pid={}\ncli={}\ntitle={}\nhome_dir={}\nspawned_at={}\nlast_heartbeat={}\n", + rec.pid, + cli_to_str(rec.cli), + escape(&rec.title), + escape(&rec.home_dir.to_string_lossy()), + rec.spawned_at, + rec.last_heartbeat, + ); + + { + let mut f = fs::File::create(&tmp_path)?; + f.write_all(body.as_bytes())?; + f.sync_all()?; + } + fs::rename(&tmp_path, &final_path)?; + Ok(()) +} + +fn read_record(path: &Path) -> io::Result { + let body = fs::read_to_string(path)?; + let mut kv: HashMap<&str, String> = HashMap::new(); + for line in body.lines() { + if let Some((k, v)) = line.split_once('=') { + kv.insert(k, unescape(v)); + } + } + + let get = |k: &str| -> io::Result { + kv.get(k).cloned().ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("session file {} missing key `{}`", path.display(), k), + ) + }) + }; + + let pid: u32 = get("pid")? + .parse() + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("bad pid: {e}")))?; + let cli = cli_from_str(&get("cli")?).ok_or_else(|| { + io::Error::new(io::ErrorKind::InvalidData, "unknown cli value") + })?; + let title = get("title")?; + let home_dir = PathBuf::from(get("home_dir")?); + let spawned_at: Timestamp = get("spawned_at")? + .parse() + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("bad spawned_at: {e}")))?; + let last_heartbeat: Timestamp = get("last_heartbeat")? + .parse() + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("bad last_heartbeat: {e}")))?; + + Ok(SessionRecord { + pid, + cli, + title, + home_dir, + spawned_at, + last_heartbeat, + }) +} + +fn escape(s: &str) -> String { + s.replace('\\', "\\\\").replace('\n', "\\n").replace('=', "\\=") +} + +fn unescape(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + let mut chars = s.chars().peekable(); + while let Some(c) = chars.next() { + if c == '\\' { + match chars.next() { + Some('n') => out.push('\n'), + Some('=') => out.push('='), + Some('\\') => out.push('\\'), + Some(other) => { + out.push('\\'); + out.push(other); + } + None => out.push('\\'), + } + } else { + out.push(c); + } + } + out +} + +fn cli_to_str(cli: CliKind) -> &'static str { + match cli { + CliKind::Codex => "codex", + CliKind::Claude => "claude", + CliKind::Gemini => "gemini", + CliKind::Claw => "claw", + } +} + +fn cli_from_str(s: &str) -> Option { + CliKind::parse(s) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::{Mutex, MutexGuard, OnceLock}; + + /// Process-wide lock so the `FLEET_LAUNCHER_SESSIONS_DIR` env var is + /// only mutated by one test at a time. Cargo runs tests in parallel + /// by default; without this, two tests both calling `isolate()` race + /// on the env var and one of them ends up reading the other's files. + fn test_lock() -> &'static Mutex<()> { + static L: OnceLock> = OnceLock::new(); + L.get_or_init(|| Mutex::new(())) + } + + /// RAII guard returned by [`isolate`]. Holds the process-wide test + /// lock until it drops, so the env var stays valid for the entire + /// test body. Recover from panicked siblings by treating a poisoned + /// mutex as a still-usable lock (the env var will be re-set anyway). + struct Isolated { + _guard: MutexGuard<'static, ()>, + } + + fn isolate(name: &str) -> Isolated { + let guard = test_lock() + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + let dir = std::env::temp_dir().join(format!( + "fleet-launcher-test-{}-{}", + std::process::id(), + name + )); + // SAFETY: the mutex above serializes env-var mutation across all + // tests in this binary, which is the only correctness rule + // `set_var` requires. + unsafe { + std::env::set_var("FLEET_LAUNCHER_SESSIONS_DIR", &dir); + } + let _ = fs::remove_dir_all(&dir); + fs::create_dir_all(&dir).unwrap(); + Isolated { _guard: guard } + } + + #[test] + fn record_then_read_round_trip() { + let _dir = isolate("round_trip"); + let spec = LaunchSpec { + cli: CliKind::Codex, + title: "alpha@example.com".into(), + home_dir: PathBuf::from("/tmp/codex-fleet/alpha"), + cwd: None, + prompt: None, + extra_args: vec![], + extra_env: vec![], + }; + let rec = record_session(&spec, 12345).unwrap(); + let listed = list_sessions().unwrap(); + assert_eq!(listed.len(), 1); + assert_eq!(listed[0], rec); + } + + #[test] + fn touch_heartbeat_updates_timestamp_only() { + let _dir = isolate("touch"); + let spec = LaunchSpec { + cli: CliKind::Claude, + title: "t".into(), + home_dir: PathBuf::from("/tmp/x"), + cwd: None, + prompt: None, + extra_args: vec![], + extra_env: vec![], + }; + let before = record_session(&spec, 99).unwrap(); + std::thread::sleep(std::time::Duration::from_millis(20)); + assert!(touch_heartbeat(99).unwrap()); + let after = list_sessions().unwrap().pop().unwrap(); + assert_eq!(after.spawned_at, before.spawned_at); + assert!(after.last_heartbeat > before.last_heartbeat); + } + + #[test] + fn touch_heartbeat_returns_false_when_missing() { + let _dir = isolate("touch_missing"); + assert!(!touch_heartbeat(7777).unwrap()); + } + + #[test] + fn is_alive_true_for_self_false_for_dead_pid() { + // Our own PID is always alive. + assert!(is_alive(std::process::id())); + // 0 is "any process in our group" on Unix and is never returned + // as a process by sysinfo's process table — safe stand-in for + // "definitely not running". + assert!(!is_alive(0)); + } + + #[test] + fn prune_stale_removes_dead_pid_records() { + let _dir = isolate("prune"); + let spec = LaunchSpec { + cli: CliKind::Codex, + title: "ghost".into(), + home_dir: PathBuf::from("/tmp/x"), + cwd: None, + prompt: None, + extra_args: vec![], + extra_env: vec![], + }; + // Record a "session" for PID 0 (definitely dead) and our own PID + // (definitely alive). + record_session(&spec, 0).unwrap(); + record_session(&spec, std::process::id()).unwrap(); + + let pruned = prune_stale(Span::new()).unwrap(); + assert_eq!(pruned, vec![0]); + + let remaining = list_sessions().unwrap(); + assert_eq!(remaining.len(), 1); + assert_eq!(remaining[0].pid, std::process::id()); + } + + #[test] + fn escape_round_trips_specials() { + for s in &["plain", "with=equal", "with\nnewline", "back\\slash"] { + let e = escape(s); + assert_eq!(unescape(&e), *s); + } + } +} diff --git a/rust/fleet-launcher/src/lib.rs b/rust/fleet-launcher/src/lib.rs new file mode 100644 index 0000000..6edd292 --- /dev/null +++ b/rust/fleet-launcher/src/lib.rs @@ -0,0 +1,471 @@ +//! Spawn codex / claude / gemini / claw sessions in kitty windows. +//! +//! Ported (and trimmed hard) from `~/Documents/hcom/src/terminal.rs`. hcom's +//! 2300-line terminal layer covers a dozen terminals (kitty, wezterm, tmux, +//! iterm, gnome-terminal, ...), per-CLI dispatch tables, IPC sockets, PTY +//! wrappers, transcript capture, etc. We need exactly **one** capability: +//! "open a fresh kitty window, run `codex` (or `claude`, `gemini`, `claw`) +//! under the right `*_HOME` env, detach, return the PID". Everything else +//! is out of scope. +//! +//! ## Why a bash script wrapper? +//! +//! hcom learned the hard way that passing a long composite shell line as +//! `kitty bash -c "<...>"` arg-quotes inconsistently across kitty versions +//! (`terminal.rs:611-731` builds a script file precisely for this reason). +//! Writing a self-deleting `/tmp/fleet-launcher--.sh` and running +//! `kitty --title bash