From 5a418e0cd6192f6e79967eb822df2db60939d7e9 Mon Sep 17 00:00:00 2001 From: Will Killian Date: Tue, 19 May 2026 09:29:56 -0400 Subject: [PATCH 1/5] fix: align coding agent trace scopes Signed-off-by: Will Killian --- crates/cli/src/alignment/claude_code.rs | 49 + crates/cli/src/alignment/codex.rs | 338 +++++ crates/cli/src/alignment/mod.rs | 540 ++++++++ crates/cli/src/gateway.rs | 254 ++-- crates/cli/src/main.rs | 1 + crates/cli/src/session.rs | 712 ++++++++--- .../coverage/alignment_claude_code_tests.rs | 90 ++ .../tests/coverage/alignment_codex_tests.rs | 268 ++++ crates/cli/tests/coverage/alignment_tests.rs | 316 +++++ crates/cli/tests/coverage/gateway_tests.rs | 220 +++- crates/cli/tests/coverage/session_tests.rs | 1115 +++++++++++++++++ 11 files changed, 3520 insertions(+), 383 deletions(-) create mode 100644 crates/cli/src/alignment/claude_code.rs create mode 100644 crates/cli/src/alignment/codex.rs create mode 100644 crates/cli/src/alignment/mod.rs create mode 100644 crates/cli/tests/coverage/alignment_claude_code_tests.rs create mode 100644 crates/cli/tests/coverage/alignment_codex_tests.rs create mode 100644 crates/cli/tests/coverage/alignment_tests.rs diff --git a/crates/cli/src/alignment/claude_code.rs b/crates/cli/src/alignment/claude_code.rs new file mode 100644 index 00000000..e2bbaa46 --- /dev/null +++ b/crates/cli/src/alignment/claude_code.rs @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Claude Code-specific trace alignment. +//! +//! Claude Code already propagates a native session header and can report subagent completion via +//! the `Agent` tool result. These helpers keep those vendor-specific hints outside the generic +//! session state machine. + +use axum::http::HeaderMap; + +use crate::alignment::json_string_at; +use crate::config::header_string; +use crate::model::{AgentKind, ToolEvent}; + +// Identifies gateway providers that should be labeled as Claude-owned when an Anthropic request +// arrives before a SessionStart hook. Other providers are left generic so mixed gateway traffic +// does not inherit Claude scope metadata by route alone. +pub(crate) fn owns_gateway_provider(provider: &str) -> bool { + matches!(provider, "anthropic.messages" | "anthropic.count_tokens") +} + +// Claude Code already has a stable session id header. Accept it after the explicit NeMo Flow +// header so existing Claude environments correlate without extra gateway-specific configuration. +pub(crate) fn session_id_from_headers(headers: &HeaderMap) -> Option { + header_string(headers, "x-claude-code-session-id") +} + +// Claude's `Agent` tool result names the spawned worker as `agentId`. Treating that as a +// completion signal gives the CLI a deterministic subagent end even when Claude does not emit a +// separate `SubagentStop` hook until session teardown. +pub(crate) fn completed_subagent_from_agent_tool(event: &ToolEvent) -> Option { + if event.agent_kind != AgentKind::ClaudeCode || event.tool_name != "Agent" { + return None; + } + json_string_at( + &event.result, + &[ + &["agentId"][..], + &["agent_id"][..], + &["subagentId"][..], + &["subagent_id"][..], + ], + ) +} + +#[cfg(test)] +#[path = "../../tests/coverage/alignment_claude_code_tests.rs"] +mod tests; diff --git a/crates/cli/src/alignment/codex.rs b/crates/cli/src/alignment/codex.rs new file mode 100644 index 00000000..e510ec88 --- /dev/null +++ b/crates/cli/src/alignment/codex.rs @@ -0,0 +1,338 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Codex-specific trace alignment and gateway normalization. +//! +//! Codex subagents run as child threads. Their hook stream can look like an independent top-level +//! session unless we recover the parent thread id from Codex metadata or the transcript's first +//! `session_meta` record. The helpers here turn that Codex-specific shape into the generic +//! [`SessionAlias`](super::SessionAlias) contract used by the session manager. + +use std::io::{BufRead, BufReader}; + +use axum::http::HeaderMap; +use serde_json::{Map, Value, json}; + +use crate::alignment::{ + GatewayRouteKind, SessionAlias, insert_optional, json_string_at, merge_metadata, +}; +use crate::model::{AgentKind, SessionEvent, SubagentEvent}; + +// ChatGPT backend base URL used by Codex when authenticated with ChatGPT-Plus OAuth. This mirrors +// Codex's own `CHATGPT_CODEX_BASE_URL`; API-key auth continues through the normal OpenAI base. +const CHATGPT_CODEX_BASE_URL: &str = "https://chatgpt.com/backend-api/codex"; + +// The Codex fields needed to turn a child thread into a parent-owned subagent scope. Optional +// display fields are copied through because they make Phoenix traces easier to inspect, but only +// the parent session id is required for correlation. +#[derive(Debug, Clone)] +pub(crate) struct SubagentContext { + pub(crate) parent_session_id: String, + nickname: Option, + role: Option, + depth: Option, +} + +// Identifies gateway LLM providers that Codex owns for implicit session creation. Today Codex +// emits OpenAI Responses requests through the gateway, while OpenAI chat/model endpoints may come +// from generic clients and should not be labeled as Codex by route alone. +pub(crate) fn owns_gateway_provider(provider: &str) -> bool { + provider == "openai.responses" +} + +// Codex currently does not forward a stable session header on OpenAI Responses requests. When the +// request carries Codex client metadata, the `prompt_cache_key` is the rollout/thread id. The +// metadata check prevents treating arbitrary application prompt-cache keys as session ids. +pub(crate) fn prompt_cache_session_id(body: &Value, route: GatewayRouteKind) -> Option { + if route != GatewayRouteKind::OpenAiResponses { + return None; + } + let has_codex_metadata = body + .get("client_metadata") + .and_then(|metadata| metadata.get("x-codex-installation-id")) + .and_then(Value::as_str) + .is_some_and(|value| !value.is_empty()); + if !has_codex_metadata { + return None; + } + json_string_at(body, &[&["prompt_cache_key"][..]]) +} + +// Gives the gateway a Codex-native upstream only when the inbound token is the ChatGPT OAuth JWT +// shape and no `OPENAI_API_KEY` is available to substitute. The gateway stays generic by asking +// alignment for an optional override instead of knowing Codex backend URLs. +pub(crate) fn chatgpt_upstream_url_if_needed( + headers: &HeaderMap, + route: GatewayRouteKind, + path_and_query: &str, + has_replacement_key: bool, +) -> Option { + (is_openai_route(route) && has_chatgpt_oauth_jwt(headers) && !has_replacement_key) + .then(|| chatgpt_upstream_url(path_and_query)) +} + +// Removes Codex ChatGPT OAuth JWTs from OpenAI-family routes when the gateway has a real API key +// to inject. Real provider keys and non-OpenAI routes are preserved. +pub(crate) fn strip_chatgpt_oauth_for_openai_route( + headers: &HeaderMap, + route: GatewayRouteKind, + has_replacement_key: bool, +) -> HeaderMap { + if !is_openai_route(route) || !has_replacement_key { + return headers.clone(); + } + let mut out = headers.clone(); + if has_chatgpt_oauth_jwt(&out) { + out.remove(http::header::AUTHORIZATION); + } + out +} + +// OpenAI API bases commonly include `/v1`, while the ChatGPT backend is rooted at +// `/backend-api/codex`. Strip any `/v1` prefix from gateway routes before appending the path. +fn chatgpt_upstream_url(path_and_query: &str) -> String { + let path = path_and_query.strip_prefix("/v1").unwrap_or(path_and_query); + format!("{CHATGPT_CODEX_BASE_URL}{path}") +} + +// Codex stores ChatGPT-Plus OAuth tokens in `~/.codex/auth.json`; the access token is a JWT whose +// bearer value starts with `eyJ`. Provider API keys (`sk-...`) and opaque tokens do not match. +fn has_chatgpt_oauth_jwt(headers: &HeaderMap) -> bool { + headers + .get(http::header::AUTHORIZATION) + .and_then(|value| value.to_str().ok()) + .is_some_and(|value| value.starts_with("Bearer eyJ")) +} + +// The ChatGPT OAuth transport fallback applies only to OpenAI-family routes. Anthropic routes use +// a different auth scheme and should never be redirected through Codex's ChatGPT backend. +fn is_openai_route(route: GatewayRouteKind) -> bool { + matches!( + route, + GatewayRouteKind::OpenAiResponses + | GatewayRouteKind::OpenAiChatCompletions + | GatewayRouteKind::OpenAiModels + ) +} + +// Extracts Codex subagent thread-spawn context from a SessionStart. It looks first at the hook +// payload, then hook metadata, then the first transcript line. Returning None keeps ordinary Codex +// root sessions as root sessions and prevents self-parenting loops. +pub(crate) fn subagent_context(event: &SessionEvent) -> Option { + if event.agent_kind != AgentKind::Codex { + return None; + } + subagent_context_from_value(&event.payload) + .or_else(|| subagent_context_from_value(&event.metadata)) + .or_else(|| subagent_context_from_transcript(event)) + .filter(|context| context.parent_session_id != event.session_id) +} + +// Codex sometimes supplies only a transcript path in the hook payload. The first transcript line +// is a `session_meta` object and carries the thread-spawn parent id that Phoenix needs for +// parentage. Reading one line keeps this cheap and avoids treating the full transcript as input. +fn subagent_context_from_transcript(event: &SessionEvent) -> Option { + let transcript_path = json_string_at(&event.metadata, &[&["transcript_path"][..]]) + .or_else(|| json_string_at(&event.payload, &[&["transcript_path"][..]]))?; + let file = std::fs::File::open(transcript_path).ok()?; + let mut reader = BufReader::new(file); + let mut line = String::new(); + if reader.read_line(&mut line).ok()? == 0 { + return None; + } + let value = serde_json::from_str::(&line).ok()?; + subagent_context_from_value(&value) +} + +// Searches the known Codex shapes for thread-spawn data. Recent traces have placed +// `parent_thread_id`, nickname, role, and depth under direct hook payloads, nested `payload`, and +// transcript `session_meta.payload`; all of those paths describe the same child-thread parentage. +fn subagent_context_from_value(value: &Value) -> Option { + let parent_session_id = json_string_at( + value, + &[ + &["source", "subagent", "thread_spawn", "parent_thread_id"][..], + &[ + "payload", + "source", + "subagent", + "thread_spawn", + "parent_thread_id", + ][..], + &[ + "session_meta", + "payload", + "source", + "subagent", + "thread_spawn", + "parent_thread_id", + ][..], + &[ + "extra", + "source", + "subagent", + "thread_spawn", + "parent_thread_id", + ][..], + ], + )?; + Some(SubagentContext { + parent_session_id, + nickname: json_string_at( + value, + &[ + &["agent_nickname"][..], + &["payload", "agent_nickname"][..], + &["session_meta", "payload", "agent_nickname"][..], + &["source", "subagent", "thread_spawn", "agent_nickname"][..], + &[ + "payload", + "source", + "subagent", + "thread_spawn", + "agent_nickname", + ][..], + &[ + "session_meta", + "payload", + "source", + "subagent", + "thread_spawn", + "agent_nickname", + ][..], + ], + ), + role: json_string_at( + value, + &[ + &["agent_role"][..], + &["payload", "agent_role"][..], + &["session_meta", "payload", "agent_role"][..], + &["source", "subagent", "thread_spawn", "agent_role"][..], + &[ + "payload", + "source", + "subagent", + "thread_spawn", + "agent_role", + ][..], + &[ + "session_meta", + "payload", + "source", + "subagent", + "thread_spawn", + "agent_role", + ][..], + ], + ), + depth: json_string_at( + value, + &[ + &["source", "subagent", "thread_spawn", "depth"][..], + &["payload", "source", "subagent", "thread_spawn", "depth"][..], + &[ + "session_meta", + "payload", + "source", + "subagent", + "thread_spawn", + "depth", + ][..], + ], + ), + }) +} + +// Stamps the child thread's hook metadata with parent-thread details before the session manager +// converts it into a subagent start. This makes the scope itself filterable even before any LLM +// ownership heuristics run. +pub(crate) fn augment_subagent_metadata(metadata: Value, context: &SubagentContext) -> Value { + let mut object = Map::new(); + object.insert("thread_source".into(), json!("subagent")); + object.insert( + "codex_parent_thread_id".into(), + json!(context.parent_session_id.clone()), + ); + insert_optional(&mut object, "agent_nickname", context.nickname.as_deref()); + insert_optional(&mut object, "agent_role", context.role.as_deref()); + insert_optional( + &mut object, + "codex_subagent_depth", + context.depth.as_deref(), + ); + merge_metadata(metadata, Value::Object(object)) +} + +// Converts the child thread SessionStart into a real subagent start under the parent thread. The +// child session id becomes the subagent id because subsequent gateway/tool events can use that id +// for deterministic ownership. +pub(crate) fn subagent_start_event( + event: &SessionEvent, + context: &SubagentContext, +) -> SubagentEvent { + SubagentEvent { + session_id: context.parent_session_id.clone(), + agent_kind: event.agent_kind, + event_name: event.event_name.clone(), + subagent_id: event.session_id.clone(), + payload: event.payload.clone(), + metadata: merge_metadata( + event.metadata.clone(), + json!({ "codex_subagent_session_id": event.session_id.clone() }), + ), + } +} + +// Creates the routing alias that keeps later child-thread events under the same parent subagent. +// The alias metadata intentionally repeats parent and child thread ids so every rewritten event can +// be debugged without consulting session-manager state. +pub(crate) fn alias_for_child_session( + child_session_id: String, + context: &SubagentContext, +) -> SessionAlias { + SessionAlias::new( + context.parent_session_id.clone(), + child_session_id.clone(), + json!({ + "thread_source": "subagent", + "codex_parent_thread_id": context.parent_session_id.clone(), + "codex_subagent_session_id": child_session_id, + }), + ) +} + +// Copies Codex thread fields from the subagent scope onto LLM spans whenever ownership resolves to +// that subagent. The owner may have been explicit, sticky, inferred from a recent tool, or hinted; +// using the scope metadata gives every path the same debug/filter fields. Local transcript paths +// stay on the subagent scope only; copying them onto every LLM span would broaden filesystem path +// exposure in remote exporters. +pub(crate) fn llm_owner_metadata(scope_metadata: Option<&Value>) -> Value { + let Some(Value::Object(scope_metadata)) = scope_metadata else { + return Value::Null; + }; + let mut metadata = Map::new(); + for key in [ + "thread_source", + "codex_parent_thread_id", + "codex_subagent_session_id", + "codex_subagent_depth", + "agent_nickname", + "agent_role", + ] { + if let Some(value) = scope_metadata.get(key) + && !value.is_null() + { + metadata.insert(key.to_string(), value.clone()); + } + } + if metadata.is_empty() { + Value::Null + } else { + Value::Object(metadata) + } +} + +#[cfg(test)] +#[path = "../../tests/coverage/alignment_codex_tests.rs"] +mod tests; diff --git a/crates/cli/src/alignment/mod.rs b/crates/cli/src/alignment/mod.rs new file mode 100644 index 00000000..0322f6ca --- /dev/null +++ b/crates/cli/src/alignment/mod.rs @@ -0,0 +1,540 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Provider-specific alignment and gateway-normalization helpers for the CLI. +//! +//! The session and gateway modules own generic lifecycle mechanics. This module owns the cases +//! where a coding agent's wire format needs interpretation before those generic mechanics can +//! attach LLMs/tools to the right scope or forward the request to the right upstream. + +use std::collections::HashMap; + +use axum::http::HeaderMap; +use serde_json::{Map, Value, json}; + +use crate::config::header_string; +use crate::model::{AgentKind, LlmEvent, NormalizedEvent, SessionEvent, SubagentEvent, ToolEvent}; + +pub(crate) mod claude_code; +pub(crate) mod codex; + +#[derive(Debug, Clone)] +pub(crate) enum SubagentSessionContext { + Codex(codex::SubagentContext), +} + +impl SubagentSessionContext { + pub(crate) fn parent_session_id(&self) -> &str { + match self { + Self::Codex(context) => &context.parent_session_id, + } + } +} + +// Minimal route taxonomy used by alignment code. The gateway has richer routing state, but these +// variants are the only distinctions provider-specific correlation needs: Codex-owned OpenAI +// Responses, Claude-owned Anthropic endpoints, and other OpenAI-compatible traffic. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum GatewayRouteKind { + OpenAiResponses, + OpenAiChatCompletions, + OpenAiModels, + AnthropicMessages, + AnthropicCountTokens, +} + +// Records that a provider-created child session is really a subagent under another session. The +// session manager stores this until the child emits its terminal AgentEnded event, then removes the +// alias so future unrelated events cannot be reparented through stale state. +#[derive(Debug, Clone)] +pub(crate) struct SessionAlias { + pub(crate) parent_session_id: String, + pub(crate) subagent_id: String, + // Metadata explains why this alias exists and is stamped on rewritten events. Phoenix traces + // then stay filterable/debuggable even after the event has been moved under its parent scope. + metadata: Value, +} + +impl SessionAlias { + // Builds the explicit child-session-to-parent-subagent mapping after an adapter has proven the + // child session is not an independent root agent. + pub(crate) fn new(parent_session_id: String, subagent_id: String, metadata: Value) -> Self { + Self { + parent_session_id, + subagent_id, + metadata, + } + } + + // Returns owned metadata because routing consumes events by value and may need to merge the + // same alias explanation into several lifecycle events before the alias is closed. + pub(crate) fn metadata(&self) -> Value { + self.metadata.clone() + } +} + +#[derive(Debug, Clone)] +pub(crate) struct PendingSubagentStart { + // The original child SessionStart is retained because promotion may happen on a later parent + // hook or gateway request, after this hook request has already returned. + pub(crate) event: SessionEvent, + context: SubagentSessionContext, +} + +impl PendingSubagentStart { + pub(crate) fn parent_session_id(&self) -> &str { + self.context.parent_session_id() + } + + pub(crate) fn subagent_start_event(&self) -> SubagentEvent { + subagent_start_event(&self.event, &self.context) + } + + pub(crate) fn alias_for_child_session(&self, child_session_id: String) -> SessionAlias { + alias_for_child_session(child_session_id, &self.context) + } +} + +// Owns all cross-session correlation state used by the session manager. Keeping aliases and +// pending child starts together makes lifecycle cleanup atomic: any code that removes stale alias +// state also removes the matching pending state before later events can observe a half-updated map. +#[derive(Debug, Default)] +pub(crate) struct SessionAlignmentState { + aliases: HashMap, + pending_subagents: HashMap, +} + +impl SessionAlignmentState { + pub(crate) fn clear(&mut self) { + self.aliases.clear(); + self.pending_subagents.clear(); + } + + pub(crate) fn alias_for_session(&self, session_id: &str) -> Option { + self.aliases.get(session_id).cloned() + } + + #[cfg(test)] + pub(crate) fn has_alias(&self, session_id: &str) -> bool { + self.aliases.contains_key(session_id) + } + + #[cfg(test)] + pub(crate) fn has_pending_session(&self, session_id: &str) -> bool { + self.pending_subagents.contains_key(session_id) + } + + pub(crate) fn pending_for_session(&mut self, session_id: &str) -> Option { + self.pending_subagents.remove(session_id) + } + + pub(crate) fn insert_pending( + &mut self, + child_session_id: String, + pending: PendingSubagentStart, + ) { + self.pending_subagents.insert(child_session_id, pending); + } + + pub(crate) fn remove_pending(&mut self, child_session_id: &str) { + self.pending_subagents.remove(child_session_id); + } + + pub(crate) fn insert_alias(&mut self, child_session_id: String, alias: SessionAlias) { + self.aliases.insert(child_session_id, alias); + } + + pub(crate) fn route_event(&mut self, event: NormalizedEvent) -> NormalizedEvent { + let (event, finished_alias) = route_event_through_alias(event, &self.aliases); + let session_id = event.session_id().to_string(); + if let Some(child_session_id) = finished_alias.as_ref() { + // Remove aliases before terminal skip checks so a late child AgentEnd cannot leave + // stale reparenting state after its parent session has already closed. + self.aliases.remove(child_session_id); + self.pending_subagents.remove(child_session_id); + } + if matches!(&event, NormalizedEvent::AgentEnded(_)) { + self.clear_for_ended_agent(&session_id); + } + event + } + + pub(crate) fn pending_for_parent( + &mut self, + parent_session_id: &str, + ) -> Vec<(String, PendingSubagentStart)> { + let child_session_ids = self + .pending_subagents + .iter() + .filter_map(|(child_session_id, pending)| { + (pending.parent_session_id() == parent_session_id) + .then_some(child_session_id.clone()) + }) + .collect::>(); + child_session_ids + .into_iter() + .filter_map(|child_session_id| { + self.pending_subagents + .remove(&child_session_id) + .map(|pending| (child_session_id, pending)) + }) + .collect() + } + + fn clear_for_ended_agent(&mut self, session_id: &str) { + self.aliases.retain(|child_session_id, alias| { + child_session_id != session_id && alias.parent_session_id != session_id + }); + self.pending_subagents.retain(|child_session_id, pending| { + child_session_id != session_id && pending.parent_session_id() != session_id + }); + } +} + +// Resolves the session id for a gateway request in precedence order: +// explicit NeMo Flow header, agent-native headers, then agent-specific body fallbacks. Keeping the +// provider fallbacks behind one function makes a new agent integration add one small alignment +// adapter instead of threading bespoke checks through gateway request construction. +pub(crate) fn gateway_session_id( + headers: &HeaderMap, + body: &Value, + route: GatewayRouteKind, +) -> Option { + header_string(headers, "x-nemo-flow-session-id") + .or_else(|| claude_code::session_id_from_headers(headers)) + .or_else(|| codex::prompt_cache_session_id(body, route)) +} + +// Gives provider adapters a chance to select an agent-native upstream before the gateway falls +// back to configured provider bases. Codex uses this for ChatGPT OAuth tokens that target the +// ChatGPT backend instead of the public OpenAI API. +pub(crate) fn gateway_upstream_url_override( + headers: &HeaderMap, + route: GatewayRouteKind, + path_and_query: &str, + has_openai_replacement_key: bool, +) -> Option { + codex::chatgpt_upstream_url_if_needed( + headers, + route, + path_and_query, + has_openai_replacement_key, + ) +} + +// Lets provider adapters remove or preserve agent-native auth material before generic provider +// auth injection runs. Codex strips ChatGPT OAuth JWTs only when an OpenAI API key is available to +// replace them. +pub(crate) fn gateway_forward_headers( + headers: &HeaderMap, + route: GatewayRouteKind, + has_openai_replacement_key: bool, +) -> HeaderMap { + codex::strip_chatgpt_oauth_for_openai_route(headers, route, has_openai_replacement_key) +} + +// Reads the explicit subagent header that callers can use when the gateway request already knows +// the target worker scope. Unlike session ids, there is intentionally no body fallback here: +// subagent body fields are provider-specific and easy to confuse with tool-call payload content. +pub(crate) fn gateway_subagent_id(headers: &HeaderMap) -> Option { + header_string(headers, "x-nemo-flow-subagent-id") +} + +// Resolves a correlation identifier from a dedicated header before trying known JSON body paths. +// Header precedence lets callers disambiguate requests even when provider payloads contain stale +// or differently scoped identifiers. +pub(crate) fn gateway_identifier( + headers: &HeaderMap, + body: &Value, + header_name: &'static str, + body_paths: &[&[&str]], +) -> Option { + header_string(headers, header_name).or_else(|| json_string_at(body, body_paths)) +} + +// Infers the owning agent for a session created by a gateway request that beat its SessionStart +// hook. This is the last chance to label the root scope correctly because exporter identities are +// baked when the scope opens. +pub(crate) fn agent_kind_for_gateway_provider(provider: &str) -> AgentKind { + if claude_code::owns_gateway_provider(provider) { + AgentKind::ClaudeCode + } else if codex::owns_gateway_provider(provider) { + AgentKind::Codex + } else { + AgentKind::Gateway + } +} + +// Detects agent harnesses that report a child session which should become a subagent under another +// session. Codex is the first such harness: it starts child threads with parent-thread metadata. +// Future harness-specific detectors should plug in here so the session manager can stay provider +// neutral. +pub(crate) fn subagent_session_context(event: &SessionEvent) -> Option { + codex::subagent_context(event).map(SubagentSessionContext::Codex) +} + +// Converts an AgentStarted event into a pending child-session record when a harness explicitly +// reports parentage. The caller still decides whether the child session is empty enough to promote. +pub(crate) fn pending_subagent_start( + event: &mut NormalizedEvent, +) -> Option<(String, PendingSubagentStart)> { + let NormalizedEvent::AgentStarted(session_event) = event else { + return None; + }; + let context = subagent_session_context(session_event)?; + let child_session_id = session_event.session_id.clone(); + if context.parent_session_id() == child_session_id { + return None; + } + session_event.metadata = + augment_subagent_session_metadata(session_event.metadata.clone(), &context); + Some(( + child_session_id, + PendingSubagentStart { + event: session_event.clone(), + context, + }, + )) +} + +// Lets the owning alignment adapter stamp provider-specific debug fields on a child SessionStart +// before the generic session manager promotes it to a subagent. +pub(crate) fn augment_subagent_session_metadata( + metadata: Value, + context: &SubagentSessionContext, +) -> Value { + match context { + SubagentSessionContext::Codex(context) => { + codex::augment_subagent_metadata(metadata, context) + } + } +} + +// Converts a child SessionStart into the provider-appropriate SubagentStarted event. The session +// manager only knows that a child session should be promoted; the adapter owns how to preserve the +// provider's original metadata. +pub(crate) fn subagent_start_event( + event: &SessionEvent, + context: &SubagentSessionContext, +) -> SubagentEvent { + match context { + SubagentSessionContext::Codex(context) => codex::subagent_start_event(event, context), + } +} + +// Builds the alias used to route later child-session events through the promoted parent/subagent +// pair. The adapter supplies provider-specific metadata explaining why the alias exists. +pub(crate) fn alias_for_child_session( + child_session_id: String, + context: &SubagentSessionContext, +) -> SessionAlias { + match context { + SubagentSessionContext::Codex(context) => { + codex::alias_for_child_session(child_session_id, context) + } + } +} + +// Recovers provider-specific metadata from a subagent scope and copies only the fields that should +// follow LLM spans. Codex contributes thread identifiers today; other harnesses can add filters +// here without changing session ownership code. +pub(crate) fn llm_owner_metadata(scope_metadata: Option<&Value>) -> Value { + codex::llm_owner_metadata(scope_metadata) +} + +// Detects tool results that imply a subagent completed. Claude Code reports this through the +// `Agent` tool today; keeping the check here avoids leaking that tool shape into session teardown. +pub(crate) fn completed_subagent_from_tool(event: &ToolEvent) -> Option { + claude_code::completed_subagent_from_agent_tool(event) +} + +// Routes events from an aliased child session through the parent session/subagent pair. The alias +// records why the child is not a top-level agent; this generic router only rewrites ownership and +// preserves the adapter-supplied metadata for filtering/debugging in Phoenix. +pub(crate) fn route_event_through_alias( + event: NormalizedEvent, + aliases: &HashMap, +) -> (NormalizedEvent, Option) { + let child_session_id = event.session_id().to_string(); + let Some(alias) = aliases.get(&child_session_id).cloned() else { + return (event, None); + }; + let metadata = alias.metadata(); + match event { + NormalizedEvent::AgentStarted(event) => ( + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: alias.parent_session_id, + agent_kind: event.agent_kind, + event_name: event.event_name, + subagent_id: alias.subagent_id, + payload: event.payload, + metadata: merge_metadata(event.metadata, metadata), + }), + None, + ), + NormalizedEvent::AgentEnded(event) => ( + NormalizedEvent::SubagentEnded(SubagentEvent { + session_id: alias.parent_session_id, + agent_kind: event.agent_kind, + event_name: event.event_name, + subagent_id: alias.subagent_id, + payload: event.payload, + metadata: merge_metadata(event.metadata, metadata), + }), + Some(child_session_id), + ), + NormalizedEvent::TurnEnded(mut event) => { + route_session_event(&mut event, &alias, metadata); + (NormalizedEvent::TurnEnded(event), None) + } + NormalizedEvent::PromptSubmitted(mut event) => { + route_session_event(&mut event, &alias, metadata); + (NormalizedEvent::PromptSubmitted(event), None) + } + NormalizedEvent::Compaction(mut event) => { + route_session_event(&mut event, &alias, metadata); + (NormalizedEvent::Compaction(event), None) + } + NormalizedEvent::Notification(mut event) => { + route_session_event(&mut event, &alias, metadata); + (NormalizedEvent::Notification(event), None) + } + NormalizedEvent::HookMark(mut event) => { + route_session_event(&mut event, &alias, metadata); + (NormalizedEvent::HookMark(event), None) + } + NormalizedEvent::SubagentStarted(mut event) => { + route_subagent_event(&mut event, &alias, metadata); + (NormalizedEvent::SubagentStarted(event), None) + } + NormalizedEvent::SubagentEnded(mut event) => { + route_subagent_event(&mut event, &alias, metadata); + (NormalizedEvent::SubagentEnded(event), None) + } + NormalizedEvent::LlmHint(mut event) => { + event.session_id = alias.parent_session_id; + event.subagent_id = Some(alias.subagent_id); + event.metadata = merge_metadata(event.metadata, metadata); + (NormalizedEvent::LlmHint(event), None) + } + NormalizedEvent::LlmStarted(mut event) => { + route_llm_event(&mut event, &alias, metadata); + (NormalizedEvent::LlmStarted(event), None) + } + NormalizedEvent::LlmEnded(mut event) => { + route_llm_event(&mut event, &alias, metadata); + (NormalizedEvent::LlmEnded(event), None) + } + NormalizedEvent::ToolStarted(mut event) => { + route_tool_event(&mut event, &alias, metadata); + (NormalizedEvent::ToolStarted(event), None) + } + NormalizedEvent::ToolEnded(mut event) => { + route_tool_event(&mut event, &alias, metadata); + (NormalizedEvent::ToolEnded(event), None) + } + } +} + +// Rewrites session-level child events after an alias match. These events still describe the parent +// session's timeline once the child thread is known to be a subagent, so only the session id and +// debug metadata change. +fn route_session_event(event: &mut SessionEvent, alias: &SessionAlias, metadata: Value) { + event.session_id = alias.parent_session_id.clone(); + event.metadata = merge_metadata(event.metadata.clone(), metadata); +} + +// Rewrites nested subagent events that appeared inside an aliased child session. The inner +// subagent id remains intact, while the containing session id moves to the real parent session. +fn route_subagent_event(event: &mut SubagentEvent, alias: &SessionAlias, metadata: Value) { + event.session_id = alias.parent_session_id.clone(); + event.metadata = merge_metadata(event.metadata.clone(), metadata); +} + +// Rewrites hook-originated LLM events from aliased child sessions. `LlmEvent` does not have a +// first-class subagent id field, so the alias owner is stamped into metadata where the session +// manager's hook-LLM path can recover it and choose the subagent scope. +fn route_llm_event(event: &mut LlmEvent, alias: &SessionAlias, metadata: Value) { + event.session_id = alias.parent_session_id.clone(); + event.metadata = merge_metadata( + event.metadata.clone(), + merge_metadata( + metadata, + json!({ + "llm_correlation_status": "session_alias", + "llm_correlation_source": "session_alias", + "llm_correlation_subagent_id": alias.subagent_id.clone(), + }), + ), + ); +} + +// Rewrites tool calls emitted by an aliased child session so they attach under the aliased +// subagent. This is the common case for Codex child-thread tool activity that would otherwise show +// up as root-agent tool calls. +fn route_tool_event(event: &mut ToolEvent, alias: &SessionAlias, metadata: Value) { + event.session_id = alias.parent_session_id.clone(); + event.subagent_id = Some(alias.subagent_id.clone()); + event.metadata = merge_metadata(event.metadata.clone(), metadata); +} + +// Reads the first string-like value from any candidate JSON path. Scalar numbers and booleans are +// accepted for IDs because provider payloads are not always strict about identifier types. +pub(crate) fn json_string_at(payload: &Value, paths: &[&[&str]]) -> Option { + json_value_at(payload, paths) + .and_then(|value| match value { + Value::String(value) => Some(value), + Value::Number(value) => Some(value.to_string()), + Value::Bool(value) => Some(value.to_string()), + _ => None, + }) + .filter(|value| !value.is_empty()) +} + +// Reads the first JSON value from any candidate path. The clone is intentional because extracted +// correlation data must live independently of the provider payload it was read from. +pub(crate) fn json_value_at(payload: &Value, paths: &[&[&str]]) -> Option { + paths.iter().find_map(|path| { + let mut current = payload; + for key in *path { + current = current.get(*key)?; + } + Some(current.clone()) + }) +} + +// Inserts an optional string value into a JSON object while omitting absent fields entirely. This +// keeps correlation metadata compact and avoids serializing nulls as meaningful observations. +pub(crate) fn insert_optional(object: &mut Map, key: &str, value: Option<&str>) { + if let Some(value) = value { + object.insert(key.to_string(), json!(value)); + } +} + +// Merges metadata objects with right-hand values taking precedence and null right-hand fields +// ignored. Non-object values are preserved under separate keys so callers do not lose unusual +// metadata shapes supplied by configuration or hooks. +pub(crate) fn merge_metadata(left: Value, right: Value) -> Value { + match (left, right) { + (Value::Object(mut left), Value::Object(right)) => { + for (key, value) in right { + if !value.is_null() { + left.insert(key, value); + } + } + Value::Object(left) + } + (Value::Null, right) => right, + (left, Value::Null) => left, + (left, right) => { + let mut object = Map::new(); + object.insert("metadata".into(), left); + object.insert("extra_metadata".into(), right); + Value::Object(object) + } + } +} + +#[cfg(test)] +#[path = "../../tests/coverage/alignment_tests.rs"] +mod tests; diff --git a/crates/cli/src/gateway.rs b/crates/cli/src/gateway.rs index b03ade05..46120ffc 100644 --- a/crates/cli/src/gateway.rs +++ b/crates/cli/src/gateway.rs @@ -23,6 +23,7 @@ use nemo_flow::codec::traits::LlmResponseCodec; use nemo_flow::error::FlowError; use serde_json::{Map, Value, json}; +use crate::alignment::{self, GatewayRouteKind}; use crate::config::header_string; use crate::error::CliError; use crate::server::AppState; @@ -30,12 +31,6 @@ use crate::session::{GatewayCallPrep, LlmGatewayStart}; const MAX_BODY_BYTES: usize = 100 * 1024 * 1024; -// ChatGPT backend base URL used by Codex when authenticated with ChatGPT-Plus OAuth. Mirrors the -// `CHATGPT_CODEX_BASE_URL` constant in `codex-rs/model-provider-info/src/lib.rs`, which Codex -// selects in `ModelProviderInfo::to_api_provider` when `auth_mode` is `Chatgpt` or -// `ChatgptAuthTokens`. The standard `api.openai.com/v1` base is used for API-key auth instead. -const CHATGPT_CODEX_BASE_URL: &str = "https://chatgpt.com/backend-api/codex"; - /// Proxies supported LLM API requests through NeMo Flow's managed execution pipeline. /// /// The gateway buffers the inbound body once, opens a managed LLM call against the resolved @@ -92,11 +87,8 @@ async fn prepare_gateway_request( .path_and_query() .map(|p| p.as_str()) .unwrap_or(parts.uri.path()); - let upstream_url = if should_use_chatgpt_backend(provider, &parts.headers) { - chatgpt_upstream_url(path_and_query) - } else { - provider.upstream_url(config, path_and_query) - }; + let upstream_url = gateway_upstream_url_override(provider, &parts.headers, path_and_query) + .unwrap_or_else(|| provider.upstream_url(config, path_and_query)); let streaming = request_json .get("stream") .and_then(Value::as_bool) @@ -115,15 +107,23 @@ async fn prepare_gateway_request( // Builds the [`LlmGatewayStart`] payload from a prepared request. Identifier resolution is shared // across streaming and non-streaming paths so correlation behavior is consistent for every route. +// Provider-specific fallbacks are resolved here, before request execution leaves the gateway path, +// because the later runtime-managed LLM call only sees this normalized start payload. fn build_llm_gateway_start(request: &PreparedGatewayRequest) -> LlmGatewayStart { LlmGatewayStart { - session_id: gateway_session_id(&request.headers), + // Explicit NeMo Flow headers still win, but alignment can recover agent-native session + // signals when available. Applies to Claude Code's session header and Codex's Responses + // prompt-cache thread id today. + session_id: gateway_session_id(&request.headers, &request.request_json, request.provider), provider: request.provider.name().to_string(), model_name: request .request_json .get("model") .and_then(Value::as_str) .map(ToOwned::to_owned), + // Subagent ownership is intentionally header-only at the gateway layer. Body fields can be + // provider payload content rather than scope identity, so the session layer handles other + // ownership hints. subagent_id: gateway_subagent_id(&request.headers), conversation_id: gateway_identifier( &request.headers, @@ -152,6 +152,8 @@ fn build_llm_gateway_start(request: &PreparedGatewayRequest) -> LlmGatewayStart &["metadata", "request_id"], ], ) + // Preserve a transport request id as a weak fallback for debugging even when the provider + // body does not expose an LLM request id. .or_else(|| header_string(&request.headers, "x-request-id")), request: LlmRequest { headers: observable_headers(&request.headers), @@ -545,9 +547,8 @@ fn encode_sse_frame(event_json: &Value, route: ProviderRoute) -> String { } // Forwards the buffered request to the upstream provider with only the safe request headers. This -// is shared by the buffered and streaming managed funcs so header filtering stays consistent. When -// `OPENAI_API_KEY` is set the gateway replaces any inbound ChatGPT-Plus OAuth JWT with the env -// key; otherwise the inbound credentials are forwarded as-is. +// is shared by the buffered and streaming managed funcs so header filtering stays consistent. +// Agent-native credential quirks are normalized by alignment before provider auth injection runs. async fn forward_upstream_request( http: &reqwest::Client, method: &Method, @@ -556,17 +557,7 @@ async fn forward_upstream_request( headers: &HeaderMap, route: ProviderRoute, ) -> Result { - // Only strip the inbound JWT when we actually have a replacement key to inject. Without one - // the upstream just receives no auth and 401s, which is no better than letting it reject the - // JWT itself — and stripping silently can break setups that point the gateway at an upstream - // that happens to accept the ChatGPT-Plus token. - // Whitespace-only keys are effectively missing: stripping the inbound JWT and injecting an - // empty/whitespace bearer just trades one 401 for another while losing observability. - let has_openai_env = std::env::var("OPENAI_API_KEY") - .ok() - .filter(|v| !v.trim().is_empty()) - .is_some(); - let sanitized = strip_chatgpt_oauth_for_openai_route(headers, route, has_openai_env); + let sanitized = gateway_forward_headers(headers, route); let mut upstream = http.request(method.clone(), url).body(body_bytes.clone()); for (name, value) in &sanitized { if should_forward_request_header(name) { @@ -577,82 +568,10 @@ async fn forward_upstream_request( upstream.send().await } -// Builds the upstream URL for the ChatGPT backend. OpenAI API bases commonly include `/v1`, while -// the ChatGPT backend base is -// `chatgpt.com/backend-api/codex` (no `/v1`). Both append `/responses` to their base, so the -// ChatGPT path is `.../codex/responses`, not `.../codex/v1/responses`. Strip any `/v1` prefix -// that the gateway's route matcher may have included from the inbound request path. -fn chatgpt_upstream_url(path_and_query: &str) -> String { - let path = path_and_query.strip_prefix("/v1").unwrap_or(path_and_query); - format!("{CHATGPT_CODEX_BASE_URL}{path}") -} - -// Returns `true` when the `Authorization` header carries a JWT-shaped bearer token (`Bearer eyJ` -// prefix). Codex stores ChatGPT-Plus OAuth tokens in `~/.codex/auth.json` as a `TokenData` -// struct with `access_token`, `refresh_token`, and `id_token` fields (see -// `codex-rs/login/src/token_data.rs`). The access token is a JWT whose base64 header starts -// with `eyJ`. Real `sk-...` API keys and opaque tokens do not match this pattern. -fn has_chatgpt_jwt(headers: &HeaderMap) -> bool { - headers - .get(http::header::AUTHORIZATION) - .and_then(|value| value.to_str().ok()) - .is_some_and(|value| value.starts_with("Bearer eyJ")) -} - -// Returns `true` when the gateway should route the request to the ChatGPT backend -// (`chatgpt.com/backend-api/codex`) instead of the configured `openai_base_url`. Mirrors the -// base-URL selection in Codex's `ModelProviderInfo::to_api_provider` (`codex-rs/model-provider- -// info/src/lib.rs`): ChatGPT OAuth routes to `CHATGPT_CODEX_BASE_URL`, API-key auth routes to -// `api.openai.com/v1`. Fires when all of: (1) the route is OpenAI-family, (2) the inbound -// request carries a ChatGPT OAuth JWT, and (3) no `OPENAI_API_KEY` is available to substitute. -fn should_use_chatgpt_backend(route: ProviderRoute, headers: &HeaderMap) -> bool { - route.is_openai() - && has_chatgpt_jwt(headers) - && std::env::var("OPENAI_API_KEY") - .ok() - .filter(|v| !v.trim().is_empty()) - .is_none() -} - -// Removes JWT-shaped bearer tokens from inbound `Authorization` on OpenAI routes when we have a -// replacement `OPENAI_API_KEY` to inject. Codex's `BearerAuthProvider` (`codex-rs/model-provider/ -// src/bearer_auth_provider.rs`) always sets an `Authorization: Bearer ` header — either -// the ChatGPT OAuth access token (a JWT starting `eyJ`) or a plain API key (`sk-...`). When -// `OPENAI_API_KEY` is set, the gateway strips the JWT so `inject_provider_auth` can substitute -// the env key for the `api.openai.com` route. When the key is absent, the JWT is preserved and -// `should_use_chatgpt_backend` routes to the ChatGPT backend. Real `sk-...` keys are unaffected. -fn strip_chatgpt_oauth_for_openai_route( - headers: &HeaderMap, - route: ProviderRoute, - has_replacement_key: bool, -) -> HeaderMap { - if !matches!( - route, - ProviderRoute::OpenAiResponses - | ProviderRoute::OpenAiChatCompletions - | ProviderRoute::OpenAiModels - ) || !has_replacement_key - { - return headers.clone(); - } - let mut out = headers.clone(); - let looks_like_jwt = out - .get(http::header::AUTHORIZATION) - .and_then(|value| value.to_str().ok()) - .map(|value| value.starts_with("Bearer eyJ")) - .unwrap_or(false); - if looks_like_jwt { - out.remove(http::header::AUTHORIZATION); - } - out -} - // If the inbound request has no provider auth header (Authorization / x-api-key / api-key), read -// the provider's standard API key env var and attach it to the outbound request. When codex sends -// its ChatGPT-Plus OAuth JWT the gateway forwards it unless `OPENAI_API_KEY` is set, in which case -// `strip_chatgpt_oauth_for_openai_route` removes the JWT first and this function injects the env -// key. If neither inbound auth nor the env var is present, the request is forwarded as-is and the -// upstream returns a real 401 (caller can detect and surface). +// the provider's standard API key env var and attach it to the outbound request. Alignment may +// have already normalized agent-native auth material; this function remains provider-generic and +// only handles standard upstream auth injection. fn inject_provider_auth( builder: reqwest::RequestBuilder, route: ProviderRoute, @@ -772,18 +691,9 @@ pub(crate) async fn models( .path_and_query() .map(|p| p.as_str()) .unwrap_or(parts.uri.path()); - let upstream_url = if should_use_chatgpt_backend(provider, &parts.headers) { - chatgpt_upstream_url(path_and_query) - } else { - provider.upstream_url(&state.config, path_and_query) - }; - // Whitespace-only keys are effectively missing: stripping the inbound JWT and injecting an - // empty/whitespace bearer just trades one 401 for another while losing observability. - let has_openai_env = std::env::var("OPENAI_API_KEY") - .ok() - .filter(|v| !v.trim().is_empty()) - .is_some(); - let sanitized = strip_chatgpt_oauth_for_openai_route(&parts.headers, provider, has_openai_env); + let upstream_url = gateway_upstream_url_override(provider, &parts.headers, path_and_query) + .unwrap_or_else(|| provider.upstream_url(&state.config, path_and_query)); + let sanitized = gateway_forward_headers(&parts.headers, provider); let mut upstream = state.http.get(upstream_url); for (name, value) in &sanitized { if should_forward_request_header(name) { @@ -824,13 +734,6 @@ impl ProviderRoute { } } - const fn is_openai(self) -> bool { - matches!( - self, - Self::OpenAiResponses | Self::OpenAiChatCompletions | Self::OpenAiModels - ) - } - // Returns the provider route name recorded in LLM event metadata. These names split OpenAI API // variants because their request/response schemas differ even when they share a base URL. const fn name(self) -> &'static str { @@ -858,8 +761,8 @@ impl ProviderRoute { self.upstream_url_with_base(base, path_and_query) } - // Like `upstream_url` but with an explicit base URL. Used by the ChatGPT OAuth fallback path - // which routes to `CHATGPT_CODEX_BASE_URL` instead of the configured `openai_base_url`. + // Like `upstream_url` but with an explicit base URL. This keeps OpenAI `/v1` normalization in + // one place for configured public, enterprise, or local proxy bases. fn upstream_url_with_base(self, base: &str, path_and_query: &str) -> String { let base = base.trim_end_matches('/'); let path_and_query = match self { @@ -870,6 +773,19 @@ impl ProviderRoute { }; format!("{base}{path_and_query}") } + + // Narrows gateway routing to the smaller taxonomy used by trace alignment. Keeping this + // conversion here prevents provider-specific alignment code from depending on gateway URL + // routing internals. + const fn alignment_route(self) -> GatewayRouteKind { + match self { + Self::OpenAiResponses => GatewayRouteKind::OpenAiResponses, + Self::OpenAiChatCompletions => GatewayRouteKind::OpenAiChatCompletions, + Self::OpenAiModels => GatewayRouteKind::OpenAiModels, + Self::AnthropicMessages => GatewayRouteKind::AnthropicMessages, + Self::AnthropicCountTokens => GatewayRouteKind::AnthropicCountTokens, + } + } } fn normalize_openai_path_for_base(base: &str, path_and_query: &str) -> String { @@ -883,47 +799,81 @@ fn normalize_openai_path_for_base(base: &str, path_and_query: &str) -> String { } } -// Reads the gateway session id from explicit gateway headers first, with Claude's session header -// accepted for compatibility with Claude Code environments that already propagate it. -fn gateway_session_id(headers: &HeaderMap) -> Option { - header_string(headers, "x-nemo-flow-session-id") - .or_else(|| header_string(headers, "x-claude-code-session-id")) +// Gives alignment adapters a chance to choose an agent-native upstream before default provider +// routing runs. Today this supports Codex ChatGPT OAuth; future harness fallbacks should stay in +// alignment rather than adding provider-shaped checks here. +fn gateway_upstream_url_override( + route: ProviderRoute, + headers: &HeaderMap, + path_and_query: &str, +) -> Option { + gateway_upstream_url_override_with_openai_key_state( + route, + headers, + path_and_query, + env_var_is_nonempty("OPENAI_API_KEY"), + ) +} + +fn gateway_upstream_url_override_with_openai_key_state( + route: ProviderRoute, + headers: &HeaderMap, + path_and_query: &str, + has_openai_replacement_key: bool, +) -> Option { + alignment::gateway_upstream_url_override( + headers, + route.alignment_route(), + path_and_query, + has_openai_replacement_key, + ) +} + +// Lets alignment adapters normalize agent-native credentials before the gateway injects standard +// provider API keys. Whitespace-only env vars are treated as missing because forwarding an empty +// bearer value only replaces one authentication failure with another. +fn gateway_forward_headers(headers: &HeaderMap, route: ProviderRoute) -> HeaderMap { + gateway_forward_headers_with_openai_key_state( + headers, + route, + env_var_is_nonempty("OPENAI_API_KEY"), + ) +} + +fn gateway_forward_headers_with_openai_key_state( + headers: &HeaderMap, + route: ProviderRoute, + has_openai_replacement_key: bool, +) -> HeaderMap { + alignment::gateway_forward_headers(headers, route.alignment_route(), has_openai_replacement_key) +} + +fn env_var_is_nonempty(name: &str) -> bool { + std::env::var(name) + .ok() + .filter(|value| !value.trim().is_empty()) + .is_some() +} + +// Delegates provider-specific session fallbacks to `alignment` so request construction stays +// generic and each coding-agent quirk has one documented adapter. +fn gateway_session_id(headers: &HeaderMap, body: &Value, route: ProviderRoute) -> Option { + alignment::gateway_session_id(headers, body, route.alignment_route()) } fn gateway_subagent_id(headers: &HeaderMap) -> Option { - header_string(headers, "x-nemo-flow-subagent-id") + alignment::gateway_subagent_id(headers) } -// Resolves a correlation identifier from a dedicated header before trying known JSON body paths. -// Header precedence lets callers disambiguate requests even when provider payloads contain stale -// or differently scoped identifiers. +// Keeps the gateway-facing helper local for tests while the generic extraction pattern lives in +// `alignment`. fn gateway_identifier( headers: &HeaderMap, body: &Value, header_name: &'static str, body_paths: &[&[&str]], ) -> Option { - header_string(headers, header_name).or_else(|| { - body_paths - .iter() - .find_map(|path| string_at(body, path)) - .filter(|value| !value.is_empty()) - }) -} - -// Reads nested JSON as a string, accepting scalar numeric and boolean forms for provider metadata -// fields that are not consistently serialized as strings. Arrays and objects are ignored. -fn string_at(payload: &Value, path: &[&str]) -> Option { - let mut current = payload; - for key in path { - current = current.get(*key)?; - } - match current { - Value::String(value) => Some(value.clone()), - Value::Number(value) => Some(value.to_string()), - Value::Bool(value) => Some(value.to_string()), - _ => None, - } + alignment::gateway_identifier(headers, body, header_name, body_paths) } // Copies only non-sensitive, forwardable request headers into LLM request metadata. This preserves diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 8f723059..90dfde9a 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -4,6 +4,7 @@ //! NeMo Flow coding-agent gateway CLI. mod adapters; +mod alignment; mod banner; mod completions_install; mod config; diff --git a/crates/cli/src/session.rs b/crates/cli/src/session.rs index e5f1105f..38de4e26 100644 --- a/crates/cli/src/session.rs +++ b/crates/cli/src/session.rs @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -9,7 +9,9 @@ use axum::http::HeaderMap; use nemo_flow::api::llm::{ LlmAttributes, LlmCallEndParams, LlmCallParams, LlmHandle, LlmRequest, llm_call, llm_call_end, }; -use nemo_flow::api::runtime::{ScopeStackHandle, TASK_SCOPE_STACK, create_scope_stack}; +use nemo_flow::api::runtime::{ + ScopeStackHandle, TASK_SCOPE_STACK, create_scope_stack, task_scope_push, +}; use nemo_flow::api::scope::{ EmitMarkEventParams, PopScopeParams, PushScopeParams, ScopeHandle, ScopeType, event as emit_mark_event, get_handle, pop_scope, push_scope, @@ -20,6 +22,10 @@ use nemo_flow::api::tool::{ use serde_json::{Map, Value, json}; use tokio::sync::Mutex; +use crate::alignment::{ + self, PendingSubagentStart, SessionAlias, SessionAlignmentState, insert_optional, + json_string_at, json_value_at, merge_metadata, +}; use crate::config::{GatewayConfig, SessionConfig}; use crate::error::CliError; use crate::model::{ @@ -33,6 +39,10 @@ const LAST_OWNER_TTL: Duration = Duration::from_secs(300); #[derive(Clone)] pub(crate) struct SessionManager { inner: Arc>>, + // Cross-session alignment state owns child-session aliases and child-first SessionStart hooks. + // Applies to Codex child threads today; the generic state lives in `alignment` so session code + // only orchestrates when promotion is safe. + alignment: Arc>, default_config: GatewayConfig, } @@ -87,7 +97,13 @@ struct Session { scope_stack: ScopeStackHandle, agent_scope: Option, subagents: HashMap, + // Each active subagent gets its own scope stack seeded with the parent agent handle. This lets + // sibling workers close out of order without corrupting the task-local stack. + subagent_stacks: HashMap, subagent_stack: Vec, + // Tracks subagents closed by synthetic or provider-specific completion signals so a later + // duplicate end hook does not reopen or mark an already-closed worker. + completed_subagents: HashSet, llms: HashMap, tools: HashMap, pending_llm_hints: Vec, @@ -119,8 +135,36 @@ struct ToolHint { #[derive(Debug, Clone)] struct LastLlmOwner { - subagent_id: Option, + subagent_id: String, updated_at: Instant, + // The source is exported in correlation metadata, which makes sticky ownership easier to audit + // in Phoenix when explicit gateway headers are absent. + source: LastLlmOwnerSource, +} + +#[derive(Debug, Clone, Copy)] +enum LastLlmOwnerSource { + Llm, + Tool, + SubagentStart, +} + +impl LastLlmOwnerSource { + const fn status(self) -> &'static str { + match self { + Self::Llm => "sticky_last_owner", + Self::Tool => "recent_tool_owner", + Self::SubagentStart => "subagent_start", + } + } + + const fn metadata_source(self) -> Option<&'static str> { + match self { + Self::Llm => None, + Self::Tool => Some("tool_owner"), + Self::SubagentStart => Some("subagent_start"), + } + } } struct LlmOwnerResolution { @@ -129,6 +173,7 @@ struct LlmOwnerResolution { status: &'static str, source: Option, hint: Option, + metadata: Value, } struct ToolOwnerResolution { @@ -147,6 +192,7 @@ impl SessionManager { pub(crate) fn new(default_config: GatewayConfig) -> Self { Self { inner: Arc::new(Mutex::new(HashMap::new())), + alignment: Arc::new(Mutex::new(SessionAlignmentState::default())), default_config, } } @@ -168,30 +214,50 @@ impl SessionManager { headers: &HeaderMap, events: Vec, ) -> Result<(), CliError> { + let mut alignment_state = self.alignment.lock().await; let mut sessions = self.inner.lock().await; for event in events { + let mut event = event; + let config = self.default_config.session_config_from_headers(headers); + if queue_or_promote_child_start( + &mut event, + &mut sessions, + &mut alignment_state, + config.clone(), + ) + .await? + { + continue; + } + + let event = alignment_state.route_event(event); let session_id = event.session_id().to_string(); + let is_agent_started = matches!(&event, NormalizedEvent::AgentStarted(_)); if event.is_terminal() && !sessions.contains_key(&session_id) { continue; } - let config = self.default_config.session_config_from_headers(headers); let event_kind = event_agent_kind(&event); - let session = sessions - .entry(session_id.clone()) - .or_insert_with(|| Session::new(session_id.clone(), event_kind, config.clone())); - if matches!(&event, NormalizedEvent::AgentStarted(_)) - && session.agent_kind == AgentKind::Gateway - && event_kind != AgentKind::Gateway - { - session.agent_kind = event_kind; + let should_remove_session = apply_event_to_session( + &mut sessions, + &session_id, + event, + event_kind, + config.clone(), + is_agent_started, + ) + .await?; + if is_agent_started { + // A just-opened parent may unlock one or more child SessionStart hooks that arrived + // earlier in this batch or an earlier request. + promote_pending_subagents_for_parent( + &mut sessions, + &mut alignment_state, + &session_id, + config.clone(), + ) + .await?; } - session.apply(event).await?; - if session.agent_scope.is_none() - && session.subagents.is_empty() - && session.subagent_stack.is_empty() - && session.llms.is_empty() - && session.tools.is_empty() - { + if should_remove_session { sessions.remove(&session_id); } } @@ -217,18 +283,25 @@ impl SessionManager { headers: &HeaderMap, start: LlmGatewayStart, ) -> Result { - let mut sessions = self.inner.lock().await; + let mut start = start; let config = self.default_config.session_config_from_headers(headers); + let alias = self.resolve_start_alias(&mut start, config.clone()).await?; + let mut sessions = self.inner.lock().await; let session_id = start .session_id .clone() .or_else(|| single_active_session_id(&sessions)) .unwrap_or_else(|| format!("{}-gateway", AgentKind::Gateway.as_str())); - let inferred_agent_kind = agent_kind_for_gateway_provider(&start.provider); + let inferred_agent_kind = alignment::agent_kind_for_gateway_provider(&start.provider); let session = sessions .entry(session_id.clone()) .or_insert_with(|| Session::new(session_id, inferred_agent_kind, config)); - session.start_llm(start).await + let mut active = session.start_llm(start).await?; + if let Some(alias) = alias { + active.session_id = alias.parent_session_id; + active.owner_subagent_id = active.owner_subagent_id.or(Some(alias.subagent_id)); + } + Ok(active) } /// Prepares a managed LLM execution against the right session and scope context. @@ -246,8 +319,10 @@ impl SessionManager { headers: &HeaderMap, start: LlmGatewayStart, ) -> Result { - let mut sessions = self.inner.lock().await; + let mut start = start; let config = self.default_config.session_config_from_headers(headers); + self.resolve_start_alias(&mut start, config.clone()).await?; + let mut sessions = self.inner.lock().await; let session_id = start .session_id .clone() @@ -256,7 +331,7 @@ impl SessionManager { // Match `start_llm`: when this path creates a brand-new session (real agent's gateway // request beats its SessionStart hook), label the session by the provider so ATIF and // Phoenix scopes carry the agent identity instead of freezing on "gateway". - let inferred_agent_kind = agent_kind_for_gateway_provider(&start.provider); + let inferred_agent_kind = alignment::agent_kind_for_gateway_provider(&start.provider); let session = sessions .entry(session_id.clone()) .or_insert_with(|| Session::new(session_id, inferred_agent_kind, config)); @@ -319,18 +394,30 @@ impl SessionManager { owner_subagent_id: Option, response: Value, ) { + let alias = { + let alignment_state = self.alignment.lock().await; + alignment_state.alias_for_session(session_id) + }; + let (session_id, owner_subagent_id) = match alias { + Some(alias) => ( + alias.parent_session_id, + owner_subagent_id.or(Some(alias.subagent_id)), + ), + None => (session_id.to_string(), owner_subagent_id), + }; let mut sessions = self.inner.lock().await; - if let Some(session) = sessions.get_mut(session_id) { + if let Some(session) = sessions.get_mut(&session_id) { session.add_tool_hints_from_llm_response(response, owner_subagent_id); } } /// Closes every still-open session before gateway teardown. /// - /// Codex transparent runs can exit without a native `SessionEnd` hook. Gateway shutdown is the - /// last deterministic lifecycle boundary for those sessions, so close open scopes while - /// observability plugins are still active. + /// Some harnesses can exit without a native `SessionEnd` hook. Gateway shutdown is the last + /// deterministic lifecycle boundary for those sessions, so close open scopes while + /// observability plugins are still active. Applies to Codex transparent runs today. pub(crate) async fn close_all(&self, reason: &str) -> Result<(), CliError> { + self.alignment.lock().await.clear(); let mut sessions = { let mut guard = self.inner.lock().await; guard @@ -340,6 +427,173 @@ impl SessionManager { }; close_sessions_for_shutdown(&mut sessions, reason).await } + + // Applies known or pending child-session aliases before the gateway chooses a session. This is + // deliberately before the `inner` lock in `start_llm`/`prepare_gateway_call` so a child-thread + // gateway request can promote its pending parent/subagent relationship instead of creating a + // synthetic child root. + async fn resolve_start_alias( + &self, + start: &mut LlmGatewayStart, + config: SessionConfig, + ) -> Result, CliError> { + let Some(session_id) = start.session_id.clone() else { + return Ok(None); + }; + let mut alignment_state = self.alignment.lock().await; + if let Some(alias) = alignment_state.alias_for_session(&session_id) { + apply_start_alias(start, &alias); + return Ok(Some(alias)); + } + let Some(pending) = alignment_state.pending_for_session(&session_id) else { + return Ok(None); + }; + let mut sessions = self.inner.lock().await; + let alias = promote_pending_subagent( + &mut sessions, + &mut alignment_state, + session_id, + pending, + config, + ) + .await?; + if let Some(alias) = alias.as_ref() { + apply_start_alias(start, alias); + } + Ok(alias) + } +} + +// Mutates a gateway LLM start in place after alias resolution. The parent session id is what the +// runtime session manager should open, while the subagent id and alias metadata preserve the child +// thread as the LLM owner. +fn apply_start_alias(start: &mut LlmGatewayStart, alias: &SessionAlias) { + start.session_id = Some(alias.parent_session_id.clone()); + if start.subagent_id.is_none() { + start.subagent_id = Some(alias.subagent_id.clone()); + } + start.metadata = merge_metadata(start.metadata.clone(), alias.metadata()); +} + +// Handles child SessionStart events before normal per-session dispatch. Some harnesses advertise a +// parent session on SessionStart; when the child is still empty, queue or promote that start as a +// subagent instead of letting it open a new root trace. Applies to Codex child threads today. +async fn queue_or_promote_child_start( + event: &mut NormalizedEvent, + sessions: &mut HashMap, + alignment_state: &mut SessionAlignmentState, + config: SessionConfig, +) -> Result { + let Some((child_session_id, pending)) = alignment::pending_subagent_start(event) else { + return Ok(false); + }; + if sessions + .get(&child_session_id) + .is_some_and(|session| !session.can_reparent_as_subagent_alias()) + { + return Ok(false); + } + if sessions.contains_key(pending.parent_session_id()) { + alignment_state.remove_pending(&child_session_id); + promote_pending_subagent(sessions, alignment_state, child_session_id, pending, config) + .await?; + } else { + // Child-first ordering is possible for harness-managed children. Drop any empty child + // placeholder and wait until the parent hook or a gateway LLM forces promotion. Applies to + // Codex transparent runs today. + sessions.remove(&child_session_id); + alignment_state.insert_pending(child_session_id, pending); + } + Ok(true) +} + +async fn apply_event_to_session( + sessions: &mut HashMap, + session_id: &str, + event: NormalizedEvent, + event_kind: AgentKind, + config: SessionConfig, + is_agent_started: bool, +) -> Result { + let session = sessions + .entry(session_id.to_string()) + .or_insert_with(|| Session::new(session_id.to_string(), event_kind, config)); + if is_agent_started + && session.agent_kind == AgentKind::Gateway + && event_kind != AgentKind::Gateway + { + session.agent_kind = event_kind; + } + session.apply(event).await?; + Ok(session.is_empty()) +} + +// Promotes all child SessionStart hooks that were waiting on a newly opened parent. Multiple +// children can wait for the same parent when parallel harness-managed subagents start before the +// root hook is observed. Applies to Codex child threads today. +async fn promote_pending_subagents_for_parent( + sessions: &mut HashMap, + alignment_state: &mut SessionAlignmentState, + parent_session_id: &str, + config: SessionConfig, +) -> Result<(), CliError> { + for (child_session_id, pending) in alignment_state.pending_for_parent(parent_session_id) { + promote_pending_subagent( + sessions, + alignment_state, + child_session_id, + pending, + config.clone(), + ) + .await?; + } + Ok(()) +} + +// Converts one pending child SessionStart into a parent-owned subagent and installs the alias used +// by later child-session events. If the child session gained real activity while pending, promotion +// is skipped rather than moving existing LLM/tool handles across scopes. Applies to Codex child +// threads today. +async fn promote_pending_subagent( + sessions: &mut HashMap, + alignment_state: &mut SessionAlignmentState, + child_session_id: String, + pending: PendingSubagentStart, + config: SessionConfig, +) -> Result, CliError> { + if sessions + .get(&child_session_id) + .is_some_and(|session| !session.can_reparent_as_subagent_alias()) + { + return Ok(None); + } + sessions.remove(&child_session_id); + let parent_session_id = pending.parent_session_id().to_string(); + let parent_session = sessions + .entry(parent_session_id.clone()) + .or_insert_with(|| { + Session::new(parent_session_id.clone(), pending.event.agent_kind, config) + }); + if parent_session.agent_scope.is_none() { + // Gateway traffic can be the first signal that forces promotion. In that case, synthesize a + // minimal parent agent scope so the subagent still has a valid runtime parent. + parent_session + .apply(NormalizedEvent::AgentStarted(SessionEvent { + session_id: parent_session_id, + agent_kind: pending.event.agent_kind, + event_name: "implicit_parent_for_aligned_subagent".into(), + payload: Value::Null, + metadata: Value::Null, + })) + .await?; + } + let subagent_event = pending.subagent_start_event(); + parent_session + .apply(NormalizedEvent::SubagentStarted(subagent_event)) + .await?; + let alias = pending.alias_for_child_session(child_session_id.clone()); + alignment_state.insert_alias(child_session_id, alias.clone()); + Ok(Some(alias)) } async fn close_sessions_for_shutdown( @@ -368,7 +622,9 @@ impl Session { scope_stack: create_scope_stack(), agent_scope: None, subagents: HashMap::new(), + subagent_stacks: HashMap::new(), subagent_stack: Vec::new(), + completed_subagents: HashSet::new(), llms: HashMap::new(), tools: HashMap::new(), pending_llm_hints: Vec::new(), @@ -378,6 +634,22 @@ impl Session { } } + // A child session can only be converted into a subagent before any real scope, LLM, or tool + // state has been opened for it. Once work exists under the child, reparenting would move only + // future events and leave an inconsistent trace. + fn can_reparent_as_subagent_alias(&self) -> bool { + self.is_empty() + } + + fn is_empty(&self) -> bool { + self.agent_scope.is_none() + && self.subagents.is_empty() + && self.subagent_stacks.is_empty() + && self.subagent_stack.is_empty() + && self.llms.is_empty() + && self.tools.is_empty() + } + // Runs one normalized hook event inside this session's scope stack. Dispatch stays synchronous // inside the scoped closure so lifecycle ordering from each hook request is preserved exactly. async fn apply(&mut self, event: NormalizedEvent) -> Result<(), CliError> { @@ -386,15 +658,15 @@ impl Session { .scope(stack, async move { match event { NormalizedEvent::AgentStarted(event) => self.start_agent(event), - NormalizedEvent::AgentEnded(event) => self.end_agent(event), + NormalizedEvent::AgentEnded(event) => self.end_agent(event).await, NormalizedEvent::TurnEnded(_) => Ok(()), - NormalizedEvent::SubagentStarted(event) => self.start_subagent(event), - NormalizedEvent::SubagentEnded(event) => self.end_subagent(event), + NormalizedEvent::SubagentStarted(event) => self.start_subagent(event).await, + NormalizedEvent::SubagentEnded(event) => self.end_subagent(event).await, NormalizedEvent::LlmHint(event) => self.add_llm_hint(event), NormalizedEvent::LlmStarted(event) => self.start_hook_llm(event), NormalizedEvent::LlmEnded(event) => self.end_hook_llm(event), NormalizedEvent::ToolStarted(event) => self.start_tool(event), - NormalizedEvent::ToolEnded(event) => self.end_tool(event), + NormalizedEvent::ToolEnded(event) => self.end_tool(event).await, NormalizedEvent::PromptSubmitted(event) => self.mark("prompt_submitted", event), NormalizedEvent::Compaction(event) => self.mark("compaction", event), NormalizedEvent::Notification(event) => self.mark("notification", event), @@ -417,12 +689,15 @@ impl Session { attributes |= LlmAttributes::STREAMING; } let owner = self.resolve_llm_owner(&start); - let metadata = llm_correlation_metadata( - start.metadata, - owner.status, - owner.source.as_deref(), - owner.subagent_id.as_deref(), - owner.hint.as_ref(), + let metadata = merge_metadata( + llm_correlation_metadata( + start.metadata, + owner.status, + owner.source.as_deref(), + owner.subagent_id.as_deref(), + owner.hint.as_ref(), + ), + owner.metadata, ); let handle = llm_call( LlmCallParams::builder() @@ -464,12 +739,15 @@ impl Session { attributes |= LlmAttributes::STREAMING; } let owner = self.resolve_llm_owner(&start); - let metadata = llm_correlation_metadata( - start.metadata, - owner.status, - owner.source.as_deref(), - owner.subagent_id.as_deref(), - owner.hint.as_ref(), + let metadata = merge_metadata( + llm_correlation_metadata( + start.metadata, + owner.status, + owner.source.as_deref(), + owner.subagent_id.as_deref(), + owner.hint.as_ref(), + ), + owner.metadata, ); Ok(GatewayCallPrep { scope_stack: stack, @@ -525,7 +803,7 @@ impl Session { // Closes the session in a fail-safe order: active LLMs/tools first, nested subagents from the // top down, correlation state, then the root agent scope. - fn end_agent(&mut self, event: SessionEvent) -> Result<(), CliError> { + async fn end_agent(&mut self, event: SessionEvent) -> Result<(), CliError> { // Duplicate agent-end hooks (e.g., hermes-agent emitting `on_session_end` more than once // per session) must not reopen the agent scope. if self.agent_scope.is_none() { @@ -534,7 +812,7 @@ impl Session { self.ensure_agent_started(event.metadata.clone())?; self.close_active_llms_for_agent_end()?; self.close_active_tools_for_agent_end()?; - self.close_active_subagents_for_agent_end()?; + self.close_active_subagents_for_agent_end().await?; self.clear_correlation_state(); self.close_agent_scope(event.payload)?; Ok(()) @@ -550,7 +828,7 @@ impl Session { } self.close_active_llms_for_agent_end()?; self.close_active_tools_for_agent_end()?; - self.close_active_subagents_for_agent_end()?; + self.close_active_subagents_for_agent_end().await?; self.clear_correlation_state(); self.close_agent_scope(payload)?; Ok(()) @@ -589,20 +867,16 @@ impl Session { Ok(()) } - // Pops active subagent scopes in stack order so nested subagents close from child to parent. The - // map is cleared afterward to discard any out-of-order stale handles not present in the stack. - fn close_active_subagents_for_agent_end(&mut self) -> Result<(), CliError> { + // Pops active subagent scopes in reverse start order. Each subagent owns an independent runtime + // stack so parallel harness workers can still close cleanly when their completion hooks arrive + // out of order. Applies to Claude Code Agent workers and Codex child threads today. + async fn close_active_subagents_for_agent_end(&mut self) -> Result<(), CliError> { while let Some(subagent_id) = self.subagent_stack.pop() { - if let Some(handle) = self.subagents.remove(&subagent_id) { - pop_scope( - PopScopeParams::builder() - .handle_uuid(&handle.uuid) - .output(json!({ "status": "closed_by_agent_end" })) - .build(), - )?; - } + self.close_subagent_scope(&subagent_id, json!({ "status": "closed_by_agent_end" })) + .await?; } self.subagents.clear(); + self.subagent_stacks.clear(); Ok(()) } @@ -628,32 +902,59 @@ impl Session { Ok(()) } - // Starts a subagent scope under the current session. Duplicate subagent starts are ignored so + // Starts a subagent scope under the root agent scope. Duplicate subagent starts are ignored so // integrations that retry or emit both "start" and "created" style hooks do not double-nest. - fn start_subagent(&mut self, event: SubagentEvent) -> Result<(), CliError> { + // + // Subagents get their own runtime stack seeded with the root agent handle. That keeps Phoenix + // parentage sibling-shaped while still allowing parallel workers to end out of order. + async fn start_subagent(&mut self, event: SubagentEvent) -> Result<(), CliError> { self.ensure_agent_started(event.metadata.clone())?; if self.subagents.contains_key(&event.subagent_id) { return Ok(()); } - let scope = push_scope( - PushScopeParams::builder() - .name(format!("subagent:{}", event.subagent_id).as_str()) - .scope_type(ScopeType::Agent) - .metadata(event.metadata) - .input(event.payload) - .build(), - )?; - self.subagent_stack.push(event.subagent_id.clone()); - self.subagents.insert(event.subagent_id, scope); + let has_parallel_sibling = !self.subagents.is_empty(); + let agent_scope = self + .agent_scope + .clone() + .expect("ensure_agent_started should initialize the agent scope"); + let subagent_id = event.subagent_id; + let subagent_name = format!("subagent:{subagent_id}"); + let subagent_stack = create_scope_stack(); + let scope = TASK_SCOPE_STACK + .scope(subagent_stack.clone(), async { + task_scope_push(agent_scope.clone()); + push_scope( + PushScopeParams::builder() + .name(subagent_name.as_str()) + .scope_type(ScopeType::Agent) + .parent(&agent_scope) + .metadata(event.metadata) + .input(event.payload) + .build(), + ) + .map_err(CliError::from) + }) + .await?; + self.completed_subagents.remove(&subagent_id); + if has_parallel_sibling { + self.set_last_subagent_start_owner(Some(subagent_id.clone())); + } + self.subagent_stack.push(subagent_id.clone()); + self.subagent_stacks + .insert(subagent_id.clone(), subagent_stack); + self.subagents.insert(subagent_id, scope); Ok(()) } - // Ends a subagent only when it is the current top of the subagent stack. Unknown or out-of-order - // endings become mark events instead of corrupting the scope stack, preserving evidence of the - // mismatch for observability consumers. - fn end_subagent(&mut self, event: SubagentEvent) -> Result<(), CliError> { + // Ends a subagent by id. Unknown endings become mark events, while duplicate endings for a + // subagent already closed by another provider-specific completion signal are ignored. Applies + // to Claude Code Agent-tool completion today. + async fn end_subagent(&mut self, event: SubagentEvent) -> Result<(), CliError> { self.ensure_agent_started(event.metadata.clone())?; - let Some(scope) = self.subagents.get(&event.subagent_id).cloned() else { + if self.completed_subagents.contains(&event.subagent_id) { + return Ok(()); + } + if !self.subagents.contains_key(&event.subagent_id) { eprintln!( "nemo-flow CLI gateway: received {} for subagent {} without a matching start", event.event_name, event.subagent_id @@ -669,45 +970,49 @@ impl Session { }, ); }; - if self.subagent_stack.last() != Some(&event.subagent_id) { - return emit_mark_event( - EmitMarkEventParams::builder() - .name("subagent_end_not_top") - .data(event.payload) - .metadata(event.metadata) - .build(), - ) - .map_err(CliError::from); - } - if pop_scope( - PopScopeParams::builder() - .handle_uuid(&scope.uuid) - .output(event.payload.clone()) - .build(), - ) - .is_err() - { - return emit_mark_event( - EmitMarkEventParams::builder() - .name("subagent_end_not_top") - .data(event.payload) - .metadata(event.metadata) - .build(), - ) - .map_err(CliError::from); - } - self.subagent_stack.pop(); - self.subagents.remove(&event.subagent_id); + self.close_subagent_scope(&event.subagent_id, event.payload) + .await?; + Ok(()) + } + + // Closes one subagent using that subagent's own scope stack. This is shared by explicit end + // hooks, provider-specific tool-completion signals, and agent shutdown so all paths clean up + // ownership hints the same way. Applies to Claude Code Agent-tool completion today. + async fn close_subagent_scope( + &mut self, + subagent_id: &str, + output: Value, + ) -> Result { + let Some(scope) = self.subagents.remove(subagent_id) else { + return Ok(false); + }; + let stack = self + .subagent_stacks + .remove(subagent_id) + .unwrap_or_else(|| self.scope_stack.clone()); + TASK_SCOPE_STACK + .scope(stack, async { + pop_scope( + PopScopeParams::builder() + .handle_uuid(&scope.uuid) + .output(output) + .build(), + ) + .map_err(CliError::from) + }) + .await?; + self.subagent_stack.retain(|id| id != subagent_id); + self.completed_subagents.insert(subagent_id.to_string()); self.pending_tool_hints - .retain(|pending| pending.hint.subagent_id.as_ref() != Some(&event.subagent_id)); + .retain(|pending| pending.hint.subagent_id.as_deref() != Some(subagent_id)); if self .last_llm_owner .as_ref() - .is_some_and(|owner| owner.subagent_id.as_ref() == Some(&event.subagent_id)) + .is_some_and(|owner| owner.subagent_id == subagent_id) { self.last_llm_owner = None; } - Ok(()) + Ok(true) } // Stores an LLM correlation hint from hook activity after pruning expired hints. Hints do not @@ -725,12 +1030,15 @@ impl Session { } // Starts an LLM call from hook activity such as Hermes API request hooks. Duplicate call IDs are - // ignored so repeated pre hooks do not create parallel handles for one provider call. + // ignored so repeated pre hooks do not create parallel handles for one provider call. Aliased + // child-session LLMs carry their subagent owner in metadata and are resolved by + // `hook_llm_owner`. fn start_hook_llm(&mut self, event: LlmEvent) -> Result<(), CliError> { self.ensure_agent_started(event.metadata.clone())?; if self.llms.contains_key(&event.api_call_id) { return Ok(()); } + let (parent, metadata) = self.hook_llm_owner(event.metadata); let handle = llm_call( LlmCallParams::builder() .name(event.provider.as_str()) @@ -738,8 +1046,9 @@ impl Session { headers: Map::new(), content: event.request, }) + .parent_opt(parent.as_ref()) .attributes(LlmAttributes::empty()) - .metadata(event.metadata) + .metadata(metadata) .model_name_opt(event.model_name) .build(), )?; @@ -747,8 +1056,12 @@ impl Session { Ok(()) } + // Ends a hook-observed LLM call, synthesizing a start if only the post hook arrives. The same + // alias metadata recovery used by `start_hook_llm` keeps post-only aliased child LLMs under the + // subagent instead of falling back to the root agent. fn end_hook_llm(&mut self, event: LlmEvent) -> Result<(), CliError> { self.ensure_agent_started(event.metadata.clone())?; + let (parent, metadata) = self.hook_llm_owner(event.metadata); let handle = match self.llms.remove(&event.api_call_id) { Some(handle) => handle, None => llm_call( @@ -758,8 +1071,9 @@ impl Session { headers: Map::new(), content: event.request, }) + .parent_opt(parent.as_ref()) .attributes(LlmAttributes::empty()) - .metadata(event.metadata.clone()) + .metadata(metadata.clone()) .model_name_opt(event.model_name.clone()) .build(), )?, @@ -768,12 +1082,31 @@ impl Session { LlmCallEndParams::builder() .handle(&handle) .response(event.response) - .metadata(event.metadata) + .metadata(metadata) .build(), )?; Ok(()) } + // Recovers owner information stamped by alignment when a hook-originated LLM event came from + // an aliased child session. Gateway LLM calls have first-class owner resolution, but hook LLM + // events only carry metadata, so this is the bridge that keeps aliased child LLMs under the + // subagent instead of the root agent. + fn hook_llm_owner(&mut self, metadata: Value) -> (Option, Value) { + let Some(subagent_id) = json_string_at(&metadata, &[&["llm_correlation_subagent_id"][..]]) + else { + return (self.agent_scope.clone(), metadata); + }; + let Some(scope) = self.subagents.get(&subagent_id).cloned() else { + return (self.agent_scope.clone(), metadata); + }; + self.set_last_llm_owner(Some(subagent_id.clone())); + ( + Some(scope), + merge_metadata(metadata, self.subagent_llm_metadata(&subagent_id)), + ) + } + // Starts a tool call under an explicit subagent when available, otherwise under the agent // scope. Duplicate tool IDs are ignored so repeated pre-tool hooks do not create parallel // handles for one agent tool invocation. @@ -799,6 +1132,7 @@ impl Session { owner.subagent_id.as_deref(), owner.hint.as_ref(), ); + self.set_last_tool_owner(owner.subagent_id.clone()); let handle = tool_call( ToolCallParams::builder() .name(event.tool_name.as_str()) @@ -814,8 +1148,13 @@ impl Session { // Ends a tool call, synthesizing a start if no matching handle exists. This keeps post-only // hooks observable and preserves the final result/status instead of dropping orphaned endings. - fn end_tool(&mut self, event: ToolEvent) -> Result<(), CliError> { + async fn end_tool(&mut self, event: ToolEvent) -> Result<(), CliError> { self.ensure_agent_started(event.metadata.clone())?; + let completed_agent_subagent_id = alignment::completed_subagent_from_tool(&event); + let explicit_subagent_id = event + .subagent_id + .clone() + .filter(|subagent_id| self.subagents.contains_key(subagent_id)); let handle = match self.tools.remove(&event.tool_call_id) { Some(handle) => handle, None => { @@ -836,6 +1175,7 @@ impl Session { owner.subagent_id.as_deref(), owner.hint.as_ref(), ); + self.set_last_tool_owner(owner.subagent_id.clone()); tool_call( ToolCallParams::builder() .name(event.tool_name.as_str()) @@ -850,13 +1190,18 @@ impl Session { tool_call_end( ToolCallEndParams::builder() .handle(&handle) - .result(event.result) + .result(event.result.clone()) .metadata(merge_metadata( event.metadata, json!({ "status": event.status }), )) .build(), )?; + self.set_last_tool_owner(explicit_subagent_id); + if let Some(subagent_id) = completed_agent_subagent_id { + self.close_subagent_scope(&subagent_id, event.result) + .await?; + } Ok(()) } @@ -930,6 +1275,7 @@ impl Session { status: "explicit", source: Some("gateway_header".to_string()), hint: None, + metadata: self.subagent_llm_metadata(subagent_id), }); } None @@ -959,14 +1305,15 @@ impl Session { // This covers agents that emit one hint followed by a cluster of related provider calls. fn sticky_llm_owner(&self) -> Option { if let Some(owner) = self.last_llm_owner.as_ref() - && let Some(parent) = self.scope_for_owner(owner.subagent_id.as_deref()) + && let Some(parent) = self.subagents.get(&owner.subagent_id).cloned() { return Some(LlmOwnerResolution { parent: Some(parent), - subagent_id: owner.subagent_id.clone(), - status: "sticky_last_owner", - source: None, + subagent_id: Some(owner.subagent_id.clone()), + status: owner.source.status(), + source: owner.source.metadata_source().map(ToOwned::to_owned), hint: None, + metadata: self.subagent_llm_metadata(&owner.subagent_id), }); } None @@ -981,6 +1328,7 @@ impl Session { { let subagent_id = subagent_id.clone(); let scope = scope.clone(); + let metadata = self.subagent_llm_metadata(&subagent_id); self.set_last_llm_owner(Some(subagent_id.clone())); return Some(LlmOwnerResolution { parent: Some(scope), @@ -988,6 +1336,7 @@ impl Session { status: "active_subagent", source: None, hint: None, + metadata, }); } None @@ -1006,6 +1355,7 @@ impl Session { }, source: None, hint: None, + metadata: Value::Null, } } @@ -1018,12 +1368,16 @@ impl Session { status: &'static str, ) -> LlmOwnerResolution { let hinted_subagent_id = hint.subagent_id.clone().or_else(|| hint.agent_id.clone()); - let (parent, subagent_id) = match hinted_subagent_id.as_deref() { + let (parent, subagent_id, metadata) = match hinted_subagent_id.as_deref() { Some(id) => match self.subagents.get(id).cloned() { - Some(scope) => (Some(scope), Some(id.to_string())), - None => (self.agent_scope.clone(), None), + Some(scope) => ( + Some(scope), + Some(id.to_string()), + self.subagent_llm_metadata(id), + ), + None => (self.agent_scope.clone(), None, Value::Null), }, - None => (self.agent_scope.clone(), None), + None => (self.agent_scope.clone(), None, Value::Null), }; if parent.is_some() { self.set_last_llm_owner(subagent_id.clone()); @@ -1034,9 +1388,17 @@ impl Session { status, source: Some(hint.event_name.clone()), hint: Some(hint), + metadata, } } + fn subagent_llm_metadata(&self, subagent_id: &str) -> Value { + let Some(scope) = self.subagents.get(subagent_id) else { + return Value::Null; + }; + alignment::llm_owner_metadata(scope.metadata.as_ref()) + } + // Finds a single best pending hint for a gateway call. Ties are treated as ambiguous and return // `None`, causing the caller to use fallback behavior rather than guessing between subagents. fn matching_hint_index(&self, start: &LlmGatewayStart) -> Option { @@ -1057,23 +1419,42 @@ impl Session { (best.len() == 1).then_some(best[0].0) } - // Resolves a stored owner back to a live scope, falling back to the agent scope when no subagent - // id is present or the subagent has already ended. - fn scope_for_owner(&self, subagent_id: Option<&str>) -> Option { - subagent_id - .and_then(|id| self.subagents.get(id).cloned()) - .or_else(|| self.agent_scope.clone()) - } - // Records the most recent LLM owner with a timestamp so nearby gateway calls can inherit the // same parent scope when explicit IDs and hints are absent. fn set_last_llm_owner(&mut self, subagent_id: Option) { - self.last_llm_owner = Some(LastLlmOwner { + self.last_llm_owner = subagent_id.map(|subagent_id| LastLlmOwner { subagent_id, updated_at: Instant::now(), + source: LastLlmOwnerSource::Llm, }); } + // Records explicit or hint-resolved tool ownership as a short-lived cue for the next unhinted + // LLM call. Coding-agent hooks often identify tool ownership more reliably than provider + // requests, especially for subagents that do not propagate gateway headers. + fn set_last_tool_owner(&mut self, subagent_id: Option) { + if let Some(subagent_id) = subagent_id { + self.last_llm_owner = Some(LastLlmOwner { + subagent_id, + updated_at: Instant::now(), + source: LastLlmOwnerSource::Tool, + }); + } + } + + // Parallel subagent starts are a weak but useful ownership signal: if a new worker starts while + // siblings are active and the next LLM lacks headers/hints, prefer the newest worker over the + // root agent. Single-subagent ownership is handled by `sole_subagent_owner`. + fn set_last_subagent_start_owner(&mut self, subagent_id: Option) { + if let Some(subagent_id) = subagent_id { + self.last_llm_owner = Some(LastLlmOwner { + subagent_id, + updated_at: Instant::now(), + source: LastLlmOwnerSource::SubagentStart, + }); + } + } + // Records tool-call suggestions from LLM responses as private correlation hints. These hints // are not emitted as events; they only help later tool hooks choose the same subagent scope as // the LLM that proposed the call. @@ -1369,31 +1750,6 @@ fn same_optional(left: Option<&str>, right: Option<&str>) -> bool { matches!((left, right), (Some(left), Some(right)) if left == right) } -// Reads the first string-like value from any candidate JSON path. Scalar numbers and booleans are -// accepted for IDs because provider payloads are not always strict about identifier types. -fn json_string_at(payload: &Value, paths: &[&[&str]]) -> Option { - json_value_at(payload, paths) - .and_then(|value| match value { - Value::String(value) => Some(value), - Value::Number(value) => Some(value.to_string()), - Value::Bool(value) => Some(value.to_string()), - _ => None, - }) - .filter(|value| !value.is_empty()) -} - -// Reads the first JSON value from any candidate path. The clone is intentional because extracted -// hint data must live independently of the response body stored on the LLM end event. -fn json_value_at(payload: &Value, paths: &[&[&str]]) -> Option { - paths.iter().find_map(|path| { - let mut current = payload; - for key in *path { - current = current.get(*key)?; - } - Some(current.clone()) - }) -} - // Parses stringified tool arguments when providers encode them as JSON text. Non-JSON strings are // preserved as strings so metadata still reflects what the provider actually returned. fn normalize_tool_arguments(arguments: Value) -> Value { @@ -1481,14 +1837,6 @@ fn tool_correlation_metadata( merge_metadata(metadata, Value::Object(correlation)) } -// Inserts an optional string value into a JSON object while omitting absent fields entirely. This -// keeps correlation metadata compact and avoids serializing nulls as meaningful observations. -fn insert_optional(object: &mut Map, key: &str, value: Option<&str>) { - if let Some(value) = value { - object.insert(key.to_string(), json!(value)); - } -} - // Extracts the source agent kind from any normalized event variant so newly created sessions can // inherit the correct agent identity before an explicit agent-start hook arrives. fn event_agent_kind(event: &NormalizedEvent) -> AgentKind { @@ -1517,44 +1865,6 @@ fn single_active_session_id(sessions: &HashMap) -> Option AgentKind { - match provider { - "anthropic.messages" | "anthropic.count_tokens" => AgentKind::ClaudeCode, - "openai.responses" => AgentKind::Codex, - _ => AgentKind::Gateway, - } -} - -// Merges metadata objects with right-hand values taking precedence and null right-hand fields -// ignored. Non-object values are preserved under separate keys so callers do not lose unusual -// metadata shapes supplied by configuration or hooks. -fn merge_metadata(left: Value, right: Value) -> Value { - match (left, right) { - (Value::Object(mut left), Value::Object(right)) => { - for (key, value) in right { - if !value.is_null() { - left.insert(key, value); - } - } - Value::Object(left) - } - (Value::Null, right) => right, - (left, Value::Null) => left, - (left, right) => { - let mut object = Map::new(); - object.insert("metadata".into(), left); - object.insert("extra_metadata".into(), right); - Value::Object(object) - } - } -} - #[cfg(test)] #[path = "../tests/coverage/session_tests.rs"] mod tests; diff --git a/crates/cli/tests/coverage/alignment_claude_code_tests.rs b/crates/cli/tests/coverage/alignment_claude_code_tests.rs new file mode 100644 index 00000000..e92a63a4 --- /dev/null +++ b/crates/cli/tests/coverage/alignment_claude_code_tests.rs @@ -0,0 +1,90 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use axum::http::HeaderValue; +use serde_json::{Value, json}; + +use super::*; + +fn tool_event(agent_kind: AgentKind, tool_name: &str, result: Value) -> ToolEvent { + ToolEvent { + session_id: "session".into(), + agent_kind, + event_name: "PostToolUse".into(), + tool_call_id: "tool-1".into(), + tool_name: tool_name.into(), + subagent_id: None, + arguments: Value::Null, + result, + status: Some("success".into()), + payload: json!({}), + metadata: json!({}), + } +} + +#[test] +fn owns_anthropic_gateway_providers_only() { + assert!(owns_gateway_provider("anthropic.messages")); + assert!(owns_gateway_provider("anthropic.count_tokens")); + assert!(!owns_gateway_provider("openai.responses")); +} + +#[test] +fn session_id_from_headers_reads_claude_native_header() { + let mut headers = HeaderMap::new(); + assert_eq!(session_id_from_headers(&headers), None); + + headers.insert( + "x-claude-code-session-id", + HeaderValue::from_static("claude-session"), + ); + assert_eq!( + session_id_from_headers(&headers).as_deref(), + Some("claude-session") + ); +} + +#[test] +fn completed_subagent_from_agent_tool_accepts_known_result_keys() { + for (key, expected) in [ + ("agentId", "agent-id"), + ("agent_id", "agent-id-snake"), + ("subagentId", "subagent-id"), + ("subagent_id", "subagent-id-snake"), + ] { + let event = tool_event(AgentKind::ClaudeCode, "Agent", json!({ key: expected })); + assert_eq!( + completed_subagent_from_agent_tool(&event).as_deref(), + Some(expected), + "key {key} should close the matching subagent" + ); + } +} + +#[test] +fn completed_subagent_from_agent_tool_rejects_unrelated_tools_and_agents() { + assert_eq!( + completed_subagent_from_agent_tool(&tool_event( + AgentKind::Codex, + "Agent", + json!({ "agentId": "worker" }), + )), + None + ); + assert_eq!( + completed_subagent_from_agent_tool(&tool_event( + AgentKind::ClaudeCode, + "Read", + json!({ "agentId": "worker" }), + )), + None + ); + assert_eq!( + completed_subagent_from_agent_tool(&tool_event( + AgentKind::ClaudeCode, + "Agent", + json!({ "status": "completed" }), + )), + None + ); +} diff --git a/crates/cli/tests/coverage/alignment_codex_tests.rs b/crates/cli/tests/coverage/alignment_codex_tests.rs new file mode 100644 index 00000000..22858d16 --- /dev/null +++ b/crates/cli/tests/coverage/alignment_codex_tests.rs @@ -0,0 +1,268 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use super::*; + +fn codex_session_event(session_id: &str, payload: Value, metadata: Value) -> SessionEvent { + SessionEvent { + session_id: session_id.into(), + agent_kind: AgentKind::Codex, + event_name: "SessionStart".into(), + payload, + metadata, + } +} + +fn thread_spawn(parent_thread_id: &str) -> Value { + json!({ + "source": { + "subagent": { + "thread_spawn": { + "parent_thread_id": parent_thread_id, + "depth": 2, + "agent_nickname": "Curie", + "agent_role": "worker" + } + } + } + }) +} + +#[test] +fn prompt_cache_session_id_requires_codex_responses_metadata() { + let body = json!({ + "prompt_cache_key": "thread-1", + "client_metadata": { "x-codex-installation-id": "install-1" } + }); + + assert_eq!( + prompt_cache_session_id(&body, GatewayRouteKind::OpenAiResponses).as_deref(), + Some("thread-1") + ); + assert_eq!( + prompt_cache_session_id(&body, GatewayRouteKind::OpenAiChatCompletions), + None + ); + assert_eq!( + prompt_cache_session_id( + &json!({ "prompt_cache_key": "plain-cache" }), + GatewayRouteKind::OpenAiResponses, + ), + None + ); + assert_eq!( + prompt_cache_session_id( + &json!({ + "prompt_cache_key": "", + "client_metadata": { "x-codex-installation-id": "install-1" } + }), + GatewayRouteKind::OpenAiResponses, + ), + None + ); +} + +#[test] +fn chatgpt_backend_override_requires_jwt_and_missing_replacement_key() { + let mut headers = axum::http::HeaderMap::new(); + headers.insert( + "authorization", + axum::http::HeaderValue::from_static("Bearer eyJhbGciOiJIUzI1NiJ9.deadbeef.signature"), + ); + + assert_eq!( + chatgpt_upstream_url_if_needed( + &headers, + GatewayRouteKind::OpenAiResponses, + "/v1/responses", + false, + ) + .as_deref(), + Some("https://chatgpt.com/backend-api/codex/responses") + ); + assert_eq!( + chatgpt_upstream_url_if_needed( + &headers, + GatewayRouteKind::AnthropicMessages, + "/v1/messages", + false, + ), + None + ); + assert_eq!( + chatgpt_upstream_url_if_needed( + &headers, + GatewayRouteKind::OpenAiResponses, + "/v1/responses", + true, + ), + None + ); +} + +#[test] +fn chatgpt_oauth_strip_preserves_provider_keys_and_non_openai_routes() { + let mut jwt_headers = axum::http::HeaderMap::new(); + jwt_headers.insert( + "authorization", + axum::http::HeaderValue::from_static("Bearer eyJhbGciOiJIUzI1NiJ9.deadbeef.signature"), + ); + let stripped = + strip_chatgpt_oauth_for_openai_route(&jwt_headers, GatewayRouteKind::OpenAiResponses, true); + assert!(stripped.get("authorization").is_none()); + + let preserved_without_replacement = strip_chatgpt_oauth_for_openai_route( + &jwt_headers, + GatewayRouteKind::OpenAiResponses, + false, + ); + assert!(preserved_without_replacement.get("authorization").is_some()); + + let preserved_non_openai = strip_chatgpt_oauth_for_openai_route( + &jwt_headers, + GatewayRouteKind::AnthropicMessages, + true, + ); + assert!(preserved_non_openai.get("authorization").is_some()); + + let mut key_headers = axum::http::HeaderMap::new(); + key_headers.insert( + "authorization", + axum::http::HeaderValue::from_static("Bearer sk-real-provider-key"), + ); + let preserved_key = + strip_chatgpt_oauth_for_openai_route(&key_headers, GatewayRouteKind::OpenAiResponses, true); + assert_eq!( + preserved_key.get("authorization").unwrap(), + "Bearer sk-real-provider-key" + ); +} + +#[test] +fn subagent_context_reads_payload_metadata_and_rejects_non_subagents() { + let payload_event = codex_session_event("child", thread_spawn("parent"), json!({})); + let payload_context = subagent_context(&payload_event).unwrap(); + assert_eq!(payload_context.parent_session_id, "parent"); + assert_eq!(payload_context.nickname.as_deref(), Some("Curie")); + assert_eq!(payload_context.role.as_deref(), Some("worker")); + assert_eq!(payload_context.depth.as_deref(), Some("2")); + + let metadata_event = codex_session_event("child", json!({}), thread_spawn("parent")); + assert_eq!( + subagent_context(&metadata_event).unwrap().parent_session_id, + "parent" + ); + + let self_parent = codex_session_event("child", thread_spawn("child"), json!({})); + assert!(subagent_context(&self_parent).is_none()); + + let mut non_codex = codex_session_event("child", thread_spawn("parent"), json!({})); + non_codex.agent_kind = AgentKind::ClaudeCode; + assert!(subagent_context(&non_codex).is_none()); +} + +#[test] +fn subagent_context_reads_first_transcript_line() { + let temp = tempfile::tempdir().unwrap(); + let transcript = temp.path().join("rollout.jsonl"); + std::fs::write( + &transcript, + serde_json::to_string(&json!({ + "session_meta": { + "payload": { + "source": { + "subagent": { + "thread_spawn": { + "parent_thread_id": "parent-from-transcript", + "agent_nickname": "Noether", + "agent_role": "explorer" + } + } + } + } + } + })) + .unwrap() + + "\n" + + r#"{"later":true}"#, + ) + .unwrap(); + let event = codex_session_event("child", json!({ "transcript_path": transcript }), json!({})); + + let context = subagent_context(&event).unwrap(); + assert_eq!(context.parent_session_id, "parent-from-transcript"); + assert_eq!(context.nickname.as_deref(), Some("Noether")); + assert_eq!(context.role.as_deref(), Some("explorer")); +} + +#[test] +fn subagent_metadata_start_event_and_alias_share_thread_fields() { + let event = codex_session_event("child", thread_spawn("parent"), json!({ "keep": true })); + let context = subagent_context(&event).unwrap(); + + let metadata = augment_subagent_metadata(event.metadata.clone(), &context); + assert_eq!(metadata["keep"], json!(true)); + assert_eq!(metadata["thread_source"], json!("subagent")); + assert_eq!(metadata["codex_parent_thread_id"], json!("parent")); + assert_eq!(metadata["codex_subagent_depth"], json!("2")); + assert_eq!(metadata["agent_nickname"], json!("Curie")); + assert_eq!(metadata["agent_role"], json!("worker")); + + let subagent_event = subagent_start_event(&event, &context); + assert_eq!(subagent_event.session_id, "parent"); + assert_eq!(subagent_event.subagent_id, "child"); + assert_eq!( + subagent_event.metadata["codex_subagent_session_id"], + json!("child") + ); + + let alias = alias_for_child_session("child".into(), &context); + assert_eq!(alias.parent_session_id, "parent"); + assert_eq!(alias.subagent_id, "child"); + assert_eq!(alias.metadata()["thread_source"], json!("subagent")); + assert_eq!(alias.metadata()["codex_parent_thread_id"], json!("parent")); + assert_eq!( + alias.metadata()["codex_subagent_session_id"], + json!("child") + ); +} + +#[test] +fn llm_owner_metadata_filters_codex_debug_fields() { + let scope_metadata = json!({ + "thread_source": "subagent", + "codex_parent_thread_id": "parent", + "codex_subagent_session_id": "child", + "codex_subagent_depth": 1, + "agent_nickname": "Ada", + "agent_role": null, + "transcript_path": "/tmp/transcript.jsonl", + "unrelated": "skip" + }); + + let metadata = llm_owner_metadata(Some(&scope_metadata)); + assert_eq!( + metadata, + json!({ + "thread_source": "subagent", + "codex_parent_thread_id": "parent", + "codex_subagent_session_id": "child", + "codex_subagent_depth": 1, + "agent_nickname": "Ada" + }) + ); + assert!(metadata.get("transcript_path").is_none()); + assert_eq!( + llm_owner_metadata(Some(&json!({ "unrelated": true }))), + Value::Null + ); + assert_eq!(llm_owner_metadata(Some(&json!("scalar"))), Value::Null); + assert_eq!(llm_owner_metadata(None), Value::Null); +} + +#[test] +fn owns_only_openai_responses_gateway_provider() { + assert!(owns_gateway_provider("openai.responses")); + assert!(!owns_gateway_provider("openai.chat_completions")); + assert!(!owns_gateway_provider("anthropic.messages")); +} diff --git a/crates/cli/tests/coverage/alignment_tests.rs b/crates/cli/tests/coverage/alignment_tests.rs new file mode 100644 index 00000000..4171de3d --- /dev/null +++ b/crates/cli/tests/coverage/alignment_tests.rs @@ -0,0 +1,316 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use axum::http::HeaderValue; + +use super::*; +use crate::model::{LlmEvent, LlmHintEvent}; + +fn session_event(session_id: &str, event_name: &str) -> SessionEvent { + SessionEvent { + session_id: session_id.into(), + agent_kind: AgentKind::Codex, + event_name: event_name.into(), + payload: json!({ "event": event_name }), + metadata: json!({ "event_metadata": event_name }), + } +} + +fn subagent_event(session_id: &str, event_name: &str) -> SubagentEvent { + SubagentEvent { + session_id: session_id.into(), + agent_kind: AgentKind::Codex, + event_name: event_name.into(), + subagent_id: "nested-child".into(), + payload: json!({ "event": event_name }), + metadata: json!({ "event_metadata": event_name }), + } +} + +fn llm_hint_event(session_id: &str) -> LlmHintEvent { + LlmHintEvent { + session_id: session_id.into(), + agent_kind: AgentKind::Codex, + event_name: "Stop".into(), + subagent_id: Some("payload-child".into()), + agent_id: None, + agent_type: Some("explorer".into()), + conversation_id: Some("conversation-1".into()), + generation_id: Some("generation-1".into()), + request_id: Some("request-1".into()), + model: Some("gpt-test".into()), + payload: json!({ "hint": true }), + metadata: json!({ "event_metadata": "hint" }), + } +} + +fn llm_event(session_id: &str, event_name: &str) -> LlmEvent { + LlmEvent { + session_id: session_id.into(), + agent_kind: AgentKind::Codex, + event_name: event_name.into(), + api_call_id: "api-call-1".into(), + provider: "openai.responses".into(), + model_name: Some("gpt-test".into()), + request: json!({ "input": "hello" }), + response: json!({ "output_text": "hi" }), + metadata: json!({ "event_metadata": event_name }), + } +} + +fn tool_event(session_id: &str, event_name: &str) -> ToolEvent { + ToolEvent { + session_id: session_id.into(), + agent_kind: AgentKind::Codex, + event_name: event_name.into(), + tool_call_id: "tool-1".into(), + tool_name: "exec_command".into(), + subagent_id: Some("payload-child".into()), + arguments: json!({ "cmd": "true" }), + result: json!({ "ok": true }), + status: Some("success".into()), + payload: json!({ "tool": true }), + metadata: json!({ "event_metadata": event_name }), + } +} + +fn aliases() -> HashMap { + HashMap::from([( + "child".into(), + SessionAlias::new( + "parent".into(), + "child".into(), + json!({ "alias_metadata": true }), + ), + )]) +} + +#[test] +fn gateway_session_id_uses_explicit_claude_then_codex_fallbacks() { + let mut headers = HeaderMap::new(); + let codex_body = json!({ + "prompt_cache_key": "codex-thread", + "client_metadata": { "x-codex-installation-id": "install-1" } + }); + + assert_eq!( + gateway_session_id(&headers, &codex_body, GatewayRouteKind::OpenAiResponses).as_deref(), + Some("codex-thread") + ); + + headers.insert( + "x-claude-code-session-id", + HeaderValue::from_static("claude-thread"), + ); + assert_eq!( + gateway_session_id(&headers, &codex_body, GatewayRouteKind::OpenAiResponses).as_deref(), + Some("claude-thread") + ); + + headers.insert( + "x-nemo-flow-session-id", + HeaderValue::from_static("explicit-thread"), + ); + assert_eq!( + gateway_session_id(&headers, &codex_body, GatewayRouteKind::OpenAiResponses).as_deref(), + Some("explicit-thread") + ); +} + +#[test] +fn gateway_subagent_and_identifier_helpers_respect_header_precedence() { + let mut headers = HeaderMap::new(); + headers.insert( + "x-nemo-flow-subagent-id", + HeaderValue::from_static("worker-1"), + ); + headers.insert( + "x-nemo-flow-request-id", + HeaderValue::from_static("request-header"), + ); + let body = json!({ + "conversation": { "id": 42 }, + "request": { "id": "request-body" }, + "object": { "id": { "nested": true } } + }); + + assert_eq!(gateway_subagent_id(&headers).as_deref(), Some("worker-1")); + assert_eq!( + gateway_identifier( + &headers, + &body, + "x-nemo-flow-request-id", + &[&["request", "id"]] + ) + .as_deref(), + Some("request-header") + ); + assert_eq!( + gateway_identifier( + &HeaderMap::new(), + &body, + "missing", + &[&["conversation", "id"]] + ) + .as_deref(), + Some("42") + ); + assert_eq!( + gateway_identifier(&HeaderMap::new(), &body, "missing", &[&["object", "id"]]), + None + ); +} + +#[test] +fn agent_kind_inference_covers_known_provider_names() { + assert_eq!( + agent_kind_for_gateway_provider("anthropic.messages"), + AgentKind::ClaudeCode + ); + assert_eq!( + agent_kind_for_gateway_provider("anthropic.count_tokens"), + AgentKind::ClaudeCode + ); + assert_eq!( + agent_kind_for_gateway_provider("openai.responses"), + AgentKind::Codex + ); + assert_eq!( + agent_kind_for_gateway_provider("openai.chat_completions"), + AgentKind::Gateway + ); +} + +#[test] +fn route_event_through_alias_covers_all_event_variants() { + let aliases = aliases(); + let cases = vec![ + NormalizedEvent::AgentStarted(session_event("child", "SessionStart")), + NormalizedEvent::AgentEnded(session_event("child", "SessionEnd")), + NormalizedEvent::TurnEnded(session_event("child", "Stop")), + NormalizedEvent::PromptSubmitted(session_event("child", "Prompt")), + NormalizedEvent::Compaction(session_event("child", "Compact")), + NormalizedEvent::Notification(session_event("child", "Notify")), + NormalizedEvent::HookMark(session_event("child", "Mark")), + NormalizedEvent::SubagentStarted(subagent_event("child", "SubagentStart")), + NormalizedEvent::SubagentEnded(subagent_event("child", "SubagentEnd")), + NormalizedEvent::LlmHint(llm_hint_event("child")), + NormalizedEvent::LlmStarted(llm_event("child", "LlmStart")), + NormalizedEvent::LlmEnded(llm_event("child", "LlmEnd")), + NormalizedEvent::ToolStarted(tool_event("child", "ToolStart")), + NormalizedEvent::ToolEnded(tool_event("child", "ToolEnd")), + ]; + + for event in cases { + let was_agent_end = matches!(event, NormalizedEvent::AgentEnded(_)); + let (event, finished_alias) = route_event_through_alias(event, &aliases); + assert_eq!(event.session_id(), "parent"); + assert_eq!( + event_metadata(&event)["alias_metadata"], + json!(true), + "alias metadata should be stamped on {event:?}" + ); + assert_eq!(finished_alias.as_deref(), was_agent_end.then_some("child")); + + match event { + NormalizedEvent::AgentStarted(event) => panic!("unexpected agent start: {event:?}"), + NormalizedEvent::AgentEnded(event) => panic!("unexpected agent end: {event:?}"), + NormalizedEvent::SubagentStarted(event) | NormalizedEvent::SubagentEnded(event) => { + assert!(!event.subagent_id.is_empty()); + } + NormalizedEvent::LlmHint(event) => { + assert_eq!(event.subagent_id.as_deref(), Some("child")); + } + NormalizedEvent::ToolStarted(event) | NormalizedEvent::ToolEnded(event) => { + assert_eq!(event.subagent_id.as_deref(), Some("child")); + } + NormalizedEvent::TurnEnded(_) + | NormalizedEvent::PromptSubmitted(_) + | NormalizedEvent::Compaction(_) + | NormalizedEvent::Notification(_) + | NormalizedEvent::HookMark(_) + | NormalizedEvent::LlmStarted(_) + | NormalizedEvent::LlmEnded(_) => {} + } + } +} + +#[test] +fn route_event_without_alias_is_unchanged() { + let event = NormalizedEvent::ToolStarted(tool_event("unknown-child", "ToolStart")); + let (routed, finished_alias) = route_event_through_alias(event.clone(), &aliases()); + + assert_eq!(routed, event); + assert_eq!(finished_alias, None); +} + +#[test] +fn json_helpers_and_metadata_merge_cover_edge_shapes() { + let payload = json!({ + "string": "value", + "number": 7, + "boolean": false, + "empty": "", + "object": { "nested": true } + }); + + assert_eq!( + json_string_at(&payload, &[&["missing"][..], &["string"][..]]).as_deref(), + Some("value") + ); + assert_eq!( + json_string_at(&payload, &[&["number"][..]]).as_deref(), + Some("7") + ); + assert_eq!( + json_string_at(&payload, &[&["boolean"][..]]).as_deref(), + Some("false") + ); + assert_eq!(json_string_at(&payload, &[&["empty"][..]]), None); + assert_eq!(json_string_at(&payload, &[&["object"][..]]), None); + assert_eq!( + json_value_at(&payload, &[&["object"][..]]), + Some(json!({ "nested": true })) + ); + + let mut inserted = Map::new(); + insert_optional(&mut inserted, "present", Some("value")); + insert_optional(&mut inserted, "absent", None); + assert_eq!(inserted.get("present"), Some(&json!("value"))); + assert!(!inserted.contains_key("absent")); + + assert_eq!( + merge_metadata(json!({ "a": 1 }), json!({ "b": 2, "c": null })), + json!({ "a": 1, "b": 2 }) + ); + assert_eq!( + merge_metadata(Value::Null, json!({ "a": 1 })), + json!({ "a": 1 }) + ); + assert_eq!( + merge_metadata(json!({ "a": 1 }), Value::Null), + json!({ "a": 1 }) + ); + assert_eq!( + merge_metadata(json!("left"), json!("right")), + json!({ "metadata": "left", "extra_metadata": "right" }) + ); +} + +fn event_metadata(event: &NormalizedEvent) -> &Value { + match event { + NormalizedEvent::AgentStarted(event) + | NormalizedEvent::AgentEnded(event) + | NormalizedEvent::TurnEnded(event) + | NormalizedEvent::PromptSubmitted(event) + | NormalizedEvent::Compaction(event) + | NormalizedEvent::Notification(event) + | NormalizedEvent::HookMark(event) => &event.metadata, + NormalizedEvent::SubagentStarted(event) | NormalizedEvent::SubagentEnded(event) => { + &event.metadata + } + NormalizedEvent::LlmHint(event) => &event.metadata, + NormalizedEvent::LlmStarted(event) | NormalizedEvent::LlmEnded(event) => &event.metadata, + NormalizedEvent::ToolStarted(event) | NormalizedEvent::ToolEnded(event) => &event.metadata, + } +} diff --git a/crates/cli/tests/coverage/gateway_tests.rs b/crates/cli/tests/coverage/gateway_tests.rs index 6f46c382..08a8c4d5 100644 --- a/crates/cli/tests/coverage/gateway_tests.rs +++ b/crates/cli/tests/coverage/gateway_tests.rs @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use super::*; +use crate::alignment::GatewayRouteKind; use crate::config::GatewayConfig; use crate::server::AppState; use crate::session::SessionManager; @@ -78,6 +79,26 @@ fn selects_provider_routes() { ProviderRoute::AnthropicCountTokens.name(), "anthropic.count_tokens" ); + assert_eq!( + ProviderRoute::OpenAiResponses.alignment_route(), + GatewayRouteKind::OpenAiResponses + ); + assert_eq!( + ProviderRoute::OpenAiChatCompletions.alignment_route(), + GatewayRouteKind::OpenAiChatCompletions + ); + assert_eq!( + ProviderRoute::OpenAiModels.alignment_route(), + GatewayRouteKind::OpenAiModels + ); + assert_eq!( + ProviderRoute::AnthropicMessages.alignment_route(), + GatewayRouteKind::AnthropicMessages + ); + assert_eq!( + ProviderRoute::AnthropicCountTokens.alignment_route(), + GatewayRouteKind::AnthropicCountTokens + ); assert_eq!(ProviderRoute::from_path("/unsupported"), None); } @@ -143,18 +164,25 @@ fn openai_upstream_url_accepts_origin_or_v1_base() { #[test] fn gateway_session_id_prefers_headers_and_has_fallbacks() { let mut headers = HeaderMap::new(); + let codex_body = json!({ + "prompt_cache_key": "codex-session", + "client_metadata": { "x-codex-installation-id": "install-1" } + }); headers.insert( "anthropic-beta", HeaderValue::from_static("prompt-caching-2024-07-31"), ); - assert_eq!(gateway_session_id(&headers), None); + assert_eq!( + gateway_session_id(&headers, &Value::Null, ProviderRoute::AnthropicMessages), + None + ); headers.insert( "x-claude-code-session-id", HeaderValue::from_static("claude-session"), ); assert_eq!( - gateway_session_id(&headers).as_deref(), + gateway_session_id(&headers, &codex_body, ProviderRoute::OpenAiResponses).as_deref(), Some("claude-session") ); @@ -163,11 +191,35 @@ fn gateway_session_id_prefers_headers_and_has_fallbacks() { HeaderValue::from_static("explicit-session"), ); assert_eq!( - gateway_session_id(&headers).as_deref(), + gateway_session_id(&headers, &codex_body, ProviderRoute::OpenAiResponses).as_deref(), Some("explicit-session") ); - assert_eq!(gateway_session_id(&HeaderMap::new()), None); + assert_eq!( + gateway_session_id( + &HeaderMap::new(), + &codex_body, + ProviderRoute::OpenAiResponses + ) + .as_deref(), + Some("codex-session") + ); + assert_eq!( + gateway_session_id( + &HeaderMap::new(), + &json!({ "prompt_cache_key": "plain-cache-key" }), + ProviderRoute::OpenAiResponses, + ), + None + ); + assert_eq!( + gateway_session_id( + &HeaderMap::new(), + &codex_body, + ProviderRoute::OpenAiChatCompletions, + ), + None + ); } #[test] @@ -220,6 +272,52 @@ fn gateway_identifiers_accept_headers_and_scalar_body_values() { ); } +#[test] +fn build_llm_gateway_start_uses_alignment_identifiers_and_metadata() { + let mut headers = HeaderMap::new(); + headers.insert( + "x-nemo-flow-subagent-id", + HeaderValue::from_static("worker-1"), + ); + headers.insert("x-request-id", HeaderValue::from_static("transport-req")); + headers.insert("authorization", HeaderValue::from_static("Bearer secret")); + let request_json = json!({ + "model": "gpt-test", + "stream": true, + "prompt_cache_key": "codex-thread", + "client_metadata": { "x-codex-installation-id": "install-1" }, + "conversation_id": "conversation-1", + "generation": { "id": "generation-1" } + }); + let prepared = PreparedGatewayRequest { + method: Method::POST, + headers, + path: "/responses".into(), + provider: ProviderRoute::OpenAiResponses, + upstream_url: "http://openai/v1/responses".into(), + body_bytes: axum::body::Bytes::new(), + request_json: request_json.clone(), + streaming: true, + }; + + let start = build_llm_gateway_start(&prepared); + + assert_eq!(start.session_id.as_deref(), Some("codex-thread")); + assert_eq!(start.provider, "openai.responses"); + assert_eq!(start.model_name.as_deref(), Some("gpt-test")); + assert_eq!(start.subagent_id.as_deref(), Some("worker-1")); + assert_eq!(start.conversation_id.as_deref(), Some("conversation-1")); + assert_eq!(start.generation_id.as_deref(), Some("generation-1")); + assert_eq!(start.request_id.as_deref(), Some("transport-req")); + assert!(start.streaming); + assert_eq!(start.metadata["gateway_path"], json!("/responses")); + assert_eq!(start.request.content, request_json); + assert!( + !start.request.headers.contains_key("authorization"), + "observable headers should not leak auth secrets" + ); +} + #[test] fn observable_headers_omit_secrets_and_transport_headers() { let mut headers = HeaderMap::new(); @@ -246,8 +344,11 @@ fn strips_chatgpt_plus_jwt_from_openai_route_inbound() { "authorization", HeaderValue::from_static("Bearer eyJhbGciOiJIUzI1NiJ9.deadbeef.signature"), ); - let sanitized = - strip_chatgpt_oauth_for_openai_route(&inbound, ProviderRoute::OpenAiResponses, true); + let sanitized = gateway_forward_headers_with_openai_key_state( + &inbound, + ProviderRoute::OpenAiResponses, + true, + ); assert!(sanitized.get("authorization").is_none()); } @@ -260,8 +361,11 @@ fn preserves_real_bearer_keys_on_openai_route() { "authorization", HeaderValue::from_static("Bearer sk-real-provider-key"), ); - let sanitized = - strip_chatgpt_oauth_for_openai_route(&inbound, ProviderRoute::OpenAiResponses, true); + let sanitized = gateway_forward_headers_with_openai_key_state( + &inbound, + ProviderRoute::OpenAiResponses, + true, + ); assert_eq!( sanitized.get("authorization").unwrap(), "Bearer sk-real-provider-key" @@ -278,8 +382,11 @@ fn does_not_touch_anthropic_route_authorization() { "authorization", HeaderValue::from_static("Bearer eyJ.anthropic.case"), ); - let sanitized = - strip_chatgpt_oauth_for_openai_route(&inbound, ProviderRoute::AnthropicMessages, true); + let sanitized = gateway_forward_headers_with_openai_key_state( + &inbound, + ProviderRoute::AnthropicMessages, + true, + ); assert!(sanitized.get("authorization").is_some()); } @@ -293,8 +400,11 @@ fn preserves_jwt_when_no_replacement_key_available() { "authorization", HeaderValue::from_static("Bearer eyJhbGciOiJIUzI1NiJ9.deadbeef.signature"), ); - let sanitized = - strip_chatgpt_oauth_for_openai_route(&inbound, ProviderRoute::OpenAiResponses, false); + let sanitized = gateway_forward_headers_with_openai_key_state( + &inbound, + ProviderRoute::OpenAiResponses, + false, + ); assert!(sanitized.get("authorization").is_some()); } @@ -383,14 +493,17 @@ fn chatgpt_jwt_routes_to_chatgpt_backend_when_no_api_key() { "authorization", HeaderValue::from_static("Bearer eyJhbGciOiJIUzI1NiJ9.deadbeef.signature"), ); - // With no OPENAI_API_KEY and a JWT, should_use_chatgpt_backend returns true and the URL is - // built against the ChatGPT backend (no /v1 prefix — ChatGPT backend doesn't use it). - let result = chatgpt_upstream_url("/responses"); - assert_eq!(result, "https://chatgpt.com/backend-api/codex/responses"); - - // has_chatgpt_jwt should detect the JWT - assert!(has_chatgpt_jwt(&headers)); - assert!(ProviderRoute::OpenAiResponses.is_openai()); + // With no OPENAI_API_KEY and a JWT, alignment returns the ChatGPT backend override. + assert_eq!( + gateway_upstream_url_override_with_openai_key_state( + ProviderRoute::OpenAiResponses, + &headers, + "/responses", + false, + ) + .as_deref(), + Some("https://chatgpt.com/backend-api/codex/responses") + ); } #[test] @@ -400,10 +513,26 @@ fn no_jwt_does_not_trigger_chatgpt_backend() { "authorization", HeaderValue::from_static("Bearer sk-real-api-key"), ); - assert!(!has_chatgpt_jwt(&headers)); + assert_eq!( + gateway_upstream_url_override_with_openai_key_state( + ProviderRoute::OpenAiResponses, + &headers, + "/responses", + false, + ), + None + ); - // Empty headers also should not trigger - assert!(!has_chatgpt_jwt(&HeaderMap::new())); + // Empty headers also should not trigger. + assert_eq!( + gateway_upstream_url_override_with_openai_key_state( + ProviderRoute::OpenAiResponses, + &HeaderMap::new(), + "/responses", + false, + ), + None + ); } #[test] @@ -413,24 +542,55 @@ fn anthropic_route_never_triggers_chatgpt_backend() { "authorization", HeaderValue::from_static("Bearer eyJhbGciOiJIUzI1NiJ9.deadbeef.signature"), ); - assert!(!ProviderRoute::AnthropicMessages.is_openai()); + assert_eq!( + gateway_upstream_url_override_with_openai_key_state( + ProviderRoute::AnthropicMessages, + &headers, + "/v1/messages", + false, + ), + None + ); } #[test] fn chatgpt_backend_url_omits_v1_prefix() { + let mut headers = HeaderMap::new(); + headers.insert( + "authorization", + HeaderValue::from_static("Bearer eyJhbGciOiJIUzI1NiJ9.deadbeef.signature"), + ); // The ChatGPT backend expects paths directly under the base, not /v1-prefixed. assert_eq!( - chatgpt_upstream_url("/responses"), - "https://chatgpt.com/backend-api/codex/responses" + gateway_upstream_url_override_with_openai_key_state( + ProviderRoute::OpenAiResponses, + &headers, + "/responses", + false, + ) + .as_deref(), + Some("https://chatgpt.com/backend-api/codex/responses") ); assert_eq!( - chatgpt_upstream_url("/models"), - "https://chatgpt.com/backend-api/codex/models" + gateway_upstream_url_override_with_openai_key_state( + ProviderRoute::OpenAiModels, + &headers, + "/models", + false, + ) + .as_deref(), + Some("https://chatgpt.com/backend-api/codex/models") ); // /v1-prefixed inbound paths are stripped assert_eq!( - chatgpt_upstream_url("/v1/responses"), - "https://chatgpt.com/backend-api/codex/responses" + gateway_upstream_url_override_with_openai_key_state( + ProviderRoute::OpenAiResponses, + &headers, + "/v1/responses", + false, + ) + .as_deref(), + Some("https://chatgpt.com/backend-api/codex/responses") ); } diff --git a/crates/cli/tests/coverage/session_tests.rs b/crates/cli/tests/coverage/session_tests.rs index d0588e14..1fabe665 100644 --- a/crates/cli/tests/coverage/session_tests.rs +++ b/crates/cli/tests/coverage/session_tests.rs @@ -66,6 +66,22 @@ fn event_has_session_id(event: &Value, session_id: &str) -> bool { || event["data"]["extra"]["session_id"] == json!(session_id) } +async fn alignment_alias(manager: &SessionManager, session_id: &str) -> Option { + manager.alignment.lock().await.alias_for_session(session_id) +} + +async fn has_alignment_alias(manager: &SessionManager, session_id: &str) -> bool { + manager.alignment.lock().await.has_alias(session_id) +} + +async fn has_pending_alignment(manager: &SessionManager, session_id: &str) -> bool { + manager + .alignment + .lock() + .await + .has_pending_session(session_id) +} + #[tokio::test] async fn nests_agent_subagent_and_tool_lifecycle() { let config = GatewayConfig { @@ -140,6 +156,813 @@ async fn nests_agent_subagent_and_tool_lifecycle() { assert!(manager.inner.lock().await.is_empty()); } +#[tokio::test] +async fn parallel_subagents_are_siblings_under_agent_scope() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(session_event("sibling-subagents", "SessionStart")), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "sibling-subagents".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker-1".into(), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "sibling-subagents".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker-2".into(), + payload: json!({}), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let sessions = manager.inner.lock().await; + let session = sessions.get("sibling-subagents").unwrap(); + let agent_uuid = session.agent_scope.as_ref().unwrap().uuid; + assert_eq!( + session.subagents.get("worker-1").unwrap().parent_uuid, + Some(agent_uuid) + ); + assert_eq!( + session.subagents.get("worker-2").unwrap().parent_uuid, + Some(agent_uuid) + ); +} + +#[tokio::test] +async fn new_subagent_claims_first_unhinted_llm_when_siblings_active() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(session_event("new-subagent-owner", "SessionStart")), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "new-subagent-owner".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker-1".into(), + payload: json!({}), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let first = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("new-subagent-owner".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + manager + .end_llm(first, json!({ "output_text": "worker-1" }), json!({})) + .await + .unwrap(); + + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "new-subagent-owner".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker-2".into(), + payload: json!({}), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + let worker_2_uuid = { + let sessions = manager.inner.lock().await; + sessions + .get("new-subagent-owner") + .unwrap() + .subagents + .get("worker-2") + .unwrap() + .uuid + }; + let second = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("new-subagent-owner".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + assert_eq!(second.handle.parent_uuid, Some(worker_2_uuid)); + assert_eq!( + second.handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("subagent_start") + ); + assert_eq!( + second.handle.metadata.as_ref().unwrap()["llm_correlation_source"], + json!("subagent_start") + ); + manager + .end_llm(second, json!({ "output_text": "worker-2" }), json!({})) + .await + .unwrap(); +} + +#[tokio::test] +async fn codex_subagent_session_start_uses_transcript_parent_thread() { + let manager = SessionManager::new(session_test_config()); + let temp = tempfile::tempdir().unwrap(); + let child_transcript = temp.path().join("child.jsonl"); + std::fs::write( + &child_transcript, + serde_json::to_string(&json!({ + "type": "session_meta", + "payload": { + "id": "child-thread", + "source": { + "subagent": { + "thread_spawn": { + "parent_thread_id": "parent-thread", + "depth": 1, + "agent_nickname": "Hume", + "agent_role": "explorer" + } + } + }, + "thread_source": "subagent", + "agent_nickname": "Hume", + "agent_role": "explorer" + } + })) + .unwrap() + + "\n", + ) + .unwrap(); + + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(codex_session_event( + "parent-thread", + "SessionStart", + json!({}), + )), + NormalizedEvent::AgentStarted(codex_session_event( + "child-thread", + "SessionStart", + json!({ "transcript_path": child_transcript }), + )), + ], + ) + .await + .unwrap(); + + let sessions = manager.inner.lock().await; + assert!(sessions.get("child-thread").is_none()); + let parent = sessions.get("parent-thread").unwrap(); + let agent_uuid = parent.agent_scope.as_ref().unwrap().uuid; + assert_eq!( + parent.subagents.get("child-thread").unwrap().parent_uuid, + Some(agent_uuid) + ); + drop(sessions); + + let alias = alignment_alias(&manager, "child-thread").await.unwrap(); + assert_eq!(alias.parent_session_id, "parent-thread"); + assert_eq!(alias.subagent_id, "child-thread"); +} + +#[tokio::test] +async fn codex_subagent_agent_end_removes_alias_and_closes_scope() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(codex_session_event( + "parent-thread", + "SessionStart", + json!({}), + )), + NormalizedEvent::AgentStarted(SessionEvent { + session_id: "child-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionStart".into(), + payload: json!({ + "source": { + "subagent": { + "thread_spawn": { + "parent_thread_id": "parent-thread" + } + } + } + }), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + assert!(has_alignment_alias(&manager, "child-thread").await); + + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::AgentEnded(SessionEvent { + session_id: "child-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionEnd".into(), + payload: json!({ "done": true }), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + assert!(!has_alignment_alias(&manager, "child-thread").await); + let sessions = manager.inner.lock().await; + let parent = sessions.get("parent-thread").unwrap(); + assert!(!parent.subagents.contains_key("child-thread")); +} + +#[tokio::test] +async fn codex_parent_end_clears_alias_before_late_child_end() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(codex_session_event( + "parent-thread", + "SessionStart", + json!({}), + )), + NormalizedEvent::AgentStarted(SessionEvent { + session_id: "child-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionStart".into(), + payload: json!({ + "source": { + "subagent": { + "thread_spawn": { + "parent_thread_id": "parent-thread" + } + } + } + }), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + assert!(has_alignment_alias(&manager, "child-thread").await); + + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::AgentEnded(codex_session_event( + "parent-thread", + "SessionEnd", + json!({}), + ))], + ) + .await + .unwrap(); + + assert!(!has_alignment_alias(&manager, "child-thread").await); + assert!(!manager.inner.lock().await.contains_key("parent-thread")); + + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::AgentEnded(SessionEvent { + session_id: "child-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionEnd".into(), + payload: json!({ "late": true }), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + assert!(!has_alignment_alias(&manager, "child-thread").await); + assert!(!manager.inner.lock().await.contains_key("parent-thread")); +} + +#[tokio::test] +async fn codex_child_session_start_waits_for_parent_session() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::AgentStarted(SessionEvent { + session_id: "child-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionStart".into(), + payload: json!({ + "source": { + "subagent": { + "thread_spawn": { + "parent_thread_id": "parent-thread", + "agent_nickname": "Late", + "agent_role": "worker" + } + } + } + }), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + assert!(!manager.inner.lock().await.contains_key("child-thread")); + assert!(has_pending_alignment(&manager, "child-thread").await); + + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::AgentStarted(codex_session_event( + "parent-thread", + "SessionStart", + json!({}), + ))], + ) + .await + .unwrap(); + + assert!(!has_pending_alignment(&manager, "child-thread").await); + assert!(has_alignment_alias(&manager, "child-thread").await); + let sessions = manager.inner.lock().await; + assert!(!sessions.contains_key("child-thread")); + assert!( + sessions + .get("parent-thread") + .unwrap() + .subagents + .contains_key("child-thread") + ); +} + +#[tokio::test] +async fn codex_pending_child_gateway_llm_promotes_parent_subagent() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::AgentStarted(SessionEvent { + session_id: "child-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionStart".into(), + payload: json!({ + "source": { + "subagent": { + "thread_spawn": { + "parent_thread_id": "parent-thread" + } + } + } + }), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + let active = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("child-thread".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + assert_eq!(active.session_id, "parent-thread"); + assert_eq!(active.owner_subagent_id.as_deref(), Some("child-thread")); + assert!(!has_pending_alignment(&manager, "child-thread").await); + assert!(has_alignment_alias(&manager, "child-thread").await); + { + let sessions = manager.inner.lock().await; + assert!(!sessions.contains_key("child-thread")); + assert!( + sessions + .get("parent-thread") + .unwrap() + .subagents + .contains_key("child-thread") + ); + } + + manager + .end_llm(active, json!({ "output_text": "done" }), json!({})) + .await + .unwrap(); + manager.close_all("test_shutdown").await.unwrap(); +} + +#[tokio::test] +async fn codex_subagent_start_does_not_reparent_active_child_session() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::AgentStarted(codex_session_event( + "parent-thread", + "SessionStart", + json!({}), + ))], + ) + .await + .unwrap(); + + let active_child_llm = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("child-thread".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::AgentStarted(SessionEvent { + session_id: "child-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionStart".into(), + payload: json!({ + "source": { + "subagent": { + "thread_spawn": { + "parent_thread_id": "parent-thread" + } + } + } + }), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + assert!(!has_alignment_alias(&manager, "child-thread").await); + { + let sessions = manager.inner.lock().await; + assert!(sessions.contains_key("child-thread")); + assert!( + !sessions + .get("parent-thread") + .unwrap() + .subagents + .contains_key("child-thread") + ); + } + + manager + .end_llm( + active_child_llm, + json!({ "output_text": "child" }), + json!({}), + ) + .await + .unwrap(); + manager.close_all("test_shutdown").await.unwrap(); +} + +#[tokio::test] +async fn codex_aliased_hook_llm_routes_to_subagent_scope() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(codex_session_event( + "parent-thread", + "SessionStart", + json!({}), + )), + NormalizedEvent::AgentStarted(SessionEvent { + session_id: "child-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionStart".into(), + payload: json!({ + "source": { + "subagent": { + "thread_spawn": { + "parent_thread_id": "parent-thread" + } + } + } + }), + metadata: json!({}), + }), + NormalizedEvent::LlmStarted(LlmEvent { + session_id: "child-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "PreLlm".into(), + api_call_id: "hook-llm".into(), + provider: "openai.responses".into(), + model_name: Some("gpt-test".into()), + request: json!({ "input": "hello" }), + response: Value::Null, + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let sessions = manager.inner.lock().await; + let parent = sessions.get("parent-thread").unwrap(); + let subagent_uuid = parent.subagents.get("child-thread").unwrap().uuid; + let handle = parent.llms.get("hook-llm").unwrap(); + assert_eq!(handle.parent_uuid, Some(subagent_uuid)); + assert_eq!( + handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("session_alias") + ); + assert_eq!( + handle.metadata.as_ref().unwrap()["llm_correlation_subagent_id"], + json!("child-thread") + ); + drop(sessions); + + manager.close_all("test_shutdown").await.unwrap(); +} + +#[tokio::test] +async fn codex_subagent_gateway_llm_routes_to_parent_subagent() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(codex_session_event( + "parent-thread", + "SessionStart", + json!({}), + )), + NormalizedEvent::AgentStarted(SessionEvent { + session_id: "child-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionStart".into(), + payload: json!({ + "source": { + "subagent": { + "thread_spawn": { + "parent_thread_id": "parent-thread", + "agent_nickname": "Bohr", + "agent_role": "explorer" + } + } + } + }), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let subagent_uuid = { + let sessions = manager.inner.lock().await; + sessions + .get("parent-thread") + .unwrap() + .subagents + .get("child-thread") + .unwrap() + .uuid + }; + + let active = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("child-thread".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + assert_eq!(active.session_id, "parent-thread"); + assert_eq!(active.owner_subagent_id.as_deref(), Some("child-thread")); + assert_eq!(active.handle.parent_uuid, Some(subagent_uuid)); + assert_eq!( + active.handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("explicit") + ); + assert_eq!( + active.handle.metadata.as_ref().unwrap()["llm_correlation_subagent_id"], + json!("child-thread") + ); + assert_eq!( + active.handle.metadata.as_ref().unwrap()["codex_parent_thread_id"], + json!("parent-thread") + ); + assert_eq!( + active.handle.metadata.as_ref().unwrap()["codex_subagent_session_id"], + json!("child-thread") + ); + assert_eq!( + active.handle.metadata.as_ref().unwrap()["agent_nickname"], + json!("Bohr") + ); + + manager + .end_llm(active, json!({ "output_text": "child" }), json!({})) + .await + .unwrap(); + + let sticky = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("parent-thread".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + assert_eq!(sticky.session_id, "parent-thread"); + assert_eq!(sticky.owner_subagent_id.as_deref(), Some("child-thread")); + assert_eq!(sticky.handle.parent_uuid, Some(subagent_uuid)); + assert_eq!( + sticky.handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("sticky_last_owner") + ); + assert_eq!( + sticky.handle.metadata.as_ref().unwrap()["llm_correlation_subagent_id"], + json!("child-thread") + ); + assert_eq!( + sticky.handle.metadata.as_ref().unwrap()["codex_parent_thread_id"], + json!("parent-thread") + ); + assert_eq!( + sticky.handle.metadata.as_ref().unwrap()["codex_subagent_session_id"], + json!("child-thread") + ); + assert_eq!( + sticky.handle.metadata.as_ref().unwrap()["agent_nickname"], + json!("Bohr") + ); + + manager + .end_llm(sticky, json!({ "output_text": "child-again" }), json!({})) + .await + .unwrap(); + + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::ToolStarted(ToolEvent { + session_id: "parent-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "PreToolUse".into(), + tool_call_id: "tool-1".into(), + tool_name: "exec_command".into(), + subagent_id: Some("child-thread".into()), + arguments: json!({ "cmd": "true" }), + result: Value::Null, + status: None, + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::ToolEnded(ToolEvent { + session_id: "parent-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "PostToolUse".into(), + tool_call_id: "tool-1".into(), + tool_name: "exec_command".into(), + subagent_id: Some("child-thread".into()), + arguments: Value::Null, + result: json!({ "ok": true }), + status: Some("success".into()), + payload: json!({}), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let tool_owned = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("parent-thread".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + assert_eq!(tool_owned.handle.parent_uuid, Some(subagent_uuid)); + assert_eq!( + tool_owned.handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("recent_tool_owner") + ); + assert_eq!( + tool_owned.handle.metadata.as_ref().unwrap()["codex_parent_thread_id"], + json!("parent-thread") + ); + assert_eq!( + tool_owned.handle.metadata.as_ref().unwrap()["codex_subagent_session_id"], + json!("child-thread") + ); + + manager + .end_llm( + tool_owned, + json!({ "output_text": "after-tool" }), + json!({}), + ) + .await + .unwrap(); + + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::LlmHint(LlmHintEvent { + session_id: "parent-thread".into(), + agent_kind: AgentKind::Codex, + event_name: "AgentMessageDelta".into(), + subagent_id: Some("child-thread".into()), + agent_id: None, + agent_type: Some("explorer".into()), + conversation_id: None, + generation_id: Some("generation-1".into()), + request_id: None, + model: Some("gpt-test".into()), + payload: json!({}), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + let hinted = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("parent-thread".into()), + generation_id: Some("generation-1".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + assert_eq!(hinted.handle.parent_uuid, Some(subagent_uuid)); + assert_eq!( + hinted.handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("single_hint") + ); + assert_eq!( + hinted.handle.metadata.as_ref().unwrap()["codex_parent_thread_id"], + json!("parent-thread") + ); + assert_eq!( + hinted.handle.metadata.as_ref().unwrap()["codex_subagent_session_id"], + json!("child-thread") + ); + + manager + .end_llm(hinted, json!({ "output_text": "after-hint" }), json!({})) + .await + .unwrap(); +} + #[tokio::test] async fn writes_atif_on_session_end_from_plugin_config() { let _guard = OBSERVABILITY_PLUGIN_TEST_LOCK.lock().await; @@ -1261,6 +2084,288 @@ async fn no_active_hint_reuses_last_llm_owner() { .unwrap(); } +#[tokio::test] +async fn root_llm_hint_does_not_stick_over_later_subagent() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(session_event("root-sticky", "SessionStart")), + NormalizedEvent::LlmHint(LlmHintEvent { + session_id: "root-sticky".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "UserPromptSubmit".into(), + subagent_id: None, + agent_id: None, + agent_type: None, + conversation_id: None, + generation_id: None, + request_id: None, + model: Some("gpt-test".into()), + payload: json!({}), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let first = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("root-sticky".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + assert_eq!( + first.handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("single_hint") + ); + manager + .end_llm(first, json!({ "output_text": "root" }), json!({})) + .await + .unwrap(); + + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "root-sticky".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker".into(), + payload: json!({}), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + let worker_uuid = { + let sessions = manager.inner.lock().await; + sessions + .get("root-sticky") + .unwrap() + .subagents + .get("worker") + .unwrap() + .uuid + }; + let second = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("root-sticky".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + assert_eq!(second.handle.parent_uuid, Some(worker_uuid)); + assert_eq!( + second.handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("active_subagent") + ); + manager + .end_llm(second, json!({ "output_text": "worker" }), json!({})) + .await + .unwrap(); +} + +#[tokio::test] +async fn explicit_subagent_tool_owner_claims_next_unhinted_llm() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(session_event("tool-owner", "SessionStart")), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "tool-owner".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker-1".into(), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "tool-owner".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker-2".into(), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::ToolStarted(ToolEvent { + session_id: "tool-owner".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "PreToolUse".into(), + tool_call_id: "tool-1".into(), + tool_name: "Read".into(), + subagent_id: Some("worker-1".into()), + arguments: json!({ "file_path": "README.md" }), + result: Value::Null, + status: None, + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::ToolEnded(ToolEvent { + session_id: "tool-owner".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "PostToolUse".into(), + tool_call_id: "tool-1".into(), + tool_name: "Read".into(), + subagent_id: Some("worker-1".into()), + arguments: Value::Null, + result: json!({ "ok": true }), + status: Some("success".into()), + payload: json!({}), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let worker_uuid = { + let sessions = manager.inner.lock().await; + sessions + .get("tool-owner") + .unwrap() + .subagents + .get("worker-1") + .unwrap() + .uuid + }; + let active = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("tool-owner".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + assert_eq!(active.handle.parent_uuid, Some(worker_uuid)); + assert_eq!( + active.handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("recent_tool_owner") + ); + assert_eq!( + active.handle.metadata.as_ref().unwrap()["llm_correlation_source"], + json!("tool_owner") + ); + manager + .end_llm(active, json!({ "output_text": "again" }), json!({})) + .await + .unwrap(); +} + +#[tokio::test] +async fn claude_agent_tool_completion_closes_subagents_before_final_llm() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(session_event("agent-tool-finish", "SessionStart")), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "agent-tool-finish".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker-1".into(), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "agent-tool-finish".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker-2".into(), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::ToolEnded(ToolEvent { + session_id: "agent-tool-finish".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "PostToolUse".into(), + tool_call_id: "agent-tool-1".into(), + tool_name: "Agent".into(), + subagent_id: None, + arguments: Value::Null, + result: json!({ + "agentId": "worker-1", + "status": "completed" + }), + status: Some("completed".into()), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::ToolEnded(ToolEvent { + session_id: "agent-tool-finish".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "PostToolUse".into(), + tool_call_id: "agent-tool-2".into(), + tool_name: "Agent".into(), + subagent_id: None, + arguments: Value::Null, + result: json!({ + "agentId": "worker-2", + "status": "completed" + }), + status: Some("completed".into()), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::SubagentEnded(SubagentEvent { + session_id: "agent-tool-finish".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStop".into(), + subagent_id: "worker-2".into(), + payload: json!({ "duplicate": true }), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let agent_uuid = { + let sessions = manager.inner.lock().await; + let session = sessions.get("agent-tool-finish").unwrap(); + assert!(session.subagents.is_empty()); + assert!(session.subagent_stacks.is_empty()); + session.agent_scope.as_ref().unwrap().uuid + }; + let final_llm = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("agent-tool-finish".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + assert_eq!(final_llm.handle.parent_uuid, Some(agent_uuid)); + assert_eq!( + final_llm.handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("agent_fallback") + ); + manager + .end_llm(final_llm, json!({ "output_text": "final" }), json!({})) + .await + .unwrap(); +} + #[tokio::test] async fn agent_end_closes_active_tools_and_duplicate_starts_are_ignored() { let manager = SessionManager::new(session_test_config()); @@ -1907,6 +3012,16 @@ fn session_event(session_id: &str, event_name: &str) -> SessionEvent { } } +fn codex_session_event(session_id: &str, event_name: &str, metadata: Value) -> SessionEvent { + SessionEvent { + session_id: session_id.into(), + agent_kind: AgentKind::Codex, + event_name: event_name.into(), + payload: json!({ "event": event_name }), + metadata, + } +} + fn llm_start() -> LlmGatewayStart { LlmGatewayStart { session_id: Some("llm".into()), From 01e380cb9479309bd1231717e7a700b9abab5aef Mon Sep 17 00:00:00 2001 From: Will Killian Date: Tue, 19 May 2026 09:42:32 -0400 Subject: [PATCH 2/5] fix: stabilize parallel subagent llm affinity Signed-off-by: Will Killian --- crates/cli/src/alignment/mod.rs | 61 +++++++ crates/cli/src/session.rs | 88 ++++++++++ crates/cli/tests/coverage/alignment_tests.rs | 35 ++++ crates/cli/tests/coverage/session_tests.rs | 167 +++++++++++++++++++ 4 files changed, 351 insertions(+) diff --git a/crates/cli/src/alignment/mod.rs b/crates/cli/src/alignment/mod.rs index 0322f6ca..f738c1d5 100644 --- a/crates/cli/src/alignment/mod.rs +++ b/crates/cli/src/alignment/mod.rs @@ -10,6 +10,7 @@ use std::collections::HashMap; use axum::http::HeaderMap; +use nemo_flow::api::llm::LlmRequest; use serde_json::{Map, Value, json}; use crate::config::header_string; @@ -342,6 +343,17 @@ pub(crate) fn llm_owner_metadata(scope_metadata: Option<&Value>) -> Value { codex::llm_owner_metadata(scope_metadata) } +// Builds a stable request affinity key from the user task text carried in common chat payloads. +// Claude Code subagent calls repeat the original worker prompt on later model requests, which is a +// stronger signal than the last tool owner when several subagents run in parallel. The extractor is +// intentionally conservative: raw count-token/file payloads and system-reminder-only messages do +// not produce keys, so they continue through the normal hint/sticky/fallback resolution path. +pub(crate) fn llm_request_affinity_key(request: &LlmRequest) -> Option { + let task_text = first_user_task_text(&request.content)?; + let normalized = normalize_affinity_text(task_text); + (normalized.len() >= 24).then(|| truncate_affinity_text(&normalized, 512)) +} + // Detects tool results that imply a subagent completed. Claude Code reports this through the // `Agent` tool today; keeping the check here avoids leaking that tool shape into session teardown. pub(crate) fn completed_subagent_from_tool(event: &ToolEvent) -> Option { @@ -478,6 +490,55 @@ fn route_tool_event(event: &mut ToolEvent, alias: &SessionAlias, metadata: Value event.metadata = merge_metadata(event.metadata.clone(), metadata); } +fn first_user_task_text(payload: &Value) -> Option<&str> { + payload + .get("messages") + .and_then(Value::as_array) + .and_then(|messages| messages.iter().find_map(user_message_task_text)) + .or_else(|| openai_responses_input_task_text(payload.get("input")?)) +} + +fn user_message_task_text(message: &Value) -> Option<&str> { + if message.get("role").and_then(Value::as_str) != Some("user") { + return None; + } + content_task_text(message.get("content")?) +} + +fn openai_responses_input_task_text(input: &Value) -> Option<&str> { + match input { + Value::String(text) => Some(text.as_str()).filter(|text| is_affinity_candidate(text)), + Value::Array(items) => items.iter().find_map(user_message_task_text), + _ => None, + } +} + +fn content_task_text(content: &Value) -> Option<&str> { + match content { + Value::String(text) => Some(text.as_str()).filter(|text| is_affinity_candidate(text)), + Value::Array(blocks) => blocks.iter().find_map(|block| { + block + .get("text") + .and_then(Value::as_str) + .filter(|text| is_affinity_candidate(text)) + }), + _ => None, + } +} + +fn is_affinity_candidate(text: &str) -> bool { + let trimmed = text.trim_start(); + !trimmed.is_empty() && !trimmed.starts_with("") +} + +fn normalize_affinity_text(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") +} + +fn truncate_affinity_text(text: &str, max_chars: usize) -> String { + text.chars().take(max_chars).collect() +} + // Reads the first string-like value from any candidate JSON path. Scalar numbers and booleans are // accepted for IDs because provider payloads are not always strict about identifier types. pub(crate) fn json_string_at(payload: &Value, paths: &[&[&str]]) -> Option { diff --git a/crates/cli/src/session.rs b/crates/cli/src/session.rs index 38de4e26..a4d8d387 100644 --- a/crates/cli/src/session.rs +++ b/crates/cli/src/session.rs @@ -108,6 +108,10 @@ struct Session { tools: HashMap, pending_llm_hints: Vec, pending_tool_hints: Vec, + // Maps stable user-task text from confidently owned LLM requests to the subagent that owns + // that conversation. This gives parallel Claude Code workers a stronger fallback than a single + // global "last tool owner" when their Anthropic requests lack subagent headers. + llm_request_affinity: HashMap>, last_llm_owner: Option, config: SessionConfig, } @@ -629,6 +633,7 @@ impl Session { tools: HashMap::new(), pending_llm_hints: Vec::new(), pending_tool_hints: Vec::new(), + llm_request_affinity: HashMap::new(), last_llm_owner: None, config, } @@ -689,6 +694,11 @@ impl Session { attributes |= LlmAttributes::STREAMING; } let owner = self.resolve_llm_owner(&start); + self.record_llm_request_affinity( + &start.request, + owner.subagent_id.as_deref(), + owner.status, + ); let metadata = merge_metadata( llm_correlation_metadata( start.metadata, @@ -739,6 +749,11 @@ impl Session { attributes |= LlmAttributes::STREAMING; } let owner = self.resolve_llm_owner(&start); + self.record_llm_request_affinity( + &start.request, + owner.subagent_id.as_deref(), + owner.status, + ); let metadata = merge_metadata( llm_correlation_metadata( start.metadata, @@ -884,6 +899,7 @@ impl Session { fn clear_correlation_state(&mut self) { self.pending_llm_hints.clear(); self.pending_tool_hints.clear(); + self.llm_request_affinity.clear(); self.last_llm_owner = None; } @@ -1005,6 +1021,8 @@ impl Session { self.completed_subagents.insert(subagent_id.to_string()); self.pending_tool_hints .retain(|pending| pending.hint.subagent_id.as_deref() != Some(subagent_id)); + self.llm_request_affinity + .retain(|_, owner| owner.as_deref() != Some(subagent_id)); if self .last_llm_owner .as_ref() @@ -1251,6 +1269,9 @@ impl Session { if let Some(resolution) = self.matched_hint_owner(start) { return resolution; } + if let Some(resolution) = self.request_affinity_owner(start) { + return resolution; + } if let Some(resolution) = self.sticky_llm_owner() { return resolution; } @@ -1301,6 +1322,31 @@ impl Session { None } + // Reuses a learned request affinity before falling back to the session-global sticky owner. + // This addresses Claude Code's parallel subagents: their provider requests often repeat the + // original worker prompt but omit an explicit worker id, so task-text affinity is safer than + // whichever subagent happened to emit the last tool hook. + fn request_affinity_owner(&mut self, start: &LlmGatewayStart) -> Option { + let key = alignment::llm_request_affinity_key(&start.request)?; + let subagent_id = self.llm_request_affinity.get(&key).cloned().flatten()?; + let parent = match self.subagents.get(&subagent_id).cloned() { + Some(parent) => parent, + None => { + self.llm_request_affinity.remove(&key); + return None; + } + }; + self.set_last_llm_owner(Some(subagent_id.clone())); + Some(LlmOwnerResolution { + parent: Some(parent), + subagent_id: Some(subagent_id.clone()), + status: "request_affinity", + source: Some("llm_request".to_string()), + hint: None, + metadata: self.subagent_llm_metadata(&subagent_id), + }) + } + // Reuses the previous LLM owner while its TTL is valid and its scope can still be resolved. // This covers agents that emit one hint followed by a cluster of related provider calls. fn sticky_llm_owner(&self) -> Option { @@ -1455,6 +1501,36 @@ impl Session { } } + // Learns a subagent owner from high-confidence LLM resolutions only. Tool-owned and sticky + // resolutions are intentionally excluded because they are the ambiguous path this affinity map + // is meant to correct in parallel Claude Code traces. + fn record_llm_request_affinity( + &mut self, + request: &LlmRequest, + subagent_id: Option<&str>, + status: &str, + ) { + if !llm_status_teaches_request_affinity(status) { + return; + } + let Some(subagent_id) = subagent_id else { + return; + }; + let Some(key) = alignment::llm_request_affinity_key(request) else { + return; + }; + match self.llm_request_affinity.get_mut(&key) { + Some(Some(existing)) if existing == subagent_id => {} + // If two live subagents share the same prompt text, the key is no longer safe as a + // discriminator. Mark it ambiguous instead of allowing either worker to claim it. + Some(owner) => *owner = None, + None => { + self.llm_request_affinity + .insert(key, Some(subagent_id.to_string())); + } + } + } + // Records tool-call suggestions from LLM responses as private correlation hints. These hints // are not emitted as events; they only help later tool hooks choose the same subagent scope as // the LLM that proposed the call. @@ -1750,6 +1826,18 @@ fn same_optional(left: Option<&str>, right: Option<&str>) -> bool { matches!((left, right), (Some(left), Some(right)) if left == right) } +fn llm_status_teaches_request_affinity(status: &str) -> bool { + matches!( + status, + "explicit" + | "single_hint" + | "matched_hint" + | "active_subagent" + | "subagent_start" + | "request_affinity" + ) +} + // Parses stringified tool arguments when providers encode them as JSON text. Non-JSON strings are // preserved as strings so metadata still reflects what the provider actually returned. fn normalize_tool_arguments(arguments: Value) -> Value { diff --git a/crates/cli/tests/coverage/alignment_tests.rs b/crates/cli/tests/coverage/alignment_tests.rs index 4171de3d..d5c09aeb 100644 --- a/crates/cli/tests/coverage/alignment_tests.rs +++ b/crates/cli/tests/coverage/alignment_tests.rs @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use axum::http::HeaderValue; +use nemo_flow::api::llm::LlmRequest; +use serde_json::Map; use super::*; use crate::model::{LlmEvent, LlmHintEvent}; @@ -181,6 +183,39 @@ fn agent_kind_inference_covers_known_provider_names() { ); } +#[test] +fn llm_request_affinity_key_uses_stable_user_task_text() { + let request = LlmRequest { + headers: Map::new(), + content: json!({ + "messages": [ + { + "role": "user", + "content": [ + { "type": "text", "text": "\nToday is 2026-05-19.\n" }, + { "type": "text", "text": " Analyze the python binding\n\nwith detail. " } + ] + } + ] + }), + }; + + assert_eq!( + llm_request_affinity_key(&request).as_deref(), + Some("Analyze the python binding with detail.") + ); +} + +#[test] +fn llm_request_affinity_key_ignores_count_token_style_payloads() { + let request = LlmRequest { + headers: Map::new(), + content: json!("// source text without a chat user task"), + }; + + assert_eq!(llm_request_affinity_key(&request), None); +} + #[test] fn route_event_through_alias_covers_all_event_variants() { let aliases = aliases(); diff --git a/crates/cli/tests/coverage/session_tests.rs b/crates/cli/tests/coverage/session_tests.rs index 1fabe665..167da60a 100644 --- a/crates/cli/tests/coverage/session_tests.rs +++ b/crates/cli/tests/coverage/session_tests.rs @@ -2268,6 +2268,138 @@ async fn explicit_subagent_tool_owner_claims_next_unhinted_llm() { .unwrap(); } +#[tokio::test] +async fn request_affinity_overrides_stale_tool_owner_for_parallel_subagents() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(session_event("parallel-affinity", "SessionStart")), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "parallel-affinity".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "python-worker".into(), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "parallel-affinity".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "go-worker".into(), + payload: json!({}), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let python_first = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + subagent_id: Some("python-worker".into()), + ..llm_start_with_user_task( + "parallel-affinity", + "Very thorough analysis of the python/nemo_flow package.", + ) + }, + ) + .await + .unwrap(); + manager + .end_llm(python_first, json!({ "output_text": "python" }), json!({})) + .await + .unwrap(); + + let go_first = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + subagent_id: Some("go-worker".into()), + ..llm_start_with_user_task( + "parallel-affinity", + "Very thorough analysis of the go/nemo_flow binding.", + ) + }, + ) + .await + .unwrap(); + manager + .end_llm(go_first, json!({ "output_text": "go" }), json!({})) + .await + .unwrap(); + + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::ToolStarted(ToolEvent { + session_id: "parallel-affinity".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "PreToolUse".into(), + tool_call_id: "go-tool".into(), + tool_name: "Read".into(), + subagent_id: Some("go-worker".into()), + arguments: json!({ "file_path": "go/nemo_flow/nemo_flow.go" }), + result: Value::Null, + status: None, + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::ToolEnded(ToolEvent { + session_id: "parallel-affinity".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "PostToolUse".into(), + tool_call_id: "go-tool".into(), + tool_name: "Read".into(), + subagent_id: Some("go-worker".into()), + arguments: Value::Null, + result: json!({ "ok": true }), + status: Some("success".into()), + payload: json!({}), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let python_uuid = { + let sessions = manager.inner.lock().await; + sessions + .get("parallel-affinity") + .unwrap() + .subagents + .get("python-worker") + .unwrap() + .uuid + }; + let python_later = manager + .start_llm( + &HeaderMap::new(), + llm_start_with_user_task( + "parallel-affinity", + "Very thorough analysis of the python/nemo_flow package.", + ), + ) + .await + .unwrap(); + + assert_eq!(python_later.handle.parent_uuid, Some(python_uuid)); + assert_eq!( + python_later.handle.metadata.as_ref().unwrap()["llm_correlation_status"], + json!("request_affinity") + ); + assert_eq!( + python_later.handle.metadata.as_ref().unwrap()["llm_correlation_source"], + json!("llm_request") + ); +} + #[tokio::test] async fn claude_agent_tool_completion_closes_subagents_before_final_llm() { let manager = SessionManager::new(session_test_config()); @@ -3039,3 +3171,38 @@ fn llm_start() -> LlmGatewayStart { metadata: json!({}), } } + +fn llm_start_with_user_task(session_id: &str, task: &str) -> LlmGatewayStart { + LlmGatewayStart { + session_id: Some(session_id.into()), + provider: "anthropic.messages".into(), + model_name: Some("claude-test".into()), + subagent_id: None, + conversation_id: None, + generation_id: None, + request_id: None, + request: LlmRequest { + headers: Map::new(), + content: json!({ + "model": "claude-test", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "\nToday is 2026-05-19.\n" + }, + { + "type": "text", + "text": task + } + ] + } + ] + }), + }, + streaming: false, + metadata: json!({}), + } +} From 7a3228ebb96d541dfaf0f1ad6cd302726f325a5d Mon Sep 17 00:00:00 2001 From: Will Killian Date: Tue, 19 May 2026 09:47:01 -0400 Subject: [PATCH 3/5] fix: generalize request affinity correlation Signed-off-by: Will Killian --- crates/cli/src/alignment/mod.rs | 25 ++-- crates/cli/src/session.rs | 22 ++-- crates/cli/tests/coverage/alignment_tests.rs | 61 ++++++++- crates/cli/tests/coverage/session_tests.rs | 125 ++++++++++++++----- 4 files changed, 177 insertions(+), 56 deletions(-) diff --git a/crates/cli/src/alignment/mod.rs b/crates/cli/src/alignment/mod.rs index f738c1d5..4557b211 100644 --- a/crates/cli/src/alignment/mod.rs +++ b/crates/cli/src/alignment/mod.rs @@ -343,13 +343,13 @@ pub(crate) fn llm_owner_metadata(scope_metadata: Option<&Value>) -> Value { codex::llm_owner_metadata(scope_metadata) } -// Builds a stable request affinity key from the user task text carried in common chat payloads. -// Claude Code subagent calls repeat the original worker prompt on later model requests, which is a -// stronger signal than the last tool owner when several subagents run in parallel. The extractor is -// intentionally conservative: raw count-token/file payloads and system-reminder-only messages do -// not produce keys, so they continue through the normal hint/sticky/fallback resolution path. -pub(crate) fn llm_request_affinity_key(request: &LlmRequest) -> Option { - let task_text = first_user_task_text(&request.content)?; +// Builds a provider-neutral affinity key from the user task text inside common LLM request +// formats. Coding agents often replay the same task prompt on later provider calls without a +// worker id; this key lets session correlation pair those calls with the subagent that first owned +// the task. The extractor understands Anthropic Messages, OpenAI Chat Completions, and OpenAI +// Responses shapes, and deliberately ignores raw count-token/file payloads. +pub(crate) fn request_affinity_key(request: &LlmRequest) -> Option { + let task_text = request_user_task_text(&request.content)?; let normalized = normalize_affinity_text(task_text); (normalized.len() >= 24).then(|| truncate_affinity_text(&normalized, 512)) } @@ -490,12 +490,13 @@ fn route_tool_event(event: &mut ToolEvent, alias: &SessionAlias, metadata: Value event.metadata = merge_metadata(event.metadata.clone(), metadata); } -fn first_user_task_text(payload: &Value) -> Option<&str> { +fn request_user_task_text(payload: &Value) -> Option<&str> { payload .get("messages") .and_then(Value::as_array) .and_then(|messages| messages.iter().find_map(user_message_task_text)) - .or_else(|| openai_responses_input_task_text(payload.get("input")?)) + .or_else(|| responses_input_task_text(payload.get("input")?)) + .or_else(|| prompt_task_text(payload.get("prompt")?)) } fn user_message_task_text(message: &Value) -> Option<&str> { @@ -505,7 +506,7 @@ fn user_message_task_text(message: &Value) -> Option<&str> { content_task_text(message.get("content")?) } -fn openai_responses_input_task_text(input: &Value) -> Option<&str> { +fn responses_input_task_text(input: &Value) -> Option<&str> { match input { Value::String(text) => Some(text.as_str()).filter(|text| is_affinity_candidate(text)), Value::Array(items) => items.iter().find_map(user_message_task_text), @@ -513,6 +514,10 @@ fn openai_responses_input_task_text(input: &Value) -> Option<&str> { } } +fn prompt_task_text(prompt: &Value) -> Option<&str> { + prompt.as_str().filter(|text| is_affinity_candidate(text)) +} + fn content_task_text(content: &Value) -> Option<&str> { match content { Value::String(text) => Some(text.as_str()).filter(|text| is_affinity_candidate(text)), diff --git a/crates/cli/src/session.rs b/crates/cli/src/session.rs index a4d8d387..ca16d150 100644 --- a/crates/cli/src/session.rs +++ b/crates/cli/src/session.rs @@ -109,8 +109,8 @@ struct Session { pending_llm_hints: Vec, pending_tool_hints: Vec, // Maps stable user-task text from confidently owned LLM requests to the subagent that owns - // that conversation. This gives parallel Claude Code workers a stronger fallback than a single - // global "last tool owner" when their Anthropic requests lack subagent headers. + // that conversation. This gives parallel coding-agent workers a stronger provider-format + // neutral fallback than a single global "last tool owner" when requests lack subagent headers. llm_request_affinity: HashMap>, last_llm_owner: Option, config: SessionConfig, @@ -1323,11 +1323,11 @@ impl Session { } // Reuses a learned request affinity before falling back to the session-global sticky owner. - // This addresses Claude Code's parallel subagents: their provider requests often repeat the - // original worker prompt but omit an explicit worker id, so task-text affinity is safer than - // whichever subagent happened to emit the last tool hook. + // The key is derived from provider request payloads, not a harness-specific field, so it can + // pair unhinted Anthropic Messages, OpenAI Chat Completions, and OpenAI Responses calls with + // the subagent that first owned the same coding task. fn request_affinity_owner(&mut self, start: &LlmGatewayStart) -> Option { - let key = alignment::llm_request_affinity_key(&start.request)?; + let key = alignment::request_affinity_key(&start.request)?; let subagent_id = self.llm_request_affinity.get(&key).cloned().flatten()?; let parent = match self.subagents.get(&subagent_id).cloned() { Some(parent) => parent, @@ -1341,7 +1341,7 @@ impl Session { parent: Some(parent), subagent_id: Some(subagent_id.clone()), status: "request_affinity", - source: Some("llm_request".to_string()), + source: Some("request_payload".to_string()), hint: None, metadata: self.subagent_llm_metadata(&subagent_id), }) @@ -1503,20 +1503,20 @@ impl Session { // Learns a subagent owner from high-confidence LLM resolutions only. Tool-owned and sticky // resolutions are intentionally excluded because they are the ambiguous path this affinity map - // is meant to correct in parallel Claude Code traces. + // is meant to correct when multiple coding-agent workers share a root session. fn record_llm_request_affinity( &mut self, request: &LlmRequest, subagent_id: Option<&str>, status: &str, ) { - if !llm_status_teaches_request_affinity(status) { + if !owner_status_teaches_request_affinity(status) { return; } let Some(subagent_id) = subagent_id else { return; }; - let Some(key) = alignment::llm_request_affinity_key(request) else { + let Some(key) = alignment::request_affinity_key(request) else { return; }; match self.llm_request_affinity.get_mut(&key) { @@ -1826,7 +1826,7 @@ fn same_optional(left: Option<&str>, right: Option<&str>) -> bool { matches!((left, right), (Some(left), Some(right)) if left == right) } -fn llm_status_teaches_request_affinity(status: &str) -> bool { +fn owner_status_teaches_request_affinity(status: &str) -> bool { matches!( status, "explicit" diff --git a/crates/cli/tests/coverage/alignment_tests.rs b/crates/cli/tests/coverage/alignment_tests.rs index d5c09aeb..fa21268e 100644 --- a/crates/cli/tests/coverage/alignment_tests.rs +++ b/crates/cli/tests/coverage/alignment_tests.rs @@ -184,7 +184,7 @@ fn agent_kind_inference_covers_known_provider_names() { } #[test] -fn llm_request_affinity_key_uses_stable_user_task_text() { +fn request_affinity_key_reads_messages_content_blocks() { let request = LlmRequest { headers: Map::new(), content: json!({ @@ -201,19 +201,72 @@ fn llm_request_affinity_key_uses_stable_user_task_text() { }; assert_eq!( - llm_request_affinity_key(&request).as_deref(), + request_affinity_key(&request).as_deref(), Some("Analyze the python binding with detail.") ); } #[test] -fn llm_request_affinity_key_ignores_count_token_style_payloads() { +fn request_affinity_key_reads_chat_completion_string_messages() { + let request = LlmRequest { + headers: Map::new(), + content: json!({ + "messages": [ + { "role": "system", "content": "You are a coding agent." }, + { "role": "user", "content": "Review the Rust CLI gateway alignment code." } + ] + }), + }; + + assert_eq!( + request_affinity_key(&request).as_deref(), + Some("Review the Rust CLI gateway alignment code.") + ); +} + +#[test] +fn request_affinity_key_reads_responses_input_items_and_prompt() { + let responses_request = LlmRequest { + headers: Map::new(), + content: json!({ + "input": [ + { + "role": "user", + "content": [ + { + "type": "input_text", + "text": "Analyze the Node binding architecture." + } + ] + } + ] + }), + }; + let prompt_request = LlmRequest { + headers: Map::new(), + content: json!({ + "prompt": "Summarize the Go binding architecture." + }), + }; + + assert_eq!( + request_affinity_key(&responses_request).as_deref(), + Some("Analyze the Node binding architecture.") + ); + assert_eq!( + request_affinity_key(&prompt_request).as_deref(), + Some("Summarize the Go binding architecture.") + ); +} + +#[test] +fn request_affinity_key_ignores_count_token_style_payloads() { let request = LlmRequest { headers: Map::new(), content: json!("// source text without a chat user task"), }; - assert_eq!(llm_request_affinity_key(&request), None); + assert_eq!(request_affinity_key(&request), None); } #[test] diff --git a/crates/cli/tests/coverage/session_tests.rs b/crates/cli/tests/coverage/session_tests.rs index 167da60a..6fa76fba 100644 --- a/crates/cli/tests/coverage/session_tests.rs +++ b/crates/cli/tests/coverage/session_tests.rs @@ -2269,16 +2269,22 @@ async fn explicit_subagent_tool_owner_claims_next_unhinted_llm() { } #[tokio::test] -async fn request_affinity_overrides_stale_tool_owner_for_parallel_subagents() { +async fn request_affinity_pairs_parallel_subagents_across_provider_formats() { let manager = SessionManager::new(session_test_config()); manager .apply_events( &HeaderMap::new(), vec![ - NormalizedEvent::AgentStarted(session_event("parallel-affinity", "SessionStart")), + NormalizedEvent::AgentStarted(SessionEvent { + session_id: "parallel-affinity".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionStart".into(), + payload: json!({}), + metadata: json!({}), + }), NormalizedEvent::SubagentStarted(SubagentEvent { session_id: "parallel-affinity".into(), - agent_kind: AgentKind::ClaudeCode, + agent_kind: AgentKind::Codex, event_name: "SubagentStart".into(), subagent_id: "python-worker".into(), payload: json!({}), @@ -2286,7 +2292,7 @@ async fn request_affinity_overrides_stale_tool_owner_for_parallel_subagents() { }), NormalizedEvent::SubagentStarted(SubagentEvent { session_id: "parallel-affinity".into(), - agent_kind: AgentKind::ClaudeCode, + agent_kind: AgentKind::Codex, event_name: "SubagentStart".into(), subagent_id: "go-worker".into(), payload: json!({}), @@ -2302,7 +2308,7 @@ async fn request_affinity_overrides_stale_tool_owner_for_parallel_subagents() { &HeaderMap::new(), LlmGatewayStart { subagent_id: Some("python-worker".into()), - ..llm_start_with_user_task( + ..llm_start_with_responses_task( "parallel-affinity", "Very thorough analysis of the python/nemo_flow package.", ) @@ -2320,7 +2326,7 @@ async fn request_affinity_overrides_stale_tool_owner_for_parallel_subagents() { &HeaderMap::new(), LlmGatewayStart { subagent_id: Some("go-worker".into()), - ..llm_start_with_user_task( + ..llm_start_with_messages_task( "parallel-affinity", "Very thorough analysis of the go/nemo_flow binding.", ) @@ -2339,7 +2345,7 @@ async fn request_affinity_overrides_stale_tool_owner_for_parallel_subagents() { vec![ NormalizedEvent::ToolStarted(ToolEvent { session_id: "parallel-affinity".into(), - agent_kind: AgentKind::ClaudeCode, + agent_kind: AgentKind::Codex, event_name: "PreToolUse".into(), tool_call_id: "go-tool".into(), tool_name: "Read".into(), @@ -2352,7 +2358,7 @@ async fn request_affinity_overrides_stale_tool_owner_for_parallel_subagents() { }), NormalizedEvent::ToolEnded(ToolEvent { session_id: "parallel-affinity".into(), - agent_kind: AgentKind::ClaudeCode, + agent_kind: AgentKind::Codex, event_name: "PostToolUse".into(), tool_call_id: "go-tool".into(), tool_name: "Read".into(), @@ -2381,7 +2387,7 @@ async fn request_affinity_overrides_stale_tool_owner_for_parallel_subagents() { let python_later = manager .start_llm( &HeaderMap::new(), - llm_start_with_user_task( + llm_start_with_chat_completion_task( "parallel-affinity", "Very thorough analysis of the python/nemo_flow package.", ), @@ -2396,7 +2402,7 @@ async fn request_affinity_overrides_stale_tool_owner_for_parallel_subagents() { ); assert_eq!( python_later.handle.metadata.as_ref().unwrap()["llm_correlation_source"], - json!("llm_request") + json!("request_payload") ); } @@ -3172,35 +3178,92 @@ fn llm_start() -> LlmGatewayStart { } } -fn llm_start_with_user_task(session_id: &str, task: &str) -> LlmGatewayStart { +fn llm_start_with_messages_task(session_id: &str, task: &str) -> LlmGatewayStart { + llm_start_with_content( + session_id, + "anthropic.messages", + "claude-test", + json!({ + "model": "claude-test", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "\nToday is 2026-05-19.\n" + }, + { + "type": "text", + "text": task + } + ] + } + ] + }), + ) +} + +fn llm_start_with_responses_task(session_id: &str, task: &str) -> LlmGatewayStart { + llm_start_with_content( + session_id, + "openai.responses", + "gpt-test", + json!({ + "model": "gpt-test", + "input": [ + { + "role": "user", + "content": [ + { + "type": "input_text", + "text": task + } + ] + } + ] + }), + ) +} + +fn llm_start_with_chat_completion_task(session_id: &str, task: &str) -> LlmGatewayStart { + llm_start_with_content( + session_id, + "openai.chat_completions", + "gpt-test", + json!({ + "model": "gpt-test", + "messages": [ + { + "role": "system", + "content": "You are a coding agent." + }, + { + "role": "user", + "content": task + } + ] + }), + ) +} + +fn llm_start_with_content( + session_id: &str, + provider: &str, + model_name: &str, + content: Value, +) -> LlmGatewayStart { LlmGatewayStart { session_id: Some(session_id.into()), - provider: "anthropic.messages".into(), - model_name: Some("claude-test".into()), + provider: provider.into(), + model_name: Some(model_name.into()), subagent_id: None, conversation_id: None, generation_id: None, request_id: None, request: LlmRequest { headers: Map::new(), - content: json!({ - "model": "claude-test", - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "\nToday is 2026-05-19.\n" - }, - { - "type": "text", - "text": task - } - ] - } - ] - }), + content, }, streaming: false, metadata: json!({}), From fbfa7090d56174796851fc6ebd213796647d5fbd Mon Sep 17 00:00:00 2001 From: Will Killian Date: Tue, 19 May 2026 12:37:09 -0400 Subject: [PATCH 4/5] fix: improve coding agent trace alignment Signed-off-by: Will Killian --- crates/cli/src/adapters/mod.rs | 14 +- crates/cli/src/alignment/claude_code.rs | 38 +- crates/cli/src/alignment/mod.rs | 102 ++- crates/cli/src/gateway.rs | 132 +++- crates/cli/src/server.rs | 1 + crates/cli/src/session.rs | 548 ++++++++++++---- crates/cli/tests/coverage/adapters_tests.rs | 8 +- .../coverage/alignment_claude_code_tests.rs | 49 +- crates/cli/tests/coverage/alignment_tests.rs | 137 +++- crates/cli/tests/coverage/gateway_tests.rs | 139 +++- crates/cli/tests/coverage/server_tests.rs | 8 +- crates/cli/tests/coverage/session_tests.rs | 610 ++++++++++++++++-- .../src/observability/plugin_component.rs | 32 +- 13 files changed, 1596 insertions(+), 222 deletions(-) diff --git a/crates/cli/src/adapters/mod.rs b/crates/cli/src/adapters/mod.rs index 8be695f1..c8a91a21 100644 --- a/crates/cli/src/adapters/mod.rs +++ b/crates/cli/src/adapters/mod.rs @@ -380,8 +380,17 @@ fn classify( headers: &HeaderMap, rules: &ClassificationRules<'_>, ) -> Vec { - let primary = classify_primary(payload, headers, rules); let normalized = normalize_name(&event_name(payload)); + if matches!( + normalized.as_str(), + "beforesubmitprompt" | "promptsubmitted" | "userpromptsubmit" + ) { + return vec![ + NormalizedEvent::PromptSubmitted(common_session_event(payload, headers, rules.kind)), + NormalizedEvent::LlmHint(common_llm_hint_event(payload, headers, rules.kind)), + ]; + } + let primary = classify_primary(payload, headers, rules); if normalized == "stop" && !primary.is_terminal() { return vec![ primary, @@ -439,9 +448,6 @@ fn classify_primary( NormalizedEvent::ToolEnded(common_tool_event(payload, headers, rules.kind)) } else { match normalized.as_str() { - "beforesubmitprompt" | "promptsubmitted" | "userpromptsubmit" => { - NormalizedEvent::LlmHint(common_llm_hint_event(payload, headers, rules.kind)) - } "afteragentresponse" | "agentresponse" | "assistantresponse" | "afteragentthought" | "prellmcall" | "postllmcall" | "stop" => { NormalizedEvent::LlmHint(common_llm_hint_event(payload, headers, rules.kind)) diff --git a/crates/cli/src/alignment/claude_code.rs b/crates/cli/src/alignment/claude_code.rs index e2bbaa46..b85a821f 100644 --- a/crates/cli/src/alignment/claude_code.rs +++ b/crates/cli/src/alignment/claude_code.rs @@ -26,13 +26,16 @@ pub(crate) fn session_id_from_headers(headers: &HeaderMap) -> Option { header_string(headers, "x-claude-code-session-id") } -// Claude's `Agent` tool result names the spawned worker as `agentId`. Treating that as a -// completion signal gives the CLI a deterministic subagent end even when Claude does not emit a -// separate `SubagentStop` hook until session teardown. +// Claude's `Agent` tool can report either an asynchronous launch acknowledgement or a terminal +// worker result. Only the terminal result should close the subagent scope; otherwise parallel +// workers that launch in the background are closed before their later tool/LLM hooks arrive. pub(crate) fn completed_subagent_from_agent_tool(event: &ToolEvent) -> Option { if event.agent_kind != AgentKind::ClaudeCode || event.tool_name != "Agent" { return None; } + if !is_terminal_agent_tool_result(&event.result) { + return None; + } json_string_at( &event.result, &[ @@ -44,6 +47,35 @@ pub(crate) fn completed_subagent_from_agent_tool(event: &ToolEvent) -> Option bool { + let status = json_string_at(result, &[&["status"][..]]) + .map(|status| status.trim().to_ascii_lowercase().replace(['-', ' '], "_")); + match status.as_deref() { + Some("async_launched" | "launched" | "started" | "running" | "pending" | "in_progress") => { + false + } + Some( + "completed" | "complete" | "success" | "succeeded" | "failed" | "error" | "errored" + | "cancelled" | "canceled" | "timeout" | "timed_out", + ) => true, + Some(_) | None => has_terminal_agent_tool_evidence(result), + } +} + +fn has_terminal_agent_tool_evidence(result: &serde_json::Value) -> bool { + [ + "content", + "output", + "totalDurationMs", + "totalTokens", + "totalToolUseCount", + "durationMs", + "usage", + ] + .into_iter() + .any(|key| result.get(key).is_some_and(|value| !value.is_null())) +} + #[cfg(test)] #[path = "../../tests/coverage/alignment_claude_code_tests.rs"] mod tests; diff --git a/crates/cli/src/alignment/mod.rs b/crates/cli/src/alignment/mod.rs index 4557b211..ed937d15 100644 --- a/crates/cli/src/alignment/mod.rs +++ b/crates/cli/src/alignment/mod.rs @@ -19,6 +19,8 @@ use crate::model::{AgentKind, LlmEvent, NormalizedEvent, SessionEvent, SubagentE pub(crate) mod claude_code; pub(crate) mod codex; +const REQUEST_AFFINITY_KEY_MAX_CHARS: usize = 4096; + #[derive(Debug, Clone)] pub(crate) enum SubagentSessionContext { Codex(codex::SubagentContext), @@ -149,8 +151,8 @@ impl SessionAlignmentState { let (event, finished_alias) = route_event_through_alias(event, &self.aliases); let session_id = event.session_id().to_string(); if let Some(child_session_id) = finished_alias.as_ref() { - // Remove aliases before terminal skip checks so a late child AgentEnd cannot leave - // stale reparenting state after its parent session has already closed. + // Remove aliases before terminal skip checks so a late child AgentEnd, or a child + // TurnEnded used as a subagent completion signal, cannot leave stale reparenting state. self.aliases.remove(child_session_id); self.pending_subagents.remove(child_session_id); } @@ -182,7 +184,7 @@ impl SessionAlignmentState { .collect() } - fn clear_for_ended_agent(&mut self, session_id: &str) { + pub(crate) fn clear_for_ended_agent(&mut self, session_id: &str) { self.aliases.retain(|child_session_id, alias| { child_session_id != session_id && alias.parent_session_id != session_id }); @@ -190,6 +192,19 @@ impl SessionAlignmentState { child_session_id != session_id && pending.parent_session_id() != session_id }); } + + pub(crate) fn clear_for_ended_subagent(&mut self, parent_session_id: &str, subagent_id: &str) { + self.aliases.retain(|child_session_id, alias| { + child_session_id != subagent_id + && !(alias.parent_session_id == parent_session_id + && alias.subagent_id == subagent_id) + }); + self.pending_subagents.retain(|child_session_id, pending| { + child_session_id != subagent_id + && !(pending.parent_session_id() == parent_session_id + && pending.event.session_id == subagent_id) + }); + } } // Resolves the session id for a gateway request in precedence order: @@ -266,6 +281,13 @@ pub(crate) fn agent_kind_for_gateway_provider(provider: &str) -> AgentKind { } } +// Not every harness has a reliable process/session end signal. Claude Code and Codex sessions can +// outlive a user-visible run, so the CLI represents their work with bounded turn scopes instead of +// exporting a long-lived agent scope that needs synthetic termination. +pub(crate) fn should_emit_session_agent_scope(agent_kind: AgentKind) -> bool { + !matches!(agent_kind, AgentKind::ClaudeCode | AgentKind::Codex) +} + // Detects agent harnesses that report a child session which should become a subagent under another // session. Codex is the first such harness: it starts child threads with parent-thread metadata. // Future harness-specific detectors should plug in here so the session manager can stay provider @@ -350,8 +372,9 @@ pub(crate) fn llm_owner_metadata(scope_metadata: Option<&Value>) -> Value { // Responses shapes, and deliberately ignores raw count-token/file payloads. pub(crate) fn request_affinity_key(request: &LlmRequest) -> Option { let task_text = request_user_task_text(&request.content)?; - let normalized = normalize_affinity_text(task_text); - (normalized.len() >= 24).then(|| truncate_affinity_text(&normalized, 512)) + let normalized = normalize_affinity_text(&task_text); + (normalized.len() >= 24) + .then(|| truncate_affinity_text(&normalized, REQUEST_AFFINITY_KEY_MAX_CHARS)) } // Detects tool results that imply a subagent completed. Claude Code reports this through the @@ -360,6 +383,18 @@ pub(crate) fn completed_subagent_from_tool(event: &ToolEvent) -> Option claude_code::completed_subagent_from_agent_tool(event) } +// Some harnesses route child-session turn boundaries through a parent-owned subagent alias. A +// child turn end should close that subagent, not the parent turn containing all sibling work. +pub(crate) fn aliased_turn_subagent_id(event: &SessionEvent) -> Option { + json_string_at( + &event.metadata, + &[ + &["codex_subagent_session_id"][..], + &["subagent_session_id"][..], + ], + ) +} + // Routes events from an aliased child session through the parent session/subagent pair. The alias // records why the child is not a top-level agent; this generic router only rewrites ownership and // preserves the adapter-supplied metadata for filtering/debugging in Phoenix. @@ -397,7 +432,7 @@ pub(crate) fn route_event_through_alias( ), NormalizedEvent::TurnEnded(mut event) => { route_session_event(&mut event, &alias, metadata); - (NormalizedEvent::TurnEnded(event), None) + (NormalizedEvent::TurnEnded(event), Some(child_session_id)) } NormalizedEvent::PromptSubmitted(mut event) => { route_session_event(&mut event, &alias, metadata); @@ -490,50 +525,67 @@ fn route_tool_event(event: &mut ToolEvent, alias: &SessionAlias, metadata: Value event.metadata = merge_metadata(event.metadata.clone(), metadata); } -fn request_user_task_text(payload: &Value) -> Option<&str> { +fn request_user_task_text(payload: &Value) -> Option { payload .get("messages") .and_then(Value::as_array) - .and_then(|messages| messages.iter().find_map(user_message_task_text)) + .and_then(|messages| messages.iter().rev().find_map(user_message_task_text)) .or_else(|| responses_input_task_text(payload.get("input")?)) .or_else(|| prompt_task_text(payload.get("prompt")?)) } -fn user_message_task_text(message: &Value) -> Option<&str> { +fn user_message_task_text(message: &Value) -> Option { if message.get("role").and_then(Value::as_str) != Some("user") { return None; } content_task_text(message.get("content")?) } -fn responses_input_task_text(input: &Value) -> Option<&str> { +fn responses_input_task_text(input: &Value) -> Option { match input { - Value::String(text) => Some(text.as_str()).filter(|text| is_affinity_candidate(text)), - Value::Array(items) => items.iter().find_map(user_message_task_text), + Value::String(text) => affinity_candidate_text(text), + Value::Array(items) => items.iter().rev().find_map(user_message_task_text), _ => None, } } -fn prompt_task_text(prompt: &Value) -> Option<&str> { - prompt.as_str().filter(|text| is_affinity_candidate(text)) +fn prompt_task_text(prompt: &Value) -> Option { + prompt.as_str().and_then(affinity_candidate_text) } -fn content_task_text(content: &Value) -> Option<&str> { +fn content_task_text(content: &Value) -> Option { match content { - Value::String(text) => Some(text.as_str()).filter(|text| is_affinity_candidate(text)), - Value::Array(blocks) => blocks.iter().find_map(|block| { - block - .get("text") - .and_then(Value::as_str) - .filter(|text| is_affinity_candidate(text)) - }), + Value::String(text) => affinity_candidate_text(text), + Value::Array(blocks) => blocks.iter().rev().find_map(content_block_task_text), _ => None, } } -fn is_affinity_candidate(text: &str) -> bool { - let trimmed = text.trim_start(); - !trimmed.is_empty() && !trimmed.starts_with("") +fn content_block_task_text(block: &Value) -> Option { + if let Some(block_type) = block.get("type").and_then(Value::as_str) + && !matches!(block_type, "text" | "input_text") + { + return None; + } + block + .get("text") + .and_then(Value::as_str) + .and_then(affinity_candidate_text) +} + +fn affinity_candidate_text(text: &str) -> Option { + let cleaned = text.trim(); + if cleaned.is_empty() || looks_like_json_payload(cleaned) { + return None; + } + Some(cleaned.to_string()) +} + +fn looks_like_json_payload(text: &str) -> bool { + matches!( + serde_json::from_str::(text), + Ok(Value::Object(_) | Value::Array(_)) + ) } fn normalize_affinity_text(text: &str) -> String { diff --git a/crates/cli/src/gateway.rs b/crates/cli/src/gateway.rs index 46120ffc..9b6aaffd 100644 --- a/crates/cli/src/gateway.rs +++ b/crates/cli/src/gateway.rs @@ -27,7 +27,7 @@ use crate::alignment::{self, GatewayRouteKind}; use crate::config::header_string; use crate::error::CliError; use crate::server::AppState; -use crate::session::{GatewayCallPrep, LlmGatewayStart}; +use crate::session::{GatewayCallPrep, LlmGatewayStart, SessionManager}; const MAX_BODY_BYTES: usize = 100 * 1024 * 1024; @@ -267,6 +267,7 @@ async fn run_managed_buffered( .sessions .record_gateway_response_hints(&session_id, owner_subagent_id, response_json) .await; + state.sessions.finish_gateway_call(&session_id).await; let (status, headers) = upstream_info .lock() .expect("upstream info lock poisoned") @@ -279,7 +280,10 @@ async fn run_managed_buffered( .unwrap_or_default(); build_response(status, headers, Body::from(bytes)) } - Err(error) => Err(translate_runtime_error(error, &upstream_error)), + Err(error) => { + state.sessions.finish_gateway_call(&session_id).await; + Err(translate_runtime_error(error, &upstream_error)) + } } } @@ -364,10 +368,20 @@ async fn run_managed_streaming( // collector and finalizer for managed streaming, so without a codec we cannot use the managed // pipeline. This keeps non-LLM streaming paths working while typed codecs remain optional. let Some(streaming_codec) = codecs.streaming else { + state.sessions.finish_gateway_call(&prep.session_id).await; return passthrough_streaming(state, prepared).await; }; let collector = streaming_codec.collector(); - let finalizer = streaming_codec.finalizer(); + let final_response = Arc::new(Mutex::new(None)); + let final_response_for_finalizer = final_response.clone(); + let original_finalizer = streaming_codec.finalizer(); + let finalizer = Box::new(move || { + let response = original_finalizer(); + *final_response_for_finalizer + .lock() + .expect("stream final response lock poisoned") = Some(response.clone()); + response + }); let GatewayCallPrep { scope_stack, @@ -398,23 +412,30 @@ async fn run_managed_streaming( async move { llm_stream_call_execute(params).await }, ) .await; - let json_stream = - json_stream_result.map_err(|error| translate_runtime_error(error, &upstream_error))?; + let json_stream = match json_stream_result { + Ok(json_stream) => json_stream, + Err(error) => { + state.sessions.finish_gateway_call(&session_id).await; + return Err(translate_runtime_error(error, &upstream_error)); + } + }; let (status, headers) = upstream_info .lock() .expect("upstream info lock poisoned") .take() .unwrap_or((StatusCode::OK, HeaderMap::new())); - let body = client_sse_body(json_stream, provider_route); - - // Tool hint extraction from streamed responses is intentionally deferred: the runtime - // synthesizes the aggregate response inside `LlmStreamWrapper::emit_end_event` and does not - // surface it back to the caller. Non-streamed responses continue to feed - // `record_gateway_response_hints` from `run_managed_buffered`. Wiring streamed hints back would - // require either a runtime hook or a finalizer-tap channel; neither is in scope for the - // initial managed-execution refactor. - let _ = (session_id, owner_subagent_id); + let body = client_sse_body( + json_stream, + provider_route, + state.sessions.clone(), + session_id.clone(), + owner_subagent_id, + final_response, + ); + // Streamed responses are finalized inside the runtime stream wrapper. The small finalizer tap + // above copies only the aggregate JSON payload so the session can update turn output and tool + // hints after the downstream client consumes the stream, without buffering SSE bytes here. build_response(status, headers, body) } @@ -506,8 +527,16 @@ fn sse_json_stream(response: reqwest::Response) -> LlmJsonStream { // names are reconstructed from the JSON `type` field where providers populate it (Anthropic // Messages, OpenAI Responses); OpenAI Chat omits the `event:` line and appends the original // `data: [DONE]` terminator after the runtime stream completes. -fn client_sse_body(json_stream: LlmJsonStream, route: ProviderRoute) -> Body { +fn client_sse_body( + json_stream: LlmJsonStream, + route: ProviderRoute, + sessions: SessionManager, + session_id: String, + owner_subagent_id: Option, + final_response: Arc>>, +) -> Body { let mut json_stream = json_stream; + let mut guard = GatewayCallGuard::new(sessions, session_id, owner_subagent_id, final_response); let stream = stream! { while let Some(item) = json_stream.next().await { match item { @@ -516,11 +545,13 @@ fn client_sse_body(json_stream: LlmJsonStream, route: ProviderRoute) -> Body { yield Ok::(Bytes::from(frame)); } Err(error) => { + guard.finish().await; yield Err(CliError::InvalidPayload(error.to_string())); return; } } } + guard.finish().await; if matches!(route, ProviderRoute::OpenAiChatCompletions) { yield Ok::(Bytes::from_static(b"data: [DONE]\n\n")); } @@ -528,6 +559,77 @@ fn client_sse_body(json_stream: LlmJsonStream, route: ProviderRoute) -> Body { Body::from_stream(stream) } +// Keeps the session idle detector honest for streaming responses. Normal completion calls +// `finish`, while early client disconnects drop the body stream and use the drop path to release +// the in-flight gateway call asynchronously. +struct GatewayCallGuard { + sessions: Option, + session_id: String, + owner_subagent_id: Option, + final_response: Arc>>, +} + +impl GatewayCallGuard { + fn new( + sessions: SessionManager, + session_id: String, + owner_subagent_id: Option, + final_response: Arc>>, + ) -> Self { + Self { + sessions: Some(sessions), + session_id, + owner_subagent_id, + final_response, + } + } + + async fn finish(&mut self) { + if let Some(sessions) = self.sessions.take() { + let response = self + .final_response + .lock() + .expect("stream final response lock poisoned") + .take(); + if let Some(response) = response { + sessions + .record_gateway_response_hints( + &self.session_id, + self.owner_subagent_id.clone(), + response, + ) + .await; + } + sessions.finish_gateway_call(&self.session_id).await; + } + } +} + +impl Drop for GatewayCallGuard { + fn drop(&mut self) { + let Some(sessions) = self.sessions.take() else { + return; + }; + let session_id = self.session_id.clone(); + let owner_subagent_id = self.owner_subagent_id.clone(); + let response = self + .final_response + .lock() + .expect("stream final response lock poisoned") + .take(); + if let Ok(handle) = tokio::runtime::Handle::try_current() { + handle.spawn(async move { + if let Some(response) = response { + sessions + .record_gateway_response_hints(&session_id, owner_subagent_id, response) + .await; + } + sessions.finish_gateway_call(&session_id).await; + }); + } + } +} + // Formats one SSE frame from a parsed event payload. Anthropic and OpenAI Responses events carry // the event name in the `type` field, so it is mirrored back onto the `event:` line; OpenAI Chat // chunks have no event name and emit only `data:`. diff --git a/crates/cli/src/server.rs b/crates/cli/src/server.rs index 9e937152..af72c1ca 100644 --- a/crates/cli/src/server.rs +++ b/crates/cli/src/server.rs @@ -107,6 +107,7 @@ impl AppState { fn new(config: GatewayConfig) -> Self { crate::tls::install_rustls_crypto_provider(); let sessions = SessionManager::new(config.clone()); + sessions.start_idle_sweeper(); let http = Client::builder() .connect_timeout(HTTP_CONNECT_TIMEOUT) .timeout(HTTP_REQUEST_TIMEOUT) diff --git a/crates/cli/src/session.rs b/crates/cli/src/session.rs index ca16d150..bb289863 100644 --- a/crates/cli/src/session.rs +++ b/crates/cli/src/session.rs @@ -35,6 +35,8 @@ use crate::model::{ const LLM_HINT_TTL: Duration = Duration::from_secs(300); const TOOL_HINT_TTL: Duration = Duration::from_secs(300); const LAST_OWNER_TTL: Duration = Duration::from_secs(300); +const AGENT_IDLE_TIMEOUT: Duration = Duration::from_secs(30); +const AGENT_IDLE_SWEEP_INTERVAL: Duration = Duration::from_secs(5); #[derive(Clone)] pub(crate) struct SessionManager { @@ -77,7 +79,7 @@ pub(crate) struct ActiveLlm { /// /// The session lock is released after the prep is built, so the gateway can run /// the upstream HTTP work without blocking unrelated session activity. The -/// preserved `scope_stack` is what restores the agent/subagent scope context +/// preserved `scope_stack` is what restores the turn/subagent scope context /// the call was opened against when the runtime emits start/end events. pub(crate) struct GatewayCallPrep { pub(crate) scope_stack: ScopeStackHandle, @@ -95,7 +97,12 @@ struct Session { agent_kind: AgentKind, session_id: String, scope_stack: ScopeStackHandle, + session_started: bool, + session_metadata: Value, agent_scope: Option, + turn_scope: Option, + turn_index: u64, + last_turn_llm_output: Option, subagents: HashMap, // Each active subagent gets its own scope stack seeded with the parent agent handle. This lets // sibling workers close out of order without corrupting the task-local stack. @@ -113,6 +120,8 @@ struct Session { // neutral fallback than a single global "last tool owner" when requests lack subagent headers. llm_request_affinity: HashMap>, last_llm_owner: Option, + last_activity: Instant, + active_gateway_calls: usize, config: SessionConfig, } @@ -201,6 +210,39 @@ impl SessionManager { } } + /// Starts the fail-safe idle closer used by the HTTP gateway. + /// + /// Some coding agents, notably Codex child threads, do not always emit native agent-end hooks. + /// The sweeper is provider-neutral: it closes any open turn that has had no hook or gateway + /// activity for a short interval, while leaving turns with active tools or managed LLM calls + /// alone. Weak references keep the task from extending the manager lifetime in tests or + /// shutdown paths. + pub(crate) fn start_idle_sweeper(&self) { + let inner = Arc::downgrade(&self.inner); + let alignment = Arc::downgrade(&self.alignment); + tokio::spawn(async move { + let mut interval = tokio::time::interval(AGENT_IDLE_SWEEP_INTERVAL); + interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + loop { + interval.tick().await; + let (Some(inner), Some(alignment)) = (inner.upgrade(), alignment.upgrade()) else { + break; + }; + if let Err(error) = close_idle_sessions_from_parts( + &inner, + &alignment, + Instant::now(), + AGENT_IDLE_TIMEOUT, + "idle_timeout", + ) + .await + { + eprintln!("nemo-flow CLI gateway: idle session teardown failed: {error}"); + } + } + }); + } + /// Applies normalized hook events to their owning sessions in arrival order. /// /// Session configuration is re-read from headers for each request so installed hook commands can @@ -310,7 +352,7 @@ impl SessionManager { /// Prepares a managed LLM execution against the right session and scope context. /// - /// Resolves the session, opens the agent scope if needed, computes the parent scope and + /// Resolves the session, opens the turn scope if needed, computes the parent scope and /// correlation metadata, and returns a [`GatewayCallPrep`]. The returned prep carries the /// `ScopeStackHandle` that callers must restore around `llm_call_execute` / /// `llm_stream_call_execute` so the runtime emits start/end events under the same agent or @@ -342,6 +384,18 @@ impl SessionManager { session.prepare_gateway_call(start).await } + /// Marks a managed gateway LLM call as finished for idle-timeout purposes. + /// + /// Runtime-managed LLM spans are emitted outside the session lock, so the session keeps a small + /// in-flight counter to prevent the idle sweeper from closing a turn while an upstream + /// provider request or streaming response is still active. + pub(crate) async fn finish_gateway_call(&self, session_id: &str) { + let mut sessions = self.inner.lock().await; + if let Some(session) = sessions.get_mut(session_id) { + session.finish_gateway_call(); + } + } + /// Legacy manual-lifecycle close paired with [`Self::start_llm`]. Production gateway traffic /// no longer needs this helper because managed execution emits the end event automatically. /// @@ -381,7 +435,7 @@ impl SessionManager { .await?; let mut sessions = self.inner.lock().await; if let Some(session) = sessions.get_mut(&session_id) { - session.add_tool_hints_from_llm_response(response_for_hints, owner_subagent_id); + session.record_completed_llm_response(response_for_hints, owner_subagent_id); } Ok(()) } @@ -411,7 +465,7 @@ impl SessionManager { }; let mut sessions = self.inner.lock().await; if let Some(session) = sessions.get_mut(&session_id) { - session.add_tool_hints_from_llm_response(response, owner_subagent_id); + session.record_completed_llm_response(response, owner_subagent_id); } } @@ -432,6 +486,16 @@ impl SessionManager { close_sessions_for_shutdown(&mut sessions, reason).await } + #[cfg(test)] + pub(crate) async fn close_idle_sessions_at( + &self, + now: Instant, + timeout: Duration, + reason: &str, + ) -> Result { + close_idle_sessions_from_parts(&self.inner, &self.alignment, now, timeout, reason).await + } + // Applies known or pending child-session aliases before the gateway chooses a session. This is // deliberately before the `inner` lock in `start_llm`/`prepare_gateway_call` so a child-thread // gateway request can promote its pending parent/subagent relationship instead of creating a @@ -578,9 +642,9 @@ async fn promote_pending_subagent( .or_insert_with(|| { Session::new(parent_session_id.clone(), pending.event.agent_kind, config) }); - if parent_session.agent_scope.is_none() { - // Gateway traffic can be the first signal that forces promotion. In that case, synthesize a - // minimal parent agent scope so the subagent still has a valid runtime parent. + if !parent_session.session_started && parent_session.agent_scope.is_none() { + // Gateway traffic can be the first signal that forces promotion. In that case, synthesize + // the parent session metadata; the later subagent start will create a turn-scoped parent. parent_session .apply(NormalizedEvent::AgentStarted(SessionEvent { session_id: parent_session_id, @@ -615,6 +679,69 @@ async fn close_sessions_for_shutdown( first_error.map_or(Ok(()), Err) } +async fn close_idle_sessions_from_parts( + inner: &Arc>>, + alignment: &Arc>, + now: Instant, + timeout: Duration, + reason: &str, +) -> Result { + let mut idle_sessions = Vec::new(); + { + let mut sessions = inner.lock().await; + let ids = sessions + .iter() + .filter_map(|(session_id, session)| { + session + .is_idle_for(now, timeout) + .then_some(session_id.clone()) + }) + .collect::>(); + for session_id in ids { + if let Some(session) = sessions.remove(&session_id) { + idle_sessions.push((session_id, session)); + } + } + } + if idle_sessions.is_empty() { + return Ok(0); + } + let mut closed_turns = 0; + let mut closed_subagents = Vec::new(); + let mut retained_sessions = Vec::new(); + let mut first_error = None; + for (session_id, mut session) in idle_sessions { + let stack = session.scope_stack.clone(); + let result = TASK_SCOPE_STACK + .scope(stack, async { session.close_turn_for_reason(reason).await }) + .await; + match result { + Ok(subagent_ids) => { + closed_turns += 1; + for subagent_id in subagent_ids { + closed_subagents.push((session_id.clone(), subagent_id)); + } + } + Err(error) if first_error.is_none() => first_error = Some(error), + Err(_) => {} + } + if !session.is_empty() { + retained_sessions.push((session_id, session)); + } + } + { + let mut sessions = inner.lock().await; + sessions.extend(retained_sessions); + } + if !closed_subagents.is_empty() { + let mut alignment_state = alignment.lock().await; + for (session_id, subagent_id) in closed_subagents { + alignment_state.clear_for_ended_subagent(&session_id, &subagent_id); + } + } + first_error.map_or(Ok(closed_turns), Err) +} + impl Session { // Constructs per-session runtime state without creating a scope yet. The root agent scope is // opened lazily on the first event or gateway LLM call so sessions created from hints and pure @@ -624,7 +751,12 @@ impl Session { agent_kind, session_id, scope_stack: create_scope_stack(), + session_started: false, + session_metadata: Value::Null, agent_scope: None, + turn_scope: None, + turn_index: 0, + last_turn_llm_output: None, subagents: HashMap::new(), subagent_stacks: HashMap::new(), subagent_stack: Vec::new(), @@ -635,6 +767,8 @@ impl Session { pending_tool_hints: Vec::new(), llm_request_affinity: HashMap::new(), last_llm_owner: None, + last_activity: Instant::now(), + active_gateway_calls: 0, config, } } @@ -647,7 +781,9 @@ impl Session { } fn is_empty(&self) -> bool { - self.agent_scope.is_none() + !self.session_started + && self.agent_scope.is_none() + && self.turn_scope.is_none() && self.subagents.is_empty() && self.subagent_stacks.is_empty() && self.subagent_stack.is_empty() @@ -655,16 +791,39 @@ impl Session { && self.tools.is_empty() } + fn touch_activity(&mut self) { + self.last_activity = Instant::now(); + } + + fn begin_gateway_call(&mut self) { + self.touch_activity(); + self.active_gateway_calls += 1; + } + + fn finish_gateway_call(&mut self) { + self.touch_activity(); + self.active_gateway_calls = self.active_gateway_calls.saturating_sub(1); + } + + fn is_idle_for(&self, now: Instant, timeout: Duration) -> bool { + self.turn_scope.is_some() + && self.active_gateway_calls == 0 + && self.llms.is_empty() + && self.tools.is_empty() + && now.duration_since(self.last_activity) >= timeout + } + // Runs one normalized hook event inside this session's scope stack. Dispatch stays synchronous // inside the scoped closure so lifecycle ordering from each hook request is preserved exactly. async fn apply(&mut self, event: NormalizedEvent) -> Result<(), CliError> { + self.touch_activity(); let stack = self.scope_stack.clone(); TASK_SCOPE_STACK .scope(stack, async move { match event { NormalizedEvent::AgentStarted(event) => self.start_agent(event), NormalizedEvent::AgentEnded(event) => self.end_agent(event).await, - NormalizedEvent::TurnEnded(_) => Ok(()), + NormalizedEvent::TurnEnded(event) => self.end_turn(event).await, NormalizedEvent::SubagentStarted(event) => self.start_subagent(event).await, NormalizedEvent::SubagentEnded(event) => self.end_subagent(event).await, NormalizedEvent::LlmHint(event) => self.add_llm_hint(event), @@ -672,7 +831,7 @@ impl Session { NormalizedEvent::LlmEnded(event) => self.end_hook_llm(event), NormalizedEvent::ToolStarted(event) => self.start_tool(event), NormalizedEvent::ToolEnded(event) => self.end_tool(event).await, - NormalizedEvent::PromptSubmitted(event) => self.mark("prompt_submitted", event), + NormalizedEvent::PromptSubmitted(event) => self.start_turn(event).await, NormalizedEvent::Compaction(event) => self.mark("compaction", event), NormalizedEvent::Notification(event) => self.mark("notification", event), NormalizedEvent::HookMark(event) => self.mark("hook_mark", event), @@ -685,10 +844,11 @@ impl Session { // `prepare_gateway_call` + managed execution. #[cfg(test)] async fn start_llm(&mut self, start: LlmGatewayStart) -> Result { + self.touch_activity(); let stack = self.scope_stack.clone(); TASK_SCOPE_STACK .scope(stack.clone(), async move { - self.ensure_agent_started(Value::Null)?; + self.ensure_turn_started(Value::Null)?; let mut attributes = LlmAttributes::empty(); if start.streaming { attributes |= LlmAttributes::STREAMING; @@ -740,10 +900,11 @@ impl Session { &mut self, start: LlmGatewayStart, ) -> Result { + self.begin_gateway_call(); let stack = self.scope_stack.clone(); - TASK_SCOPE_STACK - .scope(stack.clone(), async move { - self.ensure_agent_started(Value::Null)?; + let result = TASK_SCOPE_STACK + .scope(stack.clone(), async { + self.ensure_turn_started(Value::Null)?; let mut attributes = LlmAttributes::empty(); if start.streaming { attributes |= LlmAttributes::STREAMING; @@ -765,7 +926,7 @@ impl Session { owner.metadata, ); Ok(GatewayCallPrep { - scope_stack: stack, + scope_stack: stack.clone(), session_id: self.session_id.clone(), provider_name: start.provider, request: start.request, @@ -776,34 +937,37 @@ impl Session { owner_subagent_id: owner.subagent_id, }) }) - .await + .await; + if result.is_err() { + self.finish_gateway_call(); + } + result } - // Records an explicit top-level agent start. Repeated start hooks are idempotent because - // `ensure_agent_started` leaves an existing agent scope open and only updates agent kind first. + // Records a harness session start without assuming that every harness exposes a reliable + // session-length span. Some session ids can outlive user-visible work, so those harnesses store + // metadata here and wait for a bounded turn scope before emitting trace structure. fn start_agent(&mut self, event: SessionEvent) -> Result<(), CliError> { self.agent_kind = event.agent_kind; + self.session_started = true; + self.session_metadata = + merge_metadata(self.session_metadata.clone(), event.metadata.clone()); self.ensure_agent_started(event.metadata) } - // Lazily opens the root agent scope and merges metadata from config, event payload, and - // gateway headers. Later calls are no-ops to keep duplicate hooks from nesting agent scopes. + // Lazily opens the root agent scope for harnesses that have a meaningful session boundary. + // Harnesses without a reliable session end deliberately skip this and use bounded turn agent + // scopes as the top-level observable unit. fn ensure_agent_started(&mut self, event_metadata: Value) -> Result<(), CliError> { - if self.agent_scope.is_some() { + if self.agent_scope.is_some() + || !alignment::should_emit_session_agent_scope(self.agent_kind) + { return Ok(()); } let _root = get_handle()?; let metadata = merge_metadata( - merge_metadata( - self.config.metadata.clone().unwrap_or(Value::Null), - event_metadata, - ), - json!({ - "session_id": self.session_id, - "gateway_config_profile": self.config.profile, - "plugin_config": self.config.plugin_config, - "gateway_mode": self.config.gateway_mode, - }), + self.scope_metadata(event_metadata), + json!({ "nemo_flow_scope_role": "session" }), ); let scope = push_scope( PushScopeParams::builder() @@ -816,20 +980,121 @@ impl Session { Ok(()) } - // Closes the session in a fail-safe order: active LLMs/tools first, nested subagents from the - // top down, correlation state, then the root agent scope. + // Opens a new turn agent scope for a user prompt. If the previous turn never received a + // terminal hook, close it first so each user input gets a bounded reviewable trace segment. + async fn start_turn(&mut self, event: SessionEvent) -> Result<(), CliError> { + if alignment::aliased_turn_subagent_id(&event).is_some() { + self.ensure_turn_started(event.metadata.clone())?; + return self.mark("prompt_submitted", event); + } + if self.turn_scope.is_some() { + self.close_turn_for_reason("superseded_by_next_turn") + .await?; + } + self.open_turn(event.metadata, event.payload, "user_prompt") + } + + // Lazily creates an implicit turn when gateway/tool/LLM activity arrives before a prompt hook. + // This keeps direct gateway traffic and sparse hook streams bounded by the same lifecycle as + // prompt-driven turns. + fn ensure_turn_started(&mut self, event_metadata: Value) -> Result<(), CliError> { + if self.turn_scope.is_some() { + return Ok(()); + } + self.open_turn(event_metadata, Value::Null, "implicit") + } + + fn open_turn( + &mut self, + event_metadata: Value, + input: Value, + turn_source: &str, + ) -> Result<(), CliError> { + self.ensure_agent_started(event_metadata.clone())?; + self.turn_index += 1; + let metadata = merge_metadata( + self.scope_metadata(event_metadata), + json!({ + "nemo_flow_scope_role": "turn", + "turn_index": self.turn_index, + "turn_source": turn_source, + }), + ); + let turn_name = self.turn_scope_name(); + let scope = push_scope( + PushScopeParams::builder() + .name(turn_name.as_str()) + .scope_type(ScopeType::Agent) + .parent_opt(self.agent_scope.as_ref()) + .metadata(metadata) + .input(input) + .build(), + )?; + self.turn_scope = Some(scope); + self.last_turn_llm_output = None; + Ok(()) + } + + fn turn_scope_name(&self) -> String { + format!("{}-turn", self.agent_kind.as_str()) + } + + fn scope_metadata(&self, event_metadata: Value) -> Value { + merge_metadata( + merge_metadata( + merge_metadata( + self.config.metadata.clone().unwrap_or(Value::Null), + self.session_metadata.clone(), + ), + event_metadata, + ), + json!({ + "session_id": self.session_id, + "agent_kind": self.agent_kind.as_str(), + "gateway_config_profile": self.config.profile, + "plugin_config": self.config.plugin_config, + "gateway_mode": self.config.gateway_mode, + }), + ) + } + + async fn end_turn(&mut self, event: SessionEvent) -> Result<(), CliError> { + if let Some(subagent_id) = alignment::aliased_turn_subagent_id(&event) { + self.close_subagent_scope(&subagent_id, event.payload) + .await?; + return Ok(()); + } + self.close_turn(event.payload, "closed_by_turn_end").await?; + Ok(()) + } + + async fn close_turn_for_reason(&mut self, reason: &str) -> Result, CliError> { + self.close_turn(json!({ "status": reason }), reason).await + } + + async fn close_turn(&mut self, output: Value, reason: &str) -> Result, CliError> { + if self.turn_scope.is_none() { + return Ok(Vec::new()); + } + self.close_active_llms(reason)?; + self.close_active_tools(reason)?; + let closed_subagents = self.close_active_subagents(reason).await?; + let output = self.last_turn_llm_output.take().unwrap_or(output); + self.clear_correlation_state(); + self.close_turn_scope(output)?; + Ok(closed_subagents) + } + + // Closes the session in a fail-safe order: active turn first, then the root agent scope when + // the harness has one. Duplicate terminal hooks must not reopen scopes. async fn end_agent(&mut self, event: SessionEvent) -> Result<(), CliError> { - // Duplicate agent-end hooks (e.g., hermes-agent emitting `on_session_end` more than once - // per session) must not reopen the agent scope. - if self.agent_scope.is_none() { + if !self.session_started && self.agent_scope.is_none() && self.turn_scope.is_none() { return Ok(()); } - self.ensure_agent_started(event.metadata.clone())?; - self.close_active_llms_for_agent_end()?; - self.close_active_tools_for_agent_end()?; - self.close_active_subagents_for_agent_end().await?; + self.close_turn_for_reason("closed_by_agent_end").await?; self.clear_correlation_state(); self.close_agent_scope(event.payload)?; + self.session_started = false; Ok(()) } @@ -838,28 +1103,27 @@ impl Session { let payload = json!({ "status": reason }); TASK_SCOPE_STACK .scope(stack, async move { - if self.agent_scope.is_none() { + if self.agent_scope.is_none() && self.turn_scope.is_none() { return Ok(()); } - self.close_active_llms_for_agent_end()?; - self.close_active_tools_for_agent_end()?; - self.close_active_subagents_for_agent_end().await?; + self.close_turn_for_reason(reason).await?; self.clear_correlation_state(); self.close_agent_scope(payload)?; + self.session_started = false; Ok(()) }) .await } // Ends all active hook-observed LLM calls before closing their containing scopes. - fn close_active_llms_for_agent_end(&mut self) -> Result<(), CliError> { + fn close_active_llms(&mut self, reason: &str) -> Result<(), CliError> { let active_llms: Vec<_> = self.llms.drain().map(|(_, handle)| handle).collect(); for handle in active_llms { llm_call_end( LlmCallEndParams::builder() .handle(&handle) - .response(json!({ "status": "closed_by_agent_end" })) - .metadata(json!({ "status": "closed_by_agent_end" })) + .response(json!({ "status": reason })) + .metadata(json!({ "status": reason })) .build(), )?; } @@ -868,14 +1132,14 @@ impl Session { // Ends all active tool calls with a synthetic close result before ending their containing scopes. // Draining first avoids holding mutable map state while the runtime emits lifecycle events. - fn close_active_tools_for_agent_end(&mut self) -> Result<(), CliError> { + fn close_active_tools(&mut self, reason: &str) -> Result<(), CliError> { let active_tools: Vec<_> = self.tools.drain().map(|(_, handle)| handle).collect(); for handle in active_tools { tool_call_end( ToolCallEndParams::builder() .handle(&handle) - .result(json!({ "status": "closed_by_agent_end" })) - .metadata(json!({ "status": "closed_by_agent_end" })) + .result(json!({ "status": reason })) + .metadata(json!({ "status": reason })) .build(), )?; } @@ -885,17 +1149,19 @@ impl Session { // Pops active subagent scopes in reverse start order. Each subagent owns an independent runtime // stack so parallel harness workers can still close cleanly when their completion hooks arrive // out of order. Applies to Claude Code Agent workers and Codex child threads today. - async fn close_active_subagents_for_agent_end(&mut self) -> Result<(), CliError> { + async fn close_active_subagents(&mut self, reason: &str) -> Result, CliError> { + let mut closed = Vec::new(); while let Some(subagent_id) = self.subagent_stack.pop() { - self.close_subagent_scope(&subagent_id, json!({ "status": "closed_by_agent_end" })) + self.close_subagent_scope(&subagent_id, json!({ "status": reason })) .await?; + closed.push(subagent_id); } self.subagents.clear(); self.subagent_stacks.clear(); - Ok(()) + Ok(closed) } - // Clears sticky LLM/tool ownership hints that should not survive an agent root shutdown. + // Clears sticky LLM/tool ownership hints that should not survive a turn boundary. fn clear_correlation_state(&mut self) { self.pending_llm_hints.clear(); self.pending_tool_hints.clear(); @@ -918,33 +1184,59 @@ impl Session { Ok(()) } - // Starts a subagent scope under the root agent scope. Duplicate subagent starts are ignored so + fn close_turn_scope(&mut self, output: Value) -> Result<(), CliError> { + let Some(scope) = self.turn_scope.take() else { + return Ok(()); + }; + pop_scope( + PopScopeParams::builder() + .handle_uuid(&scope.uuid) + .output(output) + .build(), + )?; + Ok(()) + } + + fn root_work_scope(&self) -> Option { + self.turn_scope.clone().or_else(|| self.agent_scope.clone()) + } + + // Starts a subagent agent scope under the active turn. Duplicate subagent starts are ignored so // integrations that retry or emit both "start" and "created" style hooks do not double-nest. // - // Subagents get their own runtime stack seeded with the root agent handle. That keeps Phoenix - // parentage sibling-shaped while still allowing parallel workers to end out of order. + // Subagents get their own runtime stack seeded with the turn parent. That keeps Phoenix + // parentage sibling-shaped within a turn while still allowing parallel workers to end out of + // order. async fn start_subagent(&mut self, event: SubagentEvent) -> Result<(), CliError> { - self.ensure_agent_started(event.metadata.clone())?; + self.ensure_turn_started(event.metadata.clone())?; if self.subagents.contains_key(&event.subagent_id) { return Ok(()); } let has_parallel_sibling = !self.subagents.is_empty(); - let agent_scope = self - .agent_scope + let parent_scope = self + .turn_scope .clone() - .expect("ensure_agent_started should initialize the agent scope"); + .expect("ensure_turn_started should initialize the turn scope"); + let agent_scope = self.agent_scope.clone(); let subagent_id = event.subagent_id; let subagent_name = format!("subagent:{subagent_id}"); + let metadata = merge_metadata( + event.metadata, + json!({ "nemo_flow_scope_role": "subagent" }), + ); let subagent_stack = create_scope_stack(); let scope = TASK_SCOPE_STACK .scope(subagent_stack.clone(), async { - task_scope_push(agent_scope.clone()); + if let Some(agent_scope) = agent_scope { + task_scope_push(agent_scope); + } + task_scope_push(parent_scope.clone()); push_scope( PushScopeParams::builder() .name(subagent_name.as_str()) .scope_type(ScopeType::Agent) - .parent(&agent_scope) - .metadata(event.metadata) + .parent(&parent_scope) + .metadata(metadata) .input(event.payload) .build(), ) @@ -966,10 +1258,10 @@ impl Session { // subagent already closed by another provider-specific completion signal are ignored. Applies // to Claude Code Agent-tool completion today. async fn end_subagent(&mut self, event: SubagentEvent) -> Result<(), CliError> { - self.ensure_agent_started(event.metadata.clone())?; if self.completed_subagents.contains(&event.subagent_id) { return Ok(()); } + self.ensure_turn_started(event.metadata.clone())?; if !self.subagents.contains_key(&event.subagent_id) { eprintln!( "nemo-flow CLI gateway: received {} for subagent {} without a matching start", @@ -1036,7 +1328,7 @@ impl Session { // Stores an LLM correlation hint from hook activity after pruning expired hints. Hints do not // emit runtime events themselves; they are consumed by the next matching gateway LLM call. fn add_llm_hint(&mut self, event: LlmHintEvent) -> Result<(), CliError> { - self.ensure_agent_started(event.metadata.clone())?; + self.ensure_turn_started(event.metadata.clone())?; self.cleanup_correlation_state(); let owner_subagent_id = event.subagent_id.clone().or_else(|| event.agent_id.clone()); self.add_tool_hints_from_llm_response(event.payload.clone(), owner_subagent_id); @@ -1052,7 +1344,7 @@ impl Session { // child-session LLMs carry their subagent owner in metadata and are resolved by // `hook_llm_owner`. fn start_hook_llm(&mut self, event: LlmEvent) -> Result<(), CliError> { - self.ensure_agent_started(event.metadata.clone())?; + self.ensure_turn_started(event.metadata.clone())?; if self.llms.contains_key(&event.api_call_id) { return Ok(()); } @@ -1078,7 +1370,7 @@ impl Session { // alias metadata recovery used by `start_hook_llm` keeps post-only aliased child LLMs under the // subagent instead of falling back to the root agent. fn end_hook_llm(&mut self, event: LlmEvent) -> Result<(), CliError> { - self.ensure_agent_started(event.metadata.clone())?; + self.ensure_turn_started(event.metadata.clone())?; let (parent, metadata) = self.hook_llm_owner(event.metadata); let handle = match self.llms.remove(&event.api_call_id) { Some(handle) => handle, @@ -1096,10 +1388,16 @@ impl Session { .build(), )?, }; + let output = event.response; + let root_owned = + json_string_at(&metadata, &[&["llm_correlation_subagent_id"][..]]).is_none(); + if root_owned { + self.record_turn_llm_output(output.clone()); + } llm_call_end( LlmCallEndParams::builder() .handle(&handle) - .response(event.response) + .response(output) .metadata(metadata) .build(), )?; @@ -1113,10 +1411,10 @@ impl Session { fn hook_llm_owner(&mut self, metadata: Value) -> (Option, Value) { let Some(subagent_id) = json_string_at(&metadata, &[&["llm_correlation_subagent_id"][..]]) else { - return (self.agent_scope.clone(), metadata); + return (self.root_work_scope(), metadata); }; let Some(scope) = self.subagents.get(&subagent_id).cloned() else { - return (self.agent_scope.clone(), metadata); + return (self.root_work_scope(), metadata); }; self.set_last_llm_owner(Some(subagent_id.clone())); ( @@ -1125,11 +1423,11 @@ impl Session { ) } - // Starts a tool call under an explicit subagent when available, otherwise under the agent + // Starts a tool call under an explicit subagent when available, otherwise under the turn // scope. Duplicate tool IDs are ignored so repeated pre-tool hooks do not create parallel // handles for one agent tool invocation. fn start_tool(&mut self, event: ToolEvent) -> Result<(), CliError> { - self.ensure_agent_started(event.metadata.clone())?; + self.ensure_turn_started(event.metadata.clone())?; if self.tools.contains_key(&event.tool_call_id) { return Ok(()); } @@ -1167,7 +1465,7 @@ impl Session { // Ends a tool call, synthesizing a start if no matching handle exists. This keeps post-only // hooks observable and preserves the final result/status instead of dropping orphaned endings. async fn end_tool(&mut self, event: ToolEvent) -> Result<(), CliError> { - self.ensure_agent_started(event.metadata.clone())?; + self.ensure_turn_started(event.metadata.clone())?; let completed_agent_subagent_id = alignment::completed_subagent_from_tool(&event); let explicit_subagent_id = event .subagent_id @@ -1223,10 +1521,10 @@ impl Session { Ok(()) } - // Emits a mark event after ensuring the agent scope exists. Generic and unknown hooks use this + // Emits a mark event after ensuring the turn scope exists. Generic and unknown hooks use this // path so unsupported agent events remain visible without changing scope structure. fn mark(&mut self, name: &str, event_payload: SessionEvent) -> Result<(), CliError> { - self.ensure_agent_started(event_payload.metadata.clone())?; + self.ensure_turn_started(event_payload.metadata.clone())?; emit_mark_event( EmitMarkEventParams::builder() .name(name) @@ -1366,7 +1664,7 @@ impl Session { } // Assigns an unhinted gateway call to the only active subagent. Multiple active subagents are - // deliberately not guessed here; those cases fall back to the agent scope with ambiguity + // deliberately not guessed here; those cases fall back to the turn scope with ambiguity // metadata. fn sole_subagent_owner(&mut self) -> Option { if self.subagents.len() == 1 @@ -1392,7 +1690,7 @@ impl Session { // left intact in ambiguous cases so later calls with stronger identifiers can still match them. fn fallback_llm_owner(&self) -> LlmOwnerResolution { LlmOwnerResolution { - parent: self.agent_scope.clone(), + parent: self.root_work_scope(), subagent_id: None, status: if self.pending_llm_hints.is_empty() { "agent_fallback" @@ -1405,9 +1703,9 @@ impl Session { } } - // Converts a consumed hint into an ownership resolution. If the hinted subagent is not currently - // active, the LLM is attached to the agent scope but the hint metadata is still preserved for - // correlation diagnostics. + // Converts a consumed hint into an ownership resolution. If the hinted subagent is not + // currently active, the LLM is attached to the turn scope but the hint metadata is still + // preserved for correlation diagnostics. fn resolution_from_hint( &mut self, hint: LlmHintEvent, @@ -1421,9 +1719,9 @@ impl Session { Some(id.to_string()), self.subagent_llm_metadata(id), ), - None => (self.agent_scope.clone(), None, Value::Null), + None => (self.root_work_scope(), None, Value::Null), }, - None => (self.agent_scope.clone(), None, Value::Null), + None => (self.root_work_scope(), None, Value::Null), }; if parent.is_some() { self.set_last_llm_owner(subagent_id.clone()); @@ -1548,8 +1846,29 @@ impl Session { })); } + // Remembers the latest completed LLM response owned by the turn/root agent so the enclosing + // turn agent scope can export the final assistant output. Subagent-owned responses are + // deliberately excluded; otherwise a worker's last local answer can overwrite the parent + // agent's final synthesis. + fn record_completed_llm_response( + &mut self, + response: Value, + owner_subagent_id: Option, + ) { + if owner_subagent_id.is_none() { + self.record_turn_llm_output(response.clone()); + } + self.add_tool_hints_from_llm_response(response, owner_subagent_id); + } + + fn record_turn_llm_output(&mut self, response: Value) { + if self.turn_scope.is_some() { + self.last_turn_llm_output = Some(response); + } + } + // Resolves tool hook ownership from explicit subagent data first, then private tool hints - // extracted from LLM responses, and finally the agent scope. + // extracted from LLM responses, and finally the turn scope. fn resolve_tool_owner(&mut self, event: &ToolEvent) -> ToolOwnerResolution { self.cleanup_correlation_state(); @@ -1566,18 +1885,18 @@ impl Session { }; } - if self.pending_tool_hints.len() == 1 { - let hint = self.pending_tool_hints.remove(0).hint; - return self.tool_resolution_from_hint(hint, "single_hint"); - } - if let Some(index) = self.matching_tool_hint_index(event) { + let status = if self.pending_tool_hints.len() == 1 { + "single_hint" + } else { + "matched_hint" + }; let hint = self.pending_tool_hints.remove(index).hint; - return self.tool_resolution_from_hint(hint, "matched_hint"); + return self.tool_resolution_from_hint(hint, status); } ToolOwnerResolution { - parent: self.agent_scope.clone(), + parent: self.root_work_scope(), subagent_id: None, status: if self.pending_tool_hints.is_empty() { "agent_fallback" @@ -1589,7 +1908,7 @@ impl Session { } } - // Converts a consumed tool hint into a live parent scope, falling back to the root agent if the + // Converts a consumed tool hint into a live parent scope, falling back to the turn scope if the // hinted subagent has already ended or never existed. fn tool_resolution_from_hint( &mut self, @@ -1599,9 +1918,9 @@ impl Session { let (parent, subagent_id) = match hint.subagent_id.as_deref() { Some(id) => match self.subagents.get(id).cloned() { Some(scope) => (Some(scope), Some(id.to_string())), - None => (self.agent_scope.clone(), None), + None => (self.root_work_scope(), None), }, - None => (self.agent_scope.clone(), None), + None => (self.root_work_scope(), None), }; ToolOwnerResolution { parent, @@ -1619,8 +1938,9 @@ impl Session { } } - // Finds a unique best-scoring tool hint by call id, name, and argument equality. Ties remain - // ambiguous and are not consumed. + // Finds a unique best-scoring tool hint by call id or name-plus-argument equality. Ties remain + // ambiguous and are not consumed. Name-only matches are ignored because high-frequency + // coding-agent tools repeat across parallel workers and are too weak to prove ownership. fn matching_tool_hint_index(&self, event: &ToolEvent) -> Option { let matches: Vec<_> = self .pending_tool_hints @@ -1775,8 +2095,9 @@ fn collect_anthropic_tool_hints( } } -// Appends one provider tool hint when an object carries at least a tool-call id or tool name. -// Argument-only hints are intentionally skipped because they over-match across unrelated tools. +// Appends one provider tool hint when an object carries either a tool-call id or enough +// name-plus-argument data to disambiguate common tool names. Name-only and argument-only hints are +// skipped because they over-match across unrelated tools in parallel coding-agent sessions. fn push_tool_hint( hints: &mut Vec, object: &Value, @@ -1788,37 +2109,46 @@ fn push_tool_hint( ) { let tool_call_id = json_string_at(object, id_paths); let tool_name = json_string_at(object, name_paths); - if tool_call_id.is_none() && tool_name.is_none() { + let arguments = json_value_at(object, argument_paths) + .map(normalize_tool_arguments) + .unwrap_or(Value::Null); + if tool_call_id.is_none() && (tool_name.is_none() || arguments.is_null()) { return; } hints.push(ToolHint { tool_call_id, tool_name, subagent_id: owner_subagent_id.map(ToOwned::to_owned), - arguments: json_value_at(object, argument_paths) - .map(normalize_tool_arguments) - .unwrap_or(Value::Null), + arguments, source: source.to_string(), }); } -// Scores how strongly a pending provider tool hint matches an observed hook event. Tool-call id is -// strongest, tool name is secondary, and exact argument equality is only a tie breaker. +// Scores how strongly a pending provider tool hint matches an observed hook event. A shared +// provider call id is strongest. Without an id match, require both tool name and exact arguments so +// repeated coding-agent tool names cannot claim unrelated hooks. fn tool_hint_match_score(hint: &ToolHint, event: &ToolEvent) -> u8 { let mut score = 0; - if same_optional( + let id_matches = same_optional( hint.tool_call_id.as_deref(), Some(event.tool_call_id.as_str()), - ) { + ); + let name_matches = same_optional(hint.tool_name.as_deref(), Some(event.tool_name.as_str())); + let arguments_match = !hint.arguments.is_null() + && !event.arguments.is_null() + && hint.arguments == event.arguments; + if id_matches { score += 12; } - if same_optional(hint.tool_name.as_deref(), Some(event.tool_name.as_str())) { + if id_matches && name_matches { score += 4; } - if !hint.arguments.is_null() && !event.arguments.is_null() && hint.arguments == event.arguments - { + if id_matches && arguments_match { score += 1; } + if !id_matches && name_matches && arguments_match { + score += 5; + } score } diff --git a/crates/cli/tests/coverage/adapters_tests.rs b/crates/cli/tests/coverage/adapters_tests.rs index 507c26f1..fac410db 100644 --- a/crates/cli/tests/coverage/adapters_tests.rs +++ b/crates/cli/tests/coverage/adapters_tests.rs @@ -619,7 +619,7 @@ fn normalizes_mark_style_events_and_header_session_ids() { &headers, ); let (session_id, metadata) = match &outcome.events[0] { - NormalizedEvent::LlmHint(event) if expected == "prompt" => { + NormalizedEvent::PromptSubmitted(event) if expected == "prompt" => { (event.session_id.as_str(), &event.metadata) } NormalizedEvent::LlmHint(event) if expected == "response" => { @@ -636,6 +636,12 @@ fn normalizes_mark_style_events_and_header_session_ids() { } event => panic!("unexpected event for {event_name}: {event:?}"), }; + if expected == "prompt" { + assert!( + matches!(outcome.events.get(1), Some(NormalizedEvent::LlmHint(_))), + "prompt hooks should also emit a private LLM hint" + ); + } assert_eq!(session_id, "header-session"); assert_eq!(metadata["model"], json!("model-a")); assert_eq!(metadata["cwd"], json!("/repo")); diff --git a/crates/cli/tests/coverage/alignment_claude_code_tests.rs b/crates/cli/tests/coverage/alignment_claude_code_tests.rs index e92a63a4..be590a99 100644 --- a/crates/cli/tests/coverage/alignment_claude_code_tests.rs +++ b/crates/cli/tests/coverage/alignment_claude_code_tests.rs @@ -52,7 +52,11 @@ fn completed_subagent_from_agent_tool_accepts_known_result_keys() { ("subagentId", "subagent-id"), ("subagent_id", "subagent-id-snake"), ] { - let event = tool_event(AgentKind::ClaudeCode, "Agent", json!({ key: expected })); + let event = tool_event( + AgentKind::ClaudeCode, + "Agent", + json!({ key: expected, "status": "completed" }), + ); assert_eq!( completed_subagent_from_agent_tool(&event).as_deref(), Some(expected), @@ -61,6 +65,49 @@ fn completed_subagent_from_agent_tool_accepts_known_result_keys() { } } +#[test] +fn completed_subagent_from_agent_tool_rejects_async_launch_results() { + for status in ["async_launched", "started", "running", "in-progress"] { + let event = tool_event( + AgentKind::ClaudeCode, + "Agent", + json!({ + "agentId": "worker", + "status": status + }), + ); + assert_eq!( + completed_subagent_from_agent_tool(&event), + None, + "status {status} is not a terminal worker result" + ); + } +} + +#[test] +fn completed_subagent_from_agent_tool_requires_terminal_result_evidence() { + assert_eq!( + completed_subagent_from_agent_tool(&tool_event( + AgentKind::ClaudeCode, + "Agent", + json!({ "agentId": "worker" }), + )), + None + ); + assert_eq!( + completed_subagent_from_agent_tool(&tool_event( + AgentKind::ClaudeCode, + "Agent", + json!({ + "agentId": "worker", + "totalDurationMs": 123 + }), + )) + .as_deref(), + Some("worker") + ); +} + #[test] fn completed_subagent_from_agent_tool_rejects_unrelated_tools_and_agents() { assert_eq!( diff --git a/crates/cli/tests/coverage/alignment_tests.rs b/crates/cli/tests/coverage/alignment_tests.rs index fa21268e..acf8e5d6 100644 --- a/crates/cli/tests/coverage/alignment_tests.rs +++ b/crates/cli/tests/coverage/alignment_tests.rs @@ -183,6 +183,13 @@ fn agent_kind_inference_covers_known_provider_names() { ); } +#[test] +fn session_agent_scope_policy_skips_unbounded_coding_agent_sessions() { + assert!(!should_emit_session_agent_scope(AgentKind::ClaudeCode)); + assert!(!should_emit_session_agent_scope(AgentKind::Codex)); + assert!(should_emit_session_agent_scope(AgentKind::Gateway)); +} + #[test] fn request_affinity_key_reads_messages_content_blocks() { let request = LlmRequest { @@ -224,6 +231,100 @@ fn request_affinity_key_reads_chat_completion_string_messages() { ); } +#[test] +fn request_affinity_key_preserves_leading_tagged_context_text() { + let request = LlmRequest { + headers: Map::new(), + content: json!({ + "messages": [ + { + "role": "user", + "content": "\nTrace run 7.\n\n\nToday is 2026-05-19.\n\n\nReview the gateway correlation logic." + } + ] + }), + }; + + assert_eq!( + request_affinity_key(&request).as_deref(), + Some( + " Trace run 7. Today is 2026-05-19. Review the gateway correlation logic." + ) + ); +} + +#[test] +fn request_affinity_key_keeps_task_after_large_prefix() { + let prefix = "volatile context ".repeat(80); + let task = "Review the gateway correlation logic."; + let request = LlmRequest { + headers: Map::new(), + content: json!({ + "messages": [ + { + "role": "user", + "content": format!("{prefix} {task}") + } + ] + }), + }; + + let key = request_affinity_key(&request).unwrap(); + assert!(key.starts_with("volatile context")); + assert!( + key.ends_with(task), + "larger affinity prefixes should preserve the task text after volatile context" + ); +} + +#[test] +fn request_affinity_key_preserves_fully_tagged_prompt_text() { + let request = LlmRequest { + headers: Map::new(), + content: json!({ + "messages": [ + { + "role": "user", + "content": "Review the gateway correlation logic." + } + ] + }), + }; + + assert_eq!( + request_affinity_key(&request).as_deref(), + Some("Review the gateway correlation logic.") + ); +} + +#[test] +fn request_affinity_key_prefers_latest_task_message_over_root_history() { + let request = LlmRequest { + headers: Map::new(), + content: json!({ + "messages": [ + { + "role": "user", + "content": "Can you analyze the whole codebase with parallel subagents?" + }, + { + "role": "assistant", + "content": "I will delegate the directory reviews." + }, + { + "role": "user", + "content": "Thoroughly explore the crates/ffi directory." + } + ] + }), + }; + + assert_eq!( + request_affinity_key(&request).as_deref(), + Some("Thoroughly explore the crates/ffi directory.") + ); +} + #[test] fn request_affinity_key_reads_responses_input_items_and_prompt() { let responses_request = LlmRequest { @@ -269,6 +370,35 @@ fn request_affinity_key_ignores_count_token_style_payloads() { assert_eq!(request_affinity_key(&request), None); } +#[test] +fn request_affinity_key_ignores_tool_results_and_sidecar_json() { + let tool_result = LlmRequest { + headers: Map::new(), + content: json!({ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "tool_result", + "content": "// SPDX-FileCopyrightText: Copyright (c) 2026\npub fn source() {}" + } + ] + } + ] + }), + }; + let sidecar_json = LlmRequest { + headers: Map::new(), + content: json!({ + "input": "{\"parentUuid\":\"scope\",\"childUuid\":\"scope\"}" + }), + }; + + assert_eq!(request_affinity_key(&tool_result), None); + assert_eq!(request_affinity_key(&sidecar_json), None); +} + #[test] fn route_event_through_alias_covers_all_event_variants() { let aliases = aliases(); @@ -290,7 +420,10 @@ fn route_event_through_alias_covers_all_event_variants() { ]; for event in cases { - let was_agent_end = matches!(event, NormalizedEvent::AgentEnded(_)); + let closes_alias = matches!( + event, + NormalizedEvent::AgentEnded(_) | NormalizedEvent::TurnEnded(_) + ); let (event, finished_alias) = route_event_through_alias(event, &aliases); assert_eq!(event.session_id(), "parent"); assert_eq!( @@ -298,7 +431,7 @@ fn route_event_through_alias_covers_all_event_variants() { json!(true), "alias metadata should be stamped on {event:?}" ); - assert_eq!(finished_alias.as_deref(), was_agent_end.then_some("child")); + assert_eq!(finished_alias.as_deref(), closes_alias.then_some("child")); match event { NormalizedEvent::AgentStarted(event) => panic!("unexpected agent start: {event:?}"), diff --git a/crates/cli/tests/coverage/gateway_tests.rs b/crates/cli/tests/coverage/gateway_tests.rs index 08a8c4d5..919ae902 100644 --- a/crates/cli/tests/coverage/gateway_tests.rs +++ b/crates/cli/tests/coverage/gateway_tests.rs @@ -5,7 +5,7 @@ use super::*; use crate::alignment::GatewayRouteKind; use crate::config::GatewayConfig; use crate::server::AppState; -use crate::session::SessionManager; +use crate::session::{LlmGatewayStart, SessionManager}; use axum::body::Body; use axum::extract::State; use axum::http::{HeaderMap, HeaderValue, Method, Request, StatusCode}; @@ -666,7 +666,136 @@ fn response_headers_preserve_duplicates() { assert_eq!(copied.get_all("set-cookie").iter().count(), 2); } -// `stream_response_records_preview_and_truncation` and `streaming_llm_guard_closes_on_drop` were -// removed when the gateway moved to `llm_stream_call_execute`. The runtime now owns stream-end -// lifecycle (start/end events emitted by `LlmStreamWrapper`); core tests cover that contract, -// and the gateway no longer carries a stream preview/truncation helper or a separate guard struct. +#[tokio::test] +async fn streaming_gateway_call_guard_finishes_when_body_is_dropped() { + let manager = SessionManager::new(GatewayConfig::default()); + let prep = manager + .prepare_gateway_call( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("stream-drop".into()), + provider: "openai.responses".into(), + model_name: Some("gpt-test".into()), + subagent_id: None, + conversation_id: None, + generation_id: None, + request_id: None, + request: LlmRequest { + headers: Map::new(), + content: json!({ + "input": "Analyze enough text to create a stable idle-timeout test." + }), + }, + streaming: true, + metadata: json!({}), + }, + ) + .await + .unwrap(); + + let stream: LlmJsonStream = Box::pin(futures_util::stream::pending::< + std::result::Result, + >()); + let body = client_sse_body( + stream, + ProviderRoute::OpenAiResponses, + manager.clone(), + prep.session_id, + prep.owner_subagent_id, + Arc::new(Mutex::new(None)), + ); + + drop(body); + tokio::task::yield_now().await; + + let closed = manager + .close_idle_sessions_at( + std::time::Instant::now() + std::time::Duration::from_secs(1), + std::time::Duration::from_millis(1), + "idle_timeout", + ) + .await + .unwrap(); + assert_eq!(closed, 1); +} + +#[tokio::test] +async fn streaming_body_records_final_response_for_turn_output() { + let subscriber_name = "gateway-stream-final-response-turn-output-test"; + let _ = nemo_flow::api::subscriber::deregister_subscriber(subscriber_name); + let captured_output = Arc::new(Mutex::new(None::)); + let captured = captured_output.clone(); + nemo_flow::api::subscriber::register_subscriber( + subscriber_name, + Arc::new(move |event| { + if event.scope_category() == Some(nemo_flow::api::event::ScopeCategory::End) + && event.name() == "codex-turn" + && event + .metadata() + .and_then(|metadata| metadata.get("session_id")) + .and_then(Value::as_str) + == Some("stream-final") + { + *captured.lock().unwrap() = event.output().cloned(); + } + }), + ) + .unwrap(); + + let manager = SessionManager::new(GatewayConfig::default()); + let prep = manager + .prepare_gateway_call( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("stream-final".into()), + provider: "openai.responses".into(), + model_name: Some("gpt-test".into()), + subagent_id: None, + conversation_id: None, + generation_id: None, + request_id: None, + request: LlmRequest { + headers: Map::new(), + content: json!({ + "input": "Stream enough text to create a final response." + }), + }, + streaming: true, + metadata: json!({}), + }, + ) + .await + .unwrap(); + let session_id = prep.session_id.clone(); + let owner_subagent_id = prep.owner_subagent_id.clone(); + let final_response = json!({ "output_text": "streamed final" }); + let stream: LlmJsonStream = Box::pin(futures_util::stream::empty::< + std::result::Result, + >()); + let body = client_sse_body( + stream, + ProviderRoute::OpenAiResponses, + manager.clone(), + session_id, + owner_subagent_id, + Arc::new(Mutex::new(Some(final_response.clone()))), + ); + let _ = body.collect().await.unwrap(); + + manager + .close_idle_sessions_at( + std::time::Instant::now() + std::time::Duration::from_secs(1), + std::time::Duration::from_millis(1), + "idle_timeout", + ) + .await + .unwrap(); + + assert_eq!(*captured_output.lock().unwrap(), Some(final_response)); + nemo_flow::api::subscriber::deregister_subscriber(subscriber_name).unwrap(); +} + +// `stream_response_records_preview_and_truncation` was removed when the gateway moved to +// `llm_stream_call_execute`. The runtime now owns stream-end lifecycle (start/end events emitted +// by `LlmStreamWrapper`); core tests cover that contract, and the gateway no longer carries a +// stream preview/truncation helper. diff --git a/crates/cli/tests/coverage/server_tests.rs b/crates/cli/tests/coverage/server_tests.rs index a2a8c142..b4d47c43 100644 --- a/crates/cli/tests/coverage/server_tests.rs +++ b/crates/cli/tests/coverage/server_tests.rs @@ -273,7 +273,8 @@ async fn serve_listener_observability_plugin_records_non_hermes_hooks() { "SessionEnd", ), ] { - for hook_event_name in [start_event, end_event] { + let hook_events = vec![start_event, "UserPromptSubmit", end_event]; + for hook_event_name in hook_events { let response = client .post(format!("{url}{path}")) .json(&json!({ @@ -302,8 +303,9 @@ async fn serve_listener_observability_plugin_records_non_hermes_hooks() { }) .filter_map(|event| event["name"].as_str().map(ToOwned::to_owned)) .collect::>(); - assert!(agent_starts.contains(&"codex".to_string())); - assert!(agent_starts.contains(&"claude-code".to_string())); + assert!(agent_starts.contains(&"codex-turn".to_string())); + assert!(agent_starts.contains(&"claude-code-turn".to_string())); + assert!(!agent_starts.contains(&"claude-code".to_string())); } #[tokio::test] diff --git a/crates/cli/tests/coverage/session_tests.rs b/crates/cli/tests/coverage/session_tests.rs index 6fa76fba..2a218472 100644 --- a/crates/cli/tests/coverage/session_tests.rs +++ b/crates/cli/tests/coverage/session_tests.rs @@ -66,6 +66,17 @@ fn event_has_session_id(event: &Value, session_id: &str) -> bool { || event["data"]["extra"]["session_id"] == json!(session_id) } +fn active_turn_uuid(session: &Session) -> uuid::Uuid { + active_turn_scope(session).uuid +} + +fn active_turn_scope(session: &Session) -> &ScopeHandle { + session + .turn_scope + .as_ref() + .expect("expected active turn scope") +} + async fn alignment_alias(manager: &SessionManager, session_id: &str) -> Option { manager.alignment.lock().await.alias_for_session(session_id) } @@ -157,7 +168,7 @@ async fn nests_agent_subagent_and_tool_lifecycle() { } #[tokio::test] -async fn parallel_subagents_are_siblings_under_agent_scope() { +async fn parallel_subagents_are_siblings_under_turn_scope() { let manager = SessionManager::new(session_test_config()); manager .apply_events( @@ -187,17 +198,180 @@ async fn parallel_subagents_are_siblings_under_agent_scope() { let sessions = manager.inner.lock().await; let session = sessions.get("sibling-subagents").unwrap(); - let agent_uuid = session.agent_scope.as_ref().unwrap().uuid; + assert!(session.agent_scope.is_none()); + let turn_uuid = active_turn_uuid(session); + assert_eq!( + session.subagents.get("worker-1").unwrap().scope_type, + ScopeType::Agent + ); + assert_eq!( + session + .subagents + .get("worker-1") + .unwrap() + .metadata + .as_ref() + .unwrap()["nemo_flow_scope_role"], + json!("subagent") + ); assert_eq!( session.subagents.get("worker-1").unwrap().parent_uuid, - Some(agent_uuid) + Some(turn_uuid) ); assert_eq!( session.subagents.get("worker-2").unwrap().parent_uuid, - Some(agent_uuid) + Some(turn_uuid) ); } +#[tokio::test] +async fn codex_turn_is_agent_scope_with_turn_role_metadata() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(codex_session_event( + "codex-turn-agent", + "SessionStart", + json!({ "transcript_path": "/tmp/session.jsonl" }), + )), + NormalizedEvent::PromptSubmitted(SessionEvent { + session_id: "codex-turn-agent".into(), + agent_kind: AgentKind::Codex, + event_name: "UserPromptSubmit".into(), + payload: json!({ "prompt": "inspect the repo" }), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let sessions = manager.inner.lock().await; + let session = sessions.get("codex-turn-agent").unwrap(); + assert!(session.agent_scope.is_none()); + let turn = active_turn_scope(session); + assert_eq!(turn.name, "codex-turn"); + assert_eq!(turn.scope_type, ScopeType::Agent); + assert_eq!( + turn.metadata.as_ref().unwrap()["nemo_flow_scope_role"], + json!("turn") + ); +} + +#[tokio::test] +async fn turn_output_uses_last_root_owned_llm_response() { + let subscriber_name = "cli-turn-output-root-llm-test"; + let _ = deregister_subscriber(subscriber_name); + let captured_output = Arc::new(StdMutex::new(None::)); + let captured = captured_output.clone(); + register_subscriber( + subscriber_name, + Arc::new(move |event| { + if event.scope_category() == Some(ScopeCategory::End) + && event.name() == "claude-code-turn" + && event + .metadata() + .and_then(|metadata| metadata.get("session_id")) + .and_then(Value::as_str) + == Some("turn-output") + { + *captured.lock().unwrap() = event.output().cloned(); + } + }), + ) + .unwrap(); + + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(session_event("turn-output", "SessionStart")), + NormalizedEvent::PromptSubmitted(SessionEvent { + session_id: "turn-output".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "UserPromptSubmit".into(), + payload: json!({ "prompt": "summarize" }), + metadata: json!({}), + }), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "turn-output".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker".into(), + payload: json!({}), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let worker_llm = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("turn-output".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + manager + .end_llm( + worker_llm, + json!({ "output_text": "worker answer" }), + json!({}), + ) + .await + .unwrap(); + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::SubagentEnded(SubagentEvent { + session_id: "turn-output".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStop".into(), + subagent_id: "worker".into(), + payload: json!({ "done": true }), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + let final_response = json!({ "output_text": "final answer" }); + let root_llm = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("turn-output".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + manager + .end_llm(root_llm, final_response.clone(), json!({})) + .await + .unwrap(); + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::AgentEnded(session_event( + "turn-output", + "SessionEnd", + ))], + ) + .await + .unwrap(); + + assert_eq!(*captured_output.lock().unwrap(), Some(final_response)); + deregister_subscriber(subscriber_name).unwrap(); +} + #[tokio::test] async fn new_subagent_claims_first_unhinted_llm_when_siblings_active() { let manager = SessionManager::new(session_test_config()); @@ -338,10 +512,11 @@ async fn codex_subagent_session_start_uses_transcript_parent_thread() { let sessions = manager.inner.lock().await; assert!(sessions.get("child-thread").is_none()); let parent = sessions.get("parent-thread").unwrap(); - let agent_uuid = parent.agent_scope.as_ref().unwrap().uuid; + assert!(parent.agent_scope.is_none()); + let turn_uuid = active_turn_uuid(parent); assert_eq!( parent.subagents.get("child-thread").unwrap().parent_uuid, - Some(agent_uuid) + Some(turn_uuid) ); drop(sessions); @@ -1020,7 +1195,11 @@ async fn writes_atif_on_session_end_from_plugin_config() { assert!( atif["extra"]["observed_events"] .as_array() - .is_some_and(|events| events.len() >= 3) + .is_some_and(|events| events.len() >= 2) + ); + assert_eq!( + atif["extra"]["observed_events"][0]["name"], + json!("codex-turn") ); } @@ -1076,10 +1255,10 @@ async fn duplicate_agent_end_does_not_overwrite_atif_with_empty_session() { .unwrap(); let first = read_atif_for_session(&atif_dir, "dup-end"); - let first_steps = first["steps"].as_array().unwrap().len(); + let first_events = first["extra"]["observed_events"].as_array().unwrap().len(); assert!( - first_steps > 0, - "first AgentEnded should produce a non-empty ATIF" + first_events > 0, + "first AgentEnded should produce observed ATIF events" ); // Second AgentEnded for the same session — must be a no-op, not overwrite with empty. @@ -1099,10 +1278,10 @@ async fn duplicate_agent_end_does_not_overwrite_atif_with_empty_session() { clear_plugin_configuration().unwrap(); let second = read_atif_for_session(&atif_dir, "dup-end"); - let second_steps = second["steps"].as_array().unwrap().len(); + let second_events = second["extra"]["observed_events"].as_array().unwrap().len(); assert_eq!( - first_steps, second_steps, - "duplicate AgentEnded must not change the ATIF step count" + first_events, second_events, + "duplicate AgentEnded must not change the ATIF event count" ); } @@ -1317,7 +1496,7 @@ async fn hermes_orphan_subagent_stop_exports_readable_mark_with_lineage() { ); assert_eq!( steps[0]["extra"]["ancestry"]["parent_name"], - json!("hermes") + json!("hermes-turn") ); } @@ -1932,15 +2111,9 @@ async fn ambiguous_llm_hints_fall_back_to_agent_scope() { .await .unwrap(); - let agent_uuid = { + let turn_uuid = { let sessions = manager.inner.lock().await; - sessions - .get("ambiguous-session") - .unwrap() - .agent_scope - .as_ref() - .unwrap() - .uuid + active_turn_uuid(sessions.get("ambiguous-session").unwrap()) }; let active = manager .start_llm( @@ -1964,7 +2137,7 @@ async fn ambiguous_llm_hints_fall_back_to_agent_scope() { .await .unwrap(); - assert_eq!(active.handle.parent_uuid, Some(agent_uuid)); + assert_eq!(active.handle.parent_uuid, Some(turn_uuid)); assert_eq!( active.handle.metadata.as_ref().unwrap()["llm_correlation_status"], json!("ambiguous_fallback") @@ -2475,12 +2648,12 @@ async fn claude_agent_tool_completion_closes_subagents_before_final_llm() { .await .unwrap(); - let agent_uuid = { + let turn_uuid = { let sessions = manager.inner.lock().await; let session = sessions.get("agent-tool-finish").unwrap(); assert!(session.subagents.is_empty()); assert!(session.subagent_stacks.is_empty()); - session.agent_scope.as_ref().unwrap().uuid + active_turn_uuid(session) }; let final_llm = manager .start_llm( @@ -2493,7 +2666,7 @@ async fn claude_agent_tool_completion_closes_subagents_before_final_llm() { .await .unwrap(); - assert_eq!(final_llm.handle.parent_uuid, Some(agent_uuid)); + assert_eq!(final_llm.handle.parent_uuid, Some(turn_uuid)); assert_eq!( final_llm.handle.metadata.as_ref().unwrap()["llm_correlation_status"], json!("agent_fallback") @@ -2504,6 +2677,87 @@ async fn claude_agent_tool_completion_closes_subagents_before_final_llm() { .unwrap(); } +#[tokio::test] +async fn claude_agent_tool_async_launch_keeps_subagent_open_for_later_hooks() { + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(session_event("agent-tool-async", "SessionStart")), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "agent-tool-async".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "SubagentStart".into(), + subagent_id: "worker".into(), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::ToolEnded(ToolEvent { + session_id: "agent-tool-async".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "PostToolUse".into(), + tool_call_id: "agent-tool".into(), + tool_name: "Agent".into(), + subagent_id: None, + arguments: Value::Null, + result: json!({ + "agentId": "worker", + "status": "async_launched" + }), + status: Some("success".into()), + payload: json!({}), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let worker_uuid = { + let sessions = manager.inner.lock().await; + let session = sessions.get("agent-tool-async").unwrap(); + session.subagents.get("worker").unwrap().uuid + }; + + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::ToolStarted(ToolEvent { + session_id: "agent-tool-async".into(), + agent_kind: AgentKind::ClaudeCode, + event_name: "PreToolUse".into(), + tool_call_id: "worker-tool".into(), + tool_name: "Read".into(), + subagent_id: Some("worker".into()), + arguments: json!({ "file_path": "README.md" }), + result: Value::Null, + status: None, + payload: json!({}), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + let sessions = manager.inner.lock().await; + let tool = sessions + .get("agent-tool-async") + .unwrap() + .tools + .get("worker-tool") + .unwrap(); + assert_eq!(tool.parent_uuid, Some(worker_uuid)); + assert_eq!( + tool.metadata.as_ref().unwrap()["tool_correlation_status"], + json!("explicit") + ); + assert_eq!( + tool.metadata.as_ref().unwrap()["tool_correlation_subagent_id"], + json!("worker") + ); +} + #[tokio::test] async fn agent_end_closes_active_tools_and_duplicate_starts_are_ignored() { let manager = SessionManager::new(session_test_config()); @@ -2604,6 +2858,137 @@ async fn gateway_shutdown_closes_codex_sessions_without_session_end_hook() { assert!(manager.inner.lock().await.is_empty()); } +#[tokio::test] +async fn idle_timeout_closes_codex_session_without_session_end_hook() { + let subscriber_name = "cli-idle-timeout-close-reason-test"; + let _ = deregister_subscriber(subscriber_name); + let close_statuses = Arc::new(StdMutex::new(Vec::<(String, String)>::new())); + let captured = close_statuses.clone(); + register_subscriber( + subscriber_name, + Arc::new(move |event| { + if event.scope_category() == Some(ScopeCategory::End) + && event + .metadata() + .and_then(|metadata| metadata.get("session_id")) + .and_then(Value::as_str) + == Some("codex-idle") + { + let status = event + .output() + .and_then(|output| output.get("status")) + .and_then(Value::as_str) + .unwrap_or_default() + .to_string(); + captured + .lock() + .unwrap() + .push((event.name().to_string(), status)); + } + }), + ) + .unwrap(); + + let manager = SessionManager::new(session_test_config()); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(SessionEvent { + session_id: "codex-idle".into(), + agent_kind: AgentKind::Codex, + event_name: "SessionStart".into(), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: "codex-idle".into(), + agent_kind: AgentKind::Codex, + event_name: "SubagentStart".into(), + subagent_id: "worker".into(), + payload: json!({}), + metadata: json!({ "session_id": "codex-idle" }), + }), + ], + ) + .await + .unwrap(); + + let closed = manager + .close_idle_sessions_at( + Instant::now() + AGENT_IDLE_TIMEOUT + Duration::from_secs(1), + AGENT_IDLE_TIMEOUT, + "idle_timeout", + ) + .await + .unwrap(); + + assert_eq!(closed, 1); + { + let sessions = manager.inner.lock().await; + let session = sessions.get("codex-idle").unwrap(); + assert!(session.turn_scope.is_none()); + assert!(session.subagents.is_empty()); + } + + let statuses = close_statuses.lock().unwrap().clone(); + assert!( + statuses.contains(&("subagent:worker".to_string(), "idle_timeout".to_string())), + "expected idle timeout to close the child scope, got {statuses:?}" + ); + assert!( + statuses.contains(&("codex-turn".to_string(), "idle_timeout".to_string())), + "expected idle timeout to close the turn scope, got {statuses:?}" + ); + + deregister_subscriber(subscriber_name).unwrap(); +} + +#[tokio::test] +async fn idle_timeout_waits_for_active_gateway_llm_call() { + let manager = SessionManager::new(session_test_config()); + let prep = manager + .prepare_gateway_call( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some("active-gateway-call".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + + let closed = manager + .close_idle_sessions_at( + Instant::now() + AGENT_IDLE_TIMEOUT + Duration::from_secs(1), + AGENT_IDLE_TIMEOUT, + "idle_timeout", + ) + .await + .unwrap(); + assert_eq!(closed, 0); + assert!( + manager + .inner + .lock() + .await + .contains_key("active-gateway-call") + ); + + manager.finish_gateway_call(&prep.session_id).await; + let closed = manager + .close_idle_sessions_at( + Instant::now() + AGENT_IDLE_TIMEOUT + Duration::from_secs(1), + AGENT_IDLE_TIMEOUT, + "idle_timeout", + ) + .await + .unwrap(); + + assert_eq!(closed, 1); + assert!(manager.inner.lock().await.is_empty()); +} + #[tokio::test] async fn gateway_shutdown_attempts_remaining_sessions_after_close_error() { let subscriber_name = "cli-close-all-deferred-error-test"; @@ -2627,7 +3012,7 @@ async fn gateway_shutdown_attempts_remaining_sessions_after_close_error() { .unwrap(); let config = SessionConfig::default(); - let mut bad = Session::new("bad-shutdown".into(), AgentKind::Codex, config.clone()); + let mut bad = Session::new("bad-shutdown".into(), AgentKind::ClaudeCode, config.clone()); bad.agent_scope = Some( ScopeHandle::builder() .name("missing-agent-scope") @@ -2635,11 +3020,12 @@ async fn gateway_shutdown_attempts_remaining_sessions_after_close_error() { .build(), ); - let mut good = Session::new("good-shutdown".into(), AgentKind::Codex, config); + let mut good = Session::new("good-shutdown".into(), AgentKind::ClaudeCode, config); let stack = good.scope_stack.clone(); TASK_SCOPE_STACK .scope(stack, async { - good.ensure_agent_started(json!({})).unwrap(); + good.open_turn(json!({}), json!({ "prompt": "close me" }), "test") + .unwrap(); }) .await; @@ -2861,6 +3247,130 @@ async fn llm_response_tool_hint_claims_next_tool_hook() { ); } +#[tokio::test] +async fn single_tool_hint_does_not_claim_same_name_with_different_call_and_args() { + for (agent_kind, label, tool_name, expected_args, actual_args) in [ + ( + AgentKind::ClaudeCode, + "claude", + "Read", + json!({ "file_path": "README.md" }), + json!({ "file_path": "Cargo.toml" }), + ), + ( + AgentKind::Codex, + "codex", + "exec_command", + json!({ "cmd": "pwd" }), + json!({ "cmd": "ls" }), + ), + ( + AgentKind::Hermes, + "hermes", + "shell", + json!({ "command": "pwd" }), + json!({ "command": "ls" }), + ), + ] { + let manager = SessionManager::new(session_test_config()); + let session_id = format!("weak-tool-hint-{label}"); + manager + .apply_events( + &HeaderMap::new(), + vec![ + NormalizedEvent::AgentStarted(SessionEvent { + session_id: session_id.clone(), + agent_kind, + event_name: "SessionStart".into(), + payload: json!({}), + metadata: json!({}), + }), + NormalizedEvent::SubagentStarted(SubagentEvent { + session_id: session_id.clone(), + agent_kind, + event_name: "SubagentStart".into(), + subagent_id: "worker".into(), + payload: json!({}), + metadata: json!({}), + }), + ], + ) + .await + .unwrap(); + + let turn_uuid = { + let sessions = manager.inner.lock().await; + active_turn_uuid(sessions.get(&session_id).unwrap()) + }; + let active = manager + .start_llm( + &HeaderMap::new(), + LlmGatewayStart { + session_id: Some(session_id.clone()), + subagent_id: Some("worker".into()), + ..llm_start() + }, + ) + .await + .unwrap(); + manager + .end_llm( + active, + json!({ + "output": [ + { + "type": "function_call", + "call_id": "expected-call", + "name": tool_name, + "arguments": serde_json::to_string(&expected_args).unwrap() + } + ] + }), + json!({}), + ) + .await + .unwrap(); + + manager + .apply_events( + &HeaderMap::new(), + vec![NormalizedEvent::ToolStarted(ToolEvent { + session_id: session_id.clone(), + agent_kind, + event_name: "PreToolUse".into(), + tool_call_id: "actual-call".into(), + tool_name: tool_name.into(), + subagent_id: None, + arguments: actual_args, + result: Value::Null, + status: None, + payload: json!({}), + metadata: json!({}), + })], + ) + .await + .unwrap(); + + let sessions = manager.inner.lock().await; + let handle = sessions + .get(&session_id) + .unwrap() + .tools + .get("actual-call") + .unwrap(); + assert_eq!(handle.parent_uuid, Some(turn_uuid), "case {label}"); + assert_eq!( + handle.metadata.as_ref().unwrap()["tool_correlation_status"], + json!("ambiguous_fallback"), + "case {label}" + ); + assert!( + handle.metadata.as_ref().unwrap()["tool_correlation_subagent_id"].is_null(), + "case {label}" + ); + } +} + #[test] fn openai_response_tool_hints_ignore_non_tool_output_items() { let mut hints = Vec::new(); @@ -2890,6 +3400,34 @@ fn openai_response_tool_hints_ignore_non_tool_output_items() { assert_eq!(hints[0].tool_call_id.as_deref(), Some("call-1")); } +#[test] +fn provider_tool_hints_require_call_id_or_name_with_arguments() { + let mut hints = Vec::new(); + + collect_openai_response_tool_hints( + &json!({ + "output": [ + { + "type": "function_call", + "name": "Read" + }, + { + "type": "function_call", + "name": "Read", + "arguments": "{\"file_path\":\"README.md\"}" + } + ] + }), + Some("worker"), + &mut hints, + ); + + assert_eq!(hints.len(), 1); + assert_eq!(hints[0].tool_call_id.as_deref(), None); + assert_eq!(hints[0].tool_name.as_deref(), Some("Read")); + assert_eq!(hints[0].arguments, json!({ "file_path": "README.md" })); +} + #[tokio::test] async fn multiple_tool_hints_resolve_by_tool_call_id() { let manager = SessionManager::new(session_test_config()); @@ -3005,15 +3543,9 @@ async fn hint_for_missing_subagent_falls_back_to_agent_scope() { .await .unwrap(); - let agent_uuid = { + let turn_uuid = { let sessions = manager.inner.lock().await; - sessions - .get("missing-hint-owner") - .unwrap() - .agent_scope - .as_ref() - .unwrap() - .uuid + active_turn_uuid(sessions.get("missing-hint-owner").unwrap()) }; let active = manager .start_llm( @@ -3026,7 +3558,7 @@ async fn hint_for_missing_subagent_falls_back_to_agent_scope() { .await .unwrap(); - assert_eq!(active.handle.parent_uuid, Some(agent_uuid)); + assert_eq!(active.handle.parent_uuid, Some(turn_uuid)); assert_eq!( active.handle.metadata.as_ref().unwrap()["llm_correlation_status"], json!("single_hint") @@ -3112,7 +3644,7 @@ fn session_test_config() -> GatewayConfig { } #[tokio::test] -async fn turn_ended_is_noop_for_session_with_no_agent_scope() { +async fn turn_ended_is_noop_without_active_turn_scope() { let temp = tempfile::tempdir().unwrap(); let config = GatewayConfig { bind: "127.0.0.1:0".parse().unwrap(), diff --git a/crates/core/src/observability/plugin_component.rs b/crates/core/src/observability/plugin_component.rs index a1ba7274..4322948c 100644 --- a/crates/core/src/observability/plugin_component.rs +++ b/crates/core/src/observability/plugin_component.rs @@ -12,8 +12,9 @@ //! so configuration remains portable across bindings. Agent Trajectory //! Observability Format (ATOF), OpenTelemetry, and OpenInference each register //! one global subscriber when enabled. Agent Trajectory Interchange Format -//! (ATIF) uses a global dispatcher that detects direct child agent scopes and -//! creates one scope-local exporter for each top-level agent run. +//! (ATIF) uses a global dispatcher that detects top-level agent scopes and +//! creates one scope-local exporter for each trajectory run. Coding-agent turns +//! that need bounded traces are represented as agent scopes with role metadata. use std::collections::HashMap; use std::future::Future; @@ -170,12 +171,12 @@ impl Default for AtofSectionConfig { } } -/// Per-agent ATIF trajectory exporter config. +/// Per-trajectory ATIF exporter config. /// /// When enabled, this section creates a dispatcher that opens a separate -/// [`crate::observability::atif::AtifExporter`] for each top-level agent scope. The `{session_id}` -/// placeholder in [`AtifSectionConfig::filename_template`] is required so -/// concurrent sibling agents cannot overwrite each other's trajectory files. +/// [`crate::observability::atif::AtifExporter`] for each top-level agent or turn scope. The +/// `{session_id}` placeholder in [`AtifSectionConfig::filename_template`] is required so +/// concurrent sibling trajectories cannot overwrite each other's files. #[derive(Debug, Clone, Serialize, Deserialize)] #[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] pub struct AtifSectionConfig { @@ -200,7 +201,7 @@ pub struct AtifSectionConfig { /// Directory containing trajectory JSON files. #[serde(default, skip_serializing_if = "Option::is_none")] pub output_directory: Option, - /// Filename template. `{session_id}` is replaced with the top-level agent scope UUID. + /// Filename template. `{session_id}` is replaced with the top-level trajectory scope UUID. #[serde(default = "default_atif_filename_template")] pub filename_template: String, } @@ -642,11 +643,11 @@ impl AtifDispatcher { } fn observe_global(&mut self, event: &Event, subscriber_prefix: &str, state: Arc>) { - if self.last_error.is_some() || !is_top_level_agent_start(event) { + if self.last_error.is_some() || !is_top_level_trajectory_start(event) { return; } - // The top-level agent scope UUID is the ATIF session ID. The global + // The top-level trajectory scope UUID is the ATIF session ID. The global // dispatcher records the start event itself because the scope-local // subscriber is attached after that start event has already been // emitted. @@ -667,8 +668,8 @@ impl AtifDispatcher { let agent_uuid = event.uuid(); let name = format!("{subscriber_prefix}{agent_uuid}"); let callback = atif_scope_subscriber(state, agent_uuid); - // Attach the per-agent subscriber to the agent scope rather than the - // global registry so sibling top-level agents never share events. + // Attach the scoped subscriber to the trajectory root rather than the + // global registry so sibling top-level trajectories never share events. if let Err(err) = scope_register_subscriber(&agent_uuid, &name, callback) { self.last_error = Some(format!("failed to register ATIF scope subscriber: {err}")); } else { @@ -858,10 +859,11 @@ fn write_atif_file(write: &PendingAtifWrite) -> std::io::Result<()> { Ok(()) } -fn is_top_level_agent_start(event: &Event) -> bool { - if event.scope_category() != Some(ScopeCategory::Start) - || event.scope_type() != Some(ScopeType::Agent) - { +fn is_top_level_trajectory_start(event: &Event) -> bool { + if event.scope_category() != Some(ScopeCategory::Start) { + return false; + } + if event.scope_type() != Some(ScopeType::Agent) { return false; } let Some(parent_uuid) = event.parent_uuid() else { From 8060db58f68bc97b555c925b5382e6abb3874162 Mon Sep 17 00:00:00 2001 From: Will Killian Date: Tue, 19 May 2026 12:49:04 -0400 Subject: [PATCH 5/5] Address coderabbit feedback Signed-off-by: Will Killian --- crates/cli/src/alignment/codex.rs | 34 ++++++++++++------- crates/cli/src/alignment/mod.rs | 21 ++++++++---- crates/cli/src/session.rs | 6 ++-- .../tests/coverage/alignment_codex_tests.rs | 27 ++++++++------- crates/cli/tests/coverage/alignment_tests.rs | 7 ++++ crates/cli/tests/coverage/session_tests.rs | 20 +++++++++++ 6 files changed, 80 insertions(+), 35 deletions(-) diff --git a/crates/cli/src/alignment/codex.rs b/crates/cli/src/alignment/codex.rs index e510ec88..9c6d8078 100644 --- a/crates/cli/src/alignment/codex.rs +++ b/crates/cli/src/alignment/codex.rs @@ -118,30 +118,38 @@ fn is_openai_route(route: GatewayRouteKind) -> bool { // Extracts Codex subagent thread-spawn context from a SessionStart. It looks first at the hook // payload, then hook metadata, then the first transcript line. Returning None keeps ordinary Codex // root sessions as root sessions and prevents self-parenting loops. -pub(crate) fn subagent_context(event: &SessionEvent) -> Option { +pub(crate) async fn subagent_context(event: &SessionEvent) -> Option { if event.agent_kind != AgentKind::Codex { return None; } - subagent_context_from_value(&event.payload) + let context = match subagent_context_from_value(&event.payload) .or_else(|| subagent_context_from_value(&event.metadata)) - .or_else(|| subagent_context_from_transcript(event)) - .filter(|context| context.parent_session_id != event.session_id) + { + Some(context) => Some(context), + None => subagent_context_from_transcript(event).await, + }; + context.filter(|context| context.parent_session_id != event.session_id) } // Codex sometimes supplies only a transcript path in the hook payload. The first transcript line // is a `session_meta` object and carries the thread-spawn parent id that Phoenix needs for // parentage. Reading one line keeps this cheap and avoids treating the full transcript as input. -fn subagent_context_from_transcript(event: &SessionEvent) -> Option { +async fn subagent_context_from_transcript(event: &SessionEvent) -> Option { let transcript_path = json_string_at(&event.metadata, &[&["transcript_path"][..]]) .or_else(|| json_string_at(&event.payload, &[&["transcript_path"][..]]))?; - let file = std::fs::File::open(transcript_path).ok()?; - let mut reader = BufReader::new(file); - let mut line = String::new(); - if reader.read_line(&mut line).ok()? == 0 { - return None; - } - let value = serde_json::from_str::(&line).ok()?; - subagent_context_from_value(&value) + tokio::task::spawn_blocking(move || { + let file = std::fs::File::open(transcript_path).ok()?; + let mut reader = BufReader::new(file); + let mut line = String::new(); + if reader.read_line(&mut line).ok()? == 0 { + return None; + } + let value = serde_json::from_str::(&line).ok()?; + subagent_context_from_value(&value) + }) + .await + .ok() + .flatten() } // Searches the known Codex shapes for thread-spawn data. Recent traces have placed diff --git a/crates/cli/src/alignment/mod.rs b/crates/cli/src/alignment/mod.rs index ed937d15..7eb94feb 100644 --- a/crates/cli/src/alignment/mod.rs +++ b/crates/cli/src/alignment/mod.rs @@ -19,6 +19,7 @@ use crate::model::{AgentKind, LlmEvent, NormalizedEvent, SessionEvent, SubagentE pub(crate) mod claude_code; pub(crate) mod codex; +const REQUEST_AFFINITY_KEY_MIN_CHARS: usize = 24; const REQUEST_AFFINITY_KEY_MAX_CHARS: usize = 4096; #[derive(Debug, Clone)] @@ -292,19 +293,23 @@ pub(crate) fn should_emit_session_agent_scope(agent_kind: AgentKind) -> bool { // session. Codex is the first such harness: it starts child threads with parent-thread metadata. // Future harness-specific detectors should plug in here so the session manager can stay provider // neutral. -pub(crate) fn subagent_session_context(event: &SessionEvent) -> Option { - codex::subagent_context(event).map(SubagentSessionContext::Codex) +pub(crate) async fn subagent_session_context( + event: &SessionEvent, +) -> Option { + codex::subagent_context(event) + .await + .map(SubagentSessionContext::Codex) } // Converts an AgentStarted event into a pending child-session record when a harness explicitly // reports parentage. The caller still decides whether the child session is empty enough to promote. -pub(crate) fn pending_subagent_start( +pub(crate) async fn pending_subagent_start( event: &mut NormalizedEvent, ) -> Option<(String, PendingSubagentStart)> { let NormalizedEvent::AgentStarted(session_event) = event else { return None; }; - let context = subagent_session_context(session_event)?; + let context = subagent_session_context(session_event).await?; let child_session_id = session_event.session_id.clone(); if context.parent_session_id() == child_session_id { return None; @@ -373,7 +378,7 @@ pub(crate) fn llm_owner_metadata(scope_metadata: Option<&Value>) -> Value { pub(crate) fn request_affinity_key(request: &LlmRequest) -> Option { let task_text = request_user_task_text(&request.content)?; let normalized = normalize_affinity_text(&task_text); - (normalized.len() >= 24) + (normalized.chars().count() >= REQUEST_AFFINITY_KEY_MIN_CHARS) .then(|| truncate_affinity_text(&normalized, REQUEST_AFFINITY_KEY_MAX_CHARS)) } @@ -582,8 +587,12 @@ fn affinity_candidate_text(text: &str) -> Option { } fn looks_like_json_payload(text: &str) -> bool { + let trimmed = text.trim_start(); + if !matches!(trimmed.chars().next(), Some('{' | '[')) { + return false; + } matches!( - serde_json::from_str::(text), + serde_json::from_str::(trimmed), Ok(Value::Object(_) | Value::Array(_)) ) } diff --git a/crates/cli/src/session.rs b/crates/cli/src/session.rs index bb289863..f4369df2 100644 --- a/crates/cli/src/session.rs +++ b/crates/cli/src/session.rs @@ -537,9 +537,7 @@ impl SessionManager { // thread as the LLM owner. fn apply_start_alias(start: &mut LlmGatewayStart, alias: &SessionAlias) { start.session_id = Some(alias.parent_session_id.clone()); - if start.subagent_id.is_none() { - start.subagent_id = Some(alias.subagent_id.clone()); - } + start.subagent_id = Some(alias.subagent_id.clone()); start.metadata = merge_metadata(start.metadata.clone(), alias.metadata()); } @@ -552,7 +550,7 @@ async fn queue_or_promote_child_start( alignment_state: &mut SessionAlignmentState, config: SessionConfig, ) -> Result { - let Some((child_session_id, pending)) = alignment::pending_subagent_start(event) else { + let Some((child_session_id, pending)) = alignment::pending_subagent_start(event).await else { return Ok(false); }; if sessions diff --git a/crates/cli/tests/coverage/alignment_codex_tests.rs b/crates/cli/tests/coverage/alignment_codex_tests.rs index 22858d16..a14199b2 100644 --- a/crates/cli/tests/coverage/alignment_codex_tests.rs +++ b/crates/cli/tests/coverage/alignment_codex_tests.rs @@ -138,10 +138,10 @@ fn chatgpt_oauth_strip_preserves_provider_keys_and_non_openai_routes() { ); } -#[test] -fn subagent_context_reads_payload_metadata_and_rejects_non_subagents() { +#[tokio::test] +async fn subagent_context_reads_payload_metadata_and_rejects_non_subagents() { let payload_event = codex_session_event("child", thread_spawn("parent"), json!({})); - let payload_context = subagent_context(&payload_event).unwrap(); + let payload_context = subagent_context(&payload_event).await.unwrap(); assert_eq!(payload_context.parent_session_id, "parent"); assert_eq!(payload_context.nickname.as_deref(), Some("Curie")); assert_eq!(payload_context.role.as_deref(), Some("worker")); @@ -149,20 +149,23 @@ fn subagent_context_reads_payload_metadata_and_rejects_non_subagents() { let metadata_event = codex_session_event("child", json!({}), thread_spawn("parent")); assert_eq!( - subagent_context(&metadata_event).unwrap().parent_session_id, + subagent_context(&metadata_event) + .await + .unwrap() + .parent_session_id, "parent" ); let self_parent = codex_session_event("child", thread_spawn("child"), json!({})); - assert!(subagent_context(&self_parent).is_none()); + assert!(subagent_context(&self_parent).await.is_none()); let mut non_codex = codex_session_event("child", thread_spawn("parent"), json!({})); non_codex.agent_kind = AgentKind::ClaudeCode; - assert!(subagent_context(&non_codex).is_none()); + assert!(subagent_context(&non_codex).await.is_none()); } -#[test] -fn subagent_context_reads_first_transcript_line() { +#[tokio::test] +async fn subagent_context_reads_first_transcript_line() { let temp = tempfile::tempdir().unwrap(); let transcript = temp.path().join("rollout.jsonl"); std::fs::write( @@ -189,16 +192,16 @@ fn subagent_context_reads_first_transcript_line() { .unwrap(); let event = codex_session_event("child", json!({ "transcript_path": transcript }), json!({})); - let context = subagent_context(&event).unwrap(); + let context = subagent_context(&event).await.unwrap(); assert_eq!(context.parent_session_id, "parent-from-transcript"); assert_eq!(context.nickname.as_deref(), Some("Noether")); assert_eq!(context.role.as_deref(), Some("explorer")); } -#[test] -fn subagent_metadata_start_event_and_alias_share_thread_fields() { +#[tokio::test] +async fn subagent_metadata_start_event_and_alias_share_thread_fields() { let event = codex_session_event("child", thread_spawn("parent"), json!({ "keep": true })); - let context = subagent_context(&event).unwrap(); + let context = subagent_context(&event).await.unwrap(); let metadata = augment_subagent_metadata(event.metadata.clone(), &context); assert_eq!(metadata["keep"], json!(true)); diff --git a/crates/cli/tests/coverage/alignment_tests.rs b/crates/cli/tests/coverage/alignment_tests.rs index acf8e5d6..bd1eb097 100644 --- a/crates/cli/tests/coverage/alignment_tests.rs +++ b/crates/cli/tests/coverage/alignment_tests.rs @@ -394,9 +394,16 @@ fn request_affinity_key_ignores_tool_results_and_sidecar_json() { "input": "{\"parentUuid\":\"scope\",\"childUuid\":\"scope\"}" }), }; + let sidecar_json_array = LlmRequest { + headers: Map::new(), + content: json!({ + "input": " [{\"parentUuid\":\"scope\",\"childUuid\":\"scope\"}]" + }), + }; assert_eq!(request_affinity_key(&tool_result), None); assert_eq!(request_affinity_key(&sidecar_json), None); + assert_eq!(request_affinity_key(&sidecar_json_array), None); } #[test] diff --git a/crates/cli/tests/coverage/session_tests.rs b/crates/cli/tests/coverage/session_tests.rs index 2a218472..003db5d6 100644 --- a/crates/cli/tests/coverage/session_tests.rs +++ b/crates/cli/tests/coverage/session_tests.rs @@ -260,6 +260,26 @@ async fn codex_turn_is_agent_scope_with_turn_role_metadata() { ); } +#[test] +fn apply_start_alias_overrides_conflicting_subagent_id() { + let mut start = llm_start(); + start.session_id = Some("child-session".into()); + start.subagent_id = Some("stale-subagent".into()); + start.metadata = json!({ "request": "metadata" }); + let alias = SessionAlias::new( + "parent-session".into(), + "child-session".into(), + json!({ "alias": "metadata" }), + ); + + apply_start_alias(&mut start, &alias); + + assert_eq!(start.session_id.as_deref(), Some("parent-session")); + assert_eq!(start.subagent_id.as_deref(), Some("child-session")); + assert_eq!(start.metadata["request"], json!("metadata")); + assert_eq!(start.metadata["alias"], json!("metadata")); +} + #[tokio::test] async fn turn_output_uses_last_root_owned_llm_response() { let subscriber_name = "cli-turn-output-root-llm-test";