From a44dde01e03533cd7145082bc8962fee68eeb3a2 Mon Sep 17 00:00:00 2001 From: npub1fgdl5qqnh3k3f2xkqrvt7cujalhm623x4s7fdjdj5yrtp5fzjl9qrjpucw <4a1bfa0013bc6d14a8d600d8bf6392efefbd2a26ac3c96c9b2a106b0d12297ca@sprout-oss.stage.blox.sqprod.co> Date: Wed, 1 Jul 2026 16:21:02 -0400 Subject: [PATCH 01/21] docs(nips): add NIP-AM draft for durable agent turn metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defines kind:44200 — a regular stored event, one per completed agent turn, NIP-44 encrypted agent-to-owner and #p-gated like NIP-AO telemetry, carrying per-turn and cumulative token/cost usage so owners can account for agent token consumption across harnesses without relaying transcript content. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- docs/nips/NIP-AM.md | 190 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 docs/nips/NIP-AM.md diff --git a/docs/nips/NIP-AM.md b/docs/nips/NIP-AM.md new file mode 100644 index 000000000..8390266b1 --- /dev/null +++ b/docs/nips/NIP-AM.md @@ -0,0 +1,190 @@ +NIP-AM +====== + +Agent Turn Metrics +------------------ + +`draft` `optional` `relay` + +This NIP defines a durable, encrypted event kind for recording per-turn token +usage and estimated cost of AI agent sessions. An agent publishes one +`kind:44200` event per completed turn, NIP-44 encrypted to its owner, so the +owner can account for token usage across agents and harnesses without the +relay — or any third party — learning what the agent did or what it cost. + +## Motivation + +AI agent harnesses consume model tokens on every turn. Owners running fleets +of agents need durable, harness-independent usage accounting — the equivalent +of a metered bill — for cost attribution, budgeting, and capacity planning. 
+ +[NIP-AO](NIP-AO.md) (kind 24200) already streams encrypted session telemetry +between agent and owner, but it is deliberately ephemeral: relays MUST NOT +persist it, so it cannot answer "how many tokens did my agents use last +week?". Transcript-grade durable telemetry is explicitly out of scope — the +persistence-averse reasoning behind NIP-AO's ephemerality contract applies to +conversation content, not to a small usage record. Kind 44200 stores only the +metric: token counts, an estimated cost, and correlation identifiers, all +encrypted to the owner. + +## Definitions + +- **Agent**: an AI process with its own Nostr keypair, executing sessions on + behalf of an owner. +- **Owner**: the human (or system) whose pubkey the agent was provisioned under. +- **Turn**: one prompt→response cycle of an agent session, as bounded by the + harness (e.g. one ACP `session/prompt` round trip). +- **Turn metric**: a single kind 44200 event recording the usage of one turn. + +## Event + +`kind:44200` is a regular event as defined in [NIP-01](01.md): stored, +append-only, never replaced. Each completed turn produces exactly one event. + +```json +{ + "kind": 44200, + "pubkey": "<agent-pubkey-hex>", + "created_at": <unix-timestamp-seconds>, + "content": "<NIP-44 v2 ciphertext>", + "tags": [ + ["p", "<owner-pubkey-hex>"], + ["agent", "<agent-pubkey-hex>"] + ], + "sig": "..." +} +``` + +Events MUST have exactly one `p` tag (the owner) and exactly one `agent` tag +(equal to `pubkey`). The tag layout deliberately mirrors NIP-AO telemetry +frames so existing owner-scoped tooling applies unchanged. + +No channel (`h`) tag is used. The channel a turn served is private usage +metadata and lives inside the encrypted payload; keeping it out of the tags +avoids leaking per-channel activity rates to the relay operator and keeps the +event community-global (owner-scoped) rather than channel-scoped. + +## Encryption + +`content` MUST be encrypted with NIP-44 v2 using `(agent_privkey, +owner_pubkey)` — identical to NIP-AO telemetry. Plaintext SHOULD be zeroized +after encrypt/decrypt. 
Decrypted payload MUST NOT exceed 65,535 bytes +(payloads are typically well under 1 KB). + +## Decrypted Payload + +The `content` field decrypts to a UTF-8 JSON object: + +```jsonc +{ + "harness": "goose", // REQUIRED: harness identifier + "model": "claude-sonnet-4-5", // model id, or null if unknown + "channelId": "<channel-uuid>" | null, + "sessionId": "<session-id>" | null, + "turnId": "<turn-id>" | null, + "timestamp": "2026-07-01T20:11:03.213Z", // REQUIRED: RFC 3339, end of turn + + // Usage for THIS turn (computed delta). Fields are null when the harness + // does not report them — a null MUST NOT be recorded or summed as zero. + "turn": { + "inputTokens": 1234 | null, + "outputTokens": 567 | null, + "totalTokens": 1801 | null, + "costUsd": 0.0123 | null // estimated + }, + + // Session-cumulative usage as reported at the end of this turn. + "cumulative": { + "inputTokens": 45210 | null, + "outputTokens": 9876 | null, + "totalTokens": 55086 | null, + "costUsd": 0.41 | null // estimated + }, + + // false when the publisher could not observe the previous turn's + // cumulative baseline (e.g. harness restart mid-session), making the + // "turn" object unreliable for this event. + "deltaReliable": true, + + "stopReason": "end_turn" // optional +} +``` + +`harness` and `timestamp` are REQUIRED. All other fields are OPTIONAL or +nullable. Consumers MUST ignore unknown fields (forward compatibility). + +Where the harness reports only cumulative counters, the publisher computes +`turn` as the difference between consecutive cumulative snapshots within one +session. Publishers MUST set `deltaReliable: false` when the baseline is +unknown; consumers doing exact accounting SHOULD prefer recomputing deltas +from consecutive `cumulative` values and treat `turn` as a convenience. + +`costUsd` values are estimates (provider list prices at publish time, or a +harness-reported estimate). They are advisory, not billing records. 
+ +`stopReason`, when present, MUST be one of `end_turn`, `max_tokens`, +`cancelled`, `error`, `unknown`. Consumers MUST treat unrecognized +`stopReason` values as `unknown`; the token counts remain valid. + +## Publisher Behavior + +- Publish exactly one event per completed turn, at turn completion, including + turns that end in cancellation or error when usage was observed. +- Do NOT publish an event for a turn with no observed usage (all counters + unknown); an all-null metric carries no information. +- `created_at` SHOULD equal the payload `timestamp` truncated to seconds. + +## Relay Behavior + +On receiving a kind 44200 event, a relay MUST: + +1. Validate the event signature per NIP-01. +2. Verify `event.pubkey` equals the `agent` tag and that + `is_agent_owner(agent, owner)` holds for the `p` tag via authenticated + ownership lookup. Tag matching alone is insufficient. +3. Store the event durably, scoped to the owner (community-global; no channel + scope). +4. NOT index the event in any full-text search (the ciphertext is not + searchable and must not enter search indexes). + +Reads MUST be gated: only an authenticated ([NIP-42](42.md)) reader whose +pubkey equals the `#p` tag value may receive the event. Unauthorized publish +or subscribe attempts MUST be rejected with `AUTH required`. + +Relays SHOULD rate-limit kind 44200 to a rate consistent with real turn +frequency (RECOMMENDED: 60 events/minute per agent pubkey). + +## Client Behavior + +Owners recover usage history with: + +```json +{"kinds": [44200], "#p": ["<owner-pubkey-hex>"], "since": <unix-timestamp-seconds>} +``` + +On receiving an event, a client MUST verify the signature, decrypt with its +own secret key and `event.pubkey`, and ignore events that fail to decrypt or +parse. Clients SHOULD deduplicate by event id and sort by `created_at`. + +## Relationship to Other NIPs + +- [NIP-AO](NIP-AO.md): same agent↔owner encryption and tag scoping, but + ephemeral and transcript-grade. 
NIP-AM events MUST NOT carry conversation + content, tool calls, or protocol frames — usage numbers and identifiers only. +- [NIP-09](09.md): the authoring agent (or its owner via relay policy) may + request deletion; relays apply standard deletion semantics. +- [NIP-40](40.md): publishers MAY set `expiration` to bound retention. + +## Security Considerations + +**Metadata leakage.** `p`, `agent`, and `created_at` are cleartext: a relay +operator learns that agent X completed turns for owner Y at some rate. Turn +rate is already observable from the agent's channel messages; the token +counts, cost, model, and channel remain encrypted. + +**No forward secrecy.** NIP-44 does not provide forward secrecy; compromise +of the agent's private key allows decryption of captured ciphertexts. + +**Integrity of accounting.** Metrics are self-reported by the agent process. +A compromised agent can under- or over-report. Owners requiring stronger +guarantees must reconcile against provider-side billing. From 19889ba0c0c2f91172435528c38afa26a78bb31f Mon Sep 17 00:00:00 2001 From: npub1fgdl5qqnh3k3f2xkqrvt7cujalhm623x4s7fdjdj5yrtp5fzjl9qrjpucw <4a1bfa0013bc6d14a8d600d8bf6392efefbd2a26ac3c96c9b2a106b0d12297ca@sprout-oss.stage.blox.sqprod.co> Date: Wed, 1 Jul 2026 16:31:10 -0400 Subject: [PATCH 02/21] docs(nips): harden NIP-AM read gate and delta ordering semantics Address design-review findings: forbid the id-lookup exemption from the 44200 owner gate (a non-owner learning an event id must not retrieve the envelope), require sessionId + a monotonic turnSeq whenever cumulative usage is present so consumers can deterministically recompute deltas (created_at is seconds-precision and unordered within a second), define counter-reset behavior as null-not-negative, and add normative numeric validity and cache-token-folding rules. 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- docs/nips/NIP-AM.md | 58 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/docs/nips/NIP-AM.md b/docs/nips/NIP-AM.md index 8390266b1..bfb57e213 100644 --- a/docs/nips/NIP-AM.md +++ b/docs/nips/NIP-AM.md @@ -80,8 +80,9 @@ The `content` field decrypts to a UTF-8 JSON object: "harness": "goose", // REQUIRED: harness identifier "model": "claude-sonnet-4-5", // model id, or null if unknown "channelId": "" | null, - "sessionId": "" | null, + "sessionId": "" | null, // REQUIRED when "cumulative" is present "turnId": "" | null, + "turnSeq": 17 | null, // REQUIRED when "cumulative" is present "timestamp": "2026-07-01T20:11:03.213Z", // REQUIRED: RFC 3339, end of turn // Usage for THIS turn (computed delta). Fields are null when the harness @@ -111,14 +112,49 @@ The `content` field decrypts to a UTF-8 JSON object: ``` `harness` and `timestamp` are REQUIRED. All other fields are OPTIONAL or -nullable. Consumers MUST ignore unknown fields (forward compatibility). +nullable, except as constrained below. Consumers MUST ignore unknown fields +(forward compatibility). + +### Ordering and delta recomputation + +When a `cumulative` object is present, `sessionId` and `turnSeq` are +REQUIRED. `turnSeq` is a per-session monotonically increasing integer +starting at any value, incremented by the publisher on every published turn +metric for that session; a publisher restart that loses the counter MUST +start a new `sessionId` rather than reuse the old one with a reset `turnSeq`. +Cumulative values form a series only *within* one `sessionId`, ordered by +`turnSeq` — consumers MUST NOT diff cumulative values across different +`sessionId`s, and MUST NOT rely on `created_at` (seconds precision, ambiguous +for same-second turns) for ordering within a session. 
+ +If a consumer recomputing deltas observes a cumulative counter that decreases +between consecutive `turnSeq` values (counter reset, harness bug), it MUST +treat the affected turn's usage as unknown (null), not as negative usage. +Publishers likewise MUST NOT emit negative values in `turn`; when the +computed delta would be negative or the previous baseline is unknown, the +publisher sets the affected `turn` counters to null and `deltaReliable: +false`. Where the harness reports only cumulative counters, the publisher computes `turn` as the difference between consecutive cumulative snapshots within one -session. Publishers MUST set `deltaReliable: false` when the baseline is -unknown; consumers doing exact accounting SHOULD prefer recomputing deltas +session. Consumers doing exact accounting SHOULD prefer recomputing deltas from consecutive `cumulative` values and treat `turn` as a convenience. +### Numeric validity and token semantics + +All token counts MUST be non-negative integers. `costUsd` MUST be a finite, +non-negative number. `totalTokens` is the harness- or provider-reported +total when available; publishers MUST NOT derive it by summing `inputTokens` +and `outputTokens` (providers may count categories a simple sum misses) — +when no total is reported, `totalTokens` is null. `inputTokens` is the +inclusive input-side total: where the provider reports cache reads/writes +separately (e.g. Anthropic `cache_read_input_tokens` / +`cache_creation_input_tokens`), the publisher folds them into `inputTokens`. +Publishers MAY additionally report the cache components in optional +`cacheReadTokens` / `cacheWriteTokens` fields inside `turn` and `cumulative`; +when present these are informational subsets of `inputTokens`, not additions +to it. + `costUsd` values are estimates (provider list prices at publish time, or a harness-reported estimate). They are advisory, not billing records. 
@@ -148,8 +184,13 @@ On receiving a kind 44200 event, a relay MUST: searchable and must not enter search indexes). Reads MUST be gated: only an authenticated ([NIP-42](42.md)) reader whose -pubkey equals the `#p` tag value may receive the event. Unauthorized publish -or subscribe attempts MUST be rejected with `AUTH required`. +pubkey equals the `#p` tag value may receive the event. This gate applies to +**every** read path, including explicit `ids` filters — knowing an event id +MUST NOT grant access. (Some p-gated kinds exempt id-addressed lookups on the +theory that knowing the id implies authorization; kind 44200 events are +long-lived and their cleartext envelope leaks turn activity, so no such +exemption is permitted.) Unauthorized publish or subscribe attempts MUST be +rejected with `AUTH required`. Relays SHOULD rate-limit kind 44200 to a rate consistent with real turn frequency (RECOMMENDED: 60 events/minute per agent pubkey). @@ -164,7 +205,10 @@ Owners recover usage history with: On receiving an event, a client MUST verify the signature, decrypt with its own secret key and `event.pubkey`, and ignore events that fail to decrypt or -parse. Clients SHOULD deduplicate by event id and sort by `created_at`. +parse. Clients SHOULD deduplicate by event id. For within-session ordering, +clients MUST use `(sessionId, turnSeq)` from the decrypted payload as +described above; `created_at` is suitable only for coarse time-window +queries. 
## Relationship to Other NIPs From b7480e875cadbe295a03f9a4532d0bd155fad49a Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Wed, 1 Jul 2026 16:51:57 -0400 Subject: [PATCH 03/21] feat(core/relay): add NIP-AM kind 44200 (agent turn metrics) with relay plumbing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add kind:44200 (KIND_AGENT_TURN_METRIC) as the durable, p-gated, owner-encrypted per-turn token-usage event defined in NIP-AM (docs/nips/NIP-AM.md, PR #1441). Changes: - buzz-core/kind.rs: add KIND_AGENT_TURN_METRIC = 44200 to P_GATED_KINDS and ALL_KINDS; compile-time asserts confirm regular (non-ephemeral, non-replaceable) kind shape - buzz-core/agent_turn_metric.rs (new): AgentTurnMetricPayload type matching the NIP schema (harness+timestamp required; nullable token fields; turn/cumulative objects; sessionId+turnSeq required when cumulative present; deltaReliable; stopReason enum); encrypt_agent_turn_metric/decrypt_agent_turn_metric helpers reusing encrypt_observer_payload/decrypt_observer_payload from observer.rs; round-trip, wrong-key, null-field, and stop-reason tests - buzz-relay/handlers/req.rs: extend p_gated_filters_authorized to deny the ids-filter exemption for kind:44200 (same carve-out shape as KIND_DM_VISIBILITY); new test covering both {kinds:[44200], ids:[...]} deny and the kindless-ids pass-through path (with documented defense-in-depth note) - buzz-relay/handlers/ingest.rs: validate_agent_turn_metric_envelope (p tag, agent tag == event.pubkey, no h tag, NIP-44 content); async is_agent_owner ownership check; required_scope_for_kind → MessagesWrite; is_global_only_kind addition; envelope and ownership tests - migrations/0001_initial_schema.sql + schema/schema.sql: add 44200 to the NULL search_tsv CASE so the p_gated_persistent_kinds_have_storage_null_tsvector drift test passes No emit logic, no adapters — Task B (goose adapter, buzz-acp) is a separate PR. 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-core/src/agent_turn_metric.rs | 264 ++++++++++++++++++++++ crates/buzz-core/src/kind.rs | 19 ++ crates/buzz-core/src/lib.rs | 2 + crates/buzz-relay/src/handlers/ingest.rs | 244 +++++++++++++++++++- crates/buzz-relay/src/handlers/req.rs | 82 ++++++- migrations/0001_initial_schema.sql | 6 +- schema/schema.sql | 2 +- 7 files changed, 608 insertions(+), 11 deletions(-) create mode 100644 crates/buzz-core/src/agent_turn_metric.rs diff --git a/crates/buzz-core/src/agent_turn_metric.rs b/crates/buzz-core/src/agent_turn_metric.rs new file mode 100644 index 000000000..325a94f9d --- /dev/null +++ b/crates/buzz-core/src/agent_turn_metric.rs @@ -0,0 +1,264 @@ +//! NIP-AM: Agent Turn Metric — payload type and encrypt/decrypt helpers. +//! +//! One `kind:44200` event is published per completed agent turn. Its content +//! is a NIP-44 v2 ciphertext (agent key → owner pubkey) that decodes to an +//! [`AgentTurnMetricPayload`] JSON object. +//! +//! See `docs/nips/NIP-AM.md` for the full specification. + +use nostr::{Event, Keys, PublicKey}; +use serde::{Deserialize, Serialize}; + +use crate::observer::{ + decrypt_observer_payload, encrypt_observer_payload, ObserverPayloadError, +}; + +// Re-export for callers that only need the error type. +pub use crate::observer::ObserverPayloadError as AgentTurnMetricError; + +/// Token-usage counters for a single measurement window (one turn or cumulative). +/// +/// All token fields are nullable — `None` means the harness did not report them, +/// NOT that the count was zero. See NIP-AM §Numeric validity and token semantics. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct TokenCounts { + /// Input tokens (inclusive of cache reads/writes where applicable). + pub input_tokens: Option, + + /// Output tokens. + pub output_tokens: Option, + + /// Provider-reported total — NOT derived by summing input + output. 
+ /// `None` when the provider did not report a total. + pub total_tokens: Option, + + /// Estimated cost in USD. Must be finite and non-negative when present. + pub cost_usd: Option, + + /// Informational: cache-read tokens included in `input_tokens`. + #[serde(skip_serializing_if = "Option::is_none")] + pub cache_read_tokens: Option, + + /// Informational: cache-write tokens included in `input_tokens`. + #[serde(skip_serializing_if = "Option::is_none")] + pub cache_write_tokens: Option, +} + +/// Why a turn ended. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum StopReason { + /// Model reached a natural end-of-turn. + EndTurn, + /// Model hit the max-tokens limit. + MaxTokens, + /// Turn was cancelled by the owner or harness. + Cancelled, + /// Turn ended with an error. + Error, + /// Stop reason is unknown. + Unknown, +} + +/// Decrypted payload of a `kind:44200` Agent Turn Metric event. +/// +/// `harness` and `timestamp` are REQUIRED. All other fields are optional or +/// nullable unless constrained by the NIP (e.g. `session_id` + `turn_seq` +/// are required whenever `cumulative` is present). +/// +/// Consumers MUST ignore unknown fields (forward compatibility). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct AgentTurnMetricPayload { + /// Harness identifier (e.g. `"goose"`, `"buzz-agent"`). REQUIRED. + pub harness: String, + + /// Model identifier as reported by the harness, or `None` if unknown. + pub model: Option, + + /// Channel UUID the turn served, encrypted inside the payload. + pub channel_id: Option, + + /// Session identifier. REQUIRED when `cumulative` is present. + pub session_id: Option, + + /// Turn identifier (harness-internal). + pub turn_id: Option, + + /// Monotonically increasing per-session sequence number. + /// REQUIRED when `cumulative` is present; strictly increasing within one + /// `session_id`. 
A publisher restart that loses the counter MUST start a + /// new `session_id`. + pub turn_seq: Option, + + /// RFC 3339 timestamp (end-of-turn). REQUIRED. + pub timestamp: String, + + /// Usage for this turn (computed delta). Null fields mean not reported. + pub turn: Option, + + /// Session-cumulative usage as reported at end of this turn. + pub cumulative: Option, + + /// `false` when the publisher could not observe the previous cumulative + /// baseline (e.g. harness restart mid-session), making `turn` unreliable. + /// Defaults to `true` on the wire when not explicitly set. + #[serde(default = "default_delta_reliable")] + pub delta_reliable: bool, + + /// Why the turn ended. Unrecognized values MUST be treated as `Unknown`. + pub stop_reason: Option, +} + +fn default_delta_reliable() -> bool { + true +} + +/// Encrypt an [`AgentTurnMetricPayload`] into a NIP-44 v2 ciphertext string +/// using the agent's key pair and the owner's public key. +/// +/// This is the content field of a `kind:44200` event. +pub fn encrypt_agent_turn_metric( + agent_keys: &Keys, + owner_pubkey: &PublicKey, + payload: &AgentTurnMetricPayload, +) -> Result { + encrypt_observer_payload(agent_keys, owner_pubkey, payload) +} + +/// Decrypt and deserialize an [`AgentTurnMetricPayload`] from a `kind:44200` event. +/// +/// `recipient_keys` is the owner's key pair. 
+pub fn decrypt_agent_turn_metric( + recipient_keys: &Keys, + event: &Event, +) -> Result { + decrypt_observer_payload(recipient_keys, event) +} + +#[cfg(test)] +mod tests { + use super::*; + use nostr::{EventBuilder, Kind, Tag}; + + fn sample_payload() -> AgentTurnMetricPayload { + AgentTurnMetricPayload { + harness: "goose".to_string(), + model: Some("claude-sonnet-4-5".to_string()), + channel_id: Some("12345678-1234-1234-1234-123456789abc".to_string()), + session_id: Some("sess-abc".to_string()), + turn_id: Some("turn-1".to_string()), + turn_seq: Some(1), + timestamp: "2026-07-01T20:11:03.213Z".to_string(), + turn: Some(TokenCounts { + input_tokens: Some(1234), + output_tokens: Some(567), + total_tokens: Some(1801), + cost_usd: Some(0.0123), + cache_read_tokens: None, + cache_write_tokens: None, + }), + cumulative: Some(TokenCounts { + input_tokens: Some(45210), + output_tokens: Some(9876), + total_tokens: Some(55086), + cost_usd: Some(0.41), + cache_read_tokens: None, + cache_write_tokens: None, + }), + delta_reliable: true, + stop_reason: Some(StopReason::EndTurn), + } + } + + #[test] + fn round_trip_encrypt_decrypt() { + let agent_keys = Keys::generate(); + let owner_keys = Keys::generate(); + + let payload = sample_payload(); + let ciphertext = encrypt_agent_turn_metric(&agent_keys, &owner_keys.public_key(), &payload) + .expect("encrypt"); + + // Build a minimal event envelope so decrypt_observer_payload can use event.pubkey. 
+ let event = EventBuilder::new(Kind::Custom(44200), ciphertext) + .tags([ + Tag::parse(["p", &owner_keys.public_key().to_hex()]).unwrap(), + Tag::parse(["agent", &agent_keys.public_key().to_hex()]).unwrap(), + ]) + .sign_with_keys(&agent_keys) + .expect("sign"); + + let decoded = + decrypt_agent_turn_metric(&owner_keys, &event).expect("decrypt"); + + assert_eq!(decoded, payload); + } + + #[test] + fn wrong_key_decrypt_fails() { + let agent_keys = Keys::generate(); + let owner_keys = Keys::generate(); + let wrong_keys = Keys::generate(); + + let payload = sample_payload(); + let ciphertext = encrypt_agent_turn_metric(&agent_keys, &owner_keys.public_key(), &payload) + .expect("encrypt"); + + let event = EventBuilder::new(Kind::Custom(44200), ciphertext) + .tags([ + Tag::parse(["p", &owner_keys.public_key().to_hex()]).unwrap(), + Tag::parse(["agent", &agent_keys.public_key().to_hex()]).unwrap(), + ]) + .sign_with_keys(&agent_keys) + .expect("sign"); + + let result = decrypt_agent_turn_metric(&wrong_keys, &event); + assert!(result.is_err(), "expected decrypt error with wrong key"); + } + + #[test] + fn delta_reliable_defaults_to_true_when_absent() { + let json = r#"{"harness":"goose","timestamp":"2026-07-01T20:11:03Z"}"#; + let payload: AgentTurnMetricPayload = + serde_json::from_str(json).expect("parse"); + assert!(payload.delta_reliable, "deltaReliable should default to true"); + } + + #[test] + fn stop_reason_round_trips() { + for (variant, json_val) in [ + (StopReason::EndTurn, "\"end_turn\""), + (StopReason::MaxTokens, "\"max_tokens\""), + (StopReason::Cancelled, "\"cancelled\""), + (StopReason::Error, "\"error\""), + (StopReason::Unknown, "\"unknown\""), + ] { + let serialized = serde_json::to_string(&variant).unwrap(); + assert_eq!(serialized, json_val); + let deserialized: StopReason = serde_json::from_str(json_val).unwrap(); + assert_eq!(deserialized, variant); + } + } + + #[test] + fn null_token_counts_round_trip() { + // Verify that None fields serialize to 
`null` (not absent), as required + // by the NIP — consumers must distinguish "not reported" from "zero". + let counts = TokenCounts { + input_tokens: None, + output_tokens: None, + total_tokens: None, + cost_usd: None, + cache_read_tokens: None, + cache_write_tokens: None, + }; + let json = serde_json::to_string(&counts).unwrap(); + // cache_* are skip_serializing_if = None, others serialize as null + assert!(json.contains("\"inputTokens\":null")); + assert!(json.contains("\"outputTokens\":null")); + let back: TokenCounts = serde_json::from_str(&json).unwrap(); + assert_eq!(back, counts); + } +} diff --git a/crates/buzz-core/src/kind.rs b/crates/buzz-core/src/kind.rs index f2e918424..1da402cf1 100644 --- a/crates/buzz-core/src/kind.rs +++ b/crates/buzz-core/src/kind.rs @@ -131,6 +131,10 @@ pub const P_GATED_KINDS: &[u32] = &[ KIND_MEMBER_REMOVED_NOTIFICATION, KIND_GIFT_WRAP, KIND_DM_VISIBILITY, + // NIP-AM: agent turn metrics are encrypted to the owner and must not be + // readable by any unauthenticated or non-owner party, including via `ids` + // filters — see NIP-AM §Relay Behavior. + KIND_AGENT_TURN_METRIC, ]; /// NIP-AP: Agent Persona (parameterized replaceable, owner-authored). @@ -341,6 +345,15 @@ pub const KIND_MEMBER_ADDED_NOTIFICATION: u32 = 44100; /// Stored globally (channel_id = None) with p-tag = target, h-tag = channel UUID. pub const KIND_MEMBER_REMOVED_NOTIFICATION: u32 = 44101; +/// NIP-AM: Agent Turn Metric — durable per-turn token-usage record (agent-authored). +/// +/// Regular stored event (append-only, never replaced). The agent publishes one +/// event per completed turn, NIP-44 encrypted to its owner. Tags: exactly one `p` +/// (owner pubkey) and one `agent` (agent pubkey == event pubkey); no `h` tag. +/// Stored globally (channel_id = NULL); owner-scoped reads only (p-gated, NIP-42). +/// See `docs/nips/NIP-AM.md`. 
+pub const KIND_AGENT_TURN_METRIC: u32 = 44200; + // Forum / social (45000–45999) // V1 used addressable range (30001–30003) — wrong. /// A forum post (thread root). @@ -504,6 +517,7 @@ pub const ALL_KINDS: &[u32] = &[ KIND_JOB_ERROR, KIND_MEMBER_ADDED_NOTIFICATION, KIND_MEMBER_REMOVED_NOTIFICATION, + KIND_AGENT_TURN_METRIC, KIND_WORKFLOW_DEF, KIND_LONG_FORM, KIND_USER_STATUS, @@ -648,6 +662,11 @@ const _: () = assert!(KIND_AUTH <= u16::MAX as u32); const _: () = assert!(KIND_CANVAS <= u16::MAX as u32); const _: () = assert!(KIND_HUDDLE_GUIDELINES <= u16::MAX as u32); const _: () = assert!(EPHEMERAL_KIND_MIN < EPHEMERAL_KIND_MAX); +// Compile-time: KIND_AGENT_TURN_METRIC is a regular stored kind (not ephemeral, not replaceable). +const _: () = assert!(!is_ephemeral(KIND_AGENT_TURN_METRIC)); +const _: () = assert!(!is_replaceable(KIND_AGENT_TURN_METRIC)); +const _: () = assert!(!is_parameterized_replaceable(KIND_AGENT_TURN_METRIC)); +const _: () = assert!(KIND_AGENT_TURN_METRIC <= u16::MAX as u32); #[cfg(test)] mod tests { diff --git a/crates/buzz-core/src/lib.rs b/crates/buzz-core/src/lib.rs index dee40e988..7c3a1c38d 100644 --- a/crates/buzz-core/src/lib.rs +++ b/crates/buzz-core/src/lib.rs @@ -24,6 +24,8 @@ pub mod kind; pub mod network; /// Agent observer frame helpers. pub mod observer; +/// NIP-AM: Agent Turn Metric — payload type and encrypt/decrypt helpers. +pub mod agent_turn_metric; /// NIP-AB device pairing — crypto primitives, message types, and errors. pub mod pairing; /// Presence status types shared across crates. 
diff --git a/crates/buzz-relay/src/handlers/ingest.rs b/crates/buzz-relay/src/handlers/ingest.rs index 057ea972f..38e6ed95e 100644 --- a/crates/buzz-relay/src/handlers/ingest.rs +++ b/crates/buzz-relay/src/handlers/ingest.rs @@ -12,7 +12,8 @@ use uuid::Uuid; use buzz_auth::Scope; use buzz_core::kind::{ event_kind_u32, is_identity_archive_request_kind, is_parameterized_replaceable, - is_relay_admin_kind, KIND_AGENT_ENGRAM, KIND_AGENT_PROFILE, KIND_APPROVAL_DENY, + is_relay_admin_kind, KIND_AGENT_ENGRAM, KIND_AGENT_PROFILE, KIND_AGENT_TURN_METRIC, + KIND_APPROVAL_DENY, KIND_APPROVAL_GRANT, KIND_AUTH, KIND_BOOKMARK_LIST, KIND_BOOKMARK_SET, KIND_CANVAS, KIND_CONTACT_LIST, KIND_DELETION, KIND_DM_ADD_MEMBER, KIND_DM_HIDE, KIND_DM_OPEN, KIND_EMOJI_LIST, KIND_EMOJI_SET, KIND_EVENT_REMINDER, KIND_FOLLOW_SET, KIND_FORUM_COMMENT, @@ -156,6 +157,8 @@ fn required_scope_for_kind(kind: u32, event: &Event) -> Result { Ok(Scope::UsersWrite) } + // NIP-AM: agent turn metrics are agent-authored global events (encrypted to owner). + KIND_AGENT_TURN_METRIC => Ok(Scope::MessagesWrite), // NIP-51 standard lists and NIP-65 relay list — user-owned global state, // same ownership shape as kind:3 (contacts) and kind:0 (profile). KIND_MUTE_LIST @@ -376,6 +379,9 @@ pub(crate) fn is_global_only_kind(kind: u32) -> bool { // Mesh-LLM relay status is relay-signed and global. Clients may // subscribe to it, but must not channel-scope or submit it. | KIND_MESH_LLM_RELAY_STATUS + // NIP-AM: agent turn metrics are owner-scoped global events. + // Channel identity is encrypted inside the payload — no `h` tag. + | KIND_AGENT_TURN_METRIC ) } @@ -1055,6 +1061,78 @@ fn validate_engram_nip44_content(content: &str) -> Result<(), String> { Ok(()) } +/// Validate the public envelope of a NIP-AM `kind:44200` event. +/// +/// Enforces (without touching the encrypted payload): +/// - Exactly one `p` tag: 64 lowercase hex chars (the owner pubkey). 
+/// - Exactly one `agent` tag: 64 lowercase hex chars equal to `event.pubkey`. +/// - No `h` tag (channel identity belongs inside the encrypted payload). +/// - Content syntactically resembles NIP-44 v2 ciphertext (delegated to +/// `validate_engram_nip44_content`, which does the same length/base64/version check). +/// +/// Ownership (`is_agent_owner`) is an async DB check performed separately in +/// `ingest_event_inner` after this synchronous envelope check. +fn validate_agent_turn_metric_envelope(event: &nostr::Event) -> Result<(), String> { + let event_pubkey_hex = event.pubkey.to_hex(); + let mut p_tags: Vec<&str> = Vec::new(); + let mut agent_tags: Vec<&str> = Vec::new(); + let mut has_h_tag = false; + + for tag in event.tags.iter() { + let parts = tag.as_slice(); + if parts.len() < 2 { + continue; + } + match parts[0].as_str() { + "p" => p_tags.push(&parts[1]), + "agent" => agent_tags.push(&parts[1]), + "h" => has_h_tag = true, + _ => {} + } + } + + if has_h_tag { + return Err( + "agent-turn-metric event must not have an `h` tag (channel identity belongs inside the encrypted payload)".to_string(), + ); + } + + if p_tags.len() != 1 { + return Err(format!( + "agent-turn-metric event must have exactly one `p` tag (got {})", + p_tags.len() + )); + } + let p = p_tags[0]; + if p.len() != 64 || !p.bytes().all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase()) { + return Err("agent-turn-metric `p` tag must be 64 lowercase hex chars".to_string()); + } + + if agent_tags.len() != 1 { + return Err(format!( + "agent-turn-metric event must have exactly one `agent` tag (got {})", + agent_tags.len() + )); + } + let agent = agent_tags[0]; + if agent.len() != 64 + || !agent.bytes().all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase()) + { + return Err("agent-turn-metric `agent` tag must be 64 lowercase hex chars".to_string()); + } + if agent != event_pubkey_hex { + return Err( + "agent-turn-metric `agent` tag must equal event pubkey".to_string(), + ); + } + + // 
Content must look like a NIP-44 v2 ciphertext (length, base64, version prefix). + validate_engram_nip44_content(&event.content) + .map_err(|e| e.replace("agent-engram", "agent-turn-metric"))?; + + Ok(()) +} + /// Parse a NIP-ER `not_before` tag value into a Unix timestamp. /// /// The value MUST be a decimal integer string containing only ASCII digits, with @@ -1622,6 +1700,43 @@ async fn ingest_event_inner( .map_err(|e| IngestError::Rejected(format!("invalid: {e}")))?; } + if kind_u32 == KIND_AGENT_TURN_METRIC { + validate_agent_turn_metric_envelope(&event) + .map_err(|e| IngestError::Rejected(format!("invalid: {e}")))?; + + // Ownership check: `p` tag must be the registered owner of `event.pubkey`. + // Tag shape is already verified above; these extractions are infallible. + let owner_hex = event + .tags + .iter() + .find_map(|t| { + let parts = t.as_slice(); + if parts.len() >= 2 && parts[0].as_str() == "p" { + Some(parts[1].as_str()) + } else { + None + } + }) + .expect("p tag present (validated above)"); + let agent_bytes = event.pubkey.to_bytes().to_vec(); + let owner_bytes = hex::decode(owner_hex).expect("hex validated above"); + let is_owner = state + .db + .is_agent_owner(tenant.community(), &agent_bytes, &owner_bytes) + .await + .map_err(|e| { + IngestError::Internal(format!( + "error: db error checking agent-turn-metric ownership: {e}" + )) + })?; + if !is_owner { + return Err(IngestError::AuthFailed( + "restricted: agent-turn-metric `p` tag must be the registered owner of this agent" + .into(), + )); + } + } + if kind_u32 == KIND_EVENT_REMINDER { validate_event_reminder(&event) .map_err(|e| IngestError::Rejected(format!("invalid: {e}")))?; @@ -2260,6 +2375,7 @@ mod tests { KIND_PERSONA, KIND_TEAM, KIND_MANAGED_AGENT, + KIND_AGENT_TURN_METRIC, ]; for kind in migrated { assert!( @@ -2297,6 +2413,24 @@ mod tests { assert!(!requires_h_channel_scope(KIND_MESH_LLM_RELAY_STATUS)); } + #[test] + fn agent_turn_metric_is_global_only_and_in_scope_allowlist() { + 
let dummy = make_dummy_event(); + assert!( + is_global_only_kind(KIND_AGENT_TURN_METRIC), + "kind:44200 must be global-only (no h tag)" + ); + assert!( + !requires_h_channel_scope(KIND_AGENT_TURN_METRIC), + "kind:44200 must not require an h-tag" + ); + assert_eq!( + required_scope_for_kind(KIND_AGENT_TURN_METRIC, &dummy).unwrap(), + Scope::MessagesWrite, + "kind:44200 requires MessagesWrite scope" + ); + } + #[test] fn nip51_and_nip65_lists_are_global_only() { for kind in [ @@ -2930,4 +3064,112 @@ mod tests { let err = validate_persona_envelope(&ev).unwrap_err(); assert!(err.contains("`d` tag"), "got: {err}"); } + + // ─── agent_turn_metric envelope tests ──────────────────────────────────── + + /// Build an event for kind:44200 with the given tags and content. + /// The signing key IS the agent key, so `event.pubkey` matches the agent. + fn make_agent_turn_metric( + agent_keys: &nostr::Keys, + tags: &[&[&str]], + content: &str, + ) -> nostr::Event { + let nostr_tags: Vec = tags + .iter() + .map(|t| nostr::Tag::parse(t.iter().copied()).unwrap()) + .collect(); + nostr::EventBuilder::new( + nostr::Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16), + content, + ) + .tags(nostr_tags) + .sign_with_keys(agent_keys) + .unwrap() + } + + #[test] + fn agent_turn_metric_envelope_accepts_canonical() { + let agent = nostr::Keys::generate(); + let owner_hex = "b".repeat(64); + let agent_hex = agent.public_key().to_hex(); + let ev = make_agent_turn_metric( + &agent, + &[&["p", &owner_hex], &["agent", &agent_hex]], + &fake_nip44_v2(), + ); + assert!(validate_agent_turn_metric_envelope(&ev).is_ok()); + } + + #[test] + fn agent_turn_metric_envelope_rejects_h_tag() { + let agent = nostr::Keys::generate(); + let owner_hex = "b".repeat(64); + let agent_hex = agent.public_key().to_hex(); + let ev = make_agent_turn_metric( + &agent, + &[ + &["p", &owner_hex], + &["agent", &agent_hex], + &["h", "some-channel-uuid"], + ], + &fake_nip44_v2(), + ); + let err = 
validate_agent_turn_metric_envelope(&ev).unwrap_err(); + assert!(err.contains("`h` tag"), "got: {err}"); + } + + #[test] + fn agent_turn_metric_envelope_rejects_missing_p() { + let agent = nostr::Keys::generate(); + let agent_hex = agent.public_key().to_hex(); + let ev = + make_agent_turn_metric(&agent, &[&["agent", &agent_hex]], &fake_nip44_v2()); + let err = validate_agent_turn_metric_envelope(&ev).unwrap_err(); + assert!(err.contains("`p` tag"), "got: {err}"); + } + + #[test] + fn agent_turn_metric_envelope_rejects_missing_agent() { + let agent = nostr::Keys::generate(); + let owner_hex = "b".repeat(64); + let ev = + make_agent_turn_metric(&agent, &[&["p", &owner_hex]], &fake_nip44_v2()); + let err = validate_agent_turn_metric_envelope(&ev).unwrap_err(); + assert!(err.contains("`agent` tag"), "got: {err}"); + } + + #[test] + fn agent_turn_metric_envelope_rejects_agent_mismatch() { + let agent = nostr::Keys::generate(); + let owner_hex = "b".repeat(64); + let wrong_agent_hex = "c".repeat(64); // not event.pubkey + let ev = make_agent_turn_metric( + &agent, + &[&["p", &owner_hex], &["agent", &wrong_agent_hex]], + &fake_nip44_v2(), + ); + let err = validate_agent_turn_metric_envelope(&ev).unwrap_err(); + assert!( + err.contains("equal event pubkey"), + "got: {err}" + ); + } + + #[test] + fn agent_turn_metric_envelope_rejects_bad_content() { + let agent = nostr::Keys::generate(); + let owner_hex = "b".repeat(64); + let agent_hex = agent.public_key().to_hex(); + let ev = make_agent_turn_metric( + &agent, + &[&["p", &owner_hex], &["agent", &agent_hex]], + "not-a-ciphertext", + ); + let err = validate_agent_turn_metric_envelope(&ev).unwrap_err(); + // error comes from validate_engram_nip44_content with label replaced + assert!( + err.contains("agent-turn-metric"), + "got: {err}" + ); + } } diff --git a/crates/buzz-relay/src/handlers/req.rs b/crates/buzz-relay/src/handlers/req.rs index 2e697eafa..b43c04bca 100644 --- a/crates/buzz-relay/src/handlers/req.rs +++ 
b/crates/buzz-relay/src/handlers/req.rs @@ -6,7 +6,10 @@ use std::sync::Arc; use tracing::{debug, warn}; use buzz_core::filter::filters_match; -use buzz_core::kind::{AUTHOR_ONLY_KINDS, KIND_AGENT_ENGRAM, KIND_DM_VISIBILITY, P_GATED_KINDS}; +use buzz_core::kind::{ + AUTHOR_ONLY_KINDS, KIND_AGENT_ENGRAM, KIND_AGENT_TURN_METRIC, KIND_DM_VISIBILITY, + P_GATED_KINDS, +}; use buzz_core::tenant::TenantContext; use buzz_db::EventQuery; use buzz_pubsub::EventTopic; @@ -974,13 +977,18 @@ pub(crate) fn p_gated_filters_authorized(filters: &[Filter], authed_pubkey_hex: // safe for kinds whose id is author-bound or whose content is encrypted. // KIND_DM_VISIBILITY is relay-signed (id not author-bound) and exposes // plaintext private hide choices, so its `#p` owner check MUST hold even - // when `ids` is present. Only filters that explicitly name the kind lose - // the exemption — a kindless `ids` lookup is unaffected. - let explicitly_dm_visibility = filter.kinds.as_ref().is_some_and(|ks| { - ks.iter() - .any(|kind| kind.as_u16() as u32 == KIND_DM_VISIBILITY) + // when `ids` is present. KIND_AGENT_TURN_METRIC events are long-lived + // and their cleartext envelope (pubkey, agent tag, created_at) leaks + // turn-activity metadata — knowing an event id is NOT authorization + // (NIP-AM §Relay Behavior). Only filters that explicitly name the kind + // lose the exemption — a kindless `ids` lookup is unaffected. + let explicitly_no_ids_exemption = filter.kinds.as_ref().is_some_and(|ks| { + ks.iter().any(|kind| { + let k = kind.as_u16() as u32; + k == KIND_DM_VISIBILITY || k == KIND_AGENT_TURN_METRIC + }) }); - if !explicitly_dm_visibility && filter.ids.as_ref().is_some_and(|ids| !ids.is_empty()) { + if !explicitly_no_ids_exemption && filter.ids.as_ref().is_some_and(|ids| !ids.is_empty()) { return true; } @@ -1284,6 +1292,66 @@ mod tests { assert!(p_gated_filters_authorized(&[member_notif_ids], authed)); } + /// NIP-AM: kind 44200 must deny `{kinds:[44200], ids:[...]}` by non-owner. 
+ /// Thufir's implementation note: the helper treats explicit-kind+ids and kindless + /// ids differently: explicit `{kinds:[44200], ids:[...]}` is denied, kindless passes. + #[test] + fn agent_turn_metric_requires_p_tag_even_with_ids() { + let p_tag = SingleLetterTag::lowercase(Alphabet::P); + let authed = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + let other = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + let event_id = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"; + let metric_kind = nostr::Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16); + + // Case 1: {kinds:[44200], ids:[...]} — explicit kind, should require #p owner. + let explicit_kind_ids_only = Filter::new() + .kind(metric_kind) + .id(nostr::EventId::from_hex(event_id).unwrap()); + assert!( + !p_gated_filters_authorized(&[explicit_kind_ids_only], authed), + "kind:44200 + ids without matching #p must be denied" + ); + + let explicit_kind_wrong_p = Filter::new() + .kind(metric_kind) + .id(nostr::EventId::from_hex(event_id).unwrap()) + .custom_tags(p_tag, [other]); + assert!( + !p_gated_filters_authorized(&[explicit_kind_wrong_p], authed), + "kind:44200 + ids + wrong #p must be denied" + ); + + // Case 2: kindless {ids:[...]} — the existing ids exemption applies + // (consistent with other p-gated kinds like member notifications). The + // relay's defense-in-depth for kind:44200 is: (a) the explicit-kind+ids + // carve-out above, (b) NULL tsvector storage preventing search discovery, + // and (c) the subscription delivery layer not returning 44200 events to + // non-owners. A kindless ids filter is authorized here because + // p_gated_filters_authorized cannot know which kind the id resolves to.
+ let kindless_ids = Filter::new().id(nostr::EventId::from_hex(event_id).unwrap()); + assert!( + p_gated_filters_authorized(&[kindless_ids], authed), + "kindless ids filter passes this gate (consistent with member-notif behavior)" + ); + + // Case 3: owner querying by #p is allowed. + let owner_by_p = Filter::new().kind(metric_kind).custom_tags(p_tag, [authed]); + assert!( + p_gated_filters_authorized(&[owner_by_p], authed), + "kind:44200 with matching #p must be allowed" + ); + + // Case 4: owner querying by #p + ids is allowed. + let owner_p_and_ids = Filter::new() + .kind(metric_kind) + .id(nostr::EventId::from_hex(event_id).unwrap()) + .custom_tags(p_tag, [authed]); + assert!( + p_gated_filters_authorized(&[owner_p_and_ids], authed), + "kind:44200 with matching #p and ids must be allowed" + ); + } + #[test] fn test_mixed_search_and_non_search_detection() { let search_filter = Filter::new().search("hello"); diff --git a/migrations/0001_initial_schema.sql b/migrations/0001_initial_schema.sql index 2d4035f8a..a653de3c0 100644 --- a/migrations/0001_initial_schema.sql +++ b/migrations/0001_initial_schema.sql @@ -211,16 +211,18 @@ CREATE TABLE events ( -- 30622 = KIND_DM_VISIBILITY (per-viewer private hide state) -- 44100 = KIND_MEMBER_ADDED_NOTIFICATION (p-gated membership notice) -- 44101 = KIND_MEMBER_REMOVED_NOTIFICATION (p-gated membership notice) + -- 44200 = KIND_AGENT_TURN_METRIC (NIP-AM: p-gated encrypted turn metrics) -- NULL tsvector never matches `@@`, so excluded rows are storage-level -- unsearchable. Constants kept in `buzz_core::kind` (KIND_GIFT_WRAP, -- KIND_EVENT_REMINDER, KIND_DM_VISIBILITY, - -- KIND_MEMBER_ADDED_NOTIFICATION, KIND_MEMBER_REMOVED_NOTIFICATION); inlined + -- KIND_MEMBER_ADDED_NOTIFICATION, KIND_MEMBER_REMOVED_NOTIFICATION, + -- KIND_AGENT_TURN_METRIC); inlined -- here because a sqlx -- migration is frozen SQL and cannot import the Rust constant. 
If a new -- privacy-sensitive kind is added there, update this list and add a -- regression test in `buzz-search/tests/fts_integration.rs`. search_tsv TSVECTOR GENERATED ALWAYS AS ( - CASE WHEN kind IN (1059, 30300, 30622, 44100, 44101) THEN NULL::tsvector + CASE WHEN kind IN (1059, 30300, 30622, 44100, 44101, 44200) THEN NULL::tsvector ELSE to_tsvector('simple', content) END ) STORED, diff --git a/schema/schema.sql b/schema/schema.sql index 1c6fcfb98..c3247f103 100644 --- a/schema/schema.sql +++ b/schema/schema.sql @@ -208,7 +208,7 @@ CREATE TABLE events ( -- never matches `@@`. -- Keep in sync with migrations/0001_initial_schema.sql. search_tsv TSVECTOR GENERATED ALWAYS AS ( - CASE WHEN kind IN (1059, 30300, 30622, 44100, 44101) THEN NULL::tsvector + CASE WHEN kind IN (1059, 30300, 30622, 44100, 44101, 44200) THEN NULL::tsvector ELSE to_tsvector('simple', content) END ) STORED, From 23b522d992c50744bdf8070daa18e77f68eb7413 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Wed, 1 Jul 2026 16:58:25 -0400 Subject: [PATCH 04/21] fix(relay/core): close result-level read gate for kind:44200 (NIP-AM) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reader_authorized_for_event in filter.rs now gates KIND_AGENT_TURN_METRIC alongside KIND_DM_VISIBILITY — reader must match the #p tag (owner). This single function closes all kindless-ids retrieval paths: WS historical pull (req.rs:330, req.rs:652), HTTP bridge (bridge.rs:608, bridge.rs:863), and live fan-out (event.rs). Live fan-out extended likewise: owner_only_kind now covers both 44200 and 30622, so kindless-ids subscriptions cannot receive 44200 events for non-owners. Tests added: reader_authorized_for_event_gates_agent_turn_metric_by_p (owner allow, non-owner deny, authoring-agent deny). 
Case-2 rationale in the existing req.rs test updated: pass-through at the filter-authorization gate is correct because the result-level gate is now the enforcement point for this path. NIP-AM ref: docs/nips/NIP-AM.md at 19889ba0c (PR #1441). Resolves blocking gap from PR #1445 review. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-core/src/filter.rs | 48 +++++++++++++++++++++---- crates/buzz-relay/src/handlers/event.rs | 13 ++++--- crates/buzz-relay/src/handlers/req.rs | 14 ++++---- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/crates/buzz-core/src/filter.rs b/crates/buzz-core/src/filter.rs index a3c0bef59..1671f7622 100644 --- a/crates/buzz-core/src/filter.rs +++ b/crates/buzz-core/src/filter.rs @@ -12,14 +12,17 @@ pub fn filters_match(filters: &[Filter], event: &StoredEvent) -> bool { } /// Result-level read authorization for relay-signed events whose content is -/// private to a single viewer. Currently only `KIND_DM_VISIBILITY`: the reader -/// MUST equal the snapshot's `#p` (owner). Returns `true` for every other kind. +/// private to a single viewer. Currently gates `KIND_DM_VISIBILITY` and +/// `KIND_AGENT_TURN_METRIC`: the reader MUST equal the event's `#p` tag +/// (owner). Returns `true` for every other kind. /// -/// This guards the delivery surfaces directly, so a query that bypasses the -/// filter-level `#p` gate (e.g. a kindless `ids:[…]` lookup of a known snapshot -/// id) still cannot read another viewer's hidden-DM set. +/// This guards every delivery surface — WS historical pull (`req.rs`), HTTP +/// bridge (`bridge.rs`), and live fan-out (`event.rs`) — so a query that +/// bypasses the filter-level `#p` gate (e.g. a kindless `ids:[…]` lookup of +/// a known event id) still cannot read another user's private event. 
pub fn reader_authorized_for_event(event: &nostr::Event, reader_pubkey_hex: &str) -> bool { - if crate::kind::event_kind_u32(event) != crate::kind::KIND_DM_VISIBILITY { + let kind = crate::kind::event_kind_u32(event); + if kind != crate::kind::KIND_DM_VISIBILITY && kind != crate::kind::KIND_AGENT_TURN_METRIC { return true; } let p = nostr::SingleLetterTag::lowercase(nostr::Alphabet::P); @@ -261,4 +264,37 @@ mod tests { .expect("sign"); assert!(reader_authorized_for_event(&note, other)); } + + #[test] + fn reader_authorized_for_event_gates_agent_turn_metric_by_p() { + let agent_keys = Keys::generate(); + let owner = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + let attacker = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"; + + // Agent turn metric event: pubkey=agent, p tag=owner (NIP-AM envelope shape). + let metric = EventBuilder::new( + Kind::Custom(crate::kind::KIND_AGENT_TURN_METRIC as u16), + "encrypted-payload", + ) + .tags([ + Tag::parse(["p", owner]).unwrap(), + Tag::parse(["agent", &agent_keys.public_key().to_hex()]).unwrap(), + ]) + .sign_with_keys(&agent_keys) + .expect("sign"); + + assert!( + reader_authorized_for_event(&metric, owner), + "owner must be authorized to read their own agent turn metric" + ); + assert!( + !reader_authorized_for_event(&metric, attacker), + "non-owner must NOT be authorized to read an agent turn metric via kindless ids" + ); + // The authoring agent also does not get read-back (NIP-AM: owner-only read).
+ assert!( + !reader_authorized_for_event(&metric, &agent_keys.public_key().to_hex()), + "the authoring agent must NOT be authorized to read its own metric event (owner-only)" + ); + } } diff --git a/crates/buzz-relay/src/handlers/event.rs b/crates/buzz-relay/src/handlers/event.rs index f655ae057..a269db32b 100644 --- a/crates/buzz-relay/src/handlers/event.rs +++ b/crates/buzz-relay/src/handlers/event.rs @@ -287,10 +287,13 @@ pub(crate) async fn dispatch_persistent_event( let event_json = serde_json::to_string(&stored_event.event) .expect("nostr::Event serialization is infallible for well-formed events"); - // For viewer-private snapshots (kind:30622), live fan-out must reach only the - // owner — a kindless `ids:[…]` subscription can otherwise match it. Pull paths - // (HTTP /query, WS historical) are gated separately by reader_authorized_for_event. - let dm_visibility_owner: Option = (kind_u32 == buzz_core::kind::KIND_DM_VISIBILITY) + // For viewer-private events (kind:30622 DM visibility, kind:44200 agent turn + // metrics), live fan-out must reach only the owner — a kindless `ids:[…]` + // subscription can otherwise match it. Pull paths (HTTP /query, WS historical) + // are gated separately by reader_authorized_for_event. + let owner_only_kind = kind_u32 == buzz_core::kind::KIND_DM_VISIBILITY + || kind_u32 == buzz_core::kind::KIND_AGENT_TURN_METRIC; + let private_event_owner: Option = owner_only_kind .then(|| { let p = nostr::SingleLetterTag::lowercase(nostr::Alphabet::P); stored_event @@ -304,7 +307,7 @@ pub(crate) async fn dispatch_persistent_event( // filter_fanout_by_access, applied to `matches` above before this loop. 
let mut drop_count = 0u32; for (target_conn_id, sub_id) in &matches { - if let Some(ref owner_hex) = dm_visibility_owner { + if let Some(ref owner_hex) = private_event_owner { let is_owner = state .conn_manager .pubkey_for(*target_conn_id) diff --git a/crates/buzz-relay/src/handlers/req.rs b/crates/buzz-relay/src/handlers/req.rs index b43c04bca..7fdae503e 100644 --- a/crates/buzz-relay/src/handlers/req.rs +++ b/crates/buzz-relay/src/handlers/req.rs @@ -1322,16 +1322,16 @@ mod tests { ); // Case 2: kindless {ids:[...]} — the existing ids exemption applies - // (consistent with other p-gated kinds like member notifications). The - // relay's defense-in-depth for kind:44200 is: (a) the explicit-kind+ids - // carve-out above, (b) NULL tsvector storage preventing search discovery, - // and (c) the subscription delivery layer not returning 44200 events to - // non-owners. A kindless ids filter is authorized here because - // p_gated_filters_authorized cannot know which kind the id resolves to. + // at this filter-authorization gate (consistent with other p-gated kinds). + // The kindless path is closed at the result level by + // `reader_authorized_for_event` (buzz-core/src/filter.rs), which gates + // kind:44200 delivery to the #p owner across all pull paths (WS historical, + // HTTP bridge) and live fan-out. Pass-through here is correct; the + // result-level gate is the enforcement point for this path. let kindless_ids = Filter::new().id(nostr::EventId::from_hex(event_id).unwrap()); assert!( p_gated_filters_authorized(&[kindless_ids], authed), - "kindless ids filter passes this gate (consistent with member-notif behavior)" + "kindless ids filter passes this filter gate — result-level gate closes the path" ); // Case 3: owner querying by #p is allowed. 
From f8fe7873dbbfa3dcfc92f186d4d0d75e4142ab9f Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Wed, 1 Jul 2026 18:25:42 -0400 Subject: [PATCH 05/21] feat(buzz-acp): add goose usage adapter for NIP-AM turn metrics (Phase 2 Task B) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Advertise `clientCapabilities._meta.goose.customNotifications: true` at initialize so goose emits `_goose/unstable/session/update` notifications carrying session-cumulative token counts at turn completion. Add `GooseUsageTracker` (new `goose_usage.rs`) that: - Deserializes the `_goose/unstable/session/update` wire payload - Stores per-session cumulative state (`sessionId`, `turnSeq`, last snapshot) - Computes per-turn deltas per NIP-AM rules: first-turn no-prior → null + deltaReliable:false; counter decrease → null + false; session restart (new sessionId) → treated as first turn - Exposes a `GooseTurnUsage` record via `take()` for consumption by the TurnCompletionGuard emit hook (sequential next task) Wire both dispatch arms (`read_until_response` and `read_until_response_with_idle_timeout`) to handle the new method, mirroring the existing `session/update` pattern. Non-goose harnesses are unaffected: no capability advertised, no dispatch, no state kept. 
References #1441 (NIP-AM spec) Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/acp.rs | 176 ++++++++++++- crates/buzz-acp/src/goose_usage.rs | 382 +++++++++++++++++++++++++++++ crates/buzz-acp/src/lib.rs | 3 + 3 files changed, 559 insertions(+), 2 deletions(-) create mode 100644 crates/buzz-acp/src/goose_usage.rs diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index 0ba5a8c6a..a3194f76e 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -14,6 +14,7 @@ use tokio::process::{Child, ChildStdin, ChildStdout}; use tokio_util::codec::{FramedRead, LinesCodec, LinesCodecError}; use crate::observer::{ObserverContext, ObserverHandle}; +use crate::goose_usage::{GooseTurnUsage, GooseUsageTracker}; /// Maximum allowed size of a single NDJSON line from the agent's stdout. /// Lines exceeding this limit are rejected to prevent OOM from rogue agents. @@ -167,6 +168,11 @@ pub struct AcpClient { /// outside of a goose-native turn — the read loop's steer arm is /// disabled in that case. steer_rx: Option>, + /// Goose usage tracker — accumulates cumulative token counts from + /// `_goose/unstable/session/update` notifications and computes per-turn + /// deltas. Populated only when goose advertises the custom-notifications + /// capability; no-op for other harnesses. + goose_usage: GooseUsageTracker, } impl AcpClient { @@ -258,6 +264,7 @@ impl AcpClient { observer_context: ObserverContext::default(), active_run_id: None, steer_rx: None, + goose_usage: GooseUsageTracker::default(), }) } @@ -303,7 +310,16 @@ impl AcpClient { // on ACP v2 ahead of the upstream ACP RFD. Revisit when that RFD merges. let params = serde_json::json!({ "protocolVersion": 2, - "clientCapabilities": {}, + "clientCapabilities": { + // Signal to goose that we handle `_goose/unstable/session/update` + // notifications. Without this the custom notification is suppressed + // on goose's side and usage data is never emitted. 
+ "_meta": { + "goose": { + "customNotifications": true + } + } + }, "clientInfo": { "name": "buzz-acp", "version": env!("CARGO_PKG_VERSION") @@ -502,6 +518,21 @@ impl AcpClient { self.active_run_id.as_deref() } + /// Consume and return the per-turn usage record computed from the most + /// recent `_goose/unstable/session/update` notification. + /// + /// Returns `None` if no usage update arrived since the last call (i.e. + /// the harness did not emit one for this turn, or this is not a goose + /// agent). Must be called at most once per turn; subsequent calls return + /// `None` until the next `usage_update` notification is recorded. + /// + /// Intended for consumption by `TurnCompletionGuard` in `pool.rs` to + /// publish a kind 44200 NIP-AM event. + #[cfg_attr(not(test), allow(dead_code))] + pub fn take_turn_usage(&mut self) -> Option<GooseTurnUsage> { + self.goose_usage.take() + } + /// Install a per-turn steer request channel for goose-native /// non-cancelling mid-turn delivery. /// @@ -840,6 +871,9 @@ impl AcpClient { "session/update" => { let _ = self.handle_session_update(&msg); } + "_goose/unstable/session/update" => { + self.handle_goose_usage_update(&msg); + } "session/request_permission" => { self.handle_permission_request(&msg).await?; } @@ -1170,6 +1204,9 @@ impl AcpClient { idle_deadline = Instant::now() + idle_timeout; } } + "_goose/unstable/session/update" => { + self.handle_goose_usage_update(&msg); + } "session/request_permission" => { self.handle_permission_request(&msg).await?; } @@ -1311,6 +1348,46 @@ impl AcpClient { } } + /// Parse a `_goose/unstable/session/update` notification and record the + /// usage snapshot in the per-session tracker. + /// + /// Silently ignores malformed or non-`usage_update` variants — the + /// notification is best-effort observability data, not a protocol + /// requirement. Failures are logged at debug level.
+ fn handle_goose_usage_update(&mut self, msg: &serde_json::Value) { + use crate::goose_usage::{GooseSessionUpdateNotification, GooseSessionUpdateVariant}; + let params = match msg.get("params") { + Some(p) => p, + None => { + tracing::debug!( + target: "acp::usage", + "_goose/unstable/session/update: missing params" + ); + return; + } + }; + match serde_json::from_value::<GooseSessionUpdateNotification>(params.clone()) { + Ok(notif) => { + if let GooseSessionUpdateVariant::UsageUpdate(payload) = &notif.update { + tracing::debug!( + target: "acp::usage", + session_id = %notif.session_id, + input = payload.accumulated_input_tokens, + output = payload.accumulated_output_tokens, + "goose usage update" + ); + self.goose_usage.record(&notif.session_id, payload); + } + } + Err(e) => { + tracing::debug!( + target: "acp::usage", + "_goose/unstable/session/update: deserialization error: {e}" + ); + } + } + } + /// Auto-approve a `session/request_permission` request from the agent. /// /// Finds the option with `kind == "allow_once"` and responds with its `optionId`. @@ -1782,7 +1859,13 @@ mod tests { "method": "initialize", "params": { "protocolVersion": 2, - "clientCapabilities": {}, + "clientCapabilities": { + "_meta": { + "goose": { + "customNotifications": true + } + } + }, "clientInfo": { "name": "buzz-acp", "version": "0.1.0" @@ -1795,6 +1878,12 @@ mod tests { Some("buzz-acp") ); assert!(msg["params"]["clientCapabilities"].is_object()); + assert_eq!( + msg["params"]["clientCapabilities"]["_meta"]["goose"]["customNotifications"] + .as_bool(), + Some(true), + "goose customNotifications capability must be advertised" + ); } #[test] @@ -2825,4 +2914,87 @@ mod tests { other => panic!("expected SteerAck::Success, got {other:?}"), } } + + // ── Goose usage notification integration ────────────────────────────── + + /// Build a `_goose/unstable/session/update` JSON-RPC notification.
+ fn goose_usage_update_msg( + session_id: &str, + input: u64, + output: u64, + cost: Option, + ) -> serde_json::Value { + let mut update = serde_json::json!({ + "sessionUpdate": "usage_update", + "used": input + output, + "contextLimit": 200000u64, + "accumulatedInputTokens": input, + "accumulatedOutputTokens": output, + }); + if let Some(c) = cost { + update["accumulatedCost"] = serde_json::json!(c); + } + serde_json::json!({ + "jsonrpc": "2.0", + "method": "_goose/unstable/session/update", + "params": { + "sessionId": session_id, + "update": update + } + }) + } + + #[tokio::test] + async fn goose_usage_notification_recorded_and_take_returns_usage() { + let mut client = spawn_inert_client().await; + assert!(client.take_turn_usage().is_none(), "starts empty"); + + let msg = goose_usage_update_msg("s1", 1000, 200, Some(0.01)); + client.handle_goose_usage_update(&msg); + + let usage = client + .take_turn_usage() + .expect("usage should be present after notification"); + assert_eq!(usage.session_id, "s1"); + assert_eq!(usage.turn_seq, 1); + assert!(!usage.delta_reliable, "first turn must be unreliable"); + assert_eq!(usage.cumulative_input_tokens, 1000); + assert_eq!(usage.cumulative_output_tokens, 200); + assert_eq!(usage.cumulative_cost_usd, Some(0.01)); + + // Second take must be None. + assert!(client.take_turn_usage().is_none(), "take after drain is None"); + } + + #[tokio::test] + async fn goose_usage_second_turn_delta_reliable() { + let mut client = spawn_inert_client().await; + // Turn 1. + client.handle_goose_usage_update(&goose_usage_update_msg("s2", 1000, 200, None)); + let _ = client.take_turn_usage(); + // Turn 2. 
+ client.handle_goose_usage_update(&goose_usage_update_msg("s2", 1800, 450, None)); + let usage = client.take_turn_usage().expect("turn 2 usage"); + assert!(usage.delta_reliable); + assert_eq!(usage.turn_input_tokens, Some(800)); + assert_eq!(usage.turn_output_tokens, Some(250)); + } + + #[tokio::test] + async fn goose_usage_malformed_notification_does_not_panic() { + let mut client = spawn_inert_client().await; + // Missing params entirely. + let bad = serde_json::json!({"jsonrpc":"2.0","method":"_goose/unstable/session/update"}); + client.handle_goose_usage_update(&bad); + assert!(client.take_turn_usage().is_none()); + + // params present but wrong shape. + let bad2 = serde_json::json!({ + "jsonrpc": "2.0", + "method": "_goose/unstable/session/update", + "params": { "oops": true } + }); + client.handle_goose_usage_update(&bad2); + assert!(client.take_turn_usage().is_none()); + } } diff --git a/crates/buzz-acp/src/goose_usage.rs b/crates/buzz-acp/src/goose_usage.rs new file mode 100644 index 000000000..e7e93d976 --- /dev/null +++ b/crates/buzz-acp/src/goose_usage.rs @@ -0,0 +1,382 @@ +//! Goose-specific usage tracking for NIP-AM agent turn metrics. +//! +//! Goose emits a `_goose/unstable/session/update` notification (with +//! `sessionUpdate: "usage_update"`) at the end of every turn when the client +//! has advertised `clientCapabilities._meta.goose.customNotifications: true`. +//! The payload carries session-cumulative token counts from which we derive +//! per-turn deltas. +//! +//! # Delta computation +//! +//! Because goose only reports cumulative counters, the per-turn counts are +//! computed as `current − previous`. Three cases require special handling per +//! NIP-AM: +//! +//! 1. **First turn (no prior baseline):** delta unknown → `null` counts, +//! `delta_reliable: false`. +//! 2. **Counter decrease** (harness restart, overflow): delta would be +//! negative → `null` counts, `delta_reliable: false`. +//! 3. 
**Session restart** (caller supplies a new `session_id` not seen +//! before): treated as case 1 — fresh baseline, no delta for this turn. +//! +//! The `GooseTurnUsage` produced after each turn is consumed by the +//! `TurnCompletionGuard` in `pool.rs` to publish a kind 44200 relay event. + +use std::collections::HashMap; + +/// Wire-format deserialization for `_goose/unstable/session/update` params. +/// +/// Method: `_goose/unstable/session/update` +/// Shape (camelCase on the wire): +/// ```json +/// { +/// "sessionId": "...", +/// "update": { +/// "sessionUpdate": "usage_update", +/// "used": 12345, +/// "contextLimit": 200000, +/// "accumulatedInputTokens": 10000, +/// "accumulatedOutputTokens": 2345, +/// "accumulatedCost": 0.0234 +/// } +/// } +/// ``` +#[derive(Debug, Clone, serde::Deserialize)] +#[serde(rename_all = "camelCase")] +pub(crate) struct GooseSessionUpdateNotification { + pub session_id: String, + pub update: GooseSessionUpdateVariant, +} + +/// Discriminated union matching goose's `GooseSessionUpdate` enum on the wire. +/// We only care about `usage_update`; other variants are ignored. +#[derive(Debug, Clone, serde::Deserialize)] +#[serde(tag = "sessionUpdate", rename_all = "snake_case")] +pub(crate) enum GooseSessionUpdateVariant { + UsageUpdate(GooseUsageUpdatePayload), + #[serde(other)] + Other, +} + +/// The `usage_update` payload from goose. +#[derive(Debug, Clone, serde::Deserialize)] +#[serde(rename_all = "camelCase")] +pub(crate) struct GooseUsageUpdatePayload { + #[allow(dead_code)] + pub used: u64, + #[allow(dead_code)] + pub context_limit: u64, + pub accumulated_input_tokens: u64, + pub accumulated_output_tokens: u64, + pub accumulated_cost: Option, +} + +/// Per-session normalization state: the last cumulative snapshot we saw. +#[derive(Debug, Clone)] +struct SessionState { + /// Monotonically increasing per-session turn counter (1-based, incremented + /// on every recorded update). 
+ turn_seq: u64, + /// Cumulative input tokens at the end of the previous turn. + last_input: u64, + /// Cumulative output tokens at the end of the previous turn. + last_output: u64, + /// Cumulative cost at the end of the previous turn. + last_cost: Option, +} + +/// Per-turn usage record exposed to `TurnCompletionGuard` for NIP-AM publishing. +/// +/// `turn_*` fields are `None` when delta is unreliable (first turn or counter +/// decrease). `cumulative_*` fields are always present when goose reports them. +#[derive(Debug, Clone)] +pub struct GooseTurnUsage { + /// Goose session id (maps to NIP-AM `sessionId`). + pub session_id: String, + /// Per-session monotonic sequence number for this turn (maps to NIP-AM `turnSeq`). + pub turn_seq: u64, + /// Whether the `turn_*` delta fields are reliable. + pub delta_reliable: bool, + /// Per-turn input token delta; `None` when unreliable. + pub turn_input_tokens: Option, + /// Per-turn output token delta; `None` when unreliable. + pub turn_output_tokens: Option, + /// Per-turn cost delta (`current − previous`); `None` when unreliable or + /// either snapshot is missing. + pub turn_cost_usd: Option, + /// Session-cumulative input tokens as reported by goose at end of turn. + pub cumulative_input_tokens: u64, + /// Session-cumulative output tokens as reported by goose at end of turn. + pub cumulative_output_tokens: u64, + /// Session-cumulative estimated cost in USD; `None` if goose did not report it. + pub cumulative_cost_usd: Option, +} + +/// Tracks per-session cumulative usage state across turns. +/// +/// Cheap to construct; call [`record`] each time a `usage_update` notification +/// arrives, then [`take`] at turn completion to extract the normalized record. +#[derive(Debug, Default)] +pub(crate) struct GooseUsageTracker { + /// One entry per goose `sessionId` ever seen in this process. + sessions: HashMap, + /// The most recently computed turn usage, ready for `take()`. 
+ pending: Option, +} + +impl GooseUsageTracker { + /// Process a `usage_update` notification payload and store the normalized + /// per-turn record. Overwrites any previously stored-but-untaken record + /// (goose may send multiple updates per turn; the last one wins). + pub(crate) fn record( + &mut self, + session_id: &str, + payload: &GooseUsageUpdatePayload, + ) { + let current_input = payload.accumulated_input_tokens; + let current_output = payload.accumulated_output_tokens; + let current_cost = payload.accumulated_cost; + + let (delta_reliable, turn_input, turn_output, turn_cost, turn_seq) = + match self.sessions.get(session_id) { + None => { + // First turn for this session — no baseline yet. + (false, None, None, None, 1u64) + } + Some(prev) => { + let seq = prev.turn_seq + 1; + // Counter decrease → unreliable delta. + if current_input < prev.last_input || current_output < prev.last_output { + (false, None, None, None, seq) + } else { + let di = current_input - prev.last_input; + let dout = current_output - prev.last_output; + // Cost delta: only when both snapshots have cost. + let dc = match (current_cost, prev.last_cost) { + (Some(c), Some(p)) if c >= p => Some(c - p), + _ => None, + }; + (true, Some(di), Some(dout), dc, seq) + } + } + }; + + // Update the session state. + self.sessions.insert( + session_id.to_string(), + SessionState { + turn_seq, + last_input: current_input, + last_output: current_output, + last_cost: current_cost, + }, + ); + + self.pending = Some(GooseTurnUsage { + session_id: session_id.to_string(), + turn_seq, + delta_reliable, + turn_input_tokens: turn_input, + turn_output_tokens: turn_output, + turn_cost_usd: turn_cost, + cumulative_input_tokens: current_input, + cumulative_output_tokens: current_output, + cumulative_cost_usd: current_cost, + }); + } + + /// Consume and return the most recently computed turn usage record. 
+ /// + /// Returns `None` if no `usage_update` has arrived since the last `take` + /// (or since construction). The caller (turn completion hook) must handle + /// `None` — it means goose did not emit usage for this turn. + #[cfg_attr(not(test), allow(dead_code))] + pub(crate) fn take(&mut self) -> Option { + self.pending.take() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn payload(input: u64, output: u64, cost: Option) -> GooseUsageUpdatePayload { + GooseUsageUpdatePayload { + used: input + output, + context_limit: 200_000, + accumulated_input_tokens: input, + accumulated_output_tokens: output, + accumulated_cost: cost, + } + } + + // ── Delta computation: non-happy paths ───────────────────────────────── + + #[test] + fn first_turn_no_prior_delta_unreliable() { + let mut tracker = GooseUsageTracker::default(); + tracker.record("sess-1", &payload(1000, 200, Some(0.01))); + let usage = tracker.take().expect("should have pending usage"); + + assert_eq!(usage.session_id, "sess-1"); + assert_eq!(usage.turn_seq, 1); + assert!(!usage.delta_reliable, "first turn: delta must be unreliable"); + assert!(usage.turn_input_tokens.is_none()); + assert!(usage.turn_output_tokens.is_none()); + assert!(usage.turn_cost_usd.is_none()); + // Cumulative is still populated. + assert_eq!(usage.cumulative_input_tokens, 1000); + assert_eq!(usage.cumulative_output_tokens, 200); + assert_eq!(usage.cumulative_cost_usd, Some(0.01)); + } + + #[test] + fn counter_decrease_delta_unreliable_no_negatives() { + let mut tracker = GooseUsageTracker::default(); + // Turn 1 — establish baseline. + tracker.record("sess-2", &payload(5000, 1000, Some(0.05))); + let _ = tracker.take(); + + // Turn 2 — counter decreased (harness restart simulation). 
+ tracker.record("sess-2", &payload(100, 50, Some(0.001))); + let usage = tracker.take().expect("pending"); + + assert_eq!(usage.turn_seq, 2); + assert!(!usage.delta_reliable, "counter decrease: delta must be unreliable"); + assert!(usage.turn_input_tokens.is_none(), "no negative delta"); + assert!(usage.turn_output_tokens.is_none(), "no negative delta"); + assert!(usage.turn_cost_usd.is_none()); + } + + #[test] + fn session_restart_new_session_id_treated_as_first_turn() { + let mut tracker = GooseUsageTracker::default(); + // Original session. + tracker.record("sess-a", &payload(8000, 2000, None)); + let _ = tracker.take(); + + // New session_id — restart. Must behave like a first turn. + tracker.record("sess-b", &payload(500, 100, None)); + let usage = tracker.take().expect("pending"); + + assert_eq!(usage.session_id, "sess-b"); + assert_eq!(usage.turn_seq, 1); + assert!(!usage.delta_reliable, "new session: delta must be unreliable"); + assert!(usage.turn_input_tokens.is_none()); + } + + // ── Happy path ───────────────────────────────────────────────────────── + + #[test] + fn second_turn_delta_computed_correctly() { + let mut tracker = GooseUsageTracker::default(); + tracker.record("sess-3", &payload(1000, 200, Some(0.01))); + let _ = tracker.take(); + + tracker.record("sess-3", &payload(1800, 450, Some(0.018))); + let usage = tracker.take().expect("pending"); + + assert_eq!(usage.turn_seq, 2); + assert!(usage.delta_reliable); + assert_eq!(usage.turn_input_tokens, Some(800)); + assert_eq!(usage.turn_output_tokens, Some(250)); + // cost delta: 0.018 - 0.01 = 0.008 (floating-point; use approx check) + let dc = usage.turn_cost_usd.expect("cost delta present"); + assert!((dc - 0.008).abs() < 1e-9, "cost delta: {dc}"); + assert_eq!(usage.cumulative_input_tokens, 1800); + assert_eq!(usage.cumulative_output_tokens, 450); + } + + #[test] + fn take_returns_none_after_drain() { + let mut tracker = GooseUsageTracker::default(); + tracker.record("sess-4", &payload(100, 20, 
None)); + let _ = tracker.take(); + assert!(tracker.take().is_none(), "take after drain must be None"); + } + + #[test] + fn last_update_wins_multiple_updates_same_turn() { + let mut tracker = GooseUsageTracker::default(); + // Turn 1 — baseline. + tracker.record("sess-5", &payload(1000, 100, None)); + let _ = tracker.take(); + + // Two updates arrive before take() — each advances state independently; + // the second delta is computed from the first update's snapshot. + tracker.record("sess-5", &payload(1500, 150, None)); + tracker.record("sess-5", &payload(2000, 250, None)); + let usage = tracker.take().expect("pending"); + + // Cumulative from the last update. + assert_eq!(usage.cumulative_input_tokens, 2000); + assert_eq!(usage.cumulative_output_tokens, 250); + // Delta is from the previous intermediate snapshot (1500, 150) → (2000, 250). + assert_eq!(usage.turn_input_tokens, Some(500)); + assert_eq!(usage.turn_output_tokens, Some(100)); + } + + // ── Wire deserialization ──────────────────────────────────────────────── + + #[test] + fn notification_deserializes_from_wire_json() { + let raw = serde_json::json!({ + "sessionId": "abc-123", + "update": { + "sessionUpdate": "usage_update", + "used": 50000, + "contextLimit": 200000, + "accumulatedInputTokens": 40000, + "accumulatedOutputTokens": 10000, + "accumulatedCost": 0.42 + } + }); + let notif: GooseSessionUpdateNotification = + serde_json::from_value(raw).expect("deserialization"); + assert_eq!(notif.session_id, "abc-123"); + match notif.update { + GooseSessionUpdateVariant::UsageUpdate(p) => { + assert_eq!(p.accumulated_input_tokens, 40000); + assert_eq!(p.accumulated_output_tokens, 10000); + assert_eq!(p.accumulated_cost, Some(0.42)); + } + GooseSessionUpdateVariant::Other => panic!("expected UsageUpdate"), + } + } + + #[test] + fn other_variant_deserializes_without_error() { + let raw = serde_json::json!({ + "sessionId": "xyz", + "update": { + "sessionUpdate": "status_message", + "status": { "type": 
"notice", "message": "hi" } + } + }); + let notif: GooseSessionUpdateNotification = + serde_json::from_value(raw).expect("deserialization"); + assert!(matches!(notif.update, GooseSessionUpdateVariant::Other)); + } + + #[test] + fn missing_accumulated_cost_is_none() { + let raw = serde_json::json!({ + "sessionId": "s", + "update": { + "sessionUpdate": "usage_update", + "used": 100, + "contextLimit": 200000, + "accumulatedInputTokens": 80, + "accumulatedOutputTokens": 20 + } + }); + let notif: GooseSessionUpdateNotification = + serde_json::from_value(raw).expect("deserialization"); + match notif.update { + GooseSessionUpdateVariant::UsageUpdate(p) => { + assert!(p.accumulated_cost.is_none()); + } + _ => panic!("expected UsageUpdate"), + } + } +} diff --git a/crates/buzz-acp/src/lib.rs b/crates/buzz-acp/src/lib.rs index 21fa41cac..940a327aa 100644 --- a/crates/buzz-acp/src/lib.rs +++ b/crates/buzz-acp/src/lib.rs @@ -4,11 +4,14 @@ mod acp; mod config; mod engram_fetch; mod filter; +mod goose_usage; mod observer; mod pool; mod queue; mod relay; +pub use goose_usage::GooseTurnUsage; + use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::time::Duration; From c9a1458f5e3f95461df79a34442bc0f6819ffd5b Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Wed, 1 Jul 2026 18:47:14 -0400 Subject: [PATCH 06/21] fix(relay/core): plug COUNT existence-leak and StopReason forward-compat for NIP-AM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Thufir-flagged IMPORTANT fixes for PR #1445. Count gate (COUNT existence leak): - Add RESULT_GATED_KINDS = [KIND_DM_VISIBILITY, KIND_AGENT_TURN_METRIC] to kind.rs — explicit list of kinds that require per-event owner verification even for COUNT queries. - Add filter_can_match_result_gated_kinds() to req.rs — returns true when filter has no kinds constraint (wildcard) or includes a result-gated kind. 
- Add result_gated_count_safe_for_pushdown() to req.rs — safe to use fast SQL count_events() only when filter's #p tag is non-empty and all values equal the authenticated reader's pubkey. - Apply the guard in count.rs (WS): both with-channel and without-channel fast-path conditions now require !needs_result_gated_filtering; both fallback loops now call reader_authorized_for_event per event. - Apply the guard in bridge.rs (HTTP): same two fast-path conditions and same two fallback loops. - 6 unit tests covering wildcard/explicit/safe-pushdown/unsafe cases. StopReason forward-compatibility: - Replace #[derive(Deserialize)] on StopReason with a custom impl that maps any unrecognized string to StopReason::Unknown instead of returning an error; NIP-AM requires consumers to accept future stopReason values. - Add test unknown_stop_reason_maps_to_unknown_not_error: future value tool_limit deserializes to Unknown; token counts remain intact. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-core/src/agent_turn_metric.rs | 51 ++++++++++- crates/buzz-core/src/kind.rs | 9 ++ crates/buzz-relay/src/api/bridge.rs | 23 +++++ crates/buzz-relay/src/handlers/count.rs | 26 +++++- crates/buzz-relay/src/handlers/req.rs | 100 +++++++++++++++++++++- 5 files changed, 205 insertions(+), 4 deletions(-) diff --git a/crates/buzz-core/src/agent_turn_metric.rs b/crates/buzz-core/src/agent_turn_metric.rs index 325a94f9d..83565c4cd 100644 --- a/crates/buzz-core/src/agent_turn_metric.rs +++ b/crates/buzz-core/src/agent_turn_metric.rs @@ -46,7 +46,11 @@ pub struct TokenCounts { } /// Why a turn ended. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +/// +/// NIP-AM: consumers MUST treat unrecognized `stopReason` values as `Unknown` +/// and keep the token counts valid. Custom deserialization maps any unrecognized +/// string to `Unknown` instead of failing the whole payload. 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)] #[serde(rename_all = "snake_case")] pub enum StopReason { /// Model reached a natural end-of-turn. @@ -57,10 +61,24 @@ pub enum StopReason { Cancelled, /// Turn ended with an error. Error, - /// Stop reason is unknown. + /// Stop reason is unknown or unrecognized. Unknown, } +impl<'de> Deserialize<'de> for StopReason { + fn deserialize>(deserializer: D) -> Result { + let s = String::deserialize(deserializer)?; + Ok(match s.as_str() { + "end_turn" => StopReason::EndTurn, + "max_tokens" => StopReason::MaxTokens, + "cancelled" => StopReason::Cancelled, + "error" => StopReason::Error, + "unknown" => StopReason::Unknown, + _ => StopReason::Unknown, + }) + } +} + /// Decrypted payload of a `kind:44200` Agent Turn Metric event. /// /// `harness` and `timestamp` are REQUIRED. All other fields are optional or @@ -261,4 +279,33 @@ mod tests { let back: TokenCounts = serde_json::from_str(&json).unwrap(); assert_eq!(back, counts); } + + #[test] + fn unknown_stop_reason_maps_to_unknown_not_error() { + // NIP-AM: consumers MUST treat unrecognized stopReason values as Unknown; + // the token counts remain valid and the whole payload must not be rejected. + let json = r#"{ + "harness": "goose", + "timestamp": "2026-07-01T20:11:03Z", + "stopReason": "tool_limit", + "turn": { + "inputTokens": 1234, + "outputTokens": 567, + "totalTokens": 1801, + "costUsd": null + } + }"#; + let payload: AgentTurnMetricPayload = + serde_json::from_str(json).expect("payload with future stopReason must parse"); + assert_eq!( + payload.stop_reason, + Some(StopReason::Unknown), + "unrecognized stopReason must map to Unknown" + ); + // Token counts must be preserved. 
+ let turn = payload.turn.expect("turn must be present"); + assert_eq!(turn.input_tokens, Some(1234)); + assert_eq!(turn.output_tokens, Some(567)); + assert_eq!(turn.total_tokens, Some(1801)); + } } diff --git a/crates/buzz-core/src/kind.rs b/crates/buzz-core/src/kind.rs index 1da402cf1..c5ff127ba 100644 --- a/crates/buzz-core/src/kind.rs +++ b/crates/buzz-core/src/kind.rs @@ -110,6 +110,15 @@ pub const KIND_EVENT_REMINDER: u32 = 30300; /// a compile-time bitset or sorted array with binary search for hot-path use. pub const AUTHOR_ONLY_KINDS: &[u32] = &[KIND_EVENT_REMINDER]; +/// Kinds that require a result-level read gate beyond the filter-layer +/// `#p` check: even a reader who knows an event id MUST match the event's +/// `#p` tag to receive the event. This closes the kindless `{ids:[…]}` read +/// path for events whose existence must not be leaked. +/// +/// Used by `filter_can_match_result_gated_kinds` to force the per-event +/// fallback path in COUNT rather than the fast SQL `count_events()`. +pub const RESULT_GATED_KINDS: &[u32] = &[KIND_DM_VISIBILITY, KIND_AGENT_TURN_METRIC]; + /// Kinds whose stored events have `#p`-bound read access — readable only by /// subscribers whose pubkey appears in the event's `#p` tag. /// diff --git a/crates/buzz-relay/src/api/bridge.rs b/crates/buzz-relay/src/api/bridge.rs index 3882363d8..e381193df 100644 --- a/crates/buzz-relay/src/api/bridge.rs +++ b/crates/buzz-relay/src/api/bridge.rs @@ -708,6 +708,15 @@ pub async fn count_events( for filter in &filters { let needs_author_only_filtering = crate::handlers::req::filter_can_match_author_only_kinds(filter); + // Same result-gated guard as the WS COUNT handler: force the per-event + // fallback for filters that can match 44200 or 30622 unless #p=[self] + // is safely pushed down (existence leak otherwise). 
+ let needs_result_gated_filtering = + crate::handlers::req::filter_can_match_result_gated_kinds(filter) + && !crate::handlers::req::result_gated_count_safe_for_pushdown( + filter, + &authed_pubkey_hex, + ); // If filter targets a specific channel, verify access. if let Some(ch_id) = extract_channel_from_filter(filter) { @@ -730,6 +739,7 @@ pub async fn count_events( }); if crate::handlers::req::filter_fully_pushable(filter) && (!needs_author_only_filtering || author_is_self) + && !needs_result_gated_filtering { match state.db.count_events(&query).await { Ok(n) => total += n as u64, @@ -759,6 +769,12 @@ pub async fn count_events( { continue; } + if !buzz_core::filter::reader_authorized_for_event( + &se.event, + &authed_pubkey_hex, + ) { + continue; + } total += 1; } } @@ -787,6 +803,7 @@ pub async fn count_events( }); if crate::handlers::req::filter_fully_pushable(filter) && (!needs_author_only_filtering || author_is_self) + && !needs_result_gated_filtering { query.limit = None; match state.db.count_events(&query).await { @@ -816,6 +833,12 @@ pub async fn count_events( { continue; } + if !buzz_core::filter::reader_authorized_for_event( + &se.event, + &authed_pubkey_hex, + ) { + continue; + } total += 1; } } diff --git a/crates/buzz-relay/src/handlers/count.rs b/crates/buzz-relay/src/handlers/count.rs index 7cb488218..4689826f2 100644 --- a/crates/buzz-relay/src/handlers/count.rs +++ b/crates/buzz-relay/src/handlers/count.rs @@ -6,7 +6,9 @@ use nostr::Filter; use tracing::warn; use crate::connection::{AuthState, ConnectionState}; -use crate::handlers::req::is_author_only_event; +use crate::handlers::req::{ + filter_can_match_result_gated_kinds, is_author_only_event, result_gated_count_safe_for_pushdown, +}; use crate::protocol::RelayMessage; use crate::state::AppState; @@ -100,6 +102,14 @@ pub async fn handle_count( // fast-path count_events() cannot be used because it doesn't do // per-event author filtering. 
let needs_author_only_filtering = super::req::filter_can_match_author_only_kinds(filter); + // Determine if this filter can match result-gated kinds (44200, 30622) + // that require a per-event owner check. When the fast SQL path would + // count matching rows without calling reader_authorized_for_event, a + // non-owner learns the existence of events they are not allowed to see. + // The only safe pushdown is when #p is pinned to the authenticated + // reader's own pubkey. + let needs_result_gated_filtering = filter_can_match_result_gated_kinds(filter) + && !result_gated_count_safe_for_pushdown(filter, &authed_pubkey_hex); if let Some(ch_id) = extract_channel_from_filter(filter) { // Filter targets a specific channel — verify access. Mirrors the WS @@ -149,6 +159,7 @@ pub async fn handle_count( }); if super::req::filter_fully_pushable(filter) && (!needs_author_only_filtering || author_is_self) + && !needs_result_gated_filtering { match state.db.count_events(&query).await { Ok(n) => total += n as u64, @@ -179,6 +190,12 @@ pub async fn handle_count( if is_author_only_event(&se.event, &pubkey_bytes) { continue; } + if !buzz_core::filter::reader_authorized_for_event( + &se.event, + &authed_pubkey_hex, + ) { + continue; + } total += 1; } } @@ -212,6 +229,7 @@ pub async fn handle_count( }); if super::req::filter_fully_pushable(filter) && (!needs_author_only_filtering || author_is_self) + && !needs_result_gated_filtering { query.limit = None; // COUNT doesn't need a row limit match state.db.count_events(&query).await { @@ -242,6 +260,12 @@ pub async fn handle_count( if is_author_only_event(&se.event, &pubkey_bytes) { continue; } + if !buzz_core::filter::reader_authorized_for_event( + &se.event, + &authed_pubkey_hex, + ) { + continue; + } total += 1; } } diff --git a/crates/buzz-relay/src/handlers/req.rs b/crates/buzz-relay/src/handlers/req.rs index 7fdae503e..8fd003c97 100644 --- a/crates/buzz-relay/src/handlers/req.rs +++ b/crates/buzz-relay/src/handlers/req.rs @@ -8,7 
+8,7 @@ use tracing::{debug, warn}; use buzz_core::filter::filters_match; use buzz_core::kind::{ AUTHOR_ONLY_KINDS, KIND_AGENT_ENGRAM, KIND_AGENT_TURN_METRIC, KIND_DM_VISIBILITY, - P_GATED_KINDS, + P_GATED_KINDS, RESULT_GATED_KINDS, }; use buzz_core::tenant::TenantContext; use buzz_db::EventQuery; @@ -1064,6 +1064,44 @@ pub(crate) fn filter_can_match_author_only_kinds(filter: &Filter) -> bool { }) } +/// Returns `true` if the filter CAN match result-gated kinds — meaning it +/// either has no `kinds` constraint (wildcard) or includes at least one kind +/// that carries a per-event result-level read gate (currently +/// `KIND_DM_VISIBILITY` and `KIND_AGENT_TURN_METRIC`). +/// +/// Used by the COUNT handler to force the per-event fallback path instead of +/// the fast SQL `count_events()`, which cannot enforce the owner-only result +/// gate. An existence count leaks private event activity even though no content +/// is returned, violating the NIP-AM / NIP-DM requirement that knowing an id +/// MUST NOT grant access. +pub(crate) fn filter_can_match_result_gated_kinds(filter: &Filter) -> bool { + filter.kinds.as_ref().is_none_or(|ks| { + ks.iter() + .any(|k| RESULT_GATED_KINDS.contains(&(k.as_u16() as u32))) + }) +} + +/// Returns `true` if a result-gated-kind COUNT filter can safely use the fast +/// SQL pushdown path — specifically, when the filter's `#p` tag is non-empty +/// and every entry equals the authenticated reader's pubkey. +/// +/// In that case the SQL `WHERE #p = self` pushdown scopes the query to the +/// reader's own events, so the fast path cannot leak another owner's event +/// existence. This mirrors the owner's own subscription pattern from the NIP: +/// `{kinds:[44200], #p:[self]}`. +/// +/// When this returns `false`, the COUNT handler MUST use the per-event fallback +/// and apply `reader_authorized_for_event` on each row. 
+pub(crate) fn result_gated_count_safe_for_pushdown( + filter: &Filter, + authed_pubkey_hex: &str, +) -> bool { + let p_tag = nostr::SingleLetterTag::lowercase(nostr::Alphabet::P); + filter.generic_tags.get(&p_tag).is_some_and(|values| { + !values.is_empty() && values.iter().all(|v| v == authed_pubkey_hex) + }) +} + /// Returns `true` if the event is an author-only kind and the requester is NOT /// the author. Used as a per-event filter during historical delivery and fan-out /// to silently omit unauthorized events from mixed-kind result sets. @@ -1635,4 +1673,64 @@ mod tests { .search("x"); assert!(!p_gated_filters_authorized(&[f], &agent)); } + + // ── filter_can_match_result_gated_kinds + result_gated_count_safe_for_pushdown ── + + #[test] + fn result_gated_wildcard_filter_can_match() { + // No kinds constraint — could match anything, including 44200 / 30622. + let f = Filter::new(); + assert!(filter_can_match_result_gated_kinds(&f)); + } + + #[test] + fn result_gated_explicit_44200_can_match() { + let f = Filter::new() + .kind(nostr::Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16)); + assert!(filter_can_match_result_gated_kinds(&f)); + } + + #[test] + fn result_gated_explicit_30622_can_match() { + let f = Filter::new() + .kind(nostr::Kind::Custom(buzz_core::kind::KIND_DM_VISIBILITY as u16)); + assert!(filter_can_match_result_gated_kinds(&f)); + } + + #[test] + fn result_gated_kind_9_only_cannot_match() { + let f = Filter::new().kind(nostr::Kind::TextNote); + assert!(!filter_can_match_result_gated_kinds(&f)); + } + + #[test] + fn result_gated_safe_pushdown_requires_p_self() { + let (owner, _agent, _other) = three_pubkeys(); + let p_tag = nostr::SingleLetterTag::lowercase(nostr::Alphabet::P); + let f = nostr::Filter::new() + .kind(nostr::Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16)) + .custom_tags(p_tag, [owner.clone()]); + // Owner querying their own metrics — safe to push down. 
+ assert!(result_gated_count_safe_for_pushdown(&f, &owner)); + } + + #[test] + fn result_gated_safe_pushdown_rejects_when_p_is_other() { + let (owner, _agent, other) = three_pubkeys(); + let p_tag = nostr::SingleLetterTag::lowercase(nostr::Alphabet::P); + let f = nostr::Filter::new() + .kind(nostr::Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16)) + .custom_tags(p_tag, [other.clone()]); + // Authenticated as owner but #p is someone else — NOT safe. + assert!(!result_gated_count_safe_for_pushdown(&f, &owner)); + } + + #[test] + fn result_gated_safe_pushdown_rejects_when_no_p_tag() { + let (owner, _agent, _other) = three_pubkeys(); + let f = nostr::Filter::new() + .kind(nostr::Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16)); + // No #p tag — fallback required. + assert!(!result_gated_count_safe_for_pushdown(&f, &owner)); + } } From c9a583fb2cd94fdb814bd1a7a07b01dd4bc50cbf Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Wed, 1 Jul 2026 18:50:50 -0400 Subject: [PATCH 07/21] fix(acp): make GooseUsageTracker turn-scoped and close cost-decrease unreliable gap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Thufir-flagged IMPORTANT fixes for PR #1446. Turn scoping (setup usage misattributed to zero-update turn): - Add in_flight_session: Option field to GooseUsageTracker. - Add begin_turn(session_id) method: sets in_flight_session and clears pending. Must be called before session/prompt is sent. - record() now only sets pending when in_flight_session matches session_id. It ALWAYS updates the sessions baseline so the next real turn gets a correct delta even from setup notifications. - take() clears in_flight_session after draining pending. - Call goose_usage.begin_turn(session_id) at the top of session_prompt_blocks_with_idle_timeout, before sending the prompt. 
- Setup notifications that arrive during session/new now correctly update the baseline without polluting the first real turn's pending record. - New tests: setup_notification_before_begin_turn_returns_none (verifies baseline still feeds next delta), record_outside_in_flight_does_not_ clobber_pending. Cost counter decrease -> deltaReliable:false (Fix 2): - When both snapshots have cost and current_cost < prev_cost, the computed delta would be negative — NIP-AM requires delta_reliable: false and all turn fields nulled (same as token-decrease path). - The match arm now returns (None, false) for cost decrease; the outer if/else then overrides delta_reliable=false and nulls turn_input/output. - Cost merely absent on either side stays as-is (null cost, reliable tokens). - turn_seq still increments on cost-decrease turns (Thufir-endorsed). - New tests: cost_decrease_sets_delta_unreliable_and_nulls_all_turn_fields, cost_absent_on_one_side_leaves_tokens_reliable. Existing goose_usage unit tests and acp.rs integration tests updated to call begin_turn() before record(), matching the real call flow. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/acp.rs | 9 ++ crates/buzz-acp/src/goose_usage.rs | 218 +++++++++++++++++++++++++---- 2 files changed, 200 insertions(+), 27 deletions(-) diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index a3194f76e..8be63c304 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -443,6 +443,11 @@ impl AcpClient { let hard_deadline = tokio::time::Instant::now() + max_duration; self.current_hard_deadline = Some(hard_deadline); + // Mark the usage tracker as in-flight for this turn BEFORE sending the + // prompt so that any setup notifications recorded earlier are not + // misattributed to this turn. 
+ self.goose_usage.begin_turn(session_id); + self.last_prompt_id = Some(self.next_id); let id = self.next_id; self.next_id += 1; @@ -2949,6 +2954,8 @@ mod tests { let mut client = spawn_inert_client().await; assert!(client.take_turn_usage().is_none(), "starts empty"); + // begin_turn before sending the prompt — mirrors the real call flow. + client.goose_usage.begin_turn("s1"); let msg = goose_usage_update_msg("s1", 1000, 200, Some(0.01)); client.handle_goose_usage_update(&msg); @@ -2970,9 +2977,11 @@ mod tests { async fn goose_usage_second_turn_delta_reliable() { let mut client = spawn_inert_client().await; // Turn 1. + client.goose_usage.begin_turn("s2"); client.handle_goose_usage_update(&goose_usage_update_msg("s2", 1000, 200, None)); let _ = client.take_turn_usage(); // Turn 2. + client.goose_usage.begin_turn("s2"); client.handle_goose_usage_update(&goose_usage_update_msg("s2", 1800, 450, None)); let usage = client.take_turn_usage().expect("turn 2 usage"); assert!(usage.delta_reliable); diff --git a/crates/buzz-acp/src/goose_usage.rs b/crates/buzz-acp/src/goose_usage.rs index e7e93d976..4d198382c 100644 --- a/crates/buzz-acp/src/goose_usage.rs +++ b/crates/buzz-acp/src/goose_usage.rs @@ -114,20 +114,58 @@ pub struct GooseTurnUsage { /// Tracks per-session cumulative usage state across turns. /// -/// Cheap to construct; call [`record`] each time a `usage_update` notification -/// arrives, then [`take`] at turn completion to extract the normalized record. +/// Cheap to construct. Usage lifecycle per turn: +/// +/// 1. **`begin_turn(session_id)`** — call this immediately before sending +/// `session/prompt`. Marks the tracker as in-flight for the given session +/// and clears any leftover pending record from a previous turn. Setup +/// notifications that arrive *before* the first `begin_turn` (e.g. during +/// `session/new` setup) will still update the cumulative baseline but will +/// NOT produce a publishable record. +/// 2. 
**`record(session_id, payload)`** — called for each +/// `_goose/unstable/session/update` notification. Always updates the +/// cumulative baseline; only produces a publishable record when a turn is +/// currently in-flight for the matching session. +/// 3. **`take()`** — called at turn completion by `TurnCompletionGuard`. +/// Drains and returns the pending record (or `None` if goose did not emit +/// usage for this turn) and clears the in-flight marker. #[derive(Debug, Default)] pub(crate) struct GooseUsageTracker { /// One entry per goose `sessionId` ever seen in this process. sessions: HashMap, + /// The session that currently has an in-flight `session/prompt`. + /// `None` means no prompt is in flight; `record()` will still update + /// the baseline but will not set `pending`. + in_flight_session: Option, /// The most recently computed turn usage, ready for `take()`. pending: Option, } impl GooseUsageTracker { - /// Process a `usage_update` notification payload and store the normalized - /// per-turn record. Overwrites any previously stored-but-untaken record - /// (goose may send multiple updates per turn; the last one wins). + /// Mark the start of a new prompt turn for `session_id`. + /// + /// Clears any leftover `pending` record and records which session is + /// in-flight. Must be called before the corresponding `session/prompt` + /// request is sent so that setup notifications received before this call + /// do not become publishable for this turn. + pub(crate) fn begin_turn(&mut self, session_id: &str) { + self.in_flight_session = Some(session_id.to_string()); + self.pending = None; + } + + /// Process a `usage_update` notification payload. + /// + /// **Always** updates the cumulative baseline for `session_id` so that the + /// next in-flight turn can compute a correct delta even if this notification + /// arrived outside a turn (e.g. during `session/new` setup). 
+ /// + /// Only produces a publishable `pending` record when a turn is currently + /// in-flight for the matching `session_id`. If `in_flight_session` is + /// `None` or refers to a different session, the baseline is updated but + /// `pending` is left unchanged. + /// + /// When multiple notifications arrive during the same turn, the last one + /// wins (goose may emit several per turn; each increments `turn_seq`). pub(crate) fn record( &mut self, session_id: &str, @@ -140,28 +178,40 @@ impl GooseUsageTracker { let (delta_reliable, turn_input, turn_output, turn_cost, turn_seq) = match self.sessions.get(session_id) { None => { - // First turn for this session — no baseline yet. + // First notification for this session — no baseline yet. (false, None, None, None, 1u64) } Some(prev) => { let seq = prev.turn_seq + 1; - // Counter decrease → unreliable delta. + // Token counter decrease → unreliable delta. if current_input < prev.last_input || current_output < prev.last_output { (false, None, None, None, seq) } else { let di = current_input - prev.last_input; let dout = current_output - prev.last_output; // Cost delta: only when both snapshots have cost. - let dc = match (current_cost, prev.last_cost) { - (Some(c), Some(p)) if c >= p => Some(c - p), - _ => None, + // A cost *decrease* is also unreliable (NIP-AM: negative + // delta ⇒ delta_reliable false, null all turn fields). + let (dc, cost_reliable) = match (current_cost, prev.last_cost) { + (Some(c), Some(p)) if c >= p => (Some(c - p), true), + (Some(_), Some(_)) => { + // Both present but current < prev — counter decreased. + (None, false) + } + _ => (None, true), // absent on either side: null cost, reliable tokens }; - (true, Some(di), Some(dout), dc, seq) + if cost_reliable { + (true, Some(di), Some(dout), dc, seq) + } else { + // Cost decrease overrides the whole record to unreliable. + (false, None, None, None, seq) + } } } }; - // Update the session state. 
+ // Always advance the session baseline so the next in-flight turn can + // compute a correct delta even if this notification is from setup. self.sessions.insert( session_id.to_string(), SessionState { @@ -172,26 +222,31 @@ impl GooseUsageTracker { }, ); - self.pending = Some(GooseTurnUsage { - session_id: session_id.to_string(), - turn_seq, - delta_reliable, - turn_input_tokens: turn_input, - turn_output_tokens: turn_output, - turn_cost_usd: turn_cost, - cumulative_input_tokens: current_input, - cumulative_output_tokens: current_output, - cumulative_cost_usd: current_cost, - }); + // Only publish a pending record if this session is currently in-flight. + if self.in_flight_session.as_deref() == Some(session_id) { + self.pending = Some(GooseTurnUsage { + session_id: session_id.to_string(), + turn_seq, + delta_reliable, + turn_input_tokens: turn_input, + turn_output_tokens: turn_output, + turn_cost_usd: turn_cost, + cumulative_input_tokens: current_input, + cumulative_output_tokens: current_output, + cumulative_cost_usd: current_cost, + }); + } } - /// Consume and return the most recently computed turn usage record. + /// Consume and return the most recently computed turn usage record, then + /// clear the in-flight marker. /// - /// Returns `None` if no `usage_update` has arrived since the last `take` - /// (or since construction). The caller (turn completion hook) must handle - /// `None` — it means goose did not emit usage for this turn. + /// Returns `None` if no `usage_update` arrived during the current in-flight + /// turn (goose did not emit usage, or no `begin_turn` was called). The + /// caller (`TurnCompletionGuard`) must handle `None`. 
#[cfg_attr(not(test), allow(dead_code))] pub(crate) fn take(&mut self) -> Option { + self.in_flight_session = None; self.pending.take() } } @@ -210,11 +265,64 @@ mod tests { } } + // ── Turn scoping: setup notifications must not pollute the first real turn ─ + + #[test] + fn setup_notification_before_begin_turn_returns_none() { + // Regression: setup notifications fire during session/new (before any + // prompt). They must update the baseline but must NOT produce a + // publishable record for the next turn. + let mut tracker = GooseUsageTracker::default(); + + // Simulate a setup notification (no begin_turn called yet). + tracker.record("sess-setup", &payload(500, 100, Some(0.005))); + // No turn is in-flight — pending must stay None. + assert!( + tracker.pending.is_none(), + "setup notification must not set pending before begin_turn" + ); + + // The zero-update turn: begin_turn, no notification during prompt, take. + tracker.begin_turn("sess-setup"); + let result = tracker.take(); + assert!( + result.is_none(), + "zero-update turn after setup must return None" + ); + + // Baseline was still updated: the next real turn gets a correct delta. + tracker.begin_turn("sess-setup"); + tracker.record("sess-setup", &payload(1200, 300, Some(0.012))); + let usage = tracker.take().expect("second turn must have usage"); + + assert!(usage.delta_reliable, "baseline fed by setup: delta reliable"); + assert_eq!(usage.turn_input_tokens, Some(700)); // 1200 - 500 + assert_eq!(usage.turn_output_tokens, Some(200)); // 300 - 100 + let dc = usage.turn_cost_usd.expect("cost delta present"); + assert!((dc - 0.007).abs() < 1e-9, "cost delta: {dc}"); + } + + #[test] + fn record_outside_in_flight_does_not_clobber_pending() { + // A notification for a different session_id while another is in-flight + // must not overwrite the pending record. 
+ let mut tracker = GooseUsageTracker::default(); + tracker.begin_turn("sess-a"); + tracker.record("sess-a", &payload(1000, 200, None)); + + // Notification for a different session — should not touch pending. + tracker.record("sess-b", &payload(9000, 3000, None)); + + let usage = tracker.take().expect("sess-a pending must survive"); + assert_eq!(usage.session_id, "sess-a"); + } + // ── Delta computation: non-happy paths ───────────────────────────────── #[test] fn first_turn_no_prior_delta_unreliable() { let mut tracker = GooseUsageTracker::default(); + tracker.begin_turn("sess-1"); tracker.record("sess-1", &payload(1000, 200, Some(0.01))); let usage = tracker.take().expect("should have pending usage"); @@ -234,10 +342,12 @@ mod tests { fn counter_decrease_delta_unreliable_no_negatives() { let mut tracker = GooseUsageTracker::default(); // Turn 1 — establish baseline. + tracker.begin_turn("sess-2"); tracker.record("sess-2", &payload(5000, 1000, Some(0.05))); let _ = tracker.take(); // Turn 2 — counter decreased (harness restart simulation). + tracker.begin_turn("sess-2"); tracker.record("sess-2", &payload(100, 50, Some(0.001))); let usage = tracker.take().expect("pending"); @@ -248,14 +358,63 @@ mod tests { assert!(usage.turn_cost_usd.is_none()); } + #[test] + fn cost_decrease_sets_delta_unreliable_and_nulls_all_turn_fields() { + // Regression for Thufir fix 2: cost counter decrease must set + // delta_reliable = false and null all turn fields (not just cost). + // turn_seq still increments (NIP-AM: seq advances even on unreliable). + let mut tracker = GooseUsageTracker::default(); + // Turn 1 — establish baseline with cost. + tracker.begin_turn("sess-cost"); + tracker.record("sess-cost", &payload(1000, 200, Some(0.10))); + let t1 = tracker.take().expect("t1"); + assert_eq!(t1.turn_seq, 1); + + // Turn 2 — tokens monotone, but cost decreased. 
+ tracker.begin_turn("sess-cost"); + tracker.record("sess-cost", &payload(1500, 350, Some(0.05))); + let usage = tracker.take().expect("t2"); + + assert_eq!(usage.turn_seq, 2, "turn_seq must still increment"); + assert!(!usage.delta_reliable, "cost decrease: delta must be unreliable"); + assert!(usage.turn_input_tokens.is_none(), "all turn fields null on unreliable"); + assert!(usage.turn_output_tokens.is_none()); + assert!(usage.turn_cost_usd.is_none()); + // Cumulative values are unaffected. + assert_eq!(usage.cumulative_input_tokens, 1500); + assert_eq!(usage.cumulative_output_tokens, 350); + assert_eq!(usage.cumulative_cost_usd, Some(0.05)); + } + + #[test] + fn cost_absent_on_one_side_leaves_tokens_reliable() { + // Cost merely absent on either side: null cost, reliable tokens. + let mut tracker = GooseUsageTracker::default(); + tracker.begin_turn("sess-nocost"); + tracker.record("sess-nocost", &payload(1000, 200, Some(0.01))); + let _ = tracker.take(); + + // Turn 2 — no cost reported this time. + tracker.begin_turn("sess-nocost"); + tracker.record("sess-nocost", &payload(1800, 450, None)); + let usage = tracker.take().expect("pending"); + + assert!(usage.delta_reliable, "absent cost must not make delta unreliable"); + assert_eq!(usage.turn_input_tokens, Some(800)); + assert_eq!(usage.turn_output_tokens, Some(250)); + assert!(usage.turn_cost_usd.is_none(), "cost null when absent on either side"); + } + #[test] fn session_restart_new_session_id_treated_as_first_turn() { let mut tracker = GooseUsageTracker::default(); // Original session. + tracker.begin_turn("sess-a"); tracker.record("sess-a", &payload(8000, 2000, None)); let _ = tracker.take(); // New session_id — restart. Must behave like a first turn. 
+ tracker.begin_turn("sess-b"); tracker.record("sess-b", &payload(500, 100, None)); let usage = tracker.take().expect("pending"); @@ -270,9 +429,11 @@ mod tests { #[test] fn second_turn_delta_computed_correctly() { let mut tracker = GooseUsageTracker::default(); + tracker.begin_turn("sess-3"); tracker.record("sess-3", &payload(1000, 200, Some(0.01))); let _ = tracker.take(); + tracker.begin_turn("sess-3"); tracker.record("sess-3", &payload(1800, 450, Some(0.018))); let usage = tracker.take().expect("pending"); @@ -290,6 +451,7 @@ mod tests { #[test] fn take_returns_none_after_drain() { let mut tracker = GooseUsageTracker::default(); + tracker.begin_turn("sess-4"); tracker.record("sess-4", &payload(100, 20, None)); let _ = tracker.take(); assert!(tracker.take().is_none(), "take after drain must be None"); @@ -299,11 +461,13 @@ mod tests { fn last_update_wins_multiple_updates_same_turn() { let mut tracker = GooseUsageTracker::default(); // Turn 1 — baseline. + tracker.begin_turn("sess-5"); tracker.record("sess-5", &payload(1000, 100, None)); let _ = tracker.take(); // Two updates arrive before take() — each advances state independently; // the second delta is computed from the first update's snapshot. + tracker.begin_turn("sess-5"); tracker.record("sess-5", &payload(1500, 150, None)); tracker.record("sess-5", &payload(2000, 250, None)); let usage = tracker.take().expect("pending"); From 6771cafca9ff0ade1a446e9008cf0a2203b83a01 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Wed, 1 Jul 2026 19:03:02 -0400 Subject: [PATCH 08/21] chore(fmt): run rustfmt on NIP-AM kind 44200 relay changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure formatting pass — no logic changes. Fixes just fmt-check failure in CI (Rust Lint job 84653617621). Import group reflow in agent_turn_metric.rs and line-length wrapping in ingest.rs and req.rs. 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-core/src/agent_turn_metric.rs | 15 +++++----- crates/buzz-core/src/lib.rs | 4 +-- crates/buzz-relay/src/handlers/ingest.rs | 35 ++++++++++------------- crates/buzz-relay/src/handlers/req.rs | 30 ++++++++++++------- 4 files changed, 43 insertions(+), 41 deletions(-) diff --git a/crates/buzz-core/src/agent_turn_metric.rs b/crates/buzz-core/src/agent_turn_metric.rs index 83565c4cd..8344fca89 100644 --- a/crates/buzz-core/src/agent_turn_metric.rs +++ b/crates/buzz-core/src/agent_turn_metric.rs @@ -9,9 +9,7 @@ use nostr::{Event, Keys, PublicKey}; use serde::{Deserialize, Serialize}; -use crate::observer::{ - decrypt_observer_payload, encrypt_observer_payload, ObserverPayloadError, -}; +use crate::observer::{decrypt_observer_payload, encrypt_observer_payload, ObserverPayloadError}; // Re-export for callers that only need the error type. pub use crate::observer::ObserverPayloadError as AgentTurnMetricError; @@ -208,8 +206,7 @@ mod tests { .sign_with_keys(&agent_keys) .expect("sign"); - let decoded = - decrypt_agent_turn_metric(&owner_keys, &event).expect("decrypt"); + let decoded = decrypt_agent_turn_metric(&owner_keys, &event).expect("decrypt"); assert_eq!(decoded, payload); } @@ -239,9 +236,11 @@ mod tests { #[test] fn delta_reliable_defaults_to_true_when_absent() { let json = r#"{"harness":"goose","timestamp":"2026-07-01T20:11:03Z"}"#; - let payload: AgentTurnMetricPayload = - serde_json::from_str(json).expect("parse"); - assert!(payload.delta_reliable, "deltaReliable should default to true"); + let payload: AgentTurnMetricPayload = serde_json::from_str(json).expect("parse"); + assert!( + payload.delta_reliable, + "deltaReliable should default to true" + ); } #[test] diff --git a/crates/buzz-core/src/lib.rs b/crates/buzz-core/src/lib.rs index 7c3a1c38d..6139ee3ae 100644 --- a/crates/buzz-core/src/lib.rs +++ b/crates/buzz-core/src/lib.rs @@ -5,6 +5,8 @@ //! 
Provides [`StoredEvent`], filter matching, kind constants, and event //! verification. All other Buzz crates depend on this one. +/// NIP-AM: Agent Turn Metric — payload type and encrypt/decrypt helpers. +pub mod agent_turn_metric; /// Channel and membership enums shared across crates. pub mod channel; /// NIP-AE Agent Engrams — slug grammar, conversation key, d-tag derivation, @@ -24,8 +26,6 @@ pub mod kind; pub mod network; /// Agent observer frame helpers. pub mod observer; -/// NIP-AM: Agent Turn Metric — payload type and encrypt/decrypt helpers. -pub mod agent_turn_metric; /// NIP-AB device pairing — crypto primitives, message types, and errors. pub mod pairing; /// Presence status types shared across crates. diff --git a/crates/buzz-relay/src/handlers/ingest.rs b/crates/buzz-relay/src/handlers/ingest.rs index 38e6ed95e..6c7e91fb7 100644 --- a/crates/buzz-relay/src/handlers/ingest.rs +++ b/crates/buzz-relay/src/handlers/ingest.rs @@ -13,9 +13,8 @@ use buzz_auth::Scope; use buzz_core::kind::{ event_kind_u32, is_identity_archive_request_kind, is_parameterized_replaceable, is_relay_admin_kind, KIND_AGENT_ENGRAM, KIND_AGENT_PROFILE, KIND_AGENT_TURN_METRIC, - KIND_APPROVAL_DENY, - KIND_APPROVAL_GRANT, KIND_AUTH, KIND_BOOKMARK_LIST, KIND_BOOKMARK_SET, KIND_CANVAS, - KIND_CONTACT_LIST, KIND_DELETION, KIND_DM_ADD_MEMBER, KIND_DM_HIDE, KIND_DM_OPEN, + KIND_APPROVAL_DENY, KIND_APPROVAL_GRANT, KIND_AUTH, KIND_BOOKMARK_LIST, KIND_BOOKMARK_SET, + KIND_CANVAS, KIND_CONTACT_LIST, KIND_DELETION, KIND_DM_ADD_MEMBER, KIND_DM_HIDE, KIND_DM_OPEN, KIND_EMOJI_LIST, KIND_EMOJI_SET, KIND_EVENT_REMINDER, KIND_FOLLOW_SET, KIND_FORUM_COMMENT, KIND_FORUM_POST, KIND_FORUM_VOTE, KIND_GIFT_WRAP, KIND_GIT_ISSUE, KIND_GIT_PATCH, KIND_GIT_PR_UPDATE, KIND_GIT_PULL_REQUEST, KIND_GIT_REPO_ANNOUNCEMENT, KIND_GIT_REPO_STATE, @@ -1104,7 +1103,11 @@ fn validate_agent_turn_metric_envelope(event: &nostr::Event) -> Result<(), Strin )); } let p = p_tags[0]; - if p.len() != 64 || !p.bytes().all(|b| 
b.is_ascii_hexdigit() && !b.is_ascii_uppercase()) { + if p.len() != 64 + || !p + .bytes() + .all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase()) + { return Err("agent-turn-metric `p` tag must be 64 lowercase hex chars".to_string()); } @@ -1116,14 +1119,14 @@ fn validate_agent_turn_metric_envelope(event: &nostr::Event) -> Result<(), Strin } let agent = agent_tags[0]; if agent.len() != 64 - || !agent.bytes().all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase()) + || !agent + .bytes() + .all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase()) { return Err("agent-turn-metric `agent` tag must be 64 lowercase hex chars".to_string()); } if agent != event_pubkey_hex { - return Err( - "agent-turn-metric `agent` tag must equal event pubkey".to_string(), - ); + return Err("agent-turn-metric `agent` tag must equal event pubkey".to_string()); } // Content must look like a NIP-44 v2 ciphertext (length, base64, version prefix). @@ -3122,8 +3125,7 @@ mod tests { fn agent_turn_metric_envelope_rejects_missing_p() { let agent = nostr::Keys::generate(); let agent_hex = agent.public_key().to_hex(); - let ev = - make_agent_turn_metric(&agent, &[&["agent", &agent_hex]], &fake_nip44_v2()); + let ev = make_agent_turn_metric(&agent, &[&["agent", &agent_hex]], &fake_nip44_v2()); let err = validate_agent_turn_metric_envelope(&ev).unwrap_err(); assert!(err.contains("`p` tag"), "got: {err}"); } @@ -3132,8 +3134,7 @@ mod tests { fn agent_turn_metric_envelope_rejects_missing_agent() { let agent = nostr::Keys::generate(); let owner_hex = "b".repeat(64); - let ev = - make_agent_turn_metric(&agent, &[&["p", &owner_hex]], &fake_nip44_v2()); + let ev = make_agent_turn_metric(&agent, &[&["p", &owner_hex]], &fake_nip44_v2()); let err = validate_agent_turn_metric_envelope(&ev).unwrap_err(); assert!(err.contains("`agent` tag"), "got: {err}"); } @@ -3149,10 +3150,7 @@ mod tests { &fake_nip44_v2(), ); let err = validate_agent_turn_metric_envelope(&ev).unwrap_err(); - assert!( - 
err.contains("equal event pubkey"), - "got: {err}" - ); + assert!(err.contains("equal event pubkey"), "got: {err}"); } #[test] @@ -3167,9 +3165,6 @@ mod tests { ); let err = validate_agent_turn_metric_envelope(&ev).unwrap_err(); // error comes from validate_engram_nip44_content with label replaced - assert!( - err.contains("agent-turn-metric"), - "got: {err}" - ); + assert!(err.contains("agent-turn-metric"), "got: {err}"); } } diff --git a/crates/buzz-relay/src/handlers/req.rs b/crates/buzz-relay/src/handlers/req.rs index 8fd003c97..1b1ce4483 100644 --- a/crates/buzz-relay/src/handlers/req.rs +++ b/crates/buzz-relay/src/handlers/req.rs @@ -1097,9 +1097,10 @@ pub(crate) fn result_gated_count_safe_for_pushdown( authed_pubkey_hex: &str, ) -> bool { let p_tag = nostr::SingleLetterTag::lowercase(nostr::Alphabet::P); - filter.generic_tags.get(&p_tag).is_some_and(|values| { - !values.is_empty() && values.iter().all(|v| v == authed_pubkey_hex) - }) + filter + .generic_tags + .get(&p_tag) + .is_some_and(|values| !values.is_empty() && values.iter().all(|v| v == authed_pubkey_hex)) } /// Returns `true` if the event is an author-only kind and the requester is NOT @@ -1685,15 +1686,17 @@ mod tests { #[test] fn result_gated_explicit_44200_can_match() { - let f = Filter::new() - .kind(nostr::Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16)); + let f = Filter::new().kind(nostr::Kind::Custom( + buzz_core::kind::KIND_AGENT_TURN_METRIC as u16, + )); assert!(filter_can_match_result_gated_kinds(&f)); } #[test] fn result_gated_explicit_30622_can_match() { - let f = Filter::new() - .kind(nostr::Kind::Custom(buzz_core::kind::KIND_DM_VISIBILITY as u16)); + let f = Filter::new().kind(nostr::Kind::Custom( + buzz_core::kind::KIND_DM_VISIBILITY as u16, + )); assert!(filter_can_match_result_gated_kinds(&f)); } @@ -1708,7 +1711,9 @@ mod tests { let (owner, _agent, _other) = three_pubkeys(); let p_tag = nostr::SingleLetterTag::lowercase(nostr::Alphabet::P); let f = 
nostr::Filter::new() - .kind(nostr::Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16)) + .kind(nostr::Kind::Custom( + buzz_core::kind::KIND_AGENT_TURN_METRIC as u16, + )) .custom_tags(p_tag, [owner.clone()]); // Owner querying their own metrics — safe to push down. assert!(result_gated_count_safe_for_pushdown(&f, &owner)); @@ -1719,7 +1724,9 @@ mod tests { let (owner, _agent, other) = three_pubkeys(); let p_tag = nostr::SingleLetterTag::lowercase(nostr::Alphabet::P); let f = nostr::Filter::new() - .kind(nostr::Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16)) + .kind(nostr::Kind::Custom( + buzz_core::kind::KIND_AGENT_TURN_METRIC as u16, + )) .custom_tags(p_tag, [other.clone()]); // Authenticated as owner but #p is someone else — NOT safe. assert!(!result_gated_count_safe_for_pushdown(&f, &owner)); @@ -1728,8 +1735,9 @@ mod tests { #[test] fn result_gated_safe_pushdown_rejects_when_no_p_tag() { let (owner, _agent, _other) = three_pubkeys(); - let f = nostr::Filter::new() - .kind(nostr::Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16)); + let f = nostr::Filter::new().kind(nostr::Kind::Custom( + buzz_core::kind::KIND_AGENT_TURN_METRIC as u16, + )); // No #p tag — fallback required. assert!(!result_gated_count_safe_for_pushdown(&f, &owner)); } From f3f751ca0ba4baf29c2facfbcc71d028d09c9765 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Wed, 1 Jul 2026 19:03:02 -0400 Subject: [PATCH 09/21] chore(fmt): run rustfmt on NIP-AM goose adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure formatting pass — no logic changes. Fixes just fmt-check failure in CI (Rust Lint job 84654119247). Line-length wrapping in acp.rs and goose_usage.rs (record signature, assert! calls). 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/acp.rs | 10 ++++--- crates/buzz-acp/src/goose_usage.rs | 46 +++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index 8be63c304..4d63dbe6b 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -13,8 +13,8 @@ use tokio::io::AsyncWriteExt; use tokio::process::{Child, ChildStdin, ChildStdout}; use tokio_util::codec::{FramedRead, LinesCodec, LinesCodecError}; -use crate::observer::{ObserverContext, ObserverHandle}; use crate::goose_usage::{GooseTurnUsage, GooseUsageTracker}; +use crate::observer::{ObserverContext, ObserverHandle}; /// Maximum allowed size of a single NDJSON line from the agent's stdout. /// Lines exceeding this limit are rejected to prevent OOM from rogue agents. @@ -1884,8 +1884,7 @@ mod tests { ); assert!(msg["params"]["clientCapabilities"].is_object()); assert_eq!( - msg["params"]["clientCapabilities"]["_meta"]["goose"]["customNotifications"] - .as_bool(), + msg["params"]["clientCapabilities"]["_meta"]["goose"]["customNotifications"].as_bool(), Some(true), "goose customNotifications capability must be advertised" ); @@ -2970,7 +2969,10 @@ mod tests { assert_eq!(usage.cumulative_cost_usd, Some(0.01)); // Second take must be None. - assert!(client.take_turn_usage().is_none(), "take after drain is None"); + assert!( + client.take_turn_usage().is_none(), + "take after drain is None" + ); } #[tokio::test] diff --git a/crates/buzz-acp/src/goose_usage.rs b/crates/buzz-acp/src/goose_usage.rs index 4d198382c..0c68d8913 100644 --- a/crates/buzz-acp/src/goose_usage.rs +++ b/crates/buzz-acp/src/goose_usage.rs @@ -166,11 +166,7 @@ impl GooseUsageTracker { /// /// When multiple notifications arrive during the same turn, the last one /// wins (goose may emit several per turn; each increments `turn_seq`). 
- pub(crate) fn record( - &mut self, - session_id: &str, - payload: &GooseUsageUpdatePayload, - ) { + pub(crate) fn record(&mut self, session_id: &str, payload: &GooseUsageUpdatePayload) { let current_input = payload.accumulated_input_tokens; let current_output = payload.accumulated_output_tokens; let current_cost = payload.accumulated_cost; @@ -295,7 +291,10 @@ mod tests { tracker.record("sess-setup", &payload(1200, 300, Some(0.012))); let usage = tracker.take().expect("second turn must have usage"); - assert!(usage.delta_reliable, "baseline fed by setup: delta reliable"); + assert!( + usage.delta_reliable, + "baseline fed by setup: delta reliable" + ); assert_eq!(usage.turn_input_tokens, Some(700)); // 1200 - 500 assert_eq!(usage.turn_output_tokens, Some(200)); // 300 - 100 let dc = usage.turn_cost_usd.expect("cost delta present"); @@ -328,7 +327,10 @@ mod tests { assert_eq!(usage.session_id, "sess-1"); assert_eq!(usage.turn_seq, 1); - assert!(!usage.delta_reliable, "first turn: delta must be unreliable"); + assert!( + !usage.delta_reliable, + "first turn: delta must be unreliable" + ); assert!(usage.turn_input_tokens.is_none()); assert!(usage.turn_output_tokens.is_none()); assert!(usage.turn_cost_usd.is_none()); @@ -352,7 +354,10 @@ mod tests { let usage = tracker.take().expect("pending"); assert_eq!(usage.turn_seq, 2); - assert!(!usage.delta_reliable, "counter decrease: delta must be unreliable"); + assert!( + !usage.delta_reliable, + "counter decrease: delta must be unreliable" + ); assert!(usage.turn_input_tokens.is_none(), "no negative delta"); assert!(usage.turn_output_tokens.is_none(), "no negative delta"); assert!(usage.turn_cost_usd.is_none()); @@ -376,8 +381,14 @@ mod tests { let usage = tracker.take().expect("t2"); assert_eq!(usage.turn_seq, 2, "turn_seq must still increment"); - assert!(!usage.delta_reliable, "cost decrease: delta must be unreliable"); - assert!(usage.turn_input_tokens.is_none(), "all turn fields null on unreliable"); + assert!( + 
!usage.delta_reliable, + "cost decrease: delta must be unreliable" + ); + assert!( + usage.turn_input_tokens.is_none(), + "all turn fields null on unreliable" + ); assert!(usage.turn_output_tokens.is_none()); assert!(usage.turn_cost_usd.is_none()); // Cumulative values are unaffected. @@ -399,10 +410,16 @@ mod tests { tracker.record("sess-nocost", &payload(1800, 450, None)); let usage = tracker.take().expect("pending"); - assert!(usage.delta_reliable, "absent cost must not make delta unreliable"); + assert!( + usage.delta_reliable, + "absent cost must not make delta unreliable" + ); assert_eq!(usage.turn_input_tokens, Some(800)); assert_eq!(usage.turn_output_tokens, Some(250)); - assert!(usage.turn_cost_usd.is_none(), "cost null when absent on either side"); + assert!( + usage.turn_cost_usd.is_none(), + "cost null when absent on either side" + ); } #[test] @@ -420,7 +437,10 @@ mod tests { assert_eq!(usage.session_id, "sess-b"); assert_eq!(usage.turn_seq, 1); - assert!(!usage.delta_reliable, "new session: delta must be unreliable"); + assert!( + !usage.delta_reliable, + "new session: delta must be unreliable" + ); assert!(usage.turn_input_tokens.is_none()); } From 3011944d09944c1d7bcfdb573bc57db00a2824fd Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 2 Jul 2026 10:49:39 -0400 Subject: [PATCH 10/21] feat(acp,buzz-agent): publish NIP-AM kind 44200 agent turn metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire emit hook into buzz-acp pool.rs: at turn completion, drain take_turn_usage() and publish a kind 44200 NIP-AM metric event via publish_agent_turn_metric(). Covers all exit paths (Ok, AgentExited, IdleTimeout, HardTimeout, general error). Best-effort — failures log WARN and never fail the turn. 
Add native buzz-agent adapter: track per-turn input/output token accumulators in RunCtx (summed across all LLM rounds), parse output_tokens from all provider response formats (Anthropic, OpenAI, Responses API), build MetricPublisher from BUZZ_PRIVATE_KEY / BUZZ_RELAY_URL / BUZZ_AGENT_OWNER_PUBKEY env vars with NIP-98 auth, publish at session/prompt completion. Tests: acp_stop_to_core mapping, publish no-op on missing usage/owner, encrypt+sign path executes; output_tokens parsing for all three providers; MetricPublisher from_env noop/configured. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- Cargo.lock | 4 + crates/buzz-acp/src/acp.rs | 3 +- crates/buzz-acp/src/pool.rs | 338 ++++++++++++++++++++++++++++++++ crates/buzz-agent/Cargo.toml | 4 + crates/buzz-agent/src/agent.rs | 18 ++ crates/buzz-agent/src/lib.rs | 47 +++++ crates/buzz-agent/src/llm.rs | 77 ++++++++ crates/buzz-agent/src/metric.rs | 281 ++++++++++++++++++++++++++ crates/buzz-agent/src/types.rs | 4 + 9 files changed, 774 insertions(+), 2 deletions(-) create mode 100644 crates/buzz-agent/src/metric.rs diff --git a/Cargo.lock b/Cargo.lock index ebbe9def5..9801b93eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -780,9 +780,12 @@ dependencies = [ "async-trait", "axum", "base64", + "buzz-core", + "chrono", "getrandom 0.4.2", "hex", "nix 0.31.3", + "nostr", "reqwest 0.13.3", "rmcp", "serde", @@ -794,6 +797,7 @@ dependencies = [ "tracing", "tracing-subscriber", "urlencoding", + "uuid", "webbrowser", ] diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index 4d63dbe6b..d8ba8dfeb 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -531,9 +531,8 @@ impl AcpClient { /// agent). Must be called at most once per turn; subsequent calls return /// `None` until the next `usage_update` notification is recorded. 
/// - /// Intended for consumption by `TurnCompletionGuard` in `pool.rs` to + /// Intended for consumption by `publish_agent_turn_metric` in `pool.rs` to /// publish a kind 44200 NIP-AM event. - #[cfg_attr(not(test), allow(dead_code))] pub fn take_turn_usage(&mut self) -> Option { self.goose_usage.take() } diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index b71c60839..2774d65a4 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -1676,6 +1676,18 @@ pub async fn run_prompt_task( agent.state.invalidate(&source); } + let core_stop = acp_stop_to_core(&stop_reason); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(core_stop), + ) + .await; + send_prompt_result( &result_tx, agent, @@ -1687,6 +1699,16 @@ pub async fn run_prompt_task( Err(AcpError::AgentExited) => { tracing::error!(target: "pool::prompt", "agent {} exited during prompt", agent.index); agent.state.invalidate_all(); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::Error), + ) + .await; send_prompt_result( &result_tx, agent, @@ -1708,6 +1730,16 @@ pub async fn run_prompt_task( { Ok(stop_reason) => { log_stop_reason(&source, &stop_reason); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::Cancelled), + ) + .await; // Timeout triggers respawn in handle_prompt_result — // session state will be discarded with the old agent. 
send_prompt_result( @@ -1725,6 +1757,16 @@ pub async fn run_prompt_task( agent.index ); agent.state.invalidate_all(); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::Error), + ) + .await; send_prompt_result( &result_tx, agent, @@ -1739,6 +1781,16 @@ pub async fn run_prompt_task( "cancel_with_cleanup error: {e} — invalidating session" ); agent.state.invalidate(&source); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::Error), + ) + .await; send_prompt_result( &result_tx, agent, @@ -1756,6 +1808,16 @@ pub async fn run_prompt_task( ctx.max_turn_duration.as_secs() ); agent.state.invalidate_all(); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::Error), + ) + .await; send_prompt_result( &result_tx, agent, @@ -1772,6 +1834,16 @@ pub async fn run_prompt_task( if !matches!(e, AcpError::AgentError(_)) { agent.state.invalidate(&source); } + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::Error), + ) + .await; send_prompt_result( &result_tx, agent, @@ -2557,6 +2629,131 @@ impl Drop for TurnCompletionGuard { } } +/// Map an ACP `StopReason` to the NIP-AM `StopReason` used in kind 44200 payloads. 
+fn acp_stop_to_core(r: &StopReason) -> buzz_core::agent_turn_metric::StopReason { + use buzz_core::agent_turn_metric::StopReason as CoreStop; + match r { + StopReason::EndTurn => CoreStop::EndTurn, + StopReason::Cancelled => CoreStop::Cancelled, + StopReason::MaxTokens => CoreStop::MaxTokens, + StopReason::MaxTurnRequests => CoreStop::Unknown, + StopReason::Refusal => CoreStop::Unknown, + } +} + +/// Best-effort: build and publish a `kind:44200` NIP-AM agent turn metric event. +/// +/// Does nothing when `usage` is `None` (goose emitted no usage notification +/// for this turn) or when `owner_pubkey` is unconfigured (no NIP-AO identity). +/// Errors are logged at WARN and never surface to the caller — metric +/// publishing must never fail a turn. +async fn publish_agent_turn_metric( + ctx: &PromptContext, + usage: Option, + channel_id: Option, + session_id: &str, + turn_id: &str, + stop_reason: Option, +) { + use buzz_core::agent_turn_metric::{AgentTurnMetricPayload, TokenCounts}; + use nostr::{EventBuilder, Kind, Tag}; + + let (usage, owner_pk) = match (usage, ctx.agent_owner_pubkey.as_ref()) { + (Some(u), Some(pk)) => (u, pk), + _ => return, + }; + + let turn_counts = if usage.delta_reliable { + Some(TokenCounts { + input_tokens: usage.turn_input_tokens, + output_tokens: usage.turn_output_tokens, + total_tokens: None, + cost_usd: usage.turn_cost_usd, + cache_read_tokens: None, + cache_write_tokens: None, + }) + } else { + None + }; + let cumulative_counts = Some(TokenCounts { + input_tokens: Some(usage.cumulative_input_tokens), + output_tokens: Some(usage.cumulative_output_tokens), + total_tokens: None, + cost_usd: usage.cumulative_cost_usd, + cache_read_tokens: None, + cache_write_tokens: None, + }); + let timestamp = chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true); + let payload = AgentTurnMetricPayload { + harness: "goose".to_string(), + model: None, + channel_id: channel_id.map(|id| id.to_string()), + session_id: 
Some(usage.session_id.clone()), + turn_id: Some(turn_id.to_string()), + turn_seq: Some(usage.turn_seq), + timestamp, + turn: turn_counts, + cumulative: cumulative_counts, + delta_reliable: usage.delta_reliable, + stop_reason, + }; + let ciphertext = match buzz_core::agent_turn_metric::encrypt_agent_turn_metric( + &ctx.agent_keys, + owner_pk, + &payload, + ) { + Ok(c) => c, + Err(e) => { + tracing::warn!( + target: "pool::metrics", + session_id, + turn_id, + "NIP-AM: encrypt failed: {e}" + ); + return; + } + }; + let agent_hex = ctx.agent_keys.public_key().to_hex(); + let owner_hex = owner_pk.to_hex(); + let event = match EventBuilder::new( + Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16), + ciphertext, + ) + .tags([ + Tag::parse(["p", &owner_hex]).expect("p tag"), + Tag::parse(["agent", &agent_hex]).expect("agent tag"), + ]) + .sign_with_keys(&ctx.agent_keys) + { + Ok(e) => e, + Err(e) => { + tracing::warn!( + target: "pool::metrics", + session_id, + turn_id, + "NIP-AM: sign failed: {e}" + ); + return; + } + }; + const METRIC_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(3); + match tokio::time::timeout(METRIC_TIMEOUT, ctx.rest_client.submit_event(&event)).await { + Ok(Ok(_)) => {} + Ok(Err(e)) => tracing::warn!( + target: "pool::metrics", + session_id, + turn_id, + "NIP-AM: publish failed: {e}" + ), + Err(_) => tracing::warn!( + target: "pool::metrics", + session_id, + turn_id, + "NIP-AM: publish timed out" + ), + } +} + const REACTION_SEEN: &str = "👀"; const REACTION_WORKING: &str = "💬"; @@ -3613,4 +3810,145 @@ mod tests { result.agent.acp.install_steer_rx(steer_rx); // Reaching here without a panic is the test. } + + // ── NIP-AM emit-hook unit tests ──────────────────────────────────────── + + /// `acp_stop_to_core` maps all ACP stop reasons to the correct NIP-AM + /// variants without panicking on any input. 
+    #[test]
+    fn test_acp_stop_to_core_maps_all_variants() {
+        use buzz_core::agent_turn_metric::StopReason as CoreStop;
+        assert_eq!(acp_stop_to_core(&StopReason::EndTurn), CoreStop::EndTurn);
+        assert_eq!(
+            acp_stop_to_core(&StopReason::Cancelled),
+            CoreStop::Cancelled
+        );
+        assert_eq!(
+            acp_stop_to_core(&StopReason::MaxTokens),
+            CoreStop::MaxTokens
+        );
+        assert_eq!(
+            acp_stop_to_core(&StopReason::MaxTurnRequests),
+            CoreStop::Unknown
+        );
+        assert_eq!(acp_stop_to_core(&StopReason::Refusal), CoreStop::Unknown);
+    }
+
+    /// `publish_agent_turn_metric` is a no-op when `usage` is `None`.
+    #[tokio::test]
+    async fn test_publish_agent_turn_metric_noop_on_no_usage() {
+        let ctx = make_prompt_context_no_owner();
+        // usage = None → early return, no panic.
+        publish_agent_turn_metric(
+            &ctx,
+            None,
+            None,
+            "sess-1",
+            "turn-1",
+            Some(buzz_core::agent_turn_metric::StopReason::EndTurn),
+        )
+        .await;
+    }
+
+    /// `publish_agent_turn_metric` is a no-op when `owner_pubkey` is absent.
+    #[tokio::test]
+    async fn test_publish_agent_turn_metric_noop_on_no_owner() {
+        let ctx = make_prompt_context_no_owner();
+        let usage = crate::goose_usage::GooseTurnUsage {
+            session_id: "sess-1".to_string(),
+            turn_seq: 1,
+            delta_reliable: true,
+            turn_input_tokens: Some(100),
+            turn_output_tokens: Some(50),
+            turn_cost_usd: None,
+            cumulative_input_tokens: 100,
+            cumulative_output_tokens: 50,
+            cumulative_cost_usd: None,
+        };
+        // owner_pubkey = None → early return, no panic.
+        publish_agent_turn_metric(
+            &ctx,
+            Some(usage),
+            None,
+            "sess-1",
+            "turn-1",
+            Some(buzz_core::agent_turn_metric::StopReason::EndTurn),
+        )
+        .await;
+    }
+
+    /// `publish_agent_turn_metric` encrypts the payload when owner is present
+    /// (the HTTP submit will fail in tests, but we verify no panic and the
+    /// encrypt/sign path executes).
+ #[tokio::test] + async fn test_publish_agent_turn_metric_encrypts_with_owner() { + let agent_keys = nostr::Keys::generate(); + let owner_keys = nostr::Keys::generate(); + let ctx = make_prompt_context_with_owner(&agent_keys, owner_keys.public_key()); + let usage = crate::goose_usage::GooseTurnUsage { + session_id: "sess-1".to_string(), + turn_seq: 1, + delta_reliable: true, + turn_input_tokens: Some(200), + turn_output_tokens: Some(80), + turn_cost_usd: Some(0.001), + cumulative_input_tokens: 200, + cumulative_output_tokens: 80, + cumulative_cost_usd: Some(0.001), + }; + // Will try to publish and fail (no real relay) but must not panic. + publish_agent_turn_metric( + &ctx, + Some(usage), + Some(uuid::Uuid::new_v4()), + "sess-1", + "turn-1", + Some(buzz_core::agent_turn_metric::StopReason::EndTurn), + ) + .await; + } + + fn make_prompt_context_no_owner() -> PromptContext { + let agent_keys = nostr::Keys::generate(); + make_prompt_context_impl(&agent_keys, None) + } + + fn make_prompt_context_with_owner( + agent_keys: &nostr::Keys, + owner_pubkey: nostr::PublicKey, + ) -> PromptContext { + make_prompt_context_impl(agent_keys, Some(owner_pubkey)) + } + + fn make_prompt_context_impl( + agent_keys: &nostr::Keys, + owner_pubkey: Option, + ) -> PromptContext { + use crate::relay::RestClient; + PromptContext { + mcp_servers: vec![], + initial_message: None, + idle_timeout: Duration::from_secs(60), + max_turn_duration: Duration::from_secs(120), + turn_liveness_interval: Duration::ZERO, + dedup_mode: DedupMode::Drop, + system_prompt: None, + heartbeat_prompt: None, + base_prompt: None, + cwd: ".".to_string(), + rest_client: RestClient { + http: reqwest::Client::new(), + base_url: "http://127.0.0.1:0".to_string(), + keys: agent_keys.clone(), + auth_tag_json: None, + }, + channel_info: std::collections::HashMap::new(), + context_message_limit: 0, + max_turns_per_session: 0, + permission_mode: PermissionMode::Default, + agent_keys: agent_keys.clone(), + agent_owner_pubkey: 
owner_pubkey, + memory_enabled: false, + } + } } diff --git a/crates/buzz-agent/Cargo.toml b/crates/buzz-agent/Cargo.toml index 7889ad34a..cf5bb37eb 100644 --- a/crates/buzz-agent/Cargo.toml +++ b/crates/buzz-agent/Cargo.toml @@ -43,6 +43,10 @@ hex = { workspace = true } sha2 = { workspace = true } urlencoding = "2" webbrowser = "1" +buzz-core = { workspace = true } +nostr = { workspace = true } +chrono = { workspace = true } +uuid = { workspace = true } [target.'cfg(unix)'.dependencies] nix = { version = "0.31", default-features = false, features = ["signal", "process"] } diff --git a/crates/buzz-agent/src/agent.rs b/crates/buzz-agent/src/agent.rs index 7474f0e4b..28c691d81 100644 --- a/crates/buzz-agent/src/agent.rs +++ b/crates/buzz-agent/src/agent.rs @@ -56,6 +56,12 @@ pub struct RunCtx<'a> { /// which the exact-but-stale token count would otherwise miss. Cleared and /// preserved in lockstep with `last_request_input_tokens`. pub last_request_history_bytes: &'a mut Option, + /// Accumulated input tokens across all LLM rounds in this turn, for + /// NIP-AM metric publishing. Reset to `None` at turn start in `run()`. + pub turn_input_tokens: &'a mut Option, + /// Accumulated output tokens across all LLM rounds in this turn, for + /// NIP-AM metric publishing. Reset to `None` at turn start in `run()`. + pub turn_output_tokens: &'a mut Option, } impl RunCtx<'_> { @@ -71,6 +77,10 @@ impl RunCtx<'_> { } self.history.push(HistoryItem::User(user_text)); + // Reset per-turn token accumulators for this prompt. + *self.turn_input_tokens = None; + *self.turn_output_tokens = None; + let mut round = 0u32; // Per-prompt latch: only used to detect "LLM said end_turn twice // in a row with no tool calls between" within this single prompt. @@ -158,6 +168,14 @@ impl RunCtx<'_> { .map(HistoryItem::context_pressure_bytes) .sum(), ); + // Accumulate per-turn input tokens for NIP-AM metric publishing. 
+ *self.turn_input_tokens = + Some(self.turn_input_tokens.unwrap_or(0).saturating_add(tokens)); + } + // Accumulate per-turn output tokens for NIP-AM metric publishing. + if let Some(out) = response.output_tokens { + *self.turn_output_tokens = + Some(self.turn_output_tokens.unwrap_or(0).saturating_add(out)); } if !response.reasoning.is_empty() { diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index 5cc8e0b4f..1071e40d4 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -8,6 +8,7 @@ mod handoff; mod hints; mod llm; mod mcp; +mod metric; pub mod types; mod wire; @@ -39,6 +40,7 @@ struct App { cfg: Config, llm: Arc, sessions: Mutex>, + metric_publisher: Arc, } struct Session { @@ -71,6 +73,9 @@ struct Session { /// with it so the gate can account for history appended since. last_request_history_bytes: Option, effective_system_prompt: Arc, + /// Monotonically increasing per-session turn counter for NIP-AM metric events. + /// Incremented on each `session/prompt` request. + turn_seq: u64, } fn die(msg: String) -> ! 
{ @@ -135,6 +140,7 @@ async fn async_main() { cfg, llm, sessions: Mutex::new(HashMap::new()), + metric_publisher: Arc::new(metric::MetricPublisher::from_env()), }); let (wire_tx, wire_rx) = mpsc::channel::(64); let writer = tokio::spawn(wire::writer_task(wire_rx)); @@ -365,6 +371,7 @@ async fn session_new(app: &Arc, id: Value, params: Value, wire_tx: &WireSen last_request_input_tokens: None, last_request_history_bytes: None, effective_system_prompt, + turn_seq: 0, }, ); drop(sessions); @@ -489,6 +496,7 @@ async fn run_prompt(app: Arc, id: Value, params: Value, wire_tx: WireSender effective_system_prompt, run_id, mut steer_rx, + turn_seq, ) = match acquire_session(&app, &p.session_id).await { Ok(v) => v, Err(reason) => { @@ -512,6 +520,8 @@ async fn run_prompt(app: Arc, id: Value, params: Value, wire_tx: WireSender ), ) .await; + let mut turn_input_tokens: Option = None; + let mut turn_output_tokens: Option = None; let mut ctx = RunCtx { cfg: &app.cfg, session_id: &sid, @@ -528,6 +538,8 @@ async fn run_prompt(app: Arc, id: Value, params: Value, wire_tx: WireSender stop_rejections: &mut stop_rejections, last_request_input_tokens: &mut last_request_input_tokens, last_request_history_bytes: &mut last_request_history_bytes, + turn_input_tokens: &mut turn_input_tokens, + turn_output_tokens: &mut turn_output_tokens, }; let result = ctx.run(p.prompt).await; if let Some(s) = app.sessions.lock().await.get_mut(&sid) { @@ -542,6 +554,22 @@ async fn run_prompt(app: Arc, id: Value, params: Value, wire_tx: WireSender s.last_request_input_tokens = last_request_input_tokens; s.last_request_history_bytes = last_request_history_bytes; } + // Best-effort: publish NIP-AM kind 44200 agent turn metric. Never fails + // the turn — errors are logged at WARN inside MetricPublisher::publish. 
+ let nip_am_stop = match &result { + Ok(stop) => agent_stop_to_nip_am(stop), + Err(_) => buzz_core::agent_turn_metric::StopReason::Error, + }; + app.metric_publisher + .publish( + &sid, + turn_seq, + &run_id, + turn_input_tokens, + turn_output_tokens, + nip_am_stop, + ) + .await; match result { Ok(stop) => { wire::send( @@ -572,6 +600,7 @@ async fn acquire_session( Arc, String, mpsc::UnboundedReceiver>, + u64, ), &'static str, > { @@ -593,6 +622,10 @@ async fn acquire_session( s.active_run_id = Some(run_id.clone()); let (steer_tx, steer_rx) = mpsc::unbounded_channel(); s.steer_tx = Some(steer_tx); + // Increment turn sequence number before returning so the metric event + // gets a monotonically increasing counter starting at 1. + s.turn_seq = s.turn_seq.saturating_add(1); + let turn_seq = s.turn_seq; Ok(( s.id.clone(), s.mcp.clone(), @@ -607,6 +640,7 @@ async fn acquire_session( Arc::clone(&s.effective_system_prompt), run_id, steer_rx, + turn_seq, )) } @@ -615,3 +649,16 @@ fn session_token() -> Result { getrandom::fill(&mut b).map_err(|e| format!("rng: getrandom failed: {e}"))?; Ok(b.iter().map(|x| format!("{x:02x}")).collect()) } + +/// Map a buzz-agent `StopReason` to the NIP-AM `StopReason` used in kind 44200 payloads. 
+fn agent_stop_to_nip_am(r: &crate::types::StopReason) -> buzz_core::agent_turn_metric::StopReason {
+    use crate::types::StopReason;
+    use buzz_core::agent_turn_metric::StopReason as CoreStop;
+    match r {
+        StopReason::EndTurn => CoreStop::EndTurn,
+        StopReason::Cancelled => CoreStop::Cancelled,
+        StopReason::MaxTokens => CoreStop::MaxTokens,
+        StopReason::MaxTurnRequests => CoreStop::Unknown,
+        StopReason::Refusal => CoreStop::Unknown,
+    }
+}
diff --git a/crates/buzz-agent/src/llm.rs b/crates/buzz-agent/src/llm.rs
index 628449db2..40598668a 100644
--- a/crates/buzz-agent/src/llm.rs
+++ b/crates/buzz-agent/src/llm.rs
@@ -708,11 +708,13 @@ fn parse_responses(v: Value) -> Result {
         _ => ProviderStop::Other,
     };
     let input_tokens = sum_usage(&v, &["input_tokens"]);
+    let output_tokens = sum_usage(&v, &["output_tokens"]);
     Ok(LlmResponse {
         text,
         tool_calls,
         stop,
         input_tokens,
+        output_tokens,
         reasoning,
     })
 }
@@ -811,11 +813,13 @@ fn parse_anthropic(v: Value) -> Result {
         }
     }
     let input_tokens = anthropic_input_tokens(&v);
+    let output_tokens = sum_usage(&v, &["output_tokens"]);
     Ok(LlmResponse {
         text,
         tool_calls,
         stop,
         input_tokens,
+        output_tokens,
         reasoning,
     })
 }
@@ -860,11 +864,13 @@ fn parse_openai(v: Value) -> Result {
         }
     }
     let input_tokens = openai_chat_input_tokens(&v);
+    let output_tokens = sum_usage(&v, &["completion_tokens"]);
     Ok(LlmResponse {
         text,
         tool_calls,
         stop,
         input_tokens,
+        output_tokens,
         reasoning,
     })
 }
@@ -1858,4 +1864,75 @@ mod tests {
         let src = StaticTokenSource::new("static-key");
         assert_eq!(src.refresh_now("rejected").await.unwrap(), "static-key");
     }
+
+    // ── Output-token parsing tests ──────────────────────────────────────────
+
+    /// `parse_anthropic` extracts `output_tokens` from the usage object.
+    #[test]
+    fn parse_anthropic_output_tokens() {
+        let v = serde_json::json!({
+            "stop_reason": "end_turn",
+            "content": [{"type": "text", "text": "hi"}],
+            "usage": {"input_tokens": 42, "output_tokens": 7}
+        });
+        assert_eq!(parse_anthropic(v).unwrap().output_tokens, Some(7));
+    }
+
+    /// `parse_anthropic` returns `None` for `output_tokens` when usage is absent.
+    #[test]
+    fn parse_anthropic_output_tokens_missing_usage_is_none() {
+        let v = serde_json::json!({
+            "stop_reason": "end_turn",
+            "content": [{"type": "text", "text": "hi"}]
+        });
+        assert_eq!(parse_anthropic(v).unwrap().output_tokens, None);
+    }
+
+    /// `parse_openai` maps `completion_tokens` to `output_tokens`.
+    #[test]
+    fn parse_openai_output_tokens_from_completion_tokens() {
+        let v = serde_json::json!({
+            "choices": [{"finish_reason": "stop", "message": {"content": "hi"}}],
+            "usage": {"prompt_tokens": 123, "completion_tokens": 4, "total_tokens": 127}
+        });
+        assert_eq!(parse_openai(v).unwrap().output_tokens, Some(4));
+    }
+
+    /// `parse_openai` returns `None` for `output_tokens` when usage is absent.
+    #[test]
+    fn parse_openai_output_tokens_missing_usage_is_none() {
+        let v = serde_json::json!({
+            "choices": [{"finish_reason": "stop", "message": {"content": "hi"}}]
+        });
+        assert_eq!(parse_openai(v).unwrap().output_tokens, None);
+    }
+
+    /// `parse_responses` extracts `output_tokens` from the usage object.
+    #[test]
+    fn parse_responses_output_tokens() {
+        let v = serde_json::json!({
+            "status": "completed",
+            "output": [{
+                "type": "message",
+                "role": "assistant",
+                "content": [{"type": "output_text", "text": "hi"}]
+            }],
+            "usage": {"input_tokens": 321, "output_tokens": 9, "total_tokens": 330}
+        });
+        assert_eq!(parse_responses(v).unwrap().output_tokens, Some(9));
+    }
+
+    /// `parse_responses` returns `None` for `output_tokens` when usage is absent.
+ #[test] + fn parse_responses_output_tokens_missing_usage_is_none() { + let v = serde_json::json!({ + "status": "completed", + "output": [{ + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": "hi"}] + }] + }); + assert_eq!(parse_responses(v).unwrap().output_tokens, None); + } } diff --git a/crates/buzz-agent/src/metric.rs b/crates/buzz-agent/src/metric.rs new file mode 100644 index 000000000..176ba6519 --- /dev/null +++ b/crates/buzz-agent/src/metric.rs @@ -0,0 +1,281 @@ +//! NIP-AM kind:44200 metric publishing for the buzz-agent harness. +//! +//! Built from three environment variables: +//! - `BUZZ_PRIVATE_KEY` — agent Nostr private key (nsec or hex). +//! - `BUZZ_RELAY_URL` — relay base URL (e.g. `https://relay.example.com`). +//! - `BUZZ_AGENT_OWNER_PUBKEY` — owner npub or hex public key. +//! +//! If any variable is absent or unparseable, metric publishing is a silent +//! no-op. This mirrors the fail-open policy used throughout the agent harness. +//! +//! ## Turn tracking +//! +//! buzz-agent has no session-cumulative token counters. Each turn may span +//! multiple LLM rounds (tool calls); per-turn tokens are accumulated across +//! all rounds. `deltaReliable` is always `true` because buzz-agent tracks +//! every round within a turn in-process — no cross-process baseline is ever +//! lost. Session-level cumulative fields are omitted (`None`) because +//! buzz-agent does not maintain running totals across turns in a session. + +use nostr::Keys; +use reqwest::Client; + +/// Configured NIP-AM publisher. Constructed once per process from env vars. +/// When env vars are absent, construction succeeds and `is_noop()` returns +/// `true` — callers need not special-case the unconfigured case. +pub(crate) struct MetricPublisher { + keys: Option, + owner_pubkey: Option, + base_url: Option, + http: Client, +} + +impl MetricPublisher { + /// Build from environment. 
Silent on parse errors — missing/malformed vars + /// leave the corresponding field `None`. + pub(crate) fn from_env() -> Self { + let keys = std::env::var("BUZZ_PRIVATE_KEY") + .ok() + .and_then(|v| Keys::parse(&v).ok()); + let base_url = std::env::var("BUZZ_RELAY_URL") + .ok() + .filter(|s| !s.is_empty()) + .map(|s| s.trim_end_matches('/').to_string()); + let owner_pubkey = std::env::var("BUZZ_AGENT_OWNER_PUBKEY") + .ok() + .and_then(|v| nostr::PublicKey::parse(&v).ok()); + Self { + keys, + owner_pubkey, + base_url, + http: Client::new(), + } + } + + /// Returns `true` when no complete config is available. Publishing is + /// always a no-op in this state. + #[cfg(test)] + pub(crate) fn is_noop(&self) -> bool { + self.keys.is_none() || self.owner_pubkey.is_none() || self.base_url.is_none() + } + + /// Best-effort publish a kind 44200 event. + /// + /// - `session_id` — the ACP session id for this turn. + /// - `turn_seq` — monotonically increasing per-session turn counter. + /// - `turn_id` — the run id for this turn (harness-internal). + /// - `input_tokens` / `output_tokens` — summed across all LLM rounds in the turn. + /// - `stop_reason` — the NIP-AM stop reason. + /// + /// Errors are logged at WARN and never propagated — a metric publish + /// failure must never fail a turn. + pub(crate) async fn publish( + &self, + session_id: &str, + turn_seq: u64, + turn_id: &str, + input_tokens: Option, + output_tokens: Option, + stop_reason: buzz_core::agent_turn_metric::StopReason, + ) { + use buzz_core::agent_turn_metric::{AgentTurnMetricPayload, TokenCounts}; + use nostr::{EventBuilder, Kind, Tag}; + + let (keys, owner_pk, base_url) = match (&self.keys, &self.owner_pubkey, &self.base_url) { + (Some(k), Some(pk), Some(url)) => (k, pk, url), + _ => return, + }; + + // buzz-agent has no session-cumulative counters — only per-turn deltas. + // deltaReliable is true because we sum every round in this process; + // no cross-process baseline is ever lost. 
Cumulative fields are omitted + // since buzz-agent does not track rolling session totals across turns. + let turn_counts = if input_tokens.is_some() || output_tokens.is_some() { + Some(TokenCounts { + input_tokens, + output_tokens, + total_tokens: None, + cost_usd: None, + cache_read_tokens: None, + cache_write_tokens: None, + }) + } else { + None + }; + + let timestamp = chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true); + let payload = AgentTurnMetricPayload { + harness: "buzz-agent".to_string(), + model: None, + channel_id: None, + session_id: Some(session_id.to_string()), + turn_id: Some(turn_id.to_string()), + turn_seq: Some(turn_seq), + timestamp, + turn: turn_counts, + cumulative: None, + delta_reliable: true, + stop_reason: Some(stop_reason), + }; + + let ciphertext = + match buzz_core::agent_turn_metric::encrypt_agent_turn_metric(keys, owner_pk, &payload) + { + Ok(c) => c, + Err(e) => { + tracing::warn!( + target: "buzz_agent::metrics", + session_id, + turn_id, + "NIP-AM: encrypt failed: {e}" + ); + return; + } + }; + + let agent_hex = keys.public_key().to_hex(); + let owner_hex = owner_pk.to_hex(); + let event = match EventBuilder::new( + Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16), + ciphertext, + ) + .tags([ + Tag::parse(["p", &owner_hex]).expect("p tag"), + Tag::parse(["agent", &agent_hex]).expect("agent tag"), + ]) + .sign_with_keys(keys) + { + Ok(e) => e, + Err(e) => { + tracing::warn!( + target: "buzz_agent::metrics", + session_id, + turn_id, + "NIP-AM: sign failed: {e}" + ); + return; + } + }; + + let body_bytes = match serde_json::to_vec(&event) { + Ok(b) => b, + Err(e) => { + tracing::warn!( + target: "buzz_agent::metrics", + session_id, + "NIP-AM: serialize failed: {e}" + ); + return; + } + }; + + let url = format!("{base_url}/events"); + let auth_header = match nip98_auth(keys, "POST", &url, Some(&body_bytes)) { + Ok(h) => h, + Err(e) => { + tracing::warn!( + target: "buzz_agent::metrics", + session_id, + 
"NIP-AM: NIP-98 auth failed: {e}" + ); + return; + } + }; + + const METRIC_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(3); + match tokio::time::timeout( + METRIC_TIMEOUT, + self.http + .post(&url) + .header("Authorization", auth_header) + .header("Content-Type", "application/json") + .body(body_bytes) + .send(), + ) + .await + { + Ok(Ok(resp)) if resp.status().is_success() => {} + Ok(Ok(resp)) => tracing::warn!( + target: "buzz_agent::metrics", + session_id, + turn_id, + "NIP-AM: publish HTTP {}", resp.status() + ), + Ok(Err(e)) => tracing::warn!( + target: "buzz_agent::metrics", + session_id, + turn_id, + "NIP-AM: publish failed: {e}" + ), + Err(_) => tracing::warn!( + target: "buzz_agent::metrics", + session_id, + turn_id, + "NIP-AM: publish timed out" + ), + } + } +} + +/// Build a NIP-98 HTTP Auth `Authorization` header value: `Nostr `. +fn nip98_auth(keys: &Keys, method: &str, url: &str, body: Option<&[u8]>) -> Result { + use base64::Engine; + use nostr::{EventBuilder, Kind, Tag}; + use sha2::{Digest, Sha256}; + + let u_tag = Tag::parse(["u", url]).map_err(|e| e.to_string())?; + let method_tag = Tag::parse(["method", method]).map_err(|e| e.to_string())?; + let nonce_tag = + Tag::parse(["nonce", &uuid::Uuid::new_v4().to_string()]).map_err(|e| e.to_string())?; + let mut tags = vec![u_tag, method_tag, nonce_tag]; + if let Some(b) = body { + let hash = hex::encode(Sha256::digest(b)); + let payload_tag = Tag::parse(["payload", &hash]).map_err(|e| e.to_string())?; + tags.push(payload_tag); + } + let event = EventBuilder::new(Kind::HttpAuth, "") + .tags(tags) + .sign_with_keys(keys) + .map_err(|e| e.to_string())?; + let json = serde_json::to_string(&event).map_err(|e| e.to_string())?; + Ok(format!( + "Nostr {}", + base64::engine::general_purpose::STANDARD.encode(json) + )) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// When all three env vars are absent, `from_env` yields a no-op publisher. 
+ #[test] + fn test_metric_publisher_noop_when_env_absent() { + // Remove the vars if set in the test environment to avoid interference. + std::env::remove_var("BUZZ_PRIVATE_KEY"); + std::env::remove_var("BUZZ_RELAY_URL"); + std::env::remove_var("BUZZ_AGENT_OWNER_PUBKEY"); + let p = MetricPublisher::from_env(); + assert!(p.is_noop(), "publisher must be noop when vars are absent"); + } + + /// A well-formed `BUZZ_PRIVATE_KEY` + `BUZZ_RELAY_URL` + `BUZZ_AGENT_OWNER_PUBKEY` + /// makes the publisher non-noop. + #[test] + fn test_metric_publisher_configured_when_all_vars_present() { + let agent_keys = Keys::generate(); + let owner_keys = Keys::generate(); + std::env::set_var("BUZZ_PRIVATE_KEY", agent_keys.secret_key().to_secret_hex()); + std::env::set_var("BUZZ_RELAY_URL", "https://relay.example.com"); + std::env::set_var("BUZZ_AGENT_OWNER_PUBKEY", owner_keys.public_key().to_hex()); + let p = MetricPublisher::from_env(); + assert!( + !p.is_noop(), + "publisher must not be noop when all vars are set" + ); + // Restore env to a clean state. + std::env::remove_var("BUZZ_PRIVATE_KEY"); + std::env::remove_var("BUZZ_RELAY_URL"); + std::env::remove_var("BUZZ_AGENT_OWNER_PUBKEY"); + } +} diff --git a/crates/buzz-agent/src/types.rs b/crates/buzz-agent/src/types.rs index a8acb52a6..ef006b70a 100644 --- a/crates/buzz-agent/src/types.rs +++ b/crates/buzz-agent/src/types.rs @@ -139,6 +139,10 @@ pub struct LlmResponse { /// tokens, so reading it alone would undercount). Used to gate handoff on /// the real token budget rather than a byte estimate. pub input_tokens: Option, + /// Output tokens the provider reported for this request, or `None` if the + /// response carried no usage. Used to accumulate per-turn output counts + /// for NIP-AM metric publishing. + pub output_tokens: Option, /// Reasoning/thinking content emitted by the model before its answer, if /// any. 
Non-empty when the provider returns extended-thinking tokens: /// From 39fd2d82950ad0d1ec26c58bf7e871a88975537c Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 2 Jul 2026 11:03:40 -0400 Subject: [PATCH 11/21] fix(acp,buzz-agent): address Thufir pass-1 findings on NIP-AM step-2 commits Three IMPORTANT correctness fixes and one MINOR test-isolation fix: 1. Control-cancel paths in pool.rs now drain take_turn_usage() and call publish_agent_turn_metric before every send_prompt_result that returns early from the control-signal select arm. Covers all four cancel outcome variants (Ok/AgentExited/Timeout/Err) and the completed-before-control race. Uses Cancelled for the Ok arm and Error for all error variants; EndTurn for the race-1 completion path. 2. MetricPublisher::publish now returns early when both input_tokens and output_tokens are None, preventing all-null events that violate the NIP-AM prohibition on publishing turns with no observed usage. 3. buzz-agent MetricPublisher now mirrors the platform relay/auth contract: - Owner derived from BUZZ_AUTH_TAG via buzz_sdk::nip_oa::verify_auth_tag, falling back to BUZZ_AGENT_OWNER_PUBKEY only when absent. - BUZZ_RELAY_URL ws/wss normalized to http/https before use as HTTP URL. - Raw BUZZ_AUTH_TAG JSON forwarded as x-auth-tag header on /events so attested agents pass relay membership checks. - buzz-sdk added to buzz-agent dependencies (lightweight, no transport deps). 4. Tests rewritten to use injected MetricConfig instead of process-env mutation, eliminating the parallel test race flagged as a MINOR. New tests cover: ws/wss URL normalization, x-auth-tag config storage, no-usage early-return, and the Cancelled stop-reason path in pool.rs. 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- Cargo.lock | 1 + crates/buzz-acp/src/pool.rs | 82 ++++++++++ crates/buzz-agent/Cargo.toml | 1 + crates/buzz-agent/src/metric.rs | 267 ++++++++++++++++++++++++-------- 4 files changed, 286 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9801b93eb..0619e5ea7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -781,6 +781,7 @@ dependencies = [ "axum", "base64", "buzz-core", + "buzz-sdk", "chrono", "getrandom 0.4.2", "hex", diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index 2774d65a4..3b8ef5e0a 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -1539,6 +1539,16 @@ pub async fn run_prompt_task( let retry_batch = requeue_cancelled_batch(&ctx, control_signal, batch); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::Cancelled), + ) + .await; send_prompt_result( &result_tx, agent, @@ -1553,6 +1563,16 @@ pub async fn run_prompt_task( let retry_batch = requeue_cancelled_batch(&ctx, control_signal, batch); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::Error), + ) + .await; send_prompt_result( &result_tx, agent, @@ -1568,6 +1588,16 @@ pub async fn run_prompt_task( let retry_batch = requeue_cancelled_batch(&ctx, control_signal, batch); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::Error), + ) + .await; send_prompt_result( &result_tx, agent, @@ -1582,6 +1612,16 @@ pub async fn run_prompt_task( let retry_batch = requeue_cancelled_batch(&ctx, control_signal, batch); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( 
+ &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::Error), + ) + .await; send_prompt_result( &result_tx, agent, @@ -1626,6 +1666,16 @@ pub async fn run_prompt_task( &source, &control_signal, ); + let usage = agent.acp.take_turn_usage(); + publish_agent_turn_metric( + &ctx, + usage, + observer_channel_id, + &session_id, + &turn_id, + Some(buzz_core::agent_turn_metric::StopReason::EndTurn), + ) + .await; send_prompt_result( &result_tx, agent, @@ -3908,6 +3958,38 @@ mod tests { .await; } + /// Regression for the control-cancel drain: `publish_agent_turn_metric` + /// with a `Cancelled` stop reason and pending usage executes without panic + /// (encrypt+sign path). This mirrors the control-signal arm that previously + /// returned early without draining usage. + #[tokio::test] + async fn test_publish_agent_turn_metric_cancelled_stop_reason() { + let agent_keys = nostr::Keys::generate(); + let owner_keys = nostr::Keys::generate(); + let ctx = make_prompt_context_with_owner(&agent_keys, owner_keys.public_key()); + let usage = crate::goose_usage::GooseTurnUsage { + session_id: "sess-cancel".to_string(), + turn_seq: 2, + delta_reliable: true, + turn_input_tokens: Some(50), + turn_output_tokens: Some(20), + turn_cost_usd: None, + cumulative_input_tokens: 150, + cumulative_output_tokens: 70, + cumulative_cost_usd: None, + }; + // Must not panic; HTTP submit will fail (no real relay) — that's fine. 
+ publish_agent_turn_metric( + &ctx, + Some(usage), + Some(uuid::Uuid::new_v4()), + "sess-cancel", + "turn-cancel", + Some(buzz_core::agent_turn_metric::StopReason::Cancelled), + ) + .await; + } + fn make_prompt_context_no_owner() -> PromptContext { let agent_keys = nostr::Keys::generate(); make_prompt_context_impl(&agent_keys, None) diff --git a/crates/buzz-agent/Cargo.toml b/crates/buzz-agent/Cargo.toml index cf5bb37eb..5b40f069f 100644 --- a/crates/buzz-agent/Cargo.toml +++ b/crates/buzz-agent/Cargo.toml @@ -44,6 +44,7 @@ sha2 = { workspace = true } urlencoding = "2" webbrowser = "1" buzz-core = { workspace = true } +buzz-sdk = { workspace = true } nostr = { workspace = true } chrono = { workspace = true } uuid = { workspace = true } diff --git a/crates/buzz-agent/src/metric.rs b/crates/buzz-agent/src/metric.rs index 176ba6519..eae9ff1b9 100644 --- a/crates/buzz-agent/src/metric.rs +++ b/crates/buzz-agent/src/metric.rs @@ -1,12 +1,14 @@ //! NIP-AM kind:44200 metric publishing for the buzz-agent harness. //! -//! Built from three environment variables: +//! Configured from three environment variables: //! - `BUZZ_PRIVATE_KEY` — agent Nostr private key (nsec or hex). -//! - `BUZZ_RELAY_URL` — relay base URL (e.g. `https://relay.example.com`). -//! - `BUZZ_AGENT_OWNER_PUBKEY` — owner npub or hex public key. +//! - `BUZZ_RELAY_URL` — relay base URL (`wss://` or `https://`; both accepted). +//! - `BUZZ_AUTH_TAG` — NIP-OA attestation JSON (preferred owner source). +//! Owner is derived by verifying the auth tag against the agent's own pubkey. +//! Falls back to `BUZZ_AGENT_OWNER_PUBKEY` (npub or hex) if auth tag is absent. //! -//! If any variable is absent or unparseable, metric publishing is a silent -//! no-op. This mirrors the fail-open policy used throughout the agent harness. +//! If any required variable is absent or unparseable, metric publishing is a +//! silent no-op. This mirrors the fail-open policy used throughout the harness. //! //! ## Turn tracking //! 
@@ -20,34 +22,78 @@ use nostr::Keys; use reqwest::Client; +/// Resolved configuration for a `MetricPublisher`. Separated from env-parsing +/// so tests can inject values directly without mutating process-global state. +pub(crate) struct MetricConfig { + pub(crate) keys: Keys, + pub(crate) owner_pubkey: nostr::PublicKey, + /// HTTP(S) base URL — ws/wss already normalized to http/https, no trailing + /// slash. + pub(crate) base_url: String, + /// Raw `BUZZ_AUTH_TAG` JSON, forwarded as `x-auth-tag` for attested agents. + pub(crate) auth_tag_json: Option, +} + /// Configured NIP-AM publisher. Constructed once per process from env vars. /// When env vars are absent, construction succeeds and `is_noop()` returns /// `true` — callers need not special-case the unconfigured case. pub(crate) struct MetricPublisher { - keys: Option, - owner_pubkey: Option, - base_url: Option, + config: Option, http: Client, } impl MetricPublisher { /// Build from environment. Silent on parse errors — missing/malformed vars - /// leave the corresponding field `None`. + /// leave the config absent (no-op publisher). + /// + /// Owner resolution priority: + /// 1. `BUZZ_AUTH_TAG` — NIP-OA attestation verified against this agent's + /// pubkey; extracts the owner pubkey from the tag. + /// 2. `BUZZ_AGENT_OWNER_PUBKEY` — explicit hex or npub fallback. pub(crate) fn from_env() -> Self { + Self { + config: Self::config_from_env(), + http: Client::new(), + } + } + + fn config_from_env() -> Option { let keys = std::env::var("BUZZ_PRIVATE_KEY") .ok() - .and_then(|v| Keys::parse(&v).ok()); - let base_url = std::env::var("BUZZ_RELAY_URL") + .and_then(|v| Keys::parse(&v).ok())?; + let raw_url = std::env::var("BUZZ_RELAY_URL") .ok() - .filter(|s| !s.is_empty()) - .map(|s| s.trim_end_matches('/').to_string()); - let owner_pubkey = std::env::var("BUZZ_AGENT_OWNER_PUBKEY") + .filter(|s| !s.is_empty())?; + let base_url = ws_to_http(raw_url.trim_end_matches('/')); + + // Try BUZZ_AUTH_TAG first. 
+ let (owner_pubkey, auth_tag_json) = match std::env::var("BUZZ_AUTH_TAG") .ok() - .and_then(|v| nostr::PublicKey::parse(&v).ok()); - Self { + .filter(|s| !s.is_empty()) + { + Some(tag_json) => { + match buzz_sdk::nip_oa::verify_auth_tag(&tag_json, &keys.public_key()) { + Ok(pk) => (pk, Some(tag_json)), + // Auth tag present but verification failed — fall through. + Err(_) => resolve_explicit_owner()?, + } + } + None => resolve_explicit_owner()?, + }; + + Some(MetricConfig { keys, owner_pubkey, base_url, + auth_tag_json, + }) + } + + /// Build from an explicit config (test helper — avoids process-env mutation). + #[cfg(test)] + pub(crate) fn from_config(config: MetricConfig) -> Self { + Self { + config: Some(config), http: Client::new(), } } @@ -56,7 +102,7 @@ impl MetricPublisher { /// always a no-op in this state. #[cfg(test)] pub(crate) fn is_noop(&self) -> bool { - self.keys.is_none() || self.owner_pubkey.is_none() || self.base_url.is_none() + self.config.is_none() } /// Best-effort publish a kind 44200 event. @@ -67,6 +113,10 @@ impl MetricPublisher { /// - `input_tokens` / `output_tokens` — summed across all LLM rounds in the turn. /// - `stop_reason` — the NIP-AM stop reason. /// + /// No-op when no usage was observed (`input_tokens` and `output_tokens` + /// both `None`) — per NIP-AM § "Do NOT publish an event for a turn with no + /// observed usage". + /// /// Errors are logged at WARN and never propagated — a metric publish /// failure must never fail a turn. pub(crate) async fn publish( @@ -81,27 +131,33 @@ impl MetricPublisher { use buzz_core::agent_turn_metric::{AgentTurnMetricPayload, TokenCounts}; use nostr::{EventBuilder, Kind, Tag}; - let (keys, owner_pk, base_url) = match (&self.keys, &self.owner_pubkey, &self.base_url) { - (Some(k), Some(pk), Some(url)) => (k, pk, url), - _ => return, + // No usage observed — NIP-AM forbids publishing an all-null metric. 
+ if input_tokens.is_none() && output_tokens.is_none() { + return; + } + + let MetricConfig { + keys, + owner_pubkey: owner_pk, + base_url, + auth_tag_json, + } = match &self.config { + Some(c) => c, + None => return, }; // buzz-agent has no session-cumulative counters — only per-turn deltas. // deltaReliable is true because we sum every round in this process; // no cross-process baseline is ever lost. Cumulative fields are omitted // since buzz-agent does not track rolling session totals across turns. - let turn_counts = if input_tokens.is_some() || output_tokens.is_some() { - Some(TokenCounts { - input_tokens, - output_tokens, - total_tokens: None, - cost_usd: None, - cache_read_tokens: None, - cache_write_tokens: None, - }) - } else { - None - }; + let turn_counts = Some(TokenCounts { + input_tokens, + output_tokens, + total_tokens: None, + cost_usd: None, + cache_read_tokens: None, + cache_write_tokens: None, + }); let timestamp = chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true); let payload = AgentTurnMetricPayload { @@ -183,17 +239,15 @@ impl MetricPublisher { }; const METRIC_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(3); - match tokio::time::timeout( - METRIC_TIMEOUT, - self.http - .post(&url) - .header("Authorization", auth_header) - .header("Content-Type", "application/json") - .body(body_bytes) - .send(), - ) - .await - { + let mut req = self + .http + .post(&url) + .header("Authorization", auth_header) + .header("Content-Type", "application/json"); + if let Some(tag) = auth_tag_json { + req = req.header("x-auth-tag", tag); + } + match tokio::time::timeout(METRIC_TIMEOUT, req.body(body_bytes).send()).await { Ok(Ok(resp)) if resp.status().is_success() => {} Ok(Ok(resp)) => tracing::warn!( target: "buzz_agent::metrics", @@ -217,6 +271,23 @@ impl MetricPublisher { } } +/// Normalize `ws://` / `wss://` relay URLs to `http://` / `https://`. +/// Pass-through for URLs that are already HTTP(S). 
+fn ws_to_http(url: &str) -> String { + url.replace("wss://", "https://") + .replace("ws://", "http://") + .to_string() +} + +/// Parse `BUZZ_AGENT_OWNER_PUBKEY` as the explicit owner fallback. +/// Returns `(pubkey, None)` on success, `None` if the var is absent/invalid. +fn resolve_explicit_owner() -> Option<(nostr::PublicKey, Option)> { + let pk = std::env::var("BUZZ_AGENT_OWNER_PUBKEY") + .ok() + .and_then(|v| nostr::PublicKey::parse(&v).ok())?; + Some((pk, None)) +} + /// Build a NIP-98 HTTP Auth `Authorization` header value: `Nostr `. fn nip98_auth(keys: &Keys, method: &str, url: &str, body: Option<&[u8]>) -> Result { use base64::Engine; @@ -247,35 +318,101 @@ fn nip98_auth(keys: &Keys, method: &str, url: &str, body: Option<&[u8]>) -> Resu #[cfg(test)] mod tests { use super::*; + use nostr::Keys; - /// When all three env vars are absent, `from_env` yields a no-op publisher. - #[test] - fn test_metric_publisher_noop_when_env_absent() { - // Remove the vars if set in the test environment to avoid interference. - std::env::remove_var("BUZZ_PRIVATE_KEY"); - std::env::remove_var("BUZZ_RELAY_URL"); - std::env::remove_var("BUZZ_AGENT_OWNER_PUBKEY"); - let p = MetricPublisher::from_env(); - assert!(p.is_noop(), "publisher must be noop when vars are absent"); + fn make_config(owner_keys: &Keys) -> MetricConfig { + MetricConfig { + keys: Keys::generate(), + owner_pubkey: owner_keys.public_key(), + base_url: "https://relay.example.com".to_string(), + auth_tag_json: None, + } } - /// A well-formed `BUZZ_PRIVATE_KEY` + `BUZZ_RELAY_URL` + `BUZZ_AGENT_OWNER_PUBKEY` - /// makes the publisher non-noop. + /// A publisher built from an explicit config is not a no-op. 
#[test] - fn test_metric_publisher_configured_when_all_vars_present() { - let agent_keys = Keys::generate(); + fn test_metric_publisher_configured_when_config_injected() { let owner_keys = Keys::generate(); - std::env::set_var("BUZZ_PRIVATE_KEY", agent_keys.secret_key().to_secret_hex()); - std::env::set_var("BUZZ_RELAY_URL", "https://relay.example.com"); - std::env::set_var("BUZZ_AGENT_OWNER_PUBKEY", owner_keys.public_key().to_hex()); - let p = MetricPublisher::from_env(); + let p = MetricPublisher::from_config(make_config(&owner_keys)); assert!( !p.is_noop(), - "publisher must not be noop when all vars are set" + "publisher must not be noop when config is set" ); - // Restore env to a clean state. - std::env::remove_var("BUZZ_PRIVATE_KEY"); - std::env::remove_var("BUZZ_RELAY_URL"); - std::env::remove_var("BUZZ_AGENT_OWNER_PUBKEY"); + } + + /// A publisher with no config (None) is a no-op. + #[test] + fn test_metric_publisher_noop_when_no_config() { + let p = MetricPublisher { + config: None, + http: Client::new(), + }; + assert!(p.is_noop(), "publisher must be noop when config is None"); + } + + /// When both token fields are None, publish returns without building/sending + /// an event. Verified by the absence of a panic or network call (we use an + /// invalid URL so any real HTTP attempt would error — silence is the proof). + #[tokio::test] + async fn test_publish_noop_when_no_usage_observed() { + let owner_keys = Keys::generate(); + let mut config = make_config(&owner_keys); + // Use an unreachable URL — if any HTTP request were made it would fail + // visibly. The test must complete silently. + config.base_url = "https://127.0.0.1:1".to_string(); + let p = MetricPublisher::from_config(config); + // Both tokens absent → must return before any encrypt/send attempt. 
+ p.publish( + "session-1", + 0, + "turn-1", + None, + None, + buzz_core::agent_turn_metric::StopReason::EndTurn, + ) + .await; + // If we reach here without error, the no-usage guard fired correctly. + } + + /// ws:// URL is normalized to http://. + #[test] + fn test_ws_to_http_plain() { + assert_eq!( + ws_to_http("ws://relay.example.com"), + "http://relay.example.com" + ); + } + + /// wss:// URL is normalized to https://. + #[test] + fn test_ws_to_http_secure() { + assert_eq!( + ws_to_http("wss://relay.example.com"), + "https://relay.example.com" + ); + } + + /// https:// URLs pass through unchanged. + #[test] + fn test_ws_to_http_passthrough() { + assert_eq!( + ws_to_http("https://relay.example.com"), + "https://relay.example.com" + ); + } + + /// Auth tag JSON is forwarded in the `x-auth-tag` header field of the + /// config. Verify it round-trips through the config struct intact. + #[test] + fn test_auth_tag_json_stored_in_config() { + let tag_json = r#"["auth","deadbeef","*","sig"]"#; + let owner_keys = Keys::generate(); + let config = MetricConfig { + keys: Keys::generate(), + owner_pubkey: owner_keys.public_key(), + base_url: "https://relay.example.com".to_string(), + auth_tag_json: Some(tag_json.to_string()), + }; + assert_eq!(config.auth_tag_json.as_deref(), Some(tag_json)); } } From 6c0bf3a80c82a55d1f606556ca24ab0a2ed704ab Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 2 Jul 2026 12:22:29 -0400 Subject: [PATCH 12/21] refactor(acp,buzz-agent): unify NIP-AM metrics via shared usage notification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit buzz-agent now emits the same _goose/unstable/session/update/usage_update wire notification that goose does, so buzz-acp becomes the single publish path for both harnesses. 
Changes: - buzz-agent: add accumulated_input/output_tokens to Session, emit _goose/unstable/session/update usage_update before session/prompt response. Emission is unconditional (mirrors buzz-agent's existing session_info_update mimicry pattern). No-op when no tokens observed. - buzz-agent: delete metric.rs (native publisher), remove buzz-sdk and buzz-core deps from Cargo.toml, remove turn_seq counter. - buzz-acp: rename goose_usage.rs -> usage.rs, GooseUsageTracker -> UsageTracker, GooseTurnUsage -> TurnUsage, GooseUsageUpdatePayload -> UsageUpdatePayload. Logic is harness-agnostic; only the names change. - buzz-acp: relax used/contextLimit to #[serde(default)] in UsageUpdatePayload — buzz-agent omits these fields; goose supplies them. - buzz-acp: add harness_name to PromptContext, derived from agent_command via normalize_agent_command_identity (now pub(crate)). - buzz-acp: replace hardcoded harness: "goose" with ctx.harness_name in publish_agent_turn_metric. - Tests: buzz-agent-shaped usage_update (no used/contextLimit) deserializes correctly; full tracker flow across two turns with buzz-agent payload; harness_name: "buzz-agent" flows through publish path without panic. 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- Cargo.lock | 2 - crates/buzz-acp/src/acp.rs | 16 +- crates/buzz-acp/src/config.rs | 2 +- crates/buzz-acp/src/lib.rs | 5 +- crates/buzz-acp/src/pool.rs | 46 +- .../buzz-acp/src/{goose_usage.rs => usage.rs} | 171 +++++-- crates/buzz-agent/Cargo.toml | 2 - crates/buzz-agent/src/lib.rs | 97 ++-- crates/buzz-agent/src/metric.rs | 418 ------------------ crates/buzz-agent/src/wire.rs | 12 + 10 files changed, 256 insertions(+), 515 deletions(-) rename crates/buzz-acp/src/{goose_usage.rs => usage.rs} (78%) delete mode 100644 crates/buzz-agent/src/metric.rs diff --git a/Cargo.lock b/Cargo.lock index 0619e5ea7..8e09ff919 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -780,8 +780,6 @@ dependencies = [ "async-trait", "axum", "base64", - "buzz-core", - "buzz-sdk", "chrono", "getrandom 0.4.2", "hex", diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index d8ba8dfeb..ea11fce12 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -13,8 +13,8 @@ use tokio::io::AsyncWriteExt; use tokio::process::{Child, ChildStdin, ChildStdout}; use tokio_util::codec::{FramedRead, LinesCodec, LinesCodecError}; -use crate::goose_usage::{GooseTurnUsage, GooseUsageTracker}; use crate::observer::{ObserverContext, ObserverHandle}; +use crate::usage::{TurnUsage, UsageTracker}; /// Maximum allowed size of a single NDJSON line from the agent's stdout. /// Lines exceeding this limit are rejected to prevent OOM from rogue agents. @@ -168,11 +168,11 @@ pub struct AcpClient { /// outside of a goose-native turn — the read loop's steer arm is /// disabled in that case. steer_rx: Option>, - /// Goose usage tracker — accumulates cumulative token counts from + /// Usage tracker — accumulates cumulative token counts from /// `_goose/unstable/session/update` notifications and computes per-turn - /// deltas. Populated only when goose advertises the custom-notifications - /// capability; no-op for other harnesses. 
- goose_usage: GooseUsageTracker, + /// deltas. Both goose and buzz-agent emit this notification; goose gates + /// on client capability advertisement, buzz-agent emits unconditionally. + goose_usage: UsageTracker, } impl AcpClient { @@ -264,7 +264,7 @@ impl AcpClient { observer_context: ObserverContext::default(), active_run_id: None, steer_rx: None, - goose_usage: GooseUsageTracker::default(), + goose_usage: UsageTracker::default(), }) } @@ -533,7 +533,7 @@ impl AcpClient { /// /// Intended for consumption by `publish_agent_turn_metric` in `pool.rs` to /// publish a kind 44200 NIP-AM event. - pub fn take_turn_usage(&mut self) -> Option { + pub fn take_turn_usage(&mut self) -> Option { self.goose_usage.take() } @@ -1359,7 +1359,7 @@ impl AcpClient { /// notification is best-effort observability data, not a protocol /// requirement. Failures are logged at debug level. fn handle_goose_usage_update(&mut self, msg: &serde_json::Value) { - use crate::goose_usage::{GooseSessionUpdateNotification, GooseSessionUpdateVariant}; + use crate::usage::{GooseSessionUpdateNotification, GooseSessionUpdateVariant}; let params = match msg.get("params") { Some(p) => p, None => { diff --git a/crates/buzz-acp/src/config.rs b/crates/buzz-acp/src/config.rs index 8100ea71a..d139dc6cd 100644 --- a/crates/buzz-acp/src/config.rs +++ b/crates/buzz-acp/src/config.rs @@ -541,7 +541,7 @@ fn validate_multiple_event_handling( Ok(()) } -fn normalize_agent_command_identity(command: &str) -> String { +pub(crate) fn normalize_agent_command_identity(command: &str) -> String { let normalized = command.trim().replace('\\', "/"); let trimmed = normalized.trim_end_matches('/'); let basename = trimmed diff --git a/crates/buzz-acp/src/lib.rs b/crates/buzz-acp/src/lib.rs index 940a327aa..f4b1ffd00 100644 --- a/crates/buzz-acp/src/lib.rs +++ b/crates/buzz-acp/src/lib.rs @@ -4,13 +4,13 @@ mod acp; mod config; mod engram_fetch; mod filter; -mod goose_usage; mod observer; mod pool; mod queue; mod relay; +mod 
usage; -pub use goose_usage::GooseTurnUsage; +pub use usage::TurnUsage; use std::collections::{HashMap, HashSet}; use std::sync::Arc; @@ -1405,6 +1405,7 @@ async fn tokio_main() -> Result<()> { .as_deref() .and_then(|hex| nostr::PublicKey::from_hex(hex).ok()), memory_enabled: config.memory_enabled, + harness_name: crate::config::normalize_agent_command_identity(&config.agent_command), }); if !config.memory_enabled { diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index 3b8ef5e0a..83ab51afa 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -391,6 +391,9 @@ pub struct PromptContext { /// `[Agent Memory — core]` section. On by default; disabled via /// `--no-memory` / `BUZZ_ACP_NO_MEMORY`. pub memory_enabled: bool, + /// Harness identity string for NIP-AM `harness` field. Derived from the + /// configured `agent_command` at startup (e.g. `"goose"`, `"buzz-agent"`). + pub harness_name: String, } impl AgentPool { @@ -2699,7 +2702,7 @@ fn acp_stop_to_core(r: &StopReason) -> buzz_core::agent_turn_metric::StopReason /// publishing must never fail a turn. 
async fn publish_agent_turn_metric( ctx: &PromptContext, - usage: Option, + usage: Option, channel_id: Option, session_id: &str, turn_id: &str, @@ -2735,7 +2738,7 @@ async fn publish_agent_turn_metric( }); let timestamp = chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true); let payload = AgentTurnMetricPayload { - harness: "goose".to_string(), + harness: ctx.harness_name.clone(), model: None, channel_id: channel_id.map(|id| id.to_string()), session_id: Some(usage.session_id.clone()), @@ -3904,7 +3907,7 @@ mod tests { #[tokio::test] async fn test_publish_agent_turn_metric_noop_on_no_owner() { let ctx = make_prompt_context_no_owner(); - let usage = crate::goose_usage::GooseTurnUsage { + let usage = crate::usage::TurnUsage { session_id: "sess-1".to_string(), turn_seq: 1, delta_reliable: true, @@ -3935,7 +3938,7 @@ mod tests { let agent_keys = nostr::Keys::generate(); let owner_keys = nostr::Keys::generate(); let ctx = make_prompt_context_with_owner(&agent_keys, owner_keys.public_key()); - let usage = crate::goose_usage::GooseTurnUsage { + let usage = crate::usage::TurnUsage { session_id: "sess-1".to_string(), turn_seq: 1, delta_reliable: true, @@ -3967,7 +3970,7 @@ mod tests { let agent_keys = nostr::Keys::generate(); let owner_keys = nostr::Keys::generate(); let ctx = make_prompt_context_with_owner(&agent_keys, owner_keys.public_key()); - let usage = crate::goose_usage::GooseTurnUsage { + let usage = crate::usage::TurnUsage { session_id: "sess-cancel".to_string(), turn_seq: 2, delta_reliable: true, @@ -3990,6 +3993,38 @@ mod tests { .await; } + /// `publish_agent_turn_metric` uses `ctx.harness_name` in the payload. + /// A buzz-agent-commanded context must not panic — verifies the harness + /// field flows through encrypt/sign without error. 
+ #[tokio::test] + async fn test_publish_agent_turn_metric_buzz_agent_harness_name() { + let agent_keys = nostr::Keys::generate(); + let owner_keys = nostr::Keys::generate(); + let mut ctx = make_prompt_context_with_owner(&agent_keys, owner_keys.public_key()); + ctx.harness_name = "buzz-agent".to_string(); + let usage = crate::usage::TurnUsage { + session_id: "sess-ba".to_string(), + turn_seq: 1, + delta_reliable: false, // first turn from buzz-agent + turn_input_tokens: None, + turn_output_tokens: None, + turn_cost_usd: None, + cumulative_input_tokens: 400, + cumulative_output_tokens: 100, + cumulative_cost_usd: None, + }; + // Will try to publish (encrypt succeeds) and fail HTTP (no relay) — must not panic. + publish_agent_turn_metric( + &ctx, + Some(usage), + Some(uuid::Uuid::new_v4()), + "sess-ba", + "turn-ba", + Some(buzz_core::agent_turn_metric::StopReason::EndTurn), + ) + .await; + } + fn make_prompt_context_no_owner() -> PromptContext { let agent_keys = nostr::Keys::generate(); make_prompt_context_impl(&agent_keys, None) @@ -4031,6 +4066,7 @@ mod tests { agent_keys: agent_keys.clone(), agent_owner_pubkey: owner_pubkey, memory_enabled: false, + harness_name: "goose".to_string(), } } } diff --git a/crates/buzz-acp/src/goose_usage.rs b/crates/buzz-acp/src/usage.rs similarity index 78% rename from crates/buzz-acp/src/goose_usage.rs rename to crates/buzz-acp/src/usage.rs index 0c68d8913..db28d6570 100644 --- a/crates/buzz-acp/src/goose_usage.rs +++ b/crates/buzz-acp/src/usage.rs @@ -1,10 +1,10 @@ -//! Goose-specific usage tracking for NIP-AM agent turn metrics. +//! Usage tracking for NIP-AM agent turn metrics. //! -//! Goose emits a `_goose/unstable/session/update` notification (with -//! `sessionUpdate: "usage_update"`) at the end of every turn when the client -//! has advertised `clientCapabilities._meta.goose.customNotifications: true`. -//! The payload carries session-cumulative token counts from which we derive -//! per-turn deltas. +//! 
Agents that support usage reporting emit a `_goose/unstable/session/update` +//! notification (with `sessionUpdate: "usage_update"`) at the end of every +//! turn. Both goose and buzz-agent use this same wire format. The payload +//! carries session-cumulative token counts from which we derive per-turn +//! deltas. //! //! # Delta computation //! @@ -19,7 +19,7 @@ //! 3. **Session restart** (caller supplies a new `session_id` not seen //! before): treated as case 1 — fresh baseline, no delta for this turn. //! -//! The `GooseTurnUsage` produced after each turn is consumed by the +//! The `TurnUsage` produced after each turn is consumed by the //! `TurnCompletionGuard` in `pool.rs` to publish a kind 44200 relay event. use std::collections::HashMap; @@ -41,6 +41,9 @@ use std::collections::HashMap; /// } /// } /// ``` +/// +/// `used` and `contextLimit` are optional because buzz-agent does not track a +/// context window limit; the fields are present when goose emits them. #[derive(Debug, Clone, serde::Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct GooseSessionUpdateNotification { @@ -53,17 +56,22 @@ pub(crate) struct GooseSessionUpdateNotification { #[derive(Debug, Clone, serde::Deserialize)] #[serde(tag = "sessionUpdate", rename_all = "snake_case")] pub(crate) enum GooseSessionUpdateVariant { - UsageUpdate(GooseUsageUpdatePayload), + UsageUpdate(UsageUpdatePayload), #[serde(other)] Other, } -/// The `usage_update` payload from goose. +/// The `usage_update` payload. #[derive(Debug, Clone, serde::Deserialize)] #[serde(rename_all = "camelCase")] -pub(crate) struct GooseUsageUpdatePayload { +pub(crate) struct UsageUpdatePayload { + /// Total tokens used (context-usage proxy). Optional — buzz-agent omits + /// this field or sends 0 because it does not track a context window limit. + #[serde(default)] #[allow(dead_code)] pub used: u64, + /// Context window size. Optional — buzz-agent omits this field. 
+ #[serde(default)] #[allow(dead_code)] pub context_limit: u64, pub accumulated_input_tokens: u64, @@ -88,9 +96,9 @@ struct SessionState { /// Per-turn usage record exposed to `TurnCompletionGuard` for NIP-AM publishing. /// /// `turn_*` fields are `None` when delta is unreliable (first turn or counter -/// decrease). `cumulative_*` fields are always present when goose reports them. +/// decrease). `cumulative_*` fields are always present when the agent reports them. #[derive(Debug, Clone)] -pub struct GooseTurnUsage { +pub struct TurnUsage { /// Goose session id (maps to NIP-AM `sessionId`). pub session_id: String, /// Per-session monotonic sequence number for this turn (maps to NIP-AM `turnSeq`). @@ -127,10 +135,10 @@ pub struct GooseTurnUsage { /// cumulative baseline; only produces a publishable record when a turn is /// currently in-flight for the matching session. /// 3. **`take()`** — called at turn completion by `TurnCompletionGuard`. -/// Drains and returns the pending record (or `None` if goose did not emit -/// usage for this turn) and clears the in-flight marker. +/// Drains and returns the pending record (or `None` if no usage was emitted +/// for this turn) and clears the in-flight marker. #[derive(Debug, Default)] -pub(crate) struct GooseUsageTracker { +pub(crate) struct UsageTracker { /// One entry per goose `sessionId` ever seen in this process. sessions: HashMap, /// The session that currently has an in-flight `session/prompt`. @@ -138,10 +146,10 @@ pub(crate) struct GooseUsageTracker { /// the baseline but will not set `pending`. in_flight_session: Option, /// The most recently computed turn usage, ready for `take()`. - pending: Option, + pending: Option, } -impl GooseUsageTracker { +impl UsageTracker { /// Mark the start of a new prompt turn for `session_id`. 
/// /// Clears any leftover `pending` record and records which session is @@ -166,7 +174,7 @@ impl GooseUsageTracker { /// /// When multiple notifications arrive during the same turn, the last one /// wins (goose may emit several per turn; each increments `turn_seq`). - pub(crate) fn record(&mut self, session_id: &str, payload: &GooseUsageUpdatePayload) { + pub(crate) fn record(&mut self, session_id: &str, payload: &UsageUpdatePayload) { let current_input = payload.accumulated_input_tokens; let current_output = payload.accumulated_output_tokens; let current_cost = payload.accumulated_cost; @@ -220,7 +228,7 @@ impl GooseUsageTracker { // Only publish a pending record if this session is currently in-flight. if self.in_flight_session.as_deref() == Some(session_id) { - self.pending = Some(GooseTurnUsage { + self.pending = Some(TurnUsage { session_id: session_id.to_string(), turn_seq, delta_reliable, @@ -238,10 +246,10 @@ impl GooseUsageTracker { /// clear the in-flight marker. /// /// Returns `None` if no `usage_update` arrived during the current in-flight - /// turn (goose did not emit usage, or no `begin_turn` was called). The + /// turn (the agent did not emit usage, or no `begin_turn` was called). The /// caller (`TurnCompletionGuard`) must handle `None`. 
#[cfg_attr(not(test), allow(dead_code))] - pub(crate) fn take(&mut self) -> Option { + pub(crate) fn take(&mut self) -> Option { self.in_flight_session = None; self.pending.take() } @@ -251,8 +259,8 @@ impl GooseUsageTracker { mod tests { use super::*; - fn payload(input: u64, output: u64, cost: Option) -> GooseUsageUpdatePayload { - GooseUsageUpdatePayload { + fn payload(input: u64, output: u64, cost: Option) -> UsageUpdatePayload { + UsageUpdatePayload { used: input + output, context_limit: 200_000, accumulated_input_tokens: input, @@ -261,6 +269,16 @@ mod tests { } } + fn payload_no_context(input: u64, output: u64, cost: Option) -> UsageUpdatePayload { + UsageUpdatePayload { + used: 0, + context_limit: 0, + accumulated_input_tokens: input, + accumulated_output_tokens: output, + accumulated_cost: cost, + } + } + // ── Turn scoping: setup notifications must not pollute the first real turn ─ #[test] @@ -268,7 +286,7 @@ mod tests { // Regression: setup notifications fire during session/new (before any // prompt). They must update the baseline but must NOT produce a // publishable record for the next turn. - let mut tracker = GooseUsageTracker::default(); + let mut tracker = UsageTracker::default(); // Simulate a setup notification (no begin_turn called yet). tracker.record("sess-setup", &payload(500, 100, Some(0.005))); @@ -305,7 +323,7 @@ mod tests { fn record_outside_in_flight_does_not_clobber_pending() { // A notification for a different session_id while another is in-flight // must not overwrite the pending record. 
- let mut tracker = GooseUsageTracker::default(); + let mut tracker = UsageTracker::default(); tracker.begin_turn("sess-a"); tracker.record("sess-a", &payload(1000, 200, None)); @@ -320,7 +338,7 @@ mod tests { #[test] fn first_turn_no_prior_delta_unreliable() { - let mut tracker = GooseUsageTracker::default(); + let mut tracker = UsageTracker::default(); tracker.begin_turn("sess-1"); tracker.record("sess-1", &payload(1000, 200, Some(0.01))); let usage = tracker.take().expect("should have pending usage"); @@ -342,7 +360,7 @@ mod tests { #[test] fn counter_decrease_delta_unreliable_no_negatives() { - let mut tracker = GooseUsageTracker::default(); + let mut tracker = UsageTracker::default(); // Turn 1 — establish baseline. tracker.begin_turn("sess-2"); tracker.record("sess-2", &payload(5000, 1000, Some(0.05))); @@ -368,7 +386,7 @@ mod tests { // Regression for Thufir fix 2: cost counter decrease must set // delta_reliable = false and null all turn fields (not just cost). // turn_seq still increments (NIP-AM: seq advances even on unreliable). - let mut tracker = GooseUsageTracker::default(); + let mut tracker = UsageTracker::default(); // Turn 1 — establish baseline with cost. tracker.begin_turn("sess-cost"); tracker.record("sess-cost", &payload(1000, 200, Some(0.10))); @@ -400,7 +418,7 @@ mod tests { #[test] fn cost_absent_on_one_side_leaves_tokens_reliable() { // Cost merely absent on either side: null cost, reliable tokens. - let mut tracker = GooseUsageTracker::default(); + let mut tracker = UsageTracker::default(); tracker.begin_turn("sess-nocost"); tracker.record("sess-nocost", &payload(1000, 200, Some(0.01))); let _ = tracker.take(); @@ -424,7 +442,7 @@ mod tests { #[test] fn session_restart_new_session_id_treated_as_first_turn() { - let mut tracker = GooseUsageTracker::default(); + let mut tracker = UsageTracker::default(); // Original session. 
tracker.begin_turn("sess-a"); tracker.record("sess-a", &payload(8000, 2000, None)); @@ -448,7 +466,7 @@ mod tests { #[test] fn second_turn_delta_computed_correctly() { - let mut tracker = GooseUsageTracker::default(); + let mut tracker = UsageTracker::default(); tracker.begin_turn("sess-3"); tracker.record("sess-3", &payload(1000, 200, Some(0.01))); let _ = tracker.take(); @@ -470,7 +488,7 @@ mod tests { #[test] fn take_returns_none_after_drain() { - let mut tracker = GooseUsageTracker::default(); + let mut tracker = UsageTracker::default(); tracker.begin_turn("sess-4"); tracker.record("sess-4", &payload(100, 20, None)); let _ = tracker.take(); @@ -479,7 +497,7 @@ mod tests { #[test] fn last_update_wins_multiple_updates_same_turn() { - let mut tracker = GooseUsageTracker::default(); + let mut tracker = UsageTracker::default(); // Turn 1 — baseline. tracker.begin_turn("sess-5"); tracker.record("sess-5", &payload(1000, 100, None)); @@ -528,6 +546,31 @@ mod tests { } } + #[test] + fn notification_deserializes_without_used_and_context_limit() { + // buzz-agent emits usage_update without used/contextLimit. 
+ let raw = serde_json::json!({ + "sessionId": "buzz-sess", + "update": { + "sessionUpdate": "usage_update", + "accumulatedInputTokens": 500, + "accumulatedOutputTokens": 100 + } + }); + let notif: GooseSessionUpdateNotification = + serde_json::from_value(raw).expect("deserialization"); + match notif.update { + GooseSessionUpdateVariant::UsageUpdate(p) => { + assert_eq!(p.accumulated_input_tokens, 500); + assert_eq!(p.accumulated_output_tokens, 100); + assert_eq!(p.used, 0); + assert_eq!(p.context_limit, 0); + assert!(p.accumulated_cost.is_none()); + } + GooseSessionUpdateVariant::Other => panic!("expected UsageUpdate"), + } + } + #[test] fn other_variant_deserializes_without_error() { let raw = serde_json::json!({ @@ -563,4 +606,66 @@ mod tests { _ => panic!("expected UsageUpdate"), } } + + #[test] + fn buzz_agent_notification_flows_through_tracker() { + // End-to-end: a buzz-agent-shaped usage_update (no used/contextLimit) + // deserializes and flows through UsageTracker to produce correct TurnUsage. + let raw1 = serde_json::json!({ + "sessionId": "buzz-s1", + "update": { + "sessionUpdate": "usage_update", + "accumulatedInputTokens": 300, + "accumulatedOutputTokens": 80 + } + }); + let raw2 = serde_json::json!({ + "sessionId": "buzz-s1", + "update": { + "sessionUpdate": "usage_update", + "accumulatedInputTokens": 700, + "accumulatedOutputTokens": 150 + } + }); + + let mut tracker = UsageTracker::default(); + + // Turn 1 — first turn, delta unreliable. + tracker.begin_turn("buzz-s1"); + let notif1: GooseSessionUpdateNotification = serde_json::from_value(raw1).expect("deser"); + if let GooseSessionUpdateVariant::UsageUpdate(p) = notif1.update { + tracker.record("buzz-s1", &p); + } + let t1 = tracker.take().expect("turn 1"); + assert!(!t1.delta_reliable, "first turn: unreliable"); + assert_eq!(t1.cumulative_input_tokens, 300); + + // Turn 2 — delta reliable. 
+ tracker.begin_turn("buzz-s1"); + let notif2: GooseSessionUpdateNotification = serde_json::from_value(raw2).expect("deser"); + if let GooseSessionUpdateVariant::UsageUpdate(p) = notif2.update { + tracker.record("buzz-s1", &p); + } + let t2 = tracker.take().expect("turn 2"); + assert!(t2.delta_reliable, "second turn: reliable"); + assert_eq!(t2.turn_input_tokens, Some(400)); // 700 - 300 + assert_eq!(t2.turn_output_tokens, Some(70)); // 150 - 80 + } + + #[test] + fn buzz_agent_payload_no_context_fields_processes_correctly() { + // UsageTracker handles payloads with used=0 / context_limit=0 correctly. + let mut tracker = UsageTracker::default(); + tracker.begin_turn("s"); + tracker.record("s", &payload_no_context(1000, 200, None)); + let _ = tracker.take(); + + tracker.begin_turn("s"); + tracker.record("s", &payload_no_context(1500, 300, None)); + let usage = tracker.take().expect("pending"); + + assert!(usage.delta_reliable); + assert_eq!(usage.turn_input_tokens, Some(500)); + assert_eq!(usage.turn_output_tokens, Some(100)); + } } diff --git a/crates/buzz-agent/Cargo.toml b/crates/buzz-agent/Cargo.toml index 5b40f069f..720f0785a 100644 --- a/crates/buzz-agent/Cargo.toml +++ b/crates/buzz-agent/Cargo.toml @@ -43,8 +43,6 @@ hex = { workspace = true } sha2 = { workspace = true } urlencoding = "2" webbrowser = "1" -buzz-core = { workspace = true } -buzz-sdk = { workspace = true } nostr = { workspace = true } chrono = { workspace = true } uuid = { workspace = true } diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index 1071e40d4..9c97b6b91 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -8,7 +8,6 @@ mod handoff; mod hints; mod llm; mod mcp; -mod metric; pub mod types; mod wire; @@ -31,16 +30,15 @@ use crate::llm::Llm; use crate::mcp::McpRegistry; use crate::types::{ContentBlock, HistoryItem}; use crate::wire::{ - classify, Inbound, InitializeParams, SessionCancelParams, SessionNewParams, - SessionPromptParams, 
SessionSteerParams, WireMsg, WireSender, INVALID_PARAMS, METHOD_NOT_FOUND, - PARSE_ERROR, + classify, goose_session_update, Inbound, InitializeParams, SessionCancelParams, + SessionNewParams, SessionPromptParams, SessionSteerParams, WireMsg, WireSender, INVALID_PARAMS, + METHOD_NOT_FOUND, PARSE_ERROR, }; struct App { cfg: Config, llm: Arc, sessions: Mutex>, - metric_publisher: Arc, } struct Session { @@ -73,9 +71,12 @@ struct Session { /// with it so the gate can account for history appended since. last_request_history_bytes: Option, effective_system_prompt: Arc, - /// Monotonically increasing per-session turn counter for NIP-AM metric events. - /// Incremented on each `session/prompt` request. - turn_seq: u64, + /// Session-cumulative input tokens across all turns. Sent in the + /// `_goose/unstable/session/update` usage notification so buzz-acp's + /// `UsageTracker` can compute per-turn deltas symmetrically with goose. + accumulated_input_tokens: u64, + /// Session-cumulative output tokens across all turns. + accumulated_output_tokens: u64, } fn die(msg: String) -> ! 
{ @@ -140,7 +141,6 @@ async fn async_main() { cfg, llm, sessions: Mutex::new(HashMap::new()), - metric_publisher: Arc::new(metric::MetricPublisher::from_env()), }); let (wire_tx, wire_rx) = mpsc::channel::(64); let writer = tokio::spawn(wire::writer_task(wire_rx)); @@ -371,7 +371,8 @@ async fn session_new(app: &Arc, id: Value, params: Value, wire_tx: &WireSen last_request_input_tokens: None, last_request_history_bytes: None, effective_system_prompt, - turn_seq: 0, + accumulated_input_tokens: 0, + accumulated_output_tokens: 0, }, ); drop(sessions); @@ -496,7 +497,6 @@ async fn run_prompt(app: Arc, id: Value, params: Value, wire_tx: WireSender effective_system_prompt, run_id, mut steer_rx, - turn_seq, ) = match acquire_session(&app, &p.session_id).await { Ok(v) => v, Err(reason) => { @@ -554,22 +554,50 @@ async fn run_prompt(app: Arc, id: Value, params: Value, wire_tx: WireSender s.last_request_input_tokens = last_request_input_tokens; s.last_request_history_bytes = last_request_history_bytes; } - // Best-effort: publish NIP-AM kind 44200 agent turn metric. Never fails - // the turn — errors are logged at WARN inside MetricPublisher::publish. - let nip_am_stop = match &result { - Ok(stop) => agent_stop_to_nip_am(stop), - Err(_) => buzz_core::agent_turn_metric::StopReason::Error, - }; - app.metric_publisher - .publish( - &sid, - turn_seq, - &run_id, - turn_input_tokens, - turn_output_tokens, - nip_am_stop, + // Update session-cumulative token counters and emit the usage notification + // BEFORE sending the session/prompt response. buzz-acp's UsageTracker + // processes the notification while the turn is still in-flight (i.e. before + // the response triggers take_turn_usage()), which is required for the + // begin_turn gate to recognise it as publishable. 
+ // + // Only emit when at least one token count was observed — a turn with no + // provider response (validation failure, pre-response cancellation) carries + // no information and must not produce a kind 44200 record per NIP-AM. + if turn_input_tokens.is_some() || turn_output_tokens.is_some() { + let (accumulated_in, accumulated_out) = { + let mut sessions = app.sessions.lock().await; + if let Some(s) = sessions.get_mut(&sid) { + s.accumulated_input_tokens = s + .accumulated_input_tokens + .saturating_add(turn_input_tokens.unwrap_or(0)); + s.accumulated_output_tokens = s + .accumulated_output_tokens + .saturating_add(turn_output_tokens.unwrap_or(0)); + (s.accumulated_input_tokens, s.accumulated_output_tokens) + } else { + ( + turn_input_tokens.unwrap_or(0), + turn_output_tokens.unwrap_or(0), + ) + } + }; + wire::send( + &wire_tx, + goose_session_update( + &sid, + json!({ + "sessionUpdate": "usage_update", + // used: total tokens as a context-usage proxy; + // contextLimit: 0 (buzz-agent has no context limit tracking). + "used": accumulated_in.saturating_add(accumulated_out), + "contextLimit": 0u64, + "accumulatedInputTokens": accumulated_in, + "accumulatedOutputTokens": accumulated_out, + }), + ), ) .await; + } match result { Ok(stop) => { wire::send( @@ -600,7 +628,6 @@ async fn acquire_session( Arc, String, mpsc::UnboundedReceiver>, - u64, ), &'static str, > { @@ -622,10 +649,6 @@ async fn acquire_session( s.active_run_id = Some(run_id.clone()); let (steer_tx, steer_rx) = mpsc::unbounded_channel(); s.steer_tx = Some(steer_tx); - // Increment turn sequence number before returning so the metric event - // gets a monotonically increasing counter starting at 1. 
- s.turn_seq = s.turn_seq.saturating_add(1); - let turn_seq = s.turn_seq; Ok(( s.id.clone(), s.mcp.clone(), @@ -640,7 +663,6 @@ async fn acquire_session( Arc::clone(&s.effective_system_prompt), run_id, steer_rx, - turn_seq, )) } @@ -649,16 +671,3 @@ fn session_token() -> Result { getrandom::fill(&mut b).map_err(|e| format!("rng: getrandom failed: {e}"))?; Ok(b.iter().map(|x| format!("{x:02x}")).collect()) } - -/// Map a buzz-agent `StopReason` to the NIP-AM `StopReason` used in kind 44200 payloads. -fn agent_stop_to_nip_am(r: &crate::types::StopReason) -> buzz_core::agent_turn_metric::StopReason { - use crate::types::StopReason; - use buzz_core::agent_turn_metric::StopReason as CoreStop; - match r { - StopReason::EndTurn => CoreStop::EndTurn, - StopReason::Cancelled => CoreStop::Cancelled, - StopReason::MaxTokens => CoreStop::MaxTokens, - StopReason::MaxTurnRequests => CoreStop::Unknown, - StopReason::Refusal => CoreStop::Unknown, - } -} diff --git a/crates/buzz-agent/src/metric.rs b/crates/buzz-agent/src/metric.rs deleted file mode 100644 index eae9ff1b9..000000000 --- a/crates/buzz-agent/src/metric.rs +++ /dev/null @@ -1,418 +0,0 @@ -//! NIP-AM kind:44200 metric publishing for the buzz-agent harness. -//! -//! Configured from three environment variables: -//! - `BUZZ_PRIVATE_KEY` — agent Nostr private key (nsec or hex). -//! - `BUZZ_RELAY_URL` — relay base URL (`wss://` or `https://`; both accepted). -//! - `BUZZ_AUTH_TAG` — NIP-OA attestation JSON (preferred owner source). -//! Owner is derived by verifying the auth tag against the agent's own pubkey. -//! Falls back to `BUZZ_AGENT_OWNER_PUBKEY` (npub or hex) if auth tag is absent. -//! -//! If any required variable is absent or unparseable, metric publishing is a -//! silent no-op. This mirrors the fail-open policy used throughout the harness. -//! -//! ## Turn tracking -//! -//! buzz-agent has no session-cumulative token counters. Each turn may span -//! 
multiple LLM rounds (tool calls); per-turn tokens are accumulated across -//! all rounds. `deltaReliable` is always `true` because buzz-agent tracks -//! every round within a turn in-process — no cross-process baseline is ever -//! lost. Session-level cumulative fields are omitted (`None`) because -//! buzz-agent does not maintain running totals across turns in a session. - -use nostr::Keys; -use reqwest::Client; - -/// Resolved configuration for a `MetricPublisher`. Separated from env-parsing -/// so tests can inject values directly without mutating process-global state. -pub(crate) struct MetricConfig { - pub(crate) keys: Keys, - pub(crate) owner_pubkey: nostr::PublicKey, - /// HTTP(S) base URL — ws/wss already normalized to http/https, no trailing - /// slash. - pub(crate) base_url: String, - /// Raw `BUZZ_AUTH_TAG` JSON, forwarded as `x-auth-tag` for attested agents. - pub(crate) auth_tag_json: Option, -} - -/// Configured NIP-AM publisher. Constructed once per process from env vars. -/// When env vars are absent, construction succeeds and `is_noop()` returns -/// `true` — callers need not special-case the unconfigured case. -pub(crate) struct MetricPublisher { - config: Option, - http: Client, -} - -impl MetricPublisher { - /// Build from environment. Silent on parse errors — missing/malformed vars - /// leave the config absent (no-op publisher). - /// - /// Owner resolution priority: - /// 1. `BUZZ_AUTH_TAG` — NIP-OA attestation verified against this agent's - /// pubkey; extracts the owner pubkey from the tag. - /// 2. `BUZZ_AGENT_OWNER_PUBKEY` — explicit hex or npub fallback. 
- pub(crate) fn from_env() -> Self { - Self { - config: Self::config_from_env(), - http: Client::new(), - } - } - - fn config_from_env() -> Option { - let keys = std::env::var("BUZZ_PRIVATE_KEY") - .ok() - .and_then(|v| Keys::parse(&v).ok())?; - let raw_url = std::env::var("BUZZ_RELAY_URL") - .ok() - .filter(|s| !s.is_empty())?; - let base_url = ws_to_http(raw_url.trim_end_matches('/')); - - // Try BUZZ_AUTH_TAG first. - let (owner_pubkey, auth_tag_json) = match std::env::var("BUZZ_AUTH_TAG") - .ok() - .filter(|s| !s.is_empty()) - { - Some(tag_json) => { - match buzz_sdk::nip_oa::verify_auth_tag(&tag_json, &keys.public_key()) { - Ok(pk) => (pk, Some(tag_json)), - // Auth tag present but verification failed — fall through. - Err(_) => resolve_explicit_owner()?, - } - } - None => resolve_explicit_owner()?, - }; - - Some(MetricConfig { - keys, - owner_pubkey, - base_url, - auth_tag_json, - }) - } - - /// Build from an explicit config (test helper — avoids process-env mutation). - #[cfg(test)] - pub(crate) fn from_config(config: MetricConfig) -> Self { - Self { - config: Some(config), - http: Client::new(), - } - } - - /// Returns `true` when no complete config is available. Publishing is - /// always a no-op in this state. - #[cfg(test)] - pub(crate) fn is_noop(&self) -> bool { - self.config.is_none() - } - - /// Best-effort publish a kind 44200 event. - /// - /// - `session_id` — the ACP session id for this turn. - /// - `turn_seq` — monotonically increasing per-session turn counter. - /// - `turn_id` — the run id for this turn (harness-internal). - /// - `input_tokens` / `output_tokens` — summed across all LLM rounds in the turn. - /// - `stop_reason` — the NIP-AM stop reason. - /// - /// No-op when no usage was observed (`input_tokens` and `output_tokens` - /// both `None`) — per NIP-AM § "Do NOT publish an event for a turn with no - /// observed usage". 
- /// - /// Errors are logged at WARN and never propagated — a metric publish - /// failure must never fail a turn. - pub(crate) async fn publish( - &self, - session_id: &str, - turn_seq: u64, - turn_id: &str, - input_tokens: Option, - output_tokens: Option, - stop_reason: buzz_core::agent_turn_metric::StopReason, - ) { - use buzz_core::agent_turn_metric::{AgentTurnMetricPayload, TokenCounts}; - use nostr::{EventBuilder, Kind, Tag}; - - // No usage observed — NIP-AM forbids publishing an all-null metric. - if input_tokens.is_none() && output_tokens.is_none() { - return; - } - - let MetricConfig { - keys, - owner_pubkey: owner_pk, - base_url, - auth_tag_json, - } = match &self.config { - Some(c) => c, - None => return, - }; - - // buzz-agent has no session-cumulative counters — only per-turn deltas. - // deltaReliable is true because we sum every round in this process; - // no cross-process baseline is ever lost. Cumulative fields are omitted - // since buzz-agent does not track rolling session totals across turns. 
- let turn_counts = Some(TokenCounts { - input_tokens, - output_tokens, - total_tokens: None, - cost_usd: None, - cache_read_tokens: None, - cache_write_tokens: None, - }); - - let timestamp = chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true); - let payload = AgentTurnMetricPayload { - harness: "buzz-agent".to_string(), - model: None, - channel_id: None, - session_id: Some(session_id.to_string()), - turn_id: Some(turn_id.to_string()), - turn_seq: Some(turn_seq), - timestamp, - turn: turn_counts, - cumulative: None, - delta_reliable: true, - stop_reason: Some(stop_reason), - }; - - let ciphertext = - match buzz_core::agent_turn_metric::encrypt_agent_turn_metric(keys, owner_pk, &payload) - { - Ok(c) => c, - Err(e) => { - tracing::warn!( - target: "buzz_agent::metrics", - session_id, - turn_id, - "NIP-AM: encrypt failed: {e}" - ); - return; - } - }; - - let agent_hex = keys.public_key().to_hex(); - let owner_hex = owner_pk.to_hex(); - let event = match EventBuilder::new( - Kind::Custom(buzz_core::kind::KIND_AGENT_TURN_METRIC as u16), - ciphertext, - ) - .tags([ - Tag::parse(["p", &owner_hex]).expect("p tag"), - Tag::parse(["agent", &agent_hex]).expect("agent tag"), - ]) - .sign_with_keys(keys) - { - Ok(e) => e, - Err(e) => { - tracing::warn!( - target: "buzz_agent::metrics", - session_id, - turn_id, - "NIP-AM: sign failed: {e}" - ); - return; - } - }; - - let body_bytes = match serde_json::to_vec(&event) { - Ok(b) => b, - Err(e) => { - tracing::warn!( - target: "buzz_agent::metrics", - session_id, - "NIP-AM: serialize failed: {e}" - ); - return; - } - }; - - let url = format!("{base_url}/events"); - let auth_header = match nip98_auth(keys, "POST", &url, Some(&body_bytes)) { - Ok(h) => h, - Err(e) => { - tracing::warn!( - target: "buzz_agent::metrics", - session_id, - "NIP-AM: NIP-98 auth failed: {e}" - ); - return; - } - }; - - const METRIC_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(3); - let mut req = self - .http - 
.post(&url) - .header("Authorization", auth_header) - .header("Content-Type", "application/json"); - if let Some(tag) = auth_tag_json { - req = req.header("x-auth-tag", tag); - } - match tokio::time::timeout(METRIC_TIMEOUT, req.body(body_bytes).send()).await { - Ok(Ok(resp)) if resp.status().is_success() => {} - Ok(Ok(resp)) => tracing::warn!( - target: "buzz_agent::metrics", - session_id, - turn_id, - "NIP-AM: publish HTTP {}", resp.status() - ), - Ok(Err(e)) => tracing::warn!( - target: "buzz_agent::metrics", - session_id, - turn_id, - "NIP-AM: publish failed: {e}" - ), - Err(_) => tracing::warn!( - target: "buzz_agent::metrics", - session_id, - turn_id, - "NIP-AM: publish timed out" - ), - } - } -} - -/// Normalize `ws://` / `wss://` relay URLs to `http://` / `https://`. -/// Pass-through for URLs that are already HTTP(S). -fn ws_to_http(url: &str) -> String { - url.replace("wss://", "https://") - .replace("ws://", "http://") - .to_string() -} - -/// Parse `BUZZ_AGENT_OWNER_PUBKEY` as the explicit owner fallback. -/// Returns `(pubkey, None)` on success, `None` if the var is absent/invalid. -fn resolve_explicit_owner() -> Option<(nostr::PublicKey, Option)> { - let pk = std::env::var("BUZZ_AGENT_OWNER_PUBKEY") - .ok() - .and_then(|v| nostr::PublicKey::parse(&v).ok())?; - Some((pk, None)) -} - -/// Build a NIP-98 HTTP Auth `Authorization` header value: `Nostr `. 
-fn nip98_auth(keys: &Keys, method: &str, url: &str, body: Option<&[u8]>) -> Result { - use base64::Engine; - use nostr::{EventBuilder, Kind, Tag}; - use sha2::{Digest, Sha256}; - - let u_tag = Tag::parse(["u", url]).map_err(|e| e.to_string())?; - let method_tag = Tag::parse(["method", method]).map_err(|e| e.to_string())?; - let nonce_tag = - Tag::parse(["nonce", &uuid::Uuid::new_v4().to_string()]).map_err(|e| e.to_string())?; - let mut tags = vec![u_tag, method_tag, nonce_tag]; - if let Some(b) = body { - let hash = hex::encode(Sha256::digest(b)); - let payload_tag = Tag::parse(["payload", &hash]).map_err(|e| e.to_string())?; - tags.push(payload_tag); - } - let event = EventBuilder::new(Kind::HttpAuth, "") - .tags(tags) - .sign_with_keys(keys) - .map_err(|e| e.to_string())?; - let json = serde_json::to_string(&event).map_err(|e| e.to_string())?; - Ok(format!( - "Nostr {}", - base64::engine::general_purpose::STANDARD.encode(json) - )) -} - -#[cfg(test)] -mod tests { - use super::*; - use nostr::Keys; - - fn make_config(owner_keys: &Keys) -> MetricConfig { - MetricConfig { - keys: Keys::generate(), - owner_pubkey: owner_keys.public_key(), - base_url: "https://relay.example.com".to_string(), - auth_tag_json: None, - } - } - - /// A publisher built from an explicit config is not a no-op. - #[test] - fn test_metric_publisher_configured_when_config_injected() { - let owner_keys = Keys::generate(); - let p = MetricPublisher::from_config(make_config(&owner_keys)); - assert!( - !p.is_noop(), - "publisher must not be noop when config is set" - ); - } - - /// A publisher with no config (None) is a no-op. - #[test] - fn test_metric_publisher_noop_when_no_config() { - let p = MetricPublisher { - config: None, - http: Client::new(), - }; - assert!(p.is_noop(), "publisher must be noop when config is None"); - } - - /// When both token fields are None, publish returns without building/sending - /// an event. 
Verified by the absence of a panic or network call (we use an - /// invalid URL so any real HTTP attempt would error — silence is the proof). - #[tokio::test] - async fn test_publish_noop_when_no_usage_observed() { - let owner_keys = Keys::generate(); - let mut config = make_config(&owner_keys); - // Use an unreachable URL — if any HTTP request were made it would fail - // visibly. The test must complete silently. - config.base_url = "https://127.0.0.1:1".to_string(); - let p = MetricPublisher::from_config(config); - // Both tokens absent → must return before any encrypt/send attempt. - p.publish( - "session-1", - 0, - "turn-1", - None, - None, - buzz_core::agent_turn_metric::StopReason::EndTurn, - ) - .await; - // If we reach here without error, the no-usage guard fired correctly. - } - - /// ws:// URL is normalized to http://. - #[test] - fn test_ws_to_http_plain() { - assert_eq!( - ws_to_http("ws://relay.example.com"), - "http://relay.example.com" - ); - } - - /// wss:// URL is normalized to https://. - #[test] - fn test_ws_to_http_secure() { - assert_eq!( - ws_to_http("wss://relay.example.com"), - "https://relay.example.com" - ); - } - - /// https:// URLs pass through unchanged. - #[test] - fn test_ws_to_http_passthrough() { - assert_eq!( - ws_to_http("https://relay.example.com"), - "https://relay.example.com" - ); - } - - /// Auth tag JSON is forwarded in the `x-auth-tag` header field of the - /// config. Verify it round-trips through the config struct intact. 
- #[test] - fn test_auth_tag_json_stored_in_config() { - let tag_json = r#"["auth","deadbeef","*","sig"]"#; - let owner_keys = Keys::generate(); - let config = MetricConfig { - keys: Keys::generate(), - owner_pubkey: owner_keys.public_key(), - base_url: "https://relay.example.com".to_string(), - auth_tag_json: Some(tag_json.to_string()), - }; - assert_eq!(config.auth_tag_json.as_deref(), Some(tag_json)); - } -} diff --git a/crates/buzz-agent/src/wire.rs b/crates/buzz-agent/src/wire.rs index 7c164724d..9d9bd69fe 100644 --- a/crates/buzz-agent/src/wire.rs +++ b/crates/buzz-agent/src/wire.rs @@ -126,6 +126,18 @@ pub fn session_update(sid: &str, update: Value) -> Value { }) } +/// A `_goose/unstable/session/update` notification — the separate top-level +/// method goose uses for custom usage and status events. Used by buzz-agent +/// to emit the `usage_update` payload so buzz-acp's `UsageTracker` can treat +/// buzz-agent and goose symmetrically. +pub fn goose_session_update(sid: &str, update: Value) -> Value { + json!({ + "jsonrpc": "2.0", + "method": "_goose/unstable/session/update", + "params": { "sessionId": sid, "update": update }, + }) +} + /// A `session/update` notification carrying a `update._meta.goose.` field. /// Used to advertise `activeRunId` (so steer-capable clients can target the /// in-flight run) and `queuedSteer` (so they can correlate an accepted steer From ac424203a8a1f7f46ca7f577c60cda36bab9caf5 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 2 Jul 2026 12:36:04 -0400 Subject: [PATCH 13/21] test(buzz-agent): add producer-contract tests for usage notification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thufir pass-1 finding: the _goose/unstable/session/update usage_update emission path in lib.rs had no integration coverage. A typo in method name, field name, ordering, or accumulation would ship green. 
Three tests added to fake_llm.rs: - usage_notification_emitted_before_prompt_response: two sequential turns with canned usage assert the notification arrives before each session/prompt response, with cumulative accumulatedInputTokens / accumulatedOutputTokens (turn 2 verifies 10+20=30, 5+8=13). - no_usage_turn_emits_no_usage_notification: provider response with no usage block → assert no _goose/unstable/session/update frame appears before the response. - cancelled_turn_with_usage_emits_notification_before_response: round 1 is a tool call with usage (tokens captured); cancel fires after the tool_call_update (round 1 LLM response processed); assert usage notification precedes the cancelled/error turn response. Helpers added: openai_text_with_usage, openai_tool_call_with_usage, is_usage_update, recv_until_with_drain. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-agent/tests/fake_llm.rs | 279 ++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index a1791e7d6..4476652b9 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -750,6 +750,285 @@ async fn steer_rejected_on_run_id_mismatch() { h.shutdown().await; } +// ─── Usage notification (_goose/unstable/session/update usage_update) ─────── + +/// An OpenAI chat completion response with a `usage` block (prompt_tokens + +/// completion_tokens). buzz-agent maps these to `accumulatedInputTokens` / +/// `accumulatedOutputTokens` in the `_goose/unstable/session/update` notification. 
+fn openai_text_with_usage(content: &str, input_tokens: u64, output_tokens: u64) -> Value { + json!({ + "id": "cc-u", "object": "chat.completion", "model": "fake-model", + "choices": [{ + "index": 0, + "message": { "role": "assistant", "content": content }, + "finish_reason": "stop", + }], + "usage": { + "prompt_tokens": input_tokens, + "completion_tokens": output_tokens, + "total_tokens": input_tokens + output_tokens, + }, + }) +} + +/// Returns true when `v` is a `_goose/unstable/session/update` usage_update +/// notification. +fn is_usage_update(v: &Value) -> bool { + v.get("method") == Some(&json!("_goose/unstable/session/update")) + && v["params"]["update"]["sessionUpdate"] == "usage_update" +} + +/// Collect every frame that arrives BEFORE the message matching `until_pred`, +/// then return (frames_before, matching_frame). +async fn recv_until_with_drain(h: &mut Harness, mut until_pred: F) -> (Vec, Value) +where + F: FnMut(&Value) -> bool, +{ + let mut before = Vec::new(); + loop { + let v = h.recv().await; + if until_pred(&v) { + return (before, v); + } + before.push(v); + } +} + +/// buzz-agent must emit `_goose/unstable/session/update` with `sessionUpdate: +/// "usage_update"` **before** the `session/prompt` response on each turn, and +/// must accumulate counters across turns (turn 2 reports turn1+turn2 sums). 
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn usage_notification_emitted_before_prompt_response() { + let url = spawn_fake_llm(vec![ + openai_text_with_usage("turn one reply", 10, 5), + openai_text_with_usage("turn two reply", 20, 8), + ]) + .await; + let mut h = Harness::spawn(&url).await; + let sid = init_session(&mut h).await; + + // ── Turn 1 ────────────────────────────────────────────────────────────── + let p1 = h + .send( + "session/prompt", + json!({"sessionId": sid, "prompt": [{"type":"text","text":"turn 1"}]}), + ) + .await; + + let (frames_before_t1, response_t1) = recv_until_with_drain(&mut h, |v| v["id"] == p1).await; + assert_eq!( + response_t1["result"]["stopReason"], "end_turn", + "turn 1 must complete with end_turn" + ); + + // A usage_update notification must appear in the frames before the response. + let usage_t1 = frames_before_t1 + .iter() + .find(|v| is_usage_update(v)) + .unwrap_or_else(|| { + panic!( + "expected _goose/unstable/session/update usage_update before turn-1 response; frames: {frames_before_t1:#?}" + ) + }); + assert_eq!( + usage_t1["params"]["update"]["sessionUpdate"], "usage_update", + "sessionUpdate field must be 'usage_update'" + ); + assert_eq!( + usage_t1["params"]["update"]["accumulatedInputTokens"], + json!(10u64), + "turn 1 accumulated input tokens" + ); + assert_eq!( + usage_t1["params"]["update"]["accumulatedOutputTokens"], + json!(5u64), + "turn 1 accumulated output tokens" + ); + + // ── Turn 2 ────────────────────────────────────────────────────────────── + let p2 = h + .send( + "session/prompt", + json!({"sessionId": sid, "prompt": [{"type":"text","text":"turn 2"}]}), + ) + .await; + + let (frames_before_t2, response_t2) = recv_until_with_drain(&mut h, |v| v["id"] == p2).await; + assert_eq!( + response_t2["result"]["stopReason"], "end_turn", + "turn 2 must complete with end_turn" + ); + + // Notification arrives before the response, with cumulative sums (10+20, 5+8). 
+ let usage_t2 = frames_before_t2 + .iter() + .find(|v| is_usage_update(v)) + .unwrap_or_else(|| { + panic!( + "expected _goose/unstable/session/update usage_update before turn-2 response; frames: {frames_before_t2:#?}" + ) + }); + assert_eq!( + usage_t2["params"]["update"]["accumulatedInputTokens"], + json!(30u64), + "turn 2 accumulated input tokens must be 10+20=30" + ); + assert_eq!( + usage_t2["params"]["update"]["accumulatedOutputTokens"], + json!(13u64), + "turn 2 accumulated output tokens must be 5+8=13" + ); + + h.shutdown().await; +} + +/// When the provider returns a response with no `usage` block, buzz-agent must +/// NOT emit a `_goose/unstable/session/update` notification for that turn. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn no_usage_turn_emits_no_usage_notification() { + let url = spawn_fake_llm(vec![openai_text("no usage here")]).await; + let mut h = Harness::spawn(&url).await; + let sid = init_session(&mut h).await; + + let p_id = h + .send( + "session/prompt", + json!({"sessionId": sid, "prompt": [{"type":"text","text":"go"}]}), + ) + .await; + + let (frames_before, response) = recv_until_with_drain(&mut h, |v| v["id"] == p_id).await; + assert_eq!( + response["result"]["stopReason"], "end_turn", + "turn must complete with end_turn" + ); + + // No usage notification must appear in the frames before the response. + let found = frames_before.iter().any(|v| is_usage_update(v)); + assert!( + !found, + "expected NO usage_update notification when provider reports no usage; frames: {frames_before:#?}" + ); + + h.shutdown().await; +} + +/// When a turn is cancelled AFTER the provider has already returned a response +/// (so token counts are observed), buzz-agent must still emit the usage +/// notification before the cancelled `session/prompt` response. +/// +/// Setup: round 1 is a tool call WITH usage (tokens are captured). 
The test client +/// sends the cancel before round 2's LLM call, so the turn exits with +/// `stopReason: "cancelled"`. The usage notification must precede that response. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn cancelled_turn_with_usage_emits_notification_before_response() { + // Round 1: tool call with usage — sets turn_input/output_tokens. + // Round 2 never starts because cancel fires at the round boundary. + let url = spawn_fake_llm(vec![openai_tool_call_with_usage( + "call_cancel_test", + "fake__noop", + json!({}), + 15, + 6, + )]) + .await; + let mut h = Harness::spawn(&url).await; + let sid = init_session(&mut h).await; + + let p_id = h + .send( + "session/prompt", + json!({"sessionId": sid, "prompt": [{"type":"text","text":"start work"}]}), + ) + .await; + + // Wait for the activeRunId advert (agent is live) then send cancel. + let _run_id = recv_active_run_id(&mut h).await; + // Wait for the tool_call_update (failed — unknown tool) so we know round 1 + // LLM response has been processed and tokens are captured, THEN cancel. + h.recv_until(|v| { + v.get("method") == Some(&json!("session/update")) + && v["params"]["update"]["sessionUpdate"] == "tool_call_update" + }) + .await; + let c_id = h.send("session/cancel", json!({"sessionId": sid})).await; + // Drain remaining frames; the cancel OK and the prompt response both arrive. + let mut saw_usage_before_prompt_response = false; + let mut saw_usage = false; + let mut saw_cancel_ok = false; + let mut saw_prompt_response = false; + for _ in 0..20 { + let v = h.recv().await; + if v["id"] == json!(c_id) { + // cancel acknowledged + saw_cancel_ok = true; + } else if is_usage_update(&v) { + saw_usage = true; + // Record that usage arrived before the prompt response (if it hasn't yet). 
+ if !saw_prompt_response { + saw_usage_before_prompt_response = true; + } + } else if v["id"] == json!(p_id) { + saw_prompt_response = true; + // The prompt response is either a result (stopReason: cancelled or + // end_turn) or an error (if cancel races with round 2's LLM call + // returning no-more-responses). Both are acceptable — we only care + // that the usage notification precedes whichever frame terminates + // the turn. + let has_result = v.get("result").is_some(); + let has_error = v.get("error").is_some(); + assert!( + has_result || has_error, + "expected result or error on prompt response, got: {v}" + ); + } + if saw_usage && saw_prompt_response { + break; + } + } + assert!(saw_cancel_ok, "session/cancel was not acknowledged"); + assert!( + saw_usage, + "expected usage_update notification for cancelled turn with observed tokens" + ); + assert!( + saw_usage_before_prompt_response, + "usage_update must arrive before the session/prompt response" + ); + + h.shutdown().await; +} + +/// A tool-call OpenAI response with a `usage` block. Used to capture tokens in +/// round 1 before a cancel fires at the round boundary. 
+fn openai_tool_call_with_usage( + id: &str, + name: &str, + args: Value, + input_tokens: u64, + output_tokens: u64, +) -> Value { + json!({ + "id": "cc-u2", "object": "chat.completion", "model": "fake-model", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", "content": null, + "tool_calls": [{ + "id": id, "type": "function", + "function": { "name": name, "arguments": args.to_string() }, + }], + }, + "finish_reason": "tool_calls", + }], + "usage": { + "prompt_tokens": input_tokens, + "completion_tokens": output_tokens, + "total_tokens": input_tokens + output_tokens, + }, + }) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn steer_rejected_on_empty_prompt() { let (url, _captures) = spawn_capturing_fake_llm(vec![ From e6b3f45249af87c658bf2d6800d5c5c39a944313 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 2 Jul 2026 12:45:54 -0400 Subject: [PATCH 14/21] test(buzz-agent): fix flake in cancelled_turn_with_usage test Under full-package parallel load the cancel ack (c_id response) could arrive after the prompt response, causing the loop to exit before saw_cancel_ok was set and triggering the assert. Fix: widen the frame budget from 20 to 40 and require all three flags (saw_usage && saw_prompt_response && saw_cancel_ok) before breaking. Verified 10/10 green under cargo test -p buzz-agent (full package). 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-agent/tests/fake_llm.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index 4476652b9..5d7732afe 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -957,7 +957,7 @@ async fn cancelled_turn_with_usage_emits_notification_before_response() { let mut saw_usage = false; let mut saw_cancel_ok = false; let mut saw_prompt_response = false; - for _ in 0..20 { + for _ in 0..40 { let v = h.recv().await; if v["id"] == json!(c_id) { // cancel acknowledged @@ -982,7 +982,7 @@ async fn cancelled_turn_with_usage_emits_notification_before_response() { "expected result or error on prompt response, got: {v}" ); } - if saw_usage && saw_prompt_response { + if saw_usage && saw_prompt_response && saw_cancel_ok { break; } } From a0a6ce44caec32aa6a8f1b49c7fd4f0a4d1c9c05 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 2 Jul 2026 13:44:40 -0400 Subject: [PATCH 15/21] fix(lint): remove redundant closure in no_usage_turn test clippy::redundant_closure: |v| is_usage_update(v) -> is_usage_update. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-agent/tests/fake_llm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index 5d7732afe..20ae5bd0a 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -904,7 +904,7 @@ async fn no_usage_turn_emits_no_usage_notification() { ); // No usage notification must appear in the frames before the response. 
- let found = frames_before.iter().any(|v| is_usage_update(v)); + let found = frames_before.iter().any(is_usage_update); assert!( !found, "expected NO usage_update notification when provider reports no usage; frames: {frames_before:#?}" From d6a1913bebdf61bface091b7975a23ef67599826 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Fri, 3 Jul 2026 12:50:39 -0400 Subject: [PATCH 16/21] fix(nip-am): address Eva+Wren review findings (migration, delta, cost validation, spec) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finding 1 (blocker): revert 0001_initial_schema.sql to pristine origin/main state; add additive migrations/0004_agent_turn_metric_fts.sql that DROPs and re-ADDs the search_tsv generated column with the kind-44200 exclusion, following the 0002/0003 pattern. Brownfield relays no longer hit VersionMismatch on startup. Update migration.rs test to assert len=4, migrations[3].version=4, 0001 !contains '44200'. Finding 2 (blocker): fix UsageTracker per-turn delta undercount and wrong turnSeq. Separate the committed baseline (advanced only on take()) from the in-flight cumulative (updated on each record()). Delta is now always measured from the end of the previous published turn, not from the last intermediate notification. turnSeq is now per-published-metric, constant within a turn. Rewrite last_update_wins_multiple_updates_same_turn to assert correct semantics: two in-turn updates 1500/2000 against baseline 1000 → delta 1000/150, seq=2; add turn-3 to prove seq increments per publish, not per notification. Finding 3 (small): add AgentTurnMetricPayload::validate() that rejects negative or non-finite (NaN/inf) costUsd in turn or cumulative. Called from encrypt_agent_turn_metric() so bad payloads can never be published. Add ObserverPayloadError::InvalidPayload variant. Seven new unit tests cover negative, NaN, infinite, and valid cases. 
Finding 4 (small): two NIP-AM.md wording fixes — (a) unauthenticated → 'AUTH required', authenticated wrong-owner → 'restricted:' (matches code); (b) remove claim that 44200 is regular per NIP-01 (NIP-01 tops out at 10000), say 'regular by Buzz convention (alongside 44100/44101)' instead. E2E: 21/21 assertions pass (fresh keypairs, fresh channel, real relay+acp+agent binaries). Token semantics unchanged for the single-notification-per-turn case. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/usage.rs | 142 ++++++++++++++----- crates/buzz-core/src/agent_turn_metric.rs | 161 ++++++++++++++++++++++ crates/buzz-core/src/observer.rs | 3 + crates/buzz-db/src/migration.rs | 17 ++- docs/nips/NIP-AM.md | 8 +- migrations/0001_initial_schema.sql | 6 +- migrations/0004_agent_turn_metric_fts.sql | 33 +++++ schema/schema.sql | 2 +- 8 files changed, 325 insertions(+), 47 deletions(-) create mode 100644 migrations/0004_agent_turn_metric_fts.sql diff --git a/crates/buzz-acp/src/usage.rs b/crates/buzz-acp/src/usage.rs index db28d6570..b6c9dd3fc 100644 --- a/crates/buzz-acp/src/usage.rs +++ b/crates/buzz-acp/src/usage.rs @@ -19,6 +19,15 @@ //! 3. **Session restart** (caller supplies a new `session_id` not seen //! before): treated as case 1 — fresh baseline, no delta for this turn. //! +//! Goose may emit **multiple** `usage_update` notifications per turn. The +//! tracker handles this correctly: the committed baseline (and `turn_seq`) +//! advance only when `take()` is called (i.e. at publish time), never on +//! individual notifications. Within a turn all notifications measure their +//! delta from the same frozen baseline — the end of the previous published +//! turn — so the final `pending` record always reflects the full +//! previous-published→current-final delta regardless of how many +//! intermediate notifications arrived. +//! //! The `TurnUsage` produced after each turn is consumed by the //! 
`TurnCompletionGuard` in `pool.rs` to publish a kind 44200 relay event. @@ -82,14 +91,17 @@ pub(crate) struct UsageUpdatePayload { /// Per-session normalization state: the last cumulative snapshot we saw. #[derive(Debug, Clone)] struct SessionState { - /// Monotonically increasing per-session turn counter (1-based, incremented - /// on every recorded update). - turn_seq: u64, - /// Cumulative input tokens at the end of the previous turn. + /// Per-session turn counter for the LAST PUBLISHED metric (1-based). + /// Advanced only when `take()` drains a pending record — not on every + /// `record()` call. This ensures `turnSeq` counts published metrics, not + /// usage-update notifications. + published_seq: u64, + /// Cumulative input tokens at the end of the LAST PUBLISHED turn. + /// Advanced only on publish (i.e. in `take()`), not on every notification. last_input: u64, - /// Cumulative output tokens at the end of the previous turn. + /// Cumulative output tokens at the end of the LAST PUBLISHED turn. last_output: u64, - /// Cumulative cost at the end of the previous turn. + /// Cumulative cost at the end of the LAST PUBLISHED turn. last_cost: Option, } @@ -131,12 +143,16 @@ pub struct TurnUsage { /// `session/new` setup) will still update the cumulative baseline but will /// NOT produce a publishable record. /// 2. **`record(session_id, payload)`** — called for each -/// `_goose/unstable/session/update` notification. Always updates the -/// cumulative baseline; only produces a publishable record when a turn is -/// currently in-flight for the matching session. +/// `_goose/unstable/session/update` notification. When in-flight, updates +/// `pending` with the latest cumulative values and a delta measured from +/// the committed baseline (end of the previous published turn). Multiple +/// notifications per turn are fine — the last one wins and `turn_seq` stays +/// constant within the turn. 
When not in-flight, advances the committed +/// baseline so the next turn can compute a correct delta. /// 3. **`take()`** — called at turn completion by `TurnCompletionGuard`. /// Drains and returns the pending record (or `None` if no usage was emitted -/// for this turn) and clears the in-flight marker. +/// for this turn), clears the in-flight marker, and advances the committed +/// baseline so the next `record()` call measures from here. #[derive(Debug, Default)] pub(crate) struct UsageTracker { /// One entry per goose `sessionId` ever seen in this process. @@ -172,13 +188,22 @@ impl UsageTracker { /// `None` or refers to a different session, the baseline is updated but /// `pending` is left unchanged. /// - /// When multiple notifications arrive during the same turn, the last one - /// wins (goose may emit several per turn; each increments `turn_seq`). + /// When multiple notifications arrive during the same turn, the **last one + /// wins** on the cumulative totals, and the delta is always measured from + /// the baseline at the end of the **previous published turn** — not from an + /// intermediate notification within the current turn. `turn_seq` stays + /// constant across all notifications within one turn and only increments + /// when a record is actually published (i.e. when `take()` is called). pub(crate) fn record(&mut self, session_id: &str, payload: &UsageUpdatePayload) { let current_input = payload.accumulated_input_tokens; let current_output = payload.accumulated_output_tokens; let current_cost = payload.accumulated_cost; + // Determine whether this session is currently in-flight so we know + // whether to set `pending`. We compute the delta regardless so that + // setup notifications (no in-flight turn) still advance the baseline. 
+ let is_in_flight = self.in_flight_session.as_deref() == Some(session_id); + let (delta_reliable, turn_input, turn_output, turn_cost, turn_seq) = match self.sessions.get(session_id) { None => { @@ -186,7 +211,10 @@ impl UsageTracker { (false, None, None, None, 1u64) } Some(prev) => { - let seq = prev.turn_seq + 1; + // turn_seq for this pending record is one above the last + // *published* seq — constant for all notifications in this + // turn, advanced only on publish. + let seq = prev.published_seq + 1; // Token counter decrease → unreliable delta. if current_input < prev.last_input || current_output < prev.last_output { (false, None, None, None, seq) @@ -214,20 +242,9 @@ impl UsageTracker { } }; - // Always advance the session baseline so the next in-flight turn can - // compute a correct delta even if this notification is from setup. - self.sessions.insert( - session_id.to_string(), - SessionState { - turn_seq, - last_input: current_input, - last_output: current_output, - last_cost: current_cost, - }, - ); - - // Only publish a pending record if this session is currently in-flight. - if self.in_flight_session.as_deref() == Some(session_id) { + if is_in_flight { + // Update the pending record with the latest cumulative values. + // Baseline is NOT advanced here — it advances only on take(). self.pending = Some(TurnUsage { session_id: session_id.to_string(), turn_seq, @@ -239,11 +256,28 @@ impl UsageTracker { cumulative_output_tokens: current_output, cumulative_cost_usd: current_cost, }); + } else { + // Not in-flight: advance the committed baseline so the next + // in-flight turn computes its delta from this notification. + // This handles setup notifications that fire during `session/new` + // before the first `begin_turn`. 
+ self.sessions.insert( + session_id.to_string(), + SessionState { + published_seq: match self.sessions.get(session_id) { + Some(s) => s.published_seq, + None => 0, + }, + last_input: current_input, + last_output: current_output, + last_cost: current_cost, + }, + ); } } /// Consume and return the most recently computed turn usage record, then - /// clear the in-flight marker. + /// clear the in-flight marker and advance the committed baseline. /// /// Returns `None` if no `usage_update` arrived during the current in-flight /// turn (the agent did not emit usage, or no `begin_turn` was called). The @@ -251,7 +285,19 @@ impl UsageTracker { #[cfg_attr(not(test), allow(dead_code))] pub(crate) fn take(&mut self) -> Option { self.in_flight_session = None; - self.pending.take() + let record = self.pending.take()?; + // Advance the committed baseline to this published record so the + // *next* turn measures its delta from here. + self.sessions.insert( + record.session_id.clone(), + SessionState { + published_seq: record.turn_seq, + last_input: record.cumulative_input_tokens, + last_output: record.cumulative_output_tokens, + last_cost: record.cumulative_cost_usd, + }, + ); + Some(record) } } @@ -497,25 +543,45 @@ mod tests { #[test] fn last_update_wins_multiple_updates_same_turn() { + // Goose emits multiple usage_update notifications per turn. The tracker + // must: + // (a) use the LAST notification's cumulative values, + // (b) measure the delta from the baseline at the END OF THE PREVIOUS + // PUBLISHED TURN (not from intermediate notifications), and + // (c) keep turn_seq constant across all notifications within the turn + // (incrementing only on publish, not on each notification). let mut tracker = UsageTracker::default(); - // Turn 1 — baseline. + // Turn 1 — establish baseline. After take(), committed baseline = 1000/100. 
tracker.begin_turn("sess-5"); tracker.record("sess-5", &payload(1000, 100, None)); - let _ = tracker.take(); + let t1 = tracker.take().expect("turn 1"); + assert_eq!(t1.turn_seq, 1); - // Two updates arrive before take() — each advances state independently; - // the second delta is computed from the first update's snapshot. + // Turn 2 — two notifications arrive before take(). The second overwrites + // the first in pending; delta is measured from the committed baseline + // (1000/100), not from the intermediate snapshot (1500/150). tracker.begin_turn("sess-5"); tracker.record("sess-5", &payload(1500, 150, None)); tracker.record("sess-5", &payload(2000, 250, None)); - let usage = tracker.take().expect("pending"); + let usage = tracker.take().expect("turn 2"); - // Cumulative from the last update. + // Cumulative from the last notification. assert_eq!(usage.cumulative_input_tokens, 2000); assert_eq!(usage.cumulative_output_tokens, 250); - // Delta is from the previous intermediate snapshot (1500, 150) → (2000, 250). - assert_eq!(usage.turn_input_tokens, Some(500)); - assert_eq!(usage.turn_output_tokens, Some(100)); + // Delta is from committed baseline (1000, 100) → (2000, 250) = 1000/150. + assert_eq!(usage.turn_input_tokens, Some(1000)); + assert_eq!(usage.turn_output_tokens, Some(150)); + // seq increments once per publish, not once per notification. + assert_eq!(usage.turn_seq, 2); + + // Turn 3 — prove seq continues to increment per publish, not per notification. + tracker.begin_turn("sess-5"); + tracker.record("sess-5", &payload(2300, 290, None)); + let t3 = tracker.take().expect("turn 3"); + assert_eq!(t3.turn_seq, 3); + // Delta from turn-2 committed baseline (2000, 250). 
+ assert_eq!(t3.turn_input_tokens, Some(300)); + assert_eq!(t3.turn_output_tokens, Some(40)); } // ── Wire deserialization ──────────────────────────────────────────────── diff --git a/crates/buzz-core/src/agent_turn_metric.rs b/crates/buzz-core/src/agent_turn_metric.rs index 8344fca89..66ede0104 100644 --- a/crates/buzz-core/src/agent_turn_metric.rs +++ b/crates/buzz-core/src/agent_turn_metric.rs @@ -131,15 +131,47 @@ fn default_delta_reliable() -> bool { true } +impl AgentTurnMetricPayload { + /// Validate numeric constraints from NIP-AM §Numeric validity. + /// + /// Returns `Err` when any `cost_usd` field (in `turn` or `cumulative`) is + /// present but negative or non-finite (NaN or infinity). Token counts are + /// typed as `Option` and therefore cannot be negative by construction. + pub fn validate(&self) -> Result<(), ObserverPayloadError> { + fn check_cost(cost: Option, field: &str) -> Result<(), ObserverPayloadError> { + if let Some(c) = cost { + if !c.is_finite() || c < 0.0 { + return Err(ObserverPayloadError::InvalidPayload(format!( + "{field} must be finite and non-negative (got {c})" + ))); + } + } + Ok(()) + } + if let Some(t) = &self.turn { + check_cost(t.cost_usd, "turn.costUsd")?; + } + if let Some(c) = &self.cumulative { + check_cost(c.cost_usd, "cumulative.costUsd")?; + } + Ok(()) + } +} + /// Encrypt an [`AgentTurnMetricPayload`] into a NIP-44 v2 ciphertext string /// using the agent's key pair and the owner's public key. /// +/// Returns `Err(ObserverPayloadError::InvalidPayload)` if any `cost_usd` field +/// is negative or non-finite (NaN/inf), in accordance with NIP-AM §Numeric +/// validity. +/// /// This is the content field of a `kind:44200` event. 
pub fn encrypt_agent_turn_metric( agent_keys: &Keys, owner_pubkey: &PublicKey, payload: &AgentTurnMetricPayload, ) -> Result { + payload.validate()?; encrypt_observer_payload(agent_keys, owner_pubkey, payload) } @@ -307,4 +339,133 @@ mod tests { assert_eq!(turn.output_tokens, Some(567)); assert_eq!(turn.total_tokens, Some(1801)); } + + // ── validate() — negative / non-finite costUsd ───────────────────────── + + fn make_payload_with_turn_cost(cost: Option) -> AgentTurnMetricPayload { + AgentTurnMetricPayload { + harness: "test".to_string(), + model: None, + channel_id: None, + session_id: None, + turn_id: None, + turn_seq: None, + timestamp: "2026-07-01T00:00:00Z".to_string(), + turn: Some(TokenCounts { + input_tokens: Some(100), + output_tokens: Some(50), + total_tokens: None, + cost_usd: cost, + cache_read_tokens: None, + cache_write_tokens: None, + }), + cumulative: None, + delta_reliable: true, + stop_reason: None, + } + } + + fn make_payload_with_cumulative_cost(cost: Option) -> AgentTurnMetricPayload { + AgentTurnMetricPayload { + harness: "test".to_string(), + model: None, + channel_id: None, + session_id: None, + turn_id: None, + turn_seq: None, + timestamp: "2026-07-01T00:00:00Z".to_string(), + turn: None, + cumulative: Some(TokenCounts { + input_tokens: Some(500), + output_tokens: Some(200), + total_tokens: None, + cost_usd: cost, + cache_read_tokens: None, + cache_write_tokens: None, + }), + delta_reliable: true, + stop_reason: None, + } + } + + #[test] + fn validate_rejects_negative_turn_cost() { + let payload = make_payload_with_turn_cost(Some(-0.001)); + assert!( + matches!( + payload.validate(), + Err(ObserverPayloadError::InvalidPayload(_)) + ), + "negative turn.costUsd must be rejected" + ); + } + + #[test] + fn validate_rejects_nan_turn_cost() { + let payload = make_payload_with_turn_cost(Some(f64::NAN)); + assert!( + matches!( + payload.validate(), + Err(ObserverPayloadError::InvalidPayload(_)) + ), + "NaN turn.costUsd must be rejected" + ); + } 
+ + #[test] + fn validate_rejects_infinite_turn_cost() { + let payload = make_payload_with_turn_cost(Some(f64::INFINITY)); + assert!( + matches!( + payload.validate(), + Err(ObserverPayloadError::InvalidPayload(_)) + ), + "infinite turn.costUsd must be rejected" + ); + } + + #[test] + fn validate_rejects_negative_cumulative_cost() { + let payload = make_payload_with_cumulative_cost(Some(-1.0)); + assert!( + matches!( + payload.validate(), + Err(ObserverPayloadError::InvalidPayload(_)) + ), + "negative cumulative.costUsd must be rejected" + ); + } + + #[test] + fn validate_accepts_finite_non_negative_cost() { + // Zero, small, and larger values are all valid. + for cost in [0.0_f64, 0.001, 1.0, 999.99] { + let payload = make_payload_with_turn_cost(Some(cost)); + assert!( + payload.validate().is_ok(), + "cost {cost} should be accepted" + ); + } + } + + #[test] + fn validate_accepts_absent_cost() { + let payload = make_payload_with_turn_cost(None); + assert!( + payload.validate().is_ok(), + "absent costUsd must be accepted" + ); + } + + #[test] + fn encrypt_agent_turn_metric_rejects_negative_cost() { + let agent_keys = Keys::generate(); + let owner_keys = Keys::generate(); + let payload = make_payload_with_turn_cost(Some(-0.5)); + let result = encrypt_agent_turn_metric(&agent_keys, &owner_keys.public_key(), &payload); + assert!( + matches!(result, Err(ObserverPayloadError::InvalidPayload(_))), + "encrypt must reject payload with negative costUsd" + ); + } } diff --git a/crates/buzz-core/src/observer.rs b/crates/buzz-core/src/observer.rs index f2b981188..8347bda05 100644 --- a/crates/buzz-core/src/observer.rs +++ b/crates/buzz-core/src/observer.rs @@ -44,6 +44,9 @@ pub enum ObserverPayloadError { /// Actual plaintext byte count. got: usize, }, + /// A payload field violated a NIP-AM numeric constraint. + #[error("invalid payload field: {0}")] + InvalidPayload(String), } /// Returns true when `content` fits the NIP-44 v2 ciphertext length envelope. 
diff --git a/crates/buzz-db/src/migration.rs b/crates/buzz-db/src/migration.rs index 7caf822c2..adb09dc7d 100644 --- a/crates/buzz-db/src/migration.rs +++ b/crates/buzz-db/src/migration.rs @@ -471,7 +471,7 @@ mod tests { let mut migrations: Vec<_> = MIGRATOR.iter().collect(); migrations.sort_by_key(|migration| migration.version); - assert_eq!(migrations.len(), 3); + assert_eq!(migrations.len(), 4); assert_eq!(migrations[0].version, 1); assert_eq!(&*migrations[0].description, "initial schema"); assert!(migrations[0] @@ -515,6 +515,21 @@ mod tests { .as_str() .contains("ALTER TABLE communities ADD COLUMN icon")); assert!(!migrations[0].sql.as_str().contains("icon")); + + // NIP-AM (kind 44200) FTS exclusion: additive migration, never folded + // into 0001 — folding would change 0001's checksum and break brownfield + // startup. Migration 4 drops and re-adds the generated `search_tsv` + // column with the extended kind-44200 exclusion. 0001 must NOT carry 44200. + assert_eq!(migrations[3].version, 4); + assert!(migrations[3] + .sql + .as_str() + .contains("search_tsv")); + assert!(migrations[3] + .sql + .as_str() + .contains("44200")); + assert!(!migrations[0].sql.as_str().contains("44200")); } #[test] diff --git a/docs/nips/NIP-AM.md b/docs/nips/NIP-AM.md index bfb57e213..ff636fb80 100644 --- a/docs/nips/NIP-AM.md +++ b/docs/nips/NIP-AM.md @@ -38,7 +38,8 @@ encrypted to the owner. ## Event -`kind:44200` is a regular event as defined in [NIP-01](01.md): stored, +`kind:44200` is a regular event by Buzz convention (alongside 44100/44101): +stored, append-only, never replaced. Each completed turn produces exactly one event. ```json @@ -189,8 +190,9 @@ pubkey equals the `#p` tag value may receive the event. This gate applies to MUST NOT grant access. (Some p-gated kinds exempt id-addressed lookups on the theory that knowing the id implies authorization; kind 44200 events are long-lived and their cleartext envelope leaks turn activity, so no such -exemption is permitted.) 
Unauthorized publish or subscribe attempts MUST be -rejected with `AUTH required`. +exemption is permitted.) Unauthenticated publish or subscribe attempts MUST be +rejected with `AUTH required`; authenticated attempts from a pubkey that is not +the event owner MUST be rejected with `restricted:`. Relays SHOULD rate-limit kind 44200 to a rate consistent with real turn frequency (RECOMMENDED: 60 events/minute per agent pubkey). diff --git a/migrations/0001_initial_schema.sql b/migrations/0001_initial_schema.sql index a653de3c0..2d4035f8a 100644 --- a/migrations/0001_initial_schema.sql +++ b/migrations/0001_initial_schema.sql @@ -211,18 +211,16 @@ CREATE TABLE events ( -- 30622 = KIND_DM_VISIBILITY (per-viewer private hide state) -- 44100 = KIND_MEMBER_ADDED_NOTIFICATION (p-gated membership notice) -- 44101 = KIND_MEMBER_REMOVED_NOTIFICATION (p-gated membership notice) - -- 44200 = KIND_AGENT_TURN_METRIC (NIP-AM: p-gated encrypted turn metrics) -- NULL tsvector never matches `@@`, so excluded rows are storage-level -- unsearchable. Constants kept in `buzz_core::kind` (KIND_GIFT_WRAP, -- KIND_EVENT_REMINDER, KIND_DM_VISIBILITY, - -- KIND_MEMBER_ADDED_NOTIFICATION, KIND_MEMBER_REMOVED_NOTIFICATION, - -- KIND_AGENT_TURN_METRIC); inlined + -- KIND_MEMBER_ADDED_NOTIFICATION, KIND_MEMBER_REMOVED_NOTIFICATION); inlined -- here because a sqlx -- migration is frozen SQL and cannot import the Rust constant. If a new -- privacy-sensitive kind is added there, update this list and add a -- regression test in `buzz-search/tests/fts_integration.rs`. 
search_tsv TSVECTOR GENERATED ALWAYS AS ( - CASE WHEN kind IN (1059, 30300, 30622, 44100, 44101, 44200) THEN NULL::tsvector + CASE WHEN kind IN (1059, 30300, 30622, 44100, 44101) THEN NULL::tsvector ELSE to_tsvector('simple', content) END ) STORED, diff --git a/migrations/0004_agent_turn_metric_fts.sql b/migrations/0004_agent_turn_metric_fts.sql new file mode 100644 index 000000000..3ad64a9c8 --- /dev/null +++ b/migrations/0004_agent_turn_metric_fts.sql @@ -0,0 +1,33 @@ +-- ── Exclude kind 44200 (NIP-AM Agent Turn Metrics) from full-text search ────── +-- NIP-AM events carry NIP-44 ciphertext in `content`. Indexing that ciphertext +-- would waste storage and violate the spec's "NOT index the event in any +-- full-text search" requirement. +-- +-- Additive migration: previously applied files must not change checksum. +-- We must DROP the generated column and re-ADD it with the extended exclusion +-- list; ALTER COLUMN cannot change a GENERATED expression in Postgres. +-- +-- Final kind exclusion list after this migration: +-- 1059 = KIND_GIFT_WRAP (NIP-17 ciphertext) +-- 30300 = KIND_EVENT_REMINDER (AUTHOR_ONLY_KINDS — defense in depth) +-- 30622 = KIND_DM_VISIBILITY (per-viewer private hide state) +-- 44100 = KIND_MEMBER_ADDED_NOTIFICATION (p-gated membership notice) +-- 44101 = KIND_MEMBER_REMOVED_NOTIFICATION (p-gated membership notice) +-- 44200 = KIND_AGENT_TURN_METRIC (NIP-AM: p-gated encrypted turn metrics) +-- Constants kept in `buzz_core::kind`; inlined here because a sqlx migration +-- is frozen SQL and cannot import the Rust constant. If a new privacy-sensitive +-- kind is added there, add a new additive migration following this pattern and +-- add a regression test in `buzz-search/tests/fts_integration.rs`. +-- +-- NULL tsvector never matches `@@`, so excluded rows are storage-level +-- unsearchable. 
+ +ALTER TABLE events DROP COLUMN search_tsv; +ALTER TABLE events ADD COLUMN search_tsv TSVECTOR GENERATED ALWAYS AS ( + CASE WHEN kind IN (1059, 30300, 30622, 44100, 44101, 44200) THEN NULL::tsvector + ELSE to_tsvector('simple', content) + END +) STORED; + +-- Recreate the GIN index dropped with the column. +CREATE INDEX idx_events_search_tsv ON events USING GIN (search_tsv); diff --git a/schema/schema.sql b/schema/schema.sql index c3247f103..3445d58f5 100644 --- a/schema/schema.sql +++ b/schema/schema.sql @@ -206,7 +206,7 @@ CREATE TABLE events ( -- Privacy: encrypted/private routing wrappers and p-gated membership notices -- must never be discoverable through NIP-50 full-text search. NULL tsvector -- never matches `@@`. - -- Keep in sync with migrations/0001_initial_schema.sql. + -- Keep in sync with migrations (final state: 0001 + 0004_agent_turn_metric_fts). search_tsv TSVECTOR GENERATED ALWAYS AS ( CASE WHEN kind IN (1059, 30300, 30622, 44100, 44101, 44200) THEN NULL::tsvector ELSE to_tsvector('simple', content) From 497f007a10ab0ec9c50fb9de51c3c1719bf4b421 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Fri, 3 Jul 2026 13:03:54 -0400 Subject: [PATCH 17/21] fix(nip-am): fix cross-session baseline corruption + add 44200 FTS regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thufir re-review found one IMPORTANT and one MINOR defect in d6a1913be. IMPORTANT (blocking): UsageTracker::record() else branch advanced the committed baseline for ALL non-in-flight notifications, including cross-session ones. A late notification for session A while session B is in-flight would silently advance A's baseline without publishing a metric, causing A's next turn to under-report its delta (the same undercount class as Eva+Wren's original finding, via a different path). Fix: split the else branch into three explicit cases: 1. 
In-flight-match (is_in_flight) → update pending; baseline stays frozen 2. Not in-flight at all (in_flight_session.is_none()) → advance committed baseline (setup-notification path, correct behavior preserved) 3. In-flight for another session → ignore; do NOT touch sessions map Add regression test cross_session_notification_does_not_corrupt_other_sessions_delta: A publishes at 1000/100 (seq=1); late A notification at 1500/150 while B in-flight; A's next turn at 2000/250 must report delta 1000/150 (not the buggy 500/100) and turnSeq=2. MINOR: fts_integration.rs excluded_kinds_are_storage_level_unsearchable test was stale vs the migration — still enumerated only the five original excluded kinds. Add KIND_AGENT_TURN_METRIC (44200) to both the insert set and the forbidden-kinds assertion, and update the doc comment (six kinds → seven). Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/usage.rs | 70 ++++++++++++++++++++- crates/buzz-search/tests/fts_integration.rs | 21 ++++++- 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/crates/buzz-acp/src/usage.rs b/crates/buzz-acp/src/usage.rs index b6c9dd3fc..7943d3b8e 100644 --- a/crates/buzz-acp/src/usage.rs +++ b/crates/buzz-acp/src/usage.rs @@ -194,6 +194,15 @@ impl UsageTracker { /// intermediate notification within the current turn. `turn_seq` stays /// constant across all notifications within one turn and only increments /// when a record is actually published (i.e. when `take()` is called). + /// + /// Three cases: + /// 1. **In-flight-match** (`in_flight_session == Some(session_id)`): updates + /// `pending`. Baseline NOT advanced (that happens on `take()`). + /// 2. **Not in-flight at all** (`in_flight_session == None`): advances the + /// committed baseline (setup notification path). + /// 3. 
**In-flight for another session** (`in_flight_session == Some(other)`): + /// ignored entirely — touching this session's baseline while another is + /// in-flight would undercount this session's next published delta. pub(crate) fn record(&mut self, session_id: &str, payload: &UsageUpdatePayload) { let current_input = payload.accumulated_input_tokens; let current_output = payload.accumulated_output_tokens; @@ -243,7 +252,7 @@ impl UsageTracker { }; if is_in_flight { - // Update the pending record with the latest cumulative values. + // In-flight-match: update pending with the latest cumulative values. // Baseline is NOT advanced here — it advances only on take(). self.pending = Some(TurnUsage { session_id: session_id.to_string(), @@ -256,8 +265,8 @@ impl UsageTracker { cumulative_output_tokens: current_output, cumulative_cost_usd: current_cost, }); - } else { - // Not in-flight: advance the committed baseline so the next + } else if self.in_flight_session.is_none() { + // Not in-flight at all: advance the committed baseline so the next // in-flight turn computes its delta from this notification. // This handles setup notifications that fire during `session/new` // before the first `begin_turn`. @@ -274,6 +283,9 @@ impl UsageTracker { }, ); } + // else: in-flight-for-another-session — ignore. A late notification + // for session X while session Y is in-flight must NOT advance X's + // committed baseline; doing so would undercount X's next published delta. } /// Consume and return the most recently computed turn usage record, then @@ -380,6 +392,58 @@ mod tests { assert_eq!(usage.session_id, "sess-a"); } + #[test] + fn cross_session_notification_does_not_corrupt_other_sessions_delta() { + // Regression: A publishes at 1000/100 (turn 1). A late A notification at + // 1500/150 arrives while session B is in-flight. 
Under the old `else` + // branch this would advance A's committed baseline to 1500/150 without + // publishing a metric, so A's next turn (2000/250) would see a delta of + // only 500/100 instead of the correct 1000/150. + // + // With the fixed three-way branch, the cross-session notification is + // ignored entirely and A's baseline stays at its last published state. + let mut tracker = UsageTracker::default(); + + // ── Turn A1 — establish A's committed baseline at 1000/100, seq=1 ── + tracker.begin_turn("sess-a"); + tracker.record("sess-a", &payload(1000, 100, None)); + let a1 = tracker.take().expect("A turn 1"); + assert_eq!(a1.turn_seq, 1); + assert!(!a1.delta_reliable, "first turn is unreliable"); + assert_eq!(a1.cumulative_input_tokens, 1000); + + // ── B is now in-flight; A late notification arrives ── + tracker.begin_turn("sess-b"); + // Late A notification while B is in-flight — must NOT advance A's baseline. + tracker.record("sess-a", &payload(1500, 150, None)); + // B gets its own notification and completes. + tracker.record("sess-b", &payload(200, 50, None)); + let b1 = tracker.take().expect("B turn 1"); + assert_eq!(b1.session_id, "sess-b"); + + // ── Turn A2 — delta must be measured from A's last PUBLISHED baseline ── + // If the cross-session fix is correct: committed A baseline = 1000/100 + // (from take() after A turn 1), so delta = 2000-1000 = 1000 / 250-100 = 150. + // If broken (old code): committed A baseline = 1500/150 (wrongly advanced), + // so delta = 500/100 — the undercount Eva+Wren and Thufir both flagged. 
+ tracker.begin_turn("sess-a"); + tracker.record("sess-a", &payload(2000, 250, None)); + let a2 = tracker.take().expect("A turn 2"); + + assert_eq!(a2.session_id, "sess-a"); + assert_eq!(a2.turn_seq, 2, "seq must increment per publish, not per notification"); + assert!(a2.delta_reliable, "A turn 2 must have a reliable delta"); + assert_eq!( + a2.turn_input_tokens, + Some(1000), + "A turn 2 delta must be from A's last published baseline (1000), not the \ + late cross-session advance (500)" + ); + assert_eq!(a2.turn_output_tokens, Some(150)); + assert_eq!(a2.cumulative_input_tokens, 2000); + assert_eq!(a2.cumulative_output_tokens, 250); + } + // ── Delta computation: non-happy paths ───────────────────────────────── #[test] diff --git a/crates/buzz-search/tests/fts_integration.rs b/crates/buzz-search/tests/fts_integration.rs index 8f16d6c7d..1274d296d 100644 --- a/crates/buzz-search/tests/fts_integration.rs +++ b/crates/buzz-search/tests/fts_integration.rs @@ -8,8 +8,8 @@ use buzz_core::{ kind::{ - AUTHOR_ONLY_KINDS, KIND_MEMBER_ADDED_NOTIFICATION, KIND_MEMBER_REMOVED_NOTIFICATION, - P_GATED_KINDS, + AUTHOR_ONLY_KINDS, KIND_AGENT_TURN_METRIC, KIND_MEMBER_ADDED_NOTIFICATION, + KIND_MEMBER_REMOVED_NOTIFICATION, P_GATED_KINDS, }, CommunityId, }; @@ -1058,8 +1058,9 @@ async fn very_long_query_is_bounded_before_pg_parse() { /// - 30622 = `KIND_DM_VISIBILITY` (per-viewer private hide state) /// - 44100 = `KIND_MEMBER_ADDED_NOTIFICATION` (p-gated membership notice) /// - 44101 = `KIND_MEMBER_REMOVED_NOTIFICATION` (p-gated membership notice) +/// - 44200 = `KIND_AGENT_TURN_METRIC` (NIP-AM: p-gated encrypted turn metrics) /// -/// All six events are inserted with the same unique token in their content +/// All seven events are inserted with the same unique token in their content /// so a single search query exercises every kind in one round-trip. Only /// the kind:9 control must surface — the excluded kinds must not. 
/// @@ -1152,6 +1153,19 @@ async fn excluded_kinds_are_storage_level_unsearchable() { ) .await; + // kind:44200 agent turn metric — p-gated NIP-44 ciphertext and MUST NOT be searchable. + insert_event( + &pool, + c, + rand_bytes32(), + rand_bytes32(), + KIND_AGENT_TURN_METRIC as i32, + &format!("agent turn metric — {token}"), + None, + 1_700_000_006, + ) + .await; + let svc = SearchService::new(pool.clone()); let result = svc .search(&SearchQuery { @@ -1184,6 +1198,7 @@ async fn excluded_kinds_are_storage_level_unsearchable() { 30622, KIND_MEMBER_ADDED_NOTIFICATION as i32, KIND_MEMBER_REMOVED_NOTIFICATION as i32, + KIND_AGENT_TURN_METRIC as i32, ] { assert!( !kinds.contains(&forbidden), From cb6b5f28df635cbd00215a6d86a34435da24bb01 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Fri, 3 Jul 2026 15:10:57 -0400 Subject: [PATCH 18/21] test(nip-am): apply full migration chain in FTS integration test setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The FTS exclusion for kind 44200 lives in 0004_agent_turn_metric_fts.sql, not in 0001. setup() was only applying 0001, so the 44200 tripwire in excluded_kinds_are_storage_level_unsearchable ran against a schema that never excluded 44200 — making the assertion vacuously incorrect under --include-ignored. Fix: introduce MIGRATION_000{1-4}_SQL consts and apply all four in order in setup(), matching production. 18/18 Postgres integration tests pass. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-search/tests/fts_integration.rs | 24 ++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/crates/buzz-search/tests/fts_integration.rs b/crates/buzz-search/tests/fts_integration.rs index 1274d296d..b6a60a3f4 100644 --- a/crates/buzz-search/tests/fts_integration.rs +++ b/crates/buzz-search/tests/fts_integration.rs @@ -2,9 +2,9 @@ //! //! 
Run with a local PG: `BUZZ_TEST_DATABASE_URL=postgres://buzz:buzz_dev@localhost:5432/buzz cargo test -p buzz-search --tests -- --include-ignored` //! -//! Each test creates a uniquely-named schema, applies the consolidated `0001` -//! migration into it, exercises a scenario, and drops it. Tests are -//! parallel-safe. +//! Each test creates a uniquely-named schema, applies all four migrations in +//! order (0001 → 0002 → 0003 → 0004) into it, exercises a scenario, and drops +//! it. Tests are parallel-safe. use buzz_core::{ kind::{ @@ -18,7 +18,10 @@ use sqlx::{postgres::PgPoolOptions, Executor, PgPool}; use uuid::Uuid; const TEST_DB_URL: &str = "postgres://buzz:buzz_dev@localhost:5432/buzz"; -const MIGRATION_SQL: &str = include_str!("../../../migrations/0001_initial_schema.sql"); +const MIGRATION_0001_SQL: &str = include_str!("../../../migrations/0001_initial_schema.sql"); +const MIGRATION_0002_SQL: &str = include_str!("../../../migrations/0002_git_repo_names.sql"); +const MIGRATION_0003_SQL: &str = include_str!("../../../migrations/0003_community_icon.sql"); +const MIGRATION_0004_SQL: &str = include_str!("../../../migrations/0004_agent_turn_metric_fts.sql"); async fn setup() -> (PgPool, String) { let url = std::env::var("BUZZ_TEST_DATABASE_URL").unwrap_or_else(|_| TEST_DB_URL.to_string()); @@ -43,9 +46,20 @@ async fn setup() -> (PgPool, String) { .connect(&url_with_search_path) .await .expect("connect with search_path"); - pool.execute(MIGRATION_SQL) + // Apply the full migration chain in order so the test schema exactly matches + // production. Future FTS-affecting migrations must be added here. 
+ pool.execute(MIGRATION_0001_SQL) .await .expect("apply 0001 migration"); + pool.execute(MIGRATION_0002_SQL) + .await + .expect("apply 0002 migration"); + pool.execute(MIGRATION_0003_SQL) + .await + .expect("apply 0003 migration"); + pool.execute(MIGRATION_0004_SQL) + .await + .expect("apply 0004 migration"); (pool, schema) } From 552cf5fa08364507c3f47cbd3d9ba927b7ef7507 Mon Sep 17 00:00:00 2001 From: npub1fgdl5qqnh3k3f2xkqrvt7cujalhm623x4s7fdjdj5yrtp5fzjl9qrjpucw <4a1bfa0013bc6d14a8d600d8bf6392efefbd2a26ac3c96c9b2a106b0d12297ca@sprout-oss.stage.blox.sqprod.co> Date: Fri, 3 Jul 2026 15:12:43 -0400 Subject: [PATCH 19/21] docs(nip-am): fix stale record() summary contradicting three-case contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The summary prose still described the old always-advance-baseline behavior and claimed a different-session notification advances the baseline — directly contradicting the enumerated three-case contract below it, which correctly ignores cross-session notifications entirely. Rewrite the summary to match. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/usage.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/crates/buzz-acp/src/usage.rs b/crates/buzz-acp/src/usage.rs index 7943d3b8e..b212ac4f3 100644 --- a/crates/buzz-acp/src/usage.rs +++ b/crates/buzz-acp/src/usage.rs @@ -179,14 +179,12 @@ impl UsageTracker { /// Process a `usage_update` notification payload. /// - /// **Always** updates the cumulative baseline for `session_id` so that the - /// next in-flight turn can compute a correct delta even if this notification - /// arrived outside a turn (e.g. during `session/new` setup). - /// - /// Only produces a publishable `pending` record when a turn is currently - /// in-flight for the matching `session_id`. 
If `in_flight_session` is - /// `None` or refers to a different session, the baseline is updated but - /// `pending` is left unchanged. + /// Behavior depends on which session (if any) is currently in-flight; see + /// the three explicit cases below. Only a notification for the in-flight + /// session produces a publishable `pending` record. A notification that + /// arrives outside any turn (e.g. during `session/new` setup) advances the + /// committed baseline so the next in-flight turn computes a correct delta. + /// A notification for a *different* in-flight session is ignored entirely. /// /// When multiple notifications arrive during the same turn, the **last one /// wins** on the cumulative totals, and the delta is always measured from From 07188c3e6b8234976a5165ef56ee2bd98cd4779e Mon Sep 17 00:00:00 2001 From: Will Pfleger Date: Fri, 3 Jul 2026 15:31:58 -0400 Subject: [PATCH 20/21] style(nip-am): apply rustfmt to test asserts in usage, agent_turn_metric, migration Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/usage.rs | 5 ++++- crates/buzz-core/src/agent_turn_metric.rs | 5 +---- crates/buzz-db/src/migration.rs | 10 ++-------- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/crates/buzz-acp/src/usage.rs b/crates/buzz-acp/src/usage.rs index b212ac4f3..bcc5845ea 100644 --- a/crates/buzz-acp/src/usage.rs +++ b/crates/buzz-acp/src/usage.rs @@ -429,7 +429,10 @@ mod tests { let a2 = tracker.take().expect("A turn 2"); assert_eq!(a2.session_id, "sess-a"); - assert_eq!(a2.turn_seq, 2, "seq must increment per publish, not per notification"); + assert_eq!( + a2.turn_seq, 2, + "seq must increment per publish, not per notification" + ); assert!(a2.delta_reliable, "A turn 2 must have a reliable delta"); assert_eq!( a2.turn_input_tokens, diff --git a/crates/buzz-core/src/agent_turn_metric.rs b/crates/buzz-core/src/agent_turn_metric.rs index 66ede0104..157e41518 100644 --- a/crates/buzz-core/src/agent_turn_metric.rs +++ 
b/crates/buzz-core/src/agent_turn_metric.rs @@ -441,10 +441,7 @@ mod tests { // Zero, small, and larger values are all valid. for cost in [0.0_f64, 0.001, 1.0, 999.99] { let payload = make_payload_with_turn_cost(Some(cost)); - assert!( - payload.validate().is_ok(), - "cost {cost} should be accepted" - ); + assert!(payload.validate().is_ok(), "cost {cost} should be accepted"); } } diff --git a/crates/buzz-db/src/migration.rs b/crates/buzz-db/src/migration.rs index adb09dc7d..f3defbb66 100644 --- a/crates/buzz-db/src/migration.rs +++ b/crates/buzz-db/src/migration.rs @@ -521,14 +521,8 @@ mod tests { // startup. Migration 4 drops and re-adds the generated `search_tsv` // column with the extended kind-44200 exclusion. 0001 must NOT carry 44200. assert_eq!(migrations[3].version, 4); - assert!(migrations[3] - .sql - .as_str() - .contains("search_tsv")); - assert!(migrations[3] - .sql - .as_str() - .contains("44200")); + assert!(migrations[3].sql.as_str().contains("search_tsv")); + assert!(migrations[3].sql.as_str().contains("44200")); assert!(!migrations[0].sql.as_str().contains("44200")); } From 8ebe6d9c84e906edc8ecc1428cbc647d561798d6 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Fri, 3 Jul 2026 16:15:31 -0400 Subject: [PATCH 21/21] =?UTF-8?q?fix(nip-am):=20address=20code-review=20fi?= =?UTF-8?q?ndings=20=E2=80=94=20decrypt=20validation,=20else-branch,=20bri?= =?UTF-8?q?dge=20auth,=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The load-bearing change is decrypt-validate symmetry (SF1): decrypt_agent_turn_metric now calls payload.validate() after decrypting, mirroring the producer's fail-closed contract. A raw/misbehaving agent that encrypts a negative-cost payload via the lower-level path can no longer slip invalid data into a caller's aggregation pipeline. A regression test covers this path explicitly. 
The remaining changes are correctness hardening and test quality: SF2: session-missing else in lib.rs no longer emits the per-turn delta in the accumulated* fields (which the wire contract requires to be cumulative). The branch now returns None and skips emission — accumulated baseline is gone with the session. SF3: cancelled-turn integration test gates the round-2 LLM response behind a oneshot barrier released only after cancel is sent, making stopReason: cancelled deterministic rather than timing-dependent. The test now asserts the expected stop reason directly. N1: collapse 14 embedded spaces in ingest.rs h-tag reject message. N2: add reader_authorized_for_event guard in bridge.rs feed/thread loops as defense-in-depth — safe today (feed SQL allowlists + requires_h_channel_scope exclude result-gated kinds), ensures every bridge delivery surface is gated. N3: direct UsageTracker unit test for begin_turn + take with no record (pre-cancel path). N4: document delta_reliable guard in publish_agent_turn_metric as defense-in-depth. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/pool.rs | 4 + crates/buzz-acp/src/usage.rs | 13 +++ crates/buzz-agent/src/lib.rs | 45 +++++----- crates/buzz-agent/tests/fake_llm.rs | 105 +++++++++++++++++----- crates/buzz-core/src/agent_turn_metric.rs | 42 ++++++++- crates/buzz-relay/src/api/bridge.rs | 12 +++ crates/buzz-relay/src/handlers/ingest.rs | 2 +- 7 files changed, 176 insertions(+), 47 deletions(-) diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index f5f7abae2..0d3c2fd49 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -2726,6 +2726,10 @@ async fn publish_agent_turn_metric( cache_write_tokens: None, }) } else { + // Defense-in-depth: UsageTracker already sets all turn_* fields to None + // when delta_reliable is false, so the None arm here is technically + // redundant. 
The explicit guard prevents a future refactor from + // accidentally publishing unreliable per-turn counts. None }; let cumulative_counts = Some(TokenCounts { diff --git a/crates/buzz-acp/src/usage.rs b/crates/buzz-acp/src/usage.rs index bcc5845ea..f581d9120 100644 --- a/crates/buzz-acp/src/usage.rs +++ b/crates/buzz-acp/src/usage.rs @@ -799,4 +799,17 @@ mod tests { assert_eq!(usage.turn_input_tokens, Some(500)); assert_eq!(usage.turn_output_tokens, Some(100)); } + + #[test] + fn begin_turn_then_take_without_record_returns_none() { + // A turn cancelled before the provider emits any tokens: begin_turn is + // called but no record() arrives before take(). take() must return None. + let mut tracker = UsageTracker::default(); + tracker.begin_turn("sess-precancel"); + let result = tracker.take(); + assert!( + result.is_none(), + "take() without any record() must return None (pre-response cancel path)" + ); + } } diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index 6499f1eb5..58d6bdfcc 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -699,7 +699,7 @@ async fn run_prompt(app: Arc, id: Value, params: Value, wire_tx: WireSender // provider response (validation failure, pre-response cancellation) carries // no information and must not produce a kind 44200 record per NIP-AM. 
if turn_input_tokens.is_some() || turn_output_tokens.is_some() { - let (accumulated_in, accumulated_out) = { + let accumulated = { let mut sessions = app.sessions.lock().await; if let Some(s) = sessions.get_mut(&sid) { s.accumulated_input_tokens = s @@ -708,30 +708,31 @@ async fn run_prompt(app: Arc, id: Value, params: Value, wire_tx: WireSender s.accumulated_output_tokens = s .accumulated_output_tokens .saturating_add(turn_output_tokens.unwrap_or(0)); - (s.accumulated_input_tokens, s.accumulated_output_tokens) + Some((s.accumulated_input_tokens, s.accumulated_output_tokens)) } else { - ( - turn_input_tokens.unwrap_or(0), - turn_output_tokens.unwrap_or(0), - ) + // Session is gone — the accumulated baseline no longer exists, so + // there is nothing correct to emit. Skip the usage notification. + None } }; - wire::send( - &wire_tx, - goose_session_update( - &sid, - json!({ - "sessionUpdate": "usage_update", - // used: total tokens as a context-usage proxy; - // contextLimit: 0 (buzz-agent has no context limit tracking). - "used": accumulated_in.saturating_add(accumulated_out), - "contextLimit": 0u64, - "accumulatedInputTokens": accumulated_in, - "accumulatedOutputTokens": accumulated_out, - }), - ), - ) - .await; + if let Some((accumulated_in, accumulated_out)) = accumulated { + wire::send( + &wire_tx, + goose_session_update( + &sid, + json!({ + "sessionUpdate": "usage_update", + // used: total tokens as a context-usage proxy; + // contextLimit: 0 (buzz-agent has no context limit tracking). 
+ "used": accumulated_in.saturating_add(accumulated_out), + "contextLimit": 0u64, + "accumulatedInputTokens": accumulated_in, + "accumulatedOutputTokens": accumulated_out, + }), + ), + ) + .await; + } } match result { Ok(stop) => { diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index 20ae5bd0a..f2a86ac4b 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -917,21 +917,84 @@ async fn no_usage_turn_emits_no_usage_notification() { /// (so token counts are observed), buzz-agent must still emit the usage /// notification before the cancelled `session/prompt` response. /// -/// Setup: round 1 is a tool call WITH usage (tokens are captured). The agent -/// sends the cancel before round 2's LLM call, so the turn exits with -/// `stopReason: "cancelled"`. The usage notification must precede that response. +/// Setup: round 1 is a tool call WITH usage (tokens are captured). After the +/// tool_call_update notification (proving round 1 is fully processed), we gate +/// the round-2 LLM response behind a `oneshot` barrier that only releases after +/// cancel is sent. This guarantees the turn exits with `stopReason: "cancelled"` +/// deterministically, even on a slow CI worker. #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn cancelled_turn_with_usage_emits_notification_before_response() { + use tokio::sync::oneshot; + + // Gate: the second LLM request (round 2) is held until we explicitly release it. + let (gate_tx, gate_rx) = oneshot::channel::<()>(); + let gate_rx = Arc::new(tokio::sync::Mutex::new(Some(gate_rx))); + // Round 1: tool call with usage — sets turn_input/output_tokens. - // Round 2 never starts because cancel fires at the round boundary. - let url = spawn_fake_llm(vec![openai_tool_call_with_usage( + // Round 2: gated — blocked until cancel fires, then released so the + // in-flight TCP request can resolve. 
The queue is empty for round 2, so the + // agent receives the fallback "no canned response" body which it treats as + // an LLM error; the cancel check at the round boundary fires first because + // the gate is only released after cancel is enqueued. + let responses = vec![openai_tool_call_with_usage( "call_cancel_test", "fake__noop", json!({}), 15, 6, - )]) - .await; + )]; + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let url = format!("http://{}", listener.local_addr().unwrap()); + let queue = Arc::new(Mutex::new(VecDeque::from(responses))); + let gate_rx_clone = gate_rx.clone(); + tokio::spawn(async move { + let mut request_num = 0usize; + loop { + let (mut sock, _) = match listener.accept().await { + Ok(p) => p, + Err(_) => return, + }; + let queue = queue.clone(); + let gate = gate_rx_clone.clone(); + request_num += 1; + let req_num = request_num; + tokio::spawn(async move { + let mut buf = Vec::new(); + let mut tmp = [0u8; 4096]; + while !buf.windows(4).any(|w| w == b"\r\n\r\n") { + match sock.read(&mut tmp).await { + Ok(0) | Err(_) => return, + Ok(n) => buf.extend_from_slice(&tmp[..n]), + } + if buf.len() > 1_000_000 { + return; + } + } + // For request 2+ (round 2), wait for the gate to open before + // responding. This ensures cancel is sent before round 2 resolves, + // making stopReason: cancelled deterministic. 
+ if req_num >= 2 { + let rx = gate.lock().await.take(); + if let Some(rx) = rx { + let _ = rx.await; + } + } + let body = queue + .lock() + .await + .pop_front() + .unwrap_or_else(|| json!({ "error": "no canned response" })); + let body_s = serde_json::to_string(&body).unwrap(); + let resp = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + body_s.len(), body_s, + ); + let _ = sock.write_all(resp.as_bytes()).await; + let _ = sock.shutdown().await; + }); + } + }); + let mut h = Harness::spawn(&url).await; let sid = init_session(&mut h).await; @@ -942,17 +1005,21 @@ async fn cancelled_turn_with_usage_emits_notification_before_response() { ) .await; - // Wait for the activeRunId advert (agent is live) then send cancel. + // Wait for the activeRunId advert (agent is live). let _run_id = recv_active_run_id(&mut h).await; - // Wait for the tool_call_update (failed — unknown tool) so we know round 1 - // LLM response has been processed and tokens are captured, THEN cancel. + // Wait for tool_call_update — proves round 1 LLM response is fully processed + // and tokens are captured before we send cancel. h.recv_until(|v| { v.get("method") == Some(&json!("session/update")) && v["params"]["update"]["sessionUpdate"] == "tool_call_update" }) .await; + + // Now send cancel and release the round-2 gate. Cancel is enqueued before + // round 2 can respond, so the turn exits with stopReason: cancelled. let c_id = h.send("session/cancel", json!({"sessionId": sid})).await; - // Drain remaining frames; the cancel OK and the prompt response both arrive. 
+ let _ = gate_tx.send(()); // unblock round 2 + let mut saw_usage_before_prompt_response = false; let mut saw_usage = false; let mut saw_cancel_ok = false; @@ -960,26 +1027,18 @@ async fn cancelled_turn_with_usage_emits_notification_before_response() { for _ in 0..40 { let v = h.recv().await; if v["id"] == json!(c_id) { - // cancel acknowledged saw_cancel_ok = true; } else if is_usage_update(&v) { saw_usage = true; - // Record that usage arrived before the prompt response (if it hasn't yet). if !saw_prompt_response { saw_usage_before_prompt_response = true; } } else if v["id"] == json!(p_id) { saw_prompt_response = true; - // The prompt response is either a result (stopReason: cancelled or - // end_turn) or an error (if cancel races with round 2's LLM call - // returning no-more-responses). Both are acceptable — we only care - // that the usage notification precedes whichever frame terminates - // the turn. - let has_result = v.get("result").is_some(); - let has_error = v.get("error").is_some(); - assert!( - has_result || has_error, - "expected result or error on prompt response, got: {v}" + // The gate guarantees stopReason: cancelled — not a race-driven error. + assert_eq!( + v["result"]["stopReason"], "cancelled", + "turn must end with stopReason: cancelled" ); } if saw_usage && saw_prompt_response && saw_cancel_ok { diff --git a/crates/buzz-core/src/agent_turn_metric.rs b/crates/buzz-core/src/agent_turn_metric.rs index 157e41518..037f54f5a 100644 --- a/crates/buzz-core/src/agent_turn_metric.rs +++ b/crates/buzz-core/src/agent_turn_metric.rs @@ -178,11 +178,17 @@ pub fn encrypt_agent_turn_metric( /// Decrypt and deserialize an [`AgentTurnMetricPayload`] from a `kind:44200` event. /// /// `recipient_keys` is the owner's key pair. +/// +/// Returns `Err(ObserverPayloadError::InvalidPayload)` if the decrypted payload +/// fails numeric validation (e.g. negative or non-finite `costUsd`), mirroring +/// the fail-closed contract of [`encrypt_agent_turn_metric`]. 
pub fn decrypt_agent_turn_metric( recipient_keys: &Keys, event: &Event, ) -> Result { - decrypt_observer_payload(recipient_keys, event) + let payload: AgentTurnMetricPayload = decrypt_observer_payload(recipient_keys, event)?; + payload.validate()?; + Ok(payload) } #[cfg(test)] @@ -465,4 +471,38 @@ mod tests { "encrypt must reject payload with negative costUsd" ); } + + #[test] + fn decrypt_agent_turn_metric_rejects_negative_cost_bypassing_encrypt() { + // Regression: a raw/misbehaving agent can persist a syntactically valid + // NIP-44 payload with costUsd: -1 by calling encrypt_observer_payload + // directly (bypassing the validating encrypt_agent_turn_metric helper). + // decrypt_agent_turn_metric must reject it symmetrically. + use crate::observer::encrypt_observer_payload; + + let agent_keys = Keys::generate(); + let owner_keys = Keys::generate(); + + // Build a payload with negative costUsd and encrypt via the lower-level + // path, bypassing encrypt_agent_turn_metric's validate() call. 
+ let bad_payload = make_payload_with_turn_cost(Some(-1.0)); + let ciphertext = + encrypt_observer_payload(&agent_keys, &owner_keys.public_key(), &bad_payload) + .expect("lower-level encrypt should succeed without validation"); + + let event = EventBuilder::new(Kind::Custom(44200), ciphertext) + .tags([ + Tag::parse(["p", &owner_keys.public_key().to_hex()]).unwrap(), + Tag::parse(["agent", &agent_keys.public_key().to_hex()]).unwrap(), + ]) + .sign_with_keys(&agent_keys) + .expect("sign"); + + let result = decrypt_agent_turn_metric(&owner_keys, &event); + assert!( + matches!(result, Err(ObserverPayloadError::InvalidPayload(_))), + "decrypt must reject a payload with negative costUsd even when \ + encrypted via the lower-level path" + ); + } } diff --git a/crates/buzz-relay/src/api/bridge.rs b/crates/buzz-relay/src/api/bridge.rs index d2e361806..18e146393 100644 --- a/crates/buzz-relay/src/api/bridge.rs +++ b/crates/buzz-relay/src/api/bridge.rs @@ -569,6 +569,12 @@ pub async fn query_events( if !event_in_accessible_channel(&se, &accessible_channels) { continue; } + // Defense-in-depth: never deliver a result-gated event (e.g. kind:44200 + // or kind:30622) to a non-owner via the feed path, even though feed SQL + // kind allowlists already exclude these kinds. + if !buzz_core::filter::reader_authorized_for_event(&se.event, &authed_pubkey_hex) { + continue; + } if let Ok(v) = serde_json::to_value(&se.event) { events.push(v); feed_count += 1; @@ -629,6 +635,12 @@ pub async fn query_events( if !event_in_accessible_channel(&se, &accessible_channels) { continue; } + // Defense-in-depth: never deliver a result-gated event (e.g. kind:44200 + // or kind:30622) to a non-owner via the thread path, even though + // requires_h_channel_scope already excludes these kinds from thread metadata. 
+ if !buzz_core::filter::reader_authorized_for_event(&se.event, &authed_pubkey_hex) { + continue; + } if let Ok(v) = serde_json::to_value(&se.event) { events.push(v); } diff --git a/crates/buzz-relay/src/handlers/ingest.rs b/crates/buzz-relay/src/handlers/ingest.rs index e6be21d22..87088bfd4 100644 --- a/crates/buzz-relay/src/handlers/ingest.rs +++ b/crates/buzz-relay/src/handlers/ingest.rs @@ -1103,7 +1103,7 @@ fn validate_agent_turn_metric_envelope(event: &nostr::Event) -> Result<(), Strin if has_h_tag { return Err( - "agent-turn-metric event must not have an `h` tag (channel identity belongs inside the encrypted payload)".to_string(), + "agent-turn-metric event must not have an `h` tag (channel identity belongs inside the encrypted payload)".to_string(), ); }