Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
310 changes: 310 additions & 0 deletions crates/buzz-core/src/agent_turn_metric.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
//! NIP-AM: Agent Turn Metric — payload type and encrypt/decrypt helpers.
//!
//! One `kind:44200` event is published per completed agent turn. Its content
//! is a NIP-44 v2 ciphertext (agent key → owner pubkey) that decodes to an
//! [`AgentTurnMetricPayload`] JSON object.
//!
//! See `docs/nips/NIP-AM.md` for the full specification.

use nostr::{Event, Keys, PublicKey};
use serde::{Deserialize, Serialize};

use crate::observer::{decrypt_observer_payload, encrypt_observer_payload, ObserverPayloadError};

// Re-export for callers that only need the error type.
pub use crate::observer::ObserverPayloadError as AgentTurnMetricError;

/// Token-usage counters for a single measurement window (one turn or cumulative).
///
/// All token fields are nullable — `None` means the harness did not report them,
/// NOT that the count was zero. See NIP-AM §Numeric validity and token semantics.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TokenCounts {
/// Input tokens (inclusive of cache reads/writes where applicable).
pub input_tokens: Option<u64>,

/// Output tokens.
pub output_tokens: Option<u64>,

/// Provider-reported total — NOT derived by summing input + output.
/// `None` when the provider did not report a total.
pub total_tokens: Option<u64>,

/// Estimated cost in USD. Must be finite and non-negative when present.
pub cost_usd: Option<f64>,

/// Informational: cache-read tokens included in `input_tokens`.
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_read_tokens: Option<u64>,

/// Informational: cache-write tokens included in `input_tokens`.
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_write_tokens: Option<u64>,
}

/// Why a turn ended.
///
/// NIP-AM: consumers MUST treat unrecognized `stopReason` values as `Unknown`
/// and keep the token counts valid. Custom deserialization maps any unrecognized
/// string to `Unknown` instead of failing the whole payload.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum StopReason {
/// Model reached a natural end-of-turn.
EndTurn,
/// Model hit the max-tokens limit.
MaxTokens,
/// Turn was cancelled by the owner or harness.
Cancelled,
/// Turn ended with an error.
Error,
/// Stop reason is unknown or unrecognized.
Unknown,
}

impl<'de> Deserialize<'de> for StopReason {
fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let s = String::deserialize(deserializer)?;
Ok(match s.as_str() {
"end_turn" => StopReason::EndTurn,
"max_tokens" => StopReason::MaxTokens,
"cancelled" => StopReason::Cancelled,
"error" => StopReason::Error,
"unknown" => StopReason::Unknown,
_ => StopReason::Unknown,
})
}
}

/// Decrypted payload of a `kind:44200` Agent Turn Metric event.
///
/// `harness` and `timestamp` are REQUIRED. All other fields are optional or
/// nullable unless constrained by the NIP (e.g. `session_id` + `turn_seq`
/// are required whenever `cumulative` is present).
///
/// Consumers MUST ignore unknown fields (forward compatibility).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AgentTurnMetricPayload {
/// Harness identifier (e.g. `"goose"`, `"buzz-agent"`). REQUIRED.
pub harness: String,

/// Model identifier as reported by the harness, or `None` if unknown.
pub model: Option<String>,

/// Channel UUID the turn served, encrypted inside the payload.
pub channel_id: Option<String>,

/// Session identifier. REQUIRED when `cumulative` is present.
pub session_id: Option<String>,

/// Turn identifier (harness-internal).
pub turn_id: Option<String>,

/// Monotonically increasing per-session sequence number.
/// REQUIRED when `cumulative` is present; strictly increasing within one
/// `session_id`. A publisher restart that loses the counter MUST start a
/// new `session_id`.
pub turn_seq: Option<u64>,

/// RFC 3339 timestamp (end-of-turn). REQUIRED.
pub timestamp: String,

/// Usage for this turn (computed delta). Null fields mean not reported.
pub turn: Option<TokenCounts>,

/// Session-cumulative usage as reported at end of this turn.
pub cumulative: Option<TokenCounts>,

/// `false` when the publisher could not observe the previous cumulative
/// baseline (e.g. harness restart mid-session), making `turn` unreliable.
/// Defaults to `true` on the wire when not explicitly set.
#[serde(default = "default_delta_reliable")]
pub delta_reliable: bool,

/// Why the turn ended. Unrecognized values MUST be treated as `Unknown`.
pub stop_reason: Option<StopReason>,
}

fn default_delta_reliable() -> bool {
true
}

/// Encrypt an [`AgentTurnMetricPayload`] into a NIP-44 v2 ciphertext string
/// using the agent's key pair and the owner's public key.
///
/// This is the content field of a `kind:44200` event.
pub fn encrypt_agent_turn_metric(
agent_keys: &Keys,
owner_pubkey: &PublicKey,
payload: &AgentTurnMetricPayload,
) -> Result<String, ObserverPayloadError> {
encrypt_observer_payload(agent_keys, owner_pubkey, payload)
}

/// Decrypt and deserialize an [`AgentTurnMetricPayload`] from a `kind:44200` event.
///
/// `recipient_keys` is the owner's key pair.
pub fn decrypt_agent_turn_metric(
recipient_keys: &Keys,
event: &Event,
) -> Result<AgentTurnMetricPayload, ObserverPayloadError> {
decrypt_observer_payload(recipient_keys, event)
}

#[cfg(test)]
mod tests {
use super::*;
use nostr::{EventBuilder, Kind, Tag};

fn sample_payload() -> AgentTurnMetricPayload {
AgentTurnMetricPayload {
harness: "goose".to_string(),
model: Some("claude-sonnet-4-5".to_string()),
channel_id: Some("12345678-1234-1234-1234-123456789abc".to_string()),
session_id: Some("sess-abc".to_string()),
turn_id: Some("turn-1".to_string()),
turn_seq: Some(1),
timestamp: "2026-07-01T20:11:03.213Z".to_string(),
turn: Some(TokenCounts {
input_tokens: Some(1234),
output_tokens: Some(567),
total_tokens: Some(1801),
cost_usd: Some(0.0123),
cache_read_tokens: None,
cache_write_tokens: None,
}),
cumulative: Some(TokenCounts {
input_tokens: Some(45210),
output_tokens: Some(9876),
total_tokens: Some(55086),
cost_usd: Some(0.41),
cache_read_tokens: None,
cache_write_tokens: None,
}),
delta_reliable: true,
stop_reason: Some(StopReason::EndTurn),
}
}

#[test]
fn round_trip_encrypt_decrypt() {
let agent_keys = Keys::generate();
let owner_keys = Keys::generate();

let payload = sample_payload();
let ciphertext = encrypt_agent_turn_metric(&agent_keys, &owner_keys.public_key(), &payload)
.expect("encrypt");

// Build a minimal event envelope so decrypt_observer_payload can use event.pubkey.
let event = EventBuilder::new(Kind::Custom(44200), ciphertext)
.tags([
Tag::parse(["p", &owner_keys.public_key().to_hex()]).unwrap(),
Tag::parse(["agent", &agent_keys.public_key().to_hex()]).unwrap(),
])
.sign_with_keys(&agent_keys)
.expect("sign");

let decoded = decrypt_agent_turn_metric(&owner_keys, &event).expect("decrypt");

assert_eq!(decoded, payload);
}

#[test]
fn wrong_key_decrypt_fails() {
let agent_keys = Keys::generate();
let owner_keys = Keys::generate();
let wrong_keys = Keys::generate();

let payload = sample_payload();
let ciphertext = encrypt_agent_turn_metric(&agent_keys, &owner_keys.public_key(), &payload)
.expect("encrypt");

let event = EventBuilder::new(Kind::Custom(44200), ciphertext)
.tags([
Tag::parse(["p", &owner_keys.public_key().to_hex()]).unwrap(),
Tag::parse(["agent", &agent_keys.public_key().to_hex()]).unwrap(),
])
.sign_with_keys(&agent_keys)
.expect("sign");

let result = decrypt_agent_turn_metric(&wrong_keys, &event);
assert!(result.is_err(), "expected decrypt error with wrong key");
}

#[test]
fn delta_reliable_defaults_to_true_when_absent() {
let json = r#"{"harness":"goose","timestamp":"2026-07-01T20:11:03Z"}"#;
let payload: AgentTurnMetricPayload = serde_json::from_str(json).expect("parse");
assert!(
payload.delta_reliable,
"deltaReliable should default to true"
);
}

#[test]
fn stop_reason_round_trips() {
for (variant, json_val) in [
(StopReason::EndTurn, "\"end_turn\""),
(StopReason::MaxTokens, "\"max_tokens\""),
(StopReason::Cancelled, "\"cancelled\""),
(StopReason::Error, "\"error\""),
(StopReason::Unknown, "\"unknown\""),
] {
let serialized = serde_json::to_string(&variant).unwrap();
assert_eq!(serialized, json_val);
let deserialized: StopReason = serde_json::from_str(json_val).unwrap();
assert_eq!(deserialized, variant);
}
}

#[test]
fn null_token_counts_round_trip() {
// Verify that None fields serialize to `null` (not absent), as required
// by the NIP — consumers must distinguish "not reported" from "zero".
let counts = TokenCounts {
input_tokens: None,
output_tokens: None,
total_tokens: None,
cost_usd: None,
cache_read_tokens: None,
cache_write_tokens: None,
};
let json = serde_json::to_string(&counts).unwrap();
// cache_* are skip_serializing_if = None, others serialize as null
assert!(json.contains("\"inputTokens\":null"));
assert!(json.contains("\"outputTokens\":null"));
let back: TokenCounts = serde_json::from_str(&json).unwrap();
assert_eq!(back, counts);
}

#[test]
fn unknown_stop_reason_maps_to_unknown_not_error() {
// NIP-AM: consumers MUST treat unrecognized stopReason values as Unknown;
// the token counts remain valid and the whole payload must not be rejected.
let json = r#"{
"harness": "goose",
"timestamp": "2026-07-01T20:11:03Z",
"stopReason": "tool_limit",
"turn": {
"inputTokens": 1234,
"outputTokens": 567,
"totalTokens": 1801,
"costUsd": null
}
}"#;
let payload: AgentTurnMetricPayload =
serde_json::from_str(json).expect("payload with future stopReason must parse");
assert_eq!(
payload.stop_reason,
Some(StopReason::Unknown),
"unrecognized stopReason must map to Unknown"
);
// Token counts must be preserved.
let turn = payload.turn.expect("turn must be present");
assert_eq!(turn.input_tokens, Some(1234));
assert_eq!(turn.output_tokens, Some(567));
assert_eq!(turn.total_tokens, Some(1801));
}
}
48 changes: 42 additions & 6 deletions crates/buzz-core/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@ pub fn filters_match(filters: &[Filter], event: &StoredEvent) -> bool {
}

/// Result-level read authorization for relay-signed events whose content is
/// private to a single viewer. Currently only `KIND_DM_VISIBILITY`: the reader
/// MUST equal the snapshot's `#p` (owner). Returns `true` for every other kind.
/// private to a single viewer. Currently gates `KIND_DM_VISIBILITY` and
/// `KIND_AGENT_TURN_METRIC`: the reader MUST equal the event's `#p` tag
/// (owner). Returns `true` for every other kind.
///
/// This guards the delivery surfaces directly, so a query that bypasses the
/// filter-level `#p` gate (e.g. a kindless `ids:[…]` lookup of a known snapshot
/// id) still cannot read another viewer's hidden-DM set.
/// This guards every delivery surface — WS historical pull (`req.rs`), HTTP
/// bridge (`bridge.rs`), and live fan-out (`event.rs`) — so a query that
/// bypasses the filter-level `#p` gate (e.g. a kindless `ids:[…]` lookup of
/// a known event id) still cannot read another user's private event.
pub fn reader_authorized_for_event(event: &nostr::Event, reader_pubkey_hex: &str) -> bool {
if crate::kind::event_kind_u32(event) != crate::kind::KIND_DM_VISIBILITY {
let kind = crate::kind::event_kind_u32(event);
if kind != crate::kind::KIND_DM_VISIBILITY && kind != crate::kind::KIND_AGENT_TURN_METRIC {
return true;
}
let p = nostr::SingleLetterTag::lowercase(nostr::Alphabet::P);
Expand Down Expand Up @@ -261,4 +264,37 @@ mod tests {
.expect("sign");
assert!(reader_authorized_for_event(&note, other));
}

#[test]
fn reader_authorized_for_event_gates_agent_turn_metric_by_p() {
let agent_keys = Keys::generate();
let owner = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
let attacker = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc";

// Agent turn metric event: pubkey=agent, p tag=owner (NIP-AM envelope shape).
let metric = EventBuilder::new(
Kind::Custom(crate::kind::KIND_AGENT_TURN_METRIC as u16),
"encrypted-payload",
)
.tags([
Tag::parse(["p", owner]).unwrap(),
Tag::parse(["agent", &agent_keys.public_key().to_hex()]).unwrap(),
])
.sign_with_keys(&agent_keys)
.expect("sign");

assert!(
reader_authorized_for_event(&metric, owner),
"owner must be authorized to read their own agent turn metric"
);
assert!(
!reader_authorized_for_event(&metric, attacker),
"non-owner must NOT be authorized to read an agent turn metric via kindless ids"
);
// The authoring agent also does not get read-back (NIP-AM: owner-only read).
assert!(
!reader_authorized_for_event(&metric, &agent_keys.public_key().to_hex()),
"the authoring agent must NOT be authorized to read its own metric event (owner-only)"
);
}
}
Loading
Loading