From ef7a3817cb64f7731e459d9ed1e193873ab4b52e Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 20:35:33 +0000 Subject: [PATCH 1/5] feat: add W3C-compatible correlation IDs to event envelope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generate stable trace_id (16 bytes / 32 hex) and per-event span_id (8 bytes / 16 hex) on every emitted envelope, with parent_span_id set for known event chains (PreToolUse→PostToolUse, SubagentStart→SubagentStop, TaskCreated→TaskCompleted, Elicitation→ElicitationResult, PreCompact→PostCompact, UserPromptSubmit→Stop, SessionStart→SessionEnd). Trace IDs persist per session under ~/.config/promptconduit/traces so they remain stable across separate hook process invocations. Atomic writes and O_CREATE|O_EXCL handle concurrent hook races. Probabilistic GC removes records older than 30 days. This is purely additive — envelope_version bumped to 1.1 and the new field is optional, so older servers continue to work. The CLI does not import the OpenTelemetry SDK; it only produces IDs an OTLP exporter would later need. Adds `promptconduit debug trace ` for inspecting locally recorded trace state. https://claude.ai/code/session_01XjfDVBhyo2F4NwTSvGA9aa --- CLAUDE.md | 1 + cmd/correlation.go | 136 ++++++++++++ cmd/debug.go | 77 +++++++ cmd/hook.go | 13 +- cmd/root.go | 1 + internal/correlation/id.go | 73 ++++++ internal/correlation/id_test.go | 90 ++++++++ internal/correlation/store.go | 346 +++++++++++++++++++++++++++++ internal/correlation/store_test.go | 170 ++++++++++++++ internal/envelope/envelope.go | 23 +- 10 files changed, 925 insertions(+), 5 deletions(-) create mode 100644 cmd/correlation.go create mode 100644 cmd/debug.go create mode 100644 internal/correlation/id.go create mode 100644 internal/correlation/id_test.go create mode 100644 internal/correlation/store.go create mode 100644 internal/correlation/store_test.go diff --git a/CLAUDE.md b/CLAUDE.md index 568e7de..7d188e0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -27,6 +27,7 @@ cli/ ├── cmd/ # CLI commands (install, uninstall, status, test, hook, config, sync) ├── internal/ │ ├── client/ # HTTP client, config loading +│ ├── correlation/ # W3C trace_id/span_id generation and per-session persistence │ ├── envelope/ # Raw event envelope types │ ├── git/ # Git context extraction │ ├── sync/ # Transcript sync and parsing (Claude Code parser, state management) diff --git a/cmd/correlation.go b/cmd/correlation.go new file mode 100644 index 0000000..efb3e36 --- /dev/null +++ b/cmd/correlation.go @@ -0,0 +1,136 @@ +package cmd + +import ( + "github.com/promptconduit/cli/internal/client" + "github.com/promptconduit/cli/internal/correlation" + "github.com/promptconduit/cli/internal/envelope" +) + +// buildCorrelation populates trace/span IDs for an outgoing envelope and +// records the new span for future parent lookups. Failures are silent — +// correlation is best-effort and must never block the hook. +// +// hookEvent is the tool-native event name (Claude Code uses PreToolUse, +// PostToolUse, etc.). nativeEvent is the parsed payload; sessionID has +// already been pulled out by the caller. +func buildCorrelation(tool, hookEvent, sessionID string, nativeEvent map[string]interface{}) *envelope.Correlation { + store := correlation.NewStore(client.ConfigDir()) + store.MaybeGC() + + spanID := correlation.NewSpanID() + + // No session ID: emit an orphan trace for this event only. + if sessionID == "" { + return &envelope.Correlation{ + TraceID: correlation.NewTraceID(), + SpanID: spanID, + } + } + + rec, err := store.LoadOrCreateTrace(sessionID) + if err != nil || rec == nil { + return &envelope.Correlation{ + TraceID: correlation.NewTraceID(), + SpanID: spanID, + } + } + + parentSpanID := lookupParentSpan(store, tool, hookEvent, sessionID, nativeEvent) + recordSpan(store, tool, hookEvent, sessionID, spanID, nativeEvent) + + return &envelope.Correlation{ + TraceID: rec.TraceID, + SpanID: spanID, + ParentSpanID: parentSpanID, + } +} + +// lookupParentSpan resolves parent_span_id for known event chains. +// Returns "" if no parent applies or the lookup misses. +func lookupParentSpan(store *correlation.Store, tool, hookEvent, sessionID string, e map[string]interface{}) string { + switch tool { + case "claude-code": + return lookupParentClaudeCode(store, hookEvent, sessionID, e) + } + return "" +} + +func lookupParentClaudeCode(store *correlation.Store, hookEvent, sessionID string, e map[string]interface{}) string { + switch hookEvent { + case "PostToolUse", "PostToolUseFailure": + if id := stringField(e, "tool_use_id"); id != "" { + return store.LookupParent(sessionID, correlation.SpanKindToolUse, id) + } + case "SubagentStop": + if id := firstStringField(e, "subagent_id", "agent_id"); id != "" { + return store.LookupParent(sessionID, correlation.SpanKindSubagent, id) + } + case "TaskCompleted": + if id := stringField(e, "task_id"); id != "" { + return store.LookupParent(sessionID, correlation.SpanKindTask, id) + } + case "ElicitationResult": + if id := stringField(e, "elicitation_id"); id != "" { + return store.LookupParent(sessionID, correlation.SpanKindElicitation, id) + } + case "PostCompact": + return store.LookupParent(sessionID, correlation.SpanKindContextCompact, sessionID) + case "Stop", "StopFailure": + // Agent response: parent is the originating user prompt. + return store.LookupLastPromptSubmit(sessionID) + case "SessionEnd": + return store.LookupRootSpan(sessionID) + } + return "" +} + +// recordSpan persists span IDs that may become future parents. +func recordSpan(store *correlation.Store, tool, hookEvent, sessionID, spanID string, e map[string]interface{}) { + switch tool { + case "claude-code": + recordSpanClaudeCode(store, hookEvent, sessionID, spanID, e) + } +} + +func recordSpanClaudeCode(store *correlation.Store, hookEvent, sessionID, spanID string, e map[string]interface{}) { + switch hookEvent { + case "SessionStart": + _ = store.RecordRootSpan(sessionID, spanID) + case "UserPromptSubmit": + _ = store.RecordLastPromptSubmit(sessionID, spanID) + case "PreToolUse": + if id := stringField(e, "tool_use_id"); id != "" { + _ = store.RecordSpan(sessionID, correlation.SpanKindToolUse, id, spanID) + } + case "SubagentStart": + if id := firstStringField(e, "subagent_id", "agent_id"); id != "" { + _ = store.RecordSpan(sessionID, correlation.SpanKindSubagent, id, spanID) + } + case "TaskCreated": + if id := stringField(e, "task_id"); id != "" { + _ = store.RecordSpan(sessionID, correlation.SpanKindTask, id, spanID) + } + case "Elicitation": + if id := stringField(e, "elicitation_id"); id != "" { + _ = store.RecordSpan(sessionID, correlation.SpanKindElicitation, id, spanID) + } + case "PreCompact": + _ = store.RecordSpan(sessionID, correlation.SpanKindContextCompact, sessionID, spanID) + } +} + +func stringField(e map[string]interface{}, key string) string { + if v, ok := e[key].(string); ok { + return v + } + return "" +} + +func firstStringField(e map[string]interface{}, keys ...string) string { + for _, k := range keys { + if v := stringField(e, k); v != "" { + return v + } + } + return "" +} diff --git a/cmd/debug.go b/cmd/debug.go new file mode 100644 index 0000000..06ccc83 --- /dev/null +++ b/cmd/debug.go @@ -0,0 +1,77 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/promptconduit/cli/internal/client" + "github.com/promptconduit/cli/internal/correlation" + "github.com/spf13/cobra" +) + +var debugCmd = &cobra.Command{ + Use: "debug", + Short: "Debugging utilities", +} + +var debugTraceCmd = &cobra.Command{ + Use: "trace ", + Short: "Print the correlation trace tree for a session", + Long: `Print the locally-stored trace ID and recorded parent spans for a session. + +Useful for support and self-debugging when correlation IDs look wrong. +Reads from ~/.config/promptconduit/traces/.{json,spans.json}.`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + sessionID := args[0] + store := correlation.NewStore(client.ConfigDir()) + + rec, err := store.LoadTrace(sessionID) + if err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("no trace record for session %s", sessionID) + } + return fmt.Errorf("read trace: %w", err) + } + + cmd.Printf("Session: %s\n", rec.SessionID) + cmd.Printf("Trace ID: %s\n", rec.TraceID) + cmd.Printf("Created: %s\n", rec.CreatedAt.Format("2006-01-02T15:04:05Z07:00")) + cmd.Printf("Last seen: %s\n", rec.LastSeenAt.Format("2006-01-02T15:04:05Z07:00")) + + spans, err := store.LoadSpans(sessionID) + if err != nil { + return fmt.Errorf("read spans: %w", err) + } + + cmd.Println() + cmd.Println("Recorded parent spans:") + if spans.RootSpan != "" { + cmd.Printf(" root_span: %s\n", spans.RootSpan) + } + if spans.LastPromptSubmit != "" { + cmd.Printf(" last_prompt_submit: %s\n", spans.LastPromptSubmit) + } + printSpanMap(cmd, "tool_uses", spans.ToolUses) + printSpanMap(cmd, "subagents", spans.Subagents) + printSpanMap(cmd, "tasks", spans.Tasks) + printSpanMap(cmd, "elicitations", spans.Elicitations) + printSpanMap(cmd, "context_compacts", spans.ContextCompacts) + + return nil + }, +} + +func printSpanMap(cmd *cobra.Command, label string, m map[string]string) { + if len(m) == 0 { + return + } + cmd.Printf(" %s:\n", label) + for k, v := range m { + cmd.Printf(" %s -> %s\n", k, v) + } +} + +func init() { + debugCmd.AddCommand(debugTraceCmd) +} diff --git a/cmd/hook.go b/cmd/hook.go index ad43f30..82a05e6 100644 --- a/cmd/hook.go +++ b/cmd/hook.go @@ -91,15 +91,23 @@ func processHookEvent() error { // Detect tool (simple heuristics) tool := detectTool(nativeEvent) hookEvent := getHookEventName(nativeEvent) + sessionID := getSessionID(nativeEvent) fileLog("Detected tool: %s, hook event: %s", tool, hookEvent) + // Build correlation IDs (W3C-compatible trace_id/span_id). + // Stable across hook fires within a session; best-effort, never blocks. + corr := buildCorrelation(tool, hookEvent, sessionID, nativeEvent) + if corr != nil { + fileLog("correlation: trace=%s span=%s parent=%s", corr.TraceID, corr.SpanID, corr.ParentSpanID) + } + // Extract git context from working directory var gitCtx *envelope.GitContext cwd := getWorkingDirectory(nativeEvent) // Write to local events file for macOS app - writeLocalEvent(hookEvent, cwd, getSessionID(nativeEvent)) + writeLocalEvent(hookEvent, cwd, sessionID) // Trigger auto-sync on SessionEnd or Stop events // SessionEnd: Fires when user explicitly ends session (rare - users often just close terminal) @@ -107,7 +115,6 @@ func processHookEvent() error { // The sync logic deduplicates via hash checking, so frequent triggers are safe // NOTE: Called directly (not in goroutine) since it spawns a subprocess and returns quickly if hookEvent == "SessionEnd" || hookEvent == "Stop" { - sessionID := getSessionID(nativeEvent) if sessionID != "" { triggerAutoSync(sessionID) } @@ -158,6 +165,7 @@ func processHookEvent() error { // Create envelope with attachment metadata env := envelope.NewWithAttachments(Version, tool, hookEvent, rawInput, gitCtx, envAttachments) + env.Correlation = corr // Send via multipart with binary attachments if err := apiClient.SendEnvelopeWithAttachmentsAsync(env, attachmentData); err != nil { @@ -173,6 +181,7 @@ func processHookEvent() error { // Create envelope with raw payload (no attachments case, or non-UserPromptSubmit events) env := envelope.New(Version, tool, hookEvent, rawInput, gitCtx) + env.Correlation = corr fileLog("Created envelope: tool=%s, event=%s", tool, hookEvent) diff --git a/cmd/root.go b/cmd/root.go index 72df9a5..ec79b73 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -40,6 +40,7 @@ func init() { rootCmd.AddCommand(configCmd) rootCmd.AddCommand(insightsCmd) rootCmd.AddCommand(skillsCmd) + rootCmd.AddCommand(debugCmd) } var versionCmd = &cobra.Command{ diff --git a/internal/correlation/id.go b/internal/correlation/id.go new file mode 100644 index 0000000..e789874 --- /dev/null +++ b/internal/correlation/id.go @@ -0,0 +1,73 @@ +// Package correlation generates and persists W3C Trace Context-compatible +// correlation IDs (trace_id, span_id, parent_span_id) so events emitted by +// the CLI can be stitched into a single trace server-side. +// +// The package is intentionally light: it does not depend on the OpenTelemetry +// SDK and does not speak OTLP. It only produces IDs in the format an OTEL +// exporter would later need. +package correlation + +import ( + "crypto/rand" + "encoding/hex" + "regexp" +) + +const ( + // TraceIDLen is the W3C trace ID length in bytes (16 bytes / 32 hex chars). + TraceIDLen = 16 + // SpanIDLen is the W3C span ID length in bytes (8 bytes / 16 hex chars). + SpanIDLen = 8 + + zeroTraceID = "00000000000000000000000000000000" + zeroSpanID = "0000000000000000" +) + +var ( + traceIDPattern = regexp.MustCompile(`^[0-9a-f]{32}$`) + spanIDPattern = regexp.MustCompile(`^[0-9a-f]{16}$`) +) + +// NewTraceID returns a fresh 16-byte trace ID encoded as 32 lowercase hex chars. +// The all-zero value is rejected per the W3C spec; on the astronomically +// unlikely chance crypto/rand produces it, a single retry is attempted. +func NewTraceID() string { + for i := 0; i < 2; i++ { + var b [TraceIDLen]byte + if _, err := rand.Read(b[:]); err != nil { + continue + } + id := hex.EncodeToString(b[:]) + if id != zeroTraceID { + return id + } + } + // Fallback: flip a bit so we never return the invalid all-zero value. + return "00000000000000000000000000000001" +} + +// NewSpanID returns a fresh 8-byte span ID encoded as 16 lowercase hex chars. +// The all-zero value is rejected per the W3C spec. +func NewSpanID() string { + for i := 0; i < 2; i++ { + var b [SpanIDLen]byte + if _, err := rand.Read(b[:]); err != nil { + continue + } + id := hex.EncodeToString(b[:]) + if id != zeroSpanID { + return id + } + } + return "0000000000000001" +} + +// IsValidTraceID reports whether s is a non-zero 32-char lowercase hex string. +func IsValidTraceID(s string) bool { + return s != zeroTraceID && traceIDPattern.MatchString(s) +} + +// IsValidSpanID reports whether s is a non-zero 16-char lowercase hex string. +func IsValidSpanID(s string) bool { + return s != zeroSpanID && spanIDPattern.MatchString(s) +} diff --git a/internal/correlation/id_test.go b/internal/correlation/id_test.go new file mode 100644 index 0000000..d3146dd --- /dev/null +++ b/internal/correlation/id_test.go @@ -0,0 +1,90 @@ +package correlation + +import ( + "testing" +) + +func TestNewTraceID_Format(t *testing.T) { + id := NewTraceID() + if len(id) != 32 { + t.Fatalf("trace id length = %d, want 32", len(id)) + } + if !IsValidTraceID(id) { + t.Fatalf("trace id %q failed validation", id) + } +} + +func TestNewSpanID_Format(t *testing.T) { + id := NewSpanID() + if len(id) != 16 { + t.Fatalf("span id length = %d, want 16", len(id)) + } + if !IsValidSpanID(id) { + t.Fatalf("span id %q failed validation", id) + } +} + +func TestNewTraceID_Uniqueness(t *testing.T) { + const n = 100000 + seen := make(map[string]struct{}, n) + for i := 0; i < n; i++ { + id := NewTraceID() + if _, dup := seen[id]; dup { + t.Fatalf("duplicate trace id at iteration %d: %s", i, id) + } + seen[id] = struct{}{} + } +} + +func TestNewSpanID_Uniqueness(t *testing.T) { + const n = 100000 + seen := make(map[string]struct{}, n) + for i := 0; i < n; i++ { + id := NewSpanID() + if _, dup := seen[id]; dup { + t.Fatalf("duplicate span id at iteration %d: %s", i, id) + } + seen[id] = struct{}{} + } +} + +func TestIsValidTraceID(t *testing.T) { + cases := []struct { + in string + want bool + }{ + {"4bf92f3577b34da6a3ce929d0e0e4736", true}, + {"00000000000000000000000000000000", false}, // all-zero + {"4BF92F3577B34DA6A3CE929D0E0E4736", false}, // uppercase + {"4bf92f3577b34da6a3ce929d0e0e473", false}, // 31 chars + {"4bf92f3577b34da6a3ce929d0e0e47366", false}, // 33 chars + {"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", false}, // non-hex + {"", false}, + } + for _, tc := range cases { + got := IsValidTraceID(tc.in) + if got != tc.want { + t.Errorf("IsValidTraceID(%q) = %v, want %v", tc.in, got, tc.want) + } + } +} + +func TestIsValidSpanID(t *testing.T) { + cases := []struct { + in string + want bool + }{ + {"00f067aa0ba902b7", true}, + {"0000000000000000", false}, + {"00F067AA0BA902B7", false}, + {"00f067aa0ba902b", false}, + {"00f067aa0ba902b77", false}, + {"", false}, + } + for _, tc := range cases { + got := IsValidSpanID(tc.in) + if got != tc.want { + t.Errorf("IsValidSpanID(%q) = %v, want %v", tc.in, got, tc.want) + } + } +} diff --git a/internal/correlation/store.go b/internal/correlation/store.go new file mode 100644 index 0000000..0c68462 --- /dev/null +++ b/internal/correlation/store.go @@ -0,0 +1,346 @@ +package correlation + +import ( + "crypto/rand" + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "time" +) + +// SpanKind names the lookup table inside a session's spans file. +type SpanKind string + +const ( + SpanKindToolUse SpanKind = "tool_uses" + SpanKindSubagent SpanKind = "subagents" + SpanKindTask SpanKind = "tasks" + SpanKindElicitation SpanKind = "elicitations" + SpanKindContextCompact SpanKind = "context_compacts" +) + +const ( + // LastPromptSubmitKey is the well-known key for the most recent + // prompt_submit span in a session. Stored alongside the per-chain tables. + LastPromptSubmitKey = "last_prompt_submit" + // RootSpanKey is the well-known key for the session_start span ID, + // used as the parent for session_end. + RootSpanKey = "root_span" + + tracesSubdir = "traces" + gcMaxAge = 30 * 24 * time.Hour + // gcProbability is the chance per hook fire that GC runs. 1/100 ≈ "~1%" + // in the PRD; we use 256 as a power of two for a cheap bitmask. + gcProbabilityDenominator = 100 +) + +// TraceRecord is persisted per session. +type TraceRecord struct { + SessionID string `json:"session_id"` + TraceID string `json:"trace_id"` + CreatedAt time.Time `json:"created_at"` + LastSeenAt time.Time `json:"last_seen_at"` +} + +// SpansRecord is persisted per session: keyed parent span lookup tables. +type SpansRecord struct { + ToolUses map[string]string `json:"tool_uses,omitempty"` + Subagents map[string]string `json:"subagents,omitempty"` + Tasks map[string]string `json:"tasks,omitempty"` + Elicitations map[string]string `json:"elicitations,omitempty"` + ContextCompacts map[string]string `json:"context_compacts,omitempty"` + LastPromptSubmit string `json:"last_prompt_submit,omitempty"` + RootSpan string `json:"root_span,omitempty"` +} + +// Store persists trace and span lookup state under baseDir. +type Store struct { + baseDir string +} + +// NewStore returns a Store rooted at baseDir (typically the +// promptconduit config dir). Directories are created on demand. +func NewStore(baseDir string) *Store { + return &Store{baseDir: baseDir} +} + +func (s *Store) tracesDir() string { return filepath.Join(s.baseDir, tracesSubdir) } + +func (s *Store) traceFile(sessionID string) string { + return filepath.Join(s.tracesDir(), sessionID+".json") +} + +func (s *Store) spansFile(sessionID string) string { + return filepath.Join(s.tracesDir(), sessionID+".spans.json") +} + +// LoadOrCreateTrace returns the trace ID for sessionID, creating and +// persisting a new one if none exists. The last_seen_at timestamp is +// refreshed on every call. +// +// Concurrent hook processes are reconciled via O_CREATE|O_EXCL: only the +// first writer wins; subsequent callers re-read the existing file. +func (s *Store) LoadOrCreateTrace(sessionID string) (*TraceRecord, error) { + if sessionID == "" { + return nil, errors.New("correlation: empty session id") + } + if err := os.MkdirAll(s.tracesDir(), 0700); err != nil { + return nil, fmt.Errorf("correlation: mkdir traces: %w", err) + } + + path := s.traceFile(sessionID) + + // Fast path: file already exists. + if rec, err := readTrace(path); err == nil { + rec.LastSeenAt = time.Now().UTC() + _ = writeTraceAtomic(path, rec) // refresh; ignore errors (non-fatal) + return rec, nil + } + + // Slow path: try to create exclusively. + rec := &TraceRecord{ + SessionID: sessionID, + TraceID: NewTraceID(), + CreatedAt: time.Now().UTC(), + LastSeenAt: time.Now().UTC(), + } + + data, err := json.Marshal(rec) + if err != nil { + return nil, fmt.Errorf("correlation: marshal trace: %w", err) + } + + f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0600) + if err != nil { + if os.IsExist(err) { + // Lost the race; another hook wrote it first. + if existing, rerr := readTrace(path); rerr == nil { + return existing, nil + } + } + return nil, fmt.Errorf("correlation: create trace: %w", err) + } + defer f.Close() + if _, err := f.Write(data); err != nil { + return nil, fmt.Errorf("correlation: write trace: %w", err) + } + return rec, nil +} + +// RecordSpan stores a parent span ID under the given kind+key for sessionID. +// Returns nil on success. Errors are non-fatal at the call site. +func (s *Store) RecordSpan(sessionID string, kind SpanKind, key, spanID string) error { + if sessionID == "" || spanID == "" { + return errors.New("correlation: empty session or span id") + } + rec, err := s.loadSpans(sessionID) + if err != nil { + return err + } + switch kind { + case SpanKindToolUse: + ensureMap(&rec.ToolUses)[key] = spanID + case SpanKindSubagent: + ensureMap(&rec.Subagents)[key] = spanID + case SpanKindTask: + ensureMap(&rec.Tasks)[key] = spanID + case SpanKindElicitation: + ensureMap(&rec.Elicitations)[key] = spanID + case SpanKindContextCompact: + ensureMap(&rec.ContextCompacts)[key] = spanID + default: + return fmt.Errorf("correlation: unknown span kind %q", kind) + } + return s.writeSpans(sessionID, rec) +} + +// RecordLastPromptSubmit stores the most recent prompt_submit span ID. +func (s *Store) RecordLastPromptSubmit(sessionID, spanID string) error { + if sessionID == "" || spanID == "" { + return errors.New("correlation: empty session or span id") + } + rec, err := s.loadSpans(sessionID) + if err != nil { + return err + } + rec.LastPromptSubmit = spanID + return s.writeSpans(sessionID, rec) +} + +// RecordRootSpan stores the session_start span ID. +func (s *Store) RecordRootSpan(sessionID, spanID string) error { + if sessionID == "" || spanID == "" { + return errors.New("correlation: empty session or span id") + } + rec, err := s.loadSpans(sessionID) + if err != nil { + return err + } + rec.RootSpan = spanID + return s.writeSpans(sessionID, rec) +} + +// LookupParent returns the parent span ID for kind+key, or "" if absent. +func (s *Store) LookupParent(sessionID string, kind SpanKind, key string) string { + rec, err := s.loadSpans(sessionID) + if err != nil { + return "" + } + switch kind { + case SpanKindToolUse: + return rec.ToolUses[key] + case SpanKindSubagent: + return rec.Subagents[key] + case SpanKindTask: + return rec.Tasks[key] + case SpanKindElicitation: + return rec.Elicitations[key] + case SpanKindContextCompact: + return rec.ContextCompacts[key] + } + return "" +} + +// LookupLastPromptSubmit returns the most recent prompt_submit span ID, or "". +func (s *Store) LookupLastPromptSubmit(sessionID string) string { + rec, err := s.loadSpans(sessionID) + if err != nil { + return "" + } + return rec.LastPromptSubmit +} + +// LookupRootSpan returns the session_start span ID, or "". +func (s *Store) LookupRootSpan(sessionID string) string { + rec, err := s.loadSpans(sessionID) + if err != nil { + return "" + } + return rec.RootSpan +} + +// LoadSpans returns the spans record for sessionID (or a fresh one). +// Exposed for the debug command. +func (s *Store) LoadSpans(sessionID string) (*SpansRecord, error) { + return s.loadSpans(sessionID) +} + +// LoadTrace returns the trace record for sessionID without updating it. +// Returns os.ErrNotExist if there is no record. +func (s *Store) LoadTrace(sessionID string) (*TraceRecord, error) { + return readTrace(s.traceFile(sessionID)) +} + +// MaybeGC opportunistically deletes trace files older than gcMaxAge. +// Runs roughly once per gcProbabilityDenominator invocations; never blocks. +func (s *Store) MaybeGC() { + var b [4]byte + if _, err := rand.Read(b[:]); err != nil { + return + } + if binary.BigEndian.Uint32(b[:])%gcProbabilityDenominator != 0 { + return + } + s.gc() +} + +func (s *Store) gc() { + entries, err := os.ReadDir(s.tracesDir()) + if err != nil { + return + } + cutoff := time.Now().Add(-gcMaxAge) + for _, e := range entries { + if e.IsDir() { + continue + } + info, err := e.Info() + if err != nil { + continue + } + // Use mtime as a cheap proxy for last_seen_at — refreshed on every + // LoadOrCreateTrace via atomic rewrite. + if info.ModTime().Before(cutoff) { + _ = os.Remove(filepath.Join(s.tracesDir(), e.Name())) + } + } +} + +func (s *Store) loadSpans(sessionID string) (*SpansRecord, error) { + if sessionID == "" { + return nil, errors.New("correlation: empty session id") + } + path := s.spansFile(sessionID) + data, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return &SpansRecord{}, nil + } + return nil, err + } + var rec SpansRecord + if err := json.Unmarshal(data, &rec); err != nil { + // Corrupt file: treat as empty rather than crashing the hook. + return &SpansRecord{}, nil + } + return &rec, nil +} + +func (s *Store) writeSpans(sessionID string, rec *SpansRecord) error { + if err := os.MkdirAll(s.tracesDir(), 0700); err != nil { + return err + } + return writeJSONAtomic(s.spansFile(sessionID), rec) +} + +func readTrace(path string) (*TraceRecord, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + var rec TraceRecord + if err := json.Unmarshal(data, &rec); err != nil { + return nil, err + } + if !IsValidTraceID(rec.TraceID) { + return nil, fmt.Errorf("correlation: invalid trace_id in %s", path) + } + return &rec, nil +} + +func writeTraceAtomic(path string, rec *TraceRecord) error { + return writeJSONAtomic(path, rec) +} + +func writeJSONAtomic(path string, v interface{}) error { + data, err := json.Marshal(v) + if err != nil { + return err + } + dir := filepath.Dir(path) + tmp, err := os.CreateTemp(dir, ".tmp-*") + if err != nil { + return err + } + tmpName := tmp.Name() + if _, err := tmp.Write(data); err != nil { + tmp.Close() + os.Remove(tmpName) + return err + } + if err := tmp.Close(); err != nil { + os.Remove(tmpName) + return err + } + return os.Rename(tmpName, path) +} + +func ensureMap(m *map[string]string) map[string]string { + if *m == nil { + *m = make(map[string]string) + } + return *m +} diff --git a/internal/correlation/store_test.go b/internal/correlation/store_test.go new file mode 100644 index 0000000..28af335 --- /dev/null +++ b/internal/correlation/store_test.go @@ -0,0 +1,170 @@ +package correlation + +import ( + "os" + "path/filepath" + "sync" + "testing" +) + +func TestLoadOrCreateTrace_NewSession(t *testing.T) { + dir := t.TempDir() + s := NewStore(dir) + + rec, err := s.LoadOrCreateTrace("sess-1") + if err != nil { + t.Fatalf("LoadOrCreateTrace: %v", err) + } + if rec.SessionID != "sess-1" { + t.Errorf("session id = %q", rec.SessionID) + } + if !IsValidTraceID(rec.TraceID) { + t.Errorf("invalid trace id %q", rec.TraceID) + } + + // File exists on disk. + if _, err := os.Stat(filepath.Join(dir, "traces", "sess-1.json")); err != nil { + t.Errorf("trace file missing: %v", err) + } +} + +func TestLoadOrCreateTrace_Reuses(t *testing.T) { + dir := t.TempDir() + s := NewStore(dir) + + a, err := s.LoadOrCreateTrace("sess-1") + if err != nil { + t.Fatal(err) + } + b, err := s.LoadOrCreateTrace("sess-1") + if err != nil { + t.Fatal(err) + } + if a.TraceID != b.TraceID { + t.Errorf("trace ids differ: %s vs %s", a.TraceID, b.TraceID) + } +} + +func TestLoadOrCreateTrace_Concurrent(t *testing.T) { + dir := t.TempDir() + s := NewStore(dir) + + const n = 50 + results := make([]string, n) + var wg sync.WaitGroup + wg.Add(n) + for i := 0; i < n; i++ { + i := i + go func() { + defer wg.Done() + rec, err := s.LoadOrCreateTrace("race-session") + if err != nil { + t.Errorf("LoadOrCreateTrace: %v", err) + return + } + results[i] = rec.TraceID + }() + } + wg.Wait() + + first := results[0] + for i, id := range results { + if id != first { + t.Errorf("goroutine %d got trace_id %s, want %s", i, id, first) + } + } +} + +func TestLoadOrCreateTrace_EmptySession(t *testing.T) { + s := NewStore(t.TempDir()) + if _, err := s.LoadOrCreateTrace(""); err == nil { + t.Fatal("expected error for empty session id") + } +} + +func TestRecordAndLookupSpan(t *testing.T) { + s := NewStore(t.TempDir()) + + if err := s.RecordSpan("s1", SpanKindToolUse, "toolu_abc", "00f067aa0ba902b7"); err != nil { + t.Fatalf("RecordSpan: %v", err) + } + got := s.LookupParent("s1", SpanKindToolUse, "toolu_abc") + if got != "00f067aa0ba902b7" { + t.Errorf("LookupParent = %q, want 00f067aa0ba902b7", got) + } + + // Missing key returns empty. + if got := s.LookupParent("s1", SpanKindToolUse, "missing"); got != "" { + t.Errorf("missing key lookup = %q", got) + } +} + +func TestLastPromptSubmit(t *testing.T) { + s := NewStore(t.TempDir()) + + if err := s.RecordLastPromptSubmit("s1", "aabbccddeeff0011"); err != nil { + t.Fatalf("RecordLastPromptSubmit: %v", err) + } + if got := s.LookupLastPromptSubmit("s1"); got != "aabbccddeeff0011" { + t.Errorf("LookupLastPromptSubmit = %q", got) + } + + // Overwrite. + if err := s.RecordLastPromptSubmit("s1", "1122334455667788"); err != nil { + t.Fatalf("RecordLastPromptSubmit: %v", err) + } + if got := s.LookupLastPromptSubmit("s1"); got != "1122334455667788" { + t.Errorf("after overwrite = %q", got) + } +} + +func TestRootSpan(t *testing.T) { + s := NewStore(t.TempDir()) + + if err := s.RecordRootSpan("s1", "1111111111111111"); err != nil { + t.Fatalf("RecordRootSpan: %v", err) + } + if got := s.LookupRootSpan("s1"); got != "1111111111111111" { + t.Errorf("LookupRootSpan = %q", got) + } +} + +func TestLoadSpans_CorruptFile(t *testing.T) { + dir := t.TempDir() + s := NewStore(dir) + + // Pre-create a corrupt spans file. + if err := os.MkdirAll(filepath.Join(dir, "traces"), 0700); err != nil { + t.Fatal(err) + } + corrupt := filepath.Join(dir, "traces", "s1.spans.json") + if err := os.WriteFile(corrupt, []byte("not json {"), 0600); err != nil { + t.Fatal(err) + } + + // Should treat as empty, not crash. + rec, err := s.LoadSpans("s1") + if err != nil { + t.Fatalf("LoadSpans: %v", err) + } + if rec == nil { + t.Fatal("nil record") + } + if len(rec.ToolUses) != 0 { + t.Errorf("expected empty ToolUses, got %v", rec.ToolUses) + } +} + +func TestLookupParent_UnknownKind(t *testing.T) { + s := NewStore(t.TempDir()) + if got := s.LookupParent("s1", SpanKind("nonsense"), "key"); got != "" { + t.Errorf("unknown kind lookup = %q", got) + } +} + +func TestRecordSpan_UnknownKind(t *testing.T) { + s := NewStore(t.TempDir()) + if err := s.RecordSpan("s1", SpanKind("nonsense"), "k", "00f067aa0ba902b7"); err == nil { + t.Error("expected error for unknown kind") + } +} diff --git a/internal/envelope/envelope.go b/internal/envelope/envelope.go index 38957d7..d61eb2c 100644 --- a/internal/envelope/envelope.go +++ b/internal/envelope/envelope.go @@ -9,7 +9,7 @@ import ( // The platform handles all transformation to canonical format. type RawEventEnvelope struct { // Envelope metadata - EnvelopeVersion string `json:"envelope_version"` // Currently "1.0" + EnvelopeVersion string `json:"envelope_version"` // Currently "1.1" CliVersion string `json:"cli_version"` // CLI semver // Tool identification @@ -27,6 +27,23 @@ type RawEventEnvelope struct { // Attachment metadata (binary data sent separately in multipart) Attachments []AttachmentMetadata `json:"attachments,omitempty"` + + // W3C Trace Context-compatible correlation IDs. + // Optional: older CLIs won't emit it; servers should treat absence as + // "no correlation, fall back to existing heuristics". + Correlation *Correlation `json:"correlation,omitempty"` +} + +// Correlation carries W3C Trace Context-compatible IDs so events can be +// stitched into a single trace. Generated locally; not OTEL-SDK backed. +type Correlation struct { + // TraceID is 32 lowercase hex chars (16 bytes), stable per session. + TraceID string `json:"trace_id"` + // SpanID is 16 lowercase hex chars (8 bytes), unique per event. + SpanID string `json:"span_id"` + // ParentSpanID is 16 lowercase hex chars when this event has a known + // parent in a defined event-chain (tool_post → tool_pre, etc.). + ParentSpanID string `json:"parent_span_id,omitempty"` } // AttachmentMetadata describes an attachment sent with the envelope. @@ -62,7 +79,7 @@ type GitContext struct { // New creates a new RawEventEnvelope func New(cliVersion, tool, hookEvent string, nativePayload []byte, git *GitContext) *RawEventEnvelope { return &RawEventEnvelope{ - EnvelopeVersion: "1.0", + EnvelopeVersion: "1.1", CliVersion: cliVersion, Tool: tool, HookEvent: hookEvent, @@ -75,7 +92,7 @@ func New(cliVersion, tool, hookEvent string, nativePayload []byte, git *GitConte // NewWithAttachments creates a new RawEventEnvelope with attachment metadata func NewWithAttachments(cliVersion, tool, hookEvent string, nativePayload []byte, git *GitContext, attachments []AttachmentMetadata) *RawEventEnvelope { return &RawEventEnvelope{ - EnvelopeVersion: "1.0", + EnvelopeVersion: "1.1", CliVersion: cliVersion, Tool: tool, HookEvent: hookEvent, From f921fe754d3955cfe0276626ce0588675e6370b4 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 20:38:06 +0000 Subject: [PATCH 2/5] fix(correlation): close race in LoadOrCreateTrace concurrent creation O_CREATE|O_EXCL leaves a window where another process can see the file exists but hasn't been written yet, so the read-after-EEXIST path returned malformed JSON under load. Switch to write-tempfile + os.Link: the target only becomes visible once it's fully populated, and Link fails with EEXIST atomically when another writer won the race. Verified with 20 -race iterations of TestLoadOrCreateTrace_Concurrent. https://claude.ai/code/session_01XjfDVBhyo2F4NwTSvGA9aa --- internal/correlation/store.go | 40 +++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/internal/correlation/store.go b/internal/correlation/store.go index 0c68462..2e71e93 100644 --- a/internal/correlation/store.go +++ b/internal/correlation/store.go @@ -100,32 +100,46 @@ func (s *Store) LoadOrCreateTrace(sessionID string) (*TraceRecord, error) { return rec, nil } - // Slow path: try to create exclusively. + // Slow path: write to a tempfile, then atomically link to the target. + // os.Link fails with EEXIST when the target already exists, which is the + // only race-safe primitive for "create-only" — unlike Rename, it never + // overwrites, and unlike O_CREATE|O_EXCL + sequential write, the file is + // fully populated at the moment any concurrent reader can see it. rec := &TraceRecord{ SessionID: sessionID, TraceID: NewTraceID(), CreatedAt: time.Now().UTC(), LastSeenAt: time.Now().UTC(), } - data, err := json.Marshal(rec) if err != nil { return nil, fmt.Errorf("correlation: marshal trace: %w", err) } - f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0600) + tmp, err := os.CreateTemp(s.tracesDir(), ".tmp-*") if err != nil { - if os.IsExist(err) { - // Lost the race; another hook wrote it first. - if existing, rerr := readTrace(path); rerr == nil { - return existing, nil - } - } - return nil, fmt.Errorf("correlation: create trace: %w", err) + return nil, fmt.Errorf("correlation: tempfile: %w", err) + } + tmpName := tmp.Name() + defer os.Remove(tmpName) + if _, err := tmp.Write(data); err != nil { + tmp.Close() + return nil, fmt.Errorf("correlation: write tempfile: %w", err) } - defer f.Close() - if _, err := f.Write(data); err != nil { - return nil, fmt.Errorf("correlation: write trace: %w", err) + if err := tmp.Close(); err != nil { + return nil, fmt.Errorf("correlation: close tempfile: %w", err) + } + + if err := os.Link(tmpName, path); err != nil { + if !os.IsExist(err) { + return nil, fmt.Errorf("correlation: link trace: %w", err) + } + // Lost the race; another process won. Read theirs. + existing, rerr := readTrace(path) + if rerr != nil { + return nil, fmt.Errorf("correlation: read after race: %w", rerr) + } + return existing, nil } return rec, nil } From 3301c2fe22e5163b64f891ecb62b4fef0812089c Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 21:02:23 +0000 Subject: [PATCH 3/5] docs(correlation): note that hook payloads don't carry tool_use_id Per Claude Code hook docs, PreToolUse/PostToolUse and the elicitation/ task events only carry session_id + transcript_path; rich identifiers (tool_use_id, task_id, elicitation_id) live in the transcript JSONL. The chain-keying remains correct for synthetic input and future Claude Code versions, and SubagentStart/Stop still resolves via agent_id. https://claude.ai/code/session_01XjfDVBhyo2F4NwTSvGA9aa --- cmd/correlation.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cmd/correlation.go b/cmd/correlation.go index efb3e36..c279ebf 100644 --- a/cmd/correlation.go +++ b/cmd/correlation.go @@ -47,6 +47,14 @@ func buildCorrelation(tool, hookEvent, sessionID string, nativeEvent map[string] // lookupParentSpan resolves parent_span_id for known event chains. // Returns "" if no parent applies or the lookup misses. +// +// Note: Claude Code's hook payloads do NOT currently carry tool_use_id / +// task_id / elicitation_id directly — those fields only appear in the +// transcript JSONL referenced by transcript_path. The chain-keying below +// remains a no-op for those events in real traffic; server-side adapters +// can enrich linkage by parsing transcript_path. SubagentStart/Stop do +// carry agent_id and will resolve. session_id-keyed chains (PreCompact, +// SessionEnd, Stop) work everywhere. func lookupParentSpan(store *correlation.Store, tool, hookEvent, sessionID string, e map[string]interface{}) string { switch tool { case "claude-code": From 0ac81d9524fec3153cae668c2de47a70acab5ecf Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 21:10:25 +0000 Subject: [PATCH 4/5] refactor: introduce enrichment envelope block (envelope_version 1.2) Move git context and correlation IDs out of top-level envelope fields and into a single `enrichment` block alongside new source/host/os/arch fields. The CLI stays a thin client: it forwards the tool's raw native_payload untouched, plus enrichment hints the server can use when normalizing. Envelope shape (1.2): { envelope_version, cli_version, tool, hook_event, captured_at, native_payload, # raw, untouched attachments, # multipart binary metadata enrichment: { # CLI-computed context git, source, correlation, host, os, arch } } Source provider is derived from the git remote URL (github / gitlab / bitbucket / azure / codeberg / sourcehut), supports both SSH and HTTPS forms; returns "" for unrecognized hosts. Server-side handles all version translation per the thin-client design. https://claude.ai/code/session_01XjfDVBhyo2F4NwTSvGA9aa --- cmd/hook.go | 35 ++++++++++++++++--- internal/envelope/envelope.go | 63 +++++++++++++++++++++++++--------- internal/git/source.go | 62 +++++++++++++++++++++++++++++++++ internal/git/source_test.go | 64 +++++++++++++++++++++++++++++++++++ 4 files changed, 204 insertions(+), 20 deletions(-) create mode 100644 internal/git/source.go create mode 100644 internal/git/source_test.go diff --git a/cmd/hook.go b/cmd/hook.go index 82a05e6..8a407d7 100644 --- a/cmd/hook.go +++ b/cmd/hook.go @@ -7,6 +7,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "time" "github.com/google/uuid" @@ -127,6 +128,8 @@ func processHookEvent() error { } } + enr := buildEnrichment(gitCtx, corr) + apiClient := client.NewClient(cfg, Version) // For UserPromptSubmit events, check if the user's message includes attachments @@ -164,8 +167,7 @@ func processHookEvent() error { } // Create envelope with attachment metadata - env := envelope.NewWithAttachments(Version, tool, hookEvent, rawInput, gitCtx, envAttachments) - env.Correlation = corr + env := envelope.NewWithAttachments(Version, tool, hookEvent, rawInput, enr, envAttachments) // Send via multipart with binary attachments if err := apiClient.SendEnvelopeWithAttachmentsAsync(env, attachmentData); err != nil { @@ -180,8 +182,7 @@ func processHookEvent() error { } // Create envelope with raw payload (no attachments case, or non-UserPromptSubmit events) - env := envelope.New(Version, tool, hookEvent, rawInput, gitCtx) - env.Correlation = corr + env := envelope.New(Version, tool, hookEvent, rawInput, enr) fileLog("Created envelope: tool=%s, event=%s", tool, hookEvent) @@ -195,6 +196,32 @@ func processHookEvent() error { return nil } +// buildEnrichment assembles the enrichment block: CLI-computed context that +// augments the raw native payload (git, source provider, correlation IDs, +// host/os/arch). Returns nil only when there's nothing to send. +func buildEnrichment(gitCtx *envelope.GitContext, corr *envelope.Correlation) *envelope.Enrichment { + enr := &envelope.Enrichment{ + Git: gitCtx, + Correlation: corr, + Host: hostname(), + OS: runtime.GOOS, + Arch: runtime.GOARCH, + } + if gitCtx != nil { + enr.Source = git.DetectSource(gitCtx.RemoteURL) + } + return enr +} + +// hostname returns the machine hostname or "" if unavailable. +func hostname() string { + h, err := os.Hostname() + if err != nil { + return "" + } + return h +} + // detectTool identifies which AI tool generated the event func detectTool(event map[string]interface{}) string { // Check environment variable override first diff --git a/internal/envelope/envelope.go b/internal/envelope/envelope.go index d61eb2c..e590542 100644 --- a/internal/envelope/envelope.go +++ b/internal/envelope/envelope.go @@ -5,12 +5,21 @@ import ( "time" ) +// EnvelopeVersion is the current envelope schema version. The platform is +// expected to handle older versions transparently — the CLI is intentionally +// thin and the server normalizes everything. +const EnvelopeVersion = "1.2" + // RawEventEnvelope is the wrapper sent to the platform API. -// The platform handles all transformation to canonical format. +// +// The CLI is a thin client: it forwards the tool's raw event payload +// untouched (`native_payload`), plus an `enrichment` block of locally +// computed/inferred context (git, host, correlation IDs, etc.) to help the +// server normalize. All canonical-format work happens server-side. type RawEventEnvelope struct { // Envelope metadata - EnvelopeVersion string `json:"envelope_version"` // Currently "1.1" - CliVersion string `json:"cli_version"` // CLI semver + EnvelopeVersion string `json:"envelope_version"` + CliVersion string `json:"cli_version"` // Tool identification Tool string `json:"tool"` // claude-code, cursor, gemini-cli, etc. @@ -19,19 +28,40 @@ type RawEventEnvelope struct { // Timing CapturedAt string `json:"captured_at"` // ISO8601 timestamp - // Git context (extracted by CLI) - Git *GitContext `json:"git,omitempty"` - // Raw native payload (passed through untouched) NativePayload json.RawMessage `json:"native_payload"` // Attachment metadata (binary data sent separately in multipart) Attachments []AttachmentMetadata `json:"attachments,omitempty"` + // Enrichment is everything the CLI added on top of the raw payload. + // Optional: the server should treat absence as "no enrichment available" + // rather than erroring. + Enrichment *Enrichment `json:"enrichment,omitempty"` +} + +// Enrichment carries CLI-computed context that augments the raw payload. +// Add new fields here rather than at the top level so the envelope keeps a +// clean separation between metadata, raw data, and enrichment. +type Enrichment struct { + // Git context (extracted by walking up from cwd). + Git *GitContext `json:"git,omitempty"` + + // Source provider derived from the git remote URL: "github", "gitlab", + // "bitbucket", "azure", or "" when unknown / no remote. + Source string `json:"source,omitempty"` + // W3C Trace Context-compatible correlation IDs. - // Optional: older CLIs won't emit it; servers should treat absence as - // "no correlation, fall back to existing heuristics". Correlation *Correlation `json:"correlation,omitempty"` + + // Host is the machine hostname (best-effort; "" if unavailable). + Host string `json:"host,omitempty"` + + // OS is runtime.GOOS (linux, darwin, windows, ...). + OS string `json:"os,omitempty"` + + // Arch is runtime.GOARCH (amd64, arm64, ...). + Arch string `json:"arch,omitempty"` } // Correlation carries W3C Trace Context-compatible IDs so events can be @@ -76,30 +106,31 @@ type GitContext struct { IsDetachedHead bool `json:"is_detached_head,omitempty"` } -// New creates a new RawEventEnvelope -func New(cliVersion, tool, hookEvent string, nativePayload []byte, git *GitContext) *RawEventEnvelope { +// New creates a new RawEventEnvelope with the given enrichment block. +// Pass nil for enrichment if none is available. +func New(cliVersion, tool, hookEvent string, nativePayload []byte, enr *Enrichment) *RawEventEnvelope { return &RawEventEnvelope{ - EnvelopeVersion: "1.1", + EnvelopeVersion: EnvelopeVersion, CliVersion: cliVersion, Tool: tool, HookEvent: hookEvent, CapturedAt: time.Now().UTC().Format(time.RFC3339), - Git: git, NativePayload: nativePayload, + Enrichment: enr, } } -// NewWithAttachments creates a new RawEventEnvelope with attachment metadata -func NewWithAttachments(cliVersion, tool, hookEvent string, nativePayload []byte, git *GitContext, attachments []AttachmentMetadata) *RawEventEnvelope { +// NewWithAttachments creates a new RawEventEnvelope with attachment metadata. +func NewWithAttachments(cliVersion, tool, hookEvent string, nativePayload []byte, enr *Enrichment, attachments []AttachmentMetadata) *RawEventEnvelope { return &RawEventEnvelope{ - EnvelopeVersion: "1.1", + EnvelopeVersion: EnvelopeVersion, CliVersion: cliVersion, Tool: tool, HookEvent: hookEvent, CapturedAt: time.Now().UTC().Format(time.RFC3339), - Git: git, NativePayload: nativePayload, Attachments: attachments, + Enrichment: enr, } } diff --git a/internal/git/source.go b/internal/git/source.go new file mode 100644 index 0000000..aec799b --- /dev/null +++ b/internal/git/source.go @@ -0,0 +1,62 @@ +package git + +import ( + "strings" +) + +// DetectSource maps a git remote URL to a short provider name. +// Returns "" when the URL is empty or the provider is not recognized. +// +// Supports both SSH (git@github.com:org/repo.git) and HTTPS +// (https://github.com/org/repo) forms. +func DetectSource(remoteURL string) string { + if remoteURL == "" { + return "" + } + host := extractHost(remoteURL) + host = strings.ToLower(host) + + switch { + case host == "github.com" || strings.HasSuffix(host, ".github.com"): + return "github" + case host == "gitlab.com" || strings.HasSuffix(host, ".gitlab.com"): + return "gitlab" + case host == "bitbucket.org" || strings.HasSuffix(host, ".bitbucket.org"): + return "bitbucket" + case host == "dev.azure.com" || strings.HasSuffix(host, ".visualstudio.com"): + return "azure" + case host == "codeberg.org": + return "codeberg" + case host == "git.sr.ht": + return "sourcehut" + } + return "" +} + +// extractHost pulls the hostname out of common git remote URL forms. +func extractHost(remoteURL string) string { + // SSH: git@host:path or ssh://git@host/path + if strings.HasPrefix(remoteURL, "git@") { + rest := strings.TrimPrefix(remoteURL, "git@") + if idx := strings.IndexAny(rest, ":/"); idx >= 0 { + return rest[:idx] + } + return rest + } + + // scheme://[user@]host[:port]/path + if idx := strings.Index(remoteURL, "://"); idx >= 0 { + rest := remoteURL[idx+3:] + // Strip optional userinfo + if at := strings.Index(rest, "@"); at >= 0 { + rest = rest[at+1:] + } + // Up to first '/' or ':' + if end := strings.IndexAny(rest, "/:"); end >= 0 { + return rest[:end] + } + return rest + } + + return "" +} diff --git a/internal/git/source_test.go b/internal/git/source_test.go new file mode 100644 index 0000000..70b2d4a --- /dev/null +++ b/internal/git/source_test.go @@ -0,0 +1,64 @@ +package git + +import "testing" + +func TestDetectSource(t *testing.T) { + cases := []struct { + in string + want string + }{ + // GitHub + {"git@github.com:promptconduit/cli.git", "github"}, + {"https://github.com/promptconduit/cli.git", "github"}, + {"https://user:pat@github.com/foo/bar", "github"}, + {"ssh://git@github.com/foo/bar.git", "github"}, + // Enterprise GitHub subdomain + {"git@code.github.com:foo/bar.git", "github"}, + // GitLab + {"git@gitlab.com:group/proj.git", "gitlab"}, + {"https://gitlab.com/group/proj", "gitlab"}, + {"git@gitlab.example.gitlab.com:foo/bar.git", "gitlab"}, + // Bitbucket + {"git@bitbucket.org:team/repo.git", "bitbucket"}, + {"https://bitbucket.org/team/repo", "bitbucket"}, + // Azure DevOps + {"https://dev.azure.com/org/proj/_git/repo", "azure"}, + {"https://org.visualstudio.com/proj/_git/repo", "azure"}, + // Codeberg / SourceHut + {"https://codeberg.org/foo/bar", "codeberg"}, + {"git@git.sr.ht:~user/repo", "sourcehut"}, + // Unknown / empty + {"", ""}, + {"git@self-hosted.example.com:foo/bar.git", ""}, + {"https://internal.corp/foo/bar.git", ""}, + // Case insensitive + {"git@GitHub.com:foo/bar.git", "github"}, + } + + for _, tc := range cases { + got := DetectSource(tc.in) + if got != tc.want { + t.Errorf("DetectSource(%q) = %q, want %q", tc.in, got, tc.want) + } + } +} + +func TestExtractHost(t *testing.T) { + cases := []struct { + in string + want string + }{ + {"git@github.com:foo/bar.git", "github.com"}, + {"https://github.com/foo/bar", "github.com"}, + {"https://user@github.com:443/foo/bar", "github.com"}, + {"ssh://git@host.example/foo", "host.example"}, + {"", ""}, + {"not-a-url", ""}, + } + for _, tc := range cases { + got := extractHost(tc.in) + if got != tc.want { + t.Errorf("extractHost(%q) = %q, want %q", tc.in, got, tc.want) + } + } +} From 1304ad938259c194950db8f9ec6ee97e4d89e073 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 10 May 2026 21:17:31 +0000 Subject: [PATCH 5/5] feat(envelope): mirror enrichment.git/correlation to top-level fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Servers expecting the 1.0/1.1 envelope shape read git context and correlation IDs from top-level fields. To avoid a deploy-ordering risk between this CLI and the platform, restore those top-level fields and have the envelope constructor mirror them automatically from enrichment.git and enrichment.correlation. Both shapes are now on the wire for one or two releases: - Old servers continue reading top-level git / correlation - New servers can begin reading enrichment.git / enrichment.correlation Drop the legacy fields once all servers are upgraded — single mirrorLegacyFields() removal. Adds envelope_test.go covering mirroring, both-shapes-present, nil enrichment, and version pinning. https://claude.ai/code/session_01XjfDVBhyo2F4NwTSvGA9aa --- internal/envelope/envelope.go | 35 +++++++++++- internal/envelope/envelope_test.go | 91 ++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 internal/envelope/envelope_test.go diff --git a/internal/envelope/envelope.go b/internal/envelope/envelope.go index e590542..6277075 100644 --- a/internal/envelope/envelope.go +++ b/internal/envelope/envelope.go @@ -38,6 +38,17 @@ type RawEventEnvelope struct { // Optional: the server should treat absence as "no enrichment available" // rather than erroring. Enrichment *Enrichment `json:"enrichment,omitempty"` + + // Deprecated: use Enrichment.Git. Mirrored for one or two releases so + // servers expecting the 1.0/1.1 shape continue to receive git context. + // Remove once all servers read from enrichment.git. + Git *GitContext `json:"git,omitempty"` + + // Deprecated: use Enrichment.Correlation. Mirrored for one or two + // releases so servers expecting the 1.1 shape continue to receive + // correlation IDs. Remove once all servers read from + // enrichment.correlation. + Correlation *Correlation `json:"correlation,omitempty"` } // Enrichment carries CLI-computed context that augments the raw payload. @@ -109,7 +120,7 @@ type GitContext struct { // New creates a new RawEventEnvelope with the given enrichment block. // Pass nil for enrichment if none is available. func New(cliVersion, tool, hookEvent string, nativePayload []byte, enr *Enrichment) *RawEventEnvelope { - return &RawEventEnvelope{ + env := &RawEventEnvelope{ EnvelopeVersion: EnvelopeVersion, CliVersion: cliVersion, Tool: tool, @@ -118,11 +129,13 @@ func New(cliVersion, tool, hookEvent string, nativePayload []byte, enr *Enrichme NativePayload: nativePayload, Enrichment: enr, } + mirrorLegacyFields(env) + return env } // NewWithAttachments creates a new RawEventEnvelope with attachment metadata. func NewWithAttachments(cliVersion, tool, hookEvent string, nativePayload []byte, enr *Enrichment, attachments []AttachmentMetadata) *RawEventEnvelope { - return &RawEventEnvelope{ + env := &RawEventEnvelope{ EnvelopeVersion: EnvelopeVersion, CliVersion: cliVersion, Tool: tool, @@ -132,6 +145,24 @@ func NewWithAttachments(cliVersion, tool, hookEvent string, nativePayload []byte Attachments: attachments, Enrichment: enr, } + mirrorLegacyFields(env) + return env +} + +// mirrorLegacyFields copies enrichment.git and enrichment.correlation to the +// deprecated top-level fields so older servers that read the 1.1 shape keep +// working during the transition. Remove once all servers consume from +// enrichment.* directly. +func mirrorLegacyFields(env *RawEventEnvelope) { + if env.Enrichment == nil { + return + } + if env.Enrichment.Git != nil { + env.Git = env.Enrichment.Git + } + if env.Enrichment.Correlation != nil { + env.Correlation = env.Enrichment.Correlation + } } // ToJSON serializes the envelope to JSON diff --git a/internal/envelope/envelope_test.go b/internal/envelope/envelope_test.go new file mode 100644 index 0000000..d964f24 --- /dev/null +++ b/internal/envelope/envelope_test.go @@ -0,0 +1,91 @@ +package envelope + +import ( + "encoding/json" + "testing" +) + +func TestNew_MirrorsLegacyFields(t *testing.T) { + enr := &Enrichment{ + Git: &GitContext{RepoName: "cli", Branch: "main"}, + Correlation: &Correlation{ + TraceID: "4bf92f3577b34da6a3ce929d0e0e4736", + SpanID: "00f067aa0ba902b7", + }, + } + env := New("dev", "claude-code", "SessionStart", []byte(`{}`), enr) + + if env.Git == nil { + t.Fatal("expected top-level Git to be mirrored from enrichment") + } + if env.Git.RepoName != "cli" { + t.Errorf("Git.RepoName = %q", env.Git.RepoName) + } + if env.Correlation == nil { + t.Fatal("expected top-level Correlation to be mirrored from enrichment") + } + if env.Correlation.TraceID != "4bf92f3577b34da6a3ce929d0e0e4736" { + t.Errorf("Correlation.TraceID = %q", env.Correlation.TraceID) + } +} + +func TestToJSON_BothShapesPresent(t *testing.T) { + enr := &Enrichment{ + Git: &GitContext{RepoName: "cli", Branch: "main"}, + Source: "github", + Correlation: &Correlation{TraceID: "4bf92f3577b34da6a3ce929d0e0e4736", SpanID: "00f067aa0ba902b7"}, + Host: "host", + OS: "linux", + Arch: "amd64", + } + env := New("dev", "claude-code", "SessionStart", []byte(`{}`), enr) + out, err := env.ToJSON() + if err != nil { + t.Fatal(err) + } + + var got map[string]interface{} + if err := json.Unmarshal(out, &got); err != nil { + t.Fatal(err) + } + + // Top-level mirrored + if _, ok := got["git"]; !ok { + t.Errorf("top-level git missing from wire envelope: %s", out) + } + if _, ok := got["correlation"]; !ok { + t.Errorf("top-level correlation missing from wire envelope: %s", out) + } + + // Enrichment block present + gotEnr, ok := got["enrichment"].(map[string]interface{}) + if !ok { + t.Fatalf("enrichment missing or wrong type: %s", out) + } + if _, ok := gotEnr["git"]; !ok { + t.Errorf("enrichment.git missing") + } + if _, ok := gotEnr["correlation"]; !ok { + t.Errorf("enrichment.correlation missing") + } + if gotEnr["source"] != "github" { + t.Errorf("enrichment.source = %v", gotEnr["source"]) + } +} + +func TestNew_NilEnrichment(t *testing.T) { + env := New("dev", "test", "test", []byte(`{}`), nil) + if env.Git != nil { + t.Errorf("expected nil Git when enrichment nil, got %+v", env.Git) + } + if env.Correlation != nil { + t.Errorf("expected nil Correlation when enrichment nil") + } +} + +func TestEnvelopeVersion(t *testing.T) { + env := New("dev", "test", "test", []byte(`{}`), nil) + if env.EnvelopeVersion != "1.2" { + t.Errorf("envelope version = %q, want 1.2", env.EnvelopeVersion) + } +}