From b2add82c6669a4e6b569472da7ff86a4df296166 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 22 May 2026 15:11:17 -0400 Subject: [PATCH] fix(tracing): Strip runtime context and XML wrappers from observable input messages The `gen_ai.input.messages` attribute on `gen_ai.chat` spans included internal runtime metadata and XML wrapper blocks as part of user message content. The Sentry Conversations view reads this attribute and renders it as the user message. Clean user messages before serializing to `gen_ai.input.messages`: 1. Drop `<runtime-turn-context>` content parts (skills, config, capabilities). 2. Unwrap `<current-instruction>` to extract the actual user text. 3. Strip `<thread-background>`, `<session-context>`, `<turn-context>` blocks so prior assistant responses don't bleed into user messages. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/junior/src/chat/pi/traced-stream.ts | 74 ++++++++++++++++++- .../tests/unit/chat/pi/traced-stream.test.ts | 56 ++++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) diff --git a/packages/junior/src/chat/pi/traced-stream.ts b/packages/junior/src/chat/pi/traced-stream.ts index e6ee7328..49295b3c 100644 --- a/packages/junior/src/chat/pi/traced-stream.ts +++ b/packages/junior/src/chat/pi/traced-stream.ts @@ -13,6 +13,75 @@ import { serializeGenAiAttribute, } from "@/chat/logging"; import { GEN_AI_PROVIDER_NAME } from "@/chat/pi/client"; +import { TURN_CONTEXT_TAG } from "@/chat/turn-context-tag"; + +const TURN_CONTEXT_OPEN = `<${TURN_CONTEXT_TAG}>`; +const CURRENT_INSTRUCTION_RE = + /]*>\n?([\s\S]*?)\n?<\/current-instruction>/; +const WRAPPER_BLOCKS_RE = + /<(?:thread-background|session-context|turn-context)>[\s\S]*?<\/(?:thread-background|session-context|turn-context)>\n*/g; + +/** Extract the user's actual instruction from a `buildUserTurnText` output. 
*/ +function extractUserInstruction(text: string): string { + const match = CURRENT_INSTRUCTION_RE.exec(text); + if (match) { + return match[1].trim(); + } + return text.replace(WRAPPER_BLOCKS_RE, "").trim(); +} + +/** + * Clean user messages for the observable `gen_ai.input.messages` attribute: + * 1. Drop `<runtime-turn-context>` content parts entirely (volatile runtime metadata). + * 2. Unwrap `<current-instruction>` / strip `<thread-background>` etc. from text + * so only the actual user instruction remains. + */ +function cleanUserMessagesForObservability(messages: unknown[]): unknown[] { + return messages.map((msg) => { + const record = msg as Record | null; + if (!record || record.role !== "user") { + return msg; + } + + const content = record.content; + if (!Array.isArray(content)) { + return msg; + } + + let changed = false; + const cleaned = content + .filter((part) => { + if ( + part && + typeof part === "object" && + (part as { type?: unknown }).type === "text" + ) { + const text = (part as { text?: unknown }).text; + if (typeof text === "string" && text.startsWith(TURN_CONTEXT_OPEN)) { + changed = true; + return false; + } + } + return true; + }) + .map((part) => { + if ( + part && + typeof part === "object" && + (part as { type?: unknown }).type === "text" + ) { + const text = (part as { text?: unknown }).text; + if (typeof text === "string" && CURRENT_INSTRUCTION_RE.test(text)) { + changed = true; + return { ...part, text: extractUserInstruction(text) }; + } + } + return part; + }); + + return changed ? { ...record, content: cleaned } : msg; + }); +} // Compose only the OTel GenAI attributes that are knowable at span start // (request-shape + system instructions). 
End-of-call attributes such as @@ -27,7 +96,10 @@ function buildChatStartAttributes( "gen_ai.request.model": model.id, }; - const inputMessages = serializeGenAiAttribute(context.messages); + const observableMessages = cleanUserMessagesForObservability( + context.messages, + ); + const inputMessages = serializeGenAiAttribute(observableMessages); if (inputMessages) { attributes["gen_ai.input.messages"] = inputMessages; } diff --git a/packages/junior/tests/unit/chat/pi/traced-stream.test.ts b/packages/junior/tests/unit/chat/pi/traced-stream.test.ts index c4636d5f..863743a1 100644 --- a/packages/junior/tests/unit/chat/pi/traced-stream.test.ts +++ b/packages/junior/tests/unit/chat/pi/traced-stream.test.ts @@ -116,6 +116,62 @@ describe("createTracedStreamFn", () => { expect(opts.attributes["gen_ai.request.model"]).toBe("openai/gpt-5.4"); }); + it("strips runtime context and XML wrappers from observable user messages", async () => { + const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); + const stream = createAssistantMessageEventStream(); + const base = vi.fn(() => stream); + + const userText = [ + "", + "", + ' prior question', + ' prior answer', + "", + "", + "", + "", + "- gen_ai.conversation.id: conv123", + "", + "", + '', + "what is sentry?", + "", + ].join("\n"); + + const traced = createTracedStreamFn(base as unknown as StreamFn); + await traced( + fakeModel("openai/gpt-5.4"), + { + systemPrompt: "you are junior", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "\nskills and config...\n", + }, + { type: "text", text: userText }, + ], + timestamp: 0, + }, + ], + }, + undefined, + ); + + const opts = startInactiveSpan.mock.calls[0]?.[0] as unknown as { + attributes: Record; + }; + const inputMessages = opts.attributes["gen_ai.input.messages"] as string; + expect(inputMessages).toContain("what is sentry?"); + expect(inputMessages).not.toContain("runtime-turn-context"); + 
expect(inputMessages).not.toContain("thread-background"); + expect(inputMessages).not.toContain("session-context"); + expect(inputMessages).not.toContain("current-instruction"); + expect(inputMessages).not.toContain("prior answer"); + }); + it("sets output.messages, usage tokens, finish_reasons, response.model after stream completion", async () => { const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); const stream = createAssistantMessageEventStream();