From b5974d3e2ecf1308c43a1631c93aea2a065f5cb0 Mon Sep 17 00:00:00 2001 From: Joshua Tjhin Date: Wed, 10 Jun 2026 12:09:13 -0700 Subject: [PATCH 1/4] fix: log Anthropic system message first in span input The Anthropic instrumentation merges the top-level `system` param into the logged messages array, but appended it after the conversation messages. The model actually receives the system prompt first, and downstream consumers (e.g. opening a logged span in the playground) expect chat-message order, so a trailing system message had to be manually reordered before reuse. Prepend the system message instead, and update the e2e span-tree snapshots to match. --- .../anthropic-v0273.span-tree.json | 8 ++-- .../anthropic-v0273.span-tree.txt | 8 ++-- .../anthropic-v0390.span-tree.json | 8 ++-- .../anthropic-v0390.span-tree.txt | 8 ++-- .../anthropic-v0712.span-tree.json | 8 ++-- .../anthropic-v0712.span-tree.txt | 8 ++-- .../anthropic-v0730.span-tree.json | 8 ++-- .../anthropic-v0730.span-tree.txt | 8 ++-- .../anthropic-v0780.span-tree.json | 8 ++-- .../anthropic-v0780.span-tree.txt | 8 ++-- .../anthropic-v0800.span-tree.json | 8 ++-- .../anthropic-v0800.span-tree.txt | 8 ++-- .../plugins/anthropic-plugin.test.ts | 47 +++++++++++++++++++ .../plugins/anthropic-plugin.ts | 8 ++-- 14 files changed, 100 insertions(+), 51 deletions(-) diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-tree.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-tree.json index 91d38bb51..7f6197d6b 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-tree.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-tree.json @@ -57,10 +57,6 @@ "type": "llm", "children": [], "input": [ - { - "content": "Bonjour mon ami!", - "role": "user" - }, { "content": [ { @@ -77,6 +73,10 @@ } ], "role": "system" + }, + { + "content": "Bonjour mon ami!", + "role": "user" } ], "output": { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-tree.txt b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-tree.txt index 1284b06c6..2c194b682 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-tree.txt +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-tree.txt @@ -49,10 +49,6 @@ span_tree: │ └── anthropic.messages.create [llm] │ input: [ │ { - │ "content": "Bonjour mon ami!", - │ "role": "user" - │ }, - │ { │ "content": [ │ { │ "text": "translate to english", @@ -68,6 +64,10 @@ span_tree: │ } │ ], │ "role": "system" + │ }, + │ { + │ "content": "Bonjour mon ami!", + │ "role": "user" │ } │ ] │ output: { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-tree.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-tree.json index ea8c75692..480a3aa60 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-tree.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-tree.json @@ -57,10 +57,6 @@ "type": "llm", "children": [], "input": [ - { - "content": "Bonjour mon ami!", - "role": "user" - }, { "content": [ { @@ -77,6 +73,10 @@ } ], "role": "system" + }, + { + "content": "Bonjour mon ami!", + "role": "user" } ], "output": { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-tree.txt b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-tree.txt index 5294510cd..490a4d0f3 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-tree.txt +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-tree.txt @@ -49,10 +49,6 @@ span_tree: │ └── anthropic.messages.create [llm] │ input: [ │ { - │ "content": "Bonjour mon ami!", - │ "role": "user" - │ }, - │ { │ "content": [ │ { │ "text": "translate to english", @@ -68,6 +64,10 @@ span_tree: │ } │ ], │ "role": "system" + │ }, + │ { + │ "content": "Bonjour mon ami!", + │ "role": "user" │ } │ ] │ output: { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-tree.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-tree.json index 1eae56496..ba8dcaa03 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-tree.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-tree.json @@ -57,10 +57,6 @@ "type": "llm", "children": [], "input": [ - { - "content": "Bonjour mon ami!", - "role": "user" - }, { "content": [ { @@ -77,6 +73,10 @@ } ], "role": "system" + }, + { + "content": "Bonjour mon ami!", + "role": "user" } ], "output": { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-tree.txt b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-tree.txt index 448d4bf5f..d8b0c4859 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-tree.txt +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-tree.txt @@ -49,10 +49,6 @@ span_tree: │ └── anthropic.messages.create [llm] │ input: [ │ { - │ "content": "Bonjour mon ami!", - │ "role": "user" - │ }, - │ { │ "content": [ │ { │ "text": "translate to english", @@ -68,6 +64,10 @@ span_tree: │ } │ ], │ "role": "system" + │ }, + │ { + │ "content": "Bonjour mon ami!", + │ "role": "user" │ } │ ] │ output: { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-tree.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-tree.json index 5f2716071..70b0e16e9 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-tree.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-tree.json @@ -57,10 +57,6 @@ "type": "llm", "children": [], "input": [ - { - "content": "Bonjour mon ami!", - "role": "user" - }, { "content": [ { @@ -77,6 +73,10 @@ } ], "role": "system" + }, + { + "content": "Bonjour mon ami!", + "role": "user" } ], "output": { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-tree.txt b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-tree.txt index 21afa203d..17fc49f44 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-tree.txt +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-tree.txt @@ -49,10 +49,6 @@ span_tree: │ └── anthropic.messages.create [llm] │ input: [ │ { - │ "content": "Bonjour mon ami!", - │ "role": "user" - │ }, - │ { │ "content": [ │ { │ "text": "translate to english", @@ -68,6 +64,10 @@ span_tree: │ } │ ], │ "role": "system" + │ }, + │ { + │ "content": "Bonjour mon ami!", + │ "role": "user" │ } │ ] │ output: { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-tree.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-tree.json index 051e9c9ca..c6348050a 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-tree.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-tree.json @@ -57,10 +57,6 @@ "type": "llm", "children": [], "input": [ - { - "content": "Bonjour mon ami!", - "role": "user" - }, { "content": [ { @@ -77,6 +73,10 @@ } ], "role": "system" + }, + { + "content": "Bonjour mon ami!", + "role": "user" } ], "output": { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-tree.txt b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-tree.txt index 782feeb64..f49bf60e4 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-tree.txt +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-tree.txt @@ -49,10 +49,6 @@ span_tree: │ └── anthropic.messages.create [llm] │ input: [ │ { - │ "content": "Bonjour mon ami!", - │ "role": "user" - │ }, - │ { │ "content": [ │ { │ "text": "translate to english", @@ -68,6 +64,10 @@ span_tree: │ } │ ], │ "role": "system" + │ }, + │ { + │ "content": "Bonjour mon ami!", + │ "role": "user" │ } │ ] │ output: { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-tree.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-tree.json index 72207ab39..377171e7f 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-tree.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-tree.json @@ -57,10 +57,6 @@ "type": "llm", "children": [], "input": [ - { - "content": "Bonjour mon ami!", - "role": "user" - }, { "content": [ { @@ -77,6 +73,10 @@ } ], "role": "system" + }, + { + "content": "Bonjour mon ami!", + "role": "user" } ], "output": { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-tree.txt b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-tree.txt index 7b7e87d79..91de1916f 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-tree.txt +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-tree.txt @@ -49,10 +49,6 @@ span_tree: │ └── anthropic.messages.create [llm] │ input: [ │ { - │ "content": "Bonjour mon ami!", - │ "role": "user" - │ }, - │ { │ "content": [ │ { │ "text": "translate to english", @@ -68,6 +64,10 @@ span_tree: │ } │ ], │ "role": "system" + │ }, + │ { + │ "content": "Bonjour mon ami!", + │ "role": "user" │ } │ ] │ output: { diff --git a/js/src/instrumentation/plugins/anthropic-plugin.test.ts b/js/src/instrumentation/plugins/anthropic-plugin.test.ts index f50426b59..efe50aedb 100644 --- a/js/src/instrumentation/plugins/anthropic-plugin.test.ts +++ b/js/src/instrumentation/plugins/anthropic-plugin.test.ts @@ -12,6 +12,7 @@ import { parseMetricsFromUsage, aggregateAnthropicStreamChunks, processAttachmentsInInput, + coalesceInput, } from "./anthropic-plugin"; import type { StartEvent } from "../core"; import { Attachment } from "../../logger"; @@ -918,3 +919,49 @@ describe("processAttachmentsInInput", () => { expect(result[0].type).toBe("image"); }); }); + +describe("coalesceInput", () => { + it("should place the system message before the conversation messages", () => { + const messages = [ + { role: "user", content: "Hello" }, + { role: "assistant", content: "Hi there" }, + { role: "user", content: "How are you?" }, + ]; + + const result = coalesceInput(messages, "You are a helpful assistant."); + + expect(result).toEqual([ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "Hello" }, + { role: "assistant", content: "Hi there" }, + { role: "user", content: "How are you?" }, + ]); + }); + + it("should support system content as an array of text blocks", () => { + const messages = [{ role: "user", content: "Bonjour" }]; + const system = [ + { type: "text" as const, text: "translate to english" }, + { type: "text" as const, text: "only the answer no other text" }, + ]; + + const result = coalesceInput(messages, system); + + expect(result[0]).toEqual({ role: "system", content: system }); + expect(result[1]).toEqual({ role: "user", content: "Bonjour" }); + }); + + it("should return messages unchanged when there is no system prompt", () => { + const messages = [{ role: "user", content: "Hello" }]; + + expect(coalesceInput(messages, undefined)).toEqual(messages); + }); + + it("should not mutate the original messages array", () => { + const messages = [{ role: "user", content: "Hello" }]; + + coalesceInput(messages, "system prompt"); + + expect(messages).toEqual([{ role: "user", content: "Hello" }]); + }); +}); diff --git a/js/src/instrumentation/plugins/anthropic-plugin.ts b/js/src/instrumentation/plugins/anthropic-plugin.ts index 132b6a001..e50a500ee 100644 --- a/js/src/instrumentation/plugins/anthropic-plugin.ts +++ b/js/src/instrumentation/plugins/anthropic-plugin.ts @@ -1051,16 +1051,18 @@ export function processAttachmentsInInput(input: unknown): unknown { /** * Convert Anthropic args to the single "input" field Braintrust expects. - * Combines messages array with system message if present. + * Combines messages array with system message if present. The system message + * is placed first to match the order the model sees and the chat-message + * convention used elsewhere (e.g. the playground). */ -function coalesceInput( +export function coalesceInput( messages: AnthropicInputMessage[], system: AnthropicCreateParams["system"], ): AnthropicInputMessage[] { // Make a copy because we're going to mutate it const input = (messages || []).slice(); if (system) { - input.push({ role: "system", content: system }); + input.unshift({ role: "system", content: system }); } return input; } From 9d5406e16c3ccbdcc1c3c6e59084c48e1a07ffa9 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Wed, 17 Jun 2026 18:30:54 +0200 Subject: [PATCH 2/4] cs --- .changeset/fine-bears-punch.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/fine-bears-punch.md diff --git a/.changeset/fine-bears-punch.md b/.changeset/fine-bears-punch.md new file mode 100644 index 000000000..4d090cce0 --- /dev/null +++ b/.changeset/fine-bears-punch.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +fix(anthropic): Log Anthropic system message first in span input From 8a56a9730263c24245ae27e2ebacd7b116630f10 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Thu, 18 Jun 2026 18:05:40 +0200 Subject: [PATCH 3/4] less flake pls --- e2e/scenarios/pi-coding-agent-instrumentation/assertions.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/e2e/scenarios/pi-coding-agent-instrumentation/assertions.ts b/e2e/scenarios/pi-coding-agent-instrumentation/assertions.ts index c51445e47..e9b8dd304 100644 --- a/e2e/scenarios/pi-coding-agent-instrumentation/assertions.ts +++ b/e2e/scenarios/pi-coding-agent-instrumentation/assertions.ts @@ -170,9 +170,6 @@ export function definePiCodingAgentInstrumentationAssertions(options: { "pi-coding-agent-prompt-operation", ); const task = findPiTask(events); - const anthropicSpans = events.filter( - (event) => event.span.name === "anthropic.messages.create", - ); const llmSpans = findChildSpans( events, "anthropic.messages.create", @@ -195,7 +192,6 @@ export function definePiCodingAgentInstrumentationAssertions(options: { }); expect(llm).toBeDefined(); - expect(anthropicSpans).toHaveLength(2); expect(llmSpans).toHaveLength(2); expect(llm?.span.type).toBe("llm"); expect(llm?.row.metadata).toMatchObject({ From d6ca7c35f2a614ff743350ceb8438d175ac56221 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Fri, 19 Jun 2026 13:06:28 +0200 Subject: [PATCH 4/4] don't depend on api key in ci --- .../otel-js/smoke/scenarios/otel-v1/tests/basic.test.ts | 4 +++- .../otel-js/smoke/scenarios/otel-v1/tests/filtering.test.ts | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/integrations/otel-js/smoke/scenarios/otel-v1/tests/basic.test.ts b/integrations/otel-js/smoke/scenarios/otel-v1/tests/basic.test.ts index 8ae35e369..1cdb05f89 100644 --- a/integrations/otel-js/smoke/scenarios/otel-v1/tests/basic.test.ts +++ b/integrations/otel-js/smoke/scenarios/otel-v1/tests/basic.test.ts @@ -21,7 +21,9 @@ async function main() { // Use real OpenTelemetry SDK with BraintrustSpanProcessor const sdk = new NodeSDK({ serviceName: "test-service", - spanProcessor: new BraintrustSpanProcessor() as unknown as SpanProcessor, + spanProcessor: new BraintrustSpanProcessor({ + apiKey: "smoke-test-api-key", + }) as unknown as SpanProcessor, }); await sdk.start(); diff --git a/integrations/otel-js/smoke/scenarios/otel-v1/tests/filtering.test.ts b/integrations/otel-js/smoke/scenarios/otel-v1/tests/filtering.test.ts index 0a7cf10a9..e460a81a7 100644 --- a/integrations/otel-js/smoke/scenarios/otel-v1/tests/filtering.test.ts +++ b/integrations/otel-js/smoke/scenarios/otel-v1/tests/filtering.test.ts @@ -39,6 +39,7 @@ async function main() { const sdk = new NodeSDK({ serviceName: "filtering-test", spanProcessor: new BraintrustSpanProcessor({ + apiKey: "smoke-test-api-key", filterAISpans: true, }) as unknown as SpanProcessor, });