From 49d1fcca18326a12008b75936dbf0dd601d84639 Mon Sep 17 00:00:00 2001 From: Andrew Kent Date: Mon, 8 Jun 2026 13:44:09 -0600 Subject: [PATCH] squash BrainstoreTrace LLM convo bug brainstore trace llm convo required a root span for DFS traversal. This didn't work inside of trace scorers because the root span of an eval is not reported until after scoring completes. This change updates the logic to run convo dfs over forest roots in addition to the real root --- .../dev/braintrust/trace/BrainstoreTrace.java | 31 ++++++++--- .../braintrust/trace/BrainstoreTraceTest.java | 54 +++++++++++++++++++ 2 files changed, 77 insertions(+), 8 deletions(-) diff --git a/braintrust-sdk/src/main/java/dev/braintrust/trace/BrainstoreTrace.java b/braintrust-sdk/src/main/java/dev/braintrust/trace/BrainstoreTrace.java index 361980f2..efc8e5c4 100644 --- a/braintrust-sdk/src/main/java/dev/braintrust/trace/BrainstoreTrace.java +++ b/braintrust-sdk/src/main/java/dev/braintrust/trace/BrainstoreTrace.java @@ -146,9 +146,14 @@ public List> getSpans(@Nonnull String spanType) { public List> getLLMConversationThread() { var allSpans = getSpans(); - // Build children map: parent_id → children sorted by start_time + // Build children map: parent_id → children sorted by start_time, and collect the set of + // span IDs present in the fetched results. var children = new java.util.LinkedHashMap>>(); + var presentSpanIds = new java.util.HashSet(); for (var span : allSpans) { + if (span.get("span_id") instanceof String sid) { + presentSpanIds.add(sid); + } var parents = span.get("span_parents"); if (parents instanceof List parentList && !parentList.isEmpty()) { if (parentList.get(0) instanceof String pid) { @@ -163,24 +168,34 @@ public List> getLLMConversationThread() { (a, b) -> Double.compare(getStartTime(a), getStartTime(b)))); - // Find root span (no parents) - var root = + // Find forest roots: spans with no parents, or whose parent is absent from the fetched + // set (their true ancestor hasn't been ingested yet). Sort by start time so the DFS + // visits orphan subtrees in chronological order. + var forestRoots = allSpans.stream() .filter( s -> { var p = s.get("span_parents"); - return p == null || (p instanceof List l && l.isEmpty()); + if (p == null || (p instanceof List l && l.isEmpty())) { + return true; + } + return p instanceof List l + && l.get(0) instanceof String pid + && !presentSpanIds.contains(pid); }) - .findFirst() - .orElse(null); - if (root == null) return List.of(); + .sorted((a, b) -> Double.compare(getStartTime(a), getStartTime(b))) + .toList(); + if (forestRoots.isEmpty()) return List.of(); // Pre-order DFS to get all LLM spans in hierarchy order. // Prune entire subtrees rooted at scorer spans (purpose == "scorer") — these are // synthetic spans injected by the Braintrust backend and not part of the real trace. var llmSpansInOrder = new ArrayList>(); var stack = new java.util.ArrayDeque>(); - stack.push(root); + // Push forest roots in reverse start order so the earliest is processed first. + for (int i = forestRoots.size() - 1; i >= 0; i--) { + stack.push(forestRoots.get(i)); + } while (!stack.isEmpty()) { var span = stack.pop(); if ("automation".equals(getSpanType(span))) { diff --git a/braintrust-sdk/src/test/java/dev/braintrust/trace/BrainstoreTraceTest.java b/braintrust-sdk/src/test/java/dev/braintrust/trace/BrainstoreTraceTest.java index 2e86f852..a9ea8dce 100644 --- a/braintrust-sdk/src/test/java/dev/braintrust/trace/BrainstoreTraceTest.java +++ b/braintrust-sdk/src/test/java/dev/braintrust/trace/BrainstoreTraceTest.java @@ -370,6 +370,60 @@ void getLLMConversationThreadPrunesAutomationSubtrees() { assertEquals(choice, thread.get(1)); } + @Test + void getLLMConversationThreadWorksWithoutRootSpan() { + // Trace scorers run mid-flight: spans close bottom-up, so the parent/root span may not + // yet be ingested. Here the task span's parent ("root") is absent from the fetched set, + // making the task span a forest root. The thread must still be reconstructed. + var sysMsg = Map.of("role", "system", "content", "be helpful"); + var userMsg = Map.of("role", "user", "content", "strawberry"); + var assistantMsg = Map.of("role", "assistant", "content", "fruit"); + var choice = + Map.of( + "finish_reason", "stop", "index", 0, "message", assistantMsg); + + // task span points at a "root" parent that was NOT fetched (root not ended/ingested yet) + var task = taskSpan("task", "root", 1.0); + var llm = llmSpan("llm1", "task", 1.1, List.of(sysMsg, userMsg), List.of(choice)); + + var trace = traceWithSpans(List.of(task, llm)); // no root span present + var thread = trace.getLLMConversationThread(); + + assertEquals(3, thread.size(), "thread must be reconstructed even without the root span"); + assertEquals("system", thread.get(0).get("role")); + assertEquals("user", thread.get(1).get("role")); + assertEquals(assistantMsg, thread.get(2).get("message")); + } + + @Test + void getLLMConversationThreadHandlesMultipleOrphanSubtrees() { + // Multiple orphan subtrees whose common ancestor ("root") is absent. Each subtree is a + // forest root; they must be visited in start-time order and their messages concatenated. + var user1 = Map.of("role", "user", "content", "Q1"); + var asst1 = Map.of("role", "assistant", "content", "A1"); + var choice1 = Map.of("finish_reason", "stop", "message", asst1); + + var user2 = Map.of("role", "user", "content", "Q2"); + var asst2 = Map.of("role", "assistant", "content", "A2"); + var choice2 = Map.of("finish_reason", "stop", "message", asst2); + + // Two task subtrees both parented at a missing "root". Provide out of order; turn2 starts + // later than turn1, so turn1's messages must come first. + var turn2 = taskSpan("turn2", "root", 2.0); + var llm2 = llmSpan("llm2", "turn2", 2.1, List.of(user2), List.of(choice2)); + var turn1 = taskSpan("turn1", "root", 1.0); + var llm1 = llmSpan("llm1", "turn1", 1.1, List.of(user1), List.of(choice1)); + + var trace = traceWithSpans(List.of(turn2, llm2, turn1, llm1)); // root absent, out of order + var thread = trace.getLLMConversationThread(); + + assertEquals(4, thread.size()); + assertEquals("Q1", thread.get(0).get("content")); + assertEquals(asst1, thread.get(1).get("message")); + assertEquals("Q2", thread.get(2).get("content")); + assertEquals(asst2, thread.get(3).get("message")); + } + // ------------------------------------------------------------------------- // fetchWithRetry — retry logic (via package-private constructor with custom supplier) // -------------------------------------------------------------------------