diff --git a/.changeset/beige-ducks-sneeze.md b/.changeset/beige-ducks-sneeze.md new file mode 100644 index 000000000..682615056 --- /dev/null +++ b/.changeset/beige-ducks-sneeze.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +Fix: dataset-backed eval origin for copied dataset rows diff --git a/js/src/framework.test.ts b/js/src/framework.test.ts index 87135dd28..49eb1b091 100644 --- a/js/src/framework.test.ts +++ b/js/src/framework.test.ts @@ -166,6 +166,37 @@ function makeTestScorer( } describe("runEvaluator", () => { + function makeDatasetData( + datasetId: string, + datum: { + input: number; + id: string; + _xact_id?: string; + created?: string; + origin?: { + object_type: + | "project_logs" + | "experiment" + | "dataset" + | "prompt" + | "function" + | "prompt_session"; + object_id: string; + id: string; + _xact_id?: string | null; + created?: string | null; + }; + }, + ) { + return { + __braintrust_dataset_marker: true, + id: Promise.resolve(datasetId), + async *[Symbol.asyncIterator]() { + yield datum; + }, + }; + } + test("preserves a valid inline origin", async () => { const origin: { object_type: "dataset"; @@ -198,7 +229,124 @@ describe("runEvaluator", () => { expect(out.results[0].origin).toEqual(origin); }); - test("rejects an invalid inline origin", async () => { + test("prefers dataset row origin over source origin for dataset-backed evals", async () => { + const datasetId = "00000000-0000-0000-0000-000000000001"; + const data = makeDatasetData(datasetId, { + input: 1, + id: "dataset-row-1", + _xact_id: "dataset-xact", + created: "2026-06-02T00:00:00.000Z", + origin: { + object_type: "project_logs", + object_id: "00000000-0000-0000-0000-000000000002", + id: "source-log-row-1", + _xact_id: "source-xact", + created: "2026-06-01T00:00:00.000Z", + }, + }); + + const out = await runEvaluator( + null, + { + projectName: "proj", + evalName: "eval", + data, + task: async (input: number) => input * 2, + scores: [], + }, + new NoopProgressReporter(), + [], + undefined, + ); + + expect(out.results[0].origin).toEqual({ + object_type: "dataset", + object_id: datasetId, + id: "dataset-row-1", + _xact_id: "dataset-xact", + created: "2026-06-02T00:00:00.000Z", + }); + }); + + test("falls back to source origin when dataset row origin is incomplete", async () => { + const datasetId = "00000000-0000-0000-0000-000000000001"; + const sourceOrigin = { + object_type: "project_logs" as const, + object_id: "00000000-0000-0000-0000-000000000002", + id: "source-log-row-1", + _xact_id: "source-xact", + created: "2026-06-01T00:00:00.000Z", + }; + const data = makeDatasetData(datasetId, { + input: 1, + id: "dataset-row-1", + created: "2026-06-02T00:00:00.000Z", + origin: sourceOrigin, + }); + + const out = await runEvaluator( + null, + { + projectName: "proj", + evalName: "eval", + data, + task: async (input: number) => input * 2, + scores: [], + }, + new NoopProgressReporter(), + [], + undefined, + ); + + expect(out.results[0].origin).toEqual(sourceOrigin); + }); + + test("reports progress with dataset row origin for dataset-backed evals", async () => { + const datasetId = "00000000-0000-0000-0000-000000000001"; + const data = makeDatasetData(datasetId, { + input: 1, + id: "dataset-row-1", + _xact_id: "dataset-xact", + created: "2026-06-02T00:00:00.000Z", + origin: { + object_type: "project_logs", + object_id: "00000000-0000-0000-0000-000000000002", + id: "source-log-row-1", + }, + }); + const streamEvents: unknown[] = []; + + await runEvaluator( + null, + { + projectName: "proj", + evalName: "eval", + data, + task: async (input: number, hooks) => { + hooks.reportProgress({ object_type: "progress", progress: 0.5 }); + return input * 2; + }, + scores: [], + }, + new NoopProgressReporter(), + [], + (event) => streamEvents.push(event), + ); + + expect(streamEvents).toEqual([ + expect.objectContaining({ + origin: { + object_type: "dataset", + object_id: datasetId, + id: "dataset-row-1", + _xact_id: "dataset-xact", + created: "2026-06-02T00:00:00.000Z", + }, + }), + ]); + }); + + test("ignores an invalid inline origin", async () => { const origin: { object_type: "dataset"; object_id: string; @@ -210,22 +358,22 @@ describe("runEvaluator", () => { }; const task = vi.fn(async (input: number) => input * 2); - await expect( - runEvaluator( - null, - { - projectName: "proj", - evalName: "eval", - data: [{ input: 1, origin }], - task, - scores: [], - }, - new NoopProgressReporter(), - [], - undefined, - ), - ).rejects.toThrow(); - expect(task).not.toHaveBeenCalled(); + const out = await runEvaluator( + null, + { + projectName: "proj", + evalName: "eval", + data: [{ input: 1, origin }], + task, + scores: [], + }, + new NoopProgressReporter(), + [], + undefined, + ); + + expect(task).toHaveBeenCalled(); + expect(out.results[0].origin).toBeUndefined(); }); describe("errors", () => { diff --git a/js/src/framework.ts b/js/src/framework.ts index 4f0ec0c89..3a86339c1 100644 --- a/js/src/framework.ts +++ b/js/src/framework.ts @@ -1180,13 +1180,8 @@ async function runEvaluatorInternal( : Dataset.isDataset(evaluator.data) ? evaluator.data : undefined; - const inlineOrigin = - datum.origin === undefined - ? undefined - : ObjectReferenceSchema.parse(datum.origin); - const origin = - inlineOrigin ?? - (eventDataset && datum.id && datum._xact_id + const inlineDatasetOrigin: ObjectReference | undefined = + eventDataset && datum.id && datum._xact_id ? { object_type: "dataset", object_id: await eventDataset.id, @@ -1194,7 +1189,11 @@ async function runEvaluatorInternal( created: datum.created, _xact_id: datum._xact_id, } - : undefined); + : undefined; + const parsedDatumOrigin = ObjectReferenceSchema.safeParse(datum.origin); + const origin = + inlineDatasetOrigin ?? + (parsedDatumOrigin?.success ? parsedDatumOrigin.data : undefined); const baseEvent: StartSpanArgs = { name: "eval",