From 7da7460d86a6ca613145c21e96d5eb766a7ba376 Mon Sep 17 00:00:00 2001
From: Kenny Wong <wong@coda.io>
Date: Tue, 2 Jun 2026 09:00:23 -0400
Subject: [PATCH] [OAI] Support braintrust >=0.13 wrapping (fix Python CI)

braintrust 0.13 removed the client wrapper classes that test_oai.py imported
(OpenAIV0Wrapper, OpenAIV1Wrapper, ChatCompletionV0Wrapper,
CompletionsV1Wrapper) and changed wrap_openai to patch resource methods in
place (wrapt FunctionWrapper) instead of returning a NamedWrapper proxy. As a
result, the isinstance(client, NamedWrapper) check no longer detected wrapping
(is_wrapped was wrongly False, so scorer spans lost their purpose), and
test_oai.py failed at import time on the now-removed classes, aborting test
collection.

Detect wrapping in a version-agnostic way: a NamedWrapper instance for <0.13,
or a wrapt wrapper type on chat.completions.create / responses.create for
>=0.13. Note that >=0.13 only instruments the v1 SDK, so v0 clients are no
longer traced; the tests are updated to match.
---
 py/autoevals/oai.py      | 42 ++++++++++++++++++++++++++++++++++++++--
 py/autoevals/test_oai.py | 29 +++++++++++++--------------
 2 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/py/autoevals/oai.py b/py/autoevals/oai.py
index fa21554b..c74942bc 100644
--- a/py/autoevals/oai.py
+++ b/py/autoevals/oai.py
@@ -9,6 +9,15 @@
 from dataclasses import dataclass
 from typing import Any, Optional, Protocol, TypedDict, TypeVar, Union, cast, runtime_checkable
 
+try:
+    # Braintrust (>= 0.13) patches OpenAI resource methods with wrapt wrappers.
+    # wrapt is a Braintrust dependency, so it's present whenever wrapping can occur.
+    from wrapt import BoundFunctionWrapper, FunctionWrapper
+
+    _WRAPT_WRAPPER_TYPES: tuple[type, ...] = (FunctionWrapper, BoundFunctionWrapper)
+except ImportError:
+    _WRAPT_WRAPPER_TYPES = ()
+
 PROXY_URL = "https://api.braintrust.dev/v1/proxy"
 
 
@@ -126,6 +135,33 @@ def is_gpt5_model(model: str) -> bool:
     return model.startswith("gpt-5")
 
 
+def openai_client_is_wrapped(client: Any, named_wrapper: type) -> bool:
+    """Detect whether an OpenAI client has been instrumented by Braintrust.
+
+    Works across Braintrust versions:
+    - < 0.13 wrapped the whole client in a ``NamedWrapper`` proxy.
+    - >= 0.13 patches resource methods in place, replacing ``create`` with a
+      ``wrapt`` function wrapper while leaving the client object unchanged.
+      Note: >= 0.13 only instruments the v1 SDK, so v0 clients are no longer
+      traced. (``create`` exposes ``__wrapped__`` even when unwrapped, so we
+      check the wrapper type rather than that attribute.)
+    """
+    if isinstance(client, named_wrapper):
+        return True
+    if not _WRAPT_WRAPPER_TYPES:
+        return False
+    for path in (("chat", "completions", "create"), ("responses", "create")):
+        obj = client
+        for attr in path:
+            obj = getattr(obj, attr, None)
+            if obj is None:
+                break
+        else:
+            if isinstance(obj, _WRAPT_WRAPPER_TYPES):
+                return True
+    return False
+
+
 @dataclass
 class LLMClient:
     """A client wrapper for LLM operations that supports both OpenAI SDK v0 and v1.
@@ -192,10 +228,12 @@ def __post_init__(self):
         has_customization = self.complete is not None or self.embed is not None or self.moderation is not None  # type: ignore  # Pyright doesn't understand our design choice
 
         # avoid wrapping if we have custom methods (the user may intend not to wrap)
-        if not has_customization and not isinstance(self.openai, NamedWrapper):
+        # wrap_openai is idempotent (braintrust >= 0.13) / returns a NamedWrapper proxy
+        # (< 0.13), so it's safe to call whenever the client isn't already wrapped.
+        if not has_customization and not openai_client_is_wrapped(self.openai, NamedWrapper):
             self.openai = wrap_openai(self.openai)
 
-        self._is_wrapped = isinstance(self.openai, NamedWrapper)
+        self._is_wrapped = openai_client_is_wrapped(self.openai, NamedWrapper)
 
         openai_module = get_openai_module()
 
diff --git a/py/autoevals/test_oai.py b/py/autoevals/test_oai.py
index 414c3c78..01129145 100644
--- a/py/autoevals/test_oai.py
+++ b/py/autoevals/test_oai.py
@@ -4,11 +4,7 @@
 import openai
 import pytest
 from braintrust.oai import (
-    ChatCompletionV0Wrapper,
-    CompletionsV1Wrapper,
     NamedWrapper,
-    OpenAIV0Wrapper,
-    OpenAIV1Wrapper,
     wrap_openai,
 )
 from openai.resources.chat.completions import AsyncCompletions
@@ -27,7 +23,9 @@
 
 
 def unwrap_named_wrapper(obj: NamedWrapper | OpenAIV1Module.OpenAI | OpenAIV0Module) -> Any:
-    return getattr(obj, "_NamedWrapper__wrapped")
+    # braintrust < 0.13 wrapped clients in a NamedWrapper proxy; >= 0.13 patches the
+    # client in place, so there's nothing to unwrap and we return it as-is.
+    return getattr(obj, "_NamedWrapper__wrapped", obj)
 
 
 @pytest.fixture(autouse=True)
@@ -83,8 +81,8 @@ def test_init_creates_async_llmclient_if_needed(mock_openai_v0: OpenAIV0Module):
     prepared_client = prepare_openai()
 
     assert isinstance(prepared_client, LLMClient)
-    assert prepared_client.is_wrapped
-    assert isinstance(prepared_client.openai, OpenAIV0Wrapper)
+    # braintrust >= 0.13 only instruments the v1 SDK, so v0 clients are not wrapped.
+    assert not prepared_client.is_wrapped
     assert prepared_client.complete.__name__ == "acreate"
 
 
@@ -106,7 +104,8 @@ def test_prepare_openai_with_plain_openai():
     prepared_client = prepare_openai(client=client)
 
     assert prepared_client.is_wrapped
-    assert isinstance(prepared_client.openai, OpenAIV1Wrapper)
+    # braintrust >= 0.13 patches the client in place rather than returning a proxy.
+    assert prepared_client.openai is client
 
 
 def test_prepare_openai_async():
@@ -114,7 +113,7 @@ def test_prepare_openai_async():
 
     assert isinstance(prepared_client, LLMClient)
     assert prepared_client.is_wrapped
-    assert isinstance(prepared_client.openai, OpenAIV1Wrapper)
+    assert isinstance(prepared_client.openai, openai.AsyncOpenAI)
 
     assert callable(prepared_client.complete)
     assert prepared_client.complete.__name__ == "complete_wrapper"
@@ -228,16 +227,17 @@ class RateLimitError(Exception):
 def test_prepare_openai_v0_sdk(mock_openai_v0: OpenAIV0Module):
     prepared_client = prepare_openai()
 
-    assert prepared_client.is_wrapped
+    # braintrust >= 0.13 only instruments the v1 SDK, so v0 clients are not wrapped.
+    assert not prepared_client.is_wrapped
     assert prepared_client.openai.api_key == "test-key"
-
-    assert isinstance(getattr(prepared_client.complete, "__self__", None), ChatCompletionV0Wrapper)
+    assert prepared_client.complete.__name__ == "create"
 
 
 def test_prepare_openai_v0_async(mock_openai_v0: OpenAIV0Module):
     prepared_client = prepare_openai(is_async=True)
 
-    assert prepared_client.is_wrapped
+    # braintrust >= 0.13 only instruments the v1 SDK, so v0 clients are not wrapped.
+    assert not prepared_client.is_wrapped
     assert prepared_client.openai.api_key == "test-key"
 
     assert prepared_client.complete.__name__ == "acreate"
@@ -248,7 +248,8 @@ def test_prepare_openai_v0_with_client(mock_openai_v0: OpenAIV0Module):
 
     prepared_client = prepare_openai(client=client)
 
-    assert prepared_client.is_wrapped
+    # braintrust >= 0.13 only instruments the v1 SDK, so v0 clients are not wrapped.
+    assert not prepared_client.is_wrapped
     assert prepared_client.openai.api_key is mock_openai_v0.api_key  # must be set by the user
     assert prepared_client.complete.__name__ == "acreate"