Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 18 additions & 15 deletions sentry_sdk/integrations/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,21 @@
raise DidNotEnable("LiteLLM not installed")


def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
"""Get the metadata dictionary from the kwargs."""
litellm_params = kwargs.setdefault("litellm_params", {})
# Stash the span on a top-level key of the per-request kwargs dict litellm passes
# to every callback, so it lives and dies with the request.
_SPAN_KEY = "_sentry_span"

# we need this weird little dance, as metadata might be set but may be None initially
metadata = litellm_params.get("metadata")
if metadata is None:
metadata = {}
litellm_params["metadata"] = metadata
return metadata

def _store_span(kwargs: "Dict[str, Any]", span: "Any") -> None:
    """Store ``span`` on ``kwargs`` under the top-level ``_SPAN_KEY`` entry.

    Overwrites any value already stored under that key.
    """
    kwargs[_SPAN_KEY] = span


def _peek_span(kwargs: "Dict[str, Any]") -> "Any":
    """Return the value stored under ``_SPAN_KEY`` without removing it.

    Returns ``None`` when the key is absent; ``kwargs`` is left unchanged.
    """
    return kwargs.get(_SPAN_KEY)


def _pop_span(kwargs: "Dict[str, Any]") -> "Any":
    """Remove and return the value stored under ``_SPAN_KEY``.

    Returns ``None`` when the key is absent, so a second call on the same
    ``kwargs`` yields ``None`` rather than raising.
    """
    return kwargs.pop(_SPAN_KEY, None)


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
Expand Down Expand Up @@ -117,8 +122,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
)
span.__enter__()

# Store span for later
_get_metadata_dict(kwargs)["_sentry_span"] = span
_store_span(kwargs, span)

# Set basic data
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
Expand Down Expand Up @@ -198,8 +202,7 @@ def _success_callback(
) -> None:
"""Handle successful completion."""

metadata = _get_metadata_dict(kwargs)
span = metadata.get("_sentry_span")
span = _peek_span(kwargs)
if span is None:
return

Expand Down Expand Up @@ -259,7 +262,7 @@ def _success_callback(
or "complete_streaming_response" in kwargs
or "async_complete_streaming_response" in kwargs
):
span = metadata.pop("_sentry_span", None)
span = _pop_span(kwargs)
if span is not None:
span.__exit__(None, None, None)

Expand All @@ -285,7 +288,7 @@ def _failure_callback(
end_time: "datetime",
) -> None:
"""Handle request failure."""
span = _get_metadata_dict(kwargs).get("_sentry_span")
span = _pop_span(kwargs)
if span is None:
Comment thread
jgreer013 marked this conversation as resolved.
return

Expand Down
56 changes: 56 additions & 0 deletions tests/integrations/litellm/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime
from unittest import mock

import httpx
import pytest

import sentry_sdk
Expand Down Expand Up @@ -2532,6 +2533,61 @@ def test_integration_setup(sentry_init):
assert _failure_callback in (litellm.failure_callback or [])


@pytest.mark.asyncio(loop_scope="session")
async def test_anthropic_passthrough_request_stays_serializable(
    reset_litellm_executor, sentry_init
):
    """Regression test for GH-6596: litellm's Anthropic ``/v1/messages``
    passthrough forwards the caller's ``metadata`` into the request body, so the
    integration must not make that body unserializable. Drive the real
    passthrough with a mocked transport and assert the request body serializes.
    """
    sentry_init(
        integrations=[LiteLLMIntegration()],
        disabled_integrations=[StdlibIntegration],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    # Filled in by capture_post so the outgoing body can be inspected afterwards.
    captured = {}
    # Canned response body returned by the mocked transport.
    anthropic_response = {
        "id": "msg_1",
        "type": "message",
        "role": "assistant",
        "content": [{"type": "text", "text": "Hi there"}],
        "model": "claude-3-5-sonnet-latest",
        "stop_reason": "end_turn",
        "stop_sequence": None,
        "usage": {"input_tokens": 1, "output_tokens": 1},
    }

    client = AsyncHTTPHandler()

    def capture_post(*args, **kwargs):
        # Record the ``data`` keyword handed to ``client.post`` and answer with
        # the canned response instead of performing a real HTTP request.
        captured["data"] = kwargs.get("data")
        return httpx.Response(
            200,
            json=anthropic_response,
            request=httpx.Request("POST", "https://api.anthropic.com/v1/messages"),
        )

    # Patch only the client's ``post`` method; the call runs inside a transaction.
    with mock.patch.object(client, "post", side_effect=capture_post), start_transaction(
        name="litellm test"
    ):
        await litellm.anthropic.messages.acreate(
            model="anthropic/claude-3-5-sonnet-latest",
            messages=[{"role": "user", "content": "Hello!"}],
            max_tokens=16,
            metadata={"user_id": "my-org"},
            api_key="test-key",
            client=client,
        )

    # The mocked post must have been invoked, and the body it received must
    # parse as JSON with the caller's metadata exactly as passed in.
    assert "data" in captured
    request_body = json.loads(captured["data"])
    assert request_body["metadata"] == {"user_id": "my-org"}


def test_litellm_message_truncation(sentry_init, capture_events):
"""Test that large messages are truncated properly in LiteLLM integration."""
sentry_init(
Expand Down