From a71932761aae7db3a4638f7896be83ca73e76dd3 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Thu, 18 Dec 2025 18:44:44 +0000 Subject: [PATCH 1/7] fix: extract model from response for OpenAI stored prompts When using OpenAI stored prompts, the model is defined in the OpenAI dashboard rather than passed in the API request. This change adds a fallback to extract the model from the response object when not provided in kwargs. Fixes PostHog/posthog#42861 Co-Authored-By: Claude Opus 4.5 --- posthog/ai/openai/openai.py | 29 +++++++++++++++++++++++++++-- posthog/ai/openai/openai_async.py | 29 +++++++++++++++++++++++++++-- posthog/ai/utils.py | 4 ++-- 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/posthog/ai/openai/openai.py b/posthog/ai/openai/openai.py index 11b3fe92..09984745 100644 --- a/posthog/ai/openai/openai.py +++ b/posthog/ai/openai/openai.py @@ -124,14 +124,23 @@ def _create_streaming( start_time = time.time() usage_stats: TokenUsage = TokenUsage() final_content = [] + model_from_response: Optional[str] = None response = self._original.create(**kwargs) def generator(): nonlocal usage_stats nonlocal final_content # noqa: F824 + nonlocal model_from_response try: for chunk in response: + # Extract model from response object in chunk (for stored prompts) + if hasattr(chunk, "response") and chunk.response: + if model_from_response is None and hasattr( + chunk.response, "model" + ): + model_from_response = chunk.response.model + # Extract usage stats from chunk chunk_usage = extract_openai_usage_from_chunk(chunk, "responses") @@ -161,6 +170,7 @@ def generator(): latency, output, None, # Responses API doesn't have tools + model_from_response, ) return generator() @@ -177,6 +187,7 @@ def _capture_streaming_event( latency: float, output: Any, available_tool_calls: Optional[List[Dict[str, Any]]] = None, + model_from_response: Optional[str] = None, ): from posthog.ai.types import StreamingEventData from posthog.ai.openai.openai_converter import ( @@ -189,9 +200,12 @@ def _capture_streaming_event( formatted_input = format_openai_streaming_input(kwargs, "responses") sanitized_input = sanitize_openai_response(formatted_input) + # Use model from kwargs, fallback to model from response + model = kwargs.get("model") or model_from_response or "unknown" + event_data = StreamingEventData( provider="openai", - model=kwargs.get("model", "unknown"), + model=model, base_url=str(self._client.base_url), kwargs=kwargs, formatted_input=sanitized_input, @@ -320,6 +334,7 @@ def _create_streaming( usage_stats: TokenUsage = TokenUsage() accumulated_content = [] accumulated_tool_calls: Dict[int, Dict[str, Any]] = {} + model_from_response: Optional[str] = None if "stream_options" not in kwargs: kwargs["stream_options"] = {} kwargs["stream_options"]["include_usage"] = True @@ -329,9 +344,14 @@ def generator(): nonlocal usage_stats nonlocal accumulated_content # noqa: F824 nonlocal accumulated_tool_calls + nonlocal model_from_response try: for chunk in response: + # Extract model from chunk (Chat Completions chunks have model field) + if model_from_response is None and hasattr(chunk, "model"): + model_from_response = chunk.model + # Extract usage stats from chunk chunk_usage = extract_openai_usage_from_chunk(chunk, "chat") @@ -376,6 +396,7 @@ def generator(): accumulated_content, tool_calls_list, extract_available_tool_calls("openai", kwargs), + model_from_response, ) return generator() @@ -393,6 +414,7 @@ def _capture_streaming_event( output: Any, tool_calls: Optional[List[Dict[str, Any]]] = None, 
available_tool_calls: Optional[List[Dict[str, Any]]] = None, + model_from_response: Optional[str] = None, ): from posthog.ai.types import StreamingEventData from posthog.ai.openai.openai_converter import ( @@ -405,9 +427,12 @@ def _capture_streaming_event( formatted_input = format_openai_streaming_input(kwargs, "chat") sanitized_input = sanitize_openai(formatted_input) + # Use model from kwargs, fallback to model from response + model = kwargs.get("model") or model_from_response or "unknown" + event_data = StreamingEventData( provider="openai", - model=kwargs.get("model", "unknown"), + model=model, base_url=str(self._client.base_url), kwargs=kwargs, formatted_input=sanitized_input, diff --git a/posthog/ai/openai/openai_async.py b/posthog/ai/openai/openai_async.py index 404895fc..755822ef 100644 --- a/posthog/ai/openai/openai_async.py +++ b/posthog/ai/openai/openai_async.py @@ -128,14 +128,23 @@ async def _create_streaming( start_time = time.time() usage_stats: TokenUsage = TokenUsage() final_content = [] + model_from_response: Optional[str] = None response = await self._original.create(**kwargs) async def async_generator(): nonlocal usage_stats nonlocal final_content # noqa: F824 + nonlocal model_from_response try: async for chunk in response: + # Extract model from response object in chunk (for stored prompts) + if hasattr(chunk, "response") and chunk.response: + if model_from_response is None and hasattr( + chunk.response, "model" + ): + model_from_response = chunk.response.model + # Extract usage stats from chunk chunk_usage = extract_openai_usage_from_chunk(chunk, "responses") @@ -166,6 +175,7 @@ async def async_generator(): latency, output, extract_available_tool_calls("openai", kwargs), + model_from_response, ) return async_generator() @@ -182,13 +192,17 @@ async def _capture_streaming_event( latency: float, output: Any, available_tool_calls: Optional[List[Dict[str, Any]]] = None, + model_from_response: Optional[str] = None, ): if posthog_trace_id is None: posthog_trace_id = str(uuid.uuid4()) + # Use model from kwargs, fallback to model from response + model = kwargs.get("model") or model_from_response + event_properties = { "$ai_provider": "openai", - "$ai_model": kwargs.get("model"), + "$ai_model": model, "$ai_model_parameters": get_model_params(kwargs), "$ai_input": with_privacy_mode( self._client._ph_client, @@ -350,6 +364,7 @@ async def _create_streaming( usage_stats: TokenUsage = TokenUsage() accumulated_content = [] accumulated_tool_calls: Dict[int, Dict[str, Any]] = {} + model_from_response: Optional[str] = None if "stream_options" not in kwargs: kwargs["stream_options"] = {} @@ -360,9 +375,14 @@ async def async_generator(): nonlocal usage_stats nonlocal accumulated_content # noqa: F824 nonlocal accumulated_tool_calls + nonlocal model_from_response try: async for chunk in response: + # Extract model from chunk (Chat Completions chunks have model field) + if model_from_response is None and hasattr(chunk, "model"): + model_from_response = chunk.model + # Extract usage stats from chunk chunk_usage = extract_openai_usage_from_chunk(chunk, "chat") if chunk_usage: @@ -405,6 +425,7 @@ async def async_generator(): accumulated_content, tool_calls_list, extract_available_tool_calls("openai", kwargs), + model_from_response, ) return async_generator() @@ -422,13 +443,17 @@ async def _capture_streaming_event( output: Any, tool_calls: Optional[List[Dict[str, Any]]] = None, available_tool_calls: Optional[List[Dict[str, Any]]] = None, + model_from_response: Optional[str] = None, ): if 
posthog_trace_id is None: posthog_trace_id = str(uuid.uuid4()) + # Use model from kwargs, fallback to model from response + model = kwargs.get("model") or model_from_response + event_properties = { "$ai_provider": "openai", - "$ai_model": kwargs.get("model"), + "$ai_model": model, "$ai_model_parameters": get_model_params(kwargs), "$ai_input": with_privacy_mode( self._client._ph_client, diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py index 559860cc..5f8a4b14 100644 --- a/posthog/ai/utils.py +++ b/posthog/ai/utils.py @@ -285,7 +285,7 @@ def call_llm_and_track_usage( event_properties = { "$ai_provider": provider, - "$ai_model": kwargs.get("model"), + "$ai_model": kwargs.get("model") or getattr(response, "model", None), "$ai_model_parameters": get_model_params(kwargs), "$ai_input": with_privacy_mode( ph_client, posthog_privacy_mode, sanitized_messages @@ -396,7 +396,7 @@ async def call_llm_and_track_usage_async( event_properties = { "$ai_provider": provider, - "$ai_model": kwargs.get("model"), + "$ai_model": kwargs.get("model") or getattr(response, "model", None), "$ai_model_parameters": get_model_params(kwargs), "$ai_input": with_privacy_mode( ph_client, posthog_privacy_mode, sanitized_messages From 809606f89cbf8649577cf96f6b83a189e680bfbd Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Fri, 19 Dec 2025 16:05:03 +0000 Subject: [PATCH 2/7] Apply suggestion from @greptile-apps[bot] Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- posthog/ai/openai/openai_async.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/ai/openai/openai_async.py b/posthog/ai/openai/openai_async.py index 755822ef..8327a33d 100644 --- a/posthog/ai/openai/openai_async.py +++ b/posthog/ai/openai/openai_async.py @@ -198,7 +198,7 @@ async def _capture_streaming_event( posthog_trace_id = str(uuid.uuid4()) # Use model from kwargs, fallback to model from response - model = kwargs.get("model") or model_from_response + model = kwargs.get("model") or model_from_response or "unknown" event_properties = { "$ai_provider": "openai", From c4f8230e0a93c64bb4f49ca462ddbb9d44edd082 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Fri, 19 Dec 2025 16:05:27 +0000 Subject: [PATCH 3/7] Apply suggestion from @greptile-apps[bot] Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- posthog/ai/openai/openai_async.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/ai/openai/openai_async.py b/posthog/ai/openai/openai_async.py index 8327a33d..77c9b260 100644 --- a/posthog/ai/openai/openai_async.py +++ b/posthog/ai/openai/openai_async.py @@ -449,7 +449,7 @@ async def _capture_streaming_event( posthog_trace_id = str(uuid.uuid4()) # Use model from kwargs, fallback to model from response - model = kwargs.get("model") or model_from_response + model = kwargs.get("model") or model_from_response or "unknown" event_properties = { "$ai_provider": "openai", From abda4580dbc2ce9de517c75afbcd7961ba0db514 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Fri, 19 Dec 2025 16:16:18 +0000 Subject: [PATCH 4/7] test: add tests for model extraction fallback and bump to 7.4.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add 8 tests covering model extraction from response for stored prompts - Fix utils.py to add 'unknown' fallback for consistency - Bump version to 7.4.1 - Update CHANGELOG.md 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: 
Claude Opus 4.5 --- CHANGELOG.md | 6 + posthog/ai/utils.py | 4 +- posthog/test/ai/openai/test_openai.py | 410 ++++++++++++++++++++++++++ posthog/version.py | 2 +- 4 files changed, 419 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d228159..a2770840 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# 7.4.1 - 2025-12-19 + +fix: extract model from response for OpenAI stored prompts + +When using OpenAI stored prompts, the model is defined in the OpenAI dashboard rather than passed in the API request. This fix adds a fallback to extract the model from the response object when not provided in kwargs, ensuring generations show up with the correct model and enabling cost calculations. + # 7.4.0 - 2025-12-16 feat: Add automatic retries for feature flag requests diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py index 5f8a4b14..6b217f66 100644 --- a/posthog/ai/utils.py +++ b/posthog/ai/utils.py @@ -285,7 +285,7 @@ def call_llm_and_track_usage( event_properties = { "$ai_provider": provider, - "$ai_model": kwargs.get("model") or getattr(response, "model", None), + "$ai_model": kwargs.get("model") or getattr(response, "model", None) or "unknown", "$ai_model_parameters": get_model_params(kwargs), "$ai_input": with_privacy_mode( ph_client, posthog_privacy_mode, sanitized_messages @@ -396,7 +396,7 @@ async def call_llm_and_track_usage_async( event_properties = { "$ai_provider": provider, - "$ai_model": kwargs.get("model") or getattr(response, "model", None), + "$ai_model": kwargs.get("model") or getattr(response, "model", None) or "unknown", "$ai_model_parameters": get_model_params(kwargs), "$ai_input": with_privacy_mode( ph_client, posthog_privacy_mode, sanitized_messages diff --git a/posthog/test/ai/openai/test_openai.py b/posthog/test/ai/openai/test_openai.py index 07d45753..75b1b8e4 100644 --- a/posthog/test/ai/openai/test_openai.py +++ b/posthog/test/ai/openai/test_openai.py @@ -1676,3 +1676,413 @@ async def chunk_iterable(): assert props["$ai_web_search_count"] == 1 assert props["$ai_input_tokens"] == 20 assert props["$ai_output_tokens"] == 15 + + +# Tests for model extraction fallback (stored prompts support) + + +def test_streaming_chat_extracts_model_from_chunk_when_not_in_kwargs(mock_client): + """Test that model is extracted from streaming chunks when not provided in kwargs (stored prompts).""" + + # Create streaming chunks with model field but we won't pass model in kwargs + chunks = [ + ChatCompletionChunk( + id="chunk1", + model="gpt-4o-stored-prompt", # Model comes from response, not request + object="chat.completion.chunk", + created=1234567890, + choices=[ + ChoiceChunk( + index=0, + delta=ChoiceDelta(role="assistant", content="Hello"), + finish_reason=None, + ) + ], + ), + ChatCompletionChunk( + id="chunk2", + model="gpt-4o-stored-prompt", + object="chat.completion.chunk", + created=1234567891, + choices=[ + ChoiceChunk( + index=0, + delta=ChoiceDelta(content=" world"), + finish_reason="stop", + ) + ], + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=5, + total_tokens=15, + ), + ), + ] + + with patch("openai.resources.chat.completions.Completions.create") as mock_create: + mock_create.return_value = chunks + + client = OpenAI(api_key="test-key", posthog_client=mock_client) + + # Note: NOT passing model in kwargs - simulates stored prompt usage + response_generator = client.chat.completions.create( + messages=[{"role": "user", "content": "Hello"}], + stream=True, + posthog_distinct_id="test-id", + ) + + # Consume the 
generator + list(response_generator) + + assert mock_client.capture.call_count == 1 + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Model should be extracted from chunk, not kwargs + assert props["$ai_model"] == "gpt-4o-stored-prompt" + + +def test_streaming_chat_prefers_kwargs_model_over_chunk_model(mock_client): + """Test that model from kwargs takes precedence over model from chunk.""" + chunks = [ + ChatCompletionChunk( + id="chunk1", + model="gpt-4o-from-response", + object="chat.completion.chunk", + created=1234567890, + choices=[ + ChoiceChunk( + index=0, + delta=ChoiceDelta(role="assistant", content="Hello"), + finish_reason="stop", + ) + ], + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=5, + total_tokens=15, + ), + ), + ] + + with patch("openai.resources.chat.completions.Completions.create") as mock_create: + mock_create.return_value = chunks + + client = OpenAI(api_key="test-key", posthog_client=mock_client) + + response_generator = client.chat.completions.create( + model="gpt-4o-from-kwargs", # Explicitly passed model + messages=[{"role": "user", "content": "Hello"}], + stream=True, + posthog_distinct_id="test-id", + ) + + list(response_generator) + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # kwargs model should take precedence + assert props["$ai_model"] == "gpt-4o-from-kwargs" + + +def test_streaming_responses_api_extracts_model_from_response_object(mock_client): + """Test that Responses API streaming extracts model from chunk.response.model (stored prompts).""" + from unittest.mock import MagicMock + from openai.types.responses import ResponseUsage + + chunks = [] + + # Content chunk + chunk1 = MagicMock() + chunk1.type = "response.text.delta" + chunk1.text = "Test response" + # No response attribute on content chunks + del chunk1.response + chunks.append(chunk1) + + # Final chunk with response object containing model + chunk2 = MagicMock() + chunk2.type = "response.completed" + chunk2.response = MagicMock() + chunk2.response.model = "gpt-4o-mini-stored" # Model from stored prompt + chunk2.response.usage = ResponseUsage( + input_tokens=20, + output_tokens=10, + total_tokens=30, + input_tokens_details={"prompt_tokens": 20, "cached_tokens": 0}, + output_tokens_details={"reasoning_tokens": 0}, + ) + chunk2.response.output = ["Test response"] + chunks.append(chunk2) + + with patch("openai.resources.responses.Responses.create") as mock_create: + mock_create.return_value = iter(chunks) + + client = OpenAI(api_key="test-key", posthog_client=mock_client) + + # Note: NOT passing model - simulates stored prompt + response_generator = client.responses.create( + input=[{"role": "user", "content": "Hello"}], + stream=True, + posthog_distinct_id="test-id", + ) + + list(response_generator) + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Model should be extracted from chunk.response.model + assert props["$ai_model"] == "gpt-4o-mini-stored" + + +def test_non_streaming_extracts_model_from_response(mock_client): + """Test that non-streaming calls extract model from response when not in kwargs.""" + # Create a response with model but we won't pass model in kwargs + mock_response = ChatCompletion( + id="test", + model="gpt-4o-stored-prompt", + object="chat.completion", + created=int(time.time()), + choices=[ + Choice( + finish_reason="stop", + index=0, + message=ChatCompletionMessage( + content="Test response", + role="assistant", + ), + ) + ], + 
usage=CompletionUsage( + completion_tokens=10, + prompt_tokens=20, + total_tokens=30, + ), + ) + + with patch( + "openai.resources.chat.completions.Completions.create", + return_value=mock_response, + ): + client = OpenAI(api_key="test-key", posthog_client=mock_client) + + # Note: NOT passing model in kwargs + response = client.chat.completions.create( + messages=[{"role": "user", "content": "Hello"}], + posthog_distinct_id="test-id", + ) + + assert response == mock_response + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Model should be extracted from response.model + assert props["$ai_model"] == "gpt-4o-stored-prompt" + + +def test_non_streaming_responses_api_extracts_model_from_response(mock_client): + """Test that non-streaming Responses API extracts model from response when not in kwargs.""" + mock_response = Response( + id="test", + model="gpt-4o-mini-stored", + object="response", + created_at=1741476542, + status="completed", + error=None, + incomplete_details=None, + instructions=None, + max_output_tokens=None, + tools=[], + tool_choice="auto", + output=[ + ResponseOutputMessage( + id="msg_123", + type="message", + role="assistant", + status="completed", + content=[ + ResponseOutputText( + type="output_text", + text="Test response", + annotations=[], + ) + ], + ) + ], + parallel_tool_calls=True, + previous_response_id=None, + usage=ResponseUsage( + input_tokens=10, + output_tokens=10, + input_tokens_details={"prompt_tokens": 10, "cached_tokens": 0}, + output_tokens_details={"reasoning_tokens": 0}, + total_tokens=20, + ), + user=None, + metadata={}, + ) + + with patch( + "openai.resources.responses.Responses.create", + return_value=mock_response, + ): + client = OpenAI(api_key="test-key", posthog_client=mock_client) + + # Note: NOT passing model in kwargs + response = client.responses.create( + input="Hello", + posthog_distinct_id="test-id", + ) + + assert response == mock_response + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Model should be extracted from response.model + assert props["$ai_model"] == "gpt-4o-mini-stored" + + +def test_streaming_falls_back_to_unknown_when_no_model(mock_client): + """Test that streaming falls back to 'unknown' when model is not available anywhere.""" + from unittest.mock import MagicMock + + # Create a chunk without model attribute + chunk = MagicMock() + chunk.choices = [MagicMock()] + chunk.choices[0].delta = MagicMock() + chunk.choices[0].delta.content = "Hello" + chunk.choices[0].delta.role = "assistant" + chunk.choices[0].delta.tool_calls = None + chunk.usage = CompletionUsage( + prompt_tokens=10, + completion_tokens=5, + total_tokens=15, + ) + # Explicitly remove model attribute + del chunk.model + + with patch("openai.resources.chat.completions.Completions.create") as mock_create: + mock_create.return_value = [chunk] + + client = OpenAI(api_key="test-key", posthog_client=mock_client) + + response_generator = client.chat.completions.create( + messages=[{"role": "user", "content": "Hello"}], + stream=True, + posthog_distinct_id="test-id", + ) + + list(response_generator) + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Should fall back to "unknown" + assert props["$ai_model"] == "unknown" + + +@pytest.mark.asyncio +async def test_async_streaming_chat_extracts_model_from_chunk(mock_client): + """Test async streaming extracts model from chunk when not in kwargs.""" + chunks = [ + ChatCompletionChunk( + id="chunk1", + 
model="gpt-4o-async-stored", + object="chat.completion.chunk", + created=1234567890, + choices=[ + ChoiceChunk( + index=0, + delta=ChoiceDelta(role="assistant", content="Hello"), + finish_reason="stop", + ) + ], + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=5, + total_tokens=15, + ), + ), + ] + + async def mock_create(self, **kwargs): + async def chunk_iterable(): + for chunk in chunks: + yield chunk + + return chunk_iterable() + + with patch( + "openai.resources.chat.completions.AsyncCompletions.create", new=mock_create + ): + client = AsyncOpenAI(api_key="test-key", posthog_client=mock_client) + + # Note: NOT passing model + response_stream = await client.chat.completions.create( + messages=[{"role": "user", "content": "Hello"}], + stream=True, + posthog_distinct_id="test-id", + ) + + async for _ in response_stream: + pass + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + assert props["$ai_model"] == "gpt-4o-async-stored" + + +@pytest.mark.asyncio +async def test_async_streaming_responses_extracts_model_from_response(mock_client): + """Test async Responses API streaming extracts model from chunk.response.model.""" + from unittest.mock import MagicMock + from openai.types.responses import ResponseUsage + + chunks = [] + + chunk1 = MagicMock() + chunk1.type = "response.text.delta" + chunk1.text = "Test" + del chunk1.response + chunks.append(chunk1) + + chunk2 = MagicMock() + chunk2.type = "response.completed" + chunk2.response = MagicMock() + chunk2.response.model = "gpt-4o-mini-async-stored" + chunk2.response.usage = ResponseUsage( + input_tokens=20, + output_tokens=10, + total_tokens=30, + input_tokens_details={"prompt_tokens": 20, "cached_tokens": 0}, + output_tokens_details={"reasoning_tokens": 0}, + ) + chunk2.response.output = ["Test"] + chunks.append(chunk2) + + async def mock_create(self, **kwargs): + async def chunk_iterable(): + for chunk in chunks: + yield chunk + + return chunk_iterable() + + with patch("openai.resources.responses.AsyncResponses.create", new=mock_create): + client = AsyncOpenAI(api_key="test-key", posthog_client=mock_client) + + response_stream = await client.responses.create( + input=[{"role": "user", "content": "Hello"}], + stream=True, + posthog_distinct_id="test-id", + ) + + async for _ in response_stream: + pass + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + assert props["$ai_model"] == "gpt-4o-mini-async-stored" diff --git a/posthog/version.py b/posthog/version.py index 3de0587a..10523a6e 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1,4 +1,4 @@ -VERSION = "7.4.0" +VERSION = "7.4.1" if __name__ == "__main__": print(VERSION, end="") # noqa: T201 From 8dccb794529231c3033487ac8284d0f0c26def33 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Fri, 19 Dec 2025 16:21:28 +0000 Subject: [PATCH 5/7] style: format utils.py with ruff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- posthog/ai/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py index 6b217f66..e85cbb84 100644 --- a/posthog/ai/utils.py +++ b/posthog/ai/utils.py @@ -285,7 +285,9 @@ def call_llm_and_track_usage( event_properties = { "$ai_provider": provider, - "$ai_model": kwargs.get("model") or getattr(response, "model", None) or "unknown", + "$ai_model": kwargs.get("model") + or 
getattr(response, "model", None) + or "unknown", "$ai_model_parameters": get_model_params(kwargs), "$ai_input": with_privacy_mode( ph_client, posthog_privacy_mode, sanitized_messages @@ -396,7 +398,9 @@ async def call_llm_and_track_usage_async( event_properties = { "$ai_provider": provider, - "$ai_model": kwargs.get("model") or getattr(response, "model", None) or "unknown", + "$ai_model": kwargs.get("model") + or getattr(response, "model", None) + or "unknown", "$ai_model_parameters": get_model_params(kwargs), "$ai_input": with_privacy_mode( ph_client, posthog_privacy_mode, sanitized_messages From cc401a0b829a18b8f3bfce95739e6aa2c24adb6b Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Fri, 19 Dec 2025 17:33:53 +0000 Subject: [PATCH 6/7] fix: remove 'unknown' fallback from non-streaming to match original behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Non-streaming originally returned None when model wasn't in kwargs. Streaming keeps "unknown" fallback as that was the original behavior. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- posthog/ai/utils.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py index e85cbb84..5f8a4b14 100644 --- a/posthog/ai/utils.py +++ b/posthog/ai/utils.py @@ -285,9 +285,7 @@ def call_llm_and_track_usage( event_properties = { "$ai_provider": provider, - "$ai_model": kwargs.get("model") - or getattr(response, "model", None) - or "unknown", + "$ai_model": kwargs.get("model") or getattr(response, "model", None), "$ai_model_parameters": get_model_params(kwargs), "$ai_input": with_privacy_mode( ph_client, posthog_privacy_mode, sanitized_messages @@ -398,9 +396,7 @@ async def call_llm_and_track_usage_async( event_properties = { "$ai_provider": provider, - "$ai_model": kwargs.get("model") - or getattr(response, "model", None) - or "unknown", + "$ai_model": kwargs.get("model") or getattr(response, "model", None), "$ai_model_parameters": get_model_params(kwargs), "$ai_input": with_privacy_mode( ph_client, posthog_privacy_mode, sanitized_messages From ae3b67ae3876570548648a36a70dc7813b2c7b4c Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Fri, 19 Dec 2025 17:39:12 +0000 Subject: [PATCH 7/7] test: add test for None model fallback in non-streaming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verifies that non-streaming returns None (not "unknown") when model is not available in kwargs or response, matching original behavior. 
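To make the two fallback behaviours described in patches 6/7 and 7/7 concrete, here is a minimal, self-contained sketch of the resolution order. It is not the SDK code itself, and the helper names (`resolve_streaming_model`, `resolve_non_streaming_model`, `StoredPromptResponse`) are illustrative stand-ins: an explicit `model` kwarg always wins, then the model reported by the response, and only the streaming path finally falls back to `"unknown"`, while the non-streaming path stays `None`.

```python
from typing import Any, Dict, Optional


def resolve_streaming_model(
    kwargs: Dict[str, Any], model_from_response: Optional[str]
) -> str:
    # Streaming events keep the original "unknown" fallback so the
    # $ai_model property is always a string.
    return kwargs.get("model") or model_from_response or "unknown"


def resolve_non_streaming_model(
    kwargs: Dict[str, Any], response: Any
) -> Optional[str]:
    # Non-streaming events fall back to response.model and may legitimately
    # stay None, avoiding incorrect cost matching on a fake "unknown" model.
    return kwargs.get("model") or getattr(response, "model", None)


class StoredPromptResponse:
    # Stand-in for a response whose model is defined by a stored prompt.
    model = "gpt-4o-stored-prompt"


# An explicit model kwarg always wins.
assert resolve_streaming_model({"model": "gpt-4o"}, "gpt-4o-stored") == "gpt-4o"
# Stored prompt: no model in kwargs, so the response supplies it.
assert resolve_non_streaming_model({}, StoredPromptResponse()) == "gpt-4o-stored-prompt"
# Nothing available anywhere: streaming reports "unknown", non-streaming stays None.
assert resolve_streaming_model({}, None) == "unknown"
assert resolve_non_streaming_model({}, object()) is None
```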
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- posthog/test/ai/openai/test_openai.py | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/posthog/test/ai/openai/test_openai.py b/posthog/test/ai/openai/test_openai.py index 75b1b8e4..116ba2d1 100644 --- a/posthog/test/ai/openai/test_openai.py +++ b/posthog/test/ai/openai/test_openai.py @@ -1944,6 +1944,52 @@ def test_non_streaming_responses_api_extracts_model_from_response(mock_client): assert props["$ai_model"] == "gpt-4o-mini-stored" +def test_non_streaming_returns_none_when_no_model(mock_client): + """Test that non-streaming returns None (not 'unknown') when model is not available anywhere.""" + # Create a response without model attribute using real OpenAI types + mock_response = ChatCompletion( + id="test", + model="", # Will be removed below + object="chat.completion", + created=int(time.time()), + choices=[ + Choice( + finish_reason="stop", + index=0, + message=ChatCompletionMessage( + content="Test response", + role="assistant", + ), + ) + ], + usage=CompletionUsage( + completion_tokens=5, + prompt_tokens=10, + total_tokens=15, + ), + ) + # Remove model attribute to simulate missing model + object.__delattr__(mock_response, "model") + + with patch( + "openai.resources.chat.completions.Completions.create", + return_value=mock_response, + ): + client = OpenAI(api_key="test-key", posthog_client=mock_client) + + # Note: NOT passing model in kwargs and response has no model + client.chat.completions.create( + messages=[{"role": "user", "content": "Hello"}], + posthog_distinct_id="test-id", + ) + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Should be None, NOT "unknown" (to avoid incorrect cost matching) + assert props["$ai_model"] is None + + def test_streaming_falls_back_to_unknown_when_no_model(mock_client): """Test that streaming falls back to 'unknown' when model is not available anywhere.""" from unittest.mock import MagicMock
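For reference, a hedged sketch of where `model_from_response` comes from during streaming, distilled from the diffs above rather than copied from them. The `FakeChatChunk` and `FakeResponsesEvent` classes are invented stand-ins for the OpenAI SDK chunk objects: Chat Completions chunks carry a top-level `model` field, while Responses API events expose it on the nested `response` object, and in both cases the first value seen in the stream is kept.

```python
from typing import Any, Iterable, Optional


def first_model_from_chunks(chunks: Iterable[Any]) -> Optional[str]:
    # Mirrors the streaming logic in the patches: remember the first model
    # reported by the stream, whether it arrives on chunk.model
    # (Chat Completions) or chunk.response.model (Responses API).
    model: Optional[str] = None
    for chunk in chunks:
        response = getattr(chunk, "response", None)
        if model is None and response is not None and hasattr(response, "model"):
            model = response.model
        if model is None and hasattr(chunk, "model"):
            model = chunk.model
    return model


class FakeChatChunk:
    # Chat Completions style: model lives on the chunk itself.
    model = "gpt-4o-stored-prompt"


class FakeResponsesEvent:
    # Responses API style: model lives on the nested response object.
    class _Response:
        model = "gpt-4o-mini-stored"

    response = _Response()


assert first_model_from_chunks([FakeChatChunk()]) == "gpt-4o-stored-prompt"
assert first_model_from_chunks([FakeResponsesEvent()]) == "gpt-4o-mini-stored"
assert first_model_from_chunks([object()]) is None
```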