diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d228159..a2770840 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+# 7.4.1 - 2025-12-19
+
+fix: extract model from response for OpenAI stored prompts
+
+When using OpenAI stored prompts, the model is defined in the OpenAI dashboard rather than passed in the API request. This fix adds a fallback to extract the model from the response object when not provided in kwargs, ensuring generations show up with the correct model and enabling cost calculations.
+
 # 7.4.0 - 2025-12-16
 
 feat: Add automatic retries for feature flag requests
diff --git a/posthog/ai/openai/openai.py b/posthog/ai/openai/openai.py
index 11b3fe92..09984745 100644
--- a/posthog/ai/openai/openai.py
+++ b/posthog/ai/openai/openai.py
@@ -124,14 +124,23 @@ def _create_streaming(
         start_time = time.time()
         usage_stats: TokenUsage = TokenUsage()
         final_content = []
+        model_from_response: Optional[str] = None
         response = self._original.create(**kwargs)
 
         def generator():
             nonlocal usage_stats
             nonlocal final_content  # noqa: F824
+            nonlocal model_from_response
             try:
                 for chunk in response:
+                    # Extract model from response object in chunk (for stored prompts)
+                    if hasattr(chunk, "response") and chunk.response:
+                        if model_from_response is None and hasattr(
+                            chunk.response, "model"
+                        ):
+                            model_from_response = chunk.response.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "responses")
@@ -161,6 +170,7 @@ def generator():
                     latency,
                     output,
                     None,  # Responses API doesn't have tools
+                    model_from_response,
                 )
 
         return generator()
@@ -177,6 +187,7 @@ def _capture_streaming_event(
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
@@ -189,9 +200,12 @@ def _capture_streaming_event(
         formatted_input = format_openai_streaming_input(kwargs, "responses")
         sanitized_input = sanitize_openai_response(formatted_input)
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_data = StreamingEventData(
             provider="openai",
-            model=kwargs.get("model", "unknown"),
+            model=model,
             base_url=str(self._client.base_url),
             kwargs=kwargs,
             formatted_input=sanitized_input,
@@ -320,6 +334,7 @@ def _create_streaming(
         usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
+        model_from_response: Optional[str] = None
         if "stream_options" not in kwargs:
             kwargs["stream_options"] = {}
         kwargs["stream_options"]["include_usage"] = True
@@ -329,9 +344,14 @@ def generator():
             nonlocal usage_stats
             nonlocal accumulated_content  # noqa: F824
             nonlocal accumulated_tool_calls
+            nonlocal model_from_response
             try:
                 for chunk in response:
+                    # Extract model from chunk (Chat Completions chunks have model field)
+                    if model_from_response is None and hasattr(chunk, "model"):
+                        model_from_response = chunk.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "chat")
@@ -376,6 +396,7 @@ def generator():
                     accumulated_content,
                     tool_calls_list,
                     extract_available_tool_calls("openai", kwargs),
+                    model_from_response,
                 )
 
         return generator()
@@ -393,6 +414,7 @@ def _capture_streaming_event(
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,
        available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
@@ -405,9 +427,12 @@ def _capture_streaming_event(
         formatted_input = format_openai_streaming_input(kwargs, "chat")
         sanitized_input = sanitize_openai(formatted_input)
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_data = StreamingEventData(
             provider="openai",
-            model=kwargs.get("model", "unknown"),
+            model=model,
             base_url=str(self._client.base_url),
             kwargs=kwargs,
             formatted_input=sanitized_input,
diff --git a/posthog/ai/openai/openai_async.py b/posthog/ai/openai/openai_async.py
index 404895fc..77c9b260 100644
--- a/posthog/ai/openai/openai_async.py
+++ b/posthog/ai/openai/openai_async.py
@@ -128,14 +128,23 @@ async def _create_streaming(
         start_time = time.time()
         usage_stats: TokenUsage = TokenUsage()
         final_content = []
+        model_from_response: Optional[str] = None
         response = await self._original.create(**kwargs)
 
         async def async_generator():
             nonlocal usage_stats
             nonlocal final_content  # noqa: F824
+            nonlocal model_from_response
             try:
                 async for chunk in response:
+                    # Extract model from response object in chunk (for stored prompts)
+                    if hasattr(chunk, "response") and chunk.response:
+                        if model_from_response is None and hasattr(
+                            chunk.response, "model"
+                        ):
+                            model_from_response = chunk.response.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "responses")
@@ -166,6 +175,7 @@ async def async_generator():
                     latency,
                     output,
                     extract_available_tool_calls("openai", kwargs),
+                    model_from_response,
                 )
 
         return async_generator()
@@ -182,13 +192,17 @@ async def _capture_streaming_event(
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         if posthog_trace_id is None:
            posthog_trace_id = str(uuid.uuid4())
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_properties = {
             "$ai_provider": "openai",
-            "$ai_model": kwargs.get("model"),
+            "$ai_model": model,
             "$ai_model_parameters": get_model_params(kwargs),
             "$ai_input": with_privacy_mode(
                 self._client._ph_client,
@@ -350,6 +364,7 @@ async def _create_streaming(
         usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
+        model_from_response: Optional[str] = None
 
         if "stream_options" not in kwargs:
             kwargs["stream_options"] = {}
@@ -360,9 +375,14 @@ async def async_generator():
             nonlocal usage_stats
             nonlocal accumulated_content  # noqa: F824
             nonlocal accumulated_tool_calls
+            nonlocal model_from_response
             try:
                 async for chunk in response:
+                    # Extract model from chunk (Chat Completions chunks have model field)
+                    if model_from_response is None and hasattr(chunk, "model"):
+                        model_from_response = chunk.model
+
                     # Extract usage stats from chunk
                     chunk_usage = extract_openai_usage_from_chunk(chunk, "chat")
                     if chunk_usage:
@@ -405,6 +425,7 @@ async def async_generator():
                     accumulated_content,
                     tool_calls_list,
                     extract_available_tool_calls("openai", kwargs),
+                    model_from_response,
                 )
 
         return async_generator()
@@ -422,13 +443,17 @@ async def _capture_streaming_event(
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+        model_from_response: Optional[str] = None,
     ):
         if posthog_trace_id is None:
             posthog_trace_id = str(uuid.uuid4())
 
+        # Use model from kwargs, fallback to model from response
+        model = kwargs.get("model") or model_from_response or "unknown"
+
         event_properties = {
             "$ai_provider": "openai",
-            "$ai_model": kwargs.get("model"),
+            "$ai_model": model,
             "$ai_model_parameters": get_model_params(kwargs),
             "$ai_input": with_privacy_mode(
                 self._client._ph_client,
diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py
index 559860cc..5f8a4b14 100644
--- a/posthog/ai/utils.py
+++ b/posthog/ai/utils.py
@@ -285,7 +285,7 @@ def call_llm_and_track_usage(
     event_properties = {
         "$ai_provider": provider,
-        "$ai_model": kwargs.get("model"),
+        "$ai_model": kwargs.get("model") or getattr(response, "model", None),
         "$ai_model_parameters": get_model_params(kwargs),
         "$ai_input": with_privacy_mode(
             ph_client, posthog_privacy_mode, sanitized_messages
@@ -396,7 +396,7 @@ async def call_llm_and_track_usage_async(
     event_properties = {
         "$ai_provider": provider,
-        "$ai_model": kwargs.get("model"),
+        "$ai_model": kwargs.get("model") or getattr(response, "model", None),
         "$ai_model_parameters": get_model_params(kwargs),
         "$ai_input": with_privacy_mode(
             ph_client, posthog_privacy_mode, sanitized_messages
diff --git a/posthog/test/ai/openai/test_openai.py b/posthog/test/ai/openai/test_openai.py
index 07d45753..116ba2d1 100644
--- a/posthog/test/ai/openai/test_openai.py
+++ b/posthog/test/ai/openai/test_openai.py
@@ -1676,3 +1676,459 @@ async def chunk_iterable():
         assert props["$ai_web_search_count"] == 1
         assert props["$ai_input_tokens"] == 20
         assert props["$ai_output_tokens"] == 15
+
+
+# Tests for model extraction fallback (stored prompts support)
+
+
+def test_streaming_chat_extracts_model_from_chunk_when_not_in_kwargs(mock_client):
+    """Test that model is extracted from streaming chunks when not provided in kwargs (stored prompts)."""
+
+    # Create streaming chunks with model field but we won't pass model in kwargs
+    chunks = [
+        ChatCompletionChunk(
+            id="chunk1",
+            model="gpt-4o-stored-prompt",  # Model comes from response, not request
+            object="chat.completion.chunk",
+            created=1234567890,
+            choices=[
+                ChoiceChunk(
+                    index=0,
+                    delta=ChoiceDelta(role="assistant", content="Hello"),
+                    finish_reason=None,
+                )
+            ],
+        ),
+        ChatCompletionChunk(
+            id="chunk2",
+            model="gpt-4o-stored-prompt",
+            object="chat.completion.chunk",
+            created=1234567891,
+            choices=[
+                ChoiceChunk(
+                    index=0,
+                    delta=ChoiceDelta(content=" world"),
+                    finish_reason="stop",
+                )
+            ],
+            usage=CompletionUsage(
+                prompt_tokens=10,
+                completion_tokens=5,
+                total_tokens=15,
+            ),
+        ),
+    ]
+
+    with patch("openai.resources.chat.completions.Completions.create") as mock_create:
+        mock_create.return_value = chunks
+
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+
+        # Note: NOT passing model in kwargs - simulates stored prompt usage
+        response_generator = client.chat.completions.create(
+            messages=[{"role": "user", "content": "Hello"}],
+            stream=True,
+            posthog_distinct_id="test-id",
+        )
+
+        # Consume the generator
+        list(response_generator)
+
+        assert mock_client.capture.call_count == 1
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Model should be extracted from chunk, not kwargs
+        assert props["$ai_model"] == "gpt-4o-stored-prompt"
+
+
+def test_streaming_chat_prefers_kwargs_model_over_chunk_model(mock_client):
+    """Test that model from kwargs takes precedence over model from chunk."""
+    chunks = [
+        ChatCompletionChunk(
+            id="chunk1",
+            model="gpt-4o-from-response",
+            object="chat.completion.chunk",
+            created=1234567890,
+            choices=[
+                ChoiceChunk(
+                    index=0,
+                    delta=ChoiceDelta(role="assistant", content="Hello"),
+                    finish_reason="stop",
+                )
+            ],
+            usage=CompletionUsage(
+                prompt_tokens=10,
+                completion_tokens=5,
+                total_tokens=15,
+            ),
+        ),
+    ]
+
+    with patch("openai.resources.chat.completions.Completions.create") as mock_create:
+        mock_create.return_value = chunks
+
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+
+        response_generator = client.chat.completions.create(
+            model="gpt-4o-from-kwargs",  # Explicitly passed model
+            messages=[{"role": "user", "content": "Hello"}],
+            stream=True,
+            posthog_distinct_id="test-id",
+        )
+
+        list(response_generator)
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # kwargs model should take precedence
+        assert props["$ai_model"] == "gpt-4o-from-kwargs"
+
+
+def test_streaming_responses_api_extracts_model_from_response_object(mock_client):
+    """Test that Responses API streaming extracts model from chunk.response.model (stored prompts)."""
+    from unittest.mock import MagicMock
+    from openai.types.responses import ResponseUsage
+
+    chunks = []
+
+    # Content chunk
+    chunk1 = MagicMock()
+    chunk1.type = "response.text.delta"
+    chunk1.text = "Test response"
+    # No response attribute on content chunks
+    del chunk1.response
+    chunks.append(chunk1)
+
+    # Final chunk with response object containing model
+    chunk2 = MagicMock()
+    chunk2.type = "response.completed"
+    chunk2.response = MagicMock()
+    chunk2.response.model = "gpt-4o-mini-stored"  # Model from stored prompt
+    chunk2.response.usage = ResponseUsage(
+        input_tokens=20,
+        output_tokens=10,
+        total_tokens=30,
+        input_tokens_details={"prompt_tokens": 20, "cached_tokens": 0},
+        output_tokens_details={"reasoning_tokens": 0},
+    )
+    chunk2.response.output = ["Test response"]
+    chunks.append(chunk2)
+
+    with patch("openai.resources.responses.Responses.create") as mock_create:
+        mock_create.return_value = iter(chunks)
+
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+
+        # Note: NOT passing model - simulates stored prompt
+        response_generator = client.responses.create(
+            input=[{"role": "user", "content": "Hello"}],
+            stream=True,
+            posthog_distinct_id="test-id",
+        )
+
+        list(response_generator)
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Model should be extracted from chunk.response.model
+        assert props["$ai_model"] == "gpt-4o-mini-stored"
+
+
+def test_non_streaming_extracts_model_from_response(mock_client):
+    """Test that non-streaming calls extract model from response when not in kwargs."""
+    # Create a response with model but we won't pass model in kwargs
+    mock_response = ChatCompletion(
+        id="test",
+        model="gpt-4o-stored-prompt",
+        object="chat.completion",
+        created=int(time.time()),
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(
+                    content="Test response",
+                    role="assistant",
+                ),
+            )
+        ],
+        usage=CompletionUsage(
+            completion_tokens=10,
+            prompt_tokens=20,
+            total_tokens=30,
+        ),
+    )
+
+    with patch(
+        "openai.resources.chat.completions.Completions.create",
+        return_value=mock_response,
+    ):
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+
+        # Note: NOT passing model in kwargs
+        response = client.chat.completions.create(
+            messages=[{"role": "user", "content": "Hello"}],
+            posthog_distinct_id="test-id",
+        )
+
+        assert response == mock_response
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Model should be extracted from response.model
+        assert props["$ai_model"] == "gpt-4o-stored-prompt"
+
+
+def test_non_streaming_responses_api_extracts_model_from_response(mock_client):
+    """Test that non-streaming Responses API extracts model from response when not in kwargs."""
+    mock_response = Response(
+        id="test",
+        model="gpt-4o-mini-stored",
+        object="response",
+        created_at=1741476542,
+        status="completed",
+        error=None,
+        incomplete_details=None,
+        instructions=None,
+        max_output_tokens=None,
+        tools=[],
+        tool_choice="auto",
+        output=[
+            ResponseOutputMessage(
+                id="msg_123",
+                type="message",
+                role="assistant",
+                status="completed",
+                content=[
+                    ResponseOutputText(
+                        type="output_text",
+                        text="Test response",
+                        annotations=[],
+                    )
+                ],
+            )
+        ],
+        parallel_tool_calls=True,
+        previous_response_id=None,
+        usage=ResponseUsage(
+            input_tokens=10,
+            output_tokens=10,
+            input_tokens_details={"prompt_tokens": 10, "cached_tokens": 0},
+            output_tokens_details={"reasoning_tokens": 0},
+            total_tokens=20,
+        ),
+        user=None,
+        metadata={},
+    )
+
+    with patch(
+        "openai.resources.responses.Responses.create",
+        return_value=mock_response,
+    ):
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+
+        # Note: NOT passing model in kwargs
+        response = client.responses.create(
+            input="Hello",
+            posthog_distinct_id="test-id",
+        )
+
+        assert response == mock_response
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Model should be extracted from response.model
+        assert props["$ai_model"] == "gpt-4o-mini-stored"
+
+
+def test_non_streaming_returns_none_when_no_model(mock_client):
+    """Test that non-streaming returns None (not 'unknown') when model is not available anywhere."""
+    # Create a response without model attribute using real OpenAI types
+    mock_response = ChatCompletion(
+        id="test",
+        model="",  # Will be removed below
+        object="chat.completion",
+        created=int(time.time()),
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(
+                    content="Test response",
+                    role="assistant",
+                ),
+            )
+        ],
+        usage=CompletionUsage(
+            completion_tokens=5,
+            prompt_tokens=10,
+            total_tokens=15,
+        ),
+    )
+    # Remove model attribute to simulate missing model
+    object.__delattr__(mock_response, "model")
+
+    with patch(
+        "openai.resources.chat.completions.Completions.create",
+        return_value=mock_response,
+    ):
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+
+        # Note: NOT passing model in kwargs and response has no model
+        client.chat.completions.create(
+            messages=[{"role": "user", "content": "Hello"}],
+            posthog_distinct_id="test-id",
+        )
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Should be None, NOT "unknown" (to avoid incorrect cost matching)
+        assert props["$ai_model"] is None
+
+
+def test_streaming_falls_back_to_unknown_when_no_model(mock_client):
+    """Test that streaming falls back to 'unknown' when model is not available anywhere."""
+    from unittest.mock import MagicMock
+
+    # Create a chunk without model attribute
+    chunk = MagicMock()
+    chunk.choices = [MagicMock()]
+    chunk.choices[0].delta = MagicMock()
+    chunk.choices[0].delta.content = "Hello"
+    chunk.choices[0].delta.role = "assistant"
+    chunk.choices[0].delta.tool_calls = None
+    chunk.usage = CompletionUsage(
+        prompt_tokens=10,
+        completion_tokens=5,
+        total_tokens=15,
+    )
+    # Explicitly remove model attribute
+    del chunk.model
+
+    with patch("openai.resources.chat.completions.Completions.create") as mock_create:
+        mock_create.return_value = [chunk]
+
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+
+        response_generator = client.chat.completions.create(
+            messages=[{"role": "user", "content": "Hello"}],
+            stream=True,
+            posthog_distinct_id="test-id",
+        )
+
+        list(response_generator)
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Should fall back to "unknown"
+        assert props["$ai_model"] == "unknown"
+
+
+@pytest.mark.asyncio
+async def test_async_streaming_chat_extracts_model_from_chunk(mock_client):
+    """Test async streaming extracts model from chunk when not in kwargs."""
+    chunks = [
+        ChatCompletionChunk(
+            id="chunk1",
+            model="gpt-4o-async-stored",
+            object="chat.completion.chunk",
+            created=1234567890,
+            choices=[
+                ChoiceChunk(
+                    index=0,
+                    delta=ChoiceDelta(role="assistant", content="Hello"),
+                    finish_reason="stop",
+                )
+            ],
+            usage=CompletionUsage(
+                prompt_tokens=10,
+                completion_tokens=5,
+                total_tokens=15,
+            ),
+        ),
+    ]
+
+    async def mock_create(self, **kwargs):
+        async def chunk_iterable():
+            for chunk in chunks:
+                yield chunk
+
+        return chunk_iterable()
+
+    with patch(
+        "openai.resources.chat.completions.AsyncCompletions.create", new=mock_create
+    ):
+        client = AsyncOpenAI(api_key="test-key", posthog_client=mock_client)
+
+        # Note: NOT passing model
+        response_stream = await client.chat.completions.create(
+            messages=[{"role": "user", "content": "Hello"}],
+            stream=True,
+            posthog_distinct_id="test-id",
+        )
+
+        async for _ in response_stream:
+            pass
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        assert props["$ai_model"] == "gpt-4o-async-stored"
+
+
+@pytest.mark.asyncio
+async def test_async_streaming_responses_extracts_model_from_response(mock_client):
+    """Test async Responses API streaming extracts model from chunk.response.model."""
+    from unittest.mock import MagicMock
+    from openai.types.responses import ResponseUsage
+
+    chunks = []
+
+    chunk1 = MagicMock()
+    chunk1.type = "response.text.delta"
+    chunk1.text = "Test"
+    del chunk1.response
+    chunks.append(chunk1)
+
+    chunk2 = MagicMock()
+    chunk2.type = "response.completed"
+    chunk2.response = MagicMock()
+    chunk2.response.model = "gpt-4o-mini-async-stored"
+    chunk2.response.usage = ResponseUsage(
+        input_tokens=20,
+        output_tokens=10,
+        total_tokens=30,
+        input_tokens_details={"prompt_tokens": 20, "cached_tokens": 0},
+        output_tokens_details={"reasoning_tokens": 0},
+    )
+    chunk2.response.output = ["Test"]
+    chunks.append(chunk2)
+
+    async def mock_create(self, **kwargs):
+        async def chunk_iterable():
+            for chunk in chunks:
+                yield chunk
+
+        return chunk_iterable()
+
+    with patch("openai.resources.responses.AsyncResponses.create", new=mock_create):
+        client = AsyncOpenAI(api_key="test-key", posthog_client=mock_client)
+
+        response_stream = await client.responses.create(
+            input=[{"role": "user", "content": "Hello"}],
+            stream=True,
+            posthog_distinct_id="test-id",
+        )
+
+        async for _ in response_stream:
+            pass
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        assert props["$ai_model"] == "gpt-4o-mini-async-stored"
diff --git a/posthog/version.py b/posthog/version.py
index 3de0587a..10523a6e 100644
--- a/posthog/version.py
+++ b/posthog/version.py
@@ -1,4 +1,4 @@
-VERSION = "7.4.0"
+VERSION = "7.4.1"
 
 if __name__ == "__main__":
     print(VERSION, end="")  # noqa: T201
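
Note on the fallback order this patch implements: a minimal standalone sketch, separate from the diff above and not part of the SDK. The helper name `resolve_model` is hypothetical; the resolution order (request kwargs first, then the model reported on the response, then "unknown" for the streaming capture paths and None for the non-streaming path) is taken from the changes and tests above.

```python
from typing import Any, Dict, Optional


def resolve_model(kwargs: Dict[str, Any], response: Any, streaming: bool) -> Optional[str]:
    """Hypothetical illustration of the model fallback used in this patch."""
    # Prefer the model explicitly passed in the request kwargs; for stored
    # prompts it is absent, so fall back to the model on the response object
    # (which the OpenAI dashboard defines).
    model = kwargs.get("model") or getattr(response, "model", None)
    if model is None and streaming:
        # Streaming capture paths default to "unknown".
        return "unknown"
    # Non-streaming path keeps None to avoid incorrect cost matching.
    return model
```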