diff --git a/providers/openai/openai_test.go b/providers/openai/openai_test.go index 07bcdc981..0d03a629a 100644 --- a/providers/openai/openai_test.go +++ b/providers/openai/openai_test.go @@ -4016,20 +4016,69 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) { }, } - t.Run("store true skips reasoning", func(t *testing.T) { + t.Run("store true emits item_reference for reasoning", func(t *testing.T) { t.Parallel() input, warnings := toResponsesPrompt(prompt, "system", true) require.Empty(t, warnings) - // With store=true: user, assistant text (reasoning - // skipped), follow-up user. - require.Len(t, input, 3) + // With store=true the reasoning item is replayed as an + // item_reference so any following provider-executed + // item (e.g. web_search_call) pairs correctly. Order: + // user, item_reference(rs_*), assistant text, user. + require.Len(t, input, 4) - // Verify no reasoning item leaked through. + // No inline OfReasoning items should ever appear; the + // API rejects them when store is enabled. 
for _, item := range input { require.Nil(t, item.OfReasoning, - "reasoning items must not appear when store=true") + "reasoning items must not appear inline when store=true") + } + + require.NotNil(t, input[1].OfItemReference, + "expected reasoning replayed via item_reference") + require.Equal(t, reasoningItemID, input[1].OfItemReference.ID) + }) + + t.Run("store true skips reasoning when item id missing", func(t *testing.T) { + t.Parallel() + + noIDPrompt := fantasy.Prompt{ + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "What is 2+2?"}, + }, + }, + { + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ReasoningPart{ + Text: "thinking", + ProviderOptions: fantasy.ProviderOptions{ + Name: &ResponsesReasoningMetadata{Summary: []string{}}, + }, + }, + fantasy.TextPart{Text: "4"}, + }, + }, + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "And 3+3?"}, + }, + }, + } + + input, warnings := toResponsesPrompt(noIDPrompt, "system", true) + require.Empty(t, warnings) + + // Without an ItemID we cannot reference; the reasoning + // part is skipped. Order: user, assistant text, user. + require.Len(t, input, 3) + for _, item := range input { + require.Nil(t, item.OfReasoning) + require.Nil(t, item.OfItemReference) } }) @@ -4040,15 +4089,175 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) { require.Empty(t, warnings) // With store=false: user, assistant text, follow-up user. + // Reasoning IDs are ephemeral and cannot be referenced. 
require.Len(t, input, 3) for _, item := range input { require.Nil(t, item.OfReasoning, "reasoning items must not appear when store=false") + require.Nil(t, item.OfItemReference, + "reasoning item_reference must not appear when store=false") + } + }) +} + +// TestResponsesToPrompt_ReasoningWithWebSearchCombined verifies that a +// single assistant turn containing reasoning + a provider-executed +// web_search_call replays both items as item_references in order, so +// the OpenAI API does not reject the input with: +// +// Item 'ws_xxx' of type 'web_search_call' was provided without its +// required 'reasoning' item: 'rs_xxx'. +func TestResponsesToPrompt_ReasoningWithWebSearchCombined(t *testing.T) { + t.Parallel() + + reasoningItemID := "rs_002" + webSearchItemID := "ws_002" + + prompt := fantasy.Prompt{ + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "What is the weather in San Francisco?"}, + }, + }, + { + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ReasoningPart{ + Text: "I should look this up.", + ProviderOptions: fantasy.ProviderOptions{ + Name: &ResponsesReasoningMetadata{ + ItemID: reasoningItemID, + Summary: []string{}, + }, + }, + }, + fantasy.ToolCallPart{ + ToolCallID: webSearchItemID, + ToolName: "web_search", + ProviderExecuted: true, + }, + fantasy.ToolResultPart{ + ToolCallID: webSearchItemID, + ProviderExecuted: true, + }, + fantasy.TextPart{Text: "Sunny."}, + }, + }, + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "And Tokyo?"}, + }, + }, + } + + t.Run("store true pairs reasoning and web search via item_reference", func(t *testing.T) { + t.Parallel() + + input, warnings := toResponsesPrompt(prompt, "system", true) + require.Empty(t, warnings) + + // Order: user, item_reference(rs_*), item_reference(ws_*), + // assistant text, user. 
+ require.Len(t, input, 5) + + require.NotNil(t, input[1].OfItemReference) + require.Equal(t, reasoningItemID, input[1].OfItemReference.ID, + "reasoning item_reference must precede web_search item_reference") + + require.NotNil(t, input[2].OfItemReference) + require.Equal(t, webSearchItemID, input[2].OfItemReference.ID) + }) + + t.Run("store false skips both reasoning and provider tool call", func(t *testing.T) { + t.Parallel() + + input, warnings := toResponsesPrompt(prompt, "system", false) + require.Empty(t, warnings) + + // Both reasoning and the provider-executed web_search_call + // are skipped under store=false. user, assistant text, user. + require.Len(t, input, 3) + for _, item := range input { + require.Nil(t, item.OfItemReference) + require.Nil(t, item.OfReasoning) } }) } +// TestResponsesToPrompt_ReasoningWithFunctionCallCombined verifies that +// reasoning + a regular function_call (and its function_call_output) +// round-trips with the reasoning item_reference preceding the function +// call when store=true. 
+func TestResponsesToPrompt_ReasoningWithFunctionCallCombined(t *testing.T) { + t.Parallel() + + reasoningItemID := "rs_003" + functionCallID := "call_003" + + prompt := fantasy.Prompt{ + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "compute 1+1"}, + }, + }, + { + Role: fantasy.MessageRoleAssistant, + Content: []fantasy.MessagePart{ + fantasy.ReasoningPart{ + Text: "I'll call add.", + ProviderOptions: fantasy.ProviderOptions{ + Name: &ResponsesReasoningMetadata{ + ItemID: reasoningItemID, + Summary: []string{}, + }, + }, + }, + fantasy.ToolCallPart{ + ToolCallID: functionCallID, + ToolName: "add", + Input: `{"a":1,"b":1}`, + }, + }, + }, + { + Role: fantasy.MessageRoleTool, + Content: []fantasy.MessagePart{ + fantasy.ToolResultPart{ + ToolCallID: functionCallID, + Output: fantasy.ToolResultOutputContentText{Text: "2"}, + }, + }, + }, + { + Role: fantasy.MessageRoleUser, + Content: []fantasy.MessagePart{ + fantasy.TextPart{Text: "thanks"}, + }, + }, + } + + input, warnings := toResponsesPrompt(prompt, "system", true) + require.Empty(t, warnings) + + // Order: user, item_reference(rs_003), function_call(call_003), + // function_call_output(call_003), user. 
+ require.Len(t, input, 5) + + require.NotNil(t, input[1].OfItemReference) + require.Equal(t, reasoningItemID, input[1].OfItemReference.ID, + "reasoning item_reference must precede function_call") + + require.NotNil(t, input[2].OfFunctionCall) + require.Equal(t, functionCallID, input[2].OfFunctionCall.CallID) + + require.NotNil(t, input[3].OfFunctionCallOutput) + require.Equal(t, functionCallID, input[3].OfFunctionCallOutput.CallID) +} + func TestResponsesStream_WebSearchResponse(t *testing.T) { t.Parallel() diff --git a/providers/openai/responses_language_model.go b/providers/openai/responses_language_model.go index eb027109e..b8b344cc8 100644 --- a/providers/openai/responses_language_model.go +++ b/providers/openai/responses_language_model.go @@ -565,12 +565,44 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo // recognised Responses API input type; skip. continue case fantasy.ContentTypeReasoning: - // Reasoning items are always skipped during replay. - // When store is enabled, the API already has them - // persisted server-side. When store is disabled, the - // item IDs are ephemeral and referencing them causes - // "Item not found" errors. In both cases, replaying - // reasoning inline is not supported by the API. + if !store { + // When store is disabled, server-side reasoning + // items are ephemeral and the IDs cannot be + // referenced. Provider-executed tool calls in the + // same turn are also skipped under store=false, + // so there is nothing to pair with. + continue + } + // Store=true: replay the reasoning item via + // item_reference using the persisted ItemID. Without + // this reference the API rejects any following + // provider-executed item (e.g. web_search_call) with: + // + // Item 'ws_xxx' of type 'web_search_call' was + // provided without its required 'reasoning' item: + // 'rs_xxx'. 
+ // + // Inline OfReasoning replay is intentionally not + // used: the API rejects reconstructed reasoning + // items because they cannot be paired with the + // output items that originally followed them + // (see fantasy upstream PR #181). + reasoningPart, ok := fantasy.AsContentType[fantasy.ReasoningPart](c) + if !ok { + warnings = append(warnings, fantasy.CallWarning{ + Type: fantasy.CallWarningTypeOther, + Message: "assistant reasoning part does not have the right type", + }) + continue + } + meta := GetReasoningMetadata(reasoningPart.ProviderOptions) + if meta == nil || meta.ItemID == "" { + // No persisted ID to reference. Falling back to + // skipping is safe; the rest of the assistant + // message still replays. + continue + } + input = append(input, responses.ResponseInputItemParamOfItemReference(meta.ItemID)) continue } }