diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..66170c4da 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -29,12 +29,14 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils" export function translateToOpenAI( payload: AnthropicMessagesPayload, ): ChatCompletionsPayload { + const messages = translateAnthropicMessagesToOpenAI( + payload.messages, + payload.system, + ) + fixTrailingAssistantPrefill(messages) return { model: translateModelName(payload.model), - messages: translateAnthropicMessagesToOpenAI( - payload.messages, - payload.system, - ), + messages, max_tokens: payload.max_tokens, stop: payload.stop_sequences, stream: payload.stream, @@ -46,14 +48,61 @@ export function translateToOpenAI( } } -function translateModelName(model: string): string { - // Subagent requests use a specific model number which Copilot doesn't support - if (model.startsWith("claude-sonnet-4-")) { - return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4") - } else if (model.startsWith("claude-opus-")) { - return model.replace(/^claude-opus-4-.*/, "claude-opus-4") +// Some Copilot upstream models reject a request whose message list ends with +// an assistant turn ("assistant message prefill"), responding with a 400: +// "This model does not support assistant message prefill. The conversation +// must end with a user message." +// Anthropic clients (e.g. Claude Code) legitimately use prefill to constrain a +// reply. To stay compatible we drop the trailing assistant prefill and re-add +// it as a user instruction asking the model to emit only the continuation, +// reproducing Anthropic's prefill contract (the response excludes the prefill). 
/**
 * Rewrites a trailing assistant "prefill" turn into a user instruction.
 *
 * If the last message is an assistant message without tool calls, it is
 * removed and replaced by a user message: either a bare "Continue." when the
 * prefill has no visible text, or an instruction that embeds the prefill text
 * and asks for the continuation only. Lists that end with any other role, or
 * with an assistant tool call, are left untouched.
 *
 * @param messages - OpenAI-format message list. Mutated in place.
 */
function fixTrailingAssistantPrefill(messages: Array<Message>): void {
  const last = messages.at(-1)
  if (!last || last.role !== "assistant") {
    return
  }
  // A trailing tool call is part of an in-flight tool exchange, not a prefill.
  if ("tool_calls" in last && last.tool_calls && last.tool_calls.length > 0) {
    return
  }

  const prefill = extractAssistantText(last.content)

  // Drop the assistant turn so the conversation can end with a user message.
  messages.pop()

  // Nothing meaningful to continue from: just nudge the model to answer.
  if (prefill.trim().length === 0) {
    messages.push({ role: "user", content: "Continue." })
    return
  }

  messages.push({
    role: "user",
    content:
      "You have already begun your reply with the text below. Do not repeat"
      + " it and do not add any preamble: output only the text that continues"
      + ` seamlessly from it.\n\n--- Your reply so far ---\n${prefill}`,
  })
}

/**
 * Flattens message content to plain text.
 *
 * @param content - A string, an array of content parts, or anything else.
 * @returns The string itself, the concatenated `text` of all parts whose
 *   `type` is "text" (other parts are ignored), or "" for any other value.
 */
function extractAssistantText(content: Message["content"]): string {
  if (typeof content === "string") {
    return content
  }
  if (Array.isArray(content)) {
    return content
      .filter((part): part is TextPart => part.type === "text")
      .map((part) => part.text)
      .join("")
  }
  return ""
}

// Matches the leading "claude-<family>-<major>" of a model ID, optionally
// followed by a dashed minor version (1-2 digits, so an 8-digit date is never
// mistaken for one) and an optional "-YYYYMMDD" snapshot date.
const CLAUDE_VERSIONED_MODEL =
  /^(claude-(?:opus|sonnet|haiku)-\d+)(?:-(\d{1,2})(?!\d))?(?:-\d{8}(?!\d))?/

/**
 * Normalizes an Anthropic-style Claude model ID to the form Copilot exposes.
 *
 * Copilot exposes Claude models with dotted minor versions (e.g.
 * "claude-opus-4.8"), while Anthropic clients (Claude Code) send dashed IDs
 * ("claude-opus-4-8"). The dashed minor version is rewritten to ".N". A
 * trailing snapshot date ("-20250514") is dropped rather than treated as a
 * minor version, because Copilot does not support those specific model
 * numbers. IDs that do not start with "claude-(opus|sonnet|haiku)-<major>"
 * are returned unchanged.
 *
 * @param model - Model ID as sent by the client.
 * @returns The normalized model ID.
 */
function translateModelName(model: string): string {
  return model.replace(
    CLAUDE_VERSIONED_MODEL,
    (_whole: string, family: string, minor?: string) =>
      minor === undefined ? family : `${family}.${minor}`,
  )
}
}, ], max_tokens: 100, } @@ -197,6 +198,70 @@ describe("Anthropic to OpenAI translation logic", () => { expect(assistantMessage?.tool_calls).toHaveLength(1) expect(assistantMessage?.tool_calls?.[0].function.name).toBe("get_weather") }) + + test("should rewrite a trailing assistant prefill into a user message", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "claude-3-5-sonnet-20241022", + messages: [ + { role: "user", content: "Give me a JSON object for the sky color." }, + { role: "assistant", content: '{"color": "' }, + ], + max_tokens: 100, + } + const openAIPayload = translateToOpenAI(anthropicPayload) + + // Upstream rejects a request ending with an assistant message, so the + // conversation must end with a user message. + const lastMessage = openAIPayload.messages.at(-1) + expect(lastMessage?.role).toBe("user") + expect(openAIPayload.messages.some((m) => m.role === "assistant")).toBe( + false, + ) + // The prefill text is preserved inside the injected user instruction. + expect(lastMessage?.content).toContain('{"color": "') + }) + + test("should rewrite an empty trailing assistant prefill", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "claude-3-5-sonnet-20241022", + messages: [ + { role: "user", content: "Hello" }, + { role: "assistant", content: "" }, + ], + max_tokens: 100, + } + const openAIPayload = translateToOpenAI(anthropicPayload) + + const lastMessage = openAIPayload.messages.at(-1) + expect(lastMessage?.role).toBe("user") + expect(lastMessage?.content).toBe("Continue.") + }) + + test("should not treat a trailing assistant tool call as a prefill", () => { + const anthropicPayload: AnthropicMessagesPayload = { + model: "claude-3-5-sonnet-20241022", + messages: [ + { role: "user", content: "What's the weather?" 
}, + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "call_123", + name: "get_weather", + input: { location: "New York" }, + }, + ], + }, + ], + max_tokens: 100, + } + const openAIPayload = translateToOpenAI(anthropicPayload) + + const lastMessage = openAIPayload.messages.at(-1) + expect(lastMessage?.role).toBe("assistant") + expect(lastMessage?.tool_calls).toHaveLength(1) + }) }) describe("OpenAI Chat Completion v1 Request Payload Validation with Zod", () => {