From a46a42ea3717f5ea95c66f00436ae03a41de7fa4 Mon Sep 17 00:00:00 2001
From: Ekaterina Krivich
Date: Mon, 8 Jun 2026 19:58:36 +0300
Subject: [PATCH 1/2] fix(messages): rewrite trailing assistant prefill to a user message

Some Copilot upstream models reject a request whose message list ends with
an assistant turn ("assistant message prefill") with a 400:

  "This model does not support assistant message prefill. The conversation
  must end with a user message."

Anthropic clients such as Claude Code legitimately use prefill to constrain
a reply, so these requests fail when proxied.

Detect a trailing assistant message (that is not an in-flight tool call),
drop it, and re-inject its text as a user instruction asking the model to
emit only the seamless continuation. This reproduces Anthropic's prefill
contract (the response excludes the prefill text) while satisfying
upstream's "must end with a user message" requirement.

Add tests covering text prefill, empty prefill, and the tool-call
exception, and adjust the existing thinking-block test so its assistant
message is no longer the final message.
---
 src/routes/messages/non-stream-translation.ts | 59 +++++++++++++++--
 tests/anthropic-request.test.ts               | 65 +++++++++++++++++++
 2 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
index dc41e6382..be5fcf8e3 100644
--- a/src/routes/messages/non-stream-translation.ts
+++ b/src/routes/messages/non-stream-translation.ts
@@ -29,12 +29,14 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils"
 export function translateToOpenAI(
   payload: AnthropicMessagesPayload,
 ): ChatCompletionsPayload {
+  const messages = translateAnthropicMessagesToOpenAI(
+    payload.messages,
+    payload.system,
+  )
+  fixTrailingAssistantPrefill(messages)
   return {
     model: translateModelName(payload.model),
-    messages: translateAnthropicMessagesToOpenAI(
-      payload.messages,
-      payload.system,
-    ),
+    messages,
     max_tokens: payload.max_tokens,
     stop: payload.stop_sequences,
     stream: payload.stream,
@@ -46,6 +48,55 @@ export function translateToOpenAI(
   }
 }
 
+// Some Copilot upstream models reject a request whose message list ends with
+// an assistant turn ("assistant message prefill"), responding with a 400:
+// "This model does not support assistant message prefill. The conversation
+// must end with a user message."
+// Anthropic clients (e.g. Claude Code) legitimately use prefill to constrain a
+// reply. To stay compatible we drop the trailing assistant prefill and re-add
+// it as a user instruction asking the model to emit only the continuation,
+// reproducing Anthropic's prefill contract (the response excludes the prefill).
+function fixTrailingAssistantPrefill(messages: Array<Message>): void {
+  const last = messages.at(-1)
+  if (!last || last.role !== "assistant") {
+    return
+  }
+  // A trailing tool call is part of an in-flight tool exchange, not a prefill.
+  if ("tool_calls" in last && last.tool_calls && last.tool_calls.length > 0) {
+    return
+  }
+
+  const prefill = extractAssistantText(last.content)
+
+  messages.pop()
+
+  if (prefill.trim().length === 0) {
+    messages.push({ role: "user", content: "Continue." })
+    return
+  }
+
+  messages.push({
+    role: "user",
+    content:
+      "You have already begun your reply with the text below. Do not repeat"
+      + " it and do not add any preamble: output only the text that continues"
+      + ` seamlessly from it.\n\n--- Your reply so far ---\n${prefill}`,
+  })
+}
+
+function extractAssistantText(content: Message["content"]): string {
+  if (typeof content === "string") {
+    return content
+  }
+  if (Array.isArray(content)) {
+    return content
+      .filter((part): part is TextPart => part.type === "text")
+      .map((part) => part.text)
+      .join("")
+  }
+  return ""
+}
+
 function translateModelName(model: string): string {
   // Subagent requests use a specific model number which Copilot doesn't support
   if (model.startsWith("claude-sonnet-4-")) {
diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts
index 06c663778..ae26da2f6 100644
--- a/tests/anthropic-request.test.ts
+++ b/tests/anthropic-request.test.ts
@@ -140,6 +140,7 @@ describe("Anthropic to OpenAI translation logic", () => {
             { type: "text", text: "2+2 equals 4." },
           ],
         },
+        { role: "user", content: "Thanks! And what is 3+3?" },
       ],
       max_tokens: 100,
     }
@@ -197,6 +198,70 @@ describe("Anthropic to OpenAI translation logic", () => {
     expect(assistantMessage?.tool_calls).toHaveLength(1)
     expect(assistantMessage?.tool_calls?.[0].function.name).toBe("get_weather")
   })
+
+  test("should rewrite a trailing assistant prefill into a user message", () => {
+    const anthropicPayload: AnthropicMessagesPayload = {
+      model: "claude-3-5-sonnet-20241022",
+      messages: [
+        { role: "user", content: "Give me a JSON object for the sky color." },
+        { role: "assistant", content: '{"color": "' },
+      ],
+      max_tokens: 100,
+    }
+    const openAIPayload = translateToOpenAI(anthropicPayload)
+
+    // Upstream rejects a request ending with an assistant message, so the
+    // conversation must end with a user message.
+    const lastMessage = openAIPayload.messages.at(-1)
+    expect(lastMessage?.role).toBe("user")
+    expect(openAIPayload.messages.some((m) => m.role === "assistant")).toBe(
+      false,
+    )
+    // The prefill text is preserved inside the injected user instruction.
+    expect(lastMessage?.content).toContain('{"color": "')
+  })
+
+  test("should rewrite an empty trailing assistant prefill", () => {
+    const anthropicPayload: AnthropicMessagesPayload = {
+      model: "claude-3-5-sonnet-20241022",
+      messages: [
+        { role: "user", content: "Hello" },
+        { role: "assistant", content: "" },
+      ],
+      max_tokens: 100,
+    }
+    const openAIPayload = translateToOpenAI(anthropicPayload)
+
+    const lastMessage = openAIPayload.messages.at(-1)
+    expect(lastMessage?.role).toBe("user")
+    expect(lastMessage?.content).toBe("Continue.")
+  })
+
+  test("should not treat a trailing assistant tool call as a prefill", () => {
+    const anthropicPayload: AnthropicMessagesPayload = {
+      model: "claude-3-5-sonnet-20241022",
+      messages: [
+        { role: "user", content: "What's the weather?" },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "tool_use",
+              id: "call_123",
+              name: "get_weather",
+              input: { location: "New York" },
+            },
+          ],
+        },
+      ],
+      max_tokens: 100,
+    }
+    const openAIPayload = translateToOpenAI(anthropicPayload)
+
+    const lastMessage = openAIPayload.messages.at(-1)
+    expect(lastMessage?.role).toBe("assistant")
+    expect(lastMessage?.tool_calls).toHaveLength(1)
+  })
 })
 
 describe("OpenAI Chat Completion v1 Request Payload Validation with Zod", () => {
From f390f3f52a26312115e8f08c7e8758baa9f64eb3 Mon Sep 17 00:00:00 2001
From: Ekaterina Krivich
Date: Sun, 14 Jun 2026 10:48:02 +0300
Subject: [PATCH 2/2] fix(messages): normalize dashed Claude model IDs to dotted Copilot IDs

translateModelName collapsed claude-opus-4-8 -> claude-opus-4, but Copilot
has no bare "claude-opus-4" model (its IDs are dotted: claude-opus-4.8,
claude-sonnet-4.6, ...), so dashed IDs returned HTTP 400
model_not_supported. Claude Code sends dashed IDs by default
(ANTHROPIC_MODEL=claude-opus-4-8), so the proxy rejected every request
from it.

Rewrite the trailing "-N" minor version to ".N" across opus/sonnet/haiku,
mirroring upstream copilot-api normalization.

Verified: claude-opus-4-8 and claude-sonnet-4-6 with trailing-assistant
prefill both return 200.
---
Reviewer note: the added lines were revised in review (snapshot-date
handling), so the post-image blob hash on the "index" line below is stale.

 src/routes/messages/non-stream-translation.ts | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
index be5fcf8e3..66170c4da 100644
--- a/src/routes/messages/non-stream-translation.ts
+++ b/src/routes/messages/non-stream-translation.ts
@@ -98,13 +98,16 @@ function extractAssistantText(content: Message["content"]): string {
 }
 
 function translateModelName(model: string): string {
-  // Subagent requests use a specific model number which Copilot doesn't support
-  if (model.startsWith("claude-sonnet-4-")) {
-    return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4")
-  } else if (model.startsWith("claude-opus-")) {
-    return model.replace(/^claude-opus-4-.*/, "claude-opus-4")
-  }
-  return model
+  // Copilot exposes Claude models with dotted minor versions (e.g.
+  // "claude-opus-4.8"), while Anthropic clients (Claude Code) send dashed IDs
+  // ("claude-opus-4-8"). Rewrite a 1-2 digit "-N" minor version to ".N".
+  // A "-YYYYMMDD" snapshot date ("claude-sonnet-4-20250514") is dropped, as the
+  // previous implementation did, so it is never mistaken for a minor version.
+  return model.replace(
+    /^(claude-(?:opus|sonnet|haiku)-\d+)(?:-(\d{1,2})(?!\d))?(?:-\d{8}(?!\d))?/,
+    (_match: string, base: string, minor?: string) =>
+      minor === undefined ? base : `${base}.${minor}`,
+  )
 }
 
 function translateAnthropicMessagesToOpenAI(