From a46a42ea3717f5ea95c66f00436ae03a41de7fa4 Mon Sep 17 00:00:00 2001
From: Ekaterina Krivich <krivich.ekaterina@gmail.com>
Date: Mon, 8 Jun 2026 19:58:36 +0300
Subject: [PATCH 1/2] fix(messages): rewrite trailing assistant prefill to a
 user message

Some Copilot upstream models reject a request whose message list ends
with an assistant turn ("assistant message prefill") with a 400:

  "This model does not support assistant message prefill. The
   conversation must end with a user message."

Anthropic clients such as Claude Code legitimately use prefill to
constrain a reply, so these requests fail when proxied. Detect a
trailing assistant message (that is not an in-flight tool call), drop
it, and re-inject its text as a user instruction asking the model to
emit only the seamless continuation. This reproduces Anthropic's
prefill contract (the response excludes the prefill text) while
satisfying upstream's "must end with a user message" requirement.

Add tests covering text prefill, empty prefill, and the tool-call
exception, and adjust the existing thinking-block test so its assistant
message is no longer the final message.
---
 src/routes/messages/non-stream-translation.ts | 59 +++++++++++++++--
 tests/anthropic-request.test.ts               | 65 +++++++++++++++++++
 2 files changed, 120 insertions(+), 4 deletions(-)
diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
index dc41e6382..be5fcf8e3 100644
--- a/src/routes/messages/non-stream-translation.ts
+++ b/src/routes/messages/non-stream-translation.ts
@@ -29,12 +29,14 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils"
 export function translateToOpenAI(
   payload: AnthropicMessagesPayload,
 ): ChatCompletionsPayload {
+  const messages = translateAnthropicMessagesToOpenAI(
+    payload.messages,
+    payload.system,
+  )
+  fixTrailingAssistantPrefill(messages)
   return {
     model: translateModelName(payload.model),
-    messages: translateAnthropicMessagesToOpenAI(
-      payload.messages,
-      payload.system,
-    ),
+    messages,
     max_tokens: payload.max_tokens,
     stop: payload.stop_sequences,
     stream: payload.stream,
@@ -46,6 +48,55 @@ export function translateToOpenAI(
   }
 }
 
+// Upstream workaround: some Copilot models reject a conversation whose last
+// message is an assistant turn ("assistant message prefill") with a 400:
+//   "This model does not support assistant message prefill. The conversation
+//    must end with a user message."
+// Anthropic clients (e.g. Claude Code) use prefill to constrain a reply, so
+// the trailing assistant turn is replaced, in place, by a user message that
+// quotes it and asks for the continuation only, mirroring Anthropic's prefill
+// contract (the response excludes the prefill).
+function fixTrailingAssistantPrefill(messages: Array<Message>): void {
+  const tail = messages.at(-1)
+  if (tail?.role !== "assistant") {
+    return
+  }
+
+  // A trailing tool call is part of an in-flight tool exchange, not a prefill,
+  // so such a message is left untouched.
+  const pendingCalls = "tool_calls" in tail ? tail.tool_calls : undefined
+  if (pendingCalls && pendingCalls.length > 0) {
+    return
+  }
+
+  // A blank prefill has nothing to quote, so a bare nudge is sent; otherwise
+  // the prefill text is appended to an instruction to continue from it.
+  const prefill = extractAssistantText(tail.content)
+  let content = "Continue."
+  if (prefill.trim().length > 0) {
+    content =
+      "You have already begun your reply with the text below. Do not repeat"
+      + " it and do not add any preamble: output only the text that continues"
+      + ` seamlessly from it.\n\n--- Your reply so far ---\n${prefill}`
+  }
+
+  // Replace the last slot in place (equivalent to pop followed by push).
+  messages[messages.length - 1] = { role: "user", content }
+}
+
+// Flattens message content to text: a string passes through, an array yields
+// its text parts joined with no separator, and anything else yields "".
+function extractAssistantText(content: Message["content"]): string {
+  if (typeof content === "string") {
+    return content
+  }
+  if (!Array.isArray(content)) {
+    return ""
+  }
+  const texts = content.flatMap((p) => (p.type === "text" ? [p.text] : []))
+  return texts.join("")
+}
+
 function translateModelName(model: string): string {
   // Subagent requests use a specific model number which Copilot doesn't support
   if (model.startsWith("claude-sonnet-4-")) {
diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts
index 06c663778..ae26da2f6 100644
--- a/tests/anthropic-request.test.ts
+++ b/tests/anthropic-request.test.ts
@@ -140,6 +140,7 @@ describe("Anthropic to OpenAI translation logic", () => {
             { type: "text", text: "2+2 equals 4." },
           ],
         },
+        { role: "user", content: "Thanks! And what is 3+3?" },
       ],
       max_tokens: 100,
     }
@@ -197,6 +198,70 @@ describe("Anthropic to OpenAI translation logic", () => {
     expect(assistantMessage?.tool_calls).toHaveLength(1)
     expect(assistantMessage?.tool_calls?.[0].function.name).toBe("get_weather")
   })
+
+  test("should rewrite a trailing assistant prefill into a user message", () => {
+    const anthropicPayload: AnthropicMessagesPayload = {
+      model: "claude-3-5-sonnet-20241022",
+      messages: [
+        { role: "user", content: "Give me a JSON object for the sky color." },
+        { role: "assistant", content: '{"color": "' },
+      ],
+      max_tokens: 100,
+    }
+    const openAIPayload = translateToOpenAI(anthropicPayload)
+
+    // Upstream rejects a request ending with an assistant message, so the
+    // conversation must end with a user message.
+    const lastMessage = openAIPayload.messages.at(-1)
+    expect(lastMessage?.role).toBe("user")
+    expect(openAIPayload.messages.some((m) => m.role === "assistant")).toBe(
+      false,
+    )
+    // The prefill text is preserved inside the injected user instruction.
+    expect(lastMessage?.content).toContain('{"color": "')
+  })
+
+  test("should rewrite an empty trailing assistant prefill", () => {
+    const anthropicPayload: AnthropicMessagesPayload = {
+      model: "claude-3-5-sonnet-20241022",
+      messages: [
+        { role: "user", content: "Hello" },
+        { role: "assistant", content: "" },
+      ],
+      max_tokens: 100,
+    }
+    const openAIPayload = translateToOpenAI(anthropicPayload)
+
+    const lastMessage = openAIPayload.messages.at(-1)
+    expect(lastMessage?.role).toBe("user")
+    expect(lastMessage?.content).toBe("Continue.")
+  })
+
+  test("should not treat a trailing assistant tool call as a prefill", () => {
+    const anthropicPayload: AnthropicMessagesPayload = {
+      model: "claude-3-5-sonnet-20241022",
+      messages: [
+        { role: "user", content: "What's the weather?" },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "tool_use",
+              id: "call_123",
+              name: "get_weather",
+              input: { location: "New York" },
+            },
+          ],
+        },
+      ],
+      max_tokens: 100,
+    }
+    const openAIPayload = translateToOpenAI(anthropicPayload)
+
+    const lastMessage = openAIPayload.messages.at(-1)
+    expect(lastMessage?.role).toBe("assistant")
+    expect(lastMessage?.tool_calls).toHaveLength(1)
+  })
 })
 
 describe("OpenAI Chat Completion v1 Request Payload Validation with Zod", () => {

From f390f3f52a26312115e8f08c7e8758baa9f64eb3 Mon Sep 17 00:00:00 2001
From: Ekaterina Krivich <krivich.ekaterina@gmail.com>
Date: Sun, 14 Jun 2026 10:48:02 +0300
Subject: [PATCH 2/2] fix(messages): normalize dashed Claude model IDs to
 dotted Copilot IDs

translateModelName collapsed claude-opus-4-8 -> claude-opus-4, but Copilot
has no bare "claude-opus-4" model (its IDs are dotted: claude-opus-4.8,
claude-sonnet-4.6, ...), so dashed IDs returned HTTP 400 model_not_supported.
Claude Code sends dashed IDs by default (ANTHROPIC_MODEL=claude-opus-4-8),
so the proxy rejected every request from it.

Rewrite the trailing "-N" minor version to ".N" across opus/sonnet/haiku,
mirroring upstream copilot-api normalization. Verified: claude-opus-4-8 and
claude-sonnet-4-6 with trailing-assistant prefill both return 200.
---
 src/routes/messages/non-stream-translation.ts | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
index be5fcf8e3..66170c4da 100644
--- a/src/routes/messages/non-stream-translation.ts
+++ b/src/routes/messages/non-stream-translation.ts
@@ -98,13 +98,11 @@ function extractAssistantText(content: Message["content"]): string {
 }
 
 function translateModelName(model: string): string {
-  // Subagent requests use a specific model number which Copilot doesn't support
-  if (model.startsWith("claude-sonnet-4-")) {
-    return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4")
-  } else if (model.startsWith("claude-opus-")) {
-    return model.replace(/^claude-opus-4-.*/, "claude-opus-4")
-  }
-  return model
+  // Copilot wants "claude-opus-4.8"; Anthropic clients send "claude-opus-4-8",
+  // optionally dated ("-YYYYMMDD"). Drop the date, then dot a 1-2 digit minor.
+  return model
+    .replace(/^(claude-(?:opus|sonnet|haiku)-\d+(?:-\d{1,2})?)-\d{8}$/, "$1")
+    .replace(/^(claude-(?:opus|sonnet|haiku)-\d+)-(\d{1,2})(?!\d)/, "$1.$2")
 }
 
 function translateAnthropicMessagesToOpenAI(