ericc-ch · kiote · Jun 8, 2026 · Jun 14, 2026
diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
@@ -29,12 +29,14 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils"
 export function translateToOpenAI(
   payload: AnthropicMessagesPayload,
 ): ChatCompletionsPayload {
+  const messages = translateAnthropicMessagesToOpenAI(
+    payload.messages,
+    payload.system,
+  )
+  fixTrailingAssistantPrefill(messages)
   return {
     model: translateModelName(payload.model),
-    messages: translateAnthropicMessagesToOpenAI(
-      payload.messages,
-      payload.system,
-    ),
+    messages,
     max_tokens: payload.max_tokens,
     stop: payload.stop_sequences,
     stream: payload.stream,
@@ -46,14 +48,91 @@ export function translateToOpenAI(
   }
 }
 
-function translateModelName(model: string): string {
-  // Subagent requests use a specific model number which Copilot doesn't support
-  if (model.startsWith("claude-sonnet-4-")) {
-    return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4")
-  } else if (model.startsWith("claude-opus-")) {
-    return model.replace(/^claude-opus-4-.*/, "claude-opus-4")
-  }
-  return model
-}
+/**
+ * Rewrites a trailing assistant "prefill" turn into a user instruction.
+ *
+ * Some Copilot upstream models reject a request whose message list ends with
+ * an assistant turn ("assistant message prefill"), responding with a 400:
+ *   "This model does not support assistant message prefill. The conversation
+ *    must end with a user message."
+ * Anthropic clients (e.g. Claude Code) legitimately use prefill to constrain a
+ * reply. To stay compatible we drop the trailing assistant prefill and re-add
+ * it as a user instruction asking the model to emit only the continuation,
+ * reproducing Anthropic's prefill contract (the response excludes the prefill).
+ *
+ * @param messages - Message list to fix; mutated in place. It is left
+ *   untouched unless its last entry is an assistant turn without tool calls.
+ */
+function fixTrailingAssistantPrefill(messages: Array<Message>): void {
+  const last = messages.at(-1)
+  if (!last || last.role !== "assistant") {
+    return
+  }
+  // A trailing tool call is part of an in-flight tool exchange, not a prefill.
+  if ("tool_calls" in last && last.tool_calls && last.tool_calls.length > 0) {
+    return
+  }
+
+  const prefill = extractAssistantText(last.content)
+
+  messages.pop()
+
+  // No text to continue from: a bare nudge still ends the list on a user turn.
+  if (prefill.trim().length === 0) {
+    messages.push({ role: "user", content: "Continue." })
+    return
+  }
+
+  messages.push({
+    role: "user",
+    content:
+      "You have already begun your reply with the text below. Do not repeat"
+      + " it and do not add any preamble: output only the text that continues"
+      + ` seamlessly from it.\n\n--- Your reply so far ---\n${prefill}`,
+  })
+}
+
+/**
+ * Flattens message content to plain text.
+ *
+ * @param content - Message content: a string, an array of parts, or neither.
+ * @returns The string itself; for an array, its "text" parts concatenated
+ *   without a separator (other part types are skipped); otherwise "".
+ */
+function extractAssistantText(content: Message["content"]): string {
+  if (typeof content === "string") {
+    return content
+  }
+  if (Array.isArray(content)) {
+    return content
+      .filter((part): part is TextPart => part.type === "text")
+      .map((part) => part.text)
+      .join("")
+  }
+  return ""
+}
+
+/**
+ * Maps an Anthropic-style Claude model ID to the ID form Copilot exposes.
+ *
+ * Copilot exposes Claude models with dotted minor versions (e.g.
+ * "claude-opus-4.8"), while Anthropic clients (Claude Code) send dashed IDs
+ * ("claude-opus-4-8"), optionally followed by a "-YYYYMMDD" snapshot date
+ * ("claude-sonnet-4-20250514", "claude-sonnet-4-5-20250929"). The minor
+ * version is limited to one or two digits so an eight-digit date is never
+ * mistaken for it, and the date itself is dropped: Copilot does not support
+ * the dated model IDs that subagent requests use.
+ *
+ * @param model - Model ID as sent by the client.
+ * @returns The normalized ID; IDs not starting with
+ *   "claude-<opus|sonnet|haiku>-<major>" are returned unchanged.
+ */
+function translateModelName(model: string): string {
+  return model.replace(
+    /^(claude-(?:opus|sonnet|haiku)-\d+)(?:-(\d{1,2}))?(?:-\d{8})?(?!\d)/,
+    (_match: string, base: string, minor?: string) =>
+      minor === undefined ? base : `${base}.${minor}`,
+  )
+}
 
 function translateAnthropicMessagesToOpenAI(

diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts
@@ -140,6 +140,7 @@ describe("Anthropic to OpenAI translation logic", () => {
             { type: "text", text: "2+2 equals 4." },
           ],
         },
+        { role: "user", content: "Thanks! And what is 3+3?" },
       ],
       max_tokens: 100,
     }
@@ -197,6 +198,89 @@ describe("Anthropic to OpenAI translation logic", () => {
     expect(assistantMessage?.tool_calls).toHaveLength(1)
     expect(assistantMessage?.tool_calls?.[0].function.name).toBe("get_weather")
   })
+
+  test("should rewrite a trailing assistant prefill into a user message", () => {
+    const anthropicPayload: AnthropicMessagesPayload = {
+      model: "claude-3-5-sonnet-20241022",
+      messages: [
+        { role: "user", content: "Give me a JSON object for the sky color." },
+        { role: "assistant", content: '{"color": "' },
+      ],
+      max_tokens: 100,
+    }
+    const openAIPayload = translateToOpenAI(anthropicPayload)
+
+    // Upstream rejects a request ending with an assistant message, so the
+    // conversation must end with a user message.
+    const lastMessage = openAIPayload.messages.at(-1)
+    expect(lastMessage?.role).toBe("user")
+    expect(openAIPayload.messages.some((m) => m.role === "assistant")).toBe(
+      false,
+    )
+    // The prefill text is preserved inside the injected user instruction.
+    expect(lastMessage?.content).toContain('{"color": "')
+  })
+
+  test("should rewrite an empty trailing assistant prefill", () => {
+    const anthropicPayload: AnthropicMessagesPayload = {
+      model: "claude-3-5-sonnet-20241022",
+      messages: [
+        { role: "user", content: "Hello" },
+        { role: "assistant", content: "" },
+      ],
+      max_tokens: 100,
+    }
+    const openAIPayload = translateToOpenAI(anthropicPayload)
+
+    const lastMessage = openAIPayload.messages.at(-1)
+    expect(lastMessage?.role).toBe("user")
+    expect(lastMessage?.content).toBe("Continue.")
+  })
+
+  test("should not treat a trailing assistant tool call as a prefill", () => {
+    const anthropicPayload: AnthropicMessagesPayload = {
+      model: "claude-3-5-sonnet-20241022",
+      messages: [
+        { role: "user", content: "What's the weather?" },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "tool_use",
+              id: "call_123",
+              name: "get_weather",
+              input: { location: "New York" },
+            },
+          ],
+        },
+      ],
+      max_tokens: 100,
+    }
+    const openAIPayload = translateToOpenAI(anthropicPayload)
+
+    const lastMessage = openAIPayload.messages.at(-1)
+    expect(lastMessage?.role).toBe("assistant")
+    expect(lastMessage?.tool_calls).toHaveLength(1)
+  })
+
+  test("should translate dashed Claude minor versions to dotted IDs", () => {
+    const anthropicPayload: AnthropicMessagesPayload = {
+      model: "claude-opus-4-8",
+      messages: [{ role: "user", content: "Hello" }],
+      max_tokens: 100,
+    }
+    const openAIPayload = translateToOpenAI(anthropicPayload)
+
+    // Copilot spells the minor version with a dot: "claude-opus-4.8".
+    expect(openAIPayload.model).toBe("claude-opus-4.8")
+    // IDs outside the "claude-<family>-<major>" scheme pass through untouched.
+    expect(
+      translateToOpenAI({
+        ...anthropicPayload,
+        model: "claude-3-5-sonnet-20241022",
+      }).model,
+    ).toBe("claude-3-5-sonnet-20241022")
+  })
 })
 
 describe("OpenAI Chat Completion v1 Request Payload Validation with Zod", () => {