From 12b9fb48278ffe2415e5e29fb83e6bf13aacb4b1 Mon Sep 17 00:00:00 2001 From: Jacky Zeng Date: Sat, 18 Apr 2026 18:10:49 +0800 Subject: [PATCH] Fix Claude CLI reasoning effort fallback --- src/lib/anthropic-types.ts | 2 +- src/lib/reasoning.ts | 47 +++++++++++++- src/routes/messages.ts | 114 ++++++++++++++++++++++++++++---- src/routes/messages_test.ts | 125 ++++++++++++++++++++++++++++++++++++ 4 files changed, 273 insertions(+), 15 deletions(-) diff --git a/src/lib/anthropic-types.ts b/src/lib/anthropic-types.ts index aabeacd3..7ea82999 100644 --- a/src/lib/anthropic-types.ts +++ b/src/lib/anthropic-types.ts @@ -17,7 +17,7 @@ export interface AnthropicMessagesPayload { name?: string; }; thinking?: { type: "enabled" | "adaptive"; budget_tokens?: number }; - output_config?: { effort?: "low" | "medium" | "high" | "max" }; + output_config?: { effort?: "low" | "medium" | "high" | "xhigh" | "max" }; service_tier?: "auto" | "standard_only"; } diff --git a/src/lib/reasoning.ts b/src/lib/reasoning.ts index 7c5b167f..bfda7e6e 100644 --- a/src/lib/reasoning.ts +++ b/src/lib/reasoning.ts @@ -1,10 +1,26 @@ import type { AnthropicMessagesPayload } from "./anthropic-types.ts"; import type { ResponsesPayload } from "./responses-types.ts"; +export type AnthropicOutputConfigEffort = NonNullable< + NonNullable["effort"] +>; + export type ResponsesReasoningEffort = NonNullable< NonNullable["effort"] >; +type ComparableReasoningEffort = + | AnthropicOutputConfigEffort + | ResponsesReasoningEffort; + +export const ANTHROPIC_OUTPUT_CONFIG_EFFORTS = [ + "low", + "medium", + "high", + "xhigh", + "max", +] as const satisfies readonly AnthropicOutputConfigEffort[]; + export const RESPONSES_REASONING_EFFORTS = [ "none", "minimal", @@ -14,13 +30,14 @@ export const RESPONSES_REASONING_EFFORTS = [ "xhigh", ] as const satisfies readonly ResponsesReasoningEffort[]; -const REASONING_RANK: Record = { +const REASONING_RANK: Record = { none: 0, minimal: 1, low: 2, medium: 3, high: 4, xhigh: 5, + max: 6, }; export function mapThinkingBudgetToReasoningEffort( @@ -38,7 +55,10 @@ export function getAnthropicRequestedReasoningEffort( if (payload.output_config?.effort) { const effort = payload.output_config.effort; if (effort === "max") return "high"; - if (effort === "low" || effort === "medium" || effort === "high") { + if ( + effort === "low" || effort === "medium" || effort === "high" || + effort === "xhigh" + ) { return effort; } } @@ -56,6 +76,20 @@ export function pickSupportedReasoningEffort( requested: ResponsesReasoningEffort | null, supported: readonly ResponsesReasoningEffort[], ): ResponsesReasoningEffort | null { + return pickSupportedComparableEffort(requested, supported); +} + +export function pickSupportedAnthropicOutputConfigEffort( + requested: AnthropicOutputConfigEffort | null, + supported: readonly AnthropicOutputConfigEffort[], +): AnthropicOutputConfigEffort | null { + return pickSupportedComparableEffort(requested, supported); +} + +function pickSupportedComparableEffort( + requested: T | null, + supported: readonly T[], +): T | null { if (!requested || supported.length === 0) return null; if (supported.includes(requested)) return requested; @@ -78,6 +112,15 @@ export function isResponsesReasoningEffort( RESPONSES_REASONING_EFFORTS.includes(value as ResponsesReasoningEffort); } +export function isAnthropicOutputConfigEffort( + value: unknown, +): value is AnthropicOutputConfigEffort { + return typeof value === "string" && + ANTHROPIC_OUTPUT_CONFIG_EFFORTS.includes( + value as AnthropicOutputConfigEffort, + ); +} + export function makeResponsesReasoningId(index: number): string { return `rs_${index}`; } diff --git a/src/routes/messages.ts b/src/routes/messages.ts index fd0d787f..b0e75134 100644 --- a/src/routes/messages.ts +++ b/src/routes/messages.ts @@ -7,7 +7,12 @@ import { import { getGithubCredentials } from "../lib/github.ts"; import { modelSupportsEndpoint } from "../lib/models-cache.ts"; import { normalizeModelName } from "../lib/model-name.ts"; -import { getAnthropicRequestedReasoningEffort } from "../lib/reasoning.ts"; +import { + getAnthropicRequestedReasoningEffort, + isAnthropicOutputConfigEffort, + pickSupportedAnthropicOutputConfigEffort, + type AnthropicOutputConfigEffort, +} from "../lib/reasoning.ts"; import type { AnthropicMessagesPayload, AnthropicStreamState, @@ -138,6 +143,67 @@ function stripCacheControlScope(payload: AnthropicMessagesPayload): void { } } +function parseNativeReasoningEffortSupport( + text: string, +): { supported: AnthropicOutputConfigEffort[] } | null { + let message = text; + let code: string | undefined; + + try { + const parsed = JSON.parse(text) as { + error?: { message?: unknown; code?: unknown }; + }; + if (typeof parsed.error?.message === "string") message = parsed.error.message; + if (typeof parsed.error?.code === "string") code = parsed.error.code; + } catch { + // Ignore malformed upstream error payloads and fall back to string matching. + } + + if (code !== "invalid_reasoning_effort" && + !message.includes("output_config.effort")) { + return null; + } + + const supportedMatch = message.match(/supported values:\s*\[([^\]]*)\]/i); + const supported = supportedMatch + ? supportedMatch[1] + .split(",") + .map((value) => value.trim().replace(/^['\"]|['\"]$/g, "")) + .filter(isAnthropicOutputConfigEffort) + : []; + + return { supported }; +} + +function buildNativeMessagesReasoningRetryPayload( + payload: AnthropicMessagesPayload, + errorText: string, +): AnthropicMessagesPayload | null { + const requested = payload.output_config?.effort; + if (!requested) return null; + + const parsed = parseNativeReasoningEffortSupport(errorText); + if (!parsed) return null; + + const retryEffort = pickSupportedAnthropicOutputConfigEffort( + requested, + parsed.supported, + ); + if (retryEffort === requested) return null; + + const retryPayload: AnthropicMessagesPayload = { ...payload }; + if (retryEffort) { + retryPayload.output_config = { + ...(payload.output_config ?? {}), + effort: retryEffort, + }; + return retryPayload; + } + + delete retryPayload.output_config; + return retryPayload; +} + /** Anthropic-compatible error that triggers compact in Claude Code */ function contextWindowErrorResponse(c: Context) { return c.json({ @@ -294,22 +360,46 @@ async function forwardMessages( accountType: string, fetchOptions: CopilotFetchOptions, ): Promise { - const { service_tier: _, ...cleanPayload } = payload; const wantsStream = !!payload.stream; - // Always stream upstream to avoid blocking on large responses - cleanPayload.stream = true; + const postMessages = (nextPayload: AnthropicMessagesPayload) => { + const { service_tier: _, ...cleanPayload } = nextPayload; - const resp = await copilotFetch( - "/v1/messages", - { method: "POST", body: JSON.stringify(cleanPayload) }, - githubToken, - accountType, - fetchOptions, - ); + // Always stream upstream to avoid blocking on large responses + cleanPayload.stream = true; + + return copilotFetch( + "/v1/messages", + { method: "POST", body: JSON.stringify(cleanPayload) }, + githubToken, + accountType, + fetchOptions, + ); + }; + + let resp = await postMessages(payload); + let errorText: string | null = null; if (!resp.ok) { - const text = await resp.text(); + errorText = await resp.text(); + const retryPayload = buildNativeMessagesReasoningRetryPayload( + payload, + errorText, + ); + if (retryPayload) { + console.warn( + "Retrying native /v1/messages with downgraded output_config.effort:", + payload.output_config?.effort, + "->", + retryPayload.output_config?.effort ?? "omitted", + ); + resp = await postMessages(retryPayload); + errorText = resp.ok ? null : await resp.text(); + } + } + + if (!resp.ok) { + const text = errorText ?? await resp.text(); return isContextWindowError(text) ? contextWindowErrorResponse(c) : anthropicCopilotApiErrorResponse( diff --git a/src/routes/messages_test.ts b/src/routes/messages_test.ts index 083ec09c..60a77c15 100644 --- a/src/routes/messages_test.ts +++ b/src/routes/messages_test.ts @@ -465,6 +465,131 @@ Deno.test("/v1/messages forwards Anthropic tool strict field on native messages" ); }); +Deno.test("/v1/messages retries native requests with downgraded output_config.effort", async () => { + const { apiKey } = await setupAppTest(); + + const upstreamBodies: Array> = []; + + await withMockedFetch(async (request) => { + const url = new URL(request.url); + + if (url.hostname === "update.code.visualstudio.com") { + return jsonResponse(["1.110.1"]); + } + if (url.pathname === "/copilot_internal/v2/token") { + return jsonResponse({ + token: "copilot-access-token", + expires_at: 4102444800, + refresh_in: 3600, + }); + } + if (url.pathname === "/models") { + return jsonResponse(copilotModels([ + { id: "claude-native-reasoning", supported_endpoints: ["/v1/messages"] }, + ])); + } + if (url.pathname === "/v1/messages") { + const body = JSON.parse(await request.text()) as Record; + upstreamBodies.push(body); + + const outputConfig = body.output_config as Record | undefined; + if (outputConfig?.effort === "xhigh") { + return jsonResponse({ + error: { + message: + 'output_config.effort "xhigh" is not supported by model claude-native-reasoning; supported values: [medium]', + code: "invalid_reasoning_effort", + }, + }, 400); + } + + return sseResponse([ + { + event: "message_start", + data: { + type: "message_start", + message: { + id: "msg_reasoning", + type: "message", + role: "assistant", + content: [], + model: "claude-native-reasoning", + stop_reason: null, + stop_sequence: null, + usage: { input_tokens: 10, output_tokens: 0 }, + }, + }, + }, + { + event: "content_block_start", + data: { + type: "content_block_start", + index: 0, + content_block: { type: "text", text: "" }, + }, + }, + { + event: "content_block_delta", + data: { + type: "content_block_delta", + index: 0, + delta: { type: "text_delta", text: "ok" }, + }, + }, + { + event: "content_block_stop", + data: { type: "content_block_stop", index: 0 }, + }, + { + event: "message_delta", + data: { + type: "message_delta", + delta: { stop_reason: "end_turn", stop_sequence: null }, + usage: { output_tokens: 4 }, + }, + }, + { event: "message_stop", data: { type: "message_stop" } }, + ]); + } + + throw new Error(`Unhandled fetch ${request.url}`); + }, async () => { + const response = await requestApp("/v1/messages", { + method: "POST", + headers: { + "content-type": "application/json", + "x-api-key": apiKey.key, + }, + body: JSON.stringify({ + model: "claude-native-reasoning", + max_tokens: 64, + stream: false, + thinking: { type: "enabled", budget_tokens: 4096 }, + output_config: { effort: "xhigh" }, + messages: [{ role: "user", content: "hello" }], + }), + }); + + assertEquals(response.status, 200); + const body = await response.json(); + assertEquals(body.id, "msg_reasoning"); + }); + + assertEquals(upstreamBodies.length, 2); + assertEquals( + ((upstreamBodies[0].output_config as Record).effort), + "xhigh", + ); + assertEquals( + ((upstreamBodies[1].output_config as Record).effort), + "medium", + ); + assertEquals( + ((upstreamBodies[1].thinking as Record).budget_tokens), + 4096, + ); +}); + Deno.test("/v1/messages keeps strict Anthropic tools on native messages when both endpoints are available", async () => { const { apiKey } = await setupAppTest();