Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/lib/anthropic-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ export interface AnthropicMessagesPayload {
name?: string;
};
thinking?: { type: "enabled" | "adaptive"; budget_tokens?: number };
output_config?: { effort?: "low" | "medium" | "high" | "max" };
output_config?: { effort?: "low" | "medium" | "high" | "xhigh" | "max" };
service_tier?: "auto" | "standard_only";
}

Expand Down
47 changes: 45 additions & 2 deletions src/lib/reasoning.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
import type { AnthropicMessagesPayload } from "./anthropic-types.ts";
import type { ResponsesPayload } from "./responses-types.ts";

/**
 * The non-nullish `effort` value accepted in `output_config` on an
 * `AnthropicMessagesPayload`. Derived from the payload type so the two
 * cannot drift apart.
 */
export type AnthropicOutputConfigEffort = NonNullable<
NonNullable<AnthropicMessagesPayload["output_config"]>["effort"]
>;

/**
 * The non-nullish `effort` value accepted in `reasoning` on a
 * `ResponsesPayload`. Derived from the payload type.
 */
export type ResponsesReasoningEffort = NonNullable<
NonNullable<ResponsesPayload["reasoning"]>["effort"]
>;

/** Union of both effort vocabularies. */
type ComparableReasoningEffort =
| AnthropicOutputConfigEffort
| ResponsesReasoningEffort;

/**
 * Runtime list of the Anthropic `output_config.effort` values.
 * `satisfies` makes the compiler verify that every entry is a valid
 * `AnthropicOutputConfigEffort` while `as const` keeps the literal types.
 */
export const ANTHROPIC_OUTPUT_CONFIG_EFFORTS = [
"low",
"medium",
"high",
"xhigh",
"max",
] as const satisfies readonly AnthropicOutputConfigEffort[];

export const RESPONSES_REASONING_EFFORTS = [
"none",
"minimal",
Expand All @@ -14,13 +30,14 @@ export const RESPONSES_REASONING_EFFORTS = [
"xhigh",
] as const satisfies readonly ResponsesReasoningEffort[];

const REASONING_RANK: Record<ResponsesReasoningEffort, number> = {
const REASONING_RANK: Record<ComparableReasoningEffort, number> = {
none: 0,
minimal: 1,
low: 2,
medium: 3,
high: 4,
xhigh: 5,
max: 6,
};

export function mapThinkingBudgetToReasoningEffort(
Expand All @@ -38,7 +55,10 @@ export function getAnthropicRequestedReasoningEffort(
if (payload.output_config?.effort) {
const effort = payload.output_config.effort;
if (effort === "max") return "high";
if (effort === "low" || effort === "medium" || effort === "high") {
if (
effort === "low" || effort === "medium" || effort === "high" ||
effort === "xhigh"
) {
return effort;
}
}
Expand All @@ -56,6 +76,20 @@ export function pickSupportedReasoningEffort(
requested: ResponsesReasoningEffort | null,
supported: readonly ResponsesReasoningEffort[],
): ResponsesReasoningEffort | null {
return pickSupportedComparableEffort(requested, supported);
}

/**
 * Anthropic-typed entry point for effort selection.
 *
 * Delegates to `pickSupportedComparableEffort` with the Anthropic
 * `output_config.effort` vocabulary and returns its result unchanged.
 *
 * @param requested The effort the caller asked for, or `null` if none.
 * @param supported The efforts the target is known to accept.
 * @returns Whatever the shared picker selects (an effort or `null`).
 */
export function pickSupportedAnthropicOutputConfigEffort(
  requested: AnthropicOutputConfigEffort | null,
  supported: readonly AnthropicOutputConfigEffort[],
): AnthropicOutputConfigEffort | null {
  const chosen = pickSupportedComparableEffort(requested, supported);
  return chosen;
}

function pickSupportedComparableEffort<T extends ComparableReasoningEffort>(
requested: T | null,
supported: readonly T[],
): T | null {
if (!requested || supported.length === 0) return null;
if (supported.includes(requested)) return requested;

Expand All @@ -78,6 +112,15 @@ export function isResponsesReasoningEffort(
RESPONSES_REASONING_EFFORTS.includes(value as ResponsesReasoningEffort);
}

/**
 * Type guard: reports whether an arbitrary value is one of the strings
 * listed in `ANTHROPIC_OUTPUT_CONFIG_EFFORTS`.
 *
 * @param value Any value, typically untrusted input.
 * @returns `true` only for a string that appears in the effort list.
 */
export function isAnthropicOutputConfigEffort(
  value: unknown,
): value is AnthropicOutputConfigEffort {
  if (typeof value !== "string") return false;

  // View the tuple as plain strings so membership can be tested without
  // asserting that `value` is already a valid effort.
  const knownEfforts: readonly string[] = ANTHROPIC_OUTPUT_CONFIG_EFFORTS;
  return knownEfforts.includes(value);
}

/**
 * Builds a reasoning item id by prefixing the given index with `rs_`.
 *
 * @param index Numeric position to embed in the id.
 * @returns The string `rs_<index>`.
 */
export function makeResponsesReasoningId(index: number): string {
  const prefix = "rs_";
  return prefix + String(index);
}
114 changes: 102 additions & 12 deletions src/routes/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@ import {
import { getGithubCredentials } from "../lib/github.ts";
import { modelSupportsEndpoint } from "../lib/models-cache.ts";
import { normalizeModelName } from "../lib/model-name.ts";
import { getAnthropicRequestedReasoningEffort } from "../lib/reasoning.ts";
import {
getAnthropicRequestedReasoningEffort,
isAnthropicOutputConfigEffort,
pickSupportedAnthropicOutputConfigEffort,
type AnthropicOutputConfigEffort,
} from "../lib/reasoning.ts";
import type {
AnthropicMessagesPayload,
AnthropicStreamState,
Expand Down Expand Up @@ -137,6 +142,67 @@ function stripCacheControlScope(payload: AnthropicMessagesPayload): void {
}
}

/**
 * Inspects an upstream error body and, if it is about reasoning effort,
 * extracts the effort values the upstream says it supports.
 *
 * The body is first tried as JSON with an `error.message` / `error.code`
 * shape; if that fails, the raw text is used as the message.
 *
 * @param text Raw error response body.
 * @returns `null` when the error is unrelated to reasoning effort (neither
 *   the code `invalid_reasoning_effort` nor a mention of
 *   `output_config.effort`). Otherwise an object whose `supported` array
 *   holds the recognised efforts from a `supported values: [...]` list, or
 *   is empty when no such list is present.
 */
function parseNativeReasoningEffortSupport(
  text: string,
): { supported: AnthropicOutputConfigEffort[] } | null {
  let errorCode: string | undefined;
  let errorMessage = text;

  try {
    const body = JSON.parse(text) as {
      error?: { message?: unknown; code?: unknown };
    };
    const upstreamError = body.error;
    if (typeof upstreamError?.message === "string") {
      errorMessage = upstreamError.message;
    }
    if (typeof upstreamError?.code === "string") {
      errorCode = upstreamError.code;
    }
  } catch {
    // Body was not usable JSON; keep matching against the raw text.
  }

  const isEffortError = errorCode === "invalid_reasoning_effort" ||
    errorMessage.includes("output_config.effort");
  if (!isEffortError) return null;

  const listMatch = /supported values:\s*\[([^\]]*)\]/i.exec(errorMessage);
  if (!listMatch) return { supported: [] };

  const supported: AnthropicOutputConfigEffort[] = [];
  for (const entry of listMatch[1].split(",")) {
    // Entries may be bare words or wrapped in single/double quotes.
    const candidate = entry.trim().replace(/^['\"]|['\"]$/g, "");
    if (isAnthropicOutputConfigEffort(candidate)) supported.push(candidate);
  }

  return { supported };
}

/**
 * Produces a copy of the request payload suitable for one retry after the
 * upstream rejected `output_config.effort`.
 *
 * @param payload The payload that was originally sent.
 * @param errorText Raw body of the upstream error response.
 * @returns `null` when no retry should happen: the payload carried no
 *   effort, the error is not an effort error, or the selected effort equals
 *   the one already sent. Otherwise a shallow copy of `payload` with
 *   `output_config.effort` replaced by the selected effort, or with
 *   `output_config` removed entirely when no effort could be selected.
 */
function buildNativeMessagesReasoningRetryPayload(
  payload: AnthropicMessagesPayload,
  errorText: string,
): AnthropicMessagesPayload | null {
  const requested = payload.output_config?.effort;
  if (!requested) return null;

  const support = parseNativeReasoningEffortSupport(errorText);
  if (!support) return null;

  const retryEffort = pickSupportedAnthropicOutputConfigEffort(
    requested,
    support.supported,
  );
  // Re-sending the identical effort would just fail the same way.
  if (retryEffort === requested) return null;

  if (!retryEffort) {
    // Nothing usable was advertised: retry without any output_config.
    const { output_config: _dropped, ...withoutOutputConfig } = payload;
    return withoutOutputConfig;
  }

  return {
    ...payload,
    output_config: {
      ...(payload.output_config ?? {}),
      effort: retryEffort,
    },
  };
}

/** Anthropic-compatible error that triggers compact in Claude Code */
function contextWindowErrorResponse(c: Context) {
return c.json({
Expand Down Expand Up @@ -293,22 +359,46 @@ async function forwardMessages(
accountType: string,
fetchOptions: CopilotFetchOptions,
): Promise<Response> {
const { service_tier: _, ...cleanPayload } = payload;
const wantsStream = !!payload.stream;

// Always stream upstream to avoid blocking on large responses
cleanPayload.stream = true;
const postMessages = (nextPayload: AnthropicMessagesPayload) => {
const { service_tier: _, ...cleanPayload } = nextPayload;

const resp = await copilotFetch(
"/v1/messages",
{ method: "POST", body: JSON.stringify(cleanPayload) },
githubToken,
accountType,
fetchOptions,
);
// Always stream upstream to avoid blocking on large responses
cleanPayload.stream = true;

return copilotFetch(
"/v1/messages",
{ method: "POST", body: JSON.stringify(cleanPayload) },
githubToken,
accountType,
fetchOptions,
);
};

let resp = await postMessages(payload);
let errorText: string | null = null;

if (!resp.ok) {
const text = await resp.text();
errorText = await resp.text();
const retryPayload = buildNativeMessagesReasoningRetryPayload(
payload,
errorText,
);
if (retryPayload) {
console.warn(
"Retrying native /v1/messages with downgraded output_config.effort:",
payload.output_config?.effort,
"->",
retryPayload.output_config?.effort ?? "omitted",
);
resp = await postMessages(retryPayload);
errorText = resp.ok ? null : await resp.text();
}
}

if (!resp.ok) {
const text = errorText ?? await resp.text();
if (isContextWindowError(text)) return contextWindowErrorResponse(c);
return new Response(text, {
status: resp.status,
Expand Down
125 changes: 125 additions & 0 deletions src/routes/messages_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,131 @@ Deno.test("/v1/messages forwards Anthropic tool strict field on native messages"
);
});

// Verifies the retry path for native /v1/messages: the first upstream call
// with effort "xhigh" is rejected with a 400 that lists [medium] as the only
// supported value, and the request is expected to be re-sent with "medium".
Deno.test("/v1/messages retries native requests with downgraded output_config.effort", async () => {
const { apiKey } = await setupAppTest();

// Every body sent to the mocked upstream /v1/messages, in call order.
const upstreamBodies: Array<Record<string, unknown>> = [];

await withMockedFetch(async (request) => {
const url = new URL(request.url);

if (url.hostname === "update.code.visualstudio.com") {
return jsonResponse(["1.110.1"]);
}
if (url.pathname === "/copilot_internal/v2/token") {
return jsonResponse({
token: "copilot-access-token",
expires_at: 4102444800,
refresh_in: 3600,
});
}
if (url.pathname === "/models") {
// The model advertises only the native messages endpoint.
return jsonResponse(copilotModels([
{ id: "claude-native-reasoning", supported_endpoints: ["/v1/messages"] },
]));
}
if (url.pathname === "/v1/messages") {
const body = JSON.parse(await request.text()) as Record<string, unknown>;
upstreamBodies.push(body);

const outputConfig = body.output_config as Record<string, unknown> | undefined;
// Reject "xhigh" with an error that names the supported values.
if (outputConfig?.effort === "xhigh") {
return jsonResponse({
error: {
message:
'output_config.effort "xhigh" is not supported by model claude-native-reasoning; supported values: [medium]',
code: "invalid_reasoning_effort",
},
}, 400);
}

// Any other effort succeeds with a minimal streamed text message.
return sseResponse([
{
event: "message_start",
data: {
type: "message_start",
message: {
id: "msg_reasoning",
type: "message",
role: "assistant",
content: [],
model: "claude-native-reasoning",
stop_reason: null,
stop_sequence: null,
usage: { input_tokens: 10, output_tokens: 0 },
},
},
},
{
event: "content_block_start",
data: {
type: "content_block_start",
index: 0,
content_block: { type: "text", text: "" },
},
},
{
event: "content_block_delta",
data: {
type: "content_block_delta",
index: 0,
delta: { type: "text_delta", text: "ok" },
},
},
{
event: "content_block_stop",
data: { type: "content_block_stop", index: 0 },
},
{
event: "message_delta",
data: {
type: "message_delta",
delta: { stop_reason: "end_turn", stop_sequence: null },
usage: { output_tokens: 4 },
},
},
{ event: "message_stop", data: { type: "message_stop" } },
]);
}

// Fail loudly on any request the test did not anticipate.
throw new Error(`Unhandled fetch ${request.url}`);
}, async () => {
// Client asks for a non-streaming response with effort "xhigh".
const response = await requestApp("/v1/messages", {
method: "POST",
headers: {
"content-type": "application/json",
"x-api-key": apiKey.key,
},
body: JSON.stringify({
model: "claude-native-reasoning",
max_tokens: 64,
stream: false,
thinking: { type: "enabled", budget_tokens: 4096 },
output_config: { effort: "xhigh" },
messages: [{ role: "user", content: "hello" }],
}),
});

assertEquals(response.status, 200);
const body = await response.json();
assertEquals(body.id, "msg_reasoning");
});

// Exactly two upstream calls: the rejected one and the retry.
assertEquals(upstreamBodies.length, 2);
assertEquals(
((upstreamBodies[0].output_config as Record<string, unknown>).effort),
"xhigh",
);
assertEquals(
((upstreamBodies[1].output_config as Record<string, unknown>).effort),
"medium",
);
// The retry must leave the thinking budget untouched.
assertEquals(
((upstreamBodies[1].thinking as Record<string, unknown>).budget_tokens),
4096,
);
});

Deno.test("/v1/messages keeps strict Anthropic tools on native messages when both endpoints are available", async () => {
const { apiKey } = await setupAppTest();

Expand Down