From e273b3fe7560c28e60f66006f9a572c5d30be2e4 Mon Sep 17 00:00:00 2001
From: Razon <davidinfosec07@gmail.com>
Date: Fri, 12 Jun 2026 00:50:46 +0000
Subject: [PATCH 1/9] feat: add models CLI command to inspect Copilot model
 support

---
 package.json                       |   1 +
 src/main.ts                        |   3 +-
 src/models.ts                      | 120 +++++++++++++++++++++++++++++
 src/services/copilot/get-models.ts |   1 +
 4 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 src/models.ts
diff --git a/package.json b/package.json
index a5adbb8e7..3b2744fa0 100644
--- a/package.json
+++ b/package.json
@@ -27,6 +27,7 @@
     "knip": "knip-bun",
     "lint": "eslint --cache",
     "lint:all": "eslint --cache .",
+    "models": "bun run ./src/main.ts models",
     "prepack": "bun run build",
     "prepare": "simple-git-hooks",
     "release": "bumpp && bun publish --access public",
diff --git a/src/main.ts b/src/main.ts
index 4f6ca784b..1aa5a5e9f 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -5,6 +5,7 @@ import { defineCommand, runMain } from "citty"
 import { auth } from "./auth"
 import { checkUsage } from "./check-usage"
 import { debug } from "./debug"
+import { models } from "./models"
 import { start } from "./start"
 
 const main = defineCommand({
@@ -13,7 +14,7 @@ const main = defineCommand({
     description:
       "A wrapper around GitHub Copilot API to make it OpenAI compatible, making it usable for other tools.",
   },
-  subCommands: { auth, start, "check-usage": checkUsage, debug },
+  subCommands: { auth, start, models, "check-usage": checkUsage, debug },
 })
 
 await runMain(main)
diff --git a/src/models.ts b/src/models.ts
new file mode 100644
index 000000000..52b0b3e4d
--- /dev/null
+++ b/src/models.ts
@@ -0,0 +1,120 @@
+#!/usr/bin/env node
+
+import { defineCommand } from "citty"
+import consola from "consola"
+
+import { ensurePaths } from "./lib/paths"
+import { initProxyFromEnv } from "./lib/proxy"
+import { state } from "./lib/state"
+import { setupGitHubToken } from "./lib/token"
+import { cacheVSCodeVersion } from "./lib/utils"
+import { getModels } from "./services/copilot/get-models"
+import { getCopilotToken } from "./services/github/get-copilot-token"
+
+interface RunModelsOptions {
+  verbose: boolean
+  accountType: string
+  githubToken?: string
+  showToken: boolean
+  proxyEnv: boolean
+  json: boolean
+}
+
+/**
+ * Fetches the Copilot model list and prints it, either as raw JSON or as a
+ * boxed `id (vendor; endpoints)` summary with one model per line.
+ */
+export async function runModels(options: RunModelsOptions): Promise<void> {
+  const { accountType, githubToken, json, proxyEnv, showToken, verbose } =
+    options
+
+  if (proxyEnv) initProxyFromEnv()
+
+  if (verbose) {
+    consola.level = 5
+    consola.info("Verbose logging enabled")
+  }
+
+  state.accountType = accountType
+  state.showToken = showToken
+
+  await ensurePaths()
+  await cacheVSCodeVersion()
+
+  // A token passed via options is used as-is; otherwise defer to setup.
+  if (githubToken) {
+    state.githubToken = githubToken
+    consola.info("Using provided GitHub token")
+  } else {
+    await setupGitHubToken()
+  }
+
+  const copilot = await getCopilotToken()
+  state.copilotToken = copilot.token
+  if (state.showToken) consola.info("Copilot token:", copilot.token)
+
+  const models = await getModels()
+  if (json) {
+    console.log(JSON.stringify(models, null, 2))
+    return
+  }
+
+  const lines = models.data.map((model) => {
+    // Models without an endpoint list are shown as "default".
+    const endpoints = model.supported_endpoints?.join(", ") ?? "default"
+    return `${model.id} (${model.vendor}; ${endpoints})`
+  })
+  consola.box(lines.join("\n"))
+}
+
+/** `models` subcommand: parses CLI flags and forwards them to runModels. */
+export const models = defineCommand({
+  meta: {
+    name: "models",
+    description: "List the current GitHub Copilot models available to the API",
+  },
+  args: {
+    verbose: {
+      type: "boolean",
+      alias: "v",
+      description: "Enable verbose logging",
+      default: false,
+    },
+    "account-type": {
+      type: "string",
+      alias: "a",
+      description: "Account type to use (individual, business, enterprise)",
+      default: "individual",
+    },
+    "github-token": {
+      type: "string",
+      alias: "g",
+      description:
+        "Provide GitHub token directly (must be generated using the `auth` subcommand)",
+    },
+    "show-token": {
+      type: "boolean",
+      description: "Show GitHub and Copilot tokens on fetch",
+      default: false,
+    },
+    "proxy-env": {
+      type: "boolean",
+      description: "Initialize proxy from environment variables",
+      default: false,
+    },
+    json: {
+      type: "boolean",
+      description: "Output raw model response as JSON",
+      default: false,
+    },
+  },
+  run: ({ args }) =>
+    runModels({
+      verbose: args.verbose,
+      accountType: args["account-type"],
+      githubToken: args["github-token"],
+      showToken: args["show-token"],
+      proxyEnv: args["proxy-env"],
+      json: args.json,
+    }),
+})
diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts
index 3cfa30af0..efd61f276 100644
--- a/src/services/copilot/get-models.ts
+++ b/src/services/copilot/get-models.ts
@@ -46,6 +46,7 @@ export interface Model {
   name: string
   object: string
   preview: boolean
+  supported_endpoints?: Array<string>
   vendor: string
   version: string
   policy?: {

From fb77680bbf71ffa438484546c49228268b58cb0b Mon Sep 17 00:00:00 2001
From: Razon <davidinfosec07@gmail.com>
Date: Fri, 12 Jun 2026 00:50:55 +0000
Subject: [PATCH 2/9] feat: route models requiring /responses endpoint through
 responses API

---
 src/routes/chat-completions/handler.ts   |  42 ++++
 src/services/copilot/create-responses.ts | 249 +++++++++++++++++++++++
 tests/create-responses.test.ts           | 139 +++++++++++++
 3 files changed, 430 insertions(+)
 create mode 100644 src/services/copilot/create-responses.ts
 create mode 100644 tests/create-responses.test.ts

diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts
index 04a5ae9ed..c17cccf3b 100644
--- a/src/routes/chat-completions/handler.ts
+++ b/src/routes/chat-completions/handler.ts
@@ -13,6 +13,14 @@ import {
   type ChatCompletionResponse,
   type ChatCompletionsPayload,
 } from "~/services/copilot/create-chat-completions"
+import {
+  createResponsesFromChatCompletions,
+  responseEventToChatChunks,
+  responseToChatCompletion,
+  shouldUseResponsesEndpoint,
+  type ResponseApiResponse,
+  type ResponsesStreamState,
+} from "~/services/copilot/create-responses"
 
 export async function handleCompletion(c: Context) {
   await checkRateLimit(state)
@@ -47,6 +55,36 @@ export async function handleCompletion(c: Context) {
     consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
   }
 
+  if (shouldUseResponsesEndpoint(selectedModel?.supported_endpoints)) {
+    const response = await createResponsesFromChatCompletions(payload)
+
+    if (isNonStreamingResponse(response)) {
+      consola.debug("Non-streaming response:", JSON.stringify(response))
+      return c.json(responseToChatCompletion(response))
+    }
+
+    consola.debug("Streaming response from responses endpoint")
+    return streamSSE(c, async (stream) => {
+      const streamState: ResponsesStreamState = {
+        id: "",
+        model: payload.model,
+        created: Math.floor(Date.now() / 1000),
+        roleSent: false,
+      }
+
+      for await (const event of response) {
+        if (!event.data) continue
+
+        const chunks = responseEventToChatChunks(event.data, streamState)
+        for (const chunk of chunks) {
+          await stream.writeSSE({
+            data: chunk === "[DONE]" ? chunk : JSON.stringify(chunk),
+          })
+        }
+      }
+    })
+  }
+
   const response = await createChatCompletions(payload)
 
   if (isNonStreaming(response)) {
@@ -66,3 +104,7 @@ export async function handleCompletion(c: Context) {
 const isNonStreaming = (
   response: Awaited<ReturnType<typeof createChatCompletions>>,
 ): response is ChatCompletionResponse => Object.hasOwn(response, "choices")
+
+const isNonStreamingResponse = (
+  response: Awaited<ReturnType<typeof createResponsesFromChatCompletions>>,
+): response is ResponseApiResponse => !(Symbol.asyncIterator in response)
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
new file mode 100644
index 000000000..38f9b1785
--- /dev/null
+++ b/src/services/copilot/create-responses.ts
@@ -0,0 +1,249 @@
+import { events } from "fetch-event-stream"
+
+import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { state } from "~/lib/state"
+
+import type {
+  ChatCompletionChunk,
+  ChatCompletionResponse,
+  ChatCompletionsPayload,
+  ContentPart,
+  Message,
+} from "./create-chat-completions"
+
+/**
+ * POSTs a chat-completions payload, converted to Responses API shape, to the
+ * Copilot `/responses` endpoint. Resolves to an SSE event iterator when
+ * `payload.stream` is truthy, otherwise to the parsed JSON response body.
+ */
+export const createResponsesFromChatCompletions = async (
+  payload: ChatCompletionsPayload,
+) => {
+  if (!state.copilotToken) throw new Error("Copilot token not found")
+
+  const url = `${copilotBaseUrl(state)}/responses`
+  const headers = copilotHeaders(state)
+  const body = JSON.stringify(toResponsesPayload(payload))
+  const response = await fetch(url, { method: "POST", headers, body })
+
+  if (!response.ok) throw new HTTPError("Failed to create response", response)
+
+  return payload.stream ?
+      events(response)
+    : ((await response.json()) as ResponseApiResponse)
+}
+
+/** True only when a model lists /responses but not /chat/completions. */
+export const shouldUseResponsesEndpoint = (endpoints?: Array<string>) =>
+  Boolean(endpoints?.includes("/responses"))
+  && !endpoints?.includes("/chat/completions")
+
+/**
+ * Reshapes a Responses API result into a single-choice chat completion.
+ * Any status other than "completed" is reported as finish_reason "length".
+ */
+export function responseToChatCompletion(
+  response: ResponseApiResponse,
+): ChatCompletionResponse {
+  const { created_at: created, id, model, status, usage } = response
+  const reason = status === "completed" ? "stop" : "length"
+  const message = {
+    role: "assistant" as const,
+    content: getResponseText(response),
+  }
+  return {
+    id,
+    object: "chat.completion",
+    created,
+    model,
+    choices: [{ index: 0, message, logprobs: null, finish_reason: reason }],
+    usage: toChatUsage(usage),
+  }
+}
+
+/**
+ * Translates one Responses API stream event into chat-completion chunks.
+ *
+ * `response.created` only records id/model/created on `state`; every
+ * `response.output_text.delta` yields a content chunk (the first also carries
+ * `role: "assistant"`). `response.completed` and `response.incomplete` both
+ * close the stream with a final chunk plus "[DONE]" (finish_reason "stop" and
+ * "length"), so a truncated response still ends with a finish chunk.
+ * Any other event yields nothing.
+ *
+ * @param eventData - Raw JSON text of one SSE `data` field.
+ * @param state - Per-stream metadata; mutated in place.
+ * @throws SyntaxError if `eventData` is not valid JSON.
+ */
+export function responseEventToChatChunks(
+  eventData: string,
+  state: ResponsesStreamState,
+): Array<ChatCompletionChunk | "[DONE]"> {
+  const { delta, response, type } = JSON.parse(eventData) as ResponseStreamEvent
+
+  if (type === "response.output_text.delta" && delta) {
+    const role = state.roleSent ? undefined : "assistant"
+    state.roleSent = true
+    return [
+      {
+        id: state.id,
+        object: "chat.completion.chunk",
+        created: state.created,
+        model: state.model,
+        choices: [
+          {
+            index: 0,
+            delta: { role, content: delta },
+            finish_reason: null,
+            logprobs: null,
+          },
+        ],
+      },
+    ]
+  }
+
+  const isCompleted = type === "response.completed"
+  const isTerminal = isCompleted || type === "response.incomplete"
+  if (!response || !(isTerminal || type === "response.created")) return []
+
+  state.id = response.id
+  state.model = response.model
+  state.created = response.created_at
+  if (!isTerminal) return []
+
+  state.roleSent = true
+  const closingChunk: ChatCompletionChunk = {
+    id: response.id,
+    object: "chat.completion.chunk",
+    created: response.created_at,
+    model: response.model,
+    choices: [
+      {
+        index: 0,
+        delta: { content: null },
+        finish_reason: isCompleted ? "stop" : "length",
+        logprobs: null,
+      },
+    ],
+    usage: toChatUsage(response.usage),
+  }
+  return [closingChunk, "[DONE]"]
+}
+
+export interface ResponsesStreamState {
+  created: number // `created` value stamped on emitted chunks
+  id: string // chunk id; overwritten with the response id once an event carries it
+  model: string // model name stamped on emitted chunks
+  roleSent: boolean // set once the first text delta chunk has been built
+}
+
+interface ResponsesPayload {
+  input: Array<ResponseInputMessage> // built from the chat `messages` array
+  max_output_tokens?: number | null // filled from chat `max_tokens`
+  model: string
+  stream?: boolean | null
+  temperature?: number | null
+  top_p?: number | null
+}
+
+interface ResponseInputMessage {
+  content: string // message content flattened to plain text
+  role: "assistant" | "developer" | "system" | "user" // note: no "tool" role
+}
+
+export interface ResponseApiResponse {
+  created_at: number // copied into the chat completion's `created`
+  error?: unknown // declared but not read by the converters in this file
+  id: string
+  model: string
+  object: "response"
+  output?: Array<ResponseOutputItem> // reply text is gathered from its content parts
+  status: string // only "completed" is mapped to finish_reason "stop"
+  usage?: ResponseUsage | null
+}
+
+interface ResponseOutputItem {
+  content?: Array<ResponseContentPart> // treated as empty when absent
+}
+
+interface ResponseContentPart {
+  text?: string // treated as "" when absent
+  type: string // only "output_text" parts contribute to the reply text
+}
+
+interface ResponseUsage {
+  input_tokens?: number // becomes `prompt_tokens`
+  output_tokens?: number // becomes `completion_tokens`
+  total_tokens?: number // becomes `total_tokens`
+}
+
+interface ResponseStreamEvent {
+  delta?: string // text fragment carried by `response.output_text.delta` events
+  response?: ResponseApiResponse // snapshot carried by e.g. `response.created`
+  type: string // event name; unrecognized types produce no chunks
+}
+
+/** Maps a chat-completions request onto the Responses API request shape. */
+function toResponsesPayload(payload: ChatCompletionsPayload): ResponsesPayload {
+  const { max_tokens, messages, model, stream, temperature, top_p } = payload
+  return {
+    model,
+    input: messages.flatMap((message) => toResponseInputMessage(message)),
+    max_output_tokens: max_tokens,
+    stream,
+    temperature,
+    top_p,
+  }
+}
+
+/**
+ * Converts one chat message into Responses API input messages.
+ *
+ * Content is flattened to plain text. The "tool" role is sent as "user"
+ * because ResponseInputMessage has no tool role; every other role is kept.
+ * NOTE(review): `tool_call_id` and assistant `tool_calls` are not forwarded.
+ */
+function toResponseInputMessage(message: Message): Array<ResponseInputMessage> {
+  const role = message.role === "tool" ? "user" : message.role
+
+  return [
+    {
+      role,
+      content: contentToText(message.content),
+    },
+  ]
+}
+
+/** Flattens message content to text; array parts are joined with newlines. */
+function contentToText(content: Message["content"]): string {
+  if (typeof content === "string") return content
+  // null/undefined content is treated as empty text.
+  return content ? content.map(contentPartToText).join("\n") : ""
+}
+
+/** Flattens one content part to plain text. */
+function contentPartToText(part: ContentPart): string {
+  // Image parts are reduced to a marker holding the URL verbatim.
+  return part.type === "text" ? part.text : `[image: ${part.image_url.url}]`
+}
+
+/** Concatenates every `output_text` part of a response, in output order. */
+function getResponseText(response: ResponseApiResponse): string {
+  const parts = (response.output ?? []).flatMap((item) => item.content ?? [])
+  let text = ""
+  for (const part of parts) {
+    if (part.type === "output_text") text += part.text ?? ""
+  }
+  return text
+}
+
+/** Maps Responses API usage onto chat-completions usage, if present. */
+function toChatUsage(usage?: ResponseUsage | null) {
+  if (!usage) return undefined
+  const prompt_tokens = usage.input_tokens ?? 0
+  const completion_tokens = usage.output_tokens ?? 0
+  // A missing total is derived so it never contradicts the two parts.
+  const total_tokens = usage.total_tokens ?? prompt_tokens + completion_tokens
+  return { prompt_tokens, completion_tokens, total_tokens }
+}
diff --git a/tests/create-responses.test.ts b/tests/create-responses.test.ts
new file mode 100644
index 000000000..819f1f655
--- /dev/null
+++ b/tests/create-responses.test.ts
@@ -0,0 +1,139 @@
+import { describe, expect, test } from "bun:test"
+
+import {
+  responseEventToChatChunks,
+  responseToChatCompletion,
+  shouldUseResponsesEndpoint,
+  type ResponseApiResponse,
+  type ResponsesStreamState,
+} from "~/services/copilot/create-responses"
+
+describe("Responses API adapter", () => {
+  test("uses responses only for models that do not support chat completions", () => {
+    expect(shouldUseResponsesEndpoint(["/responses"])).toBe(true)
+    expect(
+      shouldUseResponsesEndpoint(["/responses", "/chat/completions"]),
+    ).toBe(false)
+    expect(shouldUseResponsesEndpoint(["/chat/completions"])).toBe(false)
+    expect(shouldUseResponsesEndpoint()).toBe(false)
+  })
+
+  test("converts a non-streaming response to a chat completion", () => {
+    const response: ResponseApiResponse = {
+      id: "resp_123",
+      object: "response",
+      created_at: 1700000000,
+      model: "gpt-test",
+      status: "completed",
+      output: [
+        {
+          content: [
+            { type: "output_text", text: "Hello" },
+            { type: "output_text", text: " there" },
+          ],
+        },
+      ],
+      usage: {
+        input_tokens: 2,
+        output_tokens: 3,
+        total_tokens: 5,
+      },
+    }
+
+    const chatCompletion = responseToChatCompletion(response)
+
+    expect(chatCompletion.id).toBe("resp_123")
+    expect(chatCompletion.object).toBe("chat.completion")
+    expect(chatCompletion.choices[0].message.content).toBe("Hello there")
+    expect(chatCompletion.choices[0].finish_reason).toBe("stop")
+    expect(chatCompletion.usage).toEqual({
+      prompt_tokens: 2,
+      completion_tokens: 3,
+      total_tokens: 5,
+    })
+  })
+
+  test("converts response stream events to chat completion chunks", () => {
+    const streamState: ResponsesStreamState = {
+      id: "",
+      model: "gpt-test",
+      created: 0,
+      roleSent: false,
+    }
+
+    expect(
+      responseEventToChatChunks(
+        JSON.stringify({
+          type: "response.created",
+          response: {
+            id: "resp_123",
+            object: "response",
+            created_at: 1700000000,
+            model: "gpt-test",
+            status: "in_progress",
+          },
+        }),
+        streamState,
+      ),
+    ).toEqual([])
+
+    const deltaChunks = responseEventToChatChunks(
+      JSON.stringify({
+        type: "response.output_text.delta",
+        delta: "Hello",
+      }),
+      streamState,
+    )
+
+    expect(deltaChunks).toHaveLength(1)
+    expect(deltaChunks[0]).toMatchObject({
+      id: "resp_123",
+      object: "chat.completion.chunk",
+      choices: [
+        {
+          delta: {
+            role: "assistant",
+            content: "Hello",
+          },
+          finish_reason: null,
+        },
+      ],
+    })
+
+    const completedChunks = responseEventToChatChunks(
+      JSON.stringify({
+        type: "response.completed",
+        response: {
+          id: "resp_123",
+          object: "response",
+          created_at: 1700000000,
+          model: "gpt-test",
+          status: "completed",
+          usage: {
+            input_tokens: 2,
+            output_tokens: 3,
+            total_tokens: 5,
+          },
+        },
+      }),
+      streamState,
+    )
+
+    expect(completedChunks.at(-1)).toBe("[DONE]")
+    expect(completedChunks[0]).toMatchObject({
+      choices: [
+        {
+          delta: {
+            content: null,
+          },
+          finish_reason: "stop",
+        },
+      ],
+      usage: {
+        prompt_tokens: 2,
+        completion_tokens: 3,
+        total_tokens: 5,
+      },
+    })
+  })
+})

From 3d24eef6b33253090d2b7e1e8912c2e9d8d51613 Mon Sep 17 00:00:00 2001
From: Razon <davidinfosec07@gmail.com>
Date: Fri, 12 Jun 2026 00:50:55 +0000
Subject: [PATCH 3/9] fix: fetch VS Code version lazily instead of at import
 time

---
 src/services/get-vscode-version.ts | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts
index 6078f09b5..51ea5e24f 100644
--- a/src/services/get-vscode-version.ts
+++ b/src/services/get-vscode-version.ts
@@ -29,5 +29,3 @@ export async function getVSCodeVersion() {
     clearTimeout(timeout)
   }
 }
-
-await getVSCodeVersion()

From 44c74c7173cb09a7100bc0b3b6a271946132eb61 Mon Sep 17 00:00:00 2001
From: Razon <davidinfosec07@gmail.com>
Date: Fri, 12 Jun 2026 00:51:03 +0000
Subject: [PATCH 4/9] feat: bind to 127.0.0.1 by default, add --host flag

Server is now local-only unless --host is explicitly passed.
Docker entrypoint passes --host 0.0.0.0 so published ports keep working.
---
 entrypoint.sh |  4 +++-
 src/start.ts  | 18 +++++++++++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/entrypoint.sh b/entrypoint.sh
index dfe63c902..7d5cc4fdc 100644
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -4,6 +4,8 @@ if [ "$1" = "--auth" ]; then
   exec bun run dist/main.js auth
 else
   # Default command
-  exec bun run dist/main.js start -g "$GH_TOKEN" "$@"
+  # Bind to 0.0.0.0 inside the container so published ports work;
+  # restrict exposure on the host side (e.g. -p 127.0.0.1:4141:4141).
+  exec bun run dist/main.js start --host 0.0.0.0 -g "$GH_TOKEN" "$@"
 fi
 
diff --git a/src/start.ts b/src/start.ts
index 14abbbdff..1e8a48320 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -16,6 +16,7 @@ import { server } from "./server"
 
 interface RunServerOptions {
   port: number
+  host: string
   verbose: boolean
   accountType: string
   manual: boolean
@@ -64,7 +65,14 @@ export async function runServer(options: RunServerOptions): Promise<void> {
     `Available models: \n${state.models?.data.map((model) => `- ${model.id}`).join("\n")}`,
   )
 
-  const serverUrl = `http://localhost:${options.port}`
+  const displayHost = options.host === "0.0.0.0" ? "localhost" : options.host
+  const serverUrl = `http://${displayHost}:${options.port}`
+
+  if (options.host !== "127.0.0.1" && options.host !== "localhost") {
+    consola.warn(
+      `Server will listen on ${options.host} and may be reachable from other machines. Use the default host (127.0.0.1) for local-only access.`,
+    )
+  }
 
   if (options.claudeCode) {
     invariant(state.models, "Models should be loaded by now")
@@ -116,6 +124,7 @@ export async function runServer(options: RunServerOptions): Promise<void> {
 
   serve({
     fetch: server.fetch as ServerHandler,
+    hostname: options.host,
     port: options.port,
   })
 }
@@ -132,6 +141,12 @@ export const start = defineCommand({
       default: "4141",
       description: "Port to listen on",
     },
+    host: {
+      type: "string",
+      default: "127.0.0.1",
+      description:
+        "Host to bind to. Defaults to 127.0.0.1 (local-only). Use 0.0.0.0 to expose to the network (e.g. inside Docker)",
+    },
     verbose: {
       alias: "v",
       type: "boolean",
@@ -193,6 +208,7 @@ export const start = defineCommand({
 
     return runServer({
       port: Number.parseInt(args.port, 10),
+      host: args.host,
       verbose: args.verbose,
       accountType: args["account-type"],
       manual: args.manual,

From dec36948cc4f7c06c6d1242fb28db16a9ef984d9 Mon Sep 17 00:00:00 2001
From: Razon <davidinfosec07@gmail.com>
Date: Fri, 12 Jun 2026 00:51:03 +0000
Subject: [PATCH 5/9] test: add Anthropic edge-case tests; tolerate invalid
 tool call JSON

Covers images, tool_result ordering, mixed text/tool streams,
invalid tool JSON, and cache token accounting.
---
 src/routes/messages/non-stream-translation.ts |  16 +-
 tests/anthropic-edge-cases.test.ts            | 494 ++++++++++++++++++
 2 files changed, 509 insertions(+), 1 deletion(-)
 create mode 100644 tests/anthropic-edge-cases.test.ts

diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
index dc41e6382..3f3939b60 100644
--- a/src/routes/messages/non-stream-translation.ts
+++ b/src/routes/messages/non-stream-translation.ts
@@ -352,6 +352,20 @@ function getAnthropicToolUseBlocks(
     type: "tool_use",
     id: toolCall.id,
     name: toolCall.function.name,
-    input: JSON.parse(toolCall.function.arguments) as Record<string, unknown>,
+    input: safeParseToolInput(toolCall.function.arguments),
   }))
 }
+
+/** Parses tool-call arguments; anything but a JSON object becomes `{}`. */
+function safeParseToolInput(args: string): Record<string, unknown> {
+  try {
+    const parsed: unknown = JSON.parse(args)
+    // Arrays also report typeof "object" but are not a valid tool input map.
+    const isObject = typeof parsed === "object" && !Array.isArray(parsed)
+    if (parsed !== null && isObject) return parsed as Record<string, unknown>
+  } catch {
+    // Malformed JSON from the model falls through to the empty input below
+    // instead of crashing the whole response.
+  }
+  return {}
+}
diff --git a/tests/anthropic-edge-cases.test.ts b/tests/anthropic-edge-cases.test.ts
new file mode 100644
index 000000000..aa4f35dc3
--- /dev/null
+++ b/tests/anthropic-edge-cases.test.ts
@@ -0,0 +1,494 @@
+import { describe, test, expect } from "bun:test"
+
+import type {
+  AnthropicMessagesPayload,
+  AnthropicStreamState,
+  AnthropicTextBlock,
+  AnthropicToolUseBlock,
+} from "~/routes/messages/anthropic-types"
+import type {
+  ChatCompletionChunk,
+  ChatCompletionResponse,
+  ContentPart,
+} from "~/services/copilot/create-chat-completions"
+
+import {
+  translateToAnthropic,
+  translateToOpenAI,
+} from "../src/routes/messages/non-stream-translation"
+import { translateChunkToAnthropicEvents } from "../src/routes/messages/stream-translation"
+
+function freshStreamState(): AnthropicStreamState {
+  return {
+    messageStartSent: false,
+    contentBlockIndex: 0,
+    contentBlockOpen: false,
+    toolCalls: {},
+  }
+}
+
+function makeChunk(partial: Partial<ChatCompletionChunk>): ChatCompletionChunk {
+  return {
+    id: "chunk-1",
+    object: "chat.completion.chunk",
+    created: 0,
+    model: "gpt-4o",
+    choices: [],
+    ...partial,
+  }
+}
+
+describe("Anthropic request edge cases", () => {
+  test("translates image blocks into image_url content parts", () => {
+    const payload: AnthropicMessagesPayload = {
+      model: "gpt-4o",
+      max_tokens: 100,
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "text", text: "What is in this image?" },
+            {
+              type: "image",
+              source: {
+                type: "base64",
+                media_type: "image/png",
+                data: "iVBORw0KGgo=",
+              },
+            },
+          ],
+        },
+      ],
+    }
+
+    const result = translateToOpenAI(payload)
+    const userMessage = result.messages.find((m) => m.role === "user")
+    expect(Array.isArray(userMessage?.content)).toBe(true)
+
+    const parts = userMessage?.content as Array<ContentPart>
+    const imagePart = parts.find((p) => p.type === "image_url")
+    expect(imagePart).toBeDefined()
+    if (imagePart?.type === "image_url") {
+      expect(imagePart.image_url.url).toBe("data:image/png;base64,iVBORw0KGgo=")
+    }
+    const textPart = parts.find((p) => p.type === "text")
+    expect(textPart).toBeDefined()
+  })
+
+  test("translates tool_result blocks into tool role messages before user content", () => {
+    const payload: AnthropicMessagesPayload = {
+      model: "gpt-4o",
+      max_tokens: 100,
+      messages: [
+        { role: "user", content: "What's the weather?" },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "tool_use",
+              id: "toolu_1",
+              name: "get_weather",
+              input: { location: "Boston" },
+            },
+          ],
+        },
+        {
+          role: "user",
+          content: [
+            { type: "text", text: "Thanks, summarize that." },
+            {
+              type: "tool_result",
+              tool_use_id: "toolu_1",
+              content: "Sunny, 75F",
+            },
+          ],
+        },
+      ],
+    }
+
+    const result = translateToOpenAI(payload)
+    const roles = result.messages.map((m) => m.role)
+
+    // tool_result must directly follow the assistant tool_call message
+    const assistantIndex = roles.indexOf("assistant")
+    expect(roles[assistantIndex + 1]).toBe("tool")
+
+    const toolMessage = result.messages[assistantIndex + 1]
+    expect(toolMessage.tool_call_id).toBe("toolu_1")
+    expect(toolMessage.content).toBe("Sunny, 75F")
+
+    // remaining user text still arrives after the tool message
+    expect(roles[assistantIndex + 2]).toBe("user")
+  })
+
+  test("handles multiple tool_result blocks in a single user message", () => {
+    const payload: AnthropicMessagesPayload = {
+      model: "gpt-4o",
+      max_tokens: 100,
+      messages: [
+        {
+          role: "assistant",
+          content: [
+            { type: "tool_use", id: "toolu_a", name: "a", input: {} },
+            { type: "tool_use", id: "toolu_b", name: "b", input: {} },
+          ],
+        },
+        {
+          role: "user",
+          content: [
+            { type: "tool_result", tool_use_id: "toolu_a", content: "A" },
+            { type: "tool_result", tool_use_id: "toolu_b", content: "B" },
+          ],
+        },
+      ],
+    }
+
+    const result = translateToOpenAI(payload)
+    const toolMessages = result.messages.filter((m) => m.role === "tool")
+    expect(toolMessages).toHaveLength(2)
+    expect(toolMessages.map((m) => m.tool_call_id)).toEqual([
+      "toolu_a",
+      "toolu_b",
+    ])
+  })
+})
+
+function makeResponse(
+  overrides: Partial<ChatCompletionResponse>,
+): ChatCompletionResponse {
+  return {
+    id: "resp-1",
+    object: "chat.completion",
+    created: 0,
+    model: "gpt-4o",
+    choices: [],
+    ...overrides,
+  } as ChatCompletionResponse
+}
+
+describe("Anthropic response edge cases", () => {
+  test("does not crash on invalid tool call JSON and falls back to empty input", () => {
+    const response = makeResponse({
+      choices: [
+        {
+          index: 0,
+          logprobs: null,
+          finish_reason: "tool_calls",
+          message: {
+            role: "assistant",
+            content: null,
+            tool_calls: [
+              {
+                id: "call_bad",
+                type: "function",
+                function: { name: "broken_tool", arguments: "{not json" },
+              },
+            ],
+          },
+        },
+      ],
+    })
+
+    const result = translateToAnthropic(response)
+    const toolUse = result.content.find(
+      (b): b is AnthropicToolUseBlock => b.type === "tool_use",
+    )
+    expect(toolUse).toBeDefined()
+    expect(toolUse?.input).toEqual({})
+    expect(result.stop_reason).toBe("tool_use")
+  })
+
+  test("falls back to empty input when tool arguments parse to a non-object", () => {
+    const response = makeResponse({
+      choices: [
+        {
+          index: 0,
+          logprobs: null,
+          finish_reason: "tool_calls",
+          message: {
+            role: "assistant",
+            content: null,
+            tool_calls: [
+              {
+                id: "call_scalar",
+                type: "function",
+                function: { name: "scalar_tool", arguments: "42" },
+              },
+            ],
+          },
+        },
+      ],
+    })
+
+    const result = translateToAnthropic(response)
+    const toolUse = result.content.find(
+      (b): b is AnthropicToolUseBlock => b.type === "tool_use",
+    )
+    expect(toolUse?.input).toEqual({})
+  })
+
+  test("subtracts cached tokens from input_tokens and reports cache_read_input_tokens", () => {
+    const response = makeResponse({
+      choices: [
+        {
+          index: 0,
+          logprobs: null,
+          finish_reason: "stop",
+          message: { role: "assistant", content: "Hi" },
+        },
+      ],
+      usage: {
+        prompt_tokens: 1000,
+        completion_tokens: 5,
+        total_tokens: 1005,
+        prompt_tokens_details: { cached_tokens: 800 },
+      },
+    })
+
+    const result = translateToAnthropic(response)
+    expect(result.usage.input_tokens).toBe(200)
+    expect(result.usage.cache_read_input_tokens).toBe(800)
+    expect(result.usage.output_tokens).toBe(5)
+  })
+
+  test("omits cache_read_input_tokens when no cached token details exist", () => {
+    const response = makeResponse({
+      choices: [
+        {
+          index: 0,
+          logprobs: null,
+          finish_reason: "stop",
+          message: { role: "assistant", content: "Hi" },
+        },
+      ],
+      usage: { prompt_tokens: 100, completion_tokens: 5, total_tokens: 105 },
+    })
+
+    const result = translateToAnthropic(response)
+    expect(result.usage.input_tokens).toBe(100)
+    expect("cache_read_input_tokens" in result.usage).toBe(false)
+  })
+})
+
+describe("Anthropic stream translation edge cases", () => {
+  test("closes a text block before opening a tool block, and vice versa", () => {
+    const state = freshStreamState()
+
+    // 1. text chunk
+    const textEvents = translateChunkToAnthropicEvents(
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            logprobs: null,
+            finish_reason: null,
+            delta: { role: "assistant", content: "Let me check." },
+          },
+        ],
+      }),
+      state,
+    )
+    expect(textEvents.map((e) => e.type)).toEqual([
+      "message_start",
+      "content_block_start",
+      "content_block_delta",
+    ])
+
+    // 2. tool call chunk: must close the text block first
+    const toolEvents = translateChunkToAnthropicEvents(
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            logprobs: null,
+            finish_reason: null,
+            delta: {
+              tool_calls: [
+                {
+                  index: 0,
+                  id: "call_1",
+                  type: "function",
+                  function: { name: "get_weather", arguments: "" },
+                },
+              ],
+            },
+          },
+        ],
+      }),
+      state,
+    )
+    expect(toolEvents.map((e) => e.type)).toEqual([
+      "content_block_stop",
+      "content_block_start",
+    ])
+    const blockStart = toolEvents.find((e) => e.type === "content_block_start")
+    expect(
+      blockStart?.type === "content_block_start"
+        && blockStart.content_block.type === "tool_use"
+        && blockStart.content_block.name,
+    ).toBe("get_weather")
+
+    // 3. text again after the tool block: closes tool block, opens new text block
+    const backToTextEvents = translateChunkToAnthropicEvents(
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            logprobs: null,
+            finish_reason: null,
+            delta: { content: "Done." },
+          },
+        ],
+      }),
+      state,
+    )
+    expect(backToTextEvents.map((e) => e.type)).toEqual([
+      "content_block_stop",
+      "content_block_start",
+      "content_block_delta",
+    ])
+
+    // Block indices must be strictly increasing across the three blocks
+    expect(state.contentBlockIndex).toBe(2)
+  })
+
+  test("streams partial tool JSON as input_json_delta tied to the right block", () => {
+    const state = freshStreamState()
+
+    translateChunkToAnthropicEvents(
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            logprobs: null,
+            finish_reason: null,
+            delta: {
+              role: "assistant",
+              tool_calls: [
+                {
+                  index: 0,
+                  id: "call_1",
+                  type: "function",
+                  function: { name: "get_weather", arguments: "" },
+                },
+              ],
+            },
+          },
+        ],
+      }),
+      state,
+    )
+
+    const argEvents = translateChunkToAnthropicEvents(
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            logprobs: null,
+            finish_reason: null,
+            delta: {
+              tool_calls: [
+                { index: 0, function: { arguments: '{"location":"Bo' } },
+              ],
+            },
+          },
+        ],
+      }),
+      state,
+    )
+
+    expect(argEvents).toHaveLength(1)
+    const deltaEvent = argEvents[0]
+    expect(
+      deltaEvent.type === "content_block_delta"
+        && deltaEvent.delta.type === "input_json_delta"
+        && deltaEvent.delta.partial_json,
+    ).toBe('{"location":"Bo')
+  })
+
+  test("ignores argument deltas for unknown tool call indices", () => {
+    const state = freshStreamState()
+    state.messageStartSent = true
+
+    const events = translateChunkToAnthropicEvents(
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            logprobs: null,
+            finish_reason: null,
+            delta: {
+              tool_calls: [{ index: 7, function: { arguments: '{"x":1}' } }],
+            },
+          },
+        ],
+      }),
+      state,
+    )
+
+    expect(events).toHaveLength(0)
+  })
+
+  test("reports cache-aware usage in message_delta on finish", () => {
+    const state = freshStreamState()
+    state.messageStartSent = true
+    state.contentBlockOpen = true
+
+    const events = translateChunkToAnthropicEvents(
+      makeChunk({
+        choices: [
+          {
+            index: 0,
+            logprobs: null,
+            finish_reason: "stop",
+            delta: {},
+          },
+        ],
+        usage: {
+          prompt_tokens: 500,
+          completion_tokens: 20,
+          total_tokens: 520,
+          prompt_tokens_details: { cached_tokens: 300 },
+        },
+      }),
+      state,
+    )
+
+    const messageDelta = events.find((e) => e.type === "message_delta")
+    expect(messageDelta?.type).toBe("message_delta")
+    if (messageDelta?.type === "message_delta") {
+      expect(messageDelta.usage?.input_tokens).toBe(200)
+      expect(messageDelta.usage?.cache_read_input_tokens).toBe(300)
+      expect(messageDelta.usage?.output_tokens).toBe(20)
+    }
+    expect(events.at(-1)?.type).toBe("message_stop")
+  })
+
+  test("handles empty choices chunks without emitting events", () => {
+    const state = freshStreamState()
+    const events = translateChunkToAnthropicEvents(
+      makeChunk({ choices: [] }),
+      state,
+    )
+    expect(events).toHaveLength(0)
+    expect(state.messageStartSent).toBe(false)
+  })
+})
+
+describe("system prompt translation", () => {
+  test("joins array-form system prompts into a single system message", () => {
+    const payload: AnthropicMessagesPayload = {
+      model: "gpt-4o",
+      max_tokens: 10,
+      system: [
+        { type: "text", text: "You are helpful." },
+        { type: "text", text: "Be concise." },
+      ] as Array<AnthropicTextBlock>,
+      messages: [{ role: "user", content: "Hi" }],
+    }
+
+    const result = translateToOpenAI(payload)
+    expect(result.messages[0].role).toBe("system")
+    expect(result.messages[0].content).toBe("You are helpful.\n\nBe concise.")
+  })
+})

From 58249ba0778f919bdd0ac4e526b0aba196f573b0 Mon Sep 17 00:00:00 2001
From: Razon <davidinfosec07@gmail.com>
Date: Fri, 12 Jun 2026 00:51:10 +0000
Subject: [PATCH 6/9] docs+chore: run Docker as non-root, fix docs drift,
 deploy Pages from master

---
 .github/workflows/deploy-pages.yml |  4 ++--
 AGENTS.md                          |  4 ++--
 Dockerfile                         |  5 +++--
 README.md                          | 31 +++++++++++++++++++++++++-----
 4 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml
index e5b05974c..81cef85c7 100644
--- a/.github/workflows/deploy-pages.yml
+++ b/.github/workflows/deploy-pages.yml
@@ -2,7 +2,7 @@ name: Deploy to GitHub Pages
 
 on:
   push:
-    branches: [ "main" ]
+    branches: [master]
   workflow_dispatch:
 
 # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
@@ -37,4 +37,4 @@ jobs:
           
       - name: Deploy to GitHub Pages
         id: deployment
-        uses: actions/deploy-pages@v4
\ No newline at end of file
+        uses: actions/deploy-pages@v4
diff --git a/AGENTS.md b/AGENTS.md
index d509d5b6b..ad2e36816 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -3,7 +3,7 @@
 ## Build, Lint, and Test Commands
 
 - **Build:**  
-  `bun run build` (uses tsup)
+  `bun run build` (uses tsdown)
 - **Dev:**  
   `bun run dev`
 - **Lint:**  
@@ -13,7 +13,7 @@
 - **Test all:**  
    `bun test`
 - **Test single file:**  
-   `bun test tests/claude-request.test.ts`
+   `bun test tests/anthropic-request.test.ts`
 - **Start (prod):**  
   `bun run start`
 
diff --git a/Dockerfile b/Dockerfile
index 1265220ef..1cbcabe8d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,13 +13,14 @@ WORKDIR /app
 COPY ./package.json ./bun.lock ./
 RUN bun install --frozen-lockfile --production --ignore-scripts --no-cache
 
-COPY --from=builder /app/dist ./dist
+COPY --from=builder --chown=bun:bun /app/dist ./dist
 
 EXPOSE 4141
 
 HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
   CMD wget --spider -q http://localhost:4141/ || exit 1
 
-COPY entrypoint.sh /entrypoint.sh
+COPY --chown=bun:bun entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh
+USER bun
 ENTRYPOINT ["/entrypoint.sh"]
diff --git a/README.md b/README.md
index 0d36c13c9..307995dad 100644
--- a/README.md
+++ b/README.md
@@ -74,25 +74,25 @@ mkdir -p ./copilot-data
 # Run the container with a bind mount to persist the token
 # This ensures your authentication survives container restarts
 
-docker run -p 4141:4141 -v $(pwd)/copilot-data:/root/.local/share/copilot-api copilot-api
+docker run -p 4141:4141 -v $(pwd)/copilot-data:/home/bun/.local/share/copilot-api copilot-api
 ```
 
 > **Note:**
-> The GitHub token and related data will be stored in `copilot-data` on your host. This is mapped to `/root/.local/share/copilot-api` inside the container, ensuring persistence across restarts.
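+> The GitHub token and related data will be stored in `copilot-data` on your host. This is mapped to `/home/bun/.local/share/copilot-api` inside the container, ensuring persistence across restarts. Because the container runs as the non-root `bun` user, the host directory must be writable by that user.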
 
 ### Docker with Environment Variables
 
 You can pass the GitHub token directly to the container using environment variables:
 
 ```sh
-# Build with GitHub token
-docker build --build-arg GH_TOKEN=your_github_token_here -t copilot-api .
+# Build the image
+docker build -t copilot-api .
 
 # Run with GitHub token
 docker run -p 4141:4141 -e GH_TOKEN=your_github_token_here copilot-api
 
 # Run with additional options
-docker run -p 4141:4141 -e GH_TOKEN=your_token copilot-api start --verbose --port 4141
+docker run -p 4141:4141 -e GH_TOKEN=your_token copilot-api --verbose --port 4141
 ```
 
 ### Docker Compose Example
@@ -142,6 +142,7 @@ Copilot API now uses a subcommand structure with these main commands:
 
 - `start`: Start the Copilot API server. This command will also handle authentication if needed.
 - `auth`: Run GitHub authentication flow without starting the server. This is typically used if you need to generate a token for use with the `--github-token` option, especially in non-interactive environments.
+- `models`: List the current GitHub Copilot models available to the API. This is useful for non-interactive deployments where you want to inspect model support without starting the server.
 - `check-usage`: Show your current GitHub Copilot usage and quota information directly in the terminal (no server required).
 - `debug`: Display diagnostic information including version, runtime details, file paths, and authentication status. Useful for troubleshooting and support.
 
@@ -154,6 +155,7 @@ The following command line options are available for the `start` command:
 | Option         | Description                                                                   | Default    | Alias |
 | -------------- | ----------------------------------------------------------------------------- | ---------- | ----- |
 | --port         | Port to listen on                                                             | 4141       | -p    |
+| --host         | Host to bind to. Defaults to local-only. Use 0.0.0.0 to expose to the network (e.g. inside Docker) | 127.0.0.1  | none  |
 | --verbose      | Enable verbose logging                                                        | false      | -v    |
 | --account-type | Account type to use (individual, business, enterprise)                        | individual | -a    |
 | --manual       | Enable manual request approval                                                | false      | none  |
@@ -171,6 +173,17 @@ The following command line options are available for the `start` command:
 | --verbose    | Enable verbose logging    | false   | -v    |
 | --show-token | Show GitHub token on auth | false   | none  |
 
+### Models Command Options
+
+| Option         | Description                                                                   | Default    | Alias |
+| -------------- | ----------------------------------------------------------------------------- | ---------- | ----- |
+| --verbose      | Enable verbose logging                                                        | false      | -v    |
+| --account-type | Account type to use (individual, business, enterprise)                        | individual | -a    |
+| --github-token | Provide GitHub token directly (must be generated using the `auth` subcommand) | none       | -g    |
+| --show-token   | Show GitHub and Copilot tokens on fetch                                       | false      | none  |
+| --proxy-env    | Initialize proxy from environment variables                                   | false      | none  |
+| --json         | Output raw model response as JSON                                             | false      | none  |
+
 ### Debug Command Options
 
 | Option | Description               | Default | Alias |
@@ -342,6 +355,12 @@ bun run dev
 bun run start
 ```
 
+### List Available Models
+
+```sh
+bun run models
+```
+
 ## Usage Tips
 
 - To avoid hitting GitHub Copilot's rate limits, you can use the following flags:
@@ -349,3 +368,5 @@ bun run start
   - `--rate-limit <seconds>`: Enforces a minimum time interval between requests. For example, `copilot-api start --rate-limit 30` will ensure there's at least a 30-second gap between requests.
   - `--wait`: Use this with `--rate-limit`. It makes the server wait for the cooldown period to end instead of rejecting the request with an error. This is useful for clients that don't automatically retry on rate limit errors.
 - If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details.
+- This server is designed for localhost or trusted local-network use. It is intentionally single-account: all clients share the same GitHub/Copilot token and model cache.
+- The server binds to `127.0.0.1` by default, so it is not reachable from other machines unless you explicitly pass `--host` (for example `--host 0.0.0.0`). The Docker entrypoint passes `--host 0.0.0.0` so published ports work; restrict exposure on the host side with `-p 127.0.0.1:4141:4141` if you want it local-only.

From e8f316cba3544f14e4df450e8bec071a5ddfe8a0 Mon Sep 17 00:00:00 2001
From: Razon <davidinfosec07@gmail.com>
Date: Sat, 13 Jun 2026 01:43:41 +0000
Subject: [PATCH 7/9] fix: harden /responses stream adapter

- Skip malformed or non-JSON SSE events instead of crashing the stream
- Warn when tool definitions/calls are dropped for /responses-only models
---
 src/services/copilot/create-responses.ts | 20 ++++++++++++-
 tests/create-responses.test.ts           | 37 ++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index 38f9b1785..032356698 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -1,3 +1,4 @@
+import consola from "consola"
 import { events } from "fetch-event-stream"
 
 import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
@@ -66,7 +67,15 @@ export function responseEventToChatChunks(
   eventData: string,
   state: ResponsesStreamState,
 ): Array<ChatCompletionChunk | "[DONE]"> {
-  const event = JSON.parse(eventData) as ResponseStreamEvent
+  let event: ResponseStreamEvent
+  try {
+    event = JSON.parse(eventData) as ResponseStreamEvent
+  } catch {
+    // Skip malformed or non-JSON SSE events (e.g. keepalives)
+    // instead of killing the whole stream.
+    consola.debug("Skipping non-JSON responses stream event:", eventData)
+    return []
+  }
 
   if (event.type === "response.created" && event.response) {
     state.id = event.response.id
@@ -185,6 +194,15 @@ interface ResponseStreamEvent {
 }
 
 function toResponsesPayload(payload: ChatCompletionsPayload): ResponsesPayload {
+  if (
+    (payload.tools && payload.tools.length > 0)
+    || payload.messages.some((m) => m.tool_calls && m.tool_calls.length > 0)
+  ) {
+    consola.warn(
+      "Tool definitions/calls are not supported via the /responses endpoint adapter and will be dropped for this model.",
+    )
+  }
+
   return {
     model: payload.model,
     input: payload.messages.flatMap((message) =>
diff --git a/tests/create-responses.test.ts b/tests/create-responses.test.ts
index 819f1f655..f669f7f3f 100644
--- a/tests/create-responses.test.ts
+++ b/tests/create-responses.test.ts
@@ -136,4 +136,41 @@ describe("Responses API adapter", () => {
       },
     })
   })
+
+  test("skips malformed stream events instead of throwing", () => {
+    const streamState: ResponsesStreamState = {
+      id: "resp_123",
+      model: "gpt-test",
+      created: 1700000000,
+      roleSent: true,
+    }
+
+    expect(responseEventToChatChunks("not json {", streamState)).toEqual([])
+    expect(responseEventToChatChunks("[DONE]", streamState)).toEqual([])
+    expect(responseEventToChatChunks("", streamState)).toEqual([])
+
+    // Stream state must be untouched so later valid events still work
+    const deltaChunks = responseEventToChatChunks(
+      JSON.stringify({ type: "response.output_text.delta", delta: "Hi" }),
+      streamState,
+    )
+    expect(deltaChunks).toHaveLength(1)
+  })
+
+  test("ignores unknown event types", () => {
+    const streamState: ResponsesStreamState = {
+      id: "resp_123",
+      model: "gpt-test",
+      created: 1700000000,
+      roleSent: false,
+    }
+
+    expect(
+      responseEventToChatChunks(
+        JSON.stringify({ type: "response.output_item.added" }),
+        streamState,
+      ),
+    ).toEqual([])
+    expect(streamState.roleSent).toBe(false)
+  })
 })

From ddede24fae78086e43edd2682f58f05797cd65ca Mon Sep 17 00:00:00 2001
From: Razon <davidinfosec07@gmail.com>
Date: Sat, 13 Jun 2026 02:13:29 +0000
Subject: [PATCH 8/9] test: cover count_tokens handler multiplier and
 tool-overhead logic

Drives the real Hono route and asserts the claude (1.15) and grok (1.03)
multipliers, the 346/480-token tool overhead, the mcp__/claude-code beta
exemption, and the invalid-JSON fallback.
---
 tests/count-tokens-handler.test.ts | 185 +++++++++++++++++++++++++++++
 1 file changed, 185 insertions(+)
 create mode 100644 tests/count-tokens-handler.test.ts

diff --git a/tests/count-tokens-handler.test.ts b/tests/count-tokens-handler.test.ts
new file mode 100644
index 000000000..08088ac1f
--- /dev/null
+++ b/tests/count-tokens-handler.test.ts
@@ -0,0 +1,185 @@
+import { afterEach, describe, expect, test } from "bun:test"
+
+import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"
+import type { Model } from "~/services/copilot/get-models"
+
+import { state } from "~/lib/state"
+import { getTokenCount } from "~/lib/tokenizer"
+import { translateToOpenAI } from "~/routes/messages/non-stream-translation"
+import { messageRoutes } from "~/routes/messages/route"
+
+function makeModel(id: string): Model {
+  return {
+    id,
+    name: id,
+    object: "model",
+    vendor: "test",
+    version: "1",
+    preview: false,
+    model_picker_enabled: true,
+    capabilities: {
+      family: id,
+      object: "model_capabilities",
+      type: "chat",
+      tokenizer: "o200k_base",
+      limits: { max_output_tokens: 4096 },
+      supports: { tool_calls: true },
+    },
+  }
+}
+
+function setModels(...ids: Array<string>): void {
+  state.models = { object: "list", data: ids.map((id) => makeModel(id)) }
+}
+
+async function countTokens(
+  payload: AnthropicMessagesPayload,
+  headers: Record<string, string> = {},
+): Promise<number> {
+  const res = await messageRoutes.request("/count_tokens", {
+    method: "POST",
+    headers: { "content-type": "application/json", ...headers },
+    body: JSON.stringify(payload),
+  })
+  const json = (await res.json()) as { input_tokens: number }
+  return json.input_tokens
+}
+
+/** Re-derive the base count the handler starts from, before overhead/multiplier. */
+async function baseCount(payload: AnthropicMessagesPayload): Promise<number> {
+  const model = state.models?.data.find((m) => m.id === payload.model)
+  if (!model) throw new Error("model not seeded for baseCount")
+  const openAIPayload = translateToOpenAI(payload)
+  const { input, output } = await getTokenCount(openAIPayload, model)
+  return input + output
+}
+
+afterEach(() => {
+  state.models = undefined
+})
+
+describe("count_tokens handler", () => {
+  test("returns default count of 1 when the model is unknown", async () => {
+    setModels("some-other-model")
+    const result = await countTokens({
+      model: "nonexistent-model",
+      max_tokens: 10,
+      messages: [{ role: "user", content: "Hello there" }],
+    })
+    expect(result).toBe(1)
+  })
+
+  test("applies the 1.15 multiplier for claude models", async () => {
+    setModels("claude-sonnet-4")
+    const payload: AnthropicMessagesPayload = {
+      model: "claude-sonnet-4",
+      max_tokens: 10,
+      messages: [{ role: "user", content: "Hello there, how are you?" }],
+    }
+    const base = await baseCount(payload)
+    const result = await countTokens(payload)
+    expect(result).toBe(Math.round(base * 1.15))
+  })
+
+  test("applies the 1.03 multiplier for grok models", async () => {
+    setModels("grok-code")
+    const payload: AnthropicMessagesPayload = {
+      model: "grok-code",
+      max_tokens: 10,
+      messages: [{ role: "user", content: "Hello there, how are you?" }],
+    }
+    const base = await baseCount(payload)
+    const result = await countTokens(payload)
+    expect(result).toBe(Math.round(base * 1.03))
+  })
+
+  test("adds 346-token tool overhead for claude before the multiplier", async () => {
+    setModels("claude-sonnet-4")
+    const payload: AnthropicMessagesPayload = {
+      model: "claude-sonnet-4",
+      max_tokens: 10,
+      messages: [{ role: "user", content: "What's the weather?" }],
+      tools: [
+        {
+          name: "get_weather",
+          description: "Get weather",
+          input_schema: { type: "object", properties: {} },
+        },
+      ],
+    }
+    const base = await baseCount(payload)
+    const result = await countTokens(payload)
+    expect(result).toBe(Math.round((base + 346) * 1.15))
+  })
+
+  test("adds 480-token tool overhead for grok before the multiplier", async () => {
+    setModels("grok-code")
+    const payload: AnthropicMessagesPayload = {
+      model: "grok-code",
+      max_tokens: 10,
+      messages: [{ role: "user", content: "What's the weather?" }],
+      tools: [
+        {
+          name: "get_weather",
+          description: "Get weather",
+          input_schema: { type: "object", properties: {} },
+        },
+      ],
+    }
+    const base = await baseCount(payload)
+    const result = await countTokens(payload)
+    expect(result).toBe(Math.round((base + 480) * 1.03))
+  })
+
+  test("skips tool overhead when an mcp__ tool is present under the claude-code beta", async () => {
+    setModels("claude-sonnet-4")
+    const payload: AnthropicMessagesPayload = {
+      model: "claude-sonnet-4",
+      max_tokens: 10,
+      messages: [{ role: "user", content: "Use a tool" }],
+      tools: [
+        {
+          name: "mcp__server__do_thing",
+          description: "An MCP tool",
+          input_schema: { type: "object", properties: {} },
+        },
+      ],
+    }
+    const base = await baseCount(payload)
+    const result = await countTokens(payload, {
+      "anthropic-beta": "claude-code-20250101",
+    })
+    // No 346 overhead because an mcp__ tool exists under the claude-code beta
+    expect(result).toBe(Math.round(base * 1.15))
+  })
+
+  test("still adds overhead for an mcp__ tool when the claude-code beta is absent", async () => {
+    setModels("claude-sonnet-4")
+    const payload: AnthropicMessagesPayload = {
+      model: "claude-sonnet-4",
+      max_tokens: 10,
+      messages: [{ role: "user", content: "Use a tool" }],
+      tools: [
+        {
+          name: "mcp__server__do_thing",
+          description: "An MCP tool",
+          input_schema: { type: "object", properties: {} },
+        },
+      ],
+    }
+    const base = await baseCount(payload)
+    const result = await countTokens(payload)
+    expect(result).toBe(Math.round((base + 346) * 1.15))
+  })
+
+  test("falls back to a count of 1 when the body is not valid JSON", async () => {
+    setModels("claude-sonnet-4")
+    const res = await messageRoutes.request("/count_tokens", {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: "{ not valid json",
+    })
+    const json = (await res.json()) as { input_tokens: number }
+    expect(json.input_tokens).toBe(1)
+  })
+})

From a695374e5910ce38c781e719a1534a339d5a234b Mon Sep 17 00:00:00 2001
From: Razon <davidinfosec07@gmail.com>
Date: Sat, 13 Jun 2026 16:57:18 +0000
Subject: [PATCH 9/9] feat: forward tool calls through the /responses adapter

Map chat-completions tools/tool_choice into the Responses API shape, convert
assistant tool_calls + tool results into function_call/function_call_output
input items, and translate function_call output items and
function_call_arguments deltas back into chat-completions tool_calls (non-stream
and streaming). Replaces the previous drop-with-warning behavior.
---
 src/services/copilot/create-responses.ts | 340 ++++++++++++++++++-----
 tests/create-responses.test.ts           | 281 +++++++++++++++++++
 2 files changed, 549 insertions(+), 72 deletions(-)

diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index 032356698..d42be3df9 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -11,6 +11,8 @@ import type {
   ChatCompletionsPayload,
   ContentPart,
   Message,
+  Tool,
+  ToolCall,
 } from "./create-chat-completions"
 
 export const createResponsesFromChatCompletions = async (
@@ -43,6 +45,9 @@ export const shouldUseResponsesEndpoint = (endpoints?: Array<string>) =>
 export function responseToChatCompletion(
   response: ResponseApiResponse,
 ): ChatCompletionResponse {
+  const toolCalls = getResponseToolCalls(response)
+  const text = getResponseText(response)
+
   return {
     id: response.id,
     object: "chat.completion",
@@ -53,16 +58,154 @@ export function responseToChatCompletion(
         index: 0,
         message: {
           role: "assistant",
-          content: getResponseText(response),
+          // Chat Completions convention: content is null when the turn is
+          // purely tool calls.
+          content: text || (toolCalls.length > 0 ? null : ""),
+          ...(toolCalls.length > 0 && { tool_calls: toolCalls }),
         },
         logprobs: null,
-        finish_reason: response.status === "completed" ? "stop" : "length",
+        finish_reason: nonStreamFinishReason(response, toolCalls.length > 0),
       },
     ],
     usage: toChatUsage(response.usage),
   }
 }
 
+function nonStreamFinishReason(
+  response: ResponseApiResponse,
+  hasToolCalls: boolean,
+): "stop" | "length" | "tool_calls" {
+  if (hasToolCalls) return "tool_calls" // tool calls outrank response.status
+  return response.status === "completed" ? "stop" : "length"
+}
+
+/** Maps function_call output items to chat-completions tool calls. */
+function getResponseToolCalls(response: ResponseApiResponse): Array<ToolCall> {
+  const calls: Array<ToolCall> = []
+  for (const item of response.output ?? []) {
+    if (item.type !== "function_call") continue
+    calls.push({
+      // Prefer call_id and fall back to the item id, then to "".
+      id: item.call_id ?? item.id ?? "",
+      type: "function",
+      function: { name: item.name ?? "", arguments: item.arguments ?? "" },
+    })
+  }
+  return calls
+}
+
+type ChatChunkDelta = ChatCompletionChunk["choices"][number]["delta"]
+type ChatChunkFinish = ChatCompletionChunk["choices"][number]["finish_reason"]
+
+interface ChunkParts {
+  delta: ChatChunkDelta
+  finishReason: ChatChunkFinish
+  usage?: ChatCompletionChunk["usage"]
+}
+
+function buildChunk(
+  state: ResponsesStreamState,
+  { delta, finishReason, usage }: ChunkParts,
+): ChatCompletionChunk {
+  return {
+    id: state.id,
+    object: "chat.completion.chunk",
+    created: state.created,
+    model: state.model,
+    choices: [{ index: 0, delta, finish_reason: finishReason, logprobs: null }],
+    ...(usage && { usage }), // key omitted when no usage is given
+  }
+}
+
+function consumeRole(state: ResponsesStreamState): "assistant" | undefined {
+  const firstUse = !state.roleSent // true only before any role was sent
+  state.roleSent = true
+  return firstUse ? "assistant" : undefined
+}
+
+/** Turns a text delta into one content chunk; empty deltas yield nothing. */
+function handleTextDelta(
+  event: ResponseStreamEvent,
+  state: ResponsesStreamState,
+): Array<ChatCompletionChunk | "[DONE]"> {
+  const text = event.delta
+  if (!text) return []
+
+  // consumeRole yields "assistant" only until a role has been sent.
+  const delta = { role: consumeRole(state), content: text }
+  return [buildChunk(state, { delta, finishReason: null })]
+}
+
+/**
+ * Registers a newly announced function_call item under the next tool call
+ * index and emits the opening tool_calls chunk (id, name, empty arguments).
+ */
+function handleFunctionCallAdded(
+  event: ResponseStreamEvent,
+  state: ResponsesStreamState,
+): Array<ChatCompletionChunk | "[DONE]"> {
+  const item = event.item
+  if (item?.type !== "function_call") return []
+
+  // Indices follow arrival order; argument deltas find theirs by item id.
+  const index = state.toolCallCount ?? 0
+  const key = item.id ?? event.item_id ?? String(index)
+  state.toolCalls ??= {}
+  state.toolCalls[key] = { index }
+  state.toolCallCount = index + 1
+  state.hasToolCalls = true
+
+  const opening = {
+    index,
+    id: item.call_id ?? item.id ?? "",
+    type: "function" as const,
+    function: { name: item.name ?? "", arguments: "" },
+  }
+  // consumeRole yields "assistant" only until a role has been sent.
+  const delta = { role: consumeRole(state), tool_calls: [opening] }
+  return [buildChunk(state, { delta, finishReason: null })]
+}
+
+/** Forwards an argument fragment to the tool call registered for its item. */
+function handleFunctionCallArgsDelta(
+  event: ResponseStreamEvent,
+  state: ResponsesStreamState,
+): Array<ChatCompletionChunk | "[DONE]"> {
+  const { item_id: itemId, delta: fragment } = event
+  if (!itemId || !fragment) return []
+
+  // Fragments for item ids that were never registered are dropped.
+  const registered = state.toolCalls?.[itemId]
+  if (!registered) return []
+
+  const toolCallDelta = {
+    index: registered.index,
+    function: { arguments: fragment },
+  }
+  const delta = { tool_calls: [toolCallDelta] }
+  return [buildChunk(state, { delta, finishReason: null })]
+}
+
+/** Emits the final chunk (finish reason and usage) followed by "[DONE]". */
+function handleCompleted(
+  event: ResponseStreamEvent,
+  state: ResponsesStreamState,
+): Array<ChatCompletionChunk | "[DONE]"> {
+  const { response } = event
+  if (!response) return []
+
+  // Adopt the completed response's identity before building the last chunk.
+  state.id = response.id
+  state.model = response.model
+  state.created = response.created_at
+  state.roleSent = true
+
+  const finishReason = state.hasToolCalls ? "tool_calls" : "stop"
+  const usage = toChatUsage(response.usage)
+  const delta = { content: null }
+  return [buildChunk(state, { delta, finishReason, usage }), "[DONE]"]
+}
+
 export function responseEventToChatChunks(
   eventData: string,
   state: ResponsesStreamState,
@@ -84,60 +227,23 @@ export function responseEventToChatChunks(
     return []
   }
 
-  if (event.type === "response.output_text.delta" && event.delta) {
-    const role = state.roleSent ? undefined : "assistant"
-    state.roleSent = true
-
-    return [
-      {
-        id: state.id,
-        object: "chat.completion.chunk",
-        created: state.created,
-        model: state.model,
-        choices: [
-          {
-            index: 0,
-            delta: {
-              role,
-              content: event.delta,
-            },
-            finish_reason: null,
-            logprobs: null,
-          },
-        ],
-      },
-    ]
-  }
-
-  if (event.type === "response.completed" && event.response) {
-    state.id = event.response.id
-    state.model = event.response.model
-    state.created = event.response.created_at
-    state.roleSent = true
-
-    return [
-      {
-        id: event.response.id,
-        object: "chat.completion.chunk",
-        created: event.response.created_at,
-        model: event.response.model,
-        choices: [
-          {
-            index: 0,
-            delta: {
-              content: null,
-            },
-            finish_reason: "stop",
-            logprobs: null,
-          },
-        ],
-        usage: toChatUsage(event.response.usage),
-      },
-      "[DONE]",
-    ]
+  switch (event.type) {
+    case "response.output_text.delta": {
+      return handleTextDelta(event, state)
+    }
+    case "response.output_item.added": {
+      return handleFunctionCallAdded(event, state)
+    }
+    case "response.function_call_arguments.delta": {
+      return handleFunctionCallArgsDelta(event, state)
+    }
+    case "response.completed": {
+      return handleCompleted(event, state)
+    }
+    default: {
+      return []
+    }
   }
-
-  return []
 }
 
 export interface ResponsesStreamState {
@@ -145,22 +251,61 @@ export interface ResponsesStreamState {
   id: string
   model: string
   roleSent: boolean
+  // Maps a streamed function_call item id to its chat-completions tool_calls
+  // array index. Lazily initialized so existing construction sites still work.
+  toolCalls?: Record<string, { index: number }>
+  toolCallCount?: number
+  hasToolCalls?: boolean
 }
 
 interface ResponsesPayload {
-  input: Array<ResponseInputMessage>
+  input: Array<ResponseInputItem> // messages, tool calls and tool results
   max_output_tokens?: number | null
   model: string
   stream?: boolean | null
   temperature?: number | null
   top_p?: number | null
+  tools?: Array<ResponsesFunctionTool> // undefined when no tools are given
+  tool_choice?: ResponsesToolChoice // undefined when the caller sets none
 }
 
 interface ResponseInputMessage {
+  type?: "message" // optional tag; toResponseInputItems always sets it
   content: string
   role: "assistant" | "developer" | "system" | "user"
 }
 
+interface ResponseFunctionCallInput {
+  type: "function_call" // input item replaying an assistant tool call
+  call_id: string // taken from the chat-completions tool call id
+  name: string // function name of the tool call
+  arguments: string // argument string of the tool call, copied as-is
+}
+
+interface ResponseFunctionCallOutputInput {
+  type: "function_call_output" // input item carrying a tool result
+  call_id: string // tool_call_id of the tool message ("" when absent)
+  output: string // tool message content passed through contentToText
+}
+
+type ResponseInputItem =
+  | ResponseInputMessage // plain role/content turn
+  | ResponseFunctionCallInput // tool call issued by the assistant
+  | ResponseFunctionCallOutputInput // result returned for a tool call
+
+interface ResponsesFunctionTool {
+  type: "function"
+  name: string // from tool.function.name
+  description?: string // from tool.function.description
+  parameters: Record<string, unknown> // from tool.function.parameters
+}
+
+type ResponsesToolChoice =
+  | "auto"
+  | "none"
+  | "required"
+  | { type: "function"; name: string } // selects one function by name
+
 export interface ResponseApiResponse {
   created_at: number
   error?: unknown
@@ -173,7 +318,13 @@ export interface ResponseApiResponse {
 }
 
 interface ResponseOutputItem {
+  type?: string // e.g. "function_call"
   content?: Array<ResponseContentPart>
+  // The four fields below are present when type === "function_call"
+  id?: string // item id; argument delta events carry it as item_id
+  call_id?: string // surfaces as the chat-completions tool call id
+  name?: string // function name
+  arguments?: string // argument string (JSON text in the tests)
 }
 
 interface ResponseContentPart {
@@ -191,42 +342,87 @@ interface ResponseStreamEvent {
   delta?: string
   response?: ResponseApiResponse
   type: string
+  // Function-call streaming fields
+  item?: ResponseOutputItem
+  item_id?: string
+  output_index?: number
 }
 
-function toResponsesPayload(payload: ChatCompletionsPayload): ResponsesPayload {
-  if (
-    (payload.tools && payload.tools.length > 0)
-    || payload.messages.some((m) => m.tool_calls && m.tool_calls.length > 0)
-  ) {
-    consola.warn(
-      "Tool definitions/calls are not supported via the /responses endpoint adapter and will be dropped for this model.",
-    )
-  }
-
+export function toResponsesPayload(
+  payload: ChatCompletionsPayload,
+): ResponsesPayload {
   return {
     model: payload.model,
-    input: payload.messages.flatMap((message) =>
-      toResponseInputMessage(message),
-    ),
+    input: payload.messages.flatMap((message) => toResponseInputItems(message)),
     max_output_tokens: payload.max_tokens,
     stream: payload.stream,
     temperature: payload.temperature,
     top_p: payload.top_p,
+    tools: toResponsesTools(payload.tools), // undefined when none given
+    tool_choice: toResponsesToolChoice(payload.tool_choice), // undefined if unset
   }
 }
 
-function toResponseInputMessage(message: Message): Array<ResponseInputMessage> {
+function toResponsesTools(
+  tools: ChatCompletionsPayload["tools"],
+): Array<ResponsesFunctionTool> | undefined {
+  if (!tools?.length) return undefined
+  // Hoist each nested `function` definition onto the tool object itself,
+  // which is the flattened layout the Responses API expects.
+  return tools.map(({ function: fn }: Tool) => ({
+    type: "function",
+    name: fn.name,
+    description: fn.description,
+    parameters: fn.parameters,
+  }))
+}
+
+function toResponsesToolChoice(
+  toolChoice: ChatCompletionsPayload["tool_choice"],
+): ResponsesToolChoice | undefined {
+  if (typeof toolChoice === "string") return toolChoice // modes pass through
+  if (!toolChoice) return undefined // null or undefined: nothing to forward
+  return { type: "function", name: toolChoice.function.name } // flatten name
+}
+
+function toResponseInputItems(message: Message): Array<ResponseInputItem> {
+  // A tool result becomes a function_call_output keyed by its call id.
   if (message.role === "tool") {
     return [
       {
-        role: "user",
-        content: contentToText(message.content),
+        type: "function_call_output",
+        call_id: message.tool_call_id ?? "",
+        output: contentToText(message.content),
       },
     ]
   }
 
+  // An assistant turn that issued tool calls becomes optional text followed by
+  // one function_call item per call.
+  if (
+    message.role === "assistant"
+    && message.tool_calls
+    && message.tool_calls.length > 0
+  ) {
+    const items: Array<ResponseInputItem> = []
+    const text = contentToText(message.content)
+    if (text) {
+      items.push({ type: "message", role: "assistant", content: text })
+    }
+    for (const toolCall of message.tool_calls) {
+      items.push({
+        type: "function_call",
+        call_id: toolCall.id,
+        name: toolCall.function.name,
+        arguments: toolCall.function.arguments,
+      })
+    }
+    return items
+  }
+
   return [
     {
+      type: "message",
       role: message.role,
       content: contentToText(message.content),
     },
diff --git a/tests/create-responses.test.ts b/tests/create-responses.test.ts
index f669f7f3f..468b36216 100644
--- a/tests/create-responses.test.ts
+++ b/tests/create-responses.test.ts
@@ -4,6 +4,7 @@ import {
   responseEventToChatChunks,
   responseToChatCompletion,
   shouldUseResponsesEndpoint,
+  toResponsesPayload,
   type ResponseApiResponse,
   type ResponsesStreamState,
 } from "~/services/copilot/create-responses"
@@ -174,3 +175,283 @@ describe("Responses API adapter", () => {
     expect(streamState.roleSent).toBe(false)
   })
 })
+
+describe("Responses API tool forwarding", () => {
+  test("flattens chat-completions tools into responses function tools", () => {
+    const payload = toResponsesPayload({
+      model: "gpt-test",
+      messages: [{ role: "user", content: "hi" }],
+      tools: [
+        {
+          type: "function",
+          function: {
+            name: "get_weather",
+            description: "Get weather",
+            parameters: { type: "object", properties: { city: {} } },
+          },
+        },
+      ],
+    }) as unknown as {
+      tools?: Array<Record<string, unknown>>
+    }
+
+    expect(payload.tools).toEqual([
+      {
+        type: "function",
+        name: "get_weather",
+        description: "Get weather",
+        parameters: { type: "object", properties: { city: {} } },
+      },
+    ])
+  })
+
+  test("maps tool_choice variants", () => {
+    const auto = toResponsesPayload({
+      model: "m",
+      messages: [],
+      tool_choice: "auto", // string modes are expected back unchanged
+    }) as unknown as { tool_choice?: unknown }
+    expect(auto.tool_choice).toBe("auto")
+
+    const forced = toResponsesPayload({
+      model: "m",
+      messages: [],
+      tool_choice: { type: "function", function: { name: "get_weather" } },
+    }) as unknown as { tool_choice?: unknown }
+    expect(forced.tool_choice).toEqual({
+      type: "function",
+      name: "get_weather", // hoisted out of the nested `function` object
+    })
+
+    const none = toResponsesPayload({
+      model: "m",
+      messages: [], // tool_choice deliberately left unset
+    }) as unknown as { tool_choice?: unknown }
+    expect(none.tool_choice).toBeUndefined()
+  })
+
+  test("converts assistant tool_calls and tool results into input items", () => {
+    const payload = toResponsesPayload({
+      model: "m",
+      messages: [
+        { role: "user", content: "weather?" },
+        {
+          role: "assistant",
+          content: "Let me check.", // expected as its own message item
+          tool_calls: [
+            {
+              id: "call_1",
+              type: "function",
+              function: {
+                name: "get_weather",
+                arguments: '{"city":"Boston"}',
+              },
+            },
+          ],
+        },
+        { role: "tool", tool_call_id: "call_1", content: "Sunny" },
+      ],
+    }) as unknown as { input: Array<Record<string, unknown>> }
+
+    expect(payload.input).toEqual([
+      { type: "message", role: "user", content: "weather?" },
+      { type: "message", role: "assistant", content: "Let me check." },
+      {
+        type: "function_call",
+        call_id: "call_1",
+        name: "get_weather",
+        arguments: '{"city":"Boston"}',
+      },
+      { type: "function_call_output", call_id: "call_1", output: "Sunny" },
+    ])
+  })
+
+  test("omits assistant text when the turn is purely a tool call", () => {
+    const payload = toResponsesPayload({
+      model: "m",
+      messages: [
+        {
+          role: "assistant",
+          content: null, // no assistant text alongside the call
+          tool_calls: [
+            {
+              id: "call_9",
+              type: "function",
+              function: { name: "noop", arguments: "{}" },
+            },
+          ],
+        },
+      ],
+    }) as unknown as { input: Array<Record<string, unknown>> }
+
+    expect(payload.input).toEqual([
+      {
+        type: "function_call",
+        call_id: "call_9",
+        name: "noop",
+        arguments: "{}",
+      },
+    ])
+  })
+})
+
+describe("Responses API tool forwarding (responses)", () => {
+  test("converts a non-streaming function_call output into tool_calls", () => {
+    const response: ResponseApiResponse = {
+      id: "resp_1",
+      object: "response",
+      created_at: 1700000000,
+      model: "gpt-test",
+      status: "completed",
+      output: [
+        {
+          type: "function_call",
+          id: "fc_1",
+          call_id: "call_1",
+          name: "get_weather",
+          arguments: '{"city":"Boston"}',
+        },
+      ],
+    }
+
+    const chat = responseToChatCompletion(response)
+    expect(chat.choices[0].finish_reason).toBe("tool_calls")
+    expect(chat.choices[0].message.content).toBeNull() // no text output
+    expect(chat.choices[0].message.tool_calls).toEqual([
+      {
+        id: "call_1", // the call_id, not the item id "fc_1"
+        type: "function",
+        function: { name: "get_weather", arguments: '{"city":"Boston"}' },
+      },
+    ])
+  })
+
+  test("streams a function call as opening + argument chunks", () => {
+    const streamState: ResponsesStreamState = {
+      id: "resp_1",
+      model: "gpt-test",
+      created: 1700000000,
+      roleSent: false, // the opening chunk is expected to carry the role
+    }
+
+    const openChunks = responseEventToChatChunks(
+      JSON.stringify({
+        type: "response.output_item.added",
+        output_index: 0,
+        item: {
+          type: "function_call",
+          id: "fc_1",
+          call_id: "call_1",
+          name: "get_weather",
+          arguments: "", // supplied by the delta event below
+        },
+      }),
+      streamState,
+    )
+
+    expect(openChunks).toHaveLength(1)
+    expect(openChunks[0]).toMatchObject({
+      choices: [
+        {
+          delta: {
+            role: "assistant",
+            tool_calls: [
+              {
+                index: 0,
+                id: "call_1",
+                type: "function",
+                function: { name: "get_weather", arguments: "" },
+              },
+            ],
+          },
+          finish_reason: null,
+        },
+      ],
+    })
+
+    const argChunks = responseEventToChatChunks(
+      JSON.stringify({
+        type: "response.function_call_arguments.delta",
+        item_id: "fc_1", // same id as item.id in the added event
+        delta: '{"city":',
+      }),
+      streamState,
+    )
+    expect(argChunks).toHaveLength(1)
+    expect(argChunks[0]).toMatchObject({
+      choices: [
+        {
+          delta: {
+            tool_calls: [{ index: 0, function: { arguments: '{"city":' } }],
+          },
+        },
+      ],
+    })
+
+    const completed = responseEventToChatChunks(
+      JSON.stringify({
+        type: "response.completed",
+        response: {
+          id: "resp_1",
+          object: "response",
+          created_at: 1700000000,
+          model: "gpt-test",
+          status: "completed",
+        },
+      }),
+      streamState,
+    )
+    expect(completed[0]).toMatchObject({
+      choices: [{ finish_reason: "tool_calls" }],
+    })
+    expect(completed.at(-1)).toBe("[DONE]")
+  })
+
+  test("ignores argument deltas for unknown function-call item ids", () => {
+    const streamState: ResponsesStreamState = {
+      id: "resp_1",
+      model: "gpt-test",
+      created: 1700000000,
+      roleSent: true,
+    }
+
+    expect(
+      responseEventToChatChunks(
+        JSON.stringify({
+          type: "response.function_call_arguments.delta",
+          item_id: "unknown", // never announced via output_item.added
+          delta: "{}",
+        }),
+        streamState,
+      ),
+    ).toEqual([])
+  })
+
+  test("assigns increasing indices to parallel function calls", () => {
+    const streamState: ResponsesStreamState = {
+      id: "resp_1",
+      model: "gpt-test",
+      created: 1700000000,
+      roleSent: false,
+    }
+
+    responseEventToChatChunks(
+      JSON.stringify({
+        type: "response.output_item.added",
+        item: { type: "function_call", id: "fc_1", call_id: "c1", name: "a" },
+      }),
+      streamState,
+    )
+    const second = responseEventToChatChunks(
+      JSON.stringify({
+        type: "response.output_item.added",
+        item: { type: "function_call", id: "fc_2", call_id: "c2", name: "b" },
+      }),
+      streamState, // same state object as the first call
+    )
+
+    expect(second[0]).toMatchObject({
+      choices: [{ delta: { tool_calls: [{ index: 1, id: "c2" }] } }],
+    })
+  })
+})