ericc-ch · RazonIn4K · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml
@@ -2,7 +2,7 @@ name: Deploy to GitHub Pages
 
 on:
   push:
-    branches: [ "main" ]
+    branches: [master]
   workflow_dispatch:
 
 # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
@@ -37,4 +37,4 @@ jobs:
 
       - name: Deploy to GitHub Pages
         id: deployment
-        uses: actions/deploy-pages@v4
+        uses: actions/deploy-pages@v4
diff --git a/AGENTS.md b/AGENTS.md
@@ -3,7 +3,7 @@
 ## Build, Lint, and Test Commands
 
 - **Build:**  
-  `bun run build` (uses tsup)
+  `bun run build` (uses tsdown)
 - **Dev:**  
   `bun run dev`
 - **Lint:**  
@@ -13,7 +13,7 @@
 - **Test all:**  
    `bun test`
 - **Test single file:**  
-   `bun test tests/claude-request.test.ts`
+   `bun test tests/anthropic-request.test.ts`
 - **Start (prod):**  
   `bun run start`
 

diff --git a/Dockerfile b/Dockerfile
@@ -13,13 +13,14 @@ WORKDIR /app
 COPY ./package.json ./bun.lock ./
 RUN bun install --frozen-lockfile --production --ignore-scripts --no-cache
 
-COPY --from=builder /app/dist ./dist
+COPY --from=builder --chown=bun:bun /app/dist ./dist
 
 EXPOSE 4141
 
 HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
   CMD wget --spider -q http://localhost:4141/ || exit 1
 
-COPY entrypoint.sh /entrypoint.sh
+COPY --chown=bun:bun entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh
+USER bun
 ENTRYPOINT ["/entrypoint.sh"]
diff --git a/README.md b/README.md
@@ -74,25 +74,25 @@ mkdir -p ./copilot-data
 # Run the container with a bind mount to persist the token
 # This ensures your authentication survives container restarts
 
-docker run -p 4141:4141 -v $(pwd)/copilot-data:/root/.local/share/copilot-api copilot-api
+docker run -p 4141:4141 -v $(pwd)/copilot-data:/home/bun/.local/share/copilot-api copilot-api
 ```
 
 > **Note:**
-> The GitHub token and related data will be stored in `copilot-data` on your host. This is mapped to `/root/.local/share/copilot-api` inside the container, ensuring persistence across restarts.
+> The GitHub token and related data will be stored in `copilot-data` on your host. This is mapped to `/home/bun/.local/share/copilot-api` inside the container, ensuring persistence across restarts.
 
 ### Docker with Environment Variables
 
 You can pass the GitHub token directly to the container using environment variables:
 
 ```sh
-# Build with GitHub token
-docker build --build-arg GH_TOKEN=your_github_token_here -t copilot-api .
+# Build the image
+docker build -t copilot-api .
 
 # Run with GitHub token
 docker run -p 4141:4141 -e GH_TOKEN=your_github_token_here copilot-api
 
 # Run with additional options
-docker run -p 4141:4141 -e GH_TOKEN=your_token copilot-api start --verbose --port 4141
+docker run -p 4141:4141 -e GH_TOKEN=your_token copilot-api --verbose --port 4141
 ```
 
 ### Docker Compose Example
@@ -142,6 +142,7 @@ Copilot API now uses a subcommand structure with these main commands:
 
 - `start`: Start the Copilot API server. This command will also handle authentication if needed.
 - `auth`: Run GitHub authentication flow without starting the server. This is typically used if you need to generate a token for use with the `--github-token` option, especially in non-interactive environments.
+- `models`: List the current GitHub Copilot models available to the API. This is useful for non-interactive deployments where you want to inspect model support without starting the server.
 - `check-usage`: Show your current GitHub Copilot usage and quota information directly in the terminal (no server required).
 - `debug`: Display diagnostic information including version, runtime details, file paths, and authentication status. Useful for troubleshooting and support.
 
@@ -154,6 +155,7 @@ The following command line options are available for the `start` command:
 | Option         | Description                                                                   | Default    | Alias |
 | -------------- | ----------------------------------------------------------------------------- | ---------- | ----- |
 | --port         | Port to listen on                                                             | 4141       | -p    |
+| --host         | Host to bind to. Defaults to local-only. Use 0.0.0.0 to expose to the network (e.g. inside Docker) | 127.0.0.1  | none  |
 | --verbose      | Enable verbose logging                                                        | false      | -v    |
 | --account-type | Account type to use (individual, business, enterprise)                        | individual | -a    |
 | --manual       | Enable manual request approval                                                | false      | none  |
@@ -171,6 +173,17 @@ The following command line options are available for the `start` command:
 | --verbose    | Enable verbose logging    | false   | -v    |
 | --show-token | Show GitHub token on auth | false   | none  |
 
+### Models Command Options
+
+| Option         | Description                                                                   | Default    | Alias |
+| -------------- | ----------------------------------------------------------------------------- | ---------- | ----- |
+| --verbose      | Enable verbose logging                                                        | false      | -v    |
+| --account-type | Account type to use (individual, business, enterprise)                        | individual | -a    |
+| --github-token | Provide GitHub token directly (must be generated using the `auth` subcommand) | none       | -g    |
+| --show-token   | Show GitHub and Copilot tokens on fetch                                       | false      | none  |
+| --proxy-env    | Initialize proxy from environment variables                                   | false      | none  |
+| --json         | Output raw model response as JSON                                             | false      | none  |
+
 ### Debug Command Options
 
 | Option | Description               | Default | Alias |
@@ -342,10 +355,18 @@ bun run dev
 bun run start
 ```
 
+### List Available Models
+
+```sh
+bun run models
+```
+
 ## Usage Tips
 
 - To avoid hitting GitHub Copilot's rate limits, you can use the following flags:
   - `--manual`: Enables manual approval for each request, giving you full control over when requests are sent.
   - `--rate-limit <seconds>`: Enforces a minimum time interval between requests. For example, `copilot-api start --rate-limit 30` will ensure there's at least a 30-second gap between requests.
   - `--wait`: Use this with `--rate-limit`. It makes the server wait for the cooldown period to end instead of rejecting the request with an error. This is useful for clients that don't automatically retry on rate limit errors.
 - If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details.
+- This server is designed for localhost or trusted local-network use. It is intentionally single-account: all clients share the same GitHub/Copilot token and model cache.
+- The server binds to `127.0.0.1` by default, so it is not reachable from other machines unless you explicitly pass `--host` (for example `--host 0.0.0.0`). The Docker entrypoint passes `--host 0.0.0.0` so published ports work; restrict exposure on the host side with `-p 127.0.0.1:4141:4141` if you want it local-only.
diff --git a/entrypoint.sh b/entrypoint.sh
@@ -4,6 +4,8 @@ if [ "$1" = "--auth" ]; then
   exec bun run dist/main.js auth
 else
   # Default command
-  exec bun run dist/main.js start -g "$GH_TOKEN" "$@"
+  # Bind to 0.0.0.0 inside the container so published ports work;
+  # restrict exposure on the host side (e.g. -p 127.0.0.1:4141:4141).
+  exec bun run dist/main.js start --host 0.0.0.0 -g "$GH_TOKEN" "$@"
 fi
 
diff --git a/package.json b/package.json
@@ -27,6 +27,7 @@
     "knip": "knip-bun",
     "lint": "eslint --cache",
     "lint:all": "eslint --cache .",
+    "models": "bun run ./src/main.ts models",
     "prepack": "bun run build",
     "prepare": "simple-git-hooks",
     "release": "bumpp && bun publish --access public",

diff --git a/src/main.ts b/src/main.ts
@@ -5,6 +5,7 @@ import { defineCommand, runMain } from "citty"
 import { auth } from "./auth"
 import { checkUsage } from "./check-usage"
 import { debug } from "./debug"
+import { models } from "./models"
 import { start } from "./start"
 
 const main = defineCommand({
@@ -13,7 +14,7 @@ const main = defineCommand({
     description:
       "A wrapper around GitHub Copilot API to make it OpenAI compatible, making it usable for other tools.",
   },
-  subCommands: { auth, start, "check-usage": checkUsage, debug },
+  subCommands: { auth, start, models, "check-usage": checkUsage, debug },
 })
 
 await runMain(main)
diff --git a/src/models.ts b/src/models.ts
@@ -0,0 +1,120 @@
+#!/usr/bin/env node
+
+import { defineCommand } from "citty"
+import consola from "consola"
+
+import { ensurePaths } from "./lib/paths"
+import { initProxyFromEnv } from "./lib/proxy"
+import { state } from "./lib/state"
+import { setupGitHubToken } from "./lib/token"
+import { cacheVSCodeVersion } from "./lib/utils"
+import { getModels } from "./services/copilot/get-models"
+import { getCopilotToken } from "./services/github/get-copilot-token"
+
+interface RunModelsOptions {
+  verbose: boolean // when true, `runModels` sets `consola.level = 5`
+  accountType: string // copied onto `state.accountType`
+  githubToken?: string // falsy → `setupGitHubToken()` is awaited instead
+  showToken: boolean // copied onto `state.showToken`
+  proxyEnv: boolean // when true, `initProxyFromEnv()` is called first
+  json: boolean // print the response via `JSON.stringify`, not the box
+}
+
+/**
+ * Fetches the Copilot model list and prints it (JSON or a boxed summary).
+ *
+ * Writes account type, token flag and both tokens onto the shared `state`
+ * between the awaited helper calls; the original call order is preserved.
+ */
+export async function runModels(options: RunModelsOptions): Promise<void> {
+  if (options.proxyEnv) initProxyFromEnv()
+
+  if (options.verbose) {
+    consola.level = 5
+    consola.info("Verbose logging enabled")
+  }
+
+  state.accountType = options.accountType
+  state.showToken = options.showToken
+
+  await ensurePaths()
+  await cacheVSCodeVersion()
+
+  // A token passed by the caller wins over the token setup helper.
+  if (!options.githubToken) {
+    await setupGitHubToken()
+  } else {
+    state.githubToken = options.githubToken
+    consola.info("Using provided GitHub token")
+  }
+
+  const copilot = await getCopilotToken()
+  state.copilotToken = copilot.token
+  if (state.showToken) consola.info("Copilot token:", copilot.token)
+
+  const catalogue = await getModels()
+  if (options.json) {
+    console.log(JSON.stringify(catalogue, null, 2))
+    return
+  }
+
+  // One row per model: "<id> (<vendor>; <endpoints, or "default">)".
+  const rows: Array<string> = []
+  for (const entry of catalogue.data) {
+    const endpoints = entry.supported_endpoints?.join(", ") ?? "default"
+    rows.push(`${entry.id} (${entry.vendor}; ${endpoints})`)
+  }
+  consola.box(rows.join("\n"))
+}
+
+/** The `models` subcommand: maps CLI flags onto `runModels` options. */
+export const models = defineCommand({
+  meta: {
+    name: "models",
+    description: "List the current GitHub Copilot models available to the API",
+  },
+  args: {
+    verbose: {
+      type: "boolean",
+      alias: "v",
+      default: false,
+      description: "Enable verbose logging",
+    },
+    "account-type": {
+      type: "string",
+      alias: "a",
+      default: "individual",
+      description: "Account type to use (individual, business, enterprise)",
+    },
+    "github-token": {
+      type: "string",
+      alias: "g",
+      description:
+        "Provide GitHub token directly (must be generated using the `auth` subcommand)",
+    },
+    "show-token": {
+      type: "boolean",
+      default: false,
+      description: "Show GitHub and Copilot tokens on fetch",
+    },
+    "proxy-env": {
+      type: "boolean",
+      default: false,
+      description: "Initialize proxy from environment variables",
+    },
+    json: {
+      type: "boolean",
+      default: false,
+      description: "Output raw model response as JSON",
+    },
+  },
+  run: ({ args }) =>
+    runModels({
+      verbose: args.verbose,
+      accountType: args["account-type"],
+      githubToken: args["github-token"],
+      showToken: args["show-token"],
+      proxyEnv: args["proxy-env"],
+      json: args.json,
+    }),
+})
diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts
@@ -13,6 +13,14 @@ import {
   type ChatCompletionResponse,
   type ChatCompletionsPayload,
 } from "~/services/copilot/create-chat-completions"
+import {
+  createResponsesFromChatCompletions,
+  responseEventToChatChunks,
+  responseToChatCompletion,
+  shouldUseResponsesEndpoint,
+  type ResponseApiResponse,
+  type ResponsesStreamState,
+} from "~/services/copilot/create-responses"
 
 export async function handleCompletion(c: Context) {
   await checkRateLimit(state)
@@ -47,6 +55,36 @@ export async function handleCompletion(c: Context) {
     consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
   }
 
+  if (shouldUseResponsesEndpoint(selectedModel?.supported_endpoints)) {
+    const response = await createResponsesFromChatCompletions(payload)
+
+    if (isNonStreamingResponse(response)) {
+      consola.debug("Non-streaming response:", JSON.stringify(response))
+      return c.json(responseToChatCompletion(response))
+    }
+
+    consola.debug("Streaming response from responses endpoint")
+    return streamSSE(c, async (stream) => {
+      const streamState: ResponsesStreamState = {
+        id: "",
+        model: payload.model,
+        created: Math.floor(Date.now() / 1000),
+        roleSent: false,
+      }
+
+      for await (const event of response) {
+        if (!event.data) continue
+
+        const chunks = responseEventToChatChunks(event.data, streamState)
+        for (const chunk of chunks) {
+          await stream.writeSSE({
+            data: chunk === "[DONE]" ? chunk : JSON.stringify(chunk),
+          })
+        }
+      }
+    })
+  }
+
   const response = await createChatCompletions(payload)
 
   if (isNonStreaming(response)) {
@@ -66,3 +104,7 @@ export async function handleCompletion(c: Context) {
 const isNonStreaming = (
   response: Awaited<ReturnType<typeof createChatCompletions>>,
 ): response is ChatCompletionResponse => Object.hasOwn(response, "choices")
+// True unless the responses call returned an async-iterable (streamed) result.
+const isNonStreamingResponse = (
+  response: Awaited<ReturnType<typeof createResponsesFromChatCompletions>>,
+): response is ResponseApiResponse => !(Symbol.asyncIterator in response)
diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
@@ -352,6 +352,20 @@ function getAnthropicToolUseBlocks(
     type: "tool_use",
     id: toolCall.id,
     name: toolCall.function.name,
-    input: JSON.parse(toolCall.function.arguments) as Record<string, unknown>,
+    input: safeParseToolInput(toolCall.function.arguments),
   }))
 }
+
+/** Parses tool-call `arguments`; yields {} unless it is a JSON object. */
+function safeParseToolInput(args: string): Record<string, unknown> {
+  try {
+    const parsed: unknown = JSON.parse(args)
+    // Arrays are objects too, but tool input must be a key/value map.
+    const isRecord = typeof parsed === "object" && !Array.isArray(parsed)
+    if (parsed !== null && isRecord) return parsed as Record<string, unknown>
+  } catch {
+    // Models occasionally emit malformed JSON for tool arguments; fall
+    // through to an empty input instead of crashing the whole response.
+  }
+  return {}
+}