diff --git a/.env.example b/.env.example index 77ca0f3a..c600bad6 100644 --- a/.env.example +++ b/.env.example @@ -23,10 +23,16 @@ # Without a provider key, agentmemory runs in noop mode: observations are # indexed via zero-LLM synthetic compression, hybrid search still works, # but LLM-backed summarisation / reflection / consolidation are disabled. -# The detection order is OPENAI_API_KEY → MINIMAX_API_KEY → ANTHROPIC_API_KEY -# → GEMINI_API_KEY → OPENROUTER_API_KEY → noop. +# The detection order is AZURE_OPENAI_API_KEY → OPENAI_API_KEY → MINIMAX_API_KEY +# → ANTHROPIC_API_KEY → GEMINI_API_KEY → OPENROUTER_API_KEY → noop. -# OPENAI_API_KEY=sk-... # Used for OpenAI-compatible embeddings today. PR #307 will extend this to chat completions (DeepSeek, SiliconFlow, vLLM, LM Studio, Ollama via `/v1`). +# AZURE_OPENAI_API_KEY=... # Azure OpenAI API-key auth for LLM compression/summarization +# AZURE_OPENAI_ENDPOINT=https://<resource>.openai.azure.com +# AZURE_OPENAI_DEPLOYMENT=gpt-5.4-mini # Azure deployment name passed as the model +# AZURE_OPENAI_API_VERSION=2025-04-01-preview # Optional; only needed for legacy deployment URLs +# AZURE_OPENAI_BASE_URL=https://<resource>.openai.azure.com/openai/deployments/<deployment> + +# OPENAI_API_KEY=sk-... # OpenAI-compatible LLM and embeddings key # OPENAI_BASE_URL=https://api.openai.com # Override for OpenAI-compatible providers # ANTHROPIC_API_KEY=sk-ant-... diff --git a/README.md b/README.md index 803366fe..7b055726 100644 --- a/README.md +++ b/README.md @@ -1069,6 +1069,7 @@ agentmemory auto-detects from your environment. By default, no LLM calls are mad |----------|--------|-------| | **No-op (default)** | No config needed | LLM-backed compress/summarize is DISABLED. Synthetic BM25 compression + recall still work. See `AGENTMEMORY_ALLOW_AGENT_SDK` below if you used to rely on the Claude-subscription fallback. 
| | Anthropic API | `ANTHROPIC_API_KEY` | Per-token billing | +| Azure OpenAI | `AZURE_OPENAI_API_KEY` + `AZURE_OPENAI_ENDPOINT` + `AZURE_OPENAI_DEPLOYMENT` | Uses your Azure OpenAI deployment for compression/summarization. API-key auth only; Microsoft Entra ID / `DefaultAzureCredential` is not wired yet. | | MiniMax | `MINIMAX_API_KEY` | Anthropic-compatible | | Gemini | `GEMINI_API_KEY` | Also enables embeddings | | OpenRouter | `OPENROUTER_API_KEY` | Any model | @@ -1109,6 +1110,14 @@ Create `~/.agentmemory/.env`: # LLM provider (pick one — default is the no-op provider: no LLM calls) # ANTHROPIC_API_KEY=sk-ant-... # ANTHROPIC_BASE_URL=... # Optional: Anthropic-compatible proxy / Azure +# AZURE_OPENAI_API_KEY=... # API-key auth only +# AZURE_OPENAI_ENDPOINT=https://<resource>.openai.azure.com +# AZURE_OPENAI_DEPLOYMENT=gpt-5.4-mini # Azure deployment name passed as the model +# AZURE_OPENAI_API_VERSION=2025-04-01-preview +# # Optional: only needed for legacy deployment URLs +# AZURE_OPENAI_BASE_URL=https://<resource>.openai.azure.com/openai/deployments/<deployment> +# # Optional: legacy deployment URL. If set, deployment +# # can be parsed from the URL. # GEMINI_API_KEY=... # OPENROUTER_API_KEY=... # MINIMAX_API_KEY=... @@ -1117,7 +1126,9 @@ Create `~/.agentmemory/.env`: # # embedding provider (further below). Set # # OPENAI_API_KEY_FOR_LLM=false to scope it # # to embeddings only. -# OPENAI_BASE_URL=https://api.openai.com # Optional: override for Azure / vLLM / LM Studio / proxies +# OPENAI_BASE_URL=https://api.openai.com # Optional: override for vLLM / LM Studio / proxies. +# # Prefer AZURE_OPENAI_* above for Azure OpenAI. +# # Legacy Azure deployments can still use this shape: # # Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment> # # Auto-detected from `.openai.azure.com` hostname; uses # # api-key header + api-version query param. 
diff --git a/src/cli.ts b/src/cli.ts index e1ea9757..62ce50e6 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1442,7 +1442,9 @@ async function passiveServerChecks(): Promise { { name: "LLM provider", ok: hasLlm, - hint: hasLlm ? undefined : "set ANTHROPIC_API_KEY (or GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env", + hint: hasLlm + ? undefined + : "set ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT, OPENAI_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, or MINIMAX_API_KEY in ~/.agentmemory/.env", }, { name: "Embedding provider", @@ -1877,7 +1879,7 @@ async function runInit() { "All keys are commented out by default. Uncomment the ones you want.", "", "Common next steps:", - " 1. Pick an LLM provider key (ANTHROPIC_API_KEY / OPENAI_API_KEY / GEMINI_API_KEY / etc.)", + " 1. Pick an LLM provider key (ANTHROPIC_API_KEY / AZURE_OPENAI_API_KEY / OPENAI_API_KEY / etc.)", " 2. Run `npx @agentmemory/agentmemory doctor` to verify the daemon sees them", " 3. Run `npx @agentmemory/agentmemory` to start the worker", ].join("\n"), diff --git a/src/cli/doctor-diagnostics.ts b/src/cli/doctor-diagnostics.ts index c1dee4f4..d088c2cc 100644 --- a/src/cli/doctor-diagnostics.ts +++ b/src/cli/doctor-diagnostics.ts @@ -88,6 +88,7 @@ const PLACEHOLDER_VALUES = new Set([ const PROVIDER_KEY_NAMES = [ "ANTHROPIC_API_KEY", + "AZURE_OPENAI_API_KEY", "OPENAI_API_KEY", "GEMINI_API_KEY", "GOOGLE_API_KEY", @@ -197,7 +198,7 @@ export function buildDiagnostics(effects: DoctorEffects): Diagnostic[] { message: "No LLM provider API key found in ~/.agentmemory/.env.", fixPreview: "Open ~/.agentmemory/.env in $EDITOR and paste your key, then re-check.", moreInfo: - "Set at least one of: ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, " + + "Set at least one of: ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, " + "OPENROUTER_API_KEY, MINIMAX_API_KEY. 
The daemon picks the first that resolves " + "to a real (non-placeholder) value at startup.", check: async () => { diff --git a/src/cli/onboarding.ts b/src/cli/onboarding.ts index 48bbf4cc..fc6f8e4e 100644 --- a/src/cli/onboarding.ts +++ b/src/cli/onboarding.ts @@ -60,6 +60,7 @@ const MCP_AGENTS: { value: string; label: string; glyph: string }[] = [ const PROVIDERS: { value: string; label: string; envKey: string | null }[] = [ { value: "anthropic", label: "Anthropic — claude", envKey: "ANTHROPIC_API_KEY" }, + { value: "azure-openai", label: "Azure OpenAI — deployment", envKey: "AZURE_OPENAI_API_KEY" }, { value: "openai", label: "OpenAI — gpt", envKey: "OPENAI_API_KEY" }, { value: "gemini", label: "Google — gemini", envKey: "GEMINI_API_KEY" }, { value: "openrouter", label: "OpenRouter — multi-model", envKey: "OPENROUTER_API_KEY" }, @@ -230,6 +231,11 @@ export async function runOnboarding(): Promise { const envKey = PROVIDERS.find((x) => x.value === provider)?.envKey; if (envKey) { lines.push(` Uncomment ${envKey}= in that file to enable ${provider}.`); + if (provider === "azure-openai") { + lines.push( + " Also set AZURE_OPENAI_ENDPOINT= and AZURE_OPENAI_DEPLOYMENT= for your deployment.", + ); + } } } else { lines.push(" No provider chosen — agentmemory will run in BM25-only mode."); diff --git a/src/config.ts b/src/config.ts index eed5725e..02305eea 100644 --- a/src/config.ts +++ b/src/config.ts @@ -47,9 +47,46 @@ function hasRealValue(v: string | undefined): v is string { return typeof v === "string" && v.trim().length > 0; } -function detectProvider(env: Record): ProviderConfig { +function normalizeAzureEndpoint(endpoint: string): string { + return endpoint.replace(/\/+$/, ""); +} + +function deploymentFromAzureBaseUrl(baseUrl: string | undefined): string | undefined { + if (!hasRealValue(baseUrl)) return undefined; + try { + const parsed = new URL(baseUrl); + const match = parsed.pathname.match(/\/openai\/deployments\/([^/]+)/); + return match ? 
decodeURIComponent(match[1]) : undefined; + } catch { + return undefined; + } +} + +export function detectProviderForEnv(env: Record): ProviderConfig { const maxTokens = parseInt(env["MAX_TOKENS"] || "4096", 10); + const azureOpenAiKey = env["AZURE_OPENAI_API_KEY"]; + const azureOpenAiEndpoint = env["AZURE_OPENAI_ENDPOINT"]; + const azureOpenAiBaseUrl = env["AZURE_OPENAI_BASE_URL"]; + const azureOpenAiDeployment = + env["AZURE_OPENAI_DEPLOYMENT"] || + env["AZURE_OPENAI_MODEL"] || + deploymentFromAzureBaseUrl(azureOpenAiBaseUrl); + if ( + hasRealValue(azureOpenAiKey) && + (hasRealValue(azureOpenAiEndpoint) || hasRealValue(azureOpenAiBaseUrl)) && + hasRealValue(azureOpenAiDeployment) + ) { + return { + provider: "openai", + model: azureOpenAiDeployment, + maxTokens, + baseURL: hasRealValue(azureOpenAiBaseUrl) + ? azureOpenAiBaseUrl + : normalizeAzureEndpoint(azureOpenAiEndpoint), + }; + } + // OpenAI-compatible: supports OpenAI, DeepSeek, SiliconFlow, Azure, vLLM, LM Studio if (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") { return { @@ -102,7 +139,7 @@ function detectProvider(env: Record): ProviderConfig { if (!allowAgentSdk) { process.stderr.write( "[agentmemory] No LLM provider key found " + - "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY, OPENAI_API_KEY). " + + "(ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY, OPENAI_API_KEY). " + "LLM-backed compression and summarization are DISABLED — using no-op provider. 
" + "This is the safe default: the agent-sdk fallback used to spawn Claude Agent SDK " + "child sessions which inherit Claude Code's plugin hooks and cause infinite Stop-hook " + @@ -134,7 +171,7 @@ function detectProvider(env: Record): ProviderConfig { export function loadConfig(): AgentMemoryConfig { const env = getMergedEnv(); - const provider = detectProvider(env); + const provider = detectProviderForEnv(env); return { engineUrl: env["III_ENGINE_URL"] || "ws://localhost:49134", @@ -171,6 +208,12 @@ export function detectLlmProviderKind(): "llm" | "noop" { hasRealValue(env["GOOGLE_API_KEY"]) || hasRealValue(env["OPENROUTER_API_KEY"]) || hasRealValue(env["MINIMAX_API_KEY"]) || + (hasRealValue(env["AZURE_OPENAI_API_KEY"]) && + (hasRealValue(env["AZURE_OPENAI_ENDPOINT"]) || + hasRealValue(env["AZURE_OPENAI_BASE_URL"])) && + (hasRealValue(env["AZURE_OPENAI_DEPLOYMENT"]) || + hasRealValue(env["AZURE_OPENAI_MODEL"]) || + hasRealValue(deploymentFromAzureBaseUrl(env["AZURE_OPENAI_BASE_URL"])))) || (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") ) { diff --git a/src/functions/summarize.ts b/src/functions/summarize.ts index 80b29a09..74b7f0ba 100644 --- a/src/functions/summarize.ts +++ b/src/functions/summarize.ts @@ -248,7 +248,7 @@ export function registerSummarizeFunction( success: false, error: "no_provider", reason: - "No LLM provider key set; Summarize is a no-op. Set ANTHROPIC_API_KEY (or GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env to enable.", + "No LLM provider key set; Summarize is a no-op. 
Set ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT, OPENAI_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, or MINIMAX_API_KEY in ~/.agentmemory/.env to enable.", }; } diff --git a/src/providers/index.ts b/src/providers/index.ts index 5de6807c..51611063 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -9,6 +9,7 @@ import { MinimaxProvider } from "./minimax.js"; import { NoopProvider } from "./noop.js"; import { OpenAIProvider } from "./openai.js"; import { OpenRouterProvider } from "./openrouter.js"; +import { detectAzure } from "./_openai-shared.js"; import { ResilientProvider } from "./resilient.js"; import { FallbackChainProvider } from "./fallback-chain.js"; import { getEnvVar } from "../config.js"; @@ -96,10 +97,16 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider { "https://openrouter.ai/api/v1/chat/completions", ); case "openai": { - const openaiKey = getEnvVar("OPENAI_API_KEY"); + const azureKey = getEnvVar("AZURE_OPENAI_API_KEY"); + const standardKey = getEnvVar("OPENAI_API_KEY"); + const effectiveBaseURL = config.baseURL || getEnvVar("OPENAI_BASE_URL"); + const useAzureKey = effectiveBaseURL ? detectAzure(effectiveBaseURL) : false; + const openaiKey = useAzureKey + ? azureKey || standardKey + : standardKey || azureKey; if (!openaiKey) { throw new Error( - "OPENAI_API_KEY is required for the openai provider", + "OPENAI_API_KEY or AZURE_OPENAI_API_KEY is required for the openai provider", ); } return new OpenAIProvider( diff --git a/src/providers/openai.ts b/src/providers/openai.ts index 88e10829..73ccc16c 100644 --- a/src/providers/openai.ts +++ b/src/providers/openai.ts @@ -25,12 +25,14 @@ const DEFAULT_TIMEOUT_MS = 60_000; * * Required env vars: * OPENAI_API_KEY — API key + * AZURE_OPENAI_API_KEY — Azure OpenAI API key alias for chat completions * * Optional: * OPENAI_BASE_URL — base URL without path (default: https://api.openai.com). 
* Azure: https://.openai.azure.com/openai/deployments/ * OPENAI_MODEL — model name (default: gpt-4o-mini) * OPENAI_API_VERSION — Azure api-version query param (default: 2024-08-01-preview) + * AZURE_OPENAI_API_VERSION — Azure-specific alias for OPENAI_API_VERSION * OPENAI_TIMEOUT_MS — outbound fetch timeout in ms (OpenAI-scoped alias, * takes precedence over AGENTMEMORY_LLM_TIMEOUT_MS * for back-compat with the v0.9.17 shipping name). @@ -63,7 +65,9 @@ export class OpenAIProvider implements MemoryProvider { this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined; this.timeoutMs = resolveTimeout(); this.azureApiVersion = - getEnvVar("OPENAI_API_VERSION") || DEFAULT_AZURE_API_VERSION; + getEnvVar("AZURE_OPENAI_API_VERSION") || + getEnvVar("OPENAI_API_VERSION") || + DEFAULT_AZURE_API_VERSION; this.isAzure = detectAzure(this.baseUrl); } @@ -79,7 +83,6 @@ export class OpenAIProvider implements MemoryProvider { const url = buildChatUrl(this.baseUrl, this.isAzure, this.azureApiVersion); const body: Record = { model: this.model, - max_tokens: this.maxTokens, // OpenAI API spec defines `stream` as defaulting to false, so omitting // it should yield a JSON response. Some OpenAI-compatible proxies // (notably 9Router < 0.4.56 — see decolua/9router#1260) default to @@ -92,6 +95,8 @@ export class OpenAIProvider implements MemoryProvider { { role: "user", content: userPrompt }, ], }; + body[usesMaxCompletionTokens(this.model) ? "max_completion_tokens" : "max_tokens"] = + this.maxTokens; if (this.reasoningEffort) { body.reasoning_effort = this.reasoningEffort; } @@ -176,3 +181,6 @@ function parsePositiveInt(raw: string | null | undefined): number | undefined { return Number.isFinite(n) && n > 0 ? 
n : undefined; } +function usesMaxCompletionTokens(model: string): boolean { + return /^gpt-5(?:[.-]|$)/i.test(model); +} diff --git a/test/config-azure-openai.test.ts b/test/config-azure-openai.test.ts new file mode 100644 index 00000000..5496b5e3 --- /dev/null +++ b/test/config-azure-openai.test.ts @@ -0,0 +1,109 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { detectProviderForEnv } from "../src/config.js"; +import { createProvider } from "../src/providers/index.js"; + +const ORIGINAL_OPENAI_KEY = process.env["OPENAI_API_KEY"]; +const ORIGINAL_AZURE_KEY = process.env["AZURE_OPENAI_API_KEY"]; +const ORIGINAL_OPENAI_BASE = process.env["OPENAI_BASE_URL"]; + +describe("Azure OpenAI config detection", () => { + afterEach(() => { + if (ORIGINAL_OPENAI_KEY === undefined) delete process.env["OPENAI_API_KEY"]; + else process.env["OPENAI_API_KEY"] = ORIGINAL_OPENAI_KEY; + if (ORIGINAL_AZURE_KEY === undefined) delete process.env["AZURE_OPENAI_API_KEY"]; + else process.env["AZURE_OPENAI_API_KEY"] = ORIGINAL_AZURE_KEY; + if (ORIGINAL_OPENAI_BASE === undefined) delete process.env["OPENAI_BASE_URL"]; + else process.env["OPENAI_BASE_URL"] = ORIGINAL_OPENAI_BASE; + vi.restoreAllMocks(); + }); + + it("uses Azure OpenAI endpoint and deployment as the OpenAI-compatible LLM provider", () => { + const provider = detectProviderForEnv({ + AZURE_OPENAI_API_KEY: "azure-key", + AZURE_OPENAI_ENDPOINT: "https://agentmemory.openai.azure.com/", + AZURE_OPENAI_DEPLOYMENT: "gpt-5.4-mini", + }); + + expect(provider).toEqual({ + provider: "openai", + model: "gpt-5.4-mini", + maxTokens: 4096, + baseURL: "https://agentmemory.openai.azure.com", + }); + }); + + it("supports a legacy Azure deployment base URL", () => { + const provider = detectProviderForEnv({ + AZURE_OPENAI_API_KEY: "azure-key", + AZURE_OPENAI_BASE_URL: + "https://agentmemory.openai.azure.com/openai/deployments/gpt-5.4-nano", + }); + + expect(provider.provider).toBe("openai"); + 
expect(provider.model).toBe("gpt-5.4-nano"); + expect(provider.baseURL).toBe( + "https://agentmemory.openai.azure.com/openai/deployments/gpt-5.4-nano", + ); + }); + + it("does not enable Azure OpenAI without a deployment", () => { + const provider = detectProviderForEnv({ + AZURE_OPENAI_API_KEY: "azure-key", + AZURE_OPENAI_ENDPOINT: "https://agentmemory.openai.azure.com", + }); + + expect(provider.provider).toBe("noop"); + }); + + it("uses the Azure key for Azure requests even when OPENAI_API_KEY is also set", async () => { + process.env["OPENAI_API_KEY"] = "public-openai-key"; + process.env["AZURE_OPENAI_API_KEY"] = "azure-openai-key"; + const config = detectProviderForEnv({ + OPENAI_API_KEY: "public-openai-key", + AZURE_OPENAI_API_KEY: "azure-openai-key", + AZURE_OPENAI_ENDPOINT: "https://agentmemory.openai.azure.com", + AZURE_OPENAI_DEPLOYMENT: "gpt-5.4-mini", + }); + let capturedHeaders = new Headers(); + vi.spyOn(globalThis, "fetch").mockImplementation( + async (_url: string | URL | Request, init?: RequestInit) => { + capturedHeaders = new Headers(init?.headers); + return new Response( + JSON.stringify({ choices: [{ message: { content: "summary" } }] }), + { status: 200 }, + ); + }, + ); + + await createProvider(config).summarize("system", "user"); + + expect(capturedHeaders.get("api-key")).toBe("azure-openai-key"); + expect(capturedHeaders.get("Authorization")).toBeNull(); + }); + + it("uses the Azure key when OPENAI_BASE_URL points to Azure", async () => { + process.env["OPENAI_API_KEY"] = "public-openai-key"; + process.env["AZURE_OPENAI_API_KEY"] = "azure-openai-key"; + process.env["OPENAI_BASE_URL"] = "https://agentmemory.openai.azure.com"; + let capturedHeaders = new Headers(); + vi.spyOn(globalThis, "fetch").mockImplementation( + async (_url: string | URL | Request, init?: RequestInit) => { + capturedHeaders = new Headers(init?.headers); + return new Response( + JSON.stringify({ choices: [{ message: { content: "summary" } }] }), + { status: 200 }, + ); + 
}, + ); + + await createProvider({ + provider: "openai", + model: "gpt-5.4-mini", + maxTokens: 256, + }).summarize("system", "user"); + + expect(capturedHeaders.get("api-key")).toBe("azure-openai-key"); + expect(capturedHeaders.get("Authorization")).toBeNull(); + }); +}); diff --git a/test/openai-shared.test.ts b/test/openai-shared.test.ts index 7240e10c..ec516ad2 100644 --- a/test/openai-shared.test.ts +++ b/test/openai-shared.test.ts @@ -6,6 +6,7 @@ import { detectAzure, normalizeBaseUrl, } from "../src/providers/_openai-shared.js"; +import { OpenAIProvider } from "../src/providers/openai.js"; import { OpenAIEmbeddingProvider } from "../src/providers/embedding/openai.js"; describe("_openai-shared — detectAzure", () => { @@ -259,3 +260,64 @@ describe("OpenAIEmbeddingProvider — Azure auto-detection (#371)", () => { expect(capturedHeaders.get("api-key")).toBeNull(); }); }); + +describe("OpenAIProvider — Azure OpenAI aliases", () => { + const ORIGINAL_AZURE_VERSION = process.env["AZURE_OPENAI_API_VERSION"]; + + afterEach(() => { + if (ORIGINAL_AZURE_VERSION === undefined) + delete process.env["AZURE_OPENAI_API_VERSION"]; + else process.env["AZURE_OPENAI_API_VERSION"] = ORIGINAL_AZURE_VERSION; + vi.restoreAllMocks(); + }); + + it("honors AZURE_OPENAI_API_VERSION for legacy Azure chat completions", async () => { + process.env["AZURE_OPENAI_API_VERSION"] = "2025-04-01-preview"; + let capturedUrl = ""; + vi.spyOn(globalThis, "fetch").mockImplementation( + async (url: string | URL | Request) => { + capturedUrl = String(url); + return new Response( + JSON.stringify({ choices: [{ message: { content: "summary" } }] }), + { status: 200 }, + ); + }, + ); + + const provider = new OpenAIProvider( + "azure-key", + "gpt-5.4-mini", + 256, + "https://myres.openai.azure.com/openai/deployments/gpt-5.4-mini", + ); + await provider.summarize("system", "user"); + + expect(capturedUrl).toBe( + 
"https://myres.openai.azure.com/openai/deployments/gpt-5.4-mini/chat/completions?api-version=2025-04-01-preview", + ); + }); + + it("uses max_completion_tokens for GPT-5 deployments", async () => { + let capturedBody: Record = {}; + vi.spyOn(globalThis, "fetch").mockImplementation( + async (_url: string | URL | Request, init?: RequestInit) => { + capturedBody = JSON.parse(String(init?.body)); + return new Response( + JSON.stringify({ choices: [{ message: { content: "summary" } }] }), + { status: 200 }, + ); + }, + ); + + const provider = new OpenAIProvider( + "azure-key", + "gpt-5.4-mini", + 256, + "https://myres.openai.azure.com", + ); + await provider.summarize("system", "user"); + + expect(capturedBody.max_completion_tokens).toBe(256); + expect(capturedBody.max_tokens).toBeUndefined(); + }); +});