From fdd8cb8031542014423c85c4212eca47be5e8d8f Mon Sep 17 00:00:00 2001 From: Ross Story Date: Tue, 19 May 2026 21:25:53 -0700 Subject: [PATCH 1/3] Add Azure OpenAI provider configuration Adds Azure OpenAI API-key configuration for LLM compression and summarization, including endpoint/deployment detection, OpenAI provider key precedence, docs, onboarding, doctor hints, and focused tests. Documents that Microsoft Entra ID / DefaultAzureCredential auth is not implemented yet; this PR supports Azure OpenAI API-key auth only. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .env.example | 12 +++-- README.md | 13 ++++- src/cli.ts | 4 +- src/cli/doctor-diagnostics.ts | 3 +- src/cli/onboarding.ts | 6 +++ src/config.ts | 49 +++++++++++++++++-- src/functions/summarize.ts | 2 +- src/providers/index.ts | 10 +++- src/providers/openai.ts | 6 ++- test/config-azure-openai.test.ts | 81 ++++++++++++++++++++++++++++++++ test/openai-shared.test.ts | 38 +++++++++++++++ 11 files changed, 210 insertions(+), 14 deletions(-) create mode 100644 test/config-azure-openai.test.ts diff --git a/.env.example b/.env.example index 77ca0f3a..c600bad6 100644 --- a/.env.example +++ b/.env.example @@ -23,10 +23,16 @@ # Without a provider key, agentmemory runs in noop mode: observations are # indexed via zero-LLM synthetic compression, hybrid search still works, # but LLM-backed summarisation / reflection / consolidation are disabled. -# The detection order is OPENAI_API_KEY → MINIMAX_API_KEY → ANTHROPIC_API_KEY -# → GEMINI_API_KEY → OPENROUTER_API_KEY → noop. +# The detection order is AZURE_OPENAI_API_KEY → OPENAI_API_KEY → MINIMAX_API_KEY +# → ANTHROPIC_API_KEY → GEMINI_API_KEY → OPENROUTER_API_KEY → noop. -# OPENAI_API_KEY=sk-... # Used for OpenAI-compatible embeddings today. PR #307 will extend this to chat completions (DeepSeek, SiliconFlow, vLLM, LM Studio, Ollama via `/v1`). +# AZURE_OPENAI_API_KEY=... 
# Azure OpenAI API-key auth for LLM compression/summarization +# AZURE_OPENAI_ENDPOINT=https://<resource>.openai.azure.com +# AZURE_OPENAI_DEPLOYMENT=gpt-5.4-mini # Azure deployment name passed as the model +# AZURE_OPENAI_API_VERSION=2025-04-01-preview # Optional; only needed for legacy deployment URLs +# AZURE_OPENAI_BASE_URL=https://<resource>.openai.azure.com/openai/deployments/<deployment> + +# OPENAI_API_KEY=sk-... # OpenAI-compatible LLM and embeddings key # OPENAI_BASE_URL=https://api.openai.com # Override for OpenAI-compatible providers # ANTHROPIC_API_KEY=sk-ant-... diff --git a/README.md b/README.md index cfa87bc4..285e395d 100644 --- a/README.md +++ b/README.md @@ -1042,6 +1042,7 @@ agentmemory auto-detects from your environment. No API key needed if you have a |----------|--------|-------| | **No-op (default)** | No config needed | LLM-backed compress/summarize is DISABLED. Synthetic BM25 compression + recall still work. See `AGENTMEMORY_ALLOW_AGENT_SDK` below if you used to rely on the Claude-subscription fallback. | | Anthropic API | `ANTHROPIC_API_KEY` | Per-token billing | +| Azure OpenAI | `AZURE_OPENAI_API_KEY` + `AZURE_OPENAI_ENDPOINT` + `AZURE_OPENAI_DEPLOYMENT` | Uses your Azure OpenAI deployment for compression/summarization. API-key auth only; Microsoft Entra ID / `DefaultAzureCredential` is not wired yet. | | MiniMax | `MINIMAX_API_KEY` | Anthropic-compatible | | Gemini | `GEMINI_API_KEY` | Also enables embeddings | | OpenRouter | `OPENROUTER_API_KEY` | Any model | @@ -1055,6 +1056,14 @@ Create `~/.agentmemory/.env`: # LLM provider (pick one — default is the no-op provider: no LLM calls) # ANTHROPIC_API_KEY=sk-ant-... # ANTHROPIC_BASE_URL=... # Optional: Anthropic-compatible proxy / Azure +# AZURE_OPENAI_API_KEY=... 
# API-key auth only +# AZURE_OPENAI_ENDPOINT=https://<resource>.openai.azure.com +# AZURE_OPENAI_DEPLOYMENT=gpt-5.4-mini # Azure deployment name passed as the model +# AZURE_OPENAI_API_VERSION=2025-04-01-preview +# # Optional: only needed for legacy deployment URLs +# AZURE_OPENAI_BASE_URL=https://<resource>.openai.azure.com/openai/deployments/<deployment> +# # Optional: legacy deployment URL. If set, deployment +# # can be parsed from the URL. # GEMINI_API_KEY=... # OPENROUTER_API_KEY=... # MINIMAX_API_KEY=... @@ -1063,7 +1072,9 @@ Create `~/.agentmemory/.env`: # # embedding provider (further below). Set # # OPENAI_API_KEY_FOR_LLM=false to scope it # # to embeddings only. -# OPENAI_BASE_URL=https://api.openai.com # Optional: override for Azure / vLLM / LM Studio / proxies +# OPENAI_BASE_URL=https://api.openai.com # Optional: override for vLLM / LM Studio / proxies. +# # Prefer AZURE_OPENAI_* above for Azure OpenAI. +# # Legacy Azure deployments can still use this shape: # # Azure: https://.openai.azure.com/openai/deployments/ # # Auto-detected from `.openai.azure.com` hostname; uses # # api-key header + api-version query param. diff --git a/src/cli.ts b/src/cli.ts index 27885a95..c77ae1ac 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1400,7 +1400,7 @@ async function passiveServerChecks(): Promise { { name: "LLM provider", ok: hasLlm, - hint: hasLlm ? undefined : "set ANTHROPIC_API_KEY (or GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env", + hint: hasLlm ? undefined : "set ANTHROPIC_API_KEY (or AZURE_OPENAI/GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env", }, { name: "Embedding provider", @@ -1835,7 +1835,7 @@ async function runInit() { "All keys are commented out by default. Uncomment the ones you want.", "", "Common next steps:", - " 1. Pick an LLM provider key (ANTHROPIC_API_KEY / OPENAI_API_KEY / GEMINI_API_KEY / etc.)", + " 1. Pick an LLM provider key (ANTHROPIC_API_KEY / AZURE_OPENAI_API_KEY / OPENAI_API_KEY / etc.)", " 2. 
Run `npx @agentmemory/agentmemory doctor` to verify the daemon sees them", " 3. Run `npx @agentmemory/agentmemory` to start the worker", ].join("\n"), diff --git a/src/cli/doctor-diagnostics.ts b/src/cli/doctor-diagnostics.ts index c1dee4f4..d088c2cc 100644 --- a/src/cli/doctor-diagnostics.ts +++ b/src/cli/doctor-diagnostics.ts @@ -88,6 +88,7 @@ const PLACEHOLDER_VALUES = new Set([ const PROVIDER_KEY_NAMES = [ "ANTHROPIC_API_KEY", + "AZURE_OPENAI_API_KEY", "OPENAI_API_KEY", "GEMINI_API_KEY", "GOOGLE_API_KEY", @@ -197,7 +198,7 @@ export function buildDiagnostics(effects: DoctorEffects): Diagnostic[] { message: "No LLM provider API key found in ~/.agentmemory/.env.", fixPreview: "Open ~/.agentmemory/.env in $EDITOR and paste your key, then re-check.", moreInfo: - "Set at least one of: ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, " + + "Set at least one of: ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, " + "OPENROUTER_API_KEY, MINIMAX_API_KEY. The daemon picks the first that resolves " + "to a real (non-placeholder) value at startup.", check: async () => { diff --git a/src/cli/onboarding.ts b/src/cli/onboarding.ts index 48bbf4cc..fc6f8e4e 100644 --- a/src/cli/onboarding.ts +++ b/src/cli/onboarding.ts @@ -60,6 +60,7 @@ const MCP_AGENTS: { value: string; label: string; glyph: string }[] = [ const PROVIDERS: { value: string; label: string; envKey: string | null }[] = [ { value: "anthropic", label: "Anthropic — claude", envKey: "ANTHROPIC_API_KEY" }, + { value: "azure-openai", label: "Azure OpenAI — deployment", envKey: "AZURE_OPENAI_API_KEY" }, { value: "openai", label: "OpenAI — gpt", envKey: "OPENAI_API_KEY" }, { value: "gemini", label: "Google — gemini", envKey: "GEMINI_API_KEY" }, { value: "openrouter", label: "OpenRouter — multi-model", envKey: "OPENROUTER_API_KEY" }, @@ -230,6 +231,11 @@ export async function runOnboarding(): Promise { const envKey = PROVIDERS.find((x) => x.value === provider)?.envKey; if (envKey) { lines.push(` 
Uncomment ${envKey}= in that file to enable ${provider}.`); + if (provider === "azure-openai") { + lines.push( + " Also set AZURE_OPENAI_ENDPOINT= and AZURE_OPENAI_DEPLOYMENT= for your deployment.", + ); + } } } else { lines.push(" No provider chosen — agentmemory will run in BM25-only mode."); diff --git a/src/config.ts b/src/config.ts index eed5725e..02305eea 100644 --- a/src/config.ts +++ b/src/config.ts @@ -47,9 +47,46 @@ function hasRealValue(v: string | undefined): v is string { return typeof v === "string" && v.trim().length > 0; } -function detectProvider(env: Record): ProviderConfig { +function normalizeAzureEndpoint(endpoint: string): string { + return endpoint.replace(/\/+$/, ""); +} + +function deploymentFromAzureBaseUrl(baseUrl: string | undefined): string | undefined { + if (!hasRealValue(baseUrl)) return undefined; + try { + const parsed = new URL(baseUrl); + const match = parsed.pathname.match(/\/openai\/deployments\/([^/]+)/); + return match ? decodeURIComponent(match[1]) : undefined; + } catch { + return undefined; + } +} + +export function detectProviderForEnv(env: Record): ProviderConfig { const maxTokens = parseInt(env["MAX_TOKENS"] || "4096", 10); + const azureOpenAiKey = env["AZURE_OPENAI_API_KEY"]; + const azureOpenAiEndpoint = env["AZURE_OPENAI_ENDPOINT"]; + const azureOpenAiBaseUrl = env["AZURE_OPENAI_BASE_URL"]; + const azureOpenAiDeployment = + env["AZURE_OPENAI_DEPLOYMENT"] || + env["AZURE_OPENAI_MODEL"] || + deploymentFromAzureBaseUrl(azureOpenAiBaseUrl); + if ( + hasRealValue(azureOpenAiKey) && + (hasRealValue(azureOpenAiEndpoint) || hasRealValue(azureOpenAiBaseUrl)) && + hasRealValue(azureOpenAiDeployment) + ) { + return { + provider: "openai", + model: azureOpenAiDeployment, + maxTokens, + baseURL: hasRealValue(azureOpenAiBaseUrl) + ? 
azureOpenAiBaseUrl + : normalizeAzureEndpoint(azureOpenAiEndpoint), + }; + } + // OpenAI-compatible: supports OpenAI, DeepSeek, SiliconFlow, Azure, vLLM, LM Studio if (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") { return { @@ -102,7 +139,7 @@ function detectProvider(env: Record): ProviderConfig { if (!allowAgentSdk) { process.stderr.write( "[agentmemory] No LLM provider key found " + - "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY, OPENAI_API_KEY). " + + "(ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY, OPENAI_API_KEY). " + "LLM-backed compression and summarization are DISABLED — using no-op provider. " + "This is the safe default: the agent-sdk fallback used to spawn Claude Agent SDK " + "child sessions which inherit Claude Code's plugin hooks and cause infinite Stop-hook " + @@ -134,7 +171,7 @@ function detectProvider(env: Record): ProviderConfig { export function loadConfig(): AgentMemoryConfig { const env = getMergedEnv(); - const provider = detectProvider(env); + const provider = detectProviderForEnv(env); return { engineUrl: env["III_ENGINE_URL"] || "ws://localhost:49134", @@ -171,6 +208,12 @@ export function detectLlmProviderKind(): "llm" | "noop" { hasRealValue(env["GOOGLE_API_KEY"]) || hasRealValue(env["OPENROUTER_API_KEY"]) || hasRealValue(env["MINIMAX_API_KEY"]) || + (hasRealValue(env["AZURE_OPENAI_API_KEY"]) && + (hasRealValue(env["AZURE_OPENAI_ENDPOINT"]) || + hasRealValue(env["AZURE_OPENAI_BASE_URL"])) && + (hasRealValue(env["AZURE_OPENAI_DEPLOYMENT"]) || + hasRealValue(env["AZURE_OPENAI_MODEL"]) || + hasRealValue(deploymentFromAzureBaseUrl(env["AZURE_OPENAI_BASE_URL"])))) || (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") ) { diff --git a/src/functions/summarize.ts b/src/functions/summarize.ts index 80b29a09..1fdb6d7a 100644 --- a/src/functions/summarize.ts +++ b/src/functions/summarize.ts @@ -248,7 +248,7 
@@ export function registerSummarizeFunction( success: false, error: "no_provider", reason: - "No LLM provider key set; Summarize is a no-op. Set ANTHROPIC_API_KEY (or GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env to enable.", + "No LLM provider key set; Summarize is a no-op. Set ANTHROPIC_API_KEY (or AZURE_OPENAI/GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env to enable.", }; } diff --git a/src/providers/index.ts b/src/providers/index.ts index 5de6807c..9f810357 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -9,6 +9,7 @@ import { MinimaxProvider } from "./minimax.js"; import { NoopProvider } from "./noop.js"; import { OpenAIProvider } from "./openai.js"; import { OpenRouterProvider } from "./openrouter.js"; +import { detectAzure } from "./_openai-shared.js"; import { ResilientProvider } from "./resilient.js"; import { FallbackChainProvider } from "./fallback-chain.js"; import { getEnvVar } from "../config.js"; @@ -96,10 +97,15 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider { "https://openrouter.ai/api/v1/chat/completions", ); case "openai": { - const openaiKey = getEnvVar("OPENAI_API_KEY"); + const azureKey = getEnvVar("AZURE_OPENAI_API_KEY"); + const standardKey = getEnvVar("OPENAI_API_KEY"); + const useAzureKey = config.baseURL ? detectAzure(config.baseURL) : false; + const openaiKey = useAzureKey + ? 
azureKey || standardKey + : standardKey || azureKey; if (!openaiKey) { throw new Error( - "OPENAI_API_KEY is required for the openai provider", + "OPENAI_API_KEY or AZURE_OPENAI_API_KEY is required for the openai provider", ); } return new OpenAIProvider( diff --git a/src/providers/openai.ts b/src/providers/openai.ts index bca2370f..0f718ff8 100644 --- a/src/providers/openai.ts +++ b/src/providers/openai.ts @@ -25,12 +25,14 @@ const DEFAULT_TIMEOUT_MS = 60_000; * * Required env vars: * OPENAI_API_KEY — API key + * AZURE_OPENAI_API_KEY — Azure OpenAI API key alias for chat completions * * Optional: * OPENAI_BASE_URL — base URL without path (default: https://api.openai.com). * Azure: https://.openai.azure.com/openai/deployments/ * OPENAI_MODEL — model name (default: gpt-4o-mini) * OPENAI_API_VERSION — Azure api-version query param (default: 2024-08-01-preview) + * AZURE_OPENAI_API_VERSION — Azure-specific alias for OPENAI_API_VERSION * OPENAI_TIMEOUT_MS — outbound fetch timeout in ms (OpenAI-scoped alias, * takes precedence over AGENTMEMORY_LLM_TIMEOUT_MS * for back-compat with the v0.9.17 shipping name). 
@@ -63,7 +65,9 @@ export class OpenAIProvider implements MemoryProvider { this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined; this.timeoutMs = resolveTimeout(); this.azureApiVersion = - getEnvVar("OPENAI_API_VERSION") || DEFAULT_AZURE_API_VERSION; + getEnvVar("AZURE_OPENAI_API_VERSION") || + getEnvVar("OPENAI_API_VERSION") || + DEFAULT_AZURE_API_VERSION; this.isAzure = detectAzure(this.baseUrl); } diff --git a/test/config-azure-openai.test.ts b/test/config-azure-openai.test.ts new file mode 100644 index 00000000..7b4d0706 --- /dev/null +++ b/test/config-azure-openai.test.ts @@ -0,0 +1,81 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { detectProviderForEnv } from "../src/config.js"; +import { createProvider } from "../src/providers/index.js"; + +const ORIGINAL_OPENAI_KEY = process.env["OPENAI_API_KEY"]; +const ORIGINAL_AZURE_KEY = process.env["AZURE_OPENAI_API_KEY"]; + +describe("Azure OpenAI config detection", () => { + afterEach(() => { + if (ORIGINAL_OPENAI_KEY === undefined) delete process.env["OPENAI_API_KEY"]; + else process.env["OPENAI_API_KEY"] = ORIGINAL_OPENAI_KEY; + if (ORIGINAL_AZURE_KEY === undefined) delete process.env["AZURE_OPENAI_API_KEY"]; + else process.env["AZURE_OPENAI_API_KEY"] = ORIGINAL_AZURE_KEY; + vi.restoreAllMocks(); + }); + + it("uses Azure OpenAI endpoint and deployment as the OpenAI-compatible LLM provider", () => { + const provider = detectProviderForEnv({ + AZURE_OPENAI_API_KEY: "azure-key", + AZURE_OPENAI_ENDPOINT: "https://agentmemory.openai.azure.com/", + AZURE_OPENAI_DEPLOYMENT: "gpt-5.4-mini", + }); + + expect(provider).toEqual({ + provider: "openai", + model: "gpt-5.4-mini", + maxTokens: 4096, + baseURL: "https://agentmemory.openai.azure.com", + }); + }); + + it("supports a legacy Azure deployment base URL", () => { + const provider = detectProviderForEnv({ + AZURE_OPENAI_API_KEY: "azure-key", + AZURE_OPENAI_BASE_URL: + 
"https://agentmemory.openai.azure.com/openai/deployments/gpt-5.4-nano", + }); + + expect(provider.provider).toBe("openai"); + expect(provider.model).toBe("gpt-5.4-nano"); + expect(provider.baseURL).toBe( + "https://agentmemory.openai.azure.com/openai/deployments/gpt-5.4-nano", + ); + }); + + it("does not enable Azure OpenAI without a deployment", () => { + const provider = detectProviderForEnv({ + AZURE_OPENAI_API_KEY: "azure-key", + AZURE_OPENAI_ENDPOINT: "https://agentmemory.openai.azure.com", + }); + + expect(provider.provider).toBe("noop"); + }); + + it("uses the Azure key for Azure requests even when OPENAI_API_KEY is also set", async () => { + process.env["OPENAI_API_KEY"] = "public-openai-key"; + process.env["AZURE_OPENAI_API_KEY"] = "azure-openai-key"; + const config = detectProviderForEnv({ + OPENAI_API_KEY: "public-openai-key", + AZURE_OPENAI_API_KEY: "azure-openai-key", + AZURE_OPENAI_ENDPOINT: "https://agentmemory.openai.azure.com", + AZURE_OPENAI_DEPLOYMENT: "gpt-5.4-mini", + }); + let capturedHeaders = new Headers(); + vi.spyOn(globalThis, "fetch").mockImplementation( + async (_url: string | URL | Request, init?: RequestInit) => { + capturedHeaders = new Headers(init?.headers); + return new Response( + JSON.stringify({ choices: [{ message: { content: "summary" } }] }), + { status: 200 }, + ); + }, + ); + + await createProvider(config).summarize("system", "user"); + + expect(capturedHeaders.get("api-key")).toBe("azure-openai-key"); + expect(capturedHeaders.get("Authorization")).toBeNull(); + }); +}); diff --git a/test/openai-shared.test.ts b/test/openai-shared.test.ts index 7240e10c..93a4e18d 100644 --- a/test/openai-shared.test.ts +++ b/test/openai-shared.test.ts @@ -6,6 +6,7 @@ import { detectAzure, normalizeBaseUrl, } from "../src/providers/_openai-shared.js"; +import { OpenAIProvider } from "../src/providers/openai.js"; import { OpenAIEmbeddingProvider } from "../src/providers/embedding/openai.js"; describe("_openai-shared — detectAzure", () => { 
@@ -259,3 +260,40 @@ describe("OpenAIEmbeddingProvider — Azure auto-detection (#371)", () => { expect(capturedHeaders.get("api-key")).toBeNull(); }); }); + +describe("OpenAIProvider — Azure OpenAI aliases", () => { + const ORIGINAL_AZURE_VERSION = process.env["AZURE_OPENAI_API_VERSION"]; + + afterEach(() => { + if (ORIGINAL_AZURE_VERSION === undefined) + delete process.env["AZURE_OPENAI_API_VERSION"]; + else process.env["AZURE_OPENAI_API_VERSION"] = ORIGINAL_AZURE_VERSION; + vi.restoreAllMocks(); + }); + + it("honors AZURE_OPENAI_API_VERSION for legacy Azure chat completions", async () => { + process.env["AZURE_OPENAI_API_VERSION"] = "2025-04-01-preview"; + let capturedUrl = ""; + vi.spyOn(globalThis, "fetch").mockImplementation( + async (url: string | URL | Request) => { + capturedUrl = String(url); + return new Response( + JSON.stringify({ choices: [{ message: { content: "summary" } }] }), + { status: 200 }, + ); + }, + ); + + const provider = new OpenAIProvider( + "azure-key", + "gpt-5.4-mini", + 256, + "https://myres.openai.azure.com/openai/deployments/gpt-5.4-mini", + ); + await provider.summarize("system", "user"); + + expect(capturedUrl).toBe( + "https://myres.openai.azure.com/openai/deployments/gpt-5.4-mini/chat/completions?api-version=2025-04-01-preview", + ); + }); +}); From 18763de1a7571e50ee2ebd51ee2150c580677cb6 Mon Sep 17 00:00:00 2001 From: Ross Story Date: Tue, 19 May 2026 21:49:21 -0700 Subject: [PATCH 2/3] Support GPT-5 token limits for OpenAI-compatible chat Uses max_completion_tokens for GPT-5 deployments while preserving max_tokens for existing models. This is required for Azure OpenAI gpt-5.4-mini deployments. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/providers/openai.ts | 6 +++++- test/openai-shared.test.ts | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/providers/openai.ts b/src/providers/openai.ts index 0f718ff8..bf2b5170 100644 --- a/src/providers/openai.ts +++ b/src/providers/openai.ts @@ -83,12 +83,13 @@ export class OpenAIProvider implements MemoryProvider { const url = buildChatUrl(this.baseUrl, this.isAzure, this.azureApiVersion); const body: Record = { model: this.model, - max_tokens: this.maxTokens, messages: [ { role: "system", content: systemPrompt }, { role: "user", content: userPrompt }, ], }; + body[usesMaxCompletionTokens(this.model) ? "max_completion_tokens" : "max_tokens"] = + this.maxTokens; if (this.reasoningEffort) { body.reasoning_effort = this.reasoningEffort; } @@ -173,3 +174,6 @@ function parsePositiveInt(raw: string | null | undefined): number | undefined { return Number.isFinite(n) && n > 0 ? 
n : undefined; } +function usesMaxCompletionTokens(model: string): boolean { + return /^(?:gpt-5|o\d+)(?:[.-]|$)/i.test(model); +} diff --git a/test/openai-shared.test.ts b/test/openai-shared.test.ts index 93a4e18d..ec516ad2 100644 --- a/test/openai-shared.test.ts +++ b/test/openai-shared.test.ts @@ -296,4 +296,28 @@ describe("OpenAIProvider — Azure OpenAI aliases", () => { "https://myres.openai.azure.com/openai/deployments/gpt-5.4-mini/chat/completions?api-version=2025-04-01-preview", ); }); + + it("uses max_completion_tokens for GPT-5 deployments", async () => { + let capturedBody: Record = {}; + vi.spyOn(globalThis, "fetch").mockImplementation( + async (_url: string | URL | Request, init?: RequestInit) => { + capturedBody = JSON.parse(String(init?.body)); + return new Response( + JSON.stringify({ choices: [{ message: { content: "summary" } }] }), + { status: 200 }, + ); + }, + ); + + const provider = new OpenAIProvider( + "azure-key", + "gpt-5.4-mini", + 256, + "https://myres.openai.azure.com", + ); + await provider.summarize("system", "user"); + + expect(capturedBody.max_completion_tokens).toBe(256); + expect(capturedBody.max_tokens).toBeUndefined(); + }); }); From a5dbfb6c6f76b9104579138587257ac704a0ecf4 Mon Sep 17 00:00:00 2001 From: Ross Story Date: Wed, 20 May 2026 13:26:36 -0700 Subject: [PATCH 3/3] Address Azure OpenAI review comments Spells out concrete provider env vars in runtime hints and selects AZURE_OPENAI_API_KEY when OPENAI_BASE_URL is Azure-shaped but provided through the environment. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/cli.ts | 4 +++- src/functions/summarize.ts | 2 +- src/providers/index.ts | 3 ++- test/config-azure-openai.test.ts | 28 ++++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index c77ae1ac..1486c132 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1400,7 +1400,9 @@ async function passiveServerChecks(): Promise { { name: "LLM provider", ok: hasLlm, - hint: hasLlm ? undefined : "set ANTHROPIC_API_KEY (or AZURE_OPENAI/GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env", + hint: hasLlm + ? undefined + : "set ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT, OPENAI_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, or MINIMAX_API_KEY in ~/.agentmemory/.env", }, { name: "Embedding provider", diff --git a/src/functions/summarize.ts b/src/functions/summarize.ts index 1fdb6d7a..74b7f0ba 100644 --- a/src/functions/summarize.ts +++ b/src/functions/summarize.ts @@ -248,7 +248,7 @@ export function registerSummarizeFunction( success: false, error: "no_provider", reason: - "No LLM provider key set; Summarize is a no-op. Set ANTHROPIC_API_KEY (or AZURE_OPENAI/GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env to enable.", + "No LLM provider key set; Summarize is a no-op. Set ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT, OPENAI_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, or MINIMAX_API_KEY in ~/.agentmemory/.env to enable.", }; } diff --git a/src/providers/index.ts b/src/providers/index.ts index 9f810357..51611063 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -99,7 +99,8 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider { case "openai": { const azureKey = getEnvVar("AZURE_OPENAI_API_KEY"); const standardKey = getEnvVar("OPENAI_API_KEY"); - const useAzureKey = config.baseURL ? 
detectAzure(config.baseURL) : false; + const effectiveBaseURL = config.baseURL || getEnvVar("OPENAI_BASE_URL"); + const useAzureKey = effectiveBaseURL ? detectAzure(effectiveBaseURL) : false; const openaiKey = useAzureKey ? azureKey || standardKey : standardKey || azureKey; diff --git a/test/config-azure-openai.test.ts b/test/config-azure-openai.test.ts index 7b4d0706..5496b5e3 100644 --- a/test/config-azure-openai.test.ts +++ b/test/config-azure-openai.test.ts @@ -5,6 +5,7 @@ import { createProvider } from "../src/providers/index.js"; const ORIGINAL_OPENAI_KEY = process.env["OPENAI_API_KEY"]; const ORIGINAL_AZURE_KEY = process.env["AZURE_OPENAI_API_KEY"]; +const ORIGINAL_OPENAI_BASE = process.env["OPENAI_BASE_URL"]; describe("Azure OpenAI config detection", () => { afterEach(() => { @@ -12,6 +13,8 @@ describe("Azure OpenAI config detection", () => { else process.env["OPENAI_API_KEY"] = ORIGINAL_OPENAI_KEY; if (ORIGINAL_AZURE_KEY === undefined) delete process.env["AZURE_OPENAI_API_KEY"]; else process.env["AZURE_OPENAI_API_KEY"] = ORIGINAL_AZURE_KEY; + if (ORIGINAL_OPENAI_BASE === undefined) delete process.env["OPENAI_BASE_URL"]; + else process.env["OPENAI_BASE_URL"] = ORIGINAL_OPENAI_BASE; vi.restoreAllMocks(); }); @@ -78,4 +81,29 @@ describe("Azure OpenAI config detection", () => { expect(capturedHeaders.get("api-key")).toBe("azure-openai-key"); expect(capturedHeaders.get("Authorization")).toBeNull(); }); + + it("uses the Azure key when OPENAI_BASE_URL points to Azure", async () => { + process.env["OPENAI_API_KEY"] = "public-openai-key"; + process.env["AZURE_OPENAI_API_KEY"] = "azure-openai-key"; + process.env["OPENAI_BASE_URL"] = "https://agentmemory.openai.azure.com"; + let capturedHeaders = new Headers(); + vi.spyOn(globalThis, "fetch").mockImplementation( + async (_url: string | URL | Request, init?: RequestInit) => { + capturedHeaders = new Headers(init?.headers); + return new Response( + JSON.stringify({ choices: [{ message: { content: "summary" } }] }), + { 
status: 200 }, + ); + }, + ); + + await createProvider({ + provider: "openai", + model: "gpt-5.4-mini", + maxTokens: 256, + }).summarize("system", "user"); + + expect(capturedHeaders.get("api-key")).toBe("azure-openai-key"); + expect(capturedHeaders.get("Authorization")).toBeNull(); + }); });