From fdd8cb8031542014423c85c4212eca47be5e8d8f Mon Sep 17 00:00:00 2001
From: Ross Story <rostory@microsoft.com>
Date: Tue, 19 May 2026 21:25:53 -0700
Subject: [PATCH 1/3] Add Azure OpenAI provider configuration

Adds Azure OpenAI API-key configuration for LLM compression and summarization, including endpoint/deployment detection, OpenAI provider key precedence, docs, onboarding, doctor hints, and focused tests.

Documents that Microsoft Entra ID / DefaultAzureCredential auth is not implemented yet; this PR supports Azure OpenAI API-key auth only.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .env.example                     | 12 +++--
 README.md                        | 13 ++++-
 src/cli.ts                       |  4 +-
 src/cli/doctor-diagnostics.ts    |  3 +-
 src/cli/onboarding.ts            |  6 +++
 src/config.ts                    | 49 +++++++++++++++++--
 src/functions/summarize.ts       |  2 +-
 src/providers/index.ts           | 10 +++-
 src/providers/openai.ts          |  6 ++-
 test/config-azure-openai.test.ts | 81 ++++++++++++++++++++++++++++++++
 test/openai-shared.test.ts       | 38 +++++++++++++++
 11 files changed, 210 insertions(+), 14 deletions(-)
 create mode 100644 test/config-azure-openai.test.ts
diff --git a/.env.example b/.env.example
index 77ca0f3a..c600bad6 100644
--- a/.env.example
+++ b/.env.example
@@ -23,10 +23,16 @@
 # Without a provider key, agentmemory runs in noop mode: observations are
 # indexed via zero-LLM synthetic compression, hybrid search still works,
 # but LLM-backed summarisation / reflection / consolidation are disabled.
-# The detection order is OPENAI_API_KEY → MINIMAX_API_KEY → ANTHROPIC_API_KEY
-# → GEMINI_API_KEY → OPENROUTER_API_KEY → noop.
+# The detection order is AZURE_OPENAI_API_KEY (with an endpoint/base URL and a deployment) → OPENAI_API_KEY
+# → MINIMAX_API_KEY → ANTHROPIC_API_KEY → GEMINI_API_KEY → OPENROUTER_API_KEY → noop.
 
-# OPENAI_API_KEY=sk-...                          # Used for OpenAI-compatible embeddings today. PR #307 will extend this to chat completions (DeepSeek, SiliconFlow, vLLM, LM Studio, Ollama via `/v1`).
+# AZURE_OPENAI_API_KEY=...                       # Azure OpenAI API-key auth for LLM compression/summarization
+# AZURE_OPENAI_ENDPOINT=https://<resource>.openai.azure.com
+# AZURE_OPENAI_DEPLOYMENT=gpt-5.4-mini           # Azure deployment name passed as the model
+# AZURE_OPENAI_API_VERSION=2025-04-01-preview    # Optional; only needed for legacy deployment URLs
+# AZURE_OPENAI_BASE_URL=https://<resource>.openai.azure.com/openai/deployments/<deployment>
+
+# OPENAI_API_KEY=sk-...                          # OpenAI-compatible LLM and embeddings key
 # OPENAI_BASE_URL=https://api.openai.com         # Override for OpenAI-compatible providers
 
 # ANTHROPIC_API_KEY=sk-ant-...
diff --git a/README.md b/README.md
index cfa87bc4..285e395d 100644
--- a/README.md
+++ b/README.md
@@ -1042,6 +1042,7 @@ agentmemory auto-detects from your environment. No API key needed if you have a
 |----------|--------|-------|
 | **No-op (default)** | No config needed | LLM-backed compress/summarize is DISABLED. Synthetic BM25 compression + recall still work. See `AGENTMEMORY_ALLOW_AGENT_SDK` below if you used to rely on the Claude-subscription fallback. |
 | Anthropic API | `ANTHROPIC_API_KEY` | Per-token billing |
+| Azure OpenAI | `AZURE_OPENAI_API_KEY` + `AZURE_OPENAI_ENDPOINT` + `AZURE_OPENAI_DEPLOYMENT` | Uses your Azure OpenAI deployment for compression/summarization. API-key auth only; Microsoft Entra ID / `DefaultAzureCredential` is not wired yet. |
 | MiniMax | `MINIMAX_API_KEY` | Anthropic-compatible |
 | Gemini | `GEMINI_API_KEY` | Also enables embeddings |
 | OpenRouter | `OPENROUTER_API_KEY` | Any model |
@@ -1055,6 +1056,14 @@ Create `~/.agentmemory/.env`:
 # LLM provider (pick one — default is the no-op provider: no LLM calls)
 # ANTHROPIC_API_KEY=sk-ant-...
 # ANTHROPIC_BASE_URL=...              # Optional: Anthropic-compatible proxy / Azure
+# AZURE_OPENAI_API_KEY=...            # API-key auth only
+# AZURE_OPENAI_ENDPOINT=https://<resource>.openai.azure.com
+# AZURE_OPENAI_DEPLOYMENT=gpt-5.4-mini # Azure deployment name passed as the model
+# AZURE_OPENAI_API_VERSION=2025-04-01-preview
+#                                      # Optional: only needed for legacy deployment URLs
+# AZURE_OPENAI_BASE_URL=https://<resource>.openai.azure.com/openai/deployments/<deployment>
+#                                      # Optional: legacy deployment URL. If set, it is used instead of
+#                                      # AZURE_OPENAI_ENDPOINT; the deployment is parsed from it when no deployment name is set explicitly.
 # GEMINI_API_KEY=...
 # OPENROUTER_API_KEY=...
 # MINIMAX_API_KEY=...
@@ -1063,7 +1072,9 @@ Create `~/.agentmemory/.env`:
 #                                          # embedding provider (further below). Set
 #                                          # OPENAI_API_KEY_FOR_LLM=false to scope it
 #                                          # to embeddings only.
-# OPENAI_BASE_URL=https://api.openai.com   # Optional: override for Azure / vLLM / LM Studio / proxies
+# OPENAI_BASE_URL=https://api.openai.com   # Optional: override for vLLM / LM Studio / proxies.
+#                                          # Prefer AZURE_OPENAI_* above for Azure OpenAI.
+#                                          # Legacy Azure deployments can still use this shape:
 #                                          # Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment>
 #                                          # Auto-detected from `.openai.azure.com` hostname; uses
 #                                          # api-key header + api-version query param.
diff --git a/src/cli.ts b/src/cli.ts
index 27885a95..c77ae1ac 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -1400,7 +1400,7 @@ async function passiveServerChecks(): Promise<DoctorCheck[]> {
     {
       name: "LLM provider",
       ok: hasLlm,
-      hint: hasLlm ? undefined : "set ANTHROPIC_API_KEY (or GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env",
+      hint: hasLlm ? undefined : "set ANTHROPIC_API_KEY (or AZURE_OPENAI/GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env",
     },
     {
       name: "Embedding provider",
@@ -1835,7 +1835,7 @@ async function runInit() {
       "All keys are commented out by default. Uncomment the ones you want.",
       "",
       "Common next steps:",
-      "  1. Pick an LLM provider key (ANTHROPIC_API_KEY / OPENAI_API_KEY / GEMINI_API_KEY / etc.)",
+      "  1. Pick an LLM provider key (ANTHROPIC_API_KEY / AZURE_OPENAI_API_KEY / OPENAI_API_KEY / etc.)",
       "  2. Run `npx @agentmemory/agentmemory doctor` to verify the daemon sees them",
       "  3. Run `npx @agentmemory/agentmemory` to start the worker",
     ].join("\n"),
diff --git a/src/cli/doctor-diagnostics.ts b/src/cli/doctor-diagnostics.ts
index c1dee4f4..d088c2cc 100644
--- a/src/cli/doctor-diagnostics.ts
+++ b/src/cli/doctor-diagnostics.ts
@@ -88,6 +88,7 @@ const PLACEHOLDER_VALUES = new Set([
 
 const PROVIDER_KEY_NAMES = [
   "ANTHROPIC_API_KEY",
+  "AZURE_OPENAI_API_KEY",
   "OPENAI_API_KEY",
   "GEMINI_API_KEY",
   "GOOGLE_API_KEY",
@@ -197,7 +198,7 @@ export function buildDiagnostics(effects: DoctorEffects): Diagnostic[] {
       message: "No LLM provider API key found in ~/.agentmemory/.env.",
       fixPreview: "Open ~/.agentmemory/.env in $EDITOR and paste your key, then re-check.",
       moreInfo:
-        "Set at least one of: ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, " +
+        "Set at least one of: ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY, " +
         "OPENROUTER_API_KEY, MINIMAX_API_KEY. The daemon picks the first that resolves " +
         "to a real (non-placeholder) value at startup.",
       check: async () => {
diff --git a/src/cli/onboarding.ts b/src/cli/onboarding.ts
index 48bbf4cc..fc6f8e4e 100644
--- a/src/cli/onboarding.ts
+++ b/src/cli/onboarding.ts
@@ -60,6 +60,7 @@ const MCP_AGENTS: { value: string; label: string; glyph: string }[] = [
 
 const PROVIDERS: { value: string; label: string; envKey: string | null }[] = [
   { value: "anthropic", label: "Anthropic — claude", envKey: "ANTHROPIC_API_KEY" },
+  { value: "azure-openai", label: "Azure OpenAI — deployment", envKey: "AZURE_OPENAI_API_KEY" },
   { value: "openai", label: "OpenAI — gpt", envKey: "OPENAI_API_KEY" },
   { value: "gemini", label: "Google — gemini", envKey: "GEMINI_API_KEY" },
   { value: "openrouter", label: "OpenRouter — multi-model", envKey: "OPENROUTER_API_KEY" },
@@ -230,6 +231,11 @@ export async function runOnboarding(): Promise<OnboardingResult> {
     const envKey = PROVIDERS.find((x) => x.value === provider)?.envKey;
     if (envKey) {
       lines.push(`  Uncomment ${envKey}= in that file to enable ${provider}.`);
+      if (provider === "azure-openai") {
+        lines.push(
+          "  Also set AZURE_OPENAI_ENDPOINT= and AZURE_OPENAI_DEPLOYMENT= for your deployment.",
+        );
+      }
     }
   } else {
     lines.push("  No provider chosen — agentmemory will run in BM25-only mode.");
diff --git a/src/config.ts b/src/config.ts
index eed5725e..02305eea 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -47,9 +47,46 @@ function hasRealValue(v: string | undefined): v is string {
   return typeof v === "string" && v.trim().length > 0;
 }
 
-function detectProvider(env: Record<string, string>): ProviderConfig {
+function normalizeAzureEndpoint(endpoint: string): string { // Returns `endpoint` with every trailing "/" removed.
+  return endpoint.replace(/\/+$/, ""); // `\/+$` matches the whole run of slashes at the end, so "host//" also becomes "host"
+}
+
+function deploymentFromAzureBaseUrl(baseUrl: string | undefined): string | undefined { // Extracts the deployment name from a ".../openai/deployments/<name>" URL, or returns undefined.
+  if (!hasRealValue(baseUrl)) return undefined; // unset or blank value: nothing to parse
+  try {
+    const parsed = new URL(baseUrl); // throws when the string is not a valid absolute URL
+    const match = parsed.pathname.match(/\/openai\/deployments\/([^/]+)/); // capture the path segment right after /openai/deployments/
+    return match ? decodeURIComponent(match[1]) : undefined; // percent-decode the segment; undefined when the path has no deployment segment
+  } catch { // invalid URL or malformed percent-escape: report "no deployment" instead of throwing
+    return undefined;
+  }
+}
+
+export function detectProviderForEnv(env: Record<string, string>): ProviderConfig {
   const maxTokens = parseInt(env["MAX_TOKENS"] || "4096", 10);
 
+  const azureOpenAiKey = env["AZURE_OPENAI_API_KEY"];
+  const azureOpenAiEndpoint = env["AZURE_OPENAI_ENDPOINT"];
+  const azureOpenAiBaseUrl = env["AZURE_OPENAI_BASE_URL"];
+  const azureOpenAiDeployment =
+    env["AZURE_OPENAI_DEPLOYMENT"] ||
+    env["AZURE_OPENAI_MODEL"] ||
+    deploymentFromAzureBaseUrl(azureOpenAiBaseUrl);
+  if (
+    hasRealValue(azureOpenAiKey) &&
+    (hasRealValue(azureOpenAiEndpoint) || hasRealValue(azureOpenAiBaseUrl)) &&
+    hasRealValue(azureOpenAiDeployment)
+  ) {
+    return {
+      provider: "openai",
+      model: azureOpenAiDeployment,
+      maxTokens,
+      baseURL: hasRealValue(azureOpenAiBaseUrl)
+        ? azureOpenAiBaseUrl
+        : normalizeAzureEndpoint(azureOpenAiEndpoint),
+    };
+  }
+
   // OpenAI-compatible: supports OpenAI, DeepSeek, SiliconFlow, Azure, vLLM, LM Studio
   if (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") {
     return {
@@ -102,7 +139,7 @@ function detectProvider(env: Record<string, string>): ProviderConfig {
   if (!allowAgentSdk) {
     process.stderr.write(
       "[agentmemory] No LLM provider key found " +
-        "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY, OPENAI_API_KEY). " +
+        "(ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY, OPENAI_API_KEY). " +
         "LLM-backed compression and summarization are DISABLED — using no-op provider. " +
         "This is the safe default: the agent-sdk fallback used to spawn Claude Agent SDK " +
         "child sessions which inherit Claude Code's plugin hooks and cause infinite Stop-hook " +
@@ -134,7 +171,7 @@ function detectProvider(env: Record<string, string>): ProviderConfig {
 export function loadConfig(): AgentMemoryConfig {
   const env = getMergedEnv();
 
-  const provider = detectProvider(env);
+  const provider = detectProviderForEnv(env);
 
   return {
     engineUrl: env["III_ENGINE_URL"] || "ws://localhost:49134",
@@ -171,6 +208,12 @@ export function detectLlmProviderKind(): "llm" | "noop" {
     hasRealValue(env["GOOGLE_API_KEY"]) ||
     hasRealValue(env["OPENROUTER_API_KEY"]) ||
     hasRealValue(env["MINIMAX_API_KEY"]) ||
+    (hasRealValue(env["AZURE_OPENAI_API_KEY"]) &&
+      (hasRealValue(env["AZURE_OPENAI_ENDPOINT"]) ||
+        hasRealValue(env["AZURE_OPENAI_BASE_URL"])) &&
+      (hasRealValue(env["AZURE_OPENAI_DEPLOYMENT"]) ||
+        hasRealValue(env["AZURE_OPENAI_MODEL"]) ||
+        hasRealValue(deploymentFromAzureBaseUrl(env["AZURE_OPENAI_BASE_URL"])))) ||
     (hasRealValue(env["OPENAI_API_KEY"]) &&
       env["OPENAI_API_KEY_FOR_LLM"] !== "false")
   ) {
diff --git a/src/functions/summarize.ts b/src/functions/summarize.ts
index 80b29a09..1fdb6d7a 100644
--- a/src/functions/summarize.ts
+++ b/src/functions/summarize.ts
@@ -248,7 +248,7 @@ export function registerSummarizeFunction(
           success: false,
           error: "no_provider",
           reason:
-            "No LLM provider key set; Summarize is a no-op. Set ANTHROPIC_API_KEY (or GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env to enable.",
+            "No LLM provider key set; Summarize is a no-op. Set ANTHROPIC_API_KEY (or AZURE_OPENAI/GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env to enable.",
         };
       }
 
diff --git a/src/providers/index.ts b/src/providers/index.ts
index 5de6807c..9f810357 100644
--- a/src/providers/index.ts
+++ b/src/providers/index.ts
@@ -9,6 +9,7 @@ import { MinimaxProvider } from "./minimax.js";
 import { NoopProvider } from "./noop.js";
 import { OpenAIProvider } from "./openai.js";
 import { OpenRouterProvider } from "./openrouter.js";
+import { detectAzure } from "./_openai-shared.js";
 import { ResilientProvider } from "./resilient.js";
 import { FallbackChainProvider } from "./fallback-chain.js";
 import { getEnvVar } from "../config.js";
@@ -96,10 +97,15 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider {
         "https://openrouter.ai/api/v1/chat/completions",
       );
     case "openai": {
-      const openaiKey = getEnvVar("OPENAI_API_KEY");
+      const azureKey = getEnvVar("AZURE_OPENAI_API_KEY");
+      const standardKey = getEnvVar("OPENAI_API_KEY");
+      const useAzureKey = config.baseURL ? detectAzure(config.baseURL) : false;
+      const openaiKey = useAzureKey
+        ? azureKey || standardKey
+        : standardKey || azureKey;
       if (!openaiKey) {
         throw new Error(
-          "OPENAI_API_KEY is required for the openai provider",
+          "OPENAI_API_KEY or AZURE_OPENAI_API_KEY is required for the openai provider",
         );
       }
       return new OpenAIProvider(
diff --git a/src/providers/openai.ts b/src/providers/openai.ts
index bca2370f..0f718ff8 100644
--- a/src/providers/openai.ts
+++ b/src/providers/openai.ts
@@ -25,12 +25,14 @@ const DEFAULT_TIMEOUT_MS = 60_000;
  *
  * Required env vars:
  *   OPENAI_API_KEY  — API key
+ *   AZURE_OPENAI_API_KEY — Azure OpenAI API key alias for chat completions
  *
  * Optional:
  *   OPENAI_BASE_URL          — base URL without path (default: https://api.openai.com).
  *                              Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment>
  *   OPENAI_MODEL             — model name (default: gpt-4o-mini)
  *   OPENAI_API_VERSION       — Azure api-version query param (default: 2024-08-01-preview)
+ *   AZURE_OPENAI_API_VERSION — Azure-specific alias; takes precedence over OPENAI_API_VERSION when both are set
  *   OPENAI_TIMEOUT_MS        — outbound fetch timeout in ms (OpenAI-scoped alias,
  *                              takes precedence over AGENTMEMORY_LLM_TIMEOUT_MS
  *                              for back-compat with the v0.9.17 shipping name).
@@ -63,7 +65,9 @@ export class OpenAIProvider implements MemoryProvider {
     this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined;
     this.timeoutMs = resolveTimeout();
     this.azureApiVersion =
-      getEnvVar("OPENAI_API_VERSION") || DEFAULT_AZURE_API_VERSION;
+      getEnvVar("AZURE_OPENAI_API_VERSION") ||
+      getEnvVar("OPENAI_API_VERSION") ||
+      DEFAULT_AZURE_API_VERSION;
     this.isAzure = detectAzure(this.baseUrl);
   }
 
diff --git a/test/config-azure-openai.test.ts b/test/config-azure-openai.test.ts
new file mode 100644
index 00000000..7b4d0706
--- /dev/null
+++ b/test/config-azure-openai.test.ts
@@ -0,0 +1,81 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+import { detectProviderForEnv } from "../src/config.js";
+import { createProvider } from "../src/providers/index.js";
+
+const ORIGINAL_OPENAI_KEY = process.env["OPENAI_API_KEY"];
+const ORIGINAL_AZURE_KEY = process.env["AZURE_OPENAI_API_KEY"];
+
+describe("Azure OpenAI config detection", () => {
+  afterEach(() => {
+    if (ORIGINAL_OPENAI_KEY === undefined) delete process.env["OPENAI_API_KEY"];
+    else process.env["OPENAI_API_KEY"] = ORIGINAL_OPENAI_KEY;
+    if (ORIGINAL_AZURE_KEY === undefined) delete process.env["AZURE_OPENAI_API_KEY"];
+    else process.env["AZURE_OPENAI_API_KEY"] = ORIGINAL_AZURE_KEY;
+    vi.restoreAllMocks();
+  });
+
+  it("uses Azure OpenAI endpoint and deployment as the OpenAI-compatible LLM provider", () => {
+    const provider = detectProviderForEnv({
+      AZURE_OPENAI_API_KEY: "azure-key",
+      AZURE_OPENAI_ENDPOINT: "https://agentmemory.openai.azure.com/",
+      AZURE_OPENAI_DEPLOYMENT: "gpt-5.4-mini",
+    });
+
+    expect(provider).toEqual({
+      provider: "openai",
+      model: "gpt-5.4-mini",
+      maxTokens: 4096,
+      baseURL: "https://agentmemory.openai.azure.com",
+    });
+  });
+
+  it("supports a legacy Azure deployment base URL", () => {
+    const provider = detectProviderForEnv({
+      AZURE_OPENAI_API_KEY: "azure-key",
+      AZURE_OPENAI_BASE_URL:
+        "https://agentmemory.openai.azure.com/openai/deployments/gpt-5.4-nano",
+    });
+
+    expect(provider.provider).toBe("openai");
+    expect(provider.model).toBe("gpt-5.4-nano");
+    expect(provider.baseURL).toBe(
+      "https://agentmemory.openai.azure.com/openai/deployments/gpt-5.4-nano",
+    );
+  });
+
+  it("does not enable Azure OpenAI without a deployment", () => {
+    const provider = detectProviderForEnv({
+      AZURE_OPENAI_API_KEY: "azure-key",
+      AZURE_OPENAI_ENDPOINT: "https://agentmemory.openai.azure.com",
+    });
+
+    expect(provider.provider).toBe("noop");
+  });
+
+  it("uses the Azure key for Azure requests even when OPENAI_API_KEY is also set", async () => {
+    process.env["OPENAI_API_KEY"] = "public-openai-key";
+    process.env["AZURE_OPENAI_API_KEY"] = "azure-openai-key";
+    const config = detectProviderForEnv({
+      OPENAI_API_KEY: "public-openai-key",
+      AZURE_OPENAI_API_KEY: "azure-openai-key",
+      AZURE_OPENAI_ENDPOINT: "https://agentmemory.openai.azure.com",
+      AZURE_OPENAI_DEPLOYMENT: "gpt-5.4-mini",
+    });
+    let capturedHeaders = new Headers();
+    vi.spyOn(globalThis, "fetch").mockImplementation(
+      async (_url: string | URL | Request, init?: RequestInit) => {
+        capturedHeaders = new Headers(init?.headers);
+        return new Response(
+          JSON.stringify({ choices: [{ message: { content: "summary" } }] }),
+          { status: 200 },
+        );
+      },
+    );
+
+    await createProvider(config).summarize("system", "user");
+
+    expect(capturedHeaders.get("api-key")).toBe("azure-openai-key");
+    expect(capturedHeaders.get("Authorization")).toBeNull();
+  });
+});
diff --git a/test/openai-shared.test.ts b/test/openai-shared.test.ts
index 7240e10c..93a4e18d 100644
--- a/test/openai-shared.test.ts
+++ b/test/openai-shared.test.ts
@@ -6,6 +6,7 @@ import {
   detectAzure,
   normalizeBaseUrl,
 } from "../src/providers/_openai-shared.js";
+import { OpenAIProvider } from "../src/providers/openai.js";
 import { OpenAIEmbeddingProvider } from "../src/providers/embedding/openai.js";
 
 describe("_openai-shared — detectAzure", () => {
@@ -259,3 +260,40 @@ describe("OpenAIEmbeddingProvider — Azure auto-detection (#371)", () => {
     expect(capturedHeaders.get("api-key")).toBeNull();
   });
 });
+
+describe("OpenAIProvider — Azure OpenAI aliases", () => {
+  const ORIGINAL_AZURE_VERSION = process.env["AZURE_OPENAI_API_VERSION"];
+
+  afterEach(() => {
+    if (ORIGINAL_AZURE_VERSION === undefined)
+      delete process.env["AZURE_OPENAI_API_VERSION"];
+    else process.env["AZURE_OPENAI_API_VERSION"] = ORIGINAL_AZURE_VERSION;
+    vi.restoreAllMocks();
+  });
+
+  it("honors AZURE_OPENAI_API_VERSION for legacy Azure chat completions", async () => {
+    process.env["AZURE_OPENAI_API_VERSION"] = "2025-04-01-preview";
+    let capturedUrl = "";
+    vi.spyOn(globalThis, "fetch").mockImplementation(
+      async (url: string | URL | Request) => {
+        capturedUrl = String(url);
+        return new Response(
+          JSON.stringify({ choices: [{ message: { content: "summary" } }] }),
+          { status: 200 },
+        );
+      },
+    );
+
+    const provider = new OpenAIProvider(
+      "azure-key",
+      "gpt-5.4-mini",
+      256,
+      "https://myres.openai.azure.com/openai/deployments/gpt-5.4-mini",
+    );
+    await provider.summarize("system", "user");
+
+    expect(capturedUrl).toBe(
+      "https://myres.openai.azure.com/openai/deployments/gpt-5.4-mini/chat/completions?api-version=2025-04-01-preview",
+    );
+  });
+});

From 18763de1a7571e50ee2ebd51ee2150c580677cb6 Mon Sep 17 00:00:00 2001
From: Ross Story <rostory@microsoft.com>
Date: Tue, 19 May 2026 21:49:21 -0700
Subject: [PATCH 2/3] Support GPT-5 token limits for OpenAI-compatible chat

Uses max_completion_tokens for GPT-5 deployments while preserving max_tokens for existing models. This is required for Azure OpenAI gpt-5.4-mini deployments.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/providers/openai.ts    |  6 +++++-
 test/openai-shared.test.ts | 24 ++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/providers/openai.ts b/src/providers/openai.ts
index 0f718ff8..bf2b5170 100644
--- a/src/providers/openai.ts
+++ b/src/providers/openai.ts
@@ -83,12 +83,13 @@ export class OpenAIProvider implements MemoryProvider {
     const url = buildChatUrl(this.baseUrl, this.isAzure, this.azureApiVersion);
     const body: Record<string, unknown> = {
       model: this.model,
-      max_tokens: this.maxTokens,
       messages: [
         { role: "system", content: systemPrompt },
         { role: "user", content: userPrompt },
       ],
     };
+    body[usesMaxCompletionTokens(this.model) ? "max_completion_tokens" : "max_tokens"] =
+      this.maxTokens;
     if (this.reasoningEffort) {
       body.reasoning_effort = this.reasoningEffort;
     }
@@ -173,3 +174,6 @@ function parsePositiveInt(raw: string | null | undefined): number | undefined {
   return Number.isFinite(n) && n > 0 ? n : undefined;
 }
 
+function usesMaxCompletionTokens(model: string): boolean { // True when the request body should carry `max_completion_tokens` instead of `max_tokens`.
+  return /^gpt-5(?:[.-]|$)/i.test(model); // case-insensitive: "gpt-5" alone or followed by "." or "-" (e.g. "gpt-5.4-mini"); decided from the model name string only
+} // NOTE(review): only GPT-5-style names match; if other models (e.g. OpenAI o-series) also need max_completion_tokens they are not covered — confirm.
diff --git a/test/openai-shared.test.ts b/test/openai-shared.test.ts
index 93a4e18d..ec516ad2 100644
--- a/test/openai-shared.test.ts
+++ b/test/openai-shared.test.ts
@@ -296,4 +296,28 @@ describe("OpenAIProvider — Azure OpenAI aliases", () => {
       "https://myres.openai.azure.com/openai/deployments/gpt-5.4-mini/chat/completions?api-version=2025-04-01-preview",
     );
   });
+
+  it("uses max_completion_tokens for GPT-5 deployments", async () => {
+    let capturedBody: Record<string, unknown> = {};
+    vi.spyOn(globalThis, "fetch").mockImplementation(
+      async (_url: string | URL | Request, init?: RequestInit) => {
+        capturedBody = JSON.parse(String(init?.body));
+        return new Response(
+          JSON.stringify({ choices: [{ message: { content: "summary" } }] }),
+          { status: 200 },
+        );
+      },
+    );
+
+    const provider = new OpenAIProvider(
+      "azure-key",
+      "gpt-5.4-mini",
+      256,
+      "https://myres.openai.azure.com",
+    );
+    await provider.summarize("system", "user");
+
+    expect(capturedBody.max_completion_tokens).toBe(256);
+    expect(capturedBody.max_tokens).toBeUndefined();
+  });
 });

From a5dbfb6c6f76b9104579138587257ac704a0ecf4 Mon Sep 17 00:00:00 2001
From: Ross Story <rostory@microsoft.com>
Date: Wed, 20 May 2026 13:26:36 -0700
Subject: [PATCH 3/3] Address Azure OpenAI review comments

Spells out concrete provider env vars in runtime hints and selects AZURE_OPENAI_API_KEY when an Azure-shaped OPENAI_BASE_URL is supplied through the environment rather than through the provider config.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/cli.ts                       |  4 +++-
 src/functions/summarize.ts       |  2 +-
 src/providers/index.ts           |  3 ++-
 test/config-azure-openai.test.ts | 28 ++++++++++++++++++++++++++++
 4 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/cli.ts b/src/cli.ts
index c77ae1ac..1486c132 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -1400,7 +1400,9 @@ async function passiveServerChecks(): Promise<DoctorCheck[]> {
     {
       name: "LLM provider",
       ok: hasLlm,
-      hint: hasLlm ? undefined : "set ANTHROPIC_API_KEY (or AZURE_OPENAI/GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env",
+      hint: hasLlm
+        ? undefined
+        : "set ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT, OPENAI_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, or MINIMAX_API_KEY in ~/.agentmemory/.env",
     },
     {
       name: "Embedding provider",
diff --git a/src/functions/summarize.ts b/src/functions/summarize.ts
index 1fdb6d7a..74b7f0ba 100644
--- a/src/functions/summarize.ts
+++ b/src/functions/summarize.ts
@@ -248,7 +248,7 @@ export function registerSummarizeFunction(
           success: false,
           error: "no_provider",
           reason:
-            "No LLM provider key set; Summarize is a no-op. Set ANTHROPIC_API_KEY (or AZURE_OPENAI/GEMINI/OPENROUTER/MINIMAX) in ~/.agentmemory/.env to enable.",
+            "No LLM provider key set; Summarize is a no-op. Set ANTHROPIC_API_KEY, AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT, OPENAI_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, or MINIMAX_API_KEY in ~/.agentmemory/.env to enable.",
         };
       }
 
diff --git a/src/providers/index.ts b/src/providers/index.ts
index 9f810357..51611063 100644
--- a/src/providers/index.ts
+++ b/src/providers/index.ts
@@ -99,7 +99,8 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider {
     case "openai": {
       const azureKey = getEnvVar("AZURE_OPENAI_API_KEY");
       const standardKey = getEnvVar("OPENAI_API_KEY");
-      const useAzureKey = config.baseURL ? detectAzure(config.baseURL) : false;
+      const effectiveBaseURL = config.baseURL || getEnvVar("OPENAI_BASE_URL");
+      const useAzureKey = effectiveBaseURL ? detectAzure(effectiveBaseURL) : false;
       const openaiKey = useAzureKey
         ? azureKey || standardKey
         : standardKey || azureKey;
diff --git a/test/config-azure-openai.test.ts b/test/config-azure-openai.test.ts
index 7b4d0706..5496b5e3 100644
--- a/test/config-azure-openai.test.ts
+++ b/test/config-azure-openai.test.ts
@@ -5,6 +5,7 @@ import { createProvider } from "../src/providers/index.js";
 
 const ORIGINAL_OPENAI_KEY = process.env["OPENAI_API_KEY"];
 const ORIGINAL_AZURE_KEY = process.env["AZURE_OPENAI_API_KEY"];
+const ORIGINAL_OPENAI_BASE = process.env["OPENAI_BASE_URL"];
 
 describe("Azure OpenAI config detection", () => {
   afterEach(() => {
@@ -12,6 +13,8 @@ describe("Azure OpenAI config detection", () => {
     else process.env["OPENAI_API_KEY"] = ORIGINAL_OPENAI_KEY;
     if (ORIGINAL_AZURE_KEY === undefined) delete process.env["AZURE_OPENAI_API_KEY"];
     else process.env["AZURE_OPENAI_API_KEY"] = ORIGINAL_AZURE_KEY;
+    if (ORIGINAL_OPENAI_BASE === undefined) delete process.env["OPENAI_BASE_URL"];
+    else process.env["OPENAI_BASE_URL"] = ORIGINAL_OPENAI_BASE;
     vi.restoreAllMocks();
   });
 
@@ -78,4 +81,29 @@ describe("Azure OpenAI config detection", () => {
     expect(capturedHeaders.get("api-key")).toBe("azure-openai-key");
     expect(capturedHeaders.get("Authorization")).toBeNull();
   });
+
+  it("uses the Azure key when OPENAI_BASE_URL points to Azure", async () => {
+    process.env["OPENAI_API_KEY"] = "public-openai-key";
+    process.env["AZURE_OPENAI_API_KEY"] = "azure-openai-key";
+    process.env["OPENAI_BASE_URL"] = "https://agentmemory.openai.azure.com";
+    let capturedHeaders = new Headers();
+    vi.spyOn(globalThis, "fetch").mockImplementation(
+      async (_url: string | URL | Request, init?: RequestInit) => {
+        capturedHeaders = new Headers(init?.headers);
+        return new Response(
+          JSON.stringify({ choices: [{ message: { content: "summary" } }] }),
+          { status: 200 },
+        );
+      },
+    );
+
+    await createProvider({
+      provider: "openai",
+      model: "gpt-5.4-mini",
+      maxTokens: 256,
+    }).summarize("system", "user");
+
+    expect(capturedHeaders.get("api-key")).toBe("azure-openai-key");
+    expect(capturedHeaders.get("Authorization")).toBeNull();
+  });
 });