diff --git a/knip.json b/knip.json
index 13a43187..878dd3f5 100644
--- a/knip.json
+++ b/knip.json
@@ -20,6 +20,7 @@
     "**/*.css",
     "packages/appkit/src/plugins/vector-search/**",
     "packages/appkit/src/plugin/index.ts",
+    "packages/appkit/src/plugin/to-plugin.ts",
     "packages/appkit/src/plugins/agents/index.ts",
     "packages/appkit/src/plugins/agents/tools/index.ts",
     "packages/appkit/src/plugins/agents/from-plugin.ts",
diff --git a/packages/appkit/package.json b/packages/appkit/package.json
index 27a14e66..49d6c516 100644
--- a/packages/appkit/package.json
+++ b/packages/appkit/package.json
@@ -83,6 +83,7 @@
     "@types/semver": "7.7.1",
     "dotenv": "16.6.1",
     "express": "4.22.0",
+    "js-yaml": "^4.1.1",
     "obug": "2.1.1",
     "pg": "8.18.0",
     "picocolors": "1.1.1",
@@ -108,6 +109,7 @@
     "@ai-sdk/openai": "4.0.0-beta.27",
     "@langchain/core": "^1.1.39",
     "@types/express": "4.17.25",
+    "@types/js-yaml": "^4.0.9",
     "@types/json-schema": "7.0.15",
     "@types/pg": "8.16.0",
     "@types/ws": "8.18.1",
diff --git a/packages/appkit/src/core/create-agent-def.ts b/packages/appkit/src/core/create-agent-def.ts
new file mode 100644
index 00000000..3e93371d
--- /dev/null
+++ b/packages/appkit/src/core/create-agent-def.ts
@@ -0,0 +1,53 @@
+import { ConfigurationError } from "../errors";
+import type { AgentDefinition } from "../plugins/agents/types";
+
+/**
+ * Pure factory for agent definitions. Returns the passed-in definition after
+ * cycle-detecting the sub-agent graph. Accepts the full `AgentDefinition` shape
+ * and is safe to call at module top-level.
+ *
+ * The returned value is a plain `AgentDefinition` — no adapter construction,
+ * no side effects. Register it with `agents({ agents: { name: def } })` or run
+ * it standalone via `runAgent(def, input)`.
+ *
+ * @example
+ * ```ts
+ * const support = createAgent({
+ *   instructions: "You help customers.",
+ *   model: "databricks-claude-sonnet-4-5",
+ *   tools: {
+ *     get_weather: tool({ ... }),
+ *   },
+ * });
+ * ```
+ */
+export function createAgent(def: AgentDefinition): AgentDefinition {
+  detectCycles(def);
+  return def;
+}
+
+/**
+ * Walks the `agents: { ... }` sub-agent tree via DFS and throws if a cycle is
+ * found. Cycles would cause infinite recursion at tool-invocation time.
+ */
+function detectCycles(def: AgentDefinition): void {
+  const visiting = new Set<AgentDefinition>();
+  const visited = new Set<AgentDefinition>();
+
+  const walk = (current: AgentDefinition, path: string[]): void => {
+    if (visited.has(current)) return;
+    if (visiting.has(current)) {
+      throw new ConfigurationError(
+        `Agent sub-agent cycle detected: ${path.join(" -> ")}`,
+      );
+    }
+    visiting.add(current);
+    for (const [childKey, child] of Object.entries(current.agents ?? {})) {
+      walk(child, [...path, childKey]);
+    }
+    visiting.delete(current);
+    visited.add(current);
+  };
+
+  walk(def, [def.name ?? "(root)"]);
+}
diff --git a/packages/appkit/src/core/run-agent.ts b/packages/appkit/src/core/run-agent.ts
new file mode 100644
index 00000000..e83c2c9c
--- /dev/null
+++ b/packages/appkit/src/core/run-agent.ts
@@ -0,0 +1,226 @@
+import { randomUUID } from "node:crypto";
+import type {
+  AgentAdapter,
+  AgentEvent,
+  AgentToolDefinition,
+  Message,
+} from "shared";
+import {
+  type FunctionTool,
+  functionToolToDefinition,
+  isFunctionTool,
+} from "../plugins/agents/tools/function-tool";
+import { isHostedTool } from "../plugins/agents/tools/hosted-tools";
+import type {
+  AgentDefinition,
+  AgentTool,
+  ToolkitEntry,
+} from "../plugins/agents/types";
+import { isToolkitEntry } from "../plugins/agents/types";
+
+export interface RunAgentInput {
+  /** Seed messages for the run. Either a single user string or a full message list. */
+  messages: string | Message[];
+  /** Abort signal for cancellation. */
+  signal?: AbortSignal;
+}
+
+export interface RunAgentResult {
+  /** Aggregated text output from all `message_delta` events. */
+  text: string;
+  /** Every event the adapter yielded, in order. Useful for inspection/tests. */
+  events: AgentEvent[];
+}
+
+/**
+ * Standalone agent execution without `createApp`. Resolves the adapter, binds
+ * inline tools, and drives the adapter's `run()` loop to completion.
+ *
+ * Limitations vs. running through the agents() plugin:
+ * - No OBO: there is no HTTP request, so plugin tools run as the service
+ *   principal (when they work at all).
+ * - Plugin tools (`ToolkitEntry`) are not supported — they require a live
+ *   `PluginContext` that only exists when registered in a `createApp`
+ *   instance. This function throws a clear error if encountered.
+ * - Sub-agents (`agents: { ... }` on the def) are executed as nested
+ *   `runAgent` calls with no shared thread state.
+ */
+export async function runAgent(
+  def: AgentDefinition,
+  input: RunAgentInput,
+): Promise<RunAgentResult> {
+  const adapter = await resolveAdapter(def);
+  const messages = normalizeMessages(input.messages, def.instructions);
+  const toolIndex = buildStandaloneToolIndex(def);
+  const tools = Array.from(toolIndex.values()).map((e) => e.def);
+
+  const signal = input.signal;
+
+  const executeTool = async (name: string, args: unknown): Promise<unknown> => {
+    const entry = toolIndex.get(name);
+    if (!entry) throw new Error(`Unknown tool: ${name}`);
+    if (entry.kind === "function") {
+      return entry.tool.execute(args as Record<string, unknown>);
+    }
+    if (entry.kind === "subagent") {
+      const subInput: RunAgentInput = {
+        messages:
+          typeof args === "object" &&
+          args !== null &&
+          typeof (args as { input?: unknown }).input === "string"
+            ? (args as { input: string }).input
+            : JSON.stringify(args),
+        signal,
+      };
+      const res = await runAgent(entry.agentDef, subInput);
+      return res.text;
+    }
+    throw new Error(
+      `runAgent: tool "${name}" is a ${entry.kind} tool. ` +
+        "Plugin toolkits and MCP tools are only usable via createApp({ plugins: [..., agents(...)] }).",
+    );
+  };
+
+  const events: AgentEvent[] = [];
+  let text = "";
+
+  const stream = adapter.run(
+    {
+      messages,
+      tools,
+      threadId: randomUUID(),
+      signal,
+    },
+    { executeTool, signal },
+  );
+
+  for await (const event of stream) {
+    if (signal?.aborted) break;
+    events.push(event);
+    if (event.type === "message_delta") {
+      text += event.content;
+    } else if (event.type === "message") {
+      text = event.content;
+    }
+  }
+
+  return { text, events };
+}
+
+async function resolveAdapter(def: AgentDefinition): Promise<AgentAdapter> {
+  const { model } = def;
+  if (!model) {
+    const { DatabricksAdapter } = await import("../agents/databricks");
+    return DatabricksAdapter.fromModelServing();
+  }
+  if (typeof model === "string") {
+    const { DatabricksAdapter } = await import("../agents/databricks");
+    return DatabricksAdapter.fromModelServing(model);
+  }
+  return await model;
+}
+
+function normalizeMessages(
+  input: string | Message[],
+  instructions: string,
+): Message[] {
+  const systemMessage: Message = {
+    id: "system",
+    role: "system",
+    content: instructions,
+    createdAt: new Date(),
+  };
+  if (typeof input === "string") {
+    return [
+      systemMessage,
+      {
+        id: randomUUID(),
+        role: "user",
+        content: input,
+        createdAt: new Date(),
+      },
+    ];
+  }
+  return [systemMessage, ...input];
+}
+
+type StandaloneEntry =
+  | {
+      kind: "function";
+      def: AgentToolDefinition;
+      tool: FunctionTool;
+    }
+  | {
+      kind: "subagent";
+      def: AgentToolDefinition;
+      agentDef: AgentDefinition;
+    }
+  | {
+      kind: "toolkit";
+      def: AgentToolDefinition;
+      entry: ToolkitEntry;
+    }
+  | {
+      kind: "hosted";
+      def: AgentToolDefinition;
+    };
+
+function buildStandaloneToolIndex(
+  def: AgentDefinition,
+): Map<string, StandaloneEntry> {
+  const index = new Map<string, StandaloneEntry>();
+
+  for (const [key, tool] of Object.entries(def.tools ?? {})) {
+    index.set(key, classifyTool(key, tool));
+  }
+
+  for (const [childKey, child] of Object.entries(def.agents ?? {})) {
+    const toolName = `agent-${childKey}`;
+    index.set(toolName, {
+      kind: "subagent",
+      agentDef: { ...child, name: child.name ?? childKey },
+      def: {
+        name: toolName,
+        description:
+          child.instructions.slice(0, 120) ||
+          `Delegate to the ${childKey} sub-agent`,
+        parameters: {
+          type: "object",
+          properties: {
+            input: {
+              type: "string",
+              description: "Message to send to the sub-agent.",
+            },
+          },
+          required: ["input"],
+        },
+      },
+    });
+  }
+
+  return index;
+}
+
+function classifyTool(key: string, tool: AgentTool): StandaloneEntry {
+  if (isToolkitEntry(tool)) {
+    return { kind: "toolkit", def: { ...tool.def, name: key }, entry: tool };
+  }
+  if (isFunctionTool(tool)) {
+    return {
+      kind: "function",
+      tool,
+      def: { ...functionToolToDefinition(tool), name: key },
+    };
+  }
+  if (isHostedTool(tool)) {
+    return {
+      kind: "hosted",
+      def: {
+        name: key,
+        description: `Hosted tool: ${tool.type}`,
+        parameters: { type: "object", properties: {} },
+      },
+    };
+  }
+  throw new Error(`runAgent: unrecognized tool shape at key "${key}"`);
+}
diff --git a/packages/appkit/src/index.ts b/packages/appkit/src/index.ts
index 6c6c6f5b..ba4110b3 100644
--- a/packages/appkit/src/index.ts
+++ b/packages/appkit/src/index.ts
@@ -43,6 +43,12 @@ export {
 } from "./connectors/lakebase";
 export { getExecutionContext } from "./context";
 export { createApp } from "./core";
+export { createAgent } from "./core/create-agent-def";
+export {
+  type RunAgentInput,
+  type RunAgentResult,
+  runAgent,
+} from "./core/run-agent";
 // Errors
 export {
   AppKitError,
@@ -63,6 +69,20 @@ export {
   toPlugin,
 } from "./plugin";
 export { analytics, files, genie, lakebase, server, serving } from "./plugins";
+export {
+  type AgentDefinition,
+  type AgentsPluginConfig,
+  type AgentTool,
+  agentIdFromMarkdownPath,
+  agents,
+  type BaseSystemPromptOption,
+  isToolkitEntry,
+  loadAgentFromFile,
+  loadAgentsFromDir,
+  type PromptContext,
+  type ToolkitEntry,
+  type ToolkitOptions,
+} from "./plugins/agents";
 export {
   type FunctionTool,
   type HostedTool,
@@ -72,12 +92,6 @@ export {
   type ToolConfig,
   tool,
 } from "./plugins/agents/tools";
-export {
-  type AgentTool,
-  isToolkitEntry,
-  type ToolkitEntry,
-  type ToolkitOptions,
-} from "./plugins/agents/types";
 // Files plugin types (for custom policy authoring)
 export type {
   FileAction,
diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts
new file mode 100644
index 00000000..f1938ef2
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/agents.ts
@@ -0,0 +1,1269 @@
+import { randomUUID } from "node:crypto";
+import path from "node:path";
+import type express from "express";
+import pc from "picocolors";
+import type {
+  AgentAdapter,
+  AgentEvent,
+  AgentRunContext,
+  AgentToolDefinition,
+  IAppRouter,
+  Message,
+  PluginPhase,
+  ResponseStreamEvent,
+  Thread,
+  ToolProvider,
+} from "shared";
+import { createLogger } from "../../logging/logger";
+import { Plugin, toPlugin } from "../../plugin";
+import type { PluginManifest } from "../../registry";
+import { agentStreamDefaults } from "./defaults";
+import { EventChannel } from "./event-channel";
+import { AgentEventTranslator } from "./event-translator";
+import { loadAgentsFromDir } from "./load-agents";
+import manifest from "./manifest.json";
+import {
+  approvalRequestSchema,
+  chatRequestSchema,
+  invocationsRequestSchema,
+} from "./schemas";
+import { buildBaseSystemPrompt, composeSystemPrompt } from "./system-prompt";
+import { InMemoryThreadStore } from "./thread-store";
+import { ToolApprovalGate } from "./tool-approval-gate";
+import {
+  AppKitMcpClient,
+  functionToolToDefinition,
+  isFunctionTool,
+  isHostedTool,
+  resolveHostedTools,
+} from "./tools";
+import { buildMcpHostPolicy } from "./tools/mcp-host-policy";
+import type {
+  AgentDefinition,
+  AgentsPluginConfig,
+  BaseSystemPromptOption,
+  PromptContext,
+  RegisteredAgent,
+  ResolvedToolEntry,
+} from "./types";
+import { isToolkitEntry } from "./types";
+
+const logger = createLogger("agents");
+
+const DEFAULT_AGENTS_DIR = "./config/agents";
+
+/**
+ * Context flag recorded on the in-memory AgentDefinition to indicate whether
+ * it came from markdown (file) or from user code. Drives the asymmetric
+ * `autoInheritTools` default.
+ */
+interface AgentSource {
+  origin: "file" | "code";
+}
+
+export class AgentsPlugin extends Plugin implements ToolProvider {
+  static manifest = manifest as PluginManifest;
+  static phase: PluginPhase = "deferred";
+
+  protected declare config: AgentsPluginConfig;
+
+  private agents = new Map<string, RegisteredAgent>();
+  private defaultAgentName: string | null = null;
+  private activeStreams = new Map<
+    string,
+    { controller: AbortController; userId: string }
+  >();
+  private mcpClient: AppKitMcpClient | null = null;
+  private threadStore;
+  private approvalGate = new ToolApprovalGate();
+
+  constructor(config: AgentsPluginConfig) {
+    super(config);
+    this.config = config;
+    if (config.threadStore) {
+      this.threadStore = config.threadStore;
+    } else {
+      this.threadStore = new InMemoryThreadStore();
+      if (process.env.NODE_ENV === "production") {
+        logger.warn(
+          "InMemoryThreadStore is in use in a production build (NODE_ENV=production). " +
+            "Thread history is unbounded and lost on restart. " +
+            "Pass agents({ threadStore: <persistent impl> }) for real deployments.",
+        );
+      } else {
+        logger.info(
+          "Using default InMemoryThreadStore (dev-only — threads are lost on restart and grow without bound).",
+        );
+      }
+    }
+  }
+
+  /** Effective approval policy with defaults applied. */
+  private get resolvedApprovalPolicy(): {
+    requireForDestructive: boolean;
+    timeoutMs: number;
+  } {
+    const cfg = this.config.approval ?? {};
+    return {
+      requireForDestructive: cfg.requireForDestructive ?? true,
+      timeoutMs: cfg.timeoutMs ?? 60_000,
+    };
+  }
+
+  /** Effective DoS limits with defaults applied. */
+  private get resolvedLimits(): {
+    maxConcurrentStreamsPerUser: number;
+    maxToolCalls: number;
+    maxSubAgentDepth: number;
+  } {
+    const cfg = this.config.limits ?? {};
+    return {
+      maxConcurrentStreamsPerUser: cfg.maxConcurrentStreamsPerUser ?? 5,
+      maxToolCalls: cfg.maxToolCalls ?? 50,
+      maxSubAgentDepth: cfg.maxSubAgentDepth ?? 3,
+    };
+  }
+
+  /** Count active streams owned by a given user. */
+  private countUserStreams(userId: string): number {
+    let n = 0;
+    for (const entry of this.activeStreams.values()) {
+      if (entry.userId === userId) n++;
+    }
+    return n;
+  }
+
+  async setup() {
+    await this.loadAgents();
+    this.mountInvocationsRoute();
+    this.printRegistry();
+  }
+
+  /**
+   * Reload agents from the configured directory, preserving code-defined
+   * agents. Swaps the registry atomically at the end.
+   */
+  async reload(): Promise<void> {
+    this.agents.clear();
+    this.defaultAgentName = null;
+    if (this.mcpClient) {
+      await this.mcpClient.close();
+      this.mcpClient = null;
+    }
+    await this.loadAgents();
+  }
+
+  private async loadAgents() {
+    const { defs: fileDefs, defaultAgent: fileDefault } =
+      await this.loadFileDefinitions();
+
+    const codeDefs = this.config.agents ?? {};
+
+    for (const name of Object.keys(fileDefs)) {
+      if (codeDefs[name]) {
+        logger.warn(
+          "Agent '%s' defined in both code and a markdown file. Code definition takes precedence.",
+          name,
+        );
+      }
+    }
+
+    const merged: Record<string, { def: AgentDefinition; src: AgentSource }> =
+      {};
+    for (const [name, def] of Object.entries(fileDefs)) {
+      merged[name] = { def, src: { origin: "file" } };
+    }
+    for (const [name, def] of Object.entries(codeDefs)) {
+      merged[name] = { def, src: { origin: "code" } };
+    }
+
+    if (Object.keys(merged).length === 0) {
+      logger.info(
+        "No agents registered (no files in %s, no code-defined agents)",
+        this.resolvedAgentsDir() ?? "<disabled>",
+      );
+      return;
+    }
+
+    for (const [name, { def, src }] of Object.entries(merged)) {
+      try {
+        const registered = await this.buildRegisteredAgent(name, def, src);
+        this.agents.set(name, registered);
+        if (!this.defaultAgentName) this.defaultAgentName = name;
+      } catch (err) {
+        throw new Error(
+          `Failed to register agent '${name}' (${src.origin}): ${
+            err instanceof Error ? err.message : String(err)
+          }`,
+          { cause: err instanceof Error ? err : undefined },
+        );
+      }
+    }
+
+    if (this.config.defaultAgent) {
+      if (!this.agents.has(this.config.defaultAgent)) {
+        throw new Error(
+          `defaultAgent '${this.config.defaultAgent}' is not registered. Available: ${Array.from(this.agents.keys()).join(", ")}`,
+        );
+      }
+      this.defaultAgentName = this.config.defaultAgent;
+    } else if (fileDefault && this.agents.has(fileDefault)) {
+      this.defaultAgentName = fileDefault;
+    }
+  }
+
+  private resolvedAgentsDir(): string | null {
+    if (this.config.dir === false) return null;
+    const dir = this.config.dir ?? DEFAULT_AGENTS_DIR;
+    return path.isAbsolute(dir) ? dir : path.resolve(process.cwd(), dir);
+  }
+
+  private async loadFileDefinitions(): Promise<{
+    defs: Record<string, AgentDefinition>;
+    defaultAgent: string | null;
+  }> {
+    const dir = this.resolvedAgentsDir();
+    if (!dir) return { defs: {}, defaultAgent: null };
+
+    const pluginToolProviders = this.pluginProviderIndex();
+    const ambient = this.config.tools ?? {};
+
+    const result = await loadAgentsFromDir(dir, {
+      defaultModel: this.config.defaultModel,
+      availableTools: ambient,
+      plugins: pluginToolProviders,
+      codeAgents: this.config.agents,
+    });
+
+    return result;
+  }
+
+  /**
+   * Builds the map of plugin-name → toolkit that the markdown loader consults
+   * when resolving `toolkits:` frontmatter entries.
+   */
+  private pluginProviderIndex(): Map<
+    string,
+    { toolkit: (opts?: unknown) => Record<string, unknown> }
+  > {
+    const out = new Map();
+    if (!this.context) return out;
+    for (const { name, provider } of this.context.getToolProviders()) {
+      const withToolkit = provider as ToolProvider & {
+        toolkit?: (opts?: unknown) => Record<string, unknown>;
+      };
+      if (typeof withToolkit.toolkit === "function") {
+        out.set(name, {
+          toolkit: withToolkit.toolkit.bind(withToolkit),
+        });
+      }
+    }
+    return out;
+  }
+
+  private async buildRegisteredAgent(
+    name: string,
+    def: AgentDefinition,
+    src: AgentSource,
+  ): Promise<RegisteredAgent> {
+    const adapter = await this.resolveAdapter(def, name);
+    const toolIndex = await this.buildToolIndex(name, def, src);
+
+    return {
+      name,
+      instructions: def.instructions,
+      adapter,
+      toolIndex,
+      baseSystemPrompt: def.baseSystemPrompt,
+      maxSteps: def.maxSteps,
+      maxTokens: def.maxTokens,
+      ephemeral: def.ephemeral,
+    };
+  }
+
+  private async resolveAdapter(
+    def: AgentDefinition,
+    name: string,
+  ): Promise<AgentAdapter> {
+    const source = def.model ?? this.config.defaultModel;
+    // Per-agent adapter knobs from `AgentDefinition` / markdown frontmatter.
+    // Only applied when AppKit builds the adapter itself (string or omitted
+    // model). Users who pass a pre-built `AgentAdapter` own these settings.
+    const adapterOptions: { maxSteps?: number; maxTokens?: number } = {};
+    if (def.maxSteps !== undefined) adapterOptions.maxSteps = def.maxSteps;
+    if (def.maxTokens !== undefined) adapterOptions.maxTokens = def.maxTokens;
+
+    if (!source) {
+      const { DatabricksAdapter } = await import("../../agents/databricks");
+      try {
+        return await DatabricksAdapter.fromModelServing(
+          undefined,
+          adapterOptions,
+        );
+      } catch (err) {
+        throw new Error(
+          `Agent '${name}' has no model configured and no DATABRICKS_AGENT_ENDPOINT default available`,
+          { cause: err instanceof Error ? err : undefined },
+        );
+      }
+    }
+    if (typeof source === "string") {
+      const { DatabricksAdapter } = await import("../../agents/databricks");
+      return DatabricksAdapter.fromModelServing(source, adapterOptions);
+    }
+    return await source;
+  }
+
+  /**
+   * Resolves an agent's tool record into a per-agent dispatch index. Connects
+   * hosted tools via MCP client. Applies `autoInheritTools` defaults when the
+   * definition has no declared tools/agents.
+   */
+  private async buildToolIndex(
+    agentName: string,
+    def: AgentDefinition,
+    src: AgentSource,
+  ): Promise<Map<string, ResolvedToolEntry>> {
+    const index = new Map<string, ResolvedToolEntry>();
+    const hasExplicitTools = def.tools && Object.keys(def.tools).length > 0;
+    const hasExplicitSubAgents =
+      def.agents && Object.keys(def.agents).length > 0;
+
+    const inheritDefaults = normalizeAutoInherit(this.config.autoInheritTools);
+    const shouldInherit =
+      !hasExplicitTools &&
+      !hasExplicitSubAgents &&
+      (src.origin === "file" ? inheritDefaults.file : inheritDefaults.code);
+
+    if (shouldInherit) {
+      await this.applyAutoInherit(agentName, index);
+    }
+
+    // 1. Sub-agents → agent-<key>
+    for (const [childKey, childDef] of Object.entries(def.agents ?? {})) {
+      const toolName = `agent-${childKey}`;
+      index.set(toolName, {
+        source: "subagent",
+        agentName: childDef.name ?? childKey,
+        def: {
+          name: toolName,
+          description:
+            childDef.instructions.slice(0, 120) ||
+            `Delegate to the ${childKey} sub-agent`,
+          parameters: {
+            type: "object",
+            properties: {
+              input: {
+                type: "string",
+                description: "Message to send to the sub-agent.",
+              },
+            },
+            required: ["input"],
+          },
+        },
+      });
+    }
+
+    // 2. Explicit tools (toolkit entries, function tools, hosted tools)
+    const hostedToCollect: import("./tools/hosted-tools").HostedTool[] = [];
+    for (const [key, tool] of Object.entries(def.tools ?? {})) {
+      if (isToolkitEntry(tool)) {
+        index.set(key, {
+          source: "toolkit",
+          pluginName: tool.pluginName,
+          localName: tool.localName,
+          def: { ...tool.def, name: key },
+        });
+        continue;
+      }
+      if (isFunctionTool(tool)) {
+        index.set(key, {
+          source: "function",
+          functionTool: tool,
+          def: { ...functionToolToDefinition(tool), name: key },
+        });
+        continue;
+      }
+      if (isHostedTool(tool)) {
+        hostedToCollect.push(tool);
+        continue;
+      }
+      throw new Error(
+        `Agent '${agentName}' tool '${key}' has an unrecognized shape`,
+      );
+    }
+
+    if (hostedToCollect.length > 0) {
+      await this.connectHostedTools(hostedToCollect, index);
+    }
+
+    return index;
+  }
+
+  private async applyAutoInherit(
+    agentName: string,
+    index: Map<string, ResolvedToolEntry>,
+  ): Promise<void> {
+    if (!this.context) return;
+    const inherited: string[] = [];
+    const skippedByPlugin = new Map<string, string[]>();
+    const recordSkip = (pluginName: string, localName: string) => {
+      const list = skippedByPlugin.get(pluginName) ?? [];
+      list.push(localName);
+      skippedByPlugin.set(pluginName, list);
+    };
+
+    for (const {
+      name: pluginName,
+      provider,
+    } of this.context.getToolProviders()) {
+      if (pluginName === this.name) continue;
+      const withToolkit = provider as ToolProvider & {
+        toolkit?: (opts?: unknown) => Record<string, unknown>;
+      };
+      if (typeof withToolkit.toolkit === "function") {
+        const entries = withToolkit.toolkit() as Record<string, unknown>;
+        for (const [key, maybeEntry] of Object.entries(entries)) {
+          if (!isToolkitEntry(maybeEntry)) continue;
+          if (maybeEntry.autoInheritable !== true) {
+            recordSkip(maybeEntry.pluginName, maybeEntry.localName);
+            continue;
+          }
+          index.set(key, {
+            source: "toolkit",
+            pluginName: maybeEntry.pluginName,
+            localName: maybeEntry.localName,
+            def: { ...maybeEntry.def, name: key },
+          });
+          inherited.push(key);
+        }
+        continue;
+      }
+      // Fallback: providers without a toolkit() still expose getAgentTools().
+      // These cannot be selectively opted in per tool, so we conservatively
+      // skip them during auto-inherit and require explicit `tools:` wiring.
+      for (const tool of provider.getAgentTools()) {
+        recordSkip(pluginName, tool.name);
+      }
+    }
+
+    if (inherited.length > 0) {
+      logger.info(
+        "[agent %s] auto-inherited %d tool(s): %s",
+        agentName,
+        inherited.length,
+        inherited.join(", "),
+      );
+    }
+    if (skippedByPlugin.size > 0) {
+      const summary = Array.from(skippedByPlugin.entries())
+        .map(([p, tools]) => `${p}(${tools.length})`)
+        .join(", ");
+      logger.info(
+        "[agent %s] auto-inherit skipped %d tool(s) not marked autoInheritable: %s. Wire them explicitly via `tools:` if needed.",
+        agentName,
+        Array.from(skippedByPlugin.values()).reduce(
+          (n, list) => n + list.length,
+          0,
+        ),
+        summary,
+      );
+    }
+  }
+
+  private async connectHostedTools(
+    hostedTools: import("./tools/hosted-tools").HostedTool[],
+    index: Map<string, ResolvedToolEntry>,
+  ): Promise<void> {
+    let host: string | undefined;
+    let authenticate: () => Promise<Record<string, string>>;
+
+    try {
+      const { getWorkspaceClient } = await import("../../context");
+      const wsClient = getWorkspaceClient();
+      await wsClient.config.ensureResolved();
+      host = wsClient.config.host;
+      authenticate = async () => {
+        const headers = new Headers();
+        await wsClient.config.authenticate(headers);
+        return Object.fromEntries(headers.entries());
+      };
+    } catch {
+      host = process.env.DATABRICKS_HOST;
+      authenticate = async (): Promise<Record<string, string>> => {
+        const token = process.env.DATABRICKS_TOKEN;
+        return token ? { Authorization: `Bearer ${token}` } : {};
+      };
+    }
+
+    if (!host) {
+      logger.warn(
+        "No Databricks host available — skipping %d hosted tool(s)",
+        hostedTools.length,
+      );
+      return;
+    }
+
+    if (!this.mcpClient) {
+      const policy = buildMcpHostPolicy(this.config.mcp, host);
+      this.mcpClient = new AppKitMcpClient(host, authenticate, policy);
+    }
+
+    const endpoints = resolveHostedTools(hostedTools);
+    await this.mcpClient.connectAll(endpoints);
+
+    for (const def of this.mcpClient.getAllToolDefinitions()) {
+      index.set(def.name, {
+        source: "mcp",
+        mcpToolName: def.name,
+        def,
+      });
+    }
+  }
+
+  // ----------------- ToolProvider (no tools of our own) --------------------
+
+  getAgentTools(): AgentToolDefinition[] {
+    return [];
+  }
+
+  async executeAgentTool(): Promise<unknown> {
+    throw new Error("AgentsPlugin does not expose executeAgentTool directly");
+  }
+
+  // ----------------- Route mounting and handlers ---------------------------
+
+  private mountInvocationsRoute() {
+    if (!this.context) return;
+    this.context.addRoute(
+      "post",
+      "/invocations",
+      (req: express.Request, res: express.Response) => {
+        this._handleInvocations(req, res);
+      },
+    );
+  }
+
+  injectRoutes(router: IAppRouter) {
+    this.route(router, {
+      name: "chat",
+      method: "post",
+      path: "/chat",
+      handler: async (req, res) => this._handleChat(req, res),
+    });
+    this.route(router, {
+      name: "cancel",
+      method: "post",
+      path: "/cancel",
+      handler: async (req, res) => this._handleCancel(req, res),
+    });
+    this.route(router, {
+      name: "approve",
+      method: "post",
+      path: "/approve",
+      handler: async (req, res) => this._handleApprove(req, res),
+    });
+    this.route(router, {
+      name: "threads",
+      method: "get",
+      path: "/threads",
+      handler: async (req, res) => this._handleListThreads(req, res),
+    });
+    this.route(router, {
+      name: "thread",
+      method: "get",
+      path: "/threads/:threadId",
+      handler: async (req, res) => this._handleGetThread(req, res),
+    });
+    this.route(router, {
+      name: "deleteThread",
+      method: "delete",
+      path: "/threads/:threadId",
+      handler: async (req, res) => this._handleDeleteThread(req, res),
+    });
+    this.route(router, {
+      name: "info",
+      method: "get",
+      path: "/info",
+      handler: async (_req, res) => {
+        res.json({
+          agents: Array.from(this.agents.keys()),
+          defaultAgent: this.defaultAgentName,
+        });
+      },
+    });
+  }
+
+  clientConfig(): Record<string, unknown> {
+    return {
+      agents: Array.from(this.agents.keys()),
+      defaultAgent: this.defaultAgentName,
+    };
+  }
+
+  private async _handleChat(req: express.Request, res: express.Response) {
+    const parsed = chatRequestSchema.safeParse(req.body);
+    if (!parsed.success) {
+      res.status(400).json({
+        error: "Invalid request",
+        details: parsed.error.flatten().fieldErrors,
+      });
+      return;
+    }
+    const { message, threadId, agent: agentName } = parsed.data;
+
+    const registered = this.resolveAgent(agentName);
+    if (!registered) {
+      res.status(400).json({
+        error: agentName
+          ? `Agent "${agentName}" not found`
+          : "No agent registered",
+      });
+      return;
+    }
+
+    const userId = this.resolveUserId(req);
+
+    // Reject early (before allocating a thread) when the user is already at
+    // their concurrent-stream limit. Prevents a misbehaving client from
+    // churning thread rows while being denied elsewhere.
+    const limits = this.resolvedLimits;
+    if (this.countUserStreams(userId) >= limits.maxConcurrentStreamsPerUser) {
+      res.setHeader("Retry-After", "5");
+      res.status(429).json({
+        error: `Too many concurrent streams for this user (limit ${limits.maxConcurrentStreamsPerUser}). Wait for an existing stream to complete before starting another.`,
+      });
+      return;
+    }
+
+    let thread = threadId ? await this.threadStore.get(threadId, userId) : null;
+    if (threadId && !thread) {
+      res.status(404).json({ error: `Thread ${threadId} not found` });
+      return;
+    }
+    if (!thread) {
+      thread = await this.threadStore.create(userId);
+    }
+
+    const userMessage: Message = {
+      id: randomUUID(),
+      role: "user",
+      content: message,
+      createdAt: new Date(),
+    };
+    await this.threadStore.addMessage(thread.id, userId, userMessage);
+    return this._streamAgent(req, res, registered, thread, userId);
+  }
+
+  private async _handleInvocations(
+    req: express.Request,
+    res: express.Response,
+  ) {
+    const parsed = invocationsRequestSchema.safeParse(req.body);
+    if (!parsed.success) {
+      res.status(400).json({
+        error: "Invalid request",
+        details: parsed.error.flatten().fieldErrors,
+      });
+      return;
+    }
+    const { input } = parsed.data;
+    const registered = this.resolveAgent();
+    if (!registered) {
+      res.status(400).json({ error: "No agent registered" });
+      return;
+    }
+    const userId = this.resolveUserId(req);
+    const thread = await this.threadStore.create(userId);
+
+    if (typeof input === "string") {
+      await this.threadStore.addMessage(thread.id, userId, {
+        id: randomUUID(),
+        role: "user",
+        content: input,
+        createdAt: new Date(),
+      });
+    } else {
+      for (const item of input) {
+        const role = (item.role ?? "user") as Message["role"];
+        const content =
+          typeof item.content === "string"
+            ? item.content
+            : JSON.stringify(item.content ?? "");
+        if (!content) continue;
+        await this.threadStore.addMessage(thread.id, userId, {
+          id: randomUUID(),
+          role,
+          content,
+          createdAt: new Date(),
+        });
+      }
+    }
+
+    return this._streamAgent(req, res, registered, thread, userId);
+  }
+
+  private async _streamAgent(
+    req: express.Request,
+    res: express.Response,
+    registered: RegisteredAgent,
+    thread: Thread,
+    userId: string,
+  ): Promise<void> {
+    const abortController = new AbortController();
+    const signal = abortController.signal;
+    const requestId = randomUUID();
+    this.activeStreams.set(requestId, { controller: abortController, userId });
+
+    const tools = Array.from(registered.toolIndex.values()).map((e) => e.def);
+    const approvalPolicy = this.resolvedApprovalPolicy;
+    const limits = this.resolvedLimits;
+    const outboundEvents = new EventChannel<ResponseStreamEvent>();
+    const translator = new AgentEventTranslator();
+    // Per-run tool-call budget (shared across the top-level adapter and any
+    // sub-agents it delegates to). Counted pre-dispatch so a prompt-injected
+    // agent cannot drain the budget silently via denied calls.
+    let toolCallsUsed = 0;
+
+    const executeTool = async (
+      name: string,
+      args: unknown,
+    ): Promise<unknown> => {
+      if (toolCallsUsed >= limits.maxToolCalls) {
+        abortController.abort(
+          new Error(
+            `Tool-call budget exhausted (limit ${limits.maxToolCalls}).`,
+          ),
+        );
+        throw new Error(
+          `Tool-call budget exhausted (limit ${limits.maxToolCalls}). Raise agents({ limits: { maxToolCalls } }) or review the agent's tool-selection logic.`,
+        );
+      }
+      toolCallsUsed++;
+
+      const entry = registered.toolIndex.get(name);
+      if (!entry) throw new Error(`Unknown tool: ${name}`);
+
+      if (
+        approvalPolicy.requireForDestructive &&
+        entry.def.annotations?.destructive === true
+      ) {
+        const approvalId = randomUUID();
+        for (const ev of translator.translate({
+          type: "approval_pending",
+          approvalId,
+          streamId: requestId,
+          toolName: name,
+          args,
+          annotations: entry.def.annotations,
+        })) {
+          outboundEvents.push(ev);
+        }
+        const decision = await this.approvalGate.wait({
+          approvalId,
+          streamId: requestId,
+          userId,
+          timeoutMs: approvalPolicy.timeoutMs,
+        });
+        if (decision === "deny") {
+          return `Tool execution denied by user approval gate (tool: ${name}).`;
+        }
+      }
+
+      let result: unknown;
+      if (entry.source === "toolkit") {
+        if (!this.context) {
+          throw new Error(
+            "Plugin tool execution requires PluginContext; this should never happen through createApp",
+          );
+        }
+        result = await this.context.executeTool(
+          req,
+          entry.pluginName,
+          entry.localName,
+          args,
+          signal,
+        );
+      } else if (entry.source === "function") {
+        result = await entry.functionTool.execute(
+          args as Record<string, unknown>,
+        );
+      } else if (entry.source === "mcp") {
+        if (!this.mcpClient) throw new Error("MCP client not connected");
+        const oboToken = req.headers["x-forwarded-access-token"];
+        const mcpAuth =
+          typeof oboToken === "string"
+            ? { Authorization: `Bearer ${oboToken}` }
+            : undefined;
+        result = await this.mcpClient.callTool(
+          entry.mcpToolName,
+          args,
+          mcpAuth,
+        );
+      } else if (entry.source === "subagent") {
+        const childAgent = this.agents.get(entry.agentName);
+        if (!childAgent)
+          throw new Error(`Sub-agent not found: ${entry.agentName}`);
+        result = await this.runSubAgent(req, childAgent, args, signal, 1);
+      }
+
+      // A `void` / `undefined` return is a legitimate tool outcome (e.g., a
+      // "send notification" side-effecting tool). Return an empty string so
+      // the LLM sees a successful-but-empty result rather than a bogus
+      // "execution failed" error.
+      if (result === undefined) {
+        return "";
+      }
+      const MAX = 50_000;
+      const serialized =
+        typeof result === "string" ? result : JSON.stringify(result);
+      if (serialized.length > MAX) {
+        return `${serialized.slice(0, MAX)}\n\n[Result truncated: ${serialized.length} chars exceeds ${MAX} limit]`;
+      }
+      return result;
+    };
+
+    // Drive the adapter and the approval-event side-channel concurrently.
+    // Outbound events from both sources flow through `outboundEvents`; the
+    // generator below drains the channel in order. executeTool pushes
+    // approval-pending events into the same channel before awaiting the gate.
+    const driver = (async () => {
+      try {
+        for (const evt of translator.translate({
+          type: "metadata",
+          data: { threadId: thread.id },
+        })) {
+          outboundEvents.push(evt);
+        }
+
+        const pluginNames = this.context
+          ? this.context
+              .getPluginNames()
+              .filter((n) => n !== this.name && n !== "server")
+          : [];
+        const fullPrompt = composePromptForAgent(
+          registered,
+          this.config.baseSystemPrompt,
+          {
+            agentName: registered.name,
+            pluginNames,
+            toolNames: tools.map((t) => t.name),
+          },
+        );
+
+        const messagesWithSystem: Message[] = [
+          {
+            id: "system",
+            role: "system",
+            content: fullPrompt,
+            createdAt: new Date(),
+          },
+          ...thread.messages,
+        ];
+
+        const stream = registered.adapter.run(
+          {
+            messages: messagesWithSystem,
+            tools,
+            threadId: thread.id,
+            signal,
+          },
+          { executeTool, signal },
+        );
+
+        // Accumulate assistant output from BOTH streaming and non-streaming
+        // adapters. Delta-based adapters (Databricks, Vercel AI) emit
+        // `message_delta` chunks that we concatenate; adapters that yield a
+        // single final assistant message (e.g. LangChain's `on_chain_end`
+        // path) emit a `message` event whose content replaces whatever
+        // deltas already arrived. Without the `message` branch, multi-turn
+        // LangChain conversations silently dropped the assistant turn from
+        // thread history.
+        let fullContent = "";
+        for await (const event of stream) {
+          if (signal.aborted) break;
+          if (event.type === "message_delta") {
+            fullContent += event.content;
+          } else if (event.type === "message") {
+            fullContent = event.content;
+          }
+          for (const translated of translator.translate(event)) {
+            outboundEvents.push(translated);
+          }
+        }
+
+        if (fullContent) {
+          await this.threadStore.addMessage(thread.id, userId, {
+            id: randomUUID(),
+            role: "assistant",
+            content: fullContent,
+            createdAt: new Date(),
+          });
+        }
+
+        for (const evt of translator.finalize()) outboundEvents.push(evt);
+      } catch (error) {
+        if (signal.aborted) {
+          outboundEvents.close();
+          return;
+        }
+        logger.error("Agent chat error: %O", error);
+        outboundEvents.close(error);
+        return;
+      } finally {
+        // Any pending approval gates for this stream are auto-denied so the
+        // adapter can unwind if it was still waiting.
+        this.approvalGate.abortStream(requestId);
+        this.activeStreams.delete(requestId);
+        // Stateless agents (e.g. autocomplete) don't persist history; drop
+        // the thread so `InMemoryThreadStore` doesn't accumulate one record
+        // per request. Swallow delete errors — the stream has already
+        // finished and the client has the response.
+        if (registered.ephemeral) {
+          try {
+            await this.threadStore.delete(thread.id, userId);
+          } catch (err) {
+            logger.warn(
+              "Failed to delete ephemeral thread %s: %O",
+              thread.id,
+              err,
+            );
+          }
+        }
+      }
+      outboundEvents.close();
+    })();
+
+    await this.executeStream<ResponseStreamEvent>(
+      res,
+      async function* () {
+        try {
+          for await (const ev of outboundEvents) {
+            yield ev;
+          }
+        } finally {
+          await driver.catch(() => undefined);
+        }
+      },
+      {
+        ...agentStreamDefaults,
+        stream: { ...agentStreamDefaults.stream, streamId: requestId },
+      },
+    );
+  }
+
+  /**
+   * Runs a sub-agent in response to an `agent-<key>` tool call. Returns the
+   * concatenated text output to hand back to the parent adapter as the tool
+   * result.
+   *
+   * `depth` starts at 1 for a top-level sub-agent invocation (i.e. the
+   * outer `_streamAgent` calls `runSubAgent(..., 1)`) and increments on
+   * each nested `runSubAgent` call. Depths exceeding
+   * `limits.maxSubAgentDepth` are rejected before any adapter work.
+   */
+  private async runSubAgent(
+    req: express.Request,
+    child: RegisteredAgent,
+    args: unknown,
+    signal: AbortSignal,
+    depth: number,
+  ): Promise<string> {
+    const limits = this.resolvedLimits;
+    if (depth > limits.maxSubAgentDepth) {
+      throw new Error(
+        `Sub-agent depth exceeded (limit ${limits.maxSubAgentDepth}). ` +
+          `Raise agents({ limits: { maxSubAgentDepth } }) or break the delegation cycle.`,
+      );
+    }
+
+    const input =
+      typeof args === "object" &&
+      args !== null &&
+      typeof (args as { input?: unknown }).input === "string"
+        ? (args as { input: string }).input
+        : JSON.stringify(args);
+    const childTools = Array.from(child.toolIndex.values()).map((e) => e.def);
+
+    const childExecute = async (
+      name: string,
+      childArgs: unknown,
+    ): Promise<unknown> => {
+      const entry = child.toolIndex.get(name);
+      if (!entry) throw new Error(`Unknown tool in sub-agent: ${name}`);
+      if (entry.source === "toolkit" && this.context) {
+        return this.context.executeTool(
+          req,
+          entry.pluginName,
+          entry.localName,
+          childArgs,
+          signal,
+        );
+      }
+      if (entry.source === "function") {
+        return entry.functionTool.execute(childArgs as Record<string, unknown>);
+      }
+      if (entry.source === "subagent") {
+        const grandchild = this.agents.get(entry.agentName);
+        if (!grandchild)
+          throw new Error(`Sub-agent not found: ${entry.agentName}`);
+        return this.runSubAgent(req, grandchild, childArgs, signal, depth + 1);
+      }
+      if (entry.source === "mcp" && this.mcpClient) {
+        const oboToken = req.headers["x-forwarded-access-token"];
+        const mcpAuth =
+          typeof oboToken === "string"
+            ? { Authorization: `Bearer ${oboToken}` }
+            : undefined;
+        return this.mcpClient.callTool(entry.mcpToolName, childArgs, mcpAuth);
+      }
+      throw new Error(`Unsupported sub-agent tool source: ${entry.source}`);
+    };
+
+    const runContext: AgentRunContext = { executeTool: childExecute, signal };
+
+    const pluginNames = this.context
+      ? this.context
+          .getPluginNames()
+          .filter((n) => n !== this.name && n !== "server")
+      : [];
+    const systemPrompt = composePromptForAgent(
+      child,
+      this.config.baseSystemPrompt,
+      {
+        agentName: child.name,
+        pluginNames,
+        toolNames: childTools.map((t) => t.name),
+      },
+    );
+
+    const messages: Message[] = [
+      {
+        id: "system",
+        role: "system",
+        content: systemPrompt,
+        createdAt: new Date(),
+      },
+      {
+        id: randomUUID(),
+        role: "user",
+        content: input,
+        createdAt: new Date(),
+      },
+    ];
+
+    let output = "";
+    const events: AgentEvent[] = [];
+    for await (const event of child.adapter.run(
+      { messages, tools: childTools, threadId: randomUUID(), signal },
+      runContext,
+    )) {
+      events.push(event);
+      if (event.type === "message_delta") output += event.content;
+      else if (event.type === "message") output = event.content;
+    }
+    return output;
+  }
+
+  private async _handleCancel(req: express.Request, res: express.Response) {
+    const { streamId } = req.body as { streamId?: string };
+    if (!streamId) {
+      res.status(400).json({ error: "streamId is required" });
+      return;
+    }
+    const entry = this.activeStreams.get(streamId);
+    if (!entry) {
+      // Stream is unknown or already completed — idempotent no-op.
+      res.json({ cancelled: true });
+      return;
+    }
+    const userId = this.resolveUserId(req);
+    if (entry.userId !== userId) {
+      res.status(403).json({ error: "Forbidden" });
+      return;
+    }
+    entry.controller.abort("Cancelled by user");
+    this.activeStreams.delete(streamId);
+    this.approvalGate.abortStream(streamId);
+    res.json({ cancelled: true });
+  }
+
+  private async _handleApprove(req: express.Request, res: express.Response) {
+    const parsed = approvalRequestSchema.safeParse(req.body);
+    if (!parsed.success) {
+      res.status(400).json({
+        error: "Invalid request",
+        details: parsed.error.flatten().fieldErrors,
+      });
+      return;
+    }
+    const { streamId, approvalId, decision } = parsed.data;
+
+    const streamEntry = this.activeStreams.get(streamId);
+    if (!streamEntry) {
+      // Stream has already completed or never existed. Return 404 so the UI
+      // knows the approval token is no longer valid (the waiter, if any, has
+      // already been timed out or aborted).
+      res.status(404).json({ error: "Stream not found or already completed" });
+      return;
+    }
+
+    const userId = this.resolveUserId(req);
+    if (streamEntry.userId !== userId) {
+      res.status(403).json({ error: "Forbidden" });
+      return;
+    }
+
+    const result = this.approvalGate.submit({ approvalId, userId, decision });
+    if (!result.ok) {
+      if (result.reason === "forbidden") {
+        res.status(403).json({ error: "Forbidden" });
+        return;
+      }
+      res.status(404).json({ error: "Approval not found or already settled" });
+      return;
+    }
+
+    res.json({ decision });
+  }
+
+  private async _handleListThreads(
+    req: express.Request,
+    res: express.Response,
+  ) {
+    const userId = this.resolveUserId(req);
+    const threads = await this.threadStore.list(userId);
+    res.json({ threads });
+  }
+
+  private async _handleGetThread(req: express.Request, res: express.Response) {
+    const userId = this.resolveUserId(req);
+    const thread = await this.threadStore.get(req.params.threadId, userId);
+    if (!thread) {
+      res.status(404).json({ error: "Thread not found" });
+      return;
+    }
+    res.json(thread);
+  }
+
+  private async _handleDeleteThread(
+    req: express.Request,
+    res: express.Response,
+  ) {
+    const userId = this.resolveUserId(req);
+    const deleted = await this.threadStore.delete(req.params.threadId, userId);
+    if (!deleted) {
+      res.status(404).json({ error: "Thread not found" });
+      return;
+    }
+    res.json({ deleted: true });
+  }
+
+  private resolveAgent(name?: string): RegisteredAgent | null {
+    if (name) return this.agents.get(name) ?? null;
+    if (this.defaultAgentName) {
+      return this.agents.get(this.defaultAgentName) ?? null;
+    }
+    const first = this.agents.values().next();
+    return first.done ? null : first.value;
+  }
+
+  private printRegistry(): void {
+    if (this.agents.size === 0) return;
+    console.log("");
+    console.log(`  ${pc.bold("Agents")} ${pc.dim(`(${this.agents.size})`)}`);
+    console.log(`  ${pc.dim("─".repeat(60))}`);
+    for (const [name, reg] of this.agents) {
+      const tools = reg.toolIndex.size;
+      const marker = name === this.defaultAgentName ? pc.green("●") : " ";
+      console.log(
+        `  ${marker} ${pc.bold(name.padEnd(24))} ${pc.dim(`${tools} tools`)}`,
+      );
+    }
+    console.log(`  ${pc.dim("─".repeat(60))}`);
+    console.log("");
+  }
+
+  async shutdown(): Promise<void> {
+    this.approvalGate.abortAll();
+    if (this.mcpClient) {
+      await this.mcpClient.close();
+      this.mcpClient = null;
+    }
+  }
+
+  exports() {
+    return {
+      register: (name: string, def: AgentDefinition) =>
+        this.registerCodeAgent(name, def),
+      list: () => Array.from(this.agents.keys()),
+      get: (name: string) => this.agents.get(name) ?? null,
+      reload: () => this.reload(),
+      getDefault: () => this.defaultAgentName,
+      getThreads: (userId: string) => this.threadStore.list(userId),
+    };
+  }
+
+  private async registerCodeAgent(
+    name: string,
+    def: AgentDefinition,
+  ): Promise<void> {
+    const registered = await this.buildRegisteredAgent(name, def, {
+      origin: "code",
+    });
+    this.agents.set(name, registered);
+    if (!this.defaultAgentName) this.defaultAgentName = name;
+  }
+}
+
+function normalizeAutoInherit(value: AgentsPluginConfig["autoInheritTools"]): {
+  file: boolean;
+  code: boolean;
+} {
+  // Default is opt-out for both origins. A markdown agent or code-defined
+  // agent with no declared `tools:` gets an empty tool index unless the
+  // developer explicitly flips `autoInheritTools` on. Even then, only tools
+  // whose plugin author marked `autoInheritable: true` are spread — see
+  // `applyAutoInherit` for the filter.
+  if (value === undefined) return { file: false, code: false };
+  if (typeof value === "boolean") return { file: value, code: value };
+  return { file: value.file ?? false, code: value.code ?? false };
+}
+
+function composePromptForAgent(
+  registered: RegisteredAgent,
+  pluginLevel: BaseSystemPromptOption | undefined,
+  ctx: PromptContext,
+): string {
+  const perAgent = registered.baseSystemPrompt;
+  const resolved = perAgent !== undefined ? perAgent : pluginLevel;
+
+  let base = "";
+  if (resolved === false) {
+    base = "";
+  } else if (typeof resolved === "string") {
+    base = resolved;
+  } else if (typeof resolved === "function") {
+    base = resolved(ctx);
+  } else {
+    base = buildBaseSystemPrompt(ctx);
+  }
+
+  return composeSystemPrompt(base, registered.instructions);
+}
+
+/**
+ * Plugin factory for the agents plugin. Reads `config/agents/<id>/agent.md` by default,
+ * resolves toolkits/tools from registered plugins, exposes `appkit.agents.*`
+ * runtime API and mounts `/invocations`.
+ *
+ * @example
+ * ```ts
+ * import { agents, analytics, createApp, server } from "@databricks/appkit";
+ *
+ * await createApp({
+ *   plugins: [server(), analytics(), agents()],
+ * });
+ * ```
+ */
+export const agents = toPlugin(AgentsPlugin);
diff --git a/packages/appkit/src/plugins/agents/defaults.ts b/packages/appkit/src/plugins/agents/defaults.ts
new file mode 100644
index 00000000..4da11bef
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/defaults.ts
@@ -0,0 +1,12 @@
+import type { StreamExecutionSettings } from "shared";
+
+export const agentStreamDefaults: StreamExecutionSettings = {
+  default: {
+    cache: { enabled: false },
+    retry: { enabled: false },
+    timeout: 300_000,
+  },
+  stream: {
+    bufferSize: 200,
+  },
+};
diff --git a/packages/appkit/src/plugins/agents/event-channel.ts b/packages/appkit/src/plugins/agents/event-channel.ts
new file mode 100644
index 00000000..c5b60463
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/event-channel.ts
@@ -0,0 +1,70 @@
+/**
+ * Single-producer/single-consumer async queue used by the agents plugin to
+ * merge streams of SSE events from two concurrent sources: the adapter's
+ * `run()` generator, and out-of-band events emitted by `executeTool` (e.g.
+ * human-approval requests).
+ *
+ * The consumer drains the channel as an async iterable; the producer pushes
+ * events synchronously and closes the channel when the source has completed
+ * or errored.
+ */
+interface Waiter<T> {
+  resolve: (value: IteratorResult<T>) => void;
+  reject: (error: unknown) => void;
+}
+
+export class EventChannel<T> {
+  private queue: T[] = [];
+  private waiters: Array<Waiter<T>> = [];
+  private closed = false;
+  private error: unknown = undefined;
+
+  /** Synchronously enqueue an event. Safe to call from non-async contexts. */
+  push(value: T): void {
+    if (this.closed) return;
+    const waiter = this.waiters.shift();
+    if (waiter) {
+      waiter.resolve({ value, done: false });
+    } else {
+      this.queue.push(value);
+    }
+  }
+
+  /**
+   * Close the channel. Any pending `next()` calls resolve with `done: true`.
+   * If `error` is supplied, pending `next()` calls reject with it and future
+   * calls do the same.
+   */
+  close(error?: unknown): void {
+    if (this.closed) return;
+    this.closed = true;
+    this.error = error;
+    while (this.waiters.length > 0) {
+      const waiter = this.waiters.shift();
+      if (!waiter) break;
+      if (error) {
+        waiter.reject(error);
+      } else {
+        waiter.resolve({ value: undefined as never, done: true });
+      }
+    }
+  }
+
+  [Symbol.asyncIterator](): AsyncIterator<T> {
+    return {
+      next: (): Promise<IteratorResult<T>> => {
+        if (this.queue.length > 0) {
+          const value = this.queue.shift() as T;
+          return Promise.resolve({ value, done: false });
+        }
+        if (this.closed) {
+          if (this.error) return Promise.reject(this.error);
+          return Promise.resolve({ value: undefined as never, done: true });
+        }
+        return new Promise((resolve, reject) => {
+          this.waiters.push({ resolve, reject });
+        });
+      },
+    };
+  }
+}
diff --git a/packages/appkit/src/plugins/agents/event-translator.ts b/packages/appkit/src/plugins/agents/event-translator.ts
new file mode 100644
index 00000000..54d749fb
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/event-translator.ts
@@ -0,0 +1,291 @@
+import { randomUUID } from "node:crypto";
+import type {
+  AgentEvent,
+  ResponseFunctionCallOutput,
+  ResponseFunctionToolCall,
+  ResponseOutputMessage,
+  ResponseStreamEvent,
+} from "shared";
+
+/**
+ * Translates internal `AgentEvent` stream into Responses API SSE events.
+ *
+ * Stateful: one instance per streaming request. Tracks sequence numbers and
+ * allocates `output_index` strictly monotonically — each emitted output
+ * item (message, function call, function call output) claims the next
+ * available index and, once claimed, never reuses an earlier one. This is a
+ * Responses-API contract that OpenAI's own SDK parsers enforce.
+ *
+ * A message is opened lazily on the first `message_delta` or `message`
+ * event. If a `tool_call` or `tool_result` arrives while a message is open
+ * (common ReAct flow: partial text → tool call → more text), the open
+ * message is closed (`response.output_item.done`) BEFORE the tool item is
+ * added, so subsequent text resumes as a new message item at a strictly
+ * later index.
+ */
+export class AgentEventTranslator {
+  private seqNum = 0;
+  private nextOutputIndex = 0;
+  private currentMessage: {
+    id: string;
+    text: string;
+    outputIndex: number;
+  } | null = null;
+  private finalized = false;
+
+  translate(event: AgentEvent): ResponseStreamEvent[] {
+    switch (event.type) {
+      case "message_delta":
+        return this.handleMessageDelta(event.content);
+      case "message":
+        return this.handleFullMessage(event.content);
+      case "tool_call":
+        return this.handleToolCall(event.callId, event.name, event.args);
+      case "tool_result":
+        return this.handleToolResult(event.callId, event.result, event.error);
+      case "thinking":
+        return [
+          {
+            type: "appkit.thinking",
+            content: event.content,
+            sequence_number: this.seqNum++,
+          },
+        ];
+      case "metadata":
+        return [
+          {
+            type: "appkit.metadata",
+            data: event.data,
+            sequence_number: this.seqNum++,
+          },
+        ];
+      case "approval_pending":
+        return [
+          {
+            type: "appkit.approval_pending",
+            approval_id: event.approvalId,
+            stream_id: event.streamId,
+            tool_name: event.toolName,
+            args: event.args,
+            annotations: event.annotations,
+            sequence_number: this.seqNum++,
+          },
+        ];
+      case "status":
+        return this.handleStatus(event.status, event.error);
+    }
+  }
+
+  finalize(): ResponseStreamEvent[] {
+    if (this.finalized) return [];
+    this.finalized = true;
+
+    const events: ResponseStreamEvent[] = [];
+    const closeEvent = this.closeCurrentMessage();
+    if (closeEvent) events.push(closeEvent);
+
+    events.push({
+      type: "response.completed",
+      sequence_number: this.seqNum++,
+      response: {},
+    });
+
+    return events;
+  }
+
+  private handleMessageDelta(content: string): ResponseStreamEvent[] {
+    const events: ResponseStreamEvent[] = [];
+
+    if (!this.currentMessage) {
+      const id = `msg_${randomUUID()}`;
+      const outputIndex = this.nextOutputIndex++;
+      this.currentMessage = { id, text: content, outputIndex };
+      const item: ResponseOutputMessage = {
+        type: "message",
+        id,
+        status: "in_progress",
+        role: "assistant",
+        content: [],
+      };
+      events.push({
+        type: "response.output_item.added",
+        output_index: outputIndex,
+        item,
+        sequence_number: this.seqNum++,
+      });
+    } else {
+      this.currentMessage.text += content;
+    }
+
+    events.push({
+      type: "response.output_text.delta",
+      item_id: this.currentMessage.id,
+      output_index: this.currentMessage.outputIndex,
+      content_index: 0,
+      delta: content,
+      sequence_number: this.seqNum++,
+    });
+
+    return events;
+  }
+
+  private handleFullMessage(content: string): ResponseStreamEvent[] {
+    const events: ResponseStreamEvent[] = [];
+
+    if (!this.currentMessage) {
+      // No prior deltas — open and immediately close.
+      const id = `msg_${randomUUID()}`;
+      const outputIndex = this.nextOutputIndex++;
+      this.currentMessage = { id, text: content, outputIndex };
+      const addedItem: ResponseOutputMessage = {
+        type: "message",
+        id,
+        status: "in_progress",
+        role: "assistant",
+        content: [],
+      };
+      events.push({
+        type: "response.output_item.added",
+        output_index: outputIndex,
+        item: addedItem,
+        sequence_number: this.seqNum++,
+      });
+    } else {
+      // Deltas already opened the item; `message` overrides the accumulated
+      // text (per adapter contract) and closes it.
+      this.currentMessage.text = content;
+    }
+
+    const closeEvent = this.closeCurrentMessage();
+    if (closeEvent) events.push(closeEvent);
+    return events;
+  }
+
+  private handleToolCall(
+    callId: string,
+    name: string,
+    args: unknown,
+  ): ResponseStreamEvent[] {
+    const events: ResponseStreamEvent[] = [];
+    const closeEvent = this.closeCurrentMessage();
+    if (closeEvent) events.push(closeEvent);
+
+    const outputIndex = this.nextOutputIndex++;
+    const item: ResponseFunctionToolCall = {
+      type: "function_call",
+      id: `fc_${randomUUID()}`,
+      call_id: callId,
+      name,
+      arguments: typeof args === "string" ? args : JSON.stringify(args),
+    };
+
+    events.push(
+      {
+        type: "response.output_item.added",
+        output_index: outputIndex,
+        item,
+        sequence_number: this.seqNum++,
+      },
+      {
+        type: "response.output_item.done",
+        output_index: outputIndex,
+        item,
+        sequence_number: this.seqNum++,
+      },
+    );
+    return events;
+  }
+
+  private handleToolResult(
+    callId: string,
+    result: unknown,
+    error?: string,
+  ): ResponseStreamEvent[] {
+    const events: ResponseStreamEvent[] = [];
+    const closeEvent = this.closeCurrentMessage();
+    if (closeEvent) events.push(closeEvent);
+
+    const outputIndex = this.nextOutputIndex++;
+    // Coalesce `undefined` → "" so the wire shape is always a string (the
+    // Responses API contract). Non-string results are JSON-serialised.
+    let output: string;
+    if (error !== undefined) {
+      output = error;
+    } else if (typeof result === "string") {
+      output = result;
+    } else if (result === undefined) {
+      output = "";
+    } else {
+      output = JSON.stringify(result);
+    }
+    const item: ResponseFunctionCallOutput = {
+      type: "function_call_output",
+      id: `fc_output_${randomUUID()}`,
+      call_id: callId,
+      output,
+    };
+
+    events.push(
+      {
+        type: "response.output_item.added",
+        output_index: outputIndex,
+        item,
+        sequence_number: this.seqNum++,
+      },
+      {
+        type: "response.output_item.done",
+        output_index: outputIndex,
+        item,
+        sequence_number: this.seqNum++,
+      },
+    );
+    return events;
+  }
+
+  /**
+   * Emit an `response.output_item.done` for the currently-open message, if
+   * any, and clear the state. Returns the event to the caller so it can be
+   * pushed at the right moment in the sequence. Returns `null` when there
+   * is no open message.
+   */
+  private closeCurrentMessage(): ResponseStreamEvent | null {
+    if (!this.currentMessage) return null;
+    const { id, text, outputIndex } = this.currentMessage;
+    this.currentMessage = null;
+    const doneItem: ResponseOutputMessage = {
+      type: "message",
+      id,
+      status: "completed",
+      role: "assistant",
+      content: [{ type: "output_text", text }],
+    };
+    return {
+      type: "response.output_item.done",
+      output_index: outputIndex,
+      item: doneItem,
+      sequence_number: this.seqNum++,
+    };
+  }
+
+  private handleStatus(status: string, error?: string): ResponseStreamEvent[] {
+    if (status === "error") {
+      return [
+        {
+          type: "error",
+          error: error ?? "Unknown error",
+          sequence_number: this.seqNum++,
+        },
+        {
+          type: "response.failed",
+          sequence_number: this.seqNum++,
+        },
+      ];
+    }
+
+    if (status === "complete") {
+      return this.finalize();
+    }
+
+    return [];
+  }
+}
diff --git a/packages/appkit/src/plugins/agents/index.ts b/packages/appkit/src/plugins/agents/index.ts
new file mode 100644
index 00000000..377a8776
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/index.ts
@@ -0,0 +1,23 @@
+export { AgentsPlugin, agents } from "./agents";
+export { buildToolkitEntries } from "./build-toolkit";
+export {
+  agentIdFromMarkdownPath,
+  type LoadContext,
+  type LoadResult,
+  loadAgentFromFile,
+  loadAgentsFromDir,
+  parseFrontmatter,
+} from "./load-agents";
+export {
+  type AgentDefinition,
+  type AgentsPluginConfig,
+  type AgentTool,
+  type AutoInheritToolsConfig,
+  type BaseSystemPromptOption,
+  isToolkitEntry,
+  type PromptContext,
+  type RegisteredAgent,
+  type ResolvedToolEntry,
+  type ToolkitEntry,
+  type ToolkitOptions,
+} from "./types";
diff --git a/packages/appkit/src/plugins/agents/load-agents.ts b/packages/appkit/src/plugins/agents/load-agents.ts
new file mode 100644
index 00000000..5b2999ca
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/load-agents.ts
@@ -0,0 +1,407 @@
+import fs from "node:fs";
+import path from "node:path";
+import yaml from "js-yaml";
+import type { AgentAdapter } from "shared";
+import { createLogger } from "../../logging/logger";
+import type {
+  AgentDefinition,
+  AgentTool,
+  BaseSystemPromptOption,
+  ToolkitEntry,
+  ToolkitOptions,
+} from "./types";
+import { isToolkitEntry } from "./types";
+
+const logger = createLogger("agents:loader");
+
+interface ToolkitProvider {
+  toolkit: (opts?: ToolkitOptions) => Record<string, unknown>;
+}
+
+export interface LoadContext {
+  /** Default model when frontmatter has no `endpoint` and the def has no `model`. */
+  defaultModel?: AgentAdapter | Promise<AgentAdapter> | string;
+  /** Ambient tool library referenced by frontmatter `tools: [key1, key2]`. */
+  availableTools?: Record<string, AgentTool>;
+  /** Registered plugin toolkits referenced by frontmatter `toolkits: [...]`. */
+  plugins?: Map<string, ToolkitProvider>;
+  /**
+   * Code-defined agents contributed by `agents({ agents: { ... } })`. The
+   * directory loader resolves `agents:` frontmatter references against
+   * these alongside sibling markdown files, so a markdown parent can
+   * delegate to a code-defined child. Code-defined names win on collision
+   * with markdown names, matching the plugin's top-level merge precedence.
+   */
+  codeAgents?: Record<string, AgentDefinition>;
+}
+
+export interface LoadResult {
+  /** Agent definitions keyed by agent id (directory name under `dir`). */
+  defs: Record<string, AgentDefinition>;
+  /** First agent with `default: true` frontmatter (sorted id order), or `null`. */
+  defaultAgent: string | null;
+}
+
+interface Frontmatter {
+  endpoint?: string;
+  model?: string;
+  toolkits?: ToolkitSpec[];
+  tools?: string[];
+  /**
+   * Other agent ids to expose as sub-agents. Each becomes an `agent-<id>`
+   * tool at runtime. Resolution happens at directory-load time in
+   * {@link loadAgentsFromDir}; the single-file {@link loadAgentFromFile} path
+   * rejects non-empty values since there are no siblings to resolve against.
+   */
+  agents?: string[];
+  maxSteps?: number;
+  maxTokens?: number;
+  default?: boolean;
+  baseSystemPrompt?: false | string;
+  ephemeral?: boolean;
+}
+
+type ToolkitSpec = string | { [pluginName: string]: ToolkitOptions | string[] };
+
+/**
+ * Derives the logical agent id from a markdown path. When the file is named
+ * `agent.md`, the id is the parent directory name (folder-based layout);
+ * otherwise the id is the file stem (e.g. legacy single-file paths).
+ */
+export function agentIdFromMarkdownPath(filePath: string): string {
+  const normalized = path.normalize(filePath);
+  const base = path.basename(normalized);
+  const parent = path.basename(path.dirname(normalized));
+  if (base === "agent.md" && parent && parent !== "." && parent !== "..") {
+    return parent;
+  }
+  return path.basename(normalized, ".md");
+}
+
+const ALLOWED_KEYS = new Set([
+  "endpoint",
+  "model",
+  "toolkits",
+  "tools",
+  "agents",
+  "maxSteps",
+  "maxTokens",
+  "default",
+  "baseSystemPrompt",
+  "ephemeral",
+]);
+
+/**
+ * Loads a single markdown agent file and resolves its frontmatter against
+ * registered plugin toolkits + ambient tool library.
+ *
+ * Rejects non-empty `agents:` frontmatter because single-file loads have
+ * no siblings to resolve sub-agent references against — callers must use
+ * {@link loadAgentsFromDir} when markdown agents delegate to one another.
+ */
+export async function loadAgentFromFile(
+  filePath: string,
+  ctx: LoadContext,
+): Promise<AgentDefinition> {
+  const raw = fs.readFileSync(filePath, "utf-8");
+  const name = agentIdFromMarkdownPath(filePath);
+  const { data } = parseFrontmatter(raw, filePath);
+  if (Array.isArray(data?.agents) && data.agents.length > 0) {
+    throw new Error(
+      `Agent '${name}' (${filePath}) declares 'agents:' in frontmatter, ` +
+        `which requires loadAgentsFromDir to resolve sibling references. ` +
+        `Use loadAgentsFromDir, or wire sub-agents in code via createAgent({ agents: { ... } }).`,
+    );
+  }
+  return buildDefinition(name, raw, filePath, ctx);
+}
+
+/**
+ * Scans a directory for one subdirectory per agent, each containing
+ * `agent.md` (frontmatter + body). Produces an `AgentDefinition` record keyed
+ * by agent id (folder name). Throws on frontmatter errors or unresolved
+ * references. Returns an empty map if the directory does not exist.
+ *
+ * Legacy top-level `*.md` files are rejected with an error — migrate each to
+ * `<id>/agent.md` under a sibling folder named for the agent id.
+ *
+ * Runs in two passes so sub-agent references in frontmatter (`agents: [...]`)
+ * can be resolved regardless of directory iteration order:
+ *
+ * 1. Build every agent's definition from its own `agent.md`.
+ * 2. Walk `agents:` references and wire `def.agents = { child: childDef }`
+ *    by looking them up in the complete map. Dangling names and
+ *    self-references fail loudly; mutual delegation is allowed and bounded
+ *    at runtime by `limits.maxSubAgentDepth`.
+ */
+export async function loadAgentsFromDir(
+  dir: string,
+  ctx: LoadContext,
+): Promise<LoadResult> {
+  if (!fs.existsSync(dir)) {
+    return { defs: {}, defaultAgent: null };
+  }
+
+  const entries = fs.readdirSync(dir, { withFileTypes: true });
+  const orphanMd = entries
+    .filter((e) => e.isFile() && e.name.endsWith(".md"))
+    .map((e) => e.name)
+    .sort();
+
+  if (orphanMd.length > 0) {
+    const hint = orphanMd
+      .map((f) => `${path.basename(f, ".md")}/agent.md`)
+      .join(", ");
+    throw new Error(
+      `Agents directory contains unsupported top-level markdown file(s): ${orphanMd.join(", ")}. ` +
+        `Use one folder per agent with a fixed entry file, e.g. ${hint}.`,
+    );
+  }
+
+  /** Reserved folder name until per-agent skills land; not an agent package. */
+  const RESERVED_DIRS = new Set(["skills"]);
+
+  const agentIds = entries
+    .filter((e) => e.isDirectory())
+    .map((e) => e.name)
+    .filter((name) => !RESERVED_DIRS.has(name))
+    .sort();
+
+  const defs: Record<string, AgentDefinition> = {};
+  const subAgentRefs: Record<string, string[]> = {};
+  let defaultAgent: string | null = null;
+
+  // Pass 1: build every agent's definition; collect sub-agent refs.
+  for (const id of agentIds) {
+    const agentPath = path.join(dir, id, "agent.md");
+    if (!fs.existsSync(agentPath)) {
+      throw new Error(
+        `Agents subdirectory '${path.join(dir, id)}' must contain agent.md.`,
+      );
+    }
+    const raw = fs.readFileSync(agentPath, "utf-8");
+    defs[id] = buildDefinition(id, raw, agentPath, ctx);
+    const { data } = parseFrontmatter(raw, agentPath);
+    if (data?.agents !== undefined) {
+      subAgentRefs[id] = normalizeAgentsFrontmatter(data.agents, id, agentPath);
+    }
+    if (data?.default === true && !defaultAgent) {
+      defaultAgent = id;
+    }
+  }
+
+  // Pass 2: resolve sub-agent references against the complete defs map.
+  // Code-defined agents (ctx.codeAgents) take precedence over markdown ones
+  // with the same name, matching the plugin's top-level merge behaviour.
+  for (const [name, refs] of Object.entries(subAgentRefs)) {
+    if (refs.length === 0) continue;
+    const children: Record<string, AgentDefinition> = {};
+    const missing: string[] = [];
+    for (const ref of refs) {
+      if (ref === name) {
+        throw new Error(
+          `Agent '${name}' (${path.join(dir, name, "agent.md")}) cannot reference itself in 'agents:'.`,
+        );
+      }
+      const sibling = ctx.codeAgents?.[ref] ?? defs[ref];
+      if (!sibling) {
+        missing.push(ref);
+        continue;
+      }
+      children[ref] = sibling;
+    }
+    if (missing.length > 0) {
+      const available =
+        [...Object.keys(ctx.codeAgents ?? {}), ...Object.keys(defs)]
+          .sort()
+          .join(", ") || "<none>";
+      throw new Error(
+        `Agent '${name}' references sub-agent(s) '${missing.join(", ")}' in 'agents:', ` +
+          `but no markdown or code agent(s) with those names exist. ` +
+          `Available: ${available}.`,
+      );
+    }
+    defs[name].agents = children;
+  }
+
+  return { defs, defaultAgent };
+}
+
+/**
+ * Validates that `agents:` frontmatter is an array of non-empty strings and
+ * returns it with duplicates removed. Throws with a clear per-file message
+ * on malformed input rather than silently ignoring.
+ */
+function normalizeAgentsFrontmatter(
+  value: unknown,
+  agentName: string,
+  filePath: string,
+): string[] {
+  if (!Array.isArray(value)) {
+    throw new Error(
+      `Agent '${agentName}' (${filePath}) has invalid 'agents:' frontmatter: ` +
+        `expected an array of sibling agent ids, got ${typeof value}.`,
+    );
+  }
+  const out: string[] = [];
+  const seen = new Set<string>();
+  for (const item of value) {
+    if (typeof item !== "string" || item.trim() === "") {
+      throw new Error(
+        `Agent '${agentName}' (${filePath}) has invalid 'agents:' entry: ` +
+          `expected non-empty string, got ${JSON.stringify(item)}.`,
+      );
+    }
+    if (seen.has(item)) continue;
+    seen.add(item);
+    out.push(item);
+  }
+  return out;
+}
+
+/** Exposed for tests. Parses `--- yaml ---\nbody` and validates frontmatter keys. */
+export function parseFrontmatter(
+  raw: string,
+  sourcePath?: string,
+): { data: Frontmatter | null; content: string } {
+  const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/);
+  if (!match) {
+    return { data: null, content: raw.trim() };
+  }
+  let parsed: unknown;
+  try {
+    parsed = yaml.load(match[1]);
+  } catch (err) {
+    const src = sourcePath ? ` (${sourcePath})` : "";
+    throw new Error(
+      `Invalid YAML frontmatter${src}: ${err instanceof Error ? err.message : String(err)}`,
+    );
+  }
+  if (parsed === null || parsed === undefined) {
+    return { data: {}, content: match[2].trim() };
+  }
+  if (typeof parsed !== "object" || Array.isArray(parsed)) {
+    const src = sourcePath ? ` (${sourcePath})` : "";
+    throw new Error(`Frontmatter must be a YAML object${src}`);
+  }
+  const data = parsed as Record<string, unknown>;
+  for (const key of Object.keys(data)) {
+    if (!ALLOWED_KEYS.has(key)) {
+      logger.warn(
+        "Ignoring unknown frontmatter key '%s' in %s",
+        key,
+        sourcePath ?? "<inline>",
+      );
+    }
+  }
+  return { data: data as Frontmatter, content: match[2].trim() };
+}
+
+function buildDefinition(
+  name: string,
+  raw: string,
+  filePath: string,
+  ctx: LoadContext,
+): AgentDefinition {
+  const { data, content } = parseFrontmatter(raw, filePath);
+  const fm: Frontmatter = data ?? {};
+
+  const tools = resolveFrontmatterTools(name, fm, filePath, ctx);
+  const model = fm.model ?? fm.endpoint ?? ctx.defaultModel;
+
+  let baseSystemPrompt: BaseSystemPromptOption | undefined;
+  if (fm.baseSystemPrompt === false) baseSystemPrompt = false;
+  else if (typeof fm.baseSystemPrompt === "string")
+    baseSystemPrompt = fm.baseSystemPrompt;
+
+  return {
+    name,
+    instructions: content,
+    model,
+    tools: Object.keys(tools).length > 0 ? tools : undefined,
+    maxSteps: typeof fm.maxSteps === "number" ? fm.maxSteps : undefined,
+    maxTokens: typeof fm.maxTokens === "number" ? fm.maxTokens : undefined,
+    baseSystemPrompt,
+    ephemeral: typeof fm.ephemeral === "boolean" ? fm.ephemeral : undefined,
+  };
+}
+
+function resolveFrontmatterTools(
+  agentName: string,
+  fm: Frontmatter,
+  filePath: string,
+  ctx: LoadContext,
+): Record<string, AgentTool> {
+  const out: Record<string, AgentTool> = {};
+  const pluginIdx = ctx.plugins ?? new Map<string, ToolkitProvider>();
+
+  for (const spec of fm.toolkits ?? []) {
+    const [pluginName, opts] = parseToolkitSpec(spec, filePath, agentName);
+    const provider = pluginIdx.get(pluginName);
+    if (!provider) {
+      throw new Error(
+        `Agent '${agentName}' (${filePath}) references toolkit '${pluginName}', but plugin '${pluginName}' is not registered. Available: ${
+          pluginIdx.size > 0
+            ? Array.from(pluginIdx.keys()).join(", ")
+            : "<none>"
+        }`,
+      );
+    }
+    const entries = provider.toolkit(opts) as Record<string, unknown>;
+    for (const [key, entry] of Object.entries(entries)) {
+      if (!isToolkitEntry(entry)) {
+        throw new Error(
+          `Plugin '${pluginName}'.toolkit() returned a value at key '${key}' that is not a ToolkitEntry`,
+        );
+      }
+      out[key] = entry as ToolkitEntry;
+    }
+  }
+
+  for (const key of fm.tools ?? []) {
+    const tool = ctx.availableTools?.[key];
+    if (!tool) {
+      const available = ctx.availableTools
+        ? Object.keys(ctx.availableTools).join(", ")
+        : "<none>";
+      throw new Error(
+        `Agent '${agentName}' (${filePath}) references tool '${key}', which is not in the agents() plugin's tools field. Available: ${available}`,
+      );
+    }
+    out[key] = tool;
+  }
+
+  return out;
+}
+
+function parseToolkitSpec(
+  spec: ToolkitSpec,
+  filePath: string,
+  agentName: string,
+): [string, ToolkitOptions | undefined] {
+  if (typeof spec === "string") {
+    return [spec, undefined];
+  }
+  if (typeof spec !== "object" || spec === null) {
+    throw new Error(
+      `Agent '${agentName}' (${filePath}) has invalid toolkit entry: ${JSON.stringify(spec)}`,
+    );
+  }
+  const keys = Object.keys(spec);
+  if (keys.length !== 1) {
+    throw new Error(
+      `Agent '${agentName}' (${filePath}) toolkit entry must have exactly one key, got: ${keys.join(", ")}`,
+    );
+  }
+  const pluginName = keys[0];
+  const value = spec[pluginName];
+  if (Array.isArray(value)) {
+    return [pluginName, { only: value }];
+  }
+  if (typeof value === "object" && value !== null) {
+    return [pluginName, value as ToolkitOptions];
+  }
+  throw new Error(
+    `Agent '${agentName}' (${filePath}) toolkit '${pluginName}' options must be an array of tool names or an options object`,
+  );
+}
diff --git a/packages/appkit/src/plugins/agents/manifest.json b/packages/appkit/src/plugins/agents/manifest.json
new file mode 100644
index 00000000..f3986c83
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/manifest.json
@@ -0,0 +1,24 @@
+{
+  "$schema": "https://databricks.github.io/appkit/schemas/plugin-manifest.schema.json",
+  "name": "agents",
+  "displayName": "Agents Plugin",
+  "description": "AI agents driven by markdown configs or code, with auto-tool-discovery from registered plugins",
+  "resources": {
+    "required": [],
+    "optional": [
+      {
+        "type": "serving_endpoint",
+        "alias": "Model Serving (agents)",
+        "resourceKey": "agents-serving-endpoint",
+        "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_AGENT_ENDPOINT` when no per-agent model is configured. Omit when agents use only external adapters.",
+        "permission": "CAN_QUERY",
+        "fields": {
+          "name": {
+            "env": "DATABRICKS_AGENT_ENDPOINT",
+            "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`"
+          }
+        }
+      }
+    ]
+  }
+}
diff --git a/packages/appkit/src/plugins/agents/schemas.ts b/packages/appkit/src/plugins/agents/schemas.ts
new file mode 100644
index 00000000..cea6c6d6
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/schemas.ts
@@ -0,0 +1,69 @@
+import { z } from "zod";
+
+/**
+ * Static body cap for the `message` field on `POST /chat`. 64 000 characters
+ * is well above any legitimate chat turn (~16k tokens at 4 chars/token) and
+ * bounds the per-request cost of appending to `InMemoryThreadStore` without
+ * requiring per-deployment configuration.
+ */
+const MAX_MESSAGE_CHARS = 64_000;
+
+/** Cap applied to `/invocations` when `input` is a raw string. */
+const MAX_INVOCATIONS_INPUT_CHARS = 64_000;
+
+/**
+ * Cap on the number of items accepted in an `/invocations` `input` array
+ * (one element per seeded message). Protects against a single request
+ * seeding hundreds of messages into the thread store.
+ */
+const MAX_INVOCATIONS_INPUT_ITEMS = 100;
+
+/** Per-message `content` size cap (string form). */
+const MAX_INVOCATIONS_ITEM_CHARS = 64_000;
+
+/** Per-message `content` size cap (array form). */
+const MAX_INVOCATIONS_ITEM_ARRAY_ITEMS = 100;
+
+export const chatRequestSchema = z.object({
+  message: z
+    .string()
+    .min(1, "message must not be empty")
+    .max(
+      MAX_MESSAGE_CHARS,
+      `message exceeds the ${MAX_MESSAGE_CHARS}-character limit`,
+    ),
+  threadId: z.string().optional(),
+  agent: z.string().optional(),
+});
+
+const messageItemSchema = z.object({
+  role: z.enum(["user", "assistant", "system"]).optional(),
+  content: z
+    .union([
+      z.string().max(MAX_INVOCATIONS_ITEM_CHARS),
+      z.array(z.any()).max(MAX_INVOCATIONS_ITEM_ARRAY_ITEMS),
+    ])
+    .optional(),
+  type: z.string().optional(),
+});
+
+export const invocationsRequestSchema = z.object({
+  input: z.union([
+    z.string().min(1).max(MAX_INVOCATIONS_INPUT_CHARS),
+    z
+      .array(messageItemSchema)
+      .min(1)
+      .max(
+        MAX_INVOCATIONS_INPUT_ITEMS,
+        `input array exceeds the ${MAX_INVOCATIONS_INPUT_ITEMS}-item limit`,
+      ),
+  ]),
+  stream: z.boolean().optional().default(true),
+  model: z.string().optional(),
+});
+
+export const approvalRequestSchema = z.object({
+  streamId: z.string().min(1, "streamId is required"),
+  approvalId: z.string().min(1, "approvalId is required"),
+  decision: z.enum(["approve", "deny"]),
+});
diff --git a/packages/appkit/src/plugins/agents/system-prompt.ts b/packages/appkit/src/plugins/agents/system-prompt.ts
new file mode 100644
index 00000000..01f3fe9b
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/system-prompt.ts
@@ -0,0 +1,52 @@
+import type { PromptContext } from "./types";
+
+/**
+ * Default base system prompt: product identity, active AppKit plugins, and
+ * tool-agnostic behavior hints.
+ *
+ * Individual tool definitions and JSON Schemas are still sent through the
+ * model's `tools` / function-calling channel — this string is not a second
+ * copy of that list. `ctx.toolNames` is available for custom
+ * `baseSystemPrompt` callbacks; the default text stays short and does not
+ * enumerate tools to avoid drift and token bloat.
+ */
+export function buildBaseSystemPrompt(ctx: PromptContext): string {
+  const { pluginNames } = ctx;
+  const lines: string[] = [
+    "You are an AI assistant running on Databricks AppKit.",
+  ];
+
+  if (pluginNames.length > 0) {
+    lines.push("");
+    lines.push(`Active AppKit plugins: ${pluginNames.join(", ")}`);
+  }
+
+  lines.push("");
+  lines.push("Guidelines:");
+  lines.push(
+    "- Be concise: for large or noisy tool output, summarize what matters and how to go deeper instead of pasting everything.",
+  );
+  lines.push(
+    "- Use each tool as defined: pass required arguments and use the syntax, dialect, or path rules the target system expects (see each tool’s description and schema).",
+  );
+  lines.push(
+    "- If a tool call fails, explain the error in plain language and suggest a fix or next step.",
+  );
+  lines.push(
+    "- Respect tool metadata and app policy: read-only vs destructive tools, user/identity context, and any approval or safety flows the app provides.",
+  );
+
+  return lines.join("\n");
+}
+
+/**
+ * Compose the full system prompt from the base prompt and an optional
+ * per-agent user prompt.
+ */
+export function composeSystemPrompt(
+  basePrompt: string,
+  agentPrompt?: string,
+): string {
+  if (!agentPrompt) return basePrompt;
+  return `${basePrompt}\n\n${agentPrompt}`;
+}
diff --git a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts
new file mode 100644
index 00000000..747ada48
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts
@@ -0,0 +1,367 @@
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import type {
+  AgentAdapter,
+  AgentInput,
+  AgentRunContext,
+  AgentToolDefinition,
+  ToolProvider,
+} from "shared";
+import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
+import { z } from "zod";
+import { CacheManager } from "../../../cache";
+// Import the class directly so we can construct it without a createApp
+import { AgentsPlugin } from "../agents";
+import { buildToolkitEntries } from "../build-toolkit";
+import { defineTool, type ToolRegistry } from "../tools/define-tool";
+import type { AgentsPluginConfig, ToolkitEntry } from "../types";
+import { isToolkitEntry } from "../types";
+
+interface FakeContext {
+  providers: Array<{ name: string; provider: ToolProvider }>;
+  getToolProviders(): Array<{ name: string; provider: ToolProvider }>;
+  getPluginNames(): string[];
+  addRoute(): void;
+  executeTool: (
+    req: unknown,
+    pluginName: string,
+    localName: string,
+    args: unknown,
+  ) => Promise<unknown>;
+}
+
+function fakeContext(
+  providers: Array<{ name: string; provider: ToolProvider }>,
+): FakeContext {
+  return {
+    providers,
+    getToolProviders: () => providers,
+    getPluginNames: () => providers.map((p) => p.name),
+    addRoute: vi.fn(),
+    executeTool: vi.fn(async (_req, p, n, args) => ({
+      plugin: p,
+      tool: n,
+      args,
+    })),
+  };
+}
+
+function stubAdapter(): AgentAdapter {
+  return {
+    async *run(_input: AgentInput, _ctx: AgentRunContext) {
+      yield { type: "message_delta", content: "" };
+    },
+  };
+}
+
+function makeToolProvider(
+  pluginName: string,
+  registry: ToolRegistry,
+): ToolProvider & {
+  toolkit: (opts?: unknown) => Record<string, ToolkitEntry>;
+} {
+  return {
+    getAgentTools(): AgentToolDefinition[] {
+      return Object.entries(registry).map(([name, entry]) => ({
+        name,
+        description: entry.description,
+        parameters: { type: "object", properties: {} },
+      }));
+    },
+    async executeAgentTool(name, args) {
+      return { callFrom: pluginName, name, args };
+    },
+    toolkit: (opts) => buildToolkitEntries(pluginName, registry, opts as never),
+  };
+}
+
+let tmpDir: string;
+
+beforeEach(async () => {
+  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "agents-plugin-"));
+  const storage = {
+    get: vi.fn(),
+    set: vi.fn(),
+    delete: vi.fn(),
+    keys: vi.fn(),
+    healthCheck: vi.fn(async () => true),
+    close: vi.fn(async () => {}),
+  };
+  // biome-ignore lint/suspicious/noExplicitAny: test-only CacheManager wiring
+  await CacheManager.getInstance({ storage: storage as any });
+});
+
+afterEach(() => {
+  fs.rmSync(tmpDir, { recursive: true, force: true });
+});
+
+function instantiate(config: AgentsPluginConfig, ctx?: FakeContext) {
+  const plugin = new AgentsPlugin({ ...config, name: "agent" });
+  plugin.attachContext({ context: ctx as unknown as object });
+  return plugin;
+}
+
+function writeMarkdownAgent(dir: string, id: string, content: string) {
+  const folder = path.join(dir, id);
+  fs.mkdirSync(folder, { recursive: true });
+  fs.writeFileSync(path.join(folder, "agent.md"), content, "utf-8");
+}
+
+describe("AgentsPlugin", () => {
+  test("registers code-defined agents and exposes them via exports", async () => {
+    const plugin = instantiate({
+      dir: false,
+      agents: {
+        support: {
+          instructions: "You help customers.",
+          model: stubAdapter(),
+        },
+      },
+    });
+    await plugin.setup();
+
+    const api = plugin.exports() as {
+      list: () => string[];
+      getDefault: () => string | null;
+    };
+    expect(api.list()).toEqual(["support"]);
+    expect(api.getDefault()).toBe("support");
+  });
+
+  test("loads markdown agents from a directory", async () => {
+    writeMarkdownAgent(
+      tmpDir,
+      "assistant",
+      "---\ndefault: true\n---\nYou are helpful.",
+    );
+    const plugin = instantiate({
+      dir: tmpDir,
+      defaultModel: stubAdapter(),
+    });
+    await plugin.setup();
+
+    const api = plugin.exports() as {
+      list: () => string[];
+      getDefault: () => string | null;
+    };
+    expect(api.list()).toEqual(["assistant"]);
+    expect(api.getDefault()).toBe("assistant");
+  });
+
+  test("code definitions override markdown on key collision", async () => {
+    writeMarkdownAgent(tmpDir, "support", "---\n---\nFrom markdown.");
+    const plugin = instantiate({
+      dir: tmpDir,
+      defaultModel: stubAdapter(),
+      agents: {
+        support: {
+          instructions: "From code",
+          model: stubAdapter(),
+        },
+      },
+    });
+    await plugin.setup();
+
+    const api = plugin.exports() as {
+      get: (name: string) => { instructions: string } | null;
+    };
+    expect(api.get("support")?.instructions).toBe("From code");
+  });
+
+  test("auto-inherit default is safe (both file and code get nothing without an explicit opt-in)", async () => {
+    const registry: ToolRegistry = {
+      query: defineTool({
+        description: "q",
+        schema: z.object({ sql: z.string() }),
+        autoInheritable: true, // even with autoInheritable, no spread without opt-in
+        handler: () => "ok",
+      }),
+    };
+    const provider = makeToolProvider("analytics", registry);
+    const ctx = fakeContext([{ name: "analytics", provider }]);
+
+    writeMarkdownAgent(tmpDir, "assistant", "---\n---\nYou are helpful.");
+
+    const plugin = instantiate(
+      {
+        dir: tmpDir,
+        defaultModel: stubAdapter(),
+        agents: {
+          manual: {
+            instructions: "Manual agent",
+            model: stubAdapter(),
+          },
+        },
+      },
+      ctx,
+    );
+    await plugin.setup();
+
+    const api = plugin.exports() as {
+      get: (name: string) => { toolIndex: Map<string, unknown> } | null;
+    };
+    const fileAgent = api.get("assistant");
+    const codeAgent = api.get("manual");
+
+    expect(fileAgent?.toolIndex.size).toBe(0);
+    expect(codeAgent?.toolIndex.size).toBe(0);
+  });
+
+  test("opting in with autoInheritTools: { file: true } spreads only autoInheritable tools", async () => {
+    const registry: ToolRegistry = {
+      query: defineTool({
+        description: "read-only query",
+        schema: z.object({ sql: z.string() }),
+        autoInheritable: true,
+        handler: () => "ok",
+      }),
+      destructive: defineTool({
+        description: "mutation",
+        schema: z.object({}),
+        // autoInheritable left unset → skipped even when opted in
+        handler: () => "ok",
+      }),
+    };
+    const provider = makeToolProvider("analytics", registry);
+    const ctx = fakeContext([{ name: "analytics", provider }]);
+
+    writeMarkdownAgent(tmpDir, "assistant", "---\n---\nYou are helpful.");
+
+    const plugin = instantiate(
+      {
+        dir: tmpDir,
+        defaultModel: stubAdapter(),
+        autoInheritTools: { file: true },
+      },
+      ctx,
+    );
+    await plugin.setup();
+
+    const api = plugin.exports() as {
+      get: (name: string) => { toolIndex: Map<string, unknown> } | null;
+    };
+    const fileAgent = api.get("assistant");
+    const keys = Array.from(fileAgent?.toolIndex.keys() ?? []);
+    expect(keys).toEqual(["analytics.query"]);
+  });
+
+  test("autoInheritTools: true enables both origins but still filters by autoInheritable", async () => {
+    const registry: ToolRegistry = {
+      safe: defineTool({
+        description: "safe",
+        schema: z.object({}),
+        autoInheritable: true,
+        handler: () => "ok",
+      }),
+      unsafe: defineTool({
+        description: "unsafe",
+        schema: z.object({}),
+        handler: () => "ok",
+      }),
+    };
+    const provider = makeToolProvider("p", registry);
+    const ctx = fakeContext([{ name: "p", provider }]);
+
+    const plugin = instantiate(
+      {
+        dir: false,
+        defaultModel: stubAdapter(),
+        autoInheritTools: true,
+        agents: {
+          code1: {
+            instructions: "code agent",
+            model: stubAdapter(),
+          },
+        },
+      },
+      ctx,
+    );
+    await plugin.setup();
+
+    const api = plugin.exports() as {
+      get: (name: string) => { toolIndex: Map<string, unknown> } | null;
+    };
+    const codeAgent = api.get("code1");
+    const keys = Array.from(codeAgent?.toolIndex.keys() ?? []);
+    expect(keys).toEqual(["p.safe"]);
+  });
+
+  test("file-loaded agent respects explicit toolkits (skips auto-inherit)", async () => {
+    const registry: ToolRegistry = {
+      query: defineTool({
+        description: "q",
+        schema: z.object({ sql: z.string() }),
+        handler: () => "ok",
+      }),
+    };
+    const registry2: ToolRegistry = {
+      list: defineTool({
+        description: "l",
+        schema: z.object({}),
+        handler: () => [],
+      }),
+    };
+    const ctx = fakeContext([
+      { name: "analytics", provider: makeToolProvider("analytics", registry) },
+      { name: "files", provider: makeToolProvider("files", registry2) },
+    ]);
+
+    writeMarkdownAgent(
+      tmpDir,
+      "analyst",
+      "---\ntoolkits: [analytics]\n---\nAnalyst.",
+    );
+
+    const plugin = instantiate(
+      { dir: tmpDir, defaultModel: stubAdapter() },
+      ctx,
+    );
+    await plugin.setup();
+
+    const api = plugin.exports() as {
+      get: (name: string) => { toolIndex: Map<string, unknown> } | null;
+    };
+    const agent = api.get("analyst");
+    const toolNames = Array.from(agent?.toolIndex.keys() ?? []);
+    expect(toolNames.some((n) => n.startsWith("analytics."))).toBe(true);
+    expect(toolNames.some((n) => n.startsWith("files."))).toBe(false);
+  });
+
+  test("registers sub-agents as agent-<key> tools", async () => {
+    const plugin = instantiate({
+      dir: false,
+      agents: {
+        supervisor: {
+          instructions: "Supervise",
+          model: stubAdapter(),
+          agents: {
+            worker: {
+              instructions: "Work",
+              model: stubAdapter(),
+            },
+          },
+        },
+      },
+    });
+    await plugin.setup();
+
+    const api = plugin.exports() as {
+      get: (name: string) => { toolIndex: Map<string, unknown> } | null;
+    };
+    const sup = api.get("supervisor");
+    expect(sup?.toolIndex.has("agent-worker")).toBe(true);
+  });
+
+  test("isToolkitEntry type guard recognizes toolkit entries", () => {
+    const entry: ToolkitEntry = {
+      __toolkitRef: true,
+      pluginName: "x",
+      localName: "y",
+      def: { name: "x.y", description: "", parameters: { type: "object" } },
+    };
+    expect(isToolkitEntry(entry)).toBe(true);
+    expect(isToolkitEntry({ foo: 1 })).toBe(false);
+    expect(isToolkitEntry(null)).toBe(false);
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/approval-route.test.ts b/packages/appkit/src/plugins/agents/tests/approval-route.test.ts
new file mode 100644
index 00000000..6e090bd2
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/approval-route.test.ts
@@ -0,0 +1,292 @@
+import type express from "express";
+import { beforeEach, describe, expect, test, vi } from "vitest";
+import { CacheManager } from "../../../cache";
+import { AgentsPlugin } from "../agents";
+
+/**
+ * Focused tests for the `POST /approve` route and the associated
+ * ownership / error paths on `_handleApprove`. Covers:
+ *
+ *   - Schema validation of the request body.
+ *   - Ownership check: the user submitting the decision must be the same
+ *     user who initiated the underlying chat stream.
+ *   - 404 for unknown stream (already completed or never existed).
+ *   - 404 for unknown approvalId even when the stream is active.
+ *   - Happy-path resolution of a pending gate with `approve` and `deny`.
+ *   - Cancel of an active stream denies every pending gate on that stream.
+ */
+
+function mockReq(body: unknown, userId?: string): express.Request {
+  const headers: Record<string, string> = {};
+  if (userId) {
+    headers["x-forwarded-user"] = userId;
+    headers["x-forwarded-access-token"] = "fake-token";
+  }
+  return {
+    body,
+    headers,
+    header: (name: string) => headers[name.toLowerCase()],
+  } as unknown as express.Request;
+}
+
+function mockRes() {
+  const json = vi.fn();
+  const end = vi.fn();
+  let statusCode = 200;
+  const status = vi.fn((code: number) => {
+    statusCode = code;
+    return { json, end };
+  });
+  return {
+    res: { status, json, end } as unknown as express.Response,
+    get statusCode() {
+      return statusCode;
+    },
+    json,
+  };
+}
+
+beforeEach(() => {
+  CacheManager.getInstanceSync = vi.fn(() => ({
+    get: vi.fn(),
+    set: vi.fn(),
+    delete: vi.fn(),
+    getOrExecute: vi.fn(async (_k: unknown[], fn: () => Promise<unknown>) =>
+      fn(),
+    ),
+    generateKey: vi.fn(() => "test-key"),
+  })) as any;
+  process.env.NODE_ENV = "development";
+});
+
+describe("POST /approve route handler", () => {
+  test("rejects invalid body shape with 400", async () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    const { res, json } = mockRes();
+    await (plugin as any)._handleApprove(mockReq({}, "alice"), res);
+    expect(res.status).toHaveBeenCalledWith(400);
+    expect(json).toHaveBeenCalledWith(
+      expect.objectContaining({ error: "Invalid request" }),
+    );
+  });
+
+  test("returns 404 when the streamId is unknown", async () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    const { res, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleApprove: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleApprove(
+      mockReq(
+        { streamId: "ghost", approvalId: "a1", decision: "approve" },
+        "alice",
+      ),
+      res,
+    );
+    expect(res.status).toHaveBeenCalledWith(404);
+    expect(json).toHaveBeenCalledWith(
+      expect.objectContaining({ error: expect.stringMatching(/not found/i) }),
+    );
+  });
+
+  test("returns 403 when submitter is different from stream owner", async () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    (plugin as any).activeStreams.set("stream-x", {
+      controller: new AbortController(),
+      userId: "alice",
+    });
+    const gate = (plugin as any).approvalGate;
+    const waiter = gate.wait({
+      approvalId: "a1",
+      streamId: "stream-x",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+
+    const { res, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleApprove: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleApprove(
+      mockReq(
+        { streamId: "stream-x", approvalId: "a1", decision: "approve" },
+        "bob",
+      ),
+      res,
+    );
+    expect(res.status).toHaveBeenCalledWith(403);
+    expect(json).toHaveBeenCalledWith(
+      expect.objectContaining({ error: "Forbidden" }),
+    );
+
+    // Settle the waiter to clean up.
+    gate.submit({ approvalId: "a1", userId: "alice", decision: "deny" });
+    await expect(waiter).resolves.toBe("deny");
+  });
+
+  test("returns 404 when approvalId is unknown on an active stream", async () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    (plugin as any).activeStreams.set("stream-y", {
+      controller: new AbortController(),
+      userId: "alice",
+    });
+    const { res, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleApprove: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleApprove(
+      mockReq(
+        { streamId: "stream-y", approvalId: "unknown-a", decision: "approve" },
+        "alice",
+      ),
+      res,
+    );
+    expect(res.status).toHaveBeenCalledWith(404);
+    expect(json).toHaveBeenCalledWith(
+      expect.objectContaining({
+        error: expect.stringMatching(/not found|already settled/i),
+      }),
+    );
+  });
+
+  test("happy path: approve resolves pending gate with 'approve'", async () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    (plugin as any).activeStreams.set("stream-z", {
+      controller: new AbortController(),
+      userId: "alice",
+    });
+    const gate = (plugin as any).approvalGate;
+    const waiter = gate.wait({
+      approvalId: "a42",
+      streamId: "stream-z",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+
+    const { res, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleApprove: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleApprove(
+      mockReq(
+        { streamId: "stream-z", approvalId: "a42", decision: "approve" },
+        "alice",
+      ),
+      res,
+    );
+    expect(res.status).not.toHaveBeenCalled();
+    expect(json).toHaveBeenCalledWith({ decision: "approve" });
+    await expect(waiter).resolves.toBe("approve");
+  });
+
+  test("happy path: deny resolves pending gate with 'deny'", async () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    (plugin as any).activeStreams.set("stream-z", {
+      controller: new AbortController(),
+      userId: "alice",
+    });
+    const gate = (plugin as any).approvalGate;
+    const waiter = gate.wait({
+      approvalId: "a43",
+      streamId: "stream-z",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+
+    const { res, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleApprove: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleApprove(
+      mockReq(
+        { streamId: "stream-z", approvalId: "a43", decision: "deny" },
+        "alice",
+      ),
+      res,
+    );
+    expect(json).toHaveBeenCalledWith({ decision: "deny" });
+    await expect(waiter).resolves.toBe("deny");
+  });
+});
+
+describe("POST /cancel ownership + gate cleanup", () => {
+  test("cancelling a stream denies every pending approval on that stream", async () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    const controller = new AbortController();
+    (plugin as any).activeStreams.set("stream-c", {
+      controller,
+      userId: "alice",
+    });
+    const gate = (plugin as any).approvalGate;
+    const a = gate.wait({
+      approvalId: "ca1",
+      streamId: "stream-c",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+    const b = gate.wait({
+      approvalId: "ca2",
+      streamId: "stream-c",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+
+    const { res, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleCancel: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleCancel(mockReq({ streamId: "stream-c" }, "alice"), res);
+
+    expect(controller.signal.aborted).toBe(true);
+    expect(json).toHaveBeenCalledWith({ cancelled: true });
+    await expect(a).resolves.toBe("deny");
+    await expect(b).resolves.toBe("deny");
+  });
+
+  test("cancel from a different user is refused with 403", async () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    const controller = new AbortController();
+    (plugin as any).activeStreams.set("stream-d", {
+      controller,
+      userId: "alice",
+    });
+    const { res, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleCancel: (
+          r: express.Request,
+          w: express.Response,
+        ) => Promise<void>;
+      }
+    )._handleCancel(mockReq({ streamId: "stream-d" }, "bob"), res);
+    expect(res.status).toHaveBeenCalledWith(403);
+    expect(controller.signal.aborted).toBe(false);
+    expect(json).toHaveBeenCalledWith(
+      expect.objectContaining({ error: "Forbidden" }),
+    );
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/create-agent.test.ts b/packages/appkit/src/plugins/agents/tests/create-agent.test.ts
new file mode 100644
index 00000000..3822897f
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/create-agent.test.ts
@@ -0,0 +1,75 @@
+import { describe, expect, test } from "vitest";
+import { z } from "zod";
+import { createAgent } from "../../../core/create-agent-def";
+import { tool } from "../tools/tool";
+import type { AgentDefinition } from "../types";
+
+describe("createAgent", () => {
+  test("returns the definition unchanged for a simple agent", () => {
+    const def: AgentDefinition = {
+      name: "support",
+      instructions: "You help customers.",
+      model: "endpoint-x",
+    };
+    const result = createAgent(def);
+    expect(result).toBe(def);
+  });
+
+  test("accepts tools as a keyed record", () => {
+    const get_weather = tool({
+      name: "get_weather",
+      description: "Get the weather",
+      schema: z.object({ city: z.string() }),
+      execute: async ({ city }) => `Sunny in ${city}`,
+    });
+
+    const def = createAgent({
+      instructions: "...",
+      tools: { get_weather },
+    });
+
+    expect(def.tools?.get_weather).toBe(get_weather);
+  });
+
+  test("accepts sub-agents in a keyed record", () => {
+    const researcher = createAgent({ instructions: "Research." });
+    const supervisor = createAgent({
+      instructions: "Supervise.",
+      agents: { researcher },
+    });
+    expect(supervisor.agents?.researcher).toBe(researcher);
+  });
+
+  test("throws on a direct self-cycle", () => {
+    const a: AgentDefinition = { instructions: "a" };
+    // biome-ignore lint/suspicious/noExplicitAny: intentional cycle setup for test
+    (a as any).agents = { self: a };
+    expect(() => createAgent(a)).toThrow(/cycle/i);
+  });
+
+  test("throws on an indirect cycle", () => {
+    const a: AgentDefinition = { instructions: "a" };
+    const b: AgentDefinition = { instructions: "b" };
+    a.agents = { b };
+    b.agents = { a };
+    expect(() => createAgent(a)).toThrow(/cycle/i);
+  });
+
+  test("accepts a DAG of sub-agents without throwing", () => {
+    const leaf: AgentDefinition = { instructions: "leaf" };
+    const branchA: AgentDefinition = {
+      instructions: "a",
+      agents: { leaf },
+    };
+    const branchB: AgentDefinition = {
+      instructions: "b",
+      agents: { leaf },
+    };
+    const root = createAgent({
+      instructions: "root",
+      agents: { branchA, branchB },
+    });
+    expect(root.agents?.branchA.agents?.leaf).toBe(leaf);
+    expect(root.agents?.branchB.agents?.leaf).toBe(leaf);
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts b/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts
new file mode 100644
index 00000000..935fa240
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts
@@ -0,0 +1,299 @@
+import type express from "express";
+import { beforeEach, describe, expect, test, vi } from "vitest";
+import { CacheManager } from "../../../cache";
+import { AgentsPlugin } from "../agents";
+import { chatRequestSchema, invocationsRequestSchema } from "../schemas";
+
+/**
+ * Exercises the four DoS caps landed for MVP:
+ *
+ *   - `chatRequestSchema.message.max(64_000)` — body cap on `POST /chat`.
+ *   - Per-user `maxConcurrentStreamsPerUser` — 429 with Retry-After.
+ *   - Per-run `maxToolCalls` — aborts stream and throws in `executeTool`.
+ *   - Per-delegation `maxSubAgentDepth` — rejects in `runSubAgent`.
+ *
+ * Route-level tests exercise the schemas + `_handleChat` directly via the
+ * mocked req/res pattern already used by approval-route.test.ts.
+ */
+
+function mockReq(body: unknown, userId?: string): express.Request {
+  const headers: Record<string, string> = {};
+  if (userId) {
+    headers["x-forwarded-user"] = userId;
+    headers["x-forwarded-access-token"] = "fake-token";
+  }
+  return {
+    body,
+    headers,
+    header: (name: string) => headers[name.toLowerCase()],
+  } as unknown as express.Request;
+}
+
+function mockRes() {
+  const json = vi.fn();
+  const setHeader = vi.fn();
+  let statusCode = 200;
+  const status = vi.fn((code: number) => {
+    statusCode = code;
+    return { json };
+  });
+  return {
+    res: { status, json, setHeader } as unknown as express.Response,
+    get statusCode() {
+      return statusCode;
+    },
+    json,
+    setHeader,
+  };
+}
+
+beforeEach(() => {
+  CacheManager.getInstanceSync = vi.fn(() => ({
+    get: vi.fn(),
+    set: vi.fn(),
+    delete: vi.fn(),
+    getOrExecute: vi.fn(async (_k: unknown[], fn: () => Promise<unknown>) =>
+      fn(),
+    ),
+    generateKey: vi.fn(() => "test-key"),
+    // biome-ignore lint/suspicious/noExplicitAny: test mock
+  })) as any;
+  process.env.NODE_ENV = "development";
+});
+
+describe("chatRequestSchema — body cap", () => {
+  test("accepts messages up to 64_000 characters", () => {
+    const result = chatRequestSchema.safeParse({
+      message: "a".repeat(64_000),
+    });
+    expect(result.success).toBe(true);
+  });
+
+  test("rejects messages over 64_000 characters", () => {
+    const result = chatRequestSchema.safeParse({
+      message: "a".repeat(64_001),
+    });
+    expect(result.success).toBe(false);
+    if (!result.success) {
+      expect(JSON.stringify(result.error.flatten())).toMatch(/64000/);
+    }
+  });
+
+  test("rejects empty message (existing contract)", () => {
+    expect(chatRequestSchema.safeParse({ message: "" }).success).toBe(false);
+  });
+});
+
+describe("invocationsRequestSchema — input caps", () => {
+  test("accepts string input up to 64_000 characters", () => {
+    const result = invocationsRequestSchema.safeParse({
+      input: "a".repeat(64_000),
+    });
+    expect(result.success).toBe(true);
+  });
+
+  test("rejects string input over 64_000 characters", () => {
+    const result = invocationsRequestSchema.safeParse({
+      input: "a".repeat(64_001),
+    });
+    expect(result.success).toBe(false);
+  });
+
+  test("accepts array input up to 100 items", () => {
+    const items = Array.from({ length: 100 }, (_, i) => ({
+      role: "user" as const,
+      content: `m${i}`,
+    }));
+    expect(invocationsRequestSchema.safeParse({ input: items }).success).toBe(
+      true,
+    );
+  });
+
+  test("rejects array input over 100 items", () => {
+    const items = Array.from({ length: 101 }, (_, i) => ({
+      role: "user" as const,
+      content: `m${i}`,
+    }));
+    const result = invocationsRequestSchema.safeParse({ input: items });
+    expect(result.success).toBe(false);
+  });
+
+  test("rejects per-item content over 64_000 characters", () => {
+    const result = invocationsRequestSchema.safeParse({
+      input: [{ role: "user", content: "a".repeat(64_001) }],
+    });
+    expect(result.success).toBe(false);
+  });
+});
+
+describe("POST /chat — per-user concurrent-stream limit", () => {
+  function seedPlugin(
+    overrides: ConstructorParameters<typeof AgentsPlugin>[0] = { dir: false },
+  ): AgentsPlugin {
+    const plugin = new AgentsPlugin(overrides);
+    // Seed the agents map directly so _handleChat can resolve "hello"
+    // without running setup() (which would require a live model).
+    // biome-ignore lint/suspicious/noExplicitAny: seeding private state
+    (plugin as any).agents.set("hello", {
+      name: "hello",
+      instructions: "hi",
+      adapter: { async *run() {} },
+      toolIndex: new Map(),
+    });
+    // biome-ignore lint/suspicious/noExplicitAny: seeding private state
+    (plugin as any).defaultAgentName = "hello";
+    return plugin;
+  }
+
+  test("rejects with 429 + Retry-After when user is at-limit (default 5)", async () => {
+    const plugin = seedPlugin();
+    for (let i = 0; i < 5; i++) {
+      // biome-ignore lint/suspicious/noExplicitAny: seeding
+      (plugin as any).activeStreams.set(`s${i}`, {
+        controller: new AbortController(),
+        userId: "alice",
+      });
+    }
+
+    const { res, setHeader, json } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleChat: (r: express.Request, w: express.Response) => Promise<void>;
+      }
+    )._handleChat(mockReq({ message: "hi" }, "alice"), res);
+
+    expect(res.status).toHaveBeenCalledWith(429);
+    expect(setHeader).toHaveBeenCalledWith("Retry-After", "5");
+    expect(json).toHaveBeenCalledWith(
+      expect.objectContaining({
+        error: expect.stringMatching(/Too many concurrent streams/),
+      }),
+    );
+  });
+
+  test("does not reject when another user is at-limit (per-user, not global)", async () => {
+    const plugin = seedPlugin();
+    for (let i = 0; i < 5; i++) {
+      // biome-ignore lint/suspicious/noExplicitAny: seeding
+      (plugin as any).activeStreams.set(`s${i}`, {
+        controller: new AbortController(),
+        userId: "alice",
+      });
+    }
+
+    // Carol's request must not see a 429 even though alice is at-limit.
+    // Don't bother running the full stream — we assert only that 429 is
+    // not the response status.
+    const { res } = mockRes();
+    // biome-ignore lint/suspicious/noExplicitAny: stub _streamAgent to avoid needing a real adapter
+    (plugin as any)._streamAgent = vi.fn(async () => undefined);
+
+    await (
+      plugin as unknown as {
+        _handleChat: (r: express.Request, w: express.Response) => Promise<void>;
+      }
+    )._handleChat(mockReq({ message: "hi" }, "carol"), res);
+
+    expect(res.status).not.toHaveBeenCalledWith(429);
+  });
+
+  test("honours agents({ limits: { maxConcurrentStreamsPerUser } })", async () => {
+    const plugin = seedPlugin({
+      dir: false,
+      limits: { maxConcurrentStreamsPerUser: 2 },
+    });
+    for (let i = 0; i < 2; i++) {
+      // biome-ignore lint/suspicious/noExplicitAny: seeding
+      (plugin as any).activeStreams.set(`s${i}`, {
+        controller: new AbortController(),
+        userId: "alice",
+      });
+    }
+
+    const { res } = mockRes();
+    await (
+      plugin as unknown as {
+        _handleChat: (r: express.Request, w: express.Response) => Promise<void>;
+      }
+    )._handleChat(mockReq({ message: "hi" }, "alice"), res);
+
+    expect(res.status).toHaveBeenCalledWith(429);
+  });
+});
+
+describe("resolvedLimits — default values", () => {
+  test("exposes the documented MVP defaults when unconfigured", () => {
+    const plugin = new AgentsPlugin({ dir: false });
+    // biome-ignore lint/suspicious/noExplicitAny: read private getter
+    const limits = (plugin as any).resolvedLimits;
+    expect(limits).toEqual({
+      maxConcurrentStreamsPerUser: 5,
+      maxToolCalls: 50,
+      maxSubAgentDepth: 3,
+    });
+  });
+
+  test("lets callers override any subset", () => {
+    const plugin = new AgentsPlugin({
+      dir: false,
+      limits: { maxToolCalls: 100 },
+    });
+    // biome-ignore lint/suspicious/noExplicitAny: read private
+    const limits = (plugin as any).resolvedLimits;
+    expect(limits.maxToolCalls).toBe(100);
+    expect(limits.maxConcurrentStreamsPerUser).toBe(5);
+    expect(limits.maxSubAgentDepth).toBe(3);
+  });
+});
+
+describe("runSubAgent — depth guard", () => {
+  test("rejects when depth exceeds the configured maximum", async () => {
+    const plugin = new AgentsPlugin({
+      dir: false,
+      limits: { maxSubAgentDepth: 2 },
+    });
+    // biome-ignore lint/suspicious/noExplicitAny: call private method directly
+    await expect(
+      (plugin as any).runSubAgent(
+        mockReq({}, "alice"),
+        { name: "child", toolIndex: new Map() },
+        {},
+        new AbortController().signal,
+        3, // exceeds limit 2
+      ),
+    ).rejects.toThrow(/Sub-agent depth exceeded \(limit 2\)/);
+  });
+
+  test("accepts at the boundary (depth === limit)", async () => {
+    // Use a stub adapter so we don't need a real model.
+    const plugin = new AgentsPlugin({
+      dir: false,
+      limits: { maxSubAgentDepth: 3 },
+      agents: {},
+    });
+
+    const stubAdapter = {
+      // biome-ignore lint/suspicious/noExplicitAny: adapter shape not under test
+      async *run(): any {
+        yield { type: "message", content: "hello from depth-3" };
+      },
+    };
+    const child = {
+      name: "child",
+      instructions: "test",
+      // biome-ignore lint/suspicious/noExplicitAny: stub shape
+      adapter: stubAdapter as any,
+      toolIndex: new Map(),
+    };
+
+    // biome-ignore lint/suspicious/noExplicitAny: call private
+    const result = await (plugin as any).runSubAgent(
+      mockReq({}, "alice"),
+      child,
+      { input: "test" },
+      new AbortController().signal,
+      3, // at the limit, not over
+    );
+    expect(result).toBe("hello from depth-3");
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/event-channel.test.ts b/packages/appkit/src/plugins/agents/tests/event-channel.test.ts
new file mode 100644
index 00000000..d80d788d
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/event-channel.test.ts
@@ -0,0 +1,78 @@
+import { describe, expect, test } from "vitest";
+import { EventChannel } from "../event-channel";
+
+async function collect<T>(ch: EventChannel<T>): Promise<T[]> {
+  const out: T[] = [];
+  for await (const v of ch) out.push(v);
+  return out;
+}
+
+describe("EventChannel", () => {
+  test("yields pushed values in order", async () => {
+    const ch = new EventChannel<number>();
+    const p = collect(ch);
+    ch.push(1);
+    ch.push(2);
+    ch.push(3);
+    ch.close();
+    await expect(p).resolves.toEqual([1, 2, 3]);
+  });
+
+  test("pushes before iteration start are buffered", async () => {
+    const ch = new EventChannel<string>();
+    ch.push("a");
+    ch.push("b");
+    ch.close();
+    await expect(collect(ch)).resolves.toEqual(["a", "b"]);
+  });
+
+  test("waiting iterator is unblocked by subsequent push", async () => {
+    const ch = new EventChannel<number>();
+    const promise = collect(ch);
+    await new Promise((r) => setTimeout(r, 5));
+    ch.push(42);
+    ch.close();
+    await expect(promise).resolves.toEqual([42]);
+  });
+
+  test("close with no pending values terminates iteration", async () => {
+    const ch = new EventChannel<number>();
+    const p = collect(ch);
+    ch.close();
+    await expect(p).resolves.toEqual([]);
+  });
+
+  test("push after close is a no-op (channel is closed)", async () => {
+    const ch = new EventChannel<number>();
+    ch.close();
+    ch.push(1);
+    await expect(collect(ch)).resolves.toEqual([]);
+  });
+
+  test("close with error rejects the waiting iterator", async () => {
+    const ch = new EventChannel<number>();
+    const promise = collect(ch);
+    await new Promise((r) => setTimeout(r, 5));
+    ch.close(new Error("boom"));
+    await expect(promise).rejects.toThrow(/boom/);
+  });
+
+  test("interleaved pushes and reads stream through", async () => {
+    const ch = new EventChannel<number>();
+    const received: number[] = [];
+    const reader = (async () => {
+      for await (const v of ch) {
+        received.push(v);
+        if (received.length === 3) break;
+      }
+    })();
+    ch.push(1);
+    await new Promise((r) => setTimeout(r, 0));
+    ch.push(2);
+    await new Promise((r) => setTimeout(r, 0));
+    ch.push(3);
+    await reader;
+    expect(received).toEqual([1, 2, 3]);
+    ch.close();
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/event-translator.test.ts b/packages/appkit/src/plugins/agents/tests/event-translator.test.ts
new file mode 100644
index 00000000..050af001
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/event-translator.test.ts
@@ -0,0 +1,332 @@
+import type { ResponseStreamEvent } from "shared";
+import { describe, expect, test } from "vitest";
+import { AgentEventTranslator } from "../event-translator";
+
+describe("AgentEventTranslator", () => {
+  test("translates message_delta to output_item.added + output_text.delta on first delta", () => {
+    const translator = new AgentEventTranslator();
+    const events = translator.translate({
+      type: "message_delta",
+      content: "Hello",
+    });
+
+    expect(events).toHaveLength(2);
+    expect(events[0].type).toBe("response.output_item.added");
+    expect(events[1].type).toBe("response.output_text.delta");
+
+    if (events[1].type === "response.output_text.delta") {
+      expect(events[1].delta).toBe("Hello");
+    }
+  });
+
+  test("subsequent message_delta only produces output_text.delta", () => {
+    const translator = new AgentEventTranslator();
+    translator.translate({ type: "message_delta", content: "Hello" });
+    const events = translator.translate({
+      type: "message_delta",
+      content: " world",
+    });
+
+    expect(events).toHaveLength(1);
+    expect(events[0].type).toBe("response.output_text.delta");
+  });
+
+  test("sequence_number is monotonically increasing", () => {
+    const translator = new AgentEventTranslator();
+    const e1 = translator.translate({ type: "message_delta", content: "a" });
+    const e2 = translator.translate({ type: "message_delta", content: "b" });
+    const e3 = translator.finalize();
+
+    const allSeqs = [...e1, ...e2, ...e3].map((e) =>
+      "sequence_number" in e ? e.sequence_number : -1,
+    );
+
+    for (let i = 1; i < allSeqs.length; i++) {
+      expect(allSeqs[i]).toBeGreaterThan(allSeqs[i - 1]);
+    }
+  });
+
+  test("translates tool_call to paired output_item.added + output_item.done", () => {
+    const translator = new AgentEventTranslator();
+    const events = translator.translate({
+      type: "tool_call",
+      callId: "call_1",
+      name: "analytics.query",
+      args: { sql: "SELECT 1" },
+    });
+
+    expect(events).toHaveLength(2);
+    expect(events[0].type).toBe("response.output_item.added");
+    expect(events[1].type).toBe("response.output_item.done");
+
+    if (events[0].type === "response.output_item.added") {
+      expect(events[0].item.type).toBe("function_call");
+      if (events[0].item.type === "function_call") {
+        expect(events[0].item.name).toBe("analytics.query");
+        expect(events[0].item.call_id).toBe("call_1");
+      }
+    }
+  });
+
+  test("translates tool_result to paired output_item events", () => {
+    const translator = new AgentEventTranslator();
+    const events = translator.translate({
+      type: "tool_result",
+      callId: "call_1",
+      result: { rows: 42 },
+    });
+
+    expect(events).toHaveLength(2);
+    expect(events[0].type).toBe("response.output_item.added");
+
+    if (events[0].type === "response.output_item.added") {
+      expect(events[0].item.type).toBe("function_call_output");
+    }
+  });
+
+  test("translates tool_result error", () => {
+    const translator = new AgentEventTranslator();
+    const events = translator.translate({
+      type: "tool_result",
+      callId: "call_1",
+      result: null,
+      error: "Query failed",
+    });
+
+    if (
+      events[0].type === "response.output_item.added" &&
+      events[0].item.type === "function_call_output"
+    ) {
+      expect(events[0].item.output).toBe("Query failed");
+    }
+  });
+
+  test("translates thinking to appkit.thinking extension event", () => {
+    const translator = new AgentEventTranslator();
+    const events = translator.translate({
+      type: "thinking",
+      content: "Let me think about this...",
+    });
+
+    expect(events).toHaveLength(1);
+    expect(events[0].type).toBe("appkit.thinking");
+    if (events[0].type === "appkit.thinking") {
+      expect(events[0].content).toBe("Let me think about this...");
+    }
+  });
+
+  test("translates metadata to appkit.metadata extension event", () => {
+    const translator = new AgentEventTranslator();
+    const events = translator.translate({
+      type: "metadata",
+      data: { threadId: "t-123" },
+    });
+
+    expect(events).toHaveLength(1);
+    expect(events[0].type).toBe("appkit.metadata");
+    if (events[0].type === "appkit.metadata") {
+      expect(events[0].data.threadId).toBe("t-123");
+    }
+  });
+
+  test("status:complete triggers finalize with response.completed", () => {
+    const translator = new AgentEventTranslator();
+    translator.translate({ type: "message_delta", content: "Hi" });
+    const events = translator.translate({ type: "status", status: "complete" });
+
+    const types = events.map((e) => e.type);
+    expect(types).toContain("response.output_item.done");
+    expect(types).toContain("response.completed");
+  });
+
+  test("status:error emits error + response.failed", () => {
+    const translator = new AgentEventTranslator();
+    const events = translator.translate({
+      type: "status",
+      status: "error",
+      error: "Something broke",
+    });
+
+    expect(events).toHaveLength(2);
+    expect(events[0].type).toBe("error");
+    expect(events[1].type).toBe("response.failed");
+
+    if (events[0].type === "error") {
+      expect(events[0].error).toBe("Something broke");
+    }
+  });
+
+  test("finalize produces response.completed", () => {
+    const translator = new AgentEventTranslator();
+    const events = translator.finalize();
+
+    expect(events.some((e) => e.type === "response.completed")).toBe(true);
+  });
+
+  test("finalize with accumulated message text produces output_item.done", () => {
+    const translator = new AgentEventTranslator();
+    translator.translate({ type: "message_delta", content: "Hello " });
+    translator.translate({ type: "message_delta", content: "world" });
+    const events = translator.finalize();
+
+    const doneEvent = events.find(
+      (e) => e.type === "response.output_item.done",
+    );
+    expect(doneEvent).toBeDefined();
+    if (
+      doneEvent?.type === "response.output_item.done" &&
+      doneEvent.item.type === "message"
+    ) {
+      expect(doneEvent.item.content[0].text).toBe("Hello world");
+    }
+  });
+
+  test("output_index increments for tool calls", () => {
+    const translator = new AgentEventTranslator();
+    const e1 = translator.translate({
+      type: "tool_call",
+      callId: "c1",
+      name: "tool1",
+      args: {},
+    });
+    const e2 = translator.translate({
+      type: "tool_result",
+      callId: "c1",
+      result: "ok",
+    });
+
+    if (
+      e1[0].type === "response.output_item.added" &&
+      e2[0].type === "response.output_item.added"
+    ) {
+      expect(e2[0].output_index).toBeGreaterThan(e1[0].output_index);
+    }
+  });
+});
+
+describe("AgentEventTranslator — monotonic output_index", () => {
+  /**
+   * Helper: every emitted `response.output_item.added`/`output_item.done`
+   * event's `output_index` must be >= every prior add/done `output_index`.
+   * This is the strict contract Responses-API clients (OpenAI's own SDK
+   * parser) enforce.
+   */
+  function assertMonotonic(events: ResponseStreamEvent[]) {
+    let last = -1;
+    for (const ev of events) {
+      if (
+        ev.type === "response.output_item.added" ||
+        ev.type === "response.output_item.done"
+      ) {
+        expect(ev.output_index).toBeGreaterThanOrEqual(last);
+        last = ev.output_index;
+      }
+    }
+  }
+
+  test("tool_call followed by message_delta emits monotonic indices (regression)", () => {
+    // Before the fix this produced: tool_call at index 1, then
+    // message_delta.added at 0 — monotonicity violated.
+    const t = new AgentEventTranslator();
+    const all: ResponseStreamEvent[] = [];
+    all.push(
+      ...t.translate({
+        type: "tool_call",
+        callId: "c1",
+        name: "lookup",
+        args: { q: "x" },
+      }),
+    );
+    all.push(
+      ...t.translate({ type: "tool_result", callId: "c1", result: "ok" }),
+    );
+    all.push(...t.translate({ type: "message_delta", content: "Result: " }));
+    all.push(...t.translate({ type: "message_delta", content: "ok." }));
+    all.push(...t.finalize());
+
+    assertMonotonic(all);
+
+    const added = all.filter((e) => e.type === "response.output_item.added");
+    // Three items: tool_call, tool_result, message. Indices 0/1/2.
+    expect(added.map((e) => e.output_index)).toEqual([0, 1, 2]);
+  });
+
+  test("message interrupted by tool_call is closed before the tool_call opens", () => {
+    const t = new AgentEventTranslator();
+    const all: ResponseStreamEvent[] = [];
+    all.push(...t.translate({ type: "message_delta", content: "thinking..." }));
+    all.push(
+      ...t.translate({
+        type: "tool_call",
+        callId: "c1",
+        name: "lookup",
+        args: {},
+      }),
+    );
+    all.push(
+      ...t.translate({ type: "tool_result", callId: "c1", result: "ok" }),
+    );
+    all.push(...t.translate({ type: "message_delta", content: "final" }));
+    all.push(...t.finalize());
+
+    assertMonotonic(all);
+
+    // Structure: msg0.added, msg0.delta, msg0.done (closed before tool),
+    // tool_call.added/done, tool_result.added/done, msg1.added, msg1.delta,
+    // msg1.done (from finalize), response.completed.
+    const addedDone = all.filter(
+      (e) =>
+        e.type === "response.output_item.added" ||
+        e.type === "response.output_item.done",
+    );
+    expect(addedDone.map((e) => `${e.type}@${e.output_index}`)).toEqual([
+      "response.output_item.added@0",
+      "response.output_item.done@0",
+      "response.output_item.added@1",
+      "response.output_item.done@1",
+      "response.output_item.added@2",
+      "response.output_item.done@2",
+      "response.output_item.added@3",
+      "response.output_item.done@3",
+    ]);
+  });
+
+  test("full `message` event after deltas does not double-emit output_item.added", () => {
+    const t = new AgentEventTranslator();
+    const all: ResponseStreamEvent[] = [];
+    all.push(...t.translate({ type: "message_delta", content: "partial" }));
+    all.push(
+      ...t.translate({ type: "message", content: "full final content" }),
+    );
+    all.push(...t.finalize());
+
+    const added = all.filter((e) => e.type === "response.output_item.added");
+    const done = all.filter((e) => e.type === "response.output_item.done");
+    // Exactly one added (from the first delta) and one done (from the full
+    // message). finalize() must not emit a second done for the same item.
+    expect(added).toHaveLength(1);
+    expect(done).toHaveLength(1);
+    if (done[0].type === "response.output_item.done") {
+      const item = done[0].item;
+      if (item.type === "message") {
+        expect(item.content[0].text).toBe("full final content");
+      }
+    }
+  });
+
+  test("tool_result coerces undefined result to empty-string output", () => {
+    const t = new AgentEventTranslator();
+    const events = t.translate({
+      type: "tool_result",
+      callId: "c1",
+      result: undefined,
+    });
+    const done = events.find((e) => e.type === "response.output_item.done");
+    if (done?.type === "response.output_item.done") {
+      const item = done.item;
+      if (item.type === "function_call_output") {
+        expect(item.output).toBe("");
+      }
+    }
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/load-agents.test.ts b/packages/appkit/src/plugins/agents/tests/load-agents.test.ts
new file mode 100644
index 00000000..1a5b9523
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/load-agents.test.ts
@@ -0,0 +1,360 @@
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { z } from "zod";
+import { buildToolkitEntries } from "../build-toolkit";
+import {
+  agentIdFromMarkdownPath,
+  loadAgentFromFile,
+  loadAgentsFromDir,
+  parseFrontmatter,
+} from "../load-agents";
+import { defineTool, type ToolRegistry } from "../tools/define-tool";
+import { tool } from "../tools/tool";
+import type { AgentDefinition } from "../types";
+
+let workDir: string;
+
+beforeEach(() => {
+  workDir = fs.mkdtempSync(path.join(os.tmpdir(), "agents-test-"));
+});
+
+afterEach(() => {
+  fs.rmSync(workDir, { recursive: true, force: true });
+});
+
+/** Flat file under workDir (for legacy loadAgentFromFile tests). */
+function writeRoot(name: string, content: string) {
+  fs.writeFileSync(path.join(workDir, name), content, "utf-8");
+  return path.join(workDir, name);
+}
+
+/** Folder layout: `<id>/agent.md`. */
+function writeAgent(id: string, content: string) {
+  const dir = path.join(workDir, id);
+  fs.mkdirSync(dir, { recursive: true });
+  const p = path.join(dir, "agent.md");
+  fs.writeFileSync(p, content, "utf-8");
+  return p;
+}
+
+describe("agentIdFromMarkdownPath", () => {
+  test("uses parent folder name when file is agent.md", () => {
+    expect(agentIdFromMarkdownPath("/foo/bar/assistant/agent.md")).toBe(
+      "assistant",
+    );
+  });
+
+  test("uses file stem for other .md names", () => {
+    expect(agentIdFromMarkdownPath("/tmp/assistant.md")).toBe("assistant");
+  });
+});
+
+describe("parseFrontmatter", () => {
+  test("parses a simple object", () => {
+    const { data, content } = parseFrontmatter(
+      "---\nendpoint: foo\ndefault: true\n---\nHello body",
+    );
+    expect(data).toEqual({ endpoint: "foo", default: true });
+    expect(content).toBe("Hello body");
+  });
+
+  test("parses nested arrays", () => {
+    const { data } = parseFrontmatter(
+      "---\ntoolkits:\n  - analytics\n  - files: [uploads.list]\n---\nbody",
+    );
+    expect(data?.toolkits).toEqual(["analytics", { files: ["uploads.list"] }]);
+  });
+
+  test("returns null data when no frontmatter", () => {
+    const { data, content } = parseFrontmatter("No frontmatter here");
+    expect(data).toBeNull();
+    expect(content).toBe("No frontmatter here");
+  });
+
+  test("throws on invalid YAML", () => {
+    expect(() => parseFrontmatter("---\nkey: : : bad\n---\n")).toThrow(/YAML/);
+  });
+});
+
+describe("loadAgentFromFile", () => {
+  test("returns AgentDefinition with body as instructions", async () => {
+    const p = writeRoot(
+      "assistant.md",
+      "---\nendpoint: e-1\n---\nYou are helpful.",
+    );
+    const def = await loadAgentFromFile(p, {});
+    expect(def.name).toBe("assistant");
+    expect(def.instructions).toBe("You are helpful.");
+    expect(def.model).toBe("e-1");
+  });
+
+  test("derives agent id from folder when path ends with agent.md", async () => {
+    const p = writeAgent("router", "---\nendpoint: e-1\n---\nRoute traffic.");
+    const def = await loadAgentFromFile(p, {});
+    expect(def.name).toBe("router");
+    expect(def.instructions).toBe("Route traffic.");
+  });
+});
+
+describe("loadAgentsFromDir", () => {
+  test("returns empty map when dir doesn't exist", async () => {
+    const res = await loadAgentsFromDir("/nonexistent-for-tests", {});
+    expect(res.defs).toEqual({});
+    expect(res.defaultAgent).toBeNull();
+  });
+
+  test("loads each subdirectory with agent.md keyed by folder name", async () => {
+    writeAgent("support", "---\nendpoint: e-1\n---\nSupport prompt.");
+    writeAgent("sales", "---\nendpoint: e-2\n---\nSales prompt.");
+    const res = await loadAgentsFromDir(workDir, {});
+    expect(Object.keys(res.defs).sort()).toEqual(["sales", "support"]);
+  });
+
+  test("throws when legacy top-level .md exists", async () => {
+    writeRoot("assistant.md", "---\nendpoint: e\n---\nLegacy flat file.");
+    await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+      /unsupported top-level markdown file\(s\): assistant\.md.*assistant\/agent\.md/s,
+    );
+  });
+
+  test("throws when a subdirectory lacks agent.md", async () => {
+    fs.mkdirSync(path.join(workDir, "broken"), { recursive: true });
+    await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+      /must contain agent\.md/,
+    );
+  });
+
+  test("ignores reserved skills directory without agent.md", async () => {
+    fs.mkdirSync(path.join(workDir, "skills"), { recursive: true });
+    writeAgent("solo", "---\nendpoint: e\n---\nOnly real agent.");
+    const res = await loadAgentsFromDir(workDir, {});
+    expect(Object.keys(res.defs)).toEqual(["solo"]);
+  });
+
+  test("picks up default: true from frontmatter (deterministic sorted ids)", async () => {
+    writeAgent("one", "---\nendpoint: a\n---\nOne.");
+    writeAgent("two", "---\nendpoint: b\ndefault: true\n---\nTwo.");
+    const res = await loadAgentsFromDir(workDir, {});
+    expect(res.defaultAgent).toBe("two");
+  });
+
+  test("throws when frontmatter references an unregistered plugin", async () => {
+    writeAgent(
+      "broken",
+      "---\nendpoint: e\ntoolkits: [missing]\n---\nBroken agent.",
+    );
+    await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+      /references toolkit 'missing'/,
+    );
+  });
+
+  test("throws when frontmatter references an unknown ambient tool", async () => {
+    writeAgent(
+      "broken",
+      "---\nendpoint: e\ntools: [unknown_tool]\n---\nBroken.",
+    );
+    await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+      /references tool 'unknown_tool'/,
+    );
+  });
+
+  test("resolves toolkits + ambient tools when provided", async () => {
+    const registry: ToolRegistry = {
+      query: defineTool({
+        description: "q",
+        schema: z.object({ sql: z.string() }),
+        handler: () => "ok",
+      }),
+    };
+    const plugins = new Map<
+      string,
+      { toolkit: (opts?: unknown) => Record<string, unknown> }
+    >([
+      [
+        "analytics",
+        {
+          toolkit: (opts) =>
+            buildToolkitEntries("analytics", registry, opts as never),
+        },
+      ],
+    ]);
+
+    const weather = tool({
+      name: "get_weather",
+      description: "Weather",
+      schema: z.object({ city: z.string() }),
+      execute: async () => "sunny",
+    });
+
+    writeAgent(
+      "analyst",
+      "---\nendpoint: e\ntoolkits:\n  - analytics\ntools:\n  - get_weather\n---\nBody.",
+    );
+    const res = await loadAgentsFromDir(workDir, {
+      plugins,
+      availableTools: { get_weather: weather },
+    });
+    expect(res.defs.analyst.tools).toBeDefined();
+    expect(Object.keys(res.defs.analyst.tools ?? {}).sort()).toEqual([
+      "analytics.query",
+      "get_weather",
+    ]);
+  });
+
+  describe("agents: sibling sub-agent references", () => {
+    test("resolves sibling references into def.agents regardless of folder order", async () => {
+      writeAgent(
+        "dispatcher",
+        "---\nendpoint: e\nagents:\n  - analyst\n  - writer\n---\nRoute work.",
+      );
+      writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst.");
+      writeAgent("writer", "---\nendpoint: e\n---\nWriter.");
+
+      const res = await loadAgentsFromDir(workDir, {});
+      expect(Object.keys(res.defs.dispatcher.agents ?? {}).sort()).toEqual([
+        "analyst",
+        "writer",
+      ]);
+      expect(res.defs.dispatcher.agents?.analyst).toBe(res.defs.analyst);
+      expect(res.defs.dispatcher.agents?.writer).toBe(res.defs.writer);
+      expect(res.defs.analyst.agents).toBeUndefined();
+      expect(res.defs.writer.agents).toBeUndefined();
+    });
+
+    test("mutual delegation is allowed (runtime depth cap handles cycles)", async () => {
+      writeAgent("a", "---\nendpoint: e\nagents:\n  - b\n---\nA.");
+      writeAgent("b", "---\nendpoint: e\nagents:\n  - a\n---\nB.");
+
+      const res = await loadAgentsFromDir(workDir, {});
+      expect(res.defs.a.agents?.b).toBe(res.defs.b);
+      expect(res.defs.b.agents?.a).toBe(res.defs.a);
+    });
+
+    test("throws with available list when a sibling is missing", async () => {
+      writeAgent("dispatcher", "---\nendpoint: e\nagents:\n  - ghost\n---\nD.");
+      writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst.");
+      await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+        /references sub-agent\(s\) 'ghost'.*Available: analyst, dispatcher/s,
+      );
+    });
+
+    test("reports every missing sibling in one error, not just the first", async () => {
+      writeAgent(
+        "dispatcher",
+        "---\nendpoint: e\nagents:\n  - ghost1\n  - ghost2\n---\nD.",
+      );
+      await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+        /ghost1, ghost2/,
+      );
+    });
+
+    test("throws on self-reference", async () => {
+      writeAgent("solo", "---\nendpoint: e\nagents:\n  - solo\n---\nSolo.");
+      await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+        /'solo'.*cannot reference itself/s,
+      );
+    });
+
+    test("throws on non-array 'agents:' value", async () => {
+      writeAgent("bad", "---\nendpoint: e\nagents: analyst\n---\nBad.");
+      writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst.");
+      await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+        /invalid 'agents:' frontmatter/,
+      );
+    });
+
+    test("throws on non-string entries in 'agents:'", async () => {
+      writeAgent("bad", "---\nendpoint: e\nagents:\n  - 42\n---\nBad.");
+      await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+        /invalid 'agents:' entry/,
+      );
+    });
+
+    test("deduplicates repeated entries silently", async () => {
+      writeAgent(
+        "dispatcher",
+        "---\nendpoint: e\nagents:\n  - analyst\n  - analyst\n---\nD.",
+      );
+      writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst.");
+      const res = await loadAgentsFromDir(workDir, {});
+      expect(Object.keys(res.defs.dispatcher.agents ?? {})).toEqual([
+        "analyst",
+      ]);
+    });
+
+    test("empty array yields no sub-agents (no-op)", async () => {
+      writeAgent("dispatcher", "---\nendpoint: e\nagents: []\n---\nD.");
+      const res = await loadAgentsFromDir(workDir, {});
+      expect(res.defs.dispatcher.agents).toBeUndefined();
+    });
+
+    test("resolves 'agents:' references against codeAgents when provided", async () => {
+      writeAgent(
+        "dispatcher",
+        "---\nendpoint: e\nagents:\n  - support\n---\nD.",
+      );
+      const support: AgentDefinition = {
+        name: "support",
+        instructions: "Code-defined support.",
+      };
+      const res = await loadAgentsFromDir(workDir, {
+        codeAgents: { support },
+      });
+      expect(res.defs.dispatcher.agents?.support).toBe(support);
+    });
+
+    test("codeAgents takes precedence over markdown sibling with the same name", async () => {
+      writeAgent(
+        "dispatcher",
+        "---\nendpoint: e\nagents:\n  - support\n---\nD.",
+      );
+      writeAgent("support", "---\nendpoint: e\n---\nMarkdown support.");
+      const codeSupport: AgentDefinition = {
+        name: "support",
+        instructions: "Code support.",
+      };
+      const res = await loadAgentsFromDir(workDir, {
+        codeAgents: { support: codeSupport },
+      });
+      expect(res.defs.dispatcher.agents?.support).toBe(codeSupport);
+      expect(res.defs.dispatcher.agents?.support.instructions).toBe(
+        "Code support.",
+      );
+    });
+
+    test("missing-sibling error lists both markdown and code agent names", async () => {
+      writeAgent("dispatcher", "---\nendpoint: e\nagents:\n  - ghost\n---\nD.");
+      writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst.");
+      const codeAgent: AgentDefinition = {
+        name: "writer",
+        instructions: "Writer.",
+      };
+      await expect(
+        loadAgentsFromDir(workDir, { codeAgents: { writer: codeAgent } }),
+      ).rejects.toThrow(/Available: analyst, dispatcher, writer/);
+    });
+  });
+});
+
+describe("loadAgentFromFile — sub-agent refs rejected", () => {
+  test("throws when 'agents:' is non-empty in a single-file load", async () => {
+    const p = writeRoot(
+      "lonely.md",
+      "---\nendpoint: e\nagents:\n  - ghost\n---\nLonely.",
+    );
+    await expect(loadAgentFromFile(p, {})).rejects.toThrow(
+      /requires loadAgentsFromDir/,
+    );
+  });
+
+  test("ignores empty 'agents:' array (treated as absent)", async () => {
+    const p = writeRoot(
+      "lonely.md",
+      "---\nendpoint: e\nagents: []\n---\nLonely.",
+    );
+    const def = await loadAgentFromFile(p, {});
+    expect(def.agents).toBeUndefined();
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/run-agent.test.ts b/packages/appkit/src/plugins/agents/tests/run-agent.test.ts
new file mode 100644
index 00000000..1a974811
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/run-agent.test.ts
@@ -0,0 +1,120 @@
+import type {
+  AgentAdapter,
+  AgentEvent,
+  AgentInput,
+  AgentRunContext,
+} from "shared";
+import { describe, expect, test, vi } from "vitest";
+import { z } from "zod";
+import { createAgent } from "../../../core/create-agent-def";
+import { runAgent } from "../../../core/run-agent";
+import { tool } from "../tools/tool";
+import type { ToolkitEntry } from "../types";
+
+function scriptedAdapter(events: AgentEvent[]): AgentAdapter {
+  return {
+    async *run(_input: AgentInput, _context: AgentRunContext) {
+      for (const event of events) {
+        yield event;
+      }
+    },
+  };
+}
+
+describe("runAgent", () => {
+  test("drives the adapter and returns aggregated text", async () => {
+    const events: AgentEvent[] = [
+      { type: "message_delta", content: "Hello " },
+      { type: "message_delta", content: "world" },
+      { type: "status", status: "complete" },
+    ];
+    const def = createAgent({
+      instructions: "Say hello",
+      model: scriptedAdapter(events),
+    });
+
+    const result = await runAgent(def, { messages: "hi" });
+    expect(result.text).toBe("Hello world");
+    expect(result.events).toHaveLength(3);
+  });
+
+  test("prefers terminal 'message' event over deltas when present", async () => {
+    const events: AgentEvent[] = [
+      { type: "message_delta", content: "partial" },
+      { type: "message", content: "final answer" },
+    ];
+    const def = createAgent({
+      instructions: "x",
+      model: scriptedAdapter(events),
+    });
+    const result = await runAgent(def, { messages: "hi" });
+    expect(result.text).toBe("final answer");
+  });
+
+  test("invokes inline tools via executeTool callback", async () => {
+    const weatherFn = vi.fn(async () => "Sunny in NYC");
+    const weather = tool({
+      name: "get_weather",
+      description: "Weather",
+      schema: z.object({ city: z.string() }),
+      execute: weatherFn,
+    });
+
+    let capturedCtx: AgentRunContext | null = null;
+    const adapter: AgentAdapter = {
+      async *run(_input, context) {
+        capturedCtx = context;
+        yield { type: "message_delta", content: "" };
+      },
+    };
+
+    const def = createAgent({
+      instructions: "x",
+      model: adapter,
+      tools: { get_weather: weather },
+    });
+
+    await runAgent(def, { messages: "hi" });
+    expect(capturedCtx).not.toBeNull();
+    // biome-ignore lint/style/noNonNullAssertion: asserted above
+    const result = await capturedCtx!.executeTool("get_weather", {
+      city: "NYC",
+    });
+    expect(result).toBe("Sunny in NYC");
+    expect(weatherFn).toHaveBeenCalledWith({ city: "NYC" });
+  });
+
+  test("throws a clear error when a ToolkitEntry is invoked", async () => {
+    const toolkitEntry: ToolkitEntry = {
+      __toolkitRef: true,
+      pluginName: "analytics",
+      localName: "query",
+      def: {
+        name: "analytics.query",
+        description: "SQL",
+        parameters: { type: "object", properties: {} },
+      },
+    };
+
+    let capturedCtx: AgentRunContext | null = null;
+    const adapter: AgentAdapter = {
+      async *run(_input, context) {
+        capturedCtx = context;
+        yield { type: "message_delta", content: "" };
+      },
+    };
+
+    const def = createAgent({
+      instructions: "x",
+      model: adapter,
+      tools: { "analytics.query": toolkitEntry },
+    });
+
+    await runAgent(def, { messages: "hi" });
+    expect(capturedCtx).not.toBeNull();
+    await expect(
+      // biome-ignore lint/style/noNonNullAssertion: asserted above
+      capturedCtx!.executeTool("analytics.query", {}),
+    ).rejects.toThrow(/only usable via createApp/);
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/system-prompt.test.ts b/packages/appkit/src/plugins/agents/tests/system-prompt.test.ts
new file mode 100644
index 00000000..25724259
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/system-prompt.test.ts
@@ -0,0 +1,59 @@
+import { describe, expect, test } from "vitest";
+import { buildBaseSystemPrompt, composeSystemPrompt } from "../system-prompt";
+
+const emptyCtx = {
+  agentName: "a",
+  pluginNames: [] as string[],
+  toolNames: [] as string[],
+};
+
+describe("buildBaseSystemPrompt", () => {
+  test("includes plugin names", () => {
+    const prompt = buildBaseSystemPrompt({
+      agentName: "assistant",
+      pluginNames: ["analytics", "files", "genie"],
+      toolNames: [],
+    });
+    expect(prompt).toContain("Active AppKit plugins: analytics, files, genie");
+  });
+
+  test("includes guidelines", () => {
+    const prompt = buildBaseSystemPrompt(emptyCtx);
+    expect(prompt).toContain("Guidelines:");
+    expect(prompt).toContain("syntax, dialect, or path rules");
+    expect(prompt).toContain("summarize what matters");
+  });
+
+  test("works with no plugins", () => {
+    const prompt = buildBaseSystemPrompt(emptyCtx);
+    expect(prompt).toContain("AI assistant running on Databricks AppKit");
+    expect(prompt).not.toContain("Active AppKit plugins:");
+  });
+
+  test("does NOT include individual tool names", () => {
+    const prompt = buildBaseSystemPrompt({
+      agentName: "a",
+      pluginNames: ["analytics"],
+      toolNames: ["analytics.query"],
+    });
+    expect(prompt).not.toContain("analytics.query");
+    expect(prompt).not.toContain("Available tools:");
+  });
+});
+
+describe("composeSystemPrompt", () => {
+  test("concatenates base + agent prompt with double newline", () => {
+    const composed = composeSystemPrompt("Base prompt.", "Agent prompt.");
+    expect(composed).toBe("Base prompt.\n\nAgent prompt.");
+  });
+
+  test("returns base prompt alone when no agent prompt", () => {
+    const composed = composeSystemPrompt("Base prompt.");
+    expect(composed).toBe("Base prompt.");
+  });
+
+  test("returns base prompt when agent prompt is empty string", () => {
+    const composed = composeSystemPrompt("Base prompt.", "");
+    expect(composed).toBe("Base prompt.");
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/thread-store.test.ts b/packages/appkit/src/plugins/agents/tests/thread-store.test.ts
new file mode 100644
index 00000000..ed4f70ba
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/thread-store.test.ts
@@ -0,0 +1,138 @@
+import { describe, expect, test } from "vitest";
+import { InMemoryThreadStore } from "../thread-store";
+
+describe("InMemoryThreadStore", () => {
+  test("create() returns a new thread with the given userId", async () => {
+    const store = new InMemoryThreadStore();
+    const thread = await store.create("user-1");
+
+    expect(thread.id).toBeDefined();
+    expect(thread.userId).toBe("user-1");
+    expect(thread.messages).toEqual([]);
+    expect(thread.createdAt).toBeInstanceOf(Date);
+    expect(thread.updatedAt).toBeInstanceOf(Date);
+  });
+
+  test("get() returns the thread for the correct user", async () => {
+    const store = new InMemoryThreadStore();
+    const thread = await store.create("user-1");
+
+    const retrieved = await store.get(thread.id, "user-1");
+    expect(retrieved).toEqual(thread);
+  });
+
+  test("get() returns null for wrong user", async () => {
+    const store = new InMemoryThreadStore();
+    const thread = await store.create("user-1");
+
+    const retrieved = await store.get(thread.id, "user-2");
+    expect(retrieved).toBeNull();
+  });
+
+  test("get() returns null for non-existent thread", async () => {
+    const store = new InMemoryThreadStore();
+    const retrieved = await store.get("non-existent", "user-1");
+    expect(retrieved).toBeNull();
+  });
+
+  test("list() returns threads sorted by updatedAt desc", async () => {
+    const store = new InMemoryThreadStore();
+    const t1 = await store.create("user-1");
+    const t2 = await store.create("user-1");
+
+    // Make t1 more recently updated
+    await store.addMessage(t1.id, "user-1", {
+      id: "msg-1",
+      role: "user",
+      content: "hello",
+      createdAt: new Date(),
+    });
+
+    const threads = await store.list("user-1");
+    expect(threads).toHaveLength(2);
+    expect(threads[0].id).toBe(t1.id);
+    expect(threads[1].id).toBe(t2.id);
+  });
+
+  test("list() returns empty for unknown user", async () => {
+    const store = new InMemoryThreadStore();
+    await store.create("user-1");
+
+    const threads = await store.list("user-2");
+    expect(threads).toEqual([]);
+  });
+
+  test("addMessage() appends to thread and updates timestamp", async () => {
+    const store = new InMemoryThreadStore();
+    const thread = await store.create("user-1");
+    const originalUpdatedAt = thread.updatedAt;
+
+    // Small delay to ensure timestamp differs
+    await new Promise((r) => setTimeout(r, 5));
+
+    await store.addMessage(thread.id, "user-1", {
+      id: "msg-1",
+      role: "user",
+      content: "hello",
+      createdAt: new Date(),
+    });
+
+    const updated = await store.get(thread.id, "user-1");
+    expect(updated?.messages).toHaveLength(1);
+    expect(updated?.messages[0].content).toBe("hello");
+    expect(updated?.updatedAt.getTime()).toBeGreaterThanOrEqual(
+      originalUpdatedAt.getTime(),
+    );
+  });
+
+  test("addMessage() throws for non-existent thread", async () => {
+    const store = new InMemoryThreadStore();
+
+    await expect(
+      store.addMessage("non-existent", "user-1", {
+        id: "msg-1",
+        role: "user",
+        content: "hello",
+        createdAt: new Date(),
+      }),
+    ).rejects.toThrow("Thread non-existent not found");
+  });
+
+  test("delete() removes a thread and returns true", async () => {
+    const store = new InMemoryThreadStore();
+    const thread = await store.create("user-1");
+
+    const deleted = await store.delete(thread.id, "user-1");
+    expect(deleted).toBe(true);
+
+    const retrieved = await store.get(thread.id, "user-1");
+    expect(retrieved).toBeNull();
+  });
+
+  test("delete() returns false for non-existent thread", async () => {
+    const store = new InMemoryThreadStore();
+    const deleted = await store.delete("non-existent", "user-1");
+    expect(deleted).toBe(false);
+  });
+
+  test("delete() returns false for wrong user", async () => {
+    const store = new InMemoryThreadStore();
+    const thread = await store.create("user-1");
+
+    const deleted = await store.delete(thread.id, "user-2");
+    expect(deleted).toBe(false);
+  });
+
+  test("threads are isolated per user", async () => {
+    const store = new InMemoryThreadStore();
+    await store.create("user-1");
+    await store.create("user-1");
+    await store.create("user-2");
+
+    const user1Threads = await store.list("user-1");
+    const user2Threads = await store.list("user-2");
+
+    expect(user1Threads).toHaveLength(2);
+    expect(user2Threads).toHaveLength(1);
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/tool-approval-gate.test.ts b/packages/appkit/src/plugins/agents/tests/tool-approval-gate.test.ts
new file mode 100644
index 00000000..1e17ddf6
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/tool-approval-gate.test.ts
@@ -0,0 +1,156 @@
+import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
+import { ToolApprovalGate } from "../tool-approval-gate";
+
+describe("ToolApprovalGate", () => {
+  let gate: ToolApprovalGate;
+
+  beforeEach(() => {
+    vi.useFakeTimers();
+    gate = new ToolApprovalGate();
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  test("resolves with 'approve' when a matching submit arrives", async () => {
+    const waiter = gate.wait({
+      approvalId: "a1",
+      streamId: "s1",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+    expect(gate.size).toBe(1);
+
+    const result = gate.submit({
+      approvalId: "a1",
+      userId: "alice",
+      decision: "approve",
+    });
+
+    expect(result).toEqual({ ok: true });
+    await expect(waiter).resolves.toBe("approve");
+    expect(gate.size).toBe(0);
+  });
+
+  test("resolves with 'deny' on explicit deny", async () => {
+    const waiter = gate.wait({
+      approvalId: "a2",
+      streamId: "s1",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+    gate.submit({
+      approvalId: "a2",
+      userId: "alice",
+      decision: "deny",
+    });
+    await expect(waiter).resolves.toBe("deny");
+  });
+
+  test("auto-denies after timeoutMs with no submit", async () => {
+    const waiter = gate.wait({
+      approvalId: "a3",
+      streamId: "s1",
+      userId: "alice",
+      timeoutMs: 1000,
+    });
+    vi.advanceTimersByTime(1000);
+    await expect(waiter).resolves.toBe("deny");
+    expect(gate.size).toBe(0);
+  });
+
+  test("refuses a submit from a different user (ownership check)", async () => {
+    const waiter = gate.wait({
+      approvalId: "a4",
+      streamId: "s1",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+    const result = gate.submit({
+      approvalId: "a4",
+      userId: "bob",
+      decision: "approve",
+    });
+    expect(result).toEqual({ ok: false, reason: "forbidden" });
+    expect(gate.size).toBe(1);
+    // Waiter is still pending; cleanup to let fake timers drain.
+    gate.submit({
+      approvalId: "a4",
+      userId: "alice",
+      decision: "deny",
+    });
+    await expect(waiter).resolves.toBe("deny");
+  });
+
+  test("returns 'unknown' reason when approvalId is not registered", () => {
+    expect(
+      gate.submit({ approvalId: "nope", userId: "x", decision: "approve" }),
+    ).toEqual({ ok: false, reason: "unknown" });
+  });
+
+  test("abortStream denies every pending gate for that stream", async () => {
+    const a = gate.wait({
+      approvalId: "a5",
+      streamId: "s1",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+    const b = gate.wait({
+      approvalId: "a6",
+      streamId: "s1",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+    const c = gate.wait({
+      approvalId: "a7",
+      streamId: "s2",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+    gate.abortStream("s1");
+    await expect(a).resolves.toBe("deny");
+    await expect(b).resolves.toBe("deny");
+    expect(gate.size).toBe(1);
+    // s2's waiter is still pending; settle it to clean up timers.
+    gate.submit({ approvalId: "a7", userId: "alice", decision: "deny" });
+    await expect(c).resolves.toBe("deny");
+  });
+
+  test("abortAll denies every pending gate", async () => {
+    const a = gate.wait({
+      approvalId: "a8",
+      streamId: "s1",
+      userId: "alice",
+      timeoutMs: 60_000,
+    });
+    const b = gate.wait({
+      approvalId: "a9",
+      streamId: "s2",
+      userId: "bob",
+      timeoutMs: 60_000,
+    });
+    gate.abortAll();
+    await expect(a).resolves.toBe("deny");
+    await expect(b).resolves.toBe("deny");
+    expect(gate.size).toBe(0);
+  });
+
+  test("a timed-out approval cannot be resolved by a late submit", async () => {
+    const waiter = gate.wait({
+      approvalId: "a10",
+      streamId: "s1",
+      userId: "alice",
+      timeoutMs: 500,
+    });
+    vi.advanceTimersByTime(500);
+    await expect(waiter).resolves.toBe("deny");
+
+    const late = gate.submit({
+      approvalId: "a10",
+      userId: "alice",
+      decision: "approve",
+    });
+    expect(late).toEqual({ ok: false, reason: "unknown" });
+  });
+});
diff --git a/packages/appkit/src/plugins/agents/thread-store.ts b/packages/appkit/src/plugins/agents/thread-store.ts
new file mode 100644
index 00000000..7c4622cd
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/thread-store.ts
@@ -0,0 +1,66 @@
+import { randomUUID } from "node:crypto";
+import type { Message, Thread, ThreadStore } from "shared";
+
+/**
+ * In-memory thread store backed by a nested Map.
+ *
+ * Outer key: userId, inner key: threadId. Thread history is retained for the
+ * lifetime of the process with no eviction, caps, or TTL — a chatty user will
+ * grow the in-memory footprint monotonically, and the server loses every
+ * thread on restart. **This implementation is intended for local development
+ * and single-process demos only.**
+ *
+ * For any real deployment, pass a persistent `ThreadStore` to `agents({ ... })`
+ * (e.g. a Lakebase- or Postgres-backed implementation). A bounded
+ * `InMemoryThreadStore` with eviction policies is tracked as a follow-up.
+ */
+export class InMemoryThreadStore implements ThreadStore {
+  private store = new Map<string, Map<string, Thread>>();
+
+  async create(userId: string): Promise<Thread> {
+    const now = new Date();
+    const thread: Thread = {
+      id: randomUUID(),
+      userId,
+      messages: [],
+      createdAt: now,
+      updatedAt: now,
+    };
+    this.userMap(userId).set(thread.id, thread);
+    return thread;
+  }
+
+  async get(threadId: string, userId: string): Promise<Thread | null> {
+    return this.userMap(userId).get(threadId) ?? null;
+  }
+
+  async list(userId: string): Promise<Thread[]> {
+    return Array.from(this.userMap(userId).values()).sort(
+      (a, b) => b.updatedAt.getTime() - a.updatedAt.getTime(),
+    );
+  }
+
+  async addMessage(
+    threadId: string,
+    userId: string,
+    message: Message,
+  ): Promise<void> {
+    const thread = this.userMap(userId).get(threadId);
+    if (!thread) throw new Error(`Thread ${threadId} not found`);
+    thread.messages.push(message);
+    thread.updatedAt = new Date();
+  }
+
+  async delete(threadId: string, userId: string): Promise<boolean> {
+    return this.userMap(userId).delete(threadId);
+  }
+
+  private userMap(userId: string): Map<string, Thread> {
+    let map = this.store.get(userId);
+    if (!map) {
+      map = new Map();
+      this.store.set(userId, map);
+    }
+    return map;
+  }
+}
diff --git a/packages/appkit/src/plugins/agents/tool-approval-gate.ts b/packages/appkit/src/plugins/agents/tool-approval-gate.ts
new file mode 100644
index 00000000..669f30a9
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tool-approval-gate.ts
@@ -0,0 +1,122 @@
+/**
+ * Server-side state for the human-in-the-loop approval gate on
+ * `destructive: true` agent tool calls.
+ *
+ * Lifecycle:
+ *
+ * 1. `wait(...)` is called from inside `executeTool` when a destructive tool
+ *    is about to execute. A `Pending` record is registered and a timer is
+ *    scheduled for auto-deny. The returned promise is what blocks the
+ *    adapter until the decision arrives.
+ * 2. The client receives an `appkit.approval_pending` SSE event carrying the
+ *    `approvalId` + `streamId` and posts a decision to `POST /chat/approve`.
+ *    The route calls {@link ToolApprovalGate.submit} which resolves the
+ *    pending promise and clears the timer.
+ * 3. If no submit arrives within `timeoutMs`, the timer fires and the
+ *    promise resolves with `"deny"`.
+ *
+ * Security invariants:
+ *
+ * - `submit` verifies that the decider's user id matches the user who
+ *   initiated the stream (set by `wait`). Mismatches are rejected without
+ *   resolving the pending promise — this prevents a second user from
+ *   approving (or denying) another user's destructive action.
+ * - `abort(streamId)` cancels every pending gate for a stream and denies
+ *   each one. Used when the enclosing stream is cancelled or the plugin is
+ *   shutting down.
+ */
+type ApprovalDecision = "approve" | "deny";
+
+interface Pending {
+  resolve: (decision: ApprovalDecision) => void;
+  userId: string;
+  streamId: string;
+  timeout: ReturnType<typeof setTimeout>;
+}
+
+type ApprovalSubmitResult =
+  | { ok: true }
+  | { ok: false; reason: "unknown" | "forbidden" };
+
+export class ToolApprovalGate {
+  private pending = new Map<string, Pending>();
+
+  /**
+   * Register a pending approval and return a promise that resolves with the
+   * user's decision or with `"deny"` when the timeout elapses. The returned
+   * promise never rejects.
+   */
+  wait(args: {
+    approvalId: string;
+    streamId: string;
+    userId: string;
+    timeoutMs: number;
+  }): Promise<ApprovalDecision> {
+    const { approvalId, streamId, userId, timeoutMs } = args;
+    return new Promise<ApprovalDecision>((resolve) => {
+      const timeout = setTimeout(() => {
+        if (this.pending.delete(approvalId)) {
+          resolve("deny");
+        }
+      }, timeoutMs);
+      this.pending.set(approvalId, {
+        resolve,
+        userId,
+        streamId,
+        timeout,
+      });
+    });
+  }
+
+  /**
+   * Settle an approval with a user decision. Returns:
+   * - `{ ok: true }` if the pending record existed, the userId matched, and
+   *   the promise was resolved.
+   * - `{ ok: false, reason: "unknown" }` if no pending record matches the id.
+   * - `{ ok: false, reason: "forbidden" }` if the userId does not match the
+   *   user who initiated the stream.
+   */
+  submit(args: {
+    approvalId: string;
+    userId: string;
+    decision: ApprovalDecision;
+  }): ApprovalSubmitResult {
+    const { approvalId, userId, decision } = args;
+    const p = this.pending.get(approvalId);
+    if (!p) return { ok: false, reason: "unknown" };
+    if (p.userId !== userId) return { ok: false, reason: "forbidden" };
+    clearTimeout(p.timeout);
+    this.pending.delete(approvalId);
+    p.resolve(decision);
+    return { ok: true };
+  }
+
+  /**
+   * Cancel all pending gates for a specific stream (e.g., when the user
+   * cancels the stream). Each gate resolves with `"deny"` so the adapter
+   * unwinds cleanly.
+   */
+  abortStream(streamId: string): void {
+    for (const [id, p] of this.pending) {
+      if (p.streamId === streamId) {
+        clearTimeout(p.timeout);
+        this.pending.delete(id);
+        p.resolve("deny");
+      }
+    }
+  }
+
+  /** Cancel every pending gate. Used at plugin shutdown. */
+  abortAll(): void {
+    for (const [id, p] of this.pending) {
+      clearTimeout(p.timeout);
+      this.pending.delete(id);
+      p.resolve("deny");
+    }
+  }
+
+  /** Number of pending approvals (test/diagnostic helper). */
+  get size(): number {
+    return this.pending.size;
+  }
+}
diff --git a/packages/appkit/src/plugins/agents/types.ts b/packages/appkit/src/plugins/agents/types.ts
index 086a0426..8d52d278 100644
--- a/packages/appkit/src/plugins/agents/types.ts
+++ b/packages/appkit/src/plugins/agents/types.ts
@@ -1,6 +1,13 @@
-import type { AgentToolDefinition, ToolAnnotations } from "shared";
+import type {
+  AgentAdapter,
+  AgentToolDefinition,
+  BasePluginConfig,
+  ThreadStore,
+  ToolAnnotations,
+} from "shared";
 import type { FunctionTool } from "./tools/function-tool";
 import type { HostedTool } from "./tools/hosted-tools";
+import type { McpHostPolicyConfig } from "./tools/mcp-host-policy";
 
 /**
  * A tool reference produced by a plugin's `.toolkit()` call. The agents plugin
@@ -42,8 +49,172 @@ export interface ToolkitOptions {
 }
 
 /**
- * Type guard for `ToolkitEntry` — used to differentiate toolkit references
- * from inline tools in a mixed `tools` record.
+ * Context passed to `baseSystemPrompt` callbacks.
+ */
+export interface PromptContext {
+  agentName: string;
+  pluginNames: string[];
+  toolNames: string[];
+}
+
+export type BaseSystemPromptOption =
+  | false
+  | string
+  | ((ctx: PromptContext) => string);
+
+export interface AgentDefinition {
+  /** Filled in from the enclosing key when used in `agents: { foo: def }`. */
+  name?: string;
+  /** System prompt body. For markdown-loaded agents this is the file body. */
+  instructions: string;
+  /**
+   * Model adapter (or endpoint-name string sugar for
+   * `DatabricksAdapter.fromServingEndpoint({ endpointName })`). Optional —
+   * falls back to the plugin's `defaultModel`.
+   */
+  model?: AgentAdapter | Promise<AgentAdapter> | string;
+  /** Per-agent tool record. Key is the LLM-visible tool-call name. */
+  tools?: Record<string, AgentTool>;
+  /** Sub-agents, exposed as `agent-<key>` tools on this agent. */
+  agents?: Record<string, AgentDefinition>;
+  /** Override the plugin's baseSystemPrompt for this agent only. */
+  baseSystemPrompt?: BaseSystemPromptOption;
+  maxSteps?: number;
+  maxTokens?: number;
+  /**
+   * When true, the thread used for a chat request against this agent is
+   * deleted from `ThreadStore` after the stream completes (success or
+   * failure). Use for stateless one-shot agents — e.g. autocomplete, where
+   * each request is independent and retaining history would both poison
+   * future calls and accumulate unbounded state in the default
+   * `InMemoryThreadStore`. Defaults to `false`.
+   */
+  ephemeral?: boolean;
+}
+
+/**
+ * Auto-inherit configuration. When enabled for a given agent origin, agents
+ * with no explicit `tools:` declaration receive every registered ToolProvider
+ * plugin tool whose author marked `autoInheritable: true`. Tools without that
+ * flag — destructive, state-mutating, or privilege-sensitive — never spread
+ * automatically and must be wired via `tools:`, `toolkits:`, or `fromPlugin`.
+ *
+ * Defaults are `false` for both origins (safe-by-default): developers must
+ * consciously opt an origin in to any auto-inherit behaviour.
+ */
+export interface AutoInheritToolsConfig {
+  /** Default for agents loaded from markdown files. Default: `false`. */
+  file?: boolean;
+  /** Default for code-defined agents (via `agents: { foo: createAgent(...) }`). Default: `false`. */
+  code?: boolean;
+}
+
+export interface AgentsPluginConfig extends BasePluginConfig {
+  /** Directory of agent packages (`<id>/agent.md` each). Default `./config/agents`. Set to `false` to disable. */
+  dir?: string | false;
+  /** Code-defined agents, merged with file-loaded ones (code wins on key collision). */
+  agents?: Record<string, AgentDefinition>;
+  /** Agent used when clients don't specify one. Defaults to the first-registered agent or the file with `default: true` frontmatter. */
+  defaultAgent?: string;
+  /** Default model for agents that don't specify their own (in code or frontmatter). */
+  defaultModel?: AgentAdapter | Promise<AgentAdapter> | string;
+  /** Ambient tool library. Keys may be referenced by markdown frontmatter via `tools: [key1, key2]`. */
+  tools?: Record<string, AgentTool>;
+  /** Whether to auto-inherit every ToolProvider plugin's toolkit. Accepts a boolean shorthand. */
+  autoInheritTools?: boolean | AutoInheritToolsConfig;
+  /** Persistent thread store. Default: in-memory. */
+  threadStore?: ThreadStore;
+  /** Customize or disable the AppKit base system prompt. */
+  baseSystemPrompt?: BaseSystemPromptOption;
+  /**
+   * MCP server host policy. By default only same-origin Databricks workspace
+   * URLs may be used as MCP endpoints; custom hosts must be explicitly
+   * allowlisted here. Workspace credentials (SP / OBO) are never forwarded
+   * to non-workspace hosts.
+   */
+  mcp?: McpHostPolicyConfig;
+  /**
+   * Human-in-the-loop approval gate for destructive tool calls. When enabled
+   * (the default), the agents plugin emits an `appkit.approval_pending` SSE
+   * event before executing any tool annotated `destructive: true` and waits
+   * for a `POST /chat/approve` decision from the same user who initiated the
+   * stream. A missing decision after `timeoutMs` auto-denies the call.
+   */
+  approval?: {
+    /** Require human approval for tools annotated `destructive: true`. Default: `true`. */
+    requireForDestructive?: boolean;
+    /** Milliseconds to wait before auto-denying. Default: 60_000. */
+    timeoutMs?: number;
+  };
+  /**
+   * Runtime resource limits applied during agent execution. Defaults are
+   * tuned to protect a single-instance deployment from a misbehaving user or
+   * a runaway prompt injection; tighten or relax as appropriate for the
+   * deployment's scale and trust model. Request-body caps (chat message
+   * size, invocations input size / length) are enforced statically by the
+   * Zod schemas and are not configurable here.
+   */
+  limits?: {
+    /**
+     * Max concurrent chat streams a single user may have open. Subsequent
+     * `POST /chat` requests from that user while at-limit are rejected with
+     * HTTP 429. Default: `5`.
+     */
+    maxConcurrentStreamsPerUser?: number;
+    /**
+     * Max tool invocations per agent run (across the full tool-call graph,
+     * including sub-agent invocations). A run that exceeds the budget is
+     * aborted with a terminal error event. Default: `50`.
+     */
+    maxToolCalls?: number;
+    /**
+     * Max sub-agent recursion depth. Protects against a prompt-injected
+     * agent that delegates to a sub-agent which in turn delegates back to
+     * itself (directly or transitively). Default: `3`.
+     */
+    maxSubAgentDepth?: number;
+  };
+}
+
+/** Internal tool-index entry after a tool record has been resolved to a dispatchable form. */
+export type ResolvedToolEntry =
+  | {
+      source: "toolkit";
+      pluginName: string;
+      localName: string;
+      def: AgentToolDefinition;
+    }
+  | {
+      source: "function";
+      functionTool: FunctionTool;
+      def: AgentToolDefinition;
+    }
+  | {
+      source: "mcp";
+      mcpToolName: string;
+      def: AgentToolDefinition;
+    }
+  | {
+      source: "subagent";
+      agentName: string;
+      def: AgentToolDefinition;
+    };
+
+export interface RegisteredAgent {
+  name: string;
+  instructions: string;
+  adapter: AgentAdapter;
+  toolIndex: Map<string, ResolvedToolEntry>;
+  baseSystemPrompt?: BaseSystemPromptOption;
+  maxSteps?: number;
+  maxTokens?: number;
+  /** Mirrors `AgentDefinition.ephemeral` — skip thread persistence. */
+  ephemeral?: boolean;
+}
+
+/**
+ * Type guard for `ToolkitEntry` — used by the agents plugin to differentiate
+ * toolkit references from inline tools in a mixed `tools` record.
  */
 export function isToolkitEntry(value: unknown): value is ToolkitEntry {
   return (
diff --git a/packages/shared/src/agent.ts b/packages/shared/src/agent.ts
index c4f76b29..8e34d5fb 100644
--- a/packages/shared/src/agent.ts
+++ b/packages/shared/src/agent.ts
@@ -86,7 +86,21 @@ export type AgentEvent =
       status: "running" | "waiting" | "complete" | "error";
       error?: string;
     }
-  | { type: "metadata"; data: Record<string, unknown> };
+  | { type: "metadata"; data: Record<string, unknown> }
+  | {
+      /**
+       * Emitted by the agents plugin (not adapters) when a tool call annotated
+       * `destructive: true` is awaiting human approval. Clients should render
+       * an approval prompt and POST to `/chat/approve` with the matching
+       * `approvalId` and a `decision` of `approve` or `deny`.
+       */
+      type: "approval_pending";
+      approvalId: string;
+      streamId: string;
+      toolName: string;
+      args: unknown;
+      annotations?: ToolAnnotations;
+    };
 
 // ---------------------------------------------------------------------------
 // Responses API types (OpenAI-compatible wire format for HTTP boundary)
@@ -178,6 +192,23 @@ export interface AppKitMetadataEvent {
   sequence_number: number;
 }
 
+/**
+ * Emitted when a destructive tool call is awaiting human approval. The client
+ * should render an approval UI and POST the decision to `/chat/approve` with
+ * `{ streamId, approvalId, decision: "approve" | "deny" }`. If no decision
+ * arrives before the server-side timeout, the call is auto-denied and the
+ * agent receives a denial string as the tool output.
+ */
+export interface AppKitApprovalPendingEvent {
+  type: "appkit.approval_pending";
+  approval_id: string;
+  stream_id: string;
+  tool_name: string;
+  args: unknown;
+  annotations?: ToolAnnotations;
+  sequence_number: number;
+}
+
 export type ResponseStreamEvent =
   | ResponseOutputItemAddedEvent
   | ResponseOutputItemDoneEvent
@@ -186,7 +217,8 @@ export type ResponseStreamEvent =
   | ResponseErrorEvent
   | ResponseFailedEvent
   | AppKitThinkingEvent
-  | AppKitMetadataEvent;
+  | AppKitMetadataEvent
+  | AppKitApprovalPendingEvent;
 
 // ---------------------------------------------------------------------------
 // Adapter contract
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 16079b1d..307f44cf 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -305,6 +305,9 @@ importers:
       express:
         specifier: 4.22.0
         version: 4.22.0
+      js-yaml:
+        specifier: ^4.1.1
+        version: 4.1.1
       obug:
         specifier: 2.1.1
         version: 2.1.1
@@ -339,6 +342,9 @@ importers:
       '@types/express':
         specifier: 4.17.25
         version: 4.17.25
+      '@types/js-yaml':
+        specifier: ^4.0.9
+        version: 4.0.9
       '@types/json-schema':
         specifier: 7.0.15
         version: 7.0.15
@@ -4989,6 +4995,9 @@ packages:
   '@types/istanbul-reports@3.0.4':
     resolution: {integrity: sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==}
 
+  '@types/js-yaml@4.0.9':
+    resolution: {integrity: sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==}
+
   '@types/jsesc@2.5.1':
     resolution: {integrity: sha512-9VN+6yxLOPLOav+7PwjZbxiID2bVaeq0ED4qSQmdQTdjnXJSaCVKTR58t15oqH1H5t8Ng2ZX1SabJVoN9Q34bw==}
 
@@ -17421,6 +17430,8 @@ snapshots:
     dependencies:
       '@types/istanbul-lib-report': 3.0.3
 
+  '@types/js-yaml@4.0.9': {}
+
   '@types/jsesc@2.5.1': {}
 
   '@types/json-schema@7.0.15': {}
diff --git a/template/appkit.plugins.json b/template/appkit.plugins.json
index d1420d2e..c8589f9a 100644
--- a/template/appkit.plugins.json
+++ b/template/appkit.plugins.json
@@ -2,6 +2,30 @@
   "$schema": "https://databricks.github.io/appkit/schemas/template-plugins.schema.json",
   "version": "1.0",
   "plugins": {
+    "agents": {
+      "name": "agents",
+      "displayName": "Agents Plugin",
+      "description": "AI agents driven by markdown configs or code, with auto-tool-discovery from registered plugins",
+      "package": "@databricks/appkit",
+      "resources": {
+        "required": [],
+        "optional": [
+          {
+            "type": "serving_endpoint",
+            "alias": "Model Serving (agents)",
+            "resourceKey": "agents-serving-endpoint",
+            "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_AGENT_ENDPOINT` when no per-agent model is configured. Omit when agents use only external adapters.",
+            "permission": "CAN_QUERY",
+            "fields": {
+              "name": {
+                "env": "DATABRICKS_AGENT_ENDPOINT",
+                "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`"
+              }
+            }
+          }
+        ]
+      }
+    },
     "analytics": {
       "name": "analytics",
       "displayName": "Analytics Plugin",