diff --git a/knip.json b/knip.json index 13a43187..878dd3f5 100644 --- a/knip.json +++ b/knip.json @@ -20,6 +20,7 @@ "**/*.css", "packages/appkit/src/plugins/vector-search/**", "packages/appkit/src/plugin/index.ts", + "packages/appkit/src/plugin/to-plugin.ts", "packages/appkit/src/plugins/agents/index.ts", "packages/appkit/src/plugins/agents/tools/index.ts", "packages/appkit/src/plugins/agents/from-plugin.ts", diff --git a/packages/appkit/package.json b/packages/appkit/package.json index 27a14e66..49d6c516 100644 --- a/packages/appkit/package.json +++ b/packages/appkit/package.json @@ -83,6 +83,7 @@ "@types/semver": "7.7.1", "dotenv": "16.6.1", "express": "4.22.0", + "js-yaml": "^4.1.1", "obug": "2.1.1", "pg": "8.18.0", "picocolors": "1.1.1", @@ -108,6 +109,7 @@ "@ai-sdk/openai": "4.0.0-beta.27", "@langchain/core": "^1.1.39", "@types/express": "4.17.25", + "@types/js-yaml": "^4.0.9", "@types/json-schema": "7.0.15", "@types/pg": "8.16.0", "@types/ws": "8.18.1", diff --git a/packages/appkit/src/core/create-agent-def.ts b/packages/appkit/src/core/create-agent-def.ts new file mode 100644 index 00000000..3e93371d --- /dev/null +++ b/packages/appkit/src/core/create-agent-def.ts @@ -0,0 +1,53 @@ +import { ConfigurationError } from "../errors"; +import type { AgentDefinition } from "../plugins/agents/types"; + +/** + * Pure factory for agent definitions. Returns the passed-in definition after + * cycle-detecting the sub-agent graph. Accepts the full `AgentDefinition` shape + * and is safe to call at module top-level. + * + * The returned value is a plain `AgentDefinition` — no adapter construction, + * no side effects. Register it with `agents({ agents: { name: def } })` or run + * it standalone via `runAgent(def, input)`. + * + * @example + * ```ts + * const support = createAgent({ + * instructions: "You help customers.", + * model: "databricks-claude-sonnet-4-5", + * tools: { + * get_weather: tool({ ... }), + * }, + * }); + * ``` + */ +export function createAgent(def: AgentDefinition): AgentDefinition { + detectCycles(def); + return def; +} + +/** + * Walks the `agents: { ... }` sub-agent tree via DFS and throws if a cycle is + * found. Cycles would cause infinite recursion at tool-invocation time. + */ +function detectCycles(def: AgentDefinition): void { + const visiting = new Set(); + const visited = new Set(); + + const walk = (current: AgentDefinition, path: string[]): void => { + if (visited.has(current)) return; + if (visiting.has(current)) { + throw new ConfigurationError( + `Agent sub-agent cycle detected: ${path.join(" -> ")}`, + ); + } + visiting.add(current); + for (const [childKey, child] of Object.entries(current.agents ?? {})) { + walk(child, [...path, childKey]); + } + visiting.delete(current); + visited.add(current); + }; + + walk(def, [def.name ?? "(root)"]); +} diff --git a/packages/appkit/src/core/run-agent.ts b/packages/appkit/src/core/run-agent.ts new file mode 100644 index 00000000..e83c2c9c --- /dev/null +++ b/packages/appkit/src/core/run-agent.ts @@ -0,0 +1,226 @@ +import { randomUUID } from "node:crypto"; +import type { + AgentAdapter, + AgentEvent, + AgentToolDefinition, + Message, +} from "shared"; +import { + type FunctionTool, + functionToolToDefinition, + isFunctionTool, +} from "../plugins/agents/tools/function-tool"; +import { isHostedTool } from "../plugins/agents/tools/hosted-tools"; +import type { + AgentDefinition, + AgentTool, + ToolkitEntry, +} from "../plugins/agents/types"; +import { isToolkitEntry } from "../plugins/agents/types"; + +export interface RunAgentInput { + /** Seed messages for the run. Either a single user string or a full message list. */ + messages: string | Message[]; + /** Abort signal for cancellation. */ + signal?: AbortSignal; +} + +export interface RunAgentResult { + /** Aggregated text output from all `message_delta` events. */ + text: string; + /** Every event the adapter yielded, in order. Useful for inspection/tests. */ + events: AgentEvent[]; +} + +/** + * Standalone agent execution without `createApp`. Resolves the adapter, binds + * inline tools, and drives the adapter's `run()` loop to completion. + * + * Limitations vs. running through the agents() plugin: + * - No OBO: there is no HTTP request, so plugin tools run as the service + * principal (when they work at all). + * - Plugin tools (`ToolkitEntry`) are not supported — they require a live + * `PluginContext` that only exists when registered in a `createApp` + * instance. This function throws a clear error if encountered. + * - Sub-agents (`agents: { ... }` on the def) are executed as nested + * `runAgent` calls with no shared thread state. + */ +export async function runAgent( + def: AgentDefinition, + input: RunAgentInput, +): Promise { + const adapter = await resolveAdapter(def); + const messages = normalizeMessages(input.messages, def.instructions); + const toolIndex = buildStandaloneToolIndex(def); + const tools = Array.from(toolIndex.values()).map((e) => e.def); + + const signal = input.signal; + + const executeTool = async (name: string, args: unknown): Promise => { + const entry = toolIndex.get(name); + if (!entry) throw new Error(`Unknown tool: ${name}`); + if (entry.kind === "function") { + return entry.tool.execute(args as Record); + } + if (entry.kind === "subagent") { + const subInput: RunAgentInput = { + messages: + typeof args === "object" && + args !== null && + typeof (args as { input?: unknown }).input === "string" + ? (args as { input: string }).input + : JSON.stringify(args), + signal, + }; + const res = await runAgent(entry.agentDef, subInput); + return res.text; + } + throw new Error( + `runAgent: tool "${name}" is a ${entry.kind} tool. ` + + "Plugin toolkits and MCP tools are only usable via createApp({ plugins: [..., agents(...)] }).", + ); + }; + + const events: AgentEvent[] = []; + let text = ""; + + const stream = adapter.run( + { + messages, + tools, + threadId: randomUUID(), + signal, + }, + { executeTool, signal }, + ); + + for await (const event of stream) { + if (signal?.aborted) break; + events.push(event); + if (event.type === "message_delta") { + text += event.content; + } else if (event.type === "message") { + text = event.content; + } + } + + return { text, events }; +} + +async function resolveAdapter(def: AgentDefinition): Promise { + const { model } = def; + if (!model) { + const { DatabricksAdapter } = await import("../agents/databricks"); + return DatabricksAdapter.fromModelServing(); + } + if (typeof model === "string") { + const { DatabricksAdapter } = await import("../agents/databricks"); + return DatabricksAdapter.fromModelServing(model); + } + return await model; +} + +function normalizeMessages( + input: string | Message[], + instructions: string, +): Message[] { + const systemMessage: Message = { + id: "system", + role: "system", + content: instructions, + createdAt: new Date(), + }; + if (typeof input === "string") { + return [ + systemMessage, + { + id: randomUUID(), + role: "user", + content: input, + createdAt: new Date(), + }, + ]; + } + return [systemMessage, ...input]; +} + +type StandaloneEntry = + | { + kind: "function"; + def: AgentToolDefinition; + tool: FunctionTool; + } + | { + kind: "subagent"; + def: AgentToolDefinition; + agentDef: AgentDefinition; + } + | { + kind: "toolkit"; + def: AgentToolDefinition; + entry: ToolkitEntry; + } + | { + kind: "hosted"; + def: AgentToolDefinition; + }; + +function buildStandaloneToolIndex( + def: AgentDefinition, +): Map { + const index = new Map(); + + for (const [key, tool] of Object.entries(def.tools ?? {})) { + index.set(key, classifyTool(key, tool)); + } + + for (const [childKey, child] of Object.entries(def.agents ?? {})) { + const toolName = `agent-${childKey}`; + index.set(toolName, { + kind: "subagent", + agentDef: { ...child, name: child.name ?? childKey }, + def: { + name: toolName, + description: + child.instructions.slice(0, 120) || + `Delegate to the ${childKey} sub-agent`, + parameters: { + type: "object", + properties: { + input: { + type: "string", + description: "Message to send to the sub-agent.", + }, + }, + required: ["input"], + }, + }, + }); + } + + return index; +} + +function classifyTool(key: string, tool: AgentTool): StandaloneEntry { + if (isToolkitEntry(tool)) { + return { kind: "toolkit", def: { ...tool.def, name: key }, entry: tool }; + } + if (isFunctionTool(tool)) { + return { + kind: "function", + tool, + def: { ...functionToolToDefinition(tool), name: key }, + }; + } + if (isHostedTool(tool)) { + return { + kind: "hosted", + def: { + name: key, + description: `Hosted tool: ${tool.type}`, + parameters: { type: "object", properties: {} }, + }, + }; + } + throw new Error(`runAgent: unrecognized tool shape at key "${key}"`); +} diff --git a/packages/appkit/src/index.ts b/packages/appkit/src/index.ts index 6c6c6f5b..ba4110b3 100644 --- a/packages/appkit/src/index.ts +++ b/packages/appkit/src/index.ts @@ -43,6 +43,12 @@ export { } from "./connectors/lakebase"; export { getExecutionContext } from "./context"; export { createApp } from "./core"; +export { createAgent } from "./core/create-agent-def"; +export { + type RunAgentInput, + type RunAgentResult, + runAgent, +} from "./core/run-agent"; // Errors export { AppKitError, @@ -63,6 +69,20 @@ export { toPlugin, } from "./plugin"; export { analytics, files, genie, lakebase, server, serving } from "./plugins"; +export { + type AgentDefinition, + type AgentsPluginConfig, + type AgentTool, + agentIdFromMarkdownPath, + agents, + type BaseSystemPromptOption, + isToolkitEntry, + loadAgentFromFile, + loadAgentsFromDir, + type PromptContext, + type ToolkitEntry, + type ToolkitOptions, +} from "./plugins/agents"; export { type FunctionTool, type HostedTool, @@ -72,12 +92,6 @@ export { type ToolConfig, tool, } from "./plugins/agents/tools"; -export { - type AgentTool, - isToolkitEntry, - type ToolkitEntry, - type ToolkitOptions, -} from "./plugins/agents/types"; // Files plugin types (for custom policy authoring) export type { FileAction, diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts new file mode 100644 index 00000000..f1938ef2 --- /dev/null +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -0,0 +1,1269 @@ +import { randomUUID } from "node:crypto"; +import path from "node:path"; +import type express from "express"; +import pc from "picocolors"; +import type { + AgentAdapter, + AgentEvent, + AgentRunContext, + AgentToolDefinition, + IAppRouter, + Message, + PluginPhase, + ResponseStreamEvent, + Thread, + ToolProvider, +} from "shared"; +import { createLogger } from "../../logging/logger"; +import { Plugin, toPlugin } from "../../plugin"; +import type { PluginManifest } from "../../registry"; +import { agentStreamDefaults } from "./defaults"; +import { EventChannel } from "./event-channel"; +import { AgentEventTranslator } from "./event-translator"; +import { loadAgentsFromDir } from "./load-agents"; +import manifest from "./manifest.json"; +import { + approvalRequestSchema, + chatRequestSchema, + invocationsRequestSchema, +} from "./schemas"; +import { buildBaseSystemPrompt, composeSystemPrompt } from "./system-prompt"; +import { InMemoryThreadStore } from "./thread-store"; +import { ToolApprovalGate } from "./tool-approval-gate"; +import { + AppKitMcpClient, + functionToolToDefinition, + isFunctionTool, + isHostedTool, + resolveHostedTools, +} from "./tools"; +import { buildMcpHostPolicy } from "./tools/mcp-host-policy"; +import type { + AgentDefinition, + AgentsPluginConfig, + BaseSystemPromptOption, + PromptContext, + RegisteredAgent, + ResolvedToolEntry, +} from "./types"; +import { isToolkitEntry } from "./types"; + +const logger = createLogger("agents"); + +const DEFAULT_AGENTS_DIR = "./config/agents"; + +/** + * Context flag recorded on the in-memory AgentDefinition to indicate whether + * it came from markdown (file) or from user code. Drives the asymmetric + * `autoInheritTools` default. + */ +interface AgentSource { + origin: "file" | "code"; +} + +export class AgentsPlugin extends Plugin implements ToolProvider { + static manifest = manifest as PluginManifest; + static phase: PluginPhase = "deferred"; + + protected declare config: AgentsPluginConfig; + + private agents = new Map(); + private defaultAgentName: string | null = null; + private activeStreams = new Map< + string, + { controller: AbortController; userId: string } + >(); + private mcpClient: AppKitMcpClient | null = null; + private threadStore; + private approvalGate = new ToolApprovalGate(); + + constructor(config: AgentsPluginConfig) { + super(config); + this.config = config; + if (config.threadStore) { + this.threadStore = config.threadStore; + } else { + this.threadStore = new InMemoryThreadStore(); + if (process.env.NODE_ENV === "production") { + logger.warn( + "InMemoryThreadStore is in use in a production build (NODE_ENV=production). " + + "Thread history is unbounded and lost on restart. " + + "Pass agents({ threadStore: }) for real deployments.", + ); + } else { + logger.info( + "Using default InMemoryThreadStore (dev-only — threads are lost on restart and grow without bound).", + ); + } + } + } + + /** Effective approval policy with defaults applied. */ + private get resolvedApprovalPolicy(): { + requireForDestructive: boolean; + timeoutMs: number; + } { + const cfg = this.config.approval ?? {}; + return { + requireForDestructive: cfg.requireForDestructive ?? true, + timeoutMs: cfg.timeoutMs ?? 60_000, + }; + } + + /** Effective DoS limits with defaults applied. */ + private get resolvedLimits(): { + maxConcurrentStreamsPerUser: number; + maxToolCalls: number; + maxSubAgentDepth: number; + } { + const cfg = this.config.limits ?? {}; + return { + maxConcurrentStreamsPerUser: cfg.maxConcurrentStreamsPerUser ?? 5, + maxToolCalls: cfg.maxToolCalls ?? 50, + maxSubAgentDepth: cfg.maxSubAgentDepth ?? 3, + }; + } + + /** Count active streams owned by a given user. */ + private countUserStreams(userId: string): number { + let n = 0; + for (const entry of this.activeStreams.values()) { + if (entry.userId === userId) n++; + } + return n; + } + + async setup() { + await this.loadAgents(); + this.mountInvocationsRoute(); + this.printRegistry(); + } + + /** + * Reload agents from the configured directory, preserving code-defined + * agents. Swaps the registry atomically at the end. + */ + async reload(): Promise { + this.agents.clear(); + this.defaultAgentName = null; + if (this.mcpClient) { + await this.mcpClient.close(); + this.mcpClient = null; + } + await this.loadAgents(); + } + + private async loadAgents() { + const { defs: fileDefs, defaultAgent: fileDefault } = + await this.loadFileDefinitions(); + + const codeDefs = this.config.agents ?? {}; + + for (const name of Object.keys(fileDefs)) { + if (codeDefs[name]) { + logger.warn( + "Agent '%s' defined in both code and a markdown file. Code definition takes precedence.", + name, + ); + } + } + + const merged: Record = + {}; + for (const [name, def] of Object.entries(fileDefs)) { + merged[name] = { def, src: { origin: "file" } }; + } + for (const [name, def] of Object.entries(codeDefs)) { + merged[name] = { def, src: { origin: "code" } }; + } + + if (Object.keys(merged).length === 0) { + logger.info( + "No agents registered (no files in %s, no code-defined agents)", + this.resolvedAgentsDir() ?? "", + ); + return; + } + + for (const [name, { def, src }] of Object.entries(merged)) { + try { + const registered = await this.buildRegisteredAgent(name, def, src); + this.agents.set(name, registered); + if (!this.defaultAgentName) this.defaultAgentName = name; + } catch (err) { + throw new Error( + `Failed to register agent '${name}' (${src.origin}): ${ + err instanceof Error ? err.message : String(err) + }`, + { cause: err instanceof Error ? err : undefined }, + ); + } + } + + if (this.config.defaultAgent) { + if (!this.agents.has(this.config.defaultAgent)) { + throw new Error( + `defaultAgent '${this.config.defaultAgent}' is not registered. Available: ${Array.from(this.agents.keys()).join(", ")}`, + ); + } + this.defaultAgentName = this.config.defaultAgent; + } else if (fileDefault && this.agents.has(fileDefault)) { + this.defaultAgentName = fileDefault; + } + } + + private resolvedAgentsDir(): string | null { + if (this.config.dir === false) return null; + const dir = this.config.dir ?? DEFAULT_AGENTS_DIR; + return path.isAbsolute(dir) ? dir : path.resolve(process.cwd(), dir); + } + + private async loadFileDefinitions(): Promise<{ + defs: Record; + defaultAgent: string | null; + }> { + const dir = this.resolvedAgentsDir(); + if (!dir) return { defs: {}, defaultAgent: null }; + + const pluginToolProviders = this.pluginProviderIndex(); + const ambient = this.config.tools ?? {}; + + const result = await loadAgentsFromDir(dir, { + defaultModel: this.config.defaultModel, + availableTools: ambient, + plugins: pluginToolProviders, + codeAgents: this.config.agents, + }); + + return result; + } + + /** + * Builds the map of plugin-name → toolkit that the markdown loader consults + * when resolving `toolkits:` frontmatter entries. + */ + private pluginProviderIndex(): Map< + string, + { toolkit: (opts?: unknown) => Record } + > { + const out = new Map(); + if (!this.context) return out; + for (const { name, provider } of this.context.getToolProviders()) { + const withToolkit = provider as ToolProvider & { + toolkit?: (opts?: unknown) => Record; + }; + if (typeof withToolkit.toolkit === "function") { + out.set(name, { + toolkit: withToolkit.toolkit.bind(withToolkit), + }); + } + } + return out; + } + + private async buildRegisteredAgent( + name: string, + def: AgentDefinition, + src: AgentSource, + ): Promise { + const adapter = await this.resolveAdapter(def, name); + const toolIndex = await this.buildToolIndex(name, def, src); + + return { + name, + instructions: def.instructions, + adapter, + toolIndex, + baseSystemPrompt: def.baseSystemPrompt, + maxSteps: def.maxSteps, + maxTokens: def.maxTokens, + ephemeral: def.ephemeral, + }; + } + + private async resolveAdapter( + def: AgentDefinition, + name: string, + ): Promise { + const source = def.model ?? this.config.defaultModel; + // Per-agent adapter knobs from `AgentDefinition` / markdown frontmatter. + // Only applied when AppKit builds the adapter itself (string or omitted + // model). Users who pass a pre-built `AgentAdapter` own these settings. + const adapterOptions: { maxSteps?: number; maxTokens?: number } = {}; + if (def.maxSteps !== undefined) adapterOptions.maxSteps = def.maxSteps; + if (def.maxTokens !== undefined) adapterOptions.maxTokens = def.maxTokens; + + if (!source) { + const { DatabricksAdapter } = await import("../../agents/databricks"); + try { + return await DatabricksAdapter.fromModelServing( + undefined, + adapterOptions, + ); + } catch (err) { + throw new Error( + `Agent '${name}' has no model configured and no DATABRICKS_AGENT_ENDPOINT default available`, + { cause: err instanceof Error ? err : undefined }, + ); + } + } + if (typeof source === "string") { + const { DatabricksAdapter } = await import("../../agents/databricks"); + return DatabricksAdapter.fromModelServing(source, adapterOptions); + } + return await source; + } + + /** + * Resolves an agent's tool record into a per-agent dispatch index. Connects + * hosted tools via MCP client. Applies `autoInheritTools` defaults when the + * definition has no declared tools/agents. + */ + private async buildToolIndex( + agentName: string, + def: AgentDefinition, + src: AgentSource, + ): Promise> { + const index = new Map(); + const hasExplicitTools = def.tools && Object.keys(def.tools).length > 0; + const hasExplicitSubAgents = + def.agents && Object.keys(def.agents).length > 0; + + const inheritDefaults = normalizeAutoInherit(this.config.autoInheritTools); + const shouldInherit = + !hasExplicitTools && + !hasExplicitSubAgents && + (src.origin === "file" ? inheritDefaults.file : inheritDefaults.code); + + if (shouldInherit) { + await this.applyAutoInherit(agentName, index); + } + + // 1. Sub-agents → agent- + for (const [childKey, childDef] of Object.entries(def.agents ?? {})) { + const toolName = `agent-${childKey}`; + index.set(toolName, { + source: "subagent", + agentName: childDef.name ?? childKey, + def: { + name: toolName, + description: + childDef.instructions.slice(0, 120) || + `Delegate to the ${childKey} sub-agent`, + parameters: { + type: "object", + properties: { + input: { + type: "string", + description: "Message to send to the sub-agent.", + }, + }, + required: ["input"], + }, + }, + }); + } + + // 2. Explicit tools (toolkit entries, function tools, hosted tools) + const hostedToCollect: import("./tools/hosted-tools").HostedTool[] = []; + for (const [key, tool] of Object.entries(def.tools ?? {})) { + if (isToolkitEntry(tool)) { + index.set(key, { + source: "toolkit", + pluginName: tool.pluginName, + localName: tool.localName, + def: { ...tool.def, name: key }, + }); + continue; + } + if (isFunctionTool(tool)) { + index.set(key, { + source: "function", + functionTool: tool, + def: { ...functionToolToDefinition(tool), name: key }, + }); + continue; + } + if (isHostedTool(tool)) { + hostedToCollect.push(tool); + continue; + } + throw new Error( + `Agent '${agentName}' tool '${key}' has an unrecognized shape`, + ); + } + + if (hostedToCollect.length > 0) { + await this.connectHostedTools(hostedToCollect, index); + } + + return index; + } + + private async applyAutoInherit( + agentName: string, + index: Map, + ): Promise { + if (!this.context) return; + const inherited: string[] = []; + const skippedByPlugin = new Map(); + const recordSkip = (pluginName: string, localName: string) => { + const list = skippedByPlugin.get(pluginName) ?? []; + list.push(localName); + skippedByPlugin.set(pluginName, list); + }; + + for (const { + name: pluginName, + provider, + } of this.context.getToolProviders()) { + if (pluginName === this.name) continue; + const withToolkit = provider as ToolProvider & { + toolkit?: (opts?: unknown) => Record; + }; + if (typeof withToolkit.toolkit === "function") { + const entries = withToolkit.toolkit() as Record; + for (const [key, maybeEntry] of Object.entries(entries)) { + if (!isToolkitEntry(maybeEntry)) continue; + if (maybeEntry.autoInheritable !== true) { + recordSkip(maybeEntry.pluginName, maybeEntry.localName); + continue; + } + index.set(key, { + source: "toolkit", + pluginName: maybeEntry.pluginName, + localName: maybeEntry.localName, + def: { ...maybeEntry.def, name: key }, + }); + inherited.push(key); + } + continue; + } + // Fallback: providers without a toolkit() still expose getAgentTools(). + // These cannot be selectively opted in per tool, so we conservatively + // skip them during auto-inherit and require explicit `tools:` wiring. + for (const tool of provider.getAgentTools()) { + recordSkip(pluginName, tool.name); + } + } + + if (inherited.length > 0) { + logger.info( + "[agent %s] auto-inherited %d tool(s): %s", + agentName, + inherited.length, + inherited.join(", "), + ); + } + if (skippedByPlugin.size > 0) { + const summary = Array.from(skippedByPlugin.entries()) + .map(([p, tools]) => `${p}(${tools.length})`) + .join(", "); + logger.info( + "[agent %s] auto-inherit skipped %d tool(s) not marked autoInheritable: %s. Wire them explicitly via `tools:` if needed.", + agentName, + Array.from(skippedByPlugin.values()).reduce( + (n, list) => n + list.length, + 0, + ), + summary, + ); + } + } + + private async connectHostedTools( + hostedTools: import("./tools/hosted-tools").HostedTool[], + index: Map, + ): Promise { + let host: string | undefined; + let authenticate: () => Promise>; + + try { + const { getWorkspaceClient } = await import("../../context"); + const wsClient = getWorkspaceClient(); + await wsClient.config.ensureResolved(); + host = wsClient.config.host; + authenticate = async () => { + const headers = new Headers(); + await wsClient.config.authenticate(headers); + return Object.fromEntries(headers.entries()); + }; + } catch { + host = process.env.DATABRICKS_HOST; + authenticate = async (): Promise> => { + const token = process.env.DATABRICKS_TOKEN; + return token ? { Authorization: `Bearer ${token}` } : {}; + }; + } + + if (!host) { + logger.warn( + "No Databricks host available — skipping %d hosted tool(s)", + hostedTools.length, + ); + return; + } + + if (!this.mcpClient) { + const policy = buildMcpHostPolicy(this.config.mcp, host); + this.mcpClient = new AppKitMcpClient(host, authenticate, policy); + } + + const endpoints = resolveHostedTools(hostedTools); + await this.mcpClient.connectAll(endpoints); + + for (const def of this.mcpClient.getAllToolDefinitions()) { + index.set(def.name, { + source: "mcp", + mcpToolName: def.name, + def, + }); + } + } + + // ----------------- ToolProvider (no tools of our own) -------------------- + + getAgentTools(): AgentToolDefinition[] { + return []; + } + + async executeAgentTool(): Promise { + throw new Error("AgentsPlugin does not expose executeAgentTool directly"); + } + + // ----------------- Route mounting and handlers --------------------------- + + private mountInvocationsRoute() { + if (!this.context) return; + this.context.addRoute( + "post", + "/invocations", + (req: express.Request, res: express.Response) => { + this._handleInvocations(req, res); + }, + ); + } + + injectRoutes(router: IAppRouter) { + this.route(router, { + name: "chat", + method: "post", + path: "/chat", + handler: async (req, res) => this._handleChat(req, res), + }); + this.route(router, { + name: "cancel", + method: "post", + path: "/cancel", + handler: async (req, res) => this._handleCancel(req, res), + }); + this.route(router, { + name: "approve", + method: "post", + path: "/approve", + handler: async (req, res) => this._handleApprove(req, res), + }); + this.route(router, { + name: "threads", + method: "get", + path: "/threads", + handler: async (req, res) => this._handleListThreads(req, res), + }); + this.route(router, { + name: "thread", + method: "get", + path: "/threads/:threadId", + handler: async (req, res) => this._handleGetThread(req, res), + }); + this.route(router, { + name: "deleteThread", + method: "delete", + path: "/threads/:threadId", + handler: async (req, res) => this._handleDeleteThread(req, res), + }); + this.route(router, { + name: "info", + method: "get", + path: "/info", + handler: async (_req, res) => { + res.json({ + agents: Array.from(this.agents.keys()), + defaultAgent: this.defaultAgentName, + }); + }, + }); + } + + clientConfig(): Record { + return { + agents: Array.from(this.agents.keys()), + defaultAgent: this.defaultAgentName, + }; + } + + private async _handleChat(req: express.Request, res: express.Response) { + const parsed = chatRequestSchema.safeParse(req.body); + if (!parsed.success) { + res.status(400).json({ + error: "Invalid request", + details: parsed.error.flatten().fieldErrors, + }); + return; + } + const { message, threadId, agent: agentName } = parsed.data; + + const registered = this.resolveAgent(agentName); + if (!registered) { + res.status(400).json({ + error: agentName + ? `Agent "${agentName}" not found` + : "No agent registered", + }); + return; + } + + const userId = this.resolveUserId(req); + + // Reject early (before allocating a thread) when the user is already at + // their concurrent-stream limit. Prevents a misbehaving client from + // churning thread rows while being denied elsewhere. + const limits = this.resolvedLimits; + if (this.countUserStreams(userId) >= limits.maxConcurrentStreamsPerUser) { + res.setHeader("Retry-After", "5"); + res.status(429).json({ + error: `Too many concurrent streams for this user (limit ${limits.maxConcurrentStreamsPerUser}). Wait for an existing stream to complete before starting another.`, + }); + return; + } + + let thread = threadId ? await this.threadStore.get(threadId, userId) : null; + if (threadId && !thread) { + res.status(404).json({ error: `Thread ${threadId} not found` }); + return; + } + if (!thread) { + thread = await this.threadStore.create(userId); + } + + const userMessage: Message = { + id: randomUUID(), + role: "user", + content: message, + createdAt: new Date(), + }; + await this.threadStore.addMessage(thread.id, userId, userMessage); + return this._streamAgent(req, res, registered, thread, userId); + } + + private async _handleInvocations( + req: express.Request, + res: express.Response, + ) { + const parsed = invocationsRequestSchema.safeParse(req.body); + if (!parsed.success) { + res.status(400).json({ + error: "Invalid request", + details: parsed.error.flatten().fieldErrors, + }); + return; + } + const { input } = parsed.data; + const registered = this.resolveAgent(); + if (!registered) { + res.status(400).json({ error: "No agent registered" }); + return; + } + const userId = this.resolveUserId(req); + const thread = await this.threadStore.create(userId); + + if (typeof input === "string") { + await this.threadStore.addMessage(thread.id, userId, { + id: randomUUID(), + role: "user", + content: input, + createdAt: new Date(), + }); + } else { + for (const item of input) { + const role = (item.role ?? "user") as Message["role"]; + const content = + typeof item.content === "string" + ? item.content + : JSON.stringify(item.content ?? ""); + if (!content) continue; + await this.threadStore.addMessage(thread.id, userId, { + id: randomUUID(), + role, + content, + createdAt: new Date(), + }); + } + } + + return this._streamAgent(req, res, registered, thread, userId); + } + + private async _streamAgent( + req: express.Request, + res: express.Response, + registered: RegisteredAgent, + thread: Thread, + userId: string, + ): Promise { + const abortController = new AbortController(); + const signal = abortController.signal; + const requestId = randomUUID(); + this.activeStreams.set(requestId, { controller: abortController, userId }); + + const tools = Array.from(registered.toolIndex.values()).map((e) => e.def); + const approvalPolicy = this.resolvedApprovalPolicy; + const limits = this.resolvedLimits; + const outboundEvents = new EventChannel(); + const translator = new AgentEventTranslator(); + // Per-run tool-call budget (shared across the top-level adapter and any + // sub-agents it delegates to). Counted pre-dispatch so a prompt-injected + // agent cannot drain the budget silently via denied calls. + let toolCallsUsed = 0; + + const executeTool = async ( + name: string, + args: unknown, + ): Promise => { + if (toolCallsUsed >= limits.maxToolCalls) { + abortController.abort( + new Error( + `Tool-call budget exhausted (limit ${limits.maxToolCalls}).`, + ), + ); + throw new Error( + `Tool-call budget exhausted (limit ${limits.maxToolCalls}). Raise agents({ limits: { maxToolCalls } }) or review the agent's tool-selection logic.`, + ); + } + toolCallsUsed++; + + const entry = registered.toolIndex.get(name); + if (!entry) throw new Error(`Unknown tool: ${name}`); + + if ( + approvalPolicy.requireForDestructive && + entry.def.annotations?.destructive === true + ) { + const approvalId = randomUUID(); + for (const ev of translator.translate({ + type: "approval_pending", + approvalId, + streamId: requestId, + toolName: name, + args, + annotations: entry.def.annotations, + })) { + outboundEvents.push(ev); + } + const decision = await this.approvalGate.wait({ + approvalId, + streamId: requestId, + userId, + timeoutMs: approvalPolicy.timeoutMs, + }); + if (decision === "deny") { + return `Tool execution denied by user approval gate (tool: ${name}).`; + } + } + + let result: unknown; + if (entry.source === "toolkit") { + if (!this.context) { + throw new Error( + "Plugin tool execution requires PluginContext; this should never happen through createApp", + ); + } + result = await this.context.executeTool( + req, + entry.pluginName, + entry.localName, + args, + signal, + ); + } else if (entry.source === "function") { + result = await entry.functionTool.execute( + args as Record, + ); + } else if (entry.source === "mcp") { + if (!this.mcpClient) throw new Error("MCP client not connected"); + const oboToken = req.headers["x-forwarded-access-token"]; + const mcpAuth = + typeof oboToken === "string" + ? { Authorization: `Bearer ${oboToken}` } + : undefined; + result = await this.mcpClient.callTool( + entry.mcpToolName, + args, + mcpAuth, + ); + } else if (entry.source === "subagent") { + const childAgent = this.agents.get(entry.agentName); + if (!childAgent) + throw new Error(`Sub-agent not found: ${entry.agentName}`); + result = await this.runSubAgent(req, childAgent, args, signal, 1); + } + + // A `void` / `undefined` return is a legitimate tool outcome (e.g., a + // "send notification" side-effecting tool). Return an empty string so + // the LLM sees a successful-but-empty result rather than a bogus + // "execution failed" error. + if (result === undefined) { + return ""; + } + const MAX = 50_000; + const serialized = + typeof result === "string" ? result : JSON.stringify(result); + if (serialized.length > MAX) { + return `${serialized.slice(0, MAX)}\n\n[Result truncated: ${serialized.length} chars exceeds ${MAX} limit]`; + } + return result; + }; + + // Drive the adapter and the approval-event side-channel concurrently. + // Outbound events from both sources flow through `outboundEvents`; the + // generator below drains the channel in order. executeTool pushes + // approval-pending events into the same channel before awaiting the gate. + const driver = (async () => { + try { + for (const evt of translator.translate({ + type: "metadata", + data: { threadId: thread.id }, + })) { + outboundEvents.push(evt); + } + + const pluginNames = this.context + ? this.context + .getPluginNames() + .filter((n) => n !== this.name && n !== "server") + : []; + const fullPrompt = composePromptForAgent( + registered, + this.config.baseSystemPrompt, + { + agentName: registered.name, + pluginNames, + toolNames: tools.map((t) => t.name), + }, + ); + + const messagesWithSystem: Message[] = [ + { + id: "system", + role: "system", + content: fullPrompt, + createdAt: new Date(), + }, + ...thread.messages, + ]; + + const stream = registered.adapter.run( + { + messages: messagesWithSystem, + tools, + threadId: thread.id, + signal, + }, + { executeTool, signal }, + ); + + // Accumulate assistant output from BOTH streaming and non-streaming + // adapters. Delta-based adapters (Databricks, Vercel AI) emit + // `message_delta` chunks that we concatenate; adapters that yield a + // single final assistant message (e.g. LangChain's `on_chain_end` + // path) emit a `message` event whose content replaces whatever + // deltas already arrived. Without the `message` branch, multi-turn + // LangChain conversations silently dropped the assistant turn from + // thread history. + let fullContent = ""; + for await (const event of stream) { + if (signal.aborted) break; + if (event.type === "message_delta") { + fullContent += event.content; + } else if (event.type === "message") { + fullContent = event.content; + } + for (const translated of translator.translate(event)) { + outboundEvents.push(translated); + } + } + + if (fullContent) { + await this.threadStore.addMessage(thread.id, userId, { + id: randomUUID(), + role: "assistant", + content: fullContent, + createdAt: new Date(), + }); + } + + for (const evt of translator.finalize()) outboundEvents.push(evt); + } catch (error) { + if (signal.aborted) { + outboundEvents.close(); + return; + } + logger.error("Agent chat error: %O", error); + outboundEvents.close(error); + return; + } finally { + // Any pending approval gates for this stream are auto-denied so the + // adapter can unwind if it was still waiting. + this.approvalGate.abortStream(requestId); + this.activeStreams.delete(requestId); + // Stateless agents (e.g. autocomplete) don't persist history; drop + // the thread so `InMemoryThreadStore` doesn't accumulate one record + // per request. Swallow delete errors — the stream has already + // finished and the client has the response. + if (registered.ephemeral) { + try { + await this.threadStore.delete(thread.id, userId); + } catch (err) { + logger.warn( + "Failed to delete ephemeral thread %s: %O", + thread.id, + err, + ); + } + } + } + outboundEvents.close(); + })(); + + await this.executeStream( + res, + async function* () { + try { + for await (const ev of outboundEvents) { + yield ev; + } + } finally { + await driver.catch(() => undefined); + } + }, + { + ...agentStreamDefaults, + stream: { ...agentStreamDefaults.stream, streamId: requestId }, + }, + ); + } + + /** + * Runs a sub-agent in response to an `agent-` tool call. Returns the + * concatenated text output to hand back to the parent adapter as the tool + * result. + * + * `depth` starts at 1 for a top-level sub-agent invocation (i.e. the + * outer `_streamAgent` calls `runSubAgent(..., 1)`) and increments on + * each nested `runSubAgent` call. Depths exceeding + * `limits.maxSubAgentDepth` are rejected before any adapter work. + */ + private async runSubAgent( + req: express.Request, + child: RegisteredAgent, + args: unknown, + signal: AbortSignal, + depth: number, + ): Promise { + const limits = this.resolvedLimits; + if (depth > limits.maxSubAgentDepth) { + throw new Error( + `Sub-agent depth exceeded (limit ${limits.maxSubAgentDepth}). ` + + `Raise agents({ limits: { maxSubAgentDepth } }) or break the delegation cycle.`, + ); + } + + const input = + typeof args === "object" && + args !== null && + typeof (args as { input?: unknown }).input === "string" + ? (args as { input: string }).input + : JSON.stringify(args); + const childTools = Array.from(child.toolIndex.values()).map((e) => e.def); + + const childExecute = async ( + name: string, + childArgs: unknown, + ): Promise => { + const entry = child.toolIndex.get(name); + if (!entry) throw new Error(`Unknown tool in sub-agent: ${name}`); + if (entry.source === "toolkit" && this.context) { + return this.context.executeTool( + req, + entry.pluginName, + entry.localName, + childArgs, + signal, + ); + } + if (entry.source === "function") { + return entry.functionTool.execute(childArgs as Record); + } + if (entry.source === "subagent") { + const grandchild = this.agents.get(entry.agentName); + if (!grandchild) + throw new Error(`Sub-agent not found: ${entry.agentName}`); + return this.runSubAgent(req, grandchild, childArgs, signal, depth + 1); + } + if (entry.source === "mcp" && this.mcpClient) { + const oboToken = req.headers["x-forwarded-access-token"]; + const mcpAuth = + typeof oboToken === "string" + ? { Authorization: `Bearer ${oboToken}` } + : undefined; + return this.mcpClient.callTool(entry.mcpToolName, childArgs, mcpAuth); + } + throw new Error(`Unsupported sub-agent tool source: ${entry.source}`); + }; + + const runContext: AgentRunContext = { executeTool: childExecute, signal }; + + const pluginNames = this.context + ? this.context + .getPluginNames() + .filter((n) => n !== this.name && n !== "server") + : []; + const systemPrompt = composePromptForAgent( + child, + this.config.baseSystemPrompt, + { + agentName: child.name, + pluginNames, + toolNames: childTools.map((t) => t.name), + }, + ); + + const messages: Message[] = [ + { + id: "system", + role: "system", + content: systemPrompt, + createdAt: new Date(), + }, + { + id: randomUUID(), + role: "user", + content: input, + createdAt: new Date(), + }, + ]; + + let output = ""; + const events: AgentEvent[] = []; + for await (const event of child.adapter.run( + { messages, tools: childTools, threadId: randomUUID(), signal }, + runContext, + )) { + events.push(event); + if (event.type === "message_delta") output += event.content; + else if (event.type === "message") output = event.content; + } + return output; + } + + private async _handleCancel(req: express.Request, res: express.Response) { + const { streamId } = req.body as { streamId?: string }; + if (!streamId) { + res.status(400).json({ error: "streamId is required" }); + return; + } + const entry = this.activeStreams.get(streamId); + if (!entry) { + // Stream is unknown or already completed — idempotent no-op. + res.json({ cancelled: true }); + return; + } + const userId = this.resolveUserId(req); + if (entry.userId !== userId) { + res.status(403).json({ error: "Forbidden" }); + return; + } + entry.controller.abort("Cancelled by user"); + this.activeStreams.delete(streamId); + this.approvalGate.abortStream(streamId); + res.json({ cancelled: true }); + } + + private async _handleApprove(req: express.Request, res: express.Response) { + const parsed = approvalRequestSchema.safeParse(req.body); + if (!parsed.success) { + res.status(400).json({ + error: "Invalid request", + details: parsed.error.flatten().fieldErrors, + }); + return; + } + const { streamId, approvalId, decision } = parsed.data; + + const streamEntry = this.activeStreams.get(streamId); + if (!streamEntry) { + // Stream has already completed or never existed. Return 404 so the UI + // knows the approval token is no longer valid (the waiter, if any, has + // already been timed out or aborted). + res.status(404).json({ error: "Stream not found or already completed" }); + return; + } + + const userId = this.resolveUserId(req); + if (streamEntry.userId !== userId) { + res.status(403).json({ error: "Forbidden" }); + return; + } + + const result = this.approvalGate.submit({ approvalId, userId, decision }); + if (!result.ok) { + if (result.reason === "forbidden") { + res.status(403).json({ error: "Forbidden" }); + return; + } + res.status(404).json({ error: "Approval not found or already settled" }); + return; + } + + res.json({ decision }); + } + + private async _handleListThreads( + req: express.Request, + res: express.Response, + ) { + const userId = this.resolveUserId(req); + const threads = await this.threadStore.list(userId); + res.json({ threads }); + } + + private async _handleGetThread(req: express.Request, res: express.Response) { + const userId = this.resolveUserId(req); + const thread = await this.threadStore.get(req.params.threadId, userId); + if (!thread) { + res.status(404).json({ error: "Thread not found" }); + return; + } + res.json(thread); + } + + private async _handleDeleteThread( + req: express.Request, + res: express.Response, + ) { + const userId = this.resolveUserId(req); + const deleted = await this.threadStore.delete(req.params.threadId, userId); + if (!deleted) { + res.status(404).json({ error: "Thread not found" }); + return; + } + res.json({ deleted: true }); + } + + private resolveAgent(name?: string): RegisteredAgent | null { + if (name) return this.agents.get(name) ?? null; + if (this.defaultAgentName) { + return this.agents.get(this.defaultAgentName) ?? null; + } + const first = this.agents.values().next(); + return first.done ? null : first.value; + } + + private printRegistry(): void { + if (this.agents.size === 0) return; + console.log(""); + console.log(` ${pc.bold("Agents")} ${pc.dim(`(${this.agents.size})`)}`); + console.log(` ${pc.dim("─".repeat(60))}`); + for (const [name, reg] of this.agents) { + const tools = reg.toolIndex.size; + const marker = name === this.defaultAgentName ? pc.green("●") : " "; + console.log( + ` ${marker} ${pc.bold(name.padEnd(24))} ${pc.dim(`${tools} tools`)}`, + ); + } + console.log(` ${pc.dim("─".repeat(60))}`); + console.log(""); + } + + async shutdown(): Promise { + this.approvalGate.abortAll(); + if (this.mcpClient) { + await this.mcpClient.close(); + this.mcpClient = null; + } + } + + exports() { + return { + register: (name: string, def: AgentDefinition) => + this.registerCodeAgent(name, def), + list: () => Array.from(this.agents.keys()), + get: (name: string) => this.agents.get(name) ?? null, + reload: () => this.reload(), + getDefault: () => this.defaultAgentName, + getThreads: (userId: string) => this.threadStore.list(userId), + }; + } + + private async registerCodeAgent( + name: string, + def: AgentDefinition, + ): Promise { + const registered = await this.buildRegisteredAgent(name, def, { + origin: "code", + }); + this.agents.set(name, registered); + if (!this.defaultAgentName) this.defaultAgentName = name; + } +} + +function normalizeAutoInherit(value: AgentsPluginConfig["autoInheritTools"]): { + file: boolean; + code: boolean; +} { + // Default is opt-out for both origins. A markdown agent or code-defined + // agent with no declared `tools:` gets an empty tool index unless the + // developer explicitly flips `autoInheritTools` on. Even then, only tools + // whose plugin author marked `autoInheritable: true` are spread — see + // `applyAutoInherit` for the filter. + if (value === undefined) return { file: false, code: false }; + if (typeof value === "boolean") return { file: value, code: value }; + return { file: value.file ?? false, code: value.code ?? false }; +} + +function composePromptForAgent( + registered: RegisteredAgent, + pluginLevel: BaseSystemPromptOption | undefined, + ctx: PromptContext, +): string { + const perAgent = registered.baseSystemPrompt; + const resolved = perAgent !== undefined ? perAgent : pluginLevel; + + let base = ""; + if (resolved === false) { + base = ""; + } else if (typeof resolved === "string") { + base = resolved; + } else if (typeof resolved === "function") { + base = resolved(ctx); + } else { + base = buildBaseSystemPrompt(ctx); + } + + return composeSystemPrompt(base, registered.instructions); +} + +/** + * Plugin factory for the agents plugin. Reads `config/agents//agent.md` by default, + * resolves toolkits/tools from registered plugins, exposes `appkit.agents.*` + * runtime API and mounts `/invocations`. + * + * @example + * ```ts + * import { agents, analytics, createApp, server } from "@databricks/appkit"; + * + * await createApp({ + * plugins: [server(), analytics(), agents()], + * }); + * ``` + */ +export const agents = toPlugin(AgentsPlugin); diff --git a/packages/appkit/src/plugins/agents/defaults.ts b/packages/appkit/src/plugins/agents/defaults.ts new file mode 100644 index 00000000..4da11bef --- /dev/null +++ b/packages/appkit/src/plugins/agents/defaults.ts @@ -0,0 +1,12 @@ +import type { StreamExecutionSettings } from "shared"; + +export const agentStreamDefaults: StreamExecutionSettings = { + default: { + cache: { enabled: false }, + retry: { enabled: false }, + timeout: 300_000, + }, + stream: { + bufferSize: 200, + }, +}; diff --git a/packages/appkit/src/plugins/agents/event-channel.ts b/packages/appkit/src/plugins/agents/event-channel.ts new file mode 100644 index 00000000..c5b60463 --- /dev/null +++ b/packages/appkit/src/plugins/agents/event-channel.ts @@ -0,0 +1,70 @@ +/** + * Single-producer/single-consumer async queue used by the agents plugin to + * merge streams of SSE events from two concurrent sources: the adapter's + * `run()` generator, and out-of-band events emitted by `executeTool` (e.g. + * human-approval requests). + * + * The consumer drains the channel as an async iterable; the producer pushes + * events synchronously and closes the channel when the source has completed + * or errored. + */ +interface Waiter { + resolve: (value: IteratorResult) => void; + reject: (error: unknown) => void; +} + +export class EventChannel { + private queue: T[] = []; + private waiters: Array> = []; + private closed = false; + private error: unknown = undefined; + + /** Synchronously enqueue an event. Safe to call from non-async contexts. */ + push(value: T): void { + if (this.closed) return; + const waiter = this.waiters.shift(); + if (waiter) { + waiter.resolve({ value, done: false }); + } else { + this.queue.push(value); + } + } + + /** + * Close the channel. Any pending `next()` calls resolve with `done: true`. + * If `error` is supplied, pending `next()` calls reject with it and future + * calls do the same. + */ + close(error?: unknown): void { + if (this.closed) return; + this.closed = true; + this.error = error; + while (this.waiters.length > 0) { + const waiter = this.waiters.shift(); + if (!waiter) break; + if (error) { + waiter.reject(error); + } else { + waiter.resolve({ value: undefined as never, done: true }); + } + } + } + + [Symbol.asyncIterator](): AsyncIterator { + return { + next: (): Promise> => { + if (this.queue.length > 0) { + const value = this.queue.shift() as T; + return Promise.resolve({ value, done: false }); + } + if (this.closed) { + if (this.error) return Promise.reject(this.error); + return Promise.resolve({ value: undefined as never, done: true }); + } + return new Promise((resolve, reject) => { + this.waiters.push({ resolve, reject }); + }); + }, + }; + } +} diff --git a/packages/appkit/src/plugins/agents/event-translator.ts b/packages/appkit/src/plugins/agents/event-translator.ts new file mode 100644 index 00000000..54d749fb --- /dev/null +++ b/packages/appkit/src/plugins/agents/event-translator.ts @@ -0,0 +1,291 @@ +import { randomUUID } from "node:crypto"; +import type { + AgentEvent, + ResponseFunctionCallOutput, + ResponseFunctionToolCall, + ResponseOutputMessage, + ResponseStreamEvent, +} from "shared"; + +/** + * Translates internal `AgentEvent` stream into Responses API SSE events. + * + * Stateful: one instance per streaming request. Tracks sequence numbers and + * allocates `output_index` strictly monotonically — each emitted output + * item (message, function call, function call output) claims the next + * available index and, once claimed, never reuses an earlier one. This is a + * Responses-API contract that OpenAI's own SDK parsers enforce. + * + * A message is opened lazily on the first `message_delta` or `message` + * event. If a `tool_call` or `tool_result` arrives while a message is open + * (common ReAct flow: partial text → tool call → more text), the open + * message is closed (`response.output_item.done`) BEFORE the tool item is + * added, so subsequent text resumes as a new message item at a strictly + * later index. + */ +export class AgentEventTranslator { + private seqNum = 0; + private nextOutputIndex = 0; + private currentMessage: { + id: string; + text: string; + outputIndex: number; + } | null = null; + private finalized = false; + + translate(event: AgentEvent): ResponseStreamEvent[] { + switch (event.type) { + case "message_delta": + return this.handleMessageDelta(event.content); + case "message": + return this.handleFullMessage(event.content); + case "tool_call": + return this.handleToolCall(event.callId, event.name, event.args); + case "tool_result": + return this.handleToolResult(event.callId, event.result, event.error); + case "thinking": + return [ + { + type: "appkit.thinking", + content: event.content, + sequence_number: this.seqNum++, + }, + ]; + case "metadata": + return [ + { + type: "appkit.metadata", + data: event.data, + sequence_number: this.seqNum++, + }, + ]; + case "approval_pending": + return [ + { + type: "appkit.approval_pending", + approval_id: event.approvalId, + stream_id: event.streamId, + tool_name: event.toolName, + args: event.args, + annotations: event.annotations, + sequence_number: this.seqNum++, + }, + ]; + case "status": + return this.handleStatus(event.status, event.error); + } + } + + finalize(): ResponseStreamEvent[] { + if (this.finalized) return []; + this.finalized = true; + + const events: ResponseStreamEvent[] = []; + const closeEvent = this.closeCurrentMessage(); + if (closeEvent) events.push(closeEvent); + + events.push({ + type: "response.completed", + sequence_number: this.seqNum++, + response: {}, + }); + + return events; + } + + private handleMessageDelta(content: string): ResponseStreamEvent[] { + const events: ResponseStreamEvent[] = []; + + if (!this.currentMessage) { + const id = `msg_${randomUUID()}`; + const outputIndex = this.nextOutputIndex++; + this.currentMessage = { id, text: content, outputIndex }; + const item: ResponseOutputMessage = { + type: "message", + id, + status: "in_progress", + role: "assistant", + content: [], + }; + events.push({ + type: "response.output_item.added", + output_index: outputIndex, + item, + sequence_number: this.seqNum++, + }); + } else { + this.currentMessage.text += content; + } + + events.push({ + type: "response.output_text.delta", + item_id: this.currentMessage.id, + output_index: this.currentMessage.outputIndex, + content_index: 0, + delta: content, + sequence_number: this.seqNum++, + }); + + return events; + } + + private handleFullMessage(content: string): ResponseStreamEvent[] { + const events: ResponseStreamEvent[] = []; + + if (!this.currentMessage) { + // No prior deltas — open and immediately close. + const id = `msg_${randomUUID()}`; + const outputIndex = this.nextOutputIndex++; + this.currentMessage = { id, text: content, outputIndex }; + const addedItem: ResponseOutputMessage = { + type: "message", + id, + status: "in_progress", + role: "assistant", + content: [], + }; + events.push({ + type: "response.output_item.added", + output_index: outputIndex, + item: addedItem, + sequence_number: this.seqNum++, + }); + } else { + // Deltas already opened the item; `message` overrides the accumulated + // text (per adapter contract) and closes it. + this.currentMessage.text = content; + } + + const closeEvent = this.closeCurrentMessage(); + if (closeEvent) events.push(closeEvent); + return events; + } + + private handleToolCall( + callId: string, + name: string, + args: unknown, + ): ResponseStreamEvent[] { + const events: ResponseStreamEvent[] = []; + const closeEvent = this.closeCurrentMessage(); + if (closeEvent) events.push(closeEvent); + + const outputIndex = this.nextOutputIndex++; + const item: ResponseFunctionToolCall = { + type: "function_call", + id: `fc_${randomUUID()}`, + call_id: callId, + name, + arguments: typeof args === "string" ? args : JSON.stringify(args), + }; + + events.push( + { + type: "response.output_item.added", + output_index: outputIndex, + item, + sequence_number: this.seqNum++, + }, + { + type: "response.output_item.done", + output_index: outputIndex, + item, + sequence_number: this.seqNum++, + }, + ); + return events; + } + + private handleToolResult( + callId: string, + result: unknown, + error?: string, + ): ResponseStreamEvent[] { + const events: ResponseStreamEvent[] = []; + const closeEvent = this.closeCurrentMessage(); + if (closeEvent) events.push(closeEvent); + + const outputIndex = this.nextOutputIndex++; + // Coalesce `undefined` → "" so the wire shape is always a string (the + // Responses API contract). Non-string results are JSON-serialised. + let output: string; + if (error !== undefined) { + output = error; + } else if (typeof result === "string") { + output = result; + } else if (result === undefined) { + output = ""; + } else { + output = JSON.stringify(result); + } + const item: ResponseFunctionCallOutput = { + type: "function_call_output", + id: `fc_output_${randomUUID()}`, + call_id: callId, + output, + }; + + events.push( + { + type: "response.output_item.added", + output_index: outputIndex, + item, + sequence_number: this.seqNum++, + }, + { + type: "response.output_item.done", + output_index: outputIndex, + item, + sequence_number: this.seqNum++, + }, + ); + return events; + } + + /** + * Emit an `response.output_item.done` for the currently-open message, if + * any, and clear the state. Returns the event to the caller so it can be + * pushed at the right moment in the sequence. Returns `null` when there + * is no open message. + */ + private closeCurrentMessage(): ResponseStreamEvent | null { + if (!this.currentMessage) return null; + const { id, text, outputIndex } = this.currentMessage; + this.currentMessage = null; + const doneItem: ResponseOutputMessage = { + type: "message", + id, + status: "completed", + role: "assistant", + content: [{ type: "output_text", text }], + }; + return { + type: "response.output_item.done", + output_index: outputIndex, + item: doneItem, + sequence_number: this.seqNum++, + }; + } + + private handleStatus(status: string, error?: string): ResponseStreamEvent[] { + if (status === "error") { + return [ + { + type: "error", + error: error ?? "Unknown error", + sequence_number: this.seqNum++, + }, + { + type: "response.failed", + sequence_number: this.seqNum++, + }, + ]; + } + + if (status === "complete") { + return this.finalize(); + } + + return []; + } +} diff --git a/packages/appkit/src/plugins/agents/index.ts b/packages/appkit/src/plugins/agents/index.ts new file mode 100644 index 00000000..377a8776 --- /dev/null +++ b/packages/appkit/src/plugins/agents/index.ts @@ -0,0 +1,23 @@ +export { AgentsPlugin, agents } from "./agents"; +export { buildToolkitEntries } from "./build-toolkit"; +export { + agentIdFromMarkdownPath, + type LoadContext, + type LoadResult, + loadAgentFromFile, + loadAgentsFromDir, + parseFrontmatter, +} from "./load-agents"; +export { + type AgentDefinition, + type AgentsPluginConfig, + type AgentTool, + type AutoInheritToolsConfig, + type BaseSystemPromptOption, + isToolkitEntry, + type PromptContext, + type RegisteredAgent, + type ResolvedToolEntry, + type ToolkitEntry, + type ToolkitOptions, +} from "./types"; diff --git a/packages/appkit/src/plugins/agents/load-agents.ts b/packages/appkit/src/plugins/agents/load-agents.ts new file mode 100644 index 00000000..5b2999ca --- /dev/null +++ b/packages/appkit/src/plugins/agents/load-agents.ts @@ -0,0 +1,407 @@ +import fs from "node:fs"; +import path from "node:path"; +import yaml from "js-yaml"; +import type { AgentAdapter } from "shared"; +import { createLogger } from "../../logging/logger"; +import type { + AgentDefinition, + AgentTool, + BaseSystemPromptOption, + ToolkitEntry, + ToolkitOptions, +} from "./types"; +import { isToolkitEntry } from "./types"; + +const logger = createLogger("agents:loader"); + +interface ToolkitProvider { + toolkit: (opts?: ToolkitOptions) => Record; +} + +export interface LoadContext { + /** Default model when frontmatter has no `endpoint` and the def has no `model`. */ + defaultModel?: AgentAdapter | Promise | string; + /** Ambient tool library referenced by frontmatter `tools: [key1, key2]`. */ + availableTools?: Record; + /** Registered plugin toolkits referenced by frontmatter `toolkits: [...]`. */ + plugins?: Map; + /** + * Code-defined agents contributed by `agents({ agents: { ... } })`. The + * directory loader resolves `agents:` frontmatter references against + * these alongside sibling markdown files, so a markdown parent can + * delegate to a code-defined child. Code-defined names win on collision + * with markdown names, matching the plugin's top-level merge precedence. + */ + codeAgents?: Record; +} + +export interface LoadResult { + /** Agent definitions keyed by agent id (directory name under `dir`). */ + defs: Record; + /** First agent with `default: true` frontmatter (sorted id order), or `null`. */ + defaultAgent: string | null; +} + +interface Frontmatter { + endpoint?: string; + model?: string; + toolkits?: ToolkitSpec[]; + tools?: string[]; + /** + * Other agent ids to expose as sub-agents. Each becomes an `agent-` + * tool at runtime. Resolution happens at directory-load time in + * {@link loadAgentsFromDir}; the single-file {@link loadAgentFromFile} path + * rejects non-empty values since there are no siblings to resolve against. + */ + agents?: string[]; + maxSteps?: number; + maxTokens?: number; + default?: boolean; + baseSystemPrompt?: false | string; + ephemeral?: boolean; +} + +type ToolkitSpec = string | { [pluginName: string]: ToolkitOptions | string[] }; + +/** + * Derives the logical agent id from a markdown path. When the file is named + * `agent.md`, the id is the parent directory name (folder-based layout); + * otherwise the id is the file stem (e.g. legacy single-file paths). + */ +export function agentIdFromMarkdownPath(filePath: string): string { + const normalized = path.normalize(filePath); + const base = path.basename(normalized); + const parent = path.basename(path.dirname(normalized)); + if (base === "agent.md" && parent && parent !== "." && parent !== "..") { + return parent; + } + return path.basename(normalized, ".md"); +} + +const ALLOWED_KEYS = new Set([ + "endpoint", + "model", + "toolkits", + "tools", + "agents", + "maxSteps", + "maxTokens", + "default", + "baseSystemPrompt", + "ephemeral", +]); + +/** + * Loads a single markdown agent file and resolves its frontmatter against + * registered plugin toolkits + ambient tool library. + * + * Rejects non-empty `agents:` frontmatter because single-file loads have + * no siblings to resolve sub-agent references against — callers must use + * {@link loadAgentsFromDir} when markdown agents delegate to one another. + */ +export async function loadAgentFromFile( + filePath: string, + ctx: LoadContext, +): Promise { + const raw = fs.readFileSync(filePath, "utf-8"); + const name = agentIdFromMarkdownPath(filePath); + const { data } = parseFrontmatter(raw, filePath); + if (Array.isArray(data?.agents) && data.agents.length > 0) { + throw new Error( + `Agent '${name}' (${filePath}) declares 'agents:' in frontmatter, ` + + `which requires loadAgentsFromDir to resolve sibling references. ` + + `Use loadAgentsFromDir, or wire sub-agents in code via createAgent({ agents: { ... } }).`, + ); + } + return buildDefinition(name, raw, filePath, ctx); +} + +/** + * Scans a directory for one subdirectory per agent, each containing + * `agent.md` (frontmatter + body). Produces an `AgentDefinition` record keyed + * by agent id (folder name). Throws on frontmatter errors or unresolved + * references. Returns an empty map if the directory does not exist. + * + * Legacy top-level `*.md` files are rejected with an error — migrate each to + * `/agent.md` under a sibling folder named for the agent id. + * + * Runs in two passes so sub-agent references in frontmatter (`agents: [...]`) + * can be resolved regardless of directory iteration order: + * + * 1. Build every agent's definition from its own `agent.md`. + * 2. Walk `agents:` references and wire `def.agents = { child: childDef }` + * by looking them up in the complete map. Dangling names and + * self-references fail loudly; mutual delegation is allowed and bounded + * at runtime by `limits.maxSubAgentDepth`. + */ +export async function loadAgentsFromDir( + dir: string, + ctx: LoadContext, +): Promise { + if (!fs.existsSync(dir)) { + return { defs: {}, defaultAgent: null }; + } + + const entries = fs.readdirSync(dir, { withFileTypes: true }); + const orphanMd = entries + .filter((e) => e.isFile() && e.name.endsWith(".md")) + .map((e) => e.name) + .sort(); + + if (orphanMd.length > 0) { + const hint = orphanMd + .map((f) => `${path.basename(f, ".md")}/agent.md`) + .join(", "); + throw new Error( + `Agents directory contains unsupported top-level markdown file(s): ${orphanMd.join(", ")}. ` + + `Use one folder per agent with a fixed entry file, e.g. ${hint}.`, + ); + } + + /** Reserved folder name until per-agent skills land; not an agent package. */ + const RESERVED_DIRS = new Set(["skills"]); + + const agentIds = entries + .filter((e) => e.isDirectory()) + .map((e) => e.name) + .filter((name) => !RESERVED_DIRS.has(name)) + .sort(); + + const defs: Record = {}; + const subAgentRefs: Record = {}; + let defaultAgent: string | null = null; + + // Pass 1: build every agent's definition; collect sub-agent refs. + for (const id of agentIds) { + const agentPath = path.join(dir, id, "agent.md"); + if (!fs.existsSync(agentPath)) { + throw new Error( + `Agents subdirectory '${path.join(dir, id)}' must contain agent.md.`, + ); + } + const raw = fs.readFileSync(agentPath, "utf-8"); + defs[id] = buildDefinition(id, raw, agentPath, ctx); + const { data } = parseFrontmatter(raw, agentPath); + if (data?.agents !== undefined) { + subAgentRefs[id] = normalizeAgentsFrontmatter(data.agents, id, agentPath); + } + if (data?.default === true && !defaultAgent) { + defaultAgent = id; + } + } + + // Pass 2: resolve sub-agent references against the complete defs map. + // Code-defined agents (ctx.codeAgents) take precedence over markdown ones + // with the same name, matching the plugin's top-level merge behaviour. + for (const [name, refs] of Object.entries(subAgentRefs)) { + if (refs.length === 0) continue; + const children: Record = {}; + const missing: string[] = []; + for (const ref of refs) { + if (ref === name) { + throw new Error( + `Agent '${name}' (${path.join(dir, name, "agent.md")}) cannot reference itself in 'agents:'.`, + ); + } + const sibling = ctx.codeAgents?.[ref] ?? defs[ref]; + if (!sibling) { + missing.push(ref); + continue; + } + children[ref] = sibling; + } + if (missing.length > 0) { + const available = + [...Object.keys(ctx.codeAgents ?? {}), ...Object.keys(defs)] + .sort() + .join(", ") || ""; + throw new Error( + `Agent '${name}' references sub-agent(s) '${missing.join(", ")}' in 'agents:', ` + + `but no markdown or code agent(s) with those names exist. ` + + `Available: ${available}.`, + ); + } + defs[name].agents = children; + } + + return { defs, defaultAgent }; +} + +/** + * Validates that `agents:` frontmatter is an array of non-empty strings and + * returns it with duplicates removed. Throws with a clear per-file message + * on malformed input rather than silently ignoring. + */ +function normalizeAgentsFrontmatter( + value: unknown, + agentName: string, + filePath: string, +): string[] { + if (!Array.isArray(value)) { + throw new Error( + `Agent '${agentName}' (${filePath}) has invalid 'agents:' frontmatter: ` + + `expected an array of sibling agent ids, got ${typeof value}.`, + ); + } + const out: string[] = []; + const seen = new Set(); + for (const item of value) { + if (typeof item !== "string" || item.trim() === "") { + throw new Error( + `Agent '${agentName}' (${filePath}) has invalid 'agents:' entry: ` + + `expected non-empty string, got ${JSON.stringify(item)}.`, + ); + } + if (seen.has(item)) continue; + seen.add(item); + out.push(item); + } + return out; +} + +/** Exposed for tests. Parses `--- yaml ---\nbody` and validates frontmatter keys. */ +export function parseFrontmatter( + raw: string, + sourcePath?: string, +): { data: Frontmatter | null; content: string } { + const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/); + if (!match) { + return { data: null, content: raw.trim() }; + } + let parsed: unknown; + try { + parsed = yaml.load(match[1]); + } catch (err) { + const src = sourcePath ? ` (${sourcePath})` : ""; + throw new Error( + `Invalid YAML frontmatter${src}: ${err instanceof Error ? err.message : String(err)}`, + ); + } + if (parsed === null || parsed === undefined) { + return { data: {}, content: match[2].trim() }; + } + if (typeof parsed !== "object" || Array.isArray(parsed)) { + const src = sourcePath ? ` (${sourcePath})` : ""; + throw new Error(`Frontmatter must be a YAML object${src}`); + } + const data = parsed as Record; + for (const key of Object.keys(data)) { + if (!ALLOWED_KEYS.has(key)) { + logger.warn( + "Ignoring unknown frontmatter key '%s' in %s", + key, + sourcePath ?? "", + ); + } + } + return { data: data as Frontmatter, content: match[2].trim() }; +} + +function buildDefinition( + name: string, + raw: string, + filePath: string, + ctx: LoadContext, +): AgentDefinition { + const { data, content } = parseFrontmatter(raw, filePath); + const fm: Frontmatter = data ?? {}; + + const tools = resolveFrontmatterTools(name, fm, filePath, ctx); + const model = fm.model ?? fm.endpoint ?? ctx.defaultModel; + + let baseSystemPrompt: BaseSystemPromptOption | undefined; + if (fm.baseSystemPrompt === false) baseSystemPrompt = false; + else if (typeof fm.baseSystemPrompt === "string") + baseSystemPrompt = fm.baseSystemPrompt; + + return { + name, + instructions: content, + model, + tools: Object.keys(tools).length > 0 ? tools : undefined, + maxSteps: typeof fm.maxSteps === "number" ? fm.maxSteps : undefined, + maxTokens: typeof fm.maxTokens === "number" ? fm.maxTokens : undefined, + baseSystemPrompt, + ephemeral: typeof fm.ephemeral === "boolean" ? fm.ephemeral : undefined, + }; +} + +function resolveFrontmatterTools( + agentName: string, + fm: Frontmatter, + filePath: string, + ctx: LoadContext, +): Record { + const out: Record = {}; + const pluginIdx = ctx.plugins ?? new Map(); + + for (const spec of fm.toolkits ?? []) { + const [pluginName, opts] = parseToolkitSpec(spec, filePath, agentName); + const provider = pluginIdx.get(pluginName); + if (!provider) { + throw new Error( + `Agent '${agentName}' (${filePath}) references toolkit '${pluginName}', but plugin '${pluginName}' is not registered. Available: ${ + pluginIdx.size > 0 + ? Array.from(pluginIdx.keys()).join(", ") + : "" + }`, + ); + } + const entries = provider.toolkit(opts) as Record; + for (const [key, entry] of Object.entries(entries)) { + if (!isToolkitEntry(entry)) { + throw new Error( + `Plugin '${pluginName}'.toolkit() returned a value at key '${key}' that is not a ToolkitEntry`, + ); + } + out[key] = entry as ToolkitEntry; + } + } + + for (const key of fm.tools ?? []) { + const tool = ctx.availableTools?.[key]; + if (!tool) { + const available = ctx.availableTools + ? Object.keys(ctx.availableTools).join(", ") + : ""; + throw new Error( + `Agent '${agentName}' (${filePath}) references tool '${key}', which is not in the agents() plugin's tools field. Available: ${available}`, + ); + } + out[key] = tool; + } + + return out; +} + +function parseToolkitSpec( + spec: ToolkitSpec, + filePath: string, + agentName: string, +): [string, ToolkitOptions | undefined] { + if (typeof spec === "string") { + return [spec, undefined]; + } + if (typeof spec !== "object" || spec === null) { + throw new Error( + `Agent '${agentName}' (${filePath}) has invalid toolkit entry: ${JSON.stringify(spec)}`, + ); + } + const keys = Object.keys(spec); + if (keys.length !== 1) { + throw new Error( + `Agent '${agentName}' (${filePath}) toolkit entry must have exactly one key, got: ${keys.join(", ")}`, + ); + } + const pluginName = keys[0]; + const value = spec[pluginName]; + if (Array.isArray(value)) { + return [pluginName, { only: value }]; + } + if (typeof value === "object" && value !== null) { + return [pluginName, value as ToolkitOptions]; + } + throw new Error( + `Agent '${agentName}' (${filePath}) toolkit '${pluginName}' options must be an array of tool names or an options object`, + ); +} diff --git a/packages/appkit/src/plugins/agents/manifest.json b/packages/appkit/src/plugins/agents/manifest.json new file mode 100644 index 00000000..f3986c83 --- /dev/null +++ b/packages/appkit/src/plugins/agents/manifest.json @@ -0,0 +1,24 @@ +{ + "$schema": "https://databricks.github.io/appkit/schemas/plugin-manifest.schema.json", + "name": "agents", + "displayName": "Agents Plugin", + "description": "AI agents driven by markdown configs or code, with auto-tool-discovery from registered plugins", + "resources": { + "required": [], + "optional": [ + { + "type": "serving_endpoint", + "alias": "Model Serving (agents)", + "resourceKey": "agents-serving-endpoint", + "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_AGENT_ENDPOINT` when no per-agent model is configured. Omit when agents use only external adapters.", + "permission": "CAN_QUERY", + "fields": { + "name": { + "env": "DATABRICKS_AGENT_ENDPOINT", + "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`" + } + } + } + ] + } +} diff --git a/packages/appkit/src/plugins/agents/schemas.ts b/packages/appkit/src/plugins/agents/schemas.ts new file mode 100644 index 00000000..cea6c6d6 --- /dev/null +++ b/packages/appkit/src/plugins/agents/schemas.ts @@ -0,0 +1,69 @@ +import { z } from "zod"; + +/** + * Static body cap for the `message` field on `POST /chat`. 64 000 characters + * is well above any legitimate chat turn (~16k tokens at 4 chars/token) and + * bounds the per-request cost of appending to `InMemoryThreadStore` without + * requiring per-deployment configuration. + */ +const MAX_MESSAGE_CHARS = 64_000; + +/** Cap applied to `/invocations` when `input` is a raw string. */ +const MAX_INVOCATIONS_INPUT_CHARS = 64_000; + +/** + * Cap on the number of items accepted in an `/invocations` `input` array + * (one element per seeded message). Protects against a single request + * seeding hundreds of messages into the thread store. + */ +const MAX_INVOCATIONS_INPUT_ITEMS = 100; + +/** Per-message `content` size cap (string form). */ +const MAX_INVOCATIONS_ITEM_CHARS = 64_000; + +/** Per-message `content` size cap (array form). */ +const MAX_INVOCATIONS_ITEM_ARRAY_ITEMS = 100; + +export const chatRequestSchema = z.object({ + message: z + .string() + .min(1, "message must not be empty") + .max( + MAX_MESSAGE_CHARS, + `message exceeds the ${MAX_MESSAGE_CHARS}-character limit`, + ), + threadId: z.string().optional(), + agent: z.string().optional(), +}); + +const messageItemSchema = z.object({ + role: z.enum(["user", "assistant", "system"]).optional(), + content: z + .union([ + z.string().max(MAX_INVOCATIONS_ITEM_CHARS), + z.array(z.any()).max(MAX_INVOCATIONS_ITEM_ARRAY_ITEMS), + ]) + .optional(), + type: z.string().optional(), +}); + +export const invocationsRequestSchema = z.object({ + input: z.union([ + z.string().min(1).max(MAX_INVOCATIONS_INPUT_CHARS), + z + .array(messageItemSchema) + .min(1) + .max( + MAX_INVOCATIONS_INPUT_ITEMS, + `input array exceeds the ${MAX_INVOCATIONS_INPUT_ITEMS}-item limit`, + ), + ]), + stream: z.boolean().optional().default(true), + model: z.string().optional(), +}); + +export const approvalRequestSchema = z.object({ + streamId: z.string().min(1, "streamId is required"), + approvalId: z.string().min(1, "approvalId is required"), + decision: z.enum(["approve", "deny"]), +}); diff --git a/packages/appkit/src/plugins/agents/system-prompt.ts b/packages/appkit/src/plugins/agents/system-prompt.ts new file mode 100644 index 00000000..01f3fe9b --- /dev/null +++ b/packages/appkit/src/plugins/agents/system-prompt.ts @@ -0,0 +1,52 @@ +import type { PromptContext } from "./types"; + +/** + * Default base system prompt: product identity, active AppKit plugins, and + * tool-agnostic behavior hints. + * + * Individual tool definitions and JSON Schemas are still sent through the + * model's `tools` / function-calling channel — this string is not a second + * copy of that list. `ctx.toolNames` is available for custom + * `baseSystemPrompt` callbacks; the default text stays short and does not + * enumerate tools to avoid drift and token bloat. + */ +export function buildBaseSystemPrompt(ctx: PromptContext): string { + const { pluginNames } = ctx; + const lines: string[] = [ + "You are an AI assistant running on Databricks AppKit.", + ]; + + if (pluginNames.length > 0) { + lines.push(""); + lines.push(`Active AppKit plugins: ${pluginNames.join(", ")}`); + } + + lines.push(""); + lines.push("Guidelines:"); + lines.push( + "- Be concise: for large or noisy tool output, summarize what matters and how to go deeper instead of pasting everything.", + ); + lines.push( + "- Use each tool as defined: pass required arguments and use the syntax, dialect, or path rules the target system expects (see each tool’s description and schema).", + ); + lines.push( + "- If a tool call fails, explain the error in plain language and suggest a fix or next step.", + ); + lines.push( + "- Respect tool metadata and app policy: read-only vs destructive tools, user/identity context, and any approval or safety flows the app provides.", + ); + + return lines.join("\n"); +} + +/** + * Compose the full system prompt from the base prompt and an optional + * per-agent user prompt. + */ +export function composeSystemPrompt( + basePrompt: string, + agentPrompt?: string, +): string { + if (!agentPrompt) return basePrompt; + return `${basePrompt}\n\n${agentPrompt}`; +} diff --git a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts new file mode 100644 index 00000000..747ada48 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts @@ -0,0 +1,367 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import type { + AgentAdapter, + AgentInput, + AgentRunContext, + AgentToolDefinition, + ToolProvider, +} from "shared"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { z } from "zod"; +import { CacheManager } from "../../../cache"; +// Import the class directly so we can construct it without a createApp +import { AgentsPlugin } from "../agents"; +import { buildToolkitEntries } from "../build-toolkit"; +import { defineTool, type ToolRegistry } from "../tools/define-tool"; +import type { AgentsPluginConfig, ToolkitEntry } from "../types"; +import { isToolkitEntry } from "../types"; + +interface FakeContext { + providers: Array<{ name: string; provider: ToolProvider }>; + getToolProviders(): Array<{ name: string; provider: ToolProvider }>; + getPluginNames(): string[]; + addRoute(): void; + executeTool: ( + req: unknown, + pluginName: string, + localName: string, + args: unknown, + ) => Promise; +} + +function fakeContext( + providers: Array<{ name: string; provider: ToolProvider }>, +): FakeContext { + return { + providers, + getToolProviders: () => providers, + getPluginNames: () => providers.map((p) => p.name), + addRoute: vi.fn(), + executeTool: vi.fn(async (_req, p, n, args) => ({ + plugin: p, + tool: n, + args, + })), + }; +} + +function stubAdapter(): AgentAdapter { + return { + async *run(_input: AgentInput, _ctx: AgentRunContext) { + yield { type: "message_delta", content: "" }; + }, + }; +} + +function makeToolProvider( + pluginName: string, + registry: ToolRegistry, +): ToolProvider & { + toolkit: (opts?: unknown) => Record; +} { + return { + getAgentTools(): AgentToolDefinition[] { + return Object.entries(registry).map(([name, entry]) => ({ + name, + description: entry.description, + parameters: { type: "object", properties: {} }, + })); + }, + async executeAgentTool(name, args) { + return { callFrom: pluginName, name, args }; + }, + toolkit: (opts) => buildToolkitEntries(pluginName, registry, opts as never), + }; +} + +let tmpDir: string; + +beforeEach(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "agents-plugin-")); + const storage = { + get: vi.fn(), + set: vi.fn(), + delete: vi.fn(), + keys: vi.fn(), + healthCheck: vi.fn(async () => true), + close: vi.fn(async () => {}), + }; + // biome-ignore lint/suspicious/noExplicitAny: test-only CacheManager wiring + await CacheManager.getInstance({ storage: storage as any }); +}); + +afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +function instantiate(config: AgentsPluginConfig, ctx?: FakeContext) { + const plugin = new AgentsPlugin({ ...config, name: "agent" }); + plugin.attachContext({ context: ctx as unknown as object }); + return plugin; +} + +function writeMarkdownAgent(dir: string, id: string, content: string) { + const folder = path.join(dir, id); + fs.mkdirSync(folder, { recursive: true }); + fs.writeFileSync(path.join(folder, "agent.md"), content, "utf-8"); +} + +describe("AgentsPlugin", () => { + test("registers code-defined agents and exposes them via exports", async () => { + const plugin = instantiate({ + dir: false, + agents: { + support: { + instructions: "You help customers.", + model: stubAdapter(), + }, + }, + }); + await plugin.setup(); + + const api = plugin.exports() as { + list: () => string[]; + getDefault: () => string | null; + }; + expect(api.list()).toEqual(["support"]); + expect(api.getDefault()).toBe("support"); + }); + + test("loads markdown agents from a directory", async () => { + writeMarkdownAgent( + tmpDir, + "assistant", + "---\ndefault: true\n---\nYou are helpful.", + ); + const plugin = instantiate({ + dir: tmpDir, + defaultModel: stubAdapter(), + }); + await plugin.setup(); + + const api = plugin.exports() as { + list: () => string[]; + getDefault: () => string | null; + }; + expect(api.list()).toEqual(["assistant"]); + expect(api.getDefault()).toBe("assistant"); + }); + + test("code definitions override markdown on key collision", async () => { + writeMarkdownAgent(tmpDir, "support", "---\n---\nFrom markdown."); + const plugin = instantiate({ + dir: tmpDir, + defaultModel: stubAdapter(), + agents: { + support: { + instructions: "From code", + model: stubAdapter(), + }, + }, + }); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { instructions: string } | null; + }; + expect(api.get("support")?.instructions).toBe("From code"); + }); + + test("auto-inherit default is safe (both file and code get nothing without an explicit opt-in)", async () => { + const registry: ToolRegistry = { + query: defineTool({ + description: "q", + schema: z.object({ sql: z.string() }), + autoInheritable: true, // even with autoInheritable, no spread without opt-in + handler: () => "ok", + }), + }; + const provider = makeToolProvider("analytics", registry); + const ctx = fakeContext([{ name: "analytics", provider }]); + + writeMarkdownAgent(tmpDir, "assistant", "---\n---\nYou are helpful."); + + const plugin = instantiate( + { + dir: tmpDir, + defaultModel: stubAdapter(), + agents: { + manual: { + instructions: "Manual agent", + model: stubAdapter(), + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const fileAgent = api.get("assistant"); + const codeAgent = api.get("manual"); + + expect(fileAgent?.toolIndex.size).toBe(0); + expect(codeAgent?.toolIndex.size).toBe(0); + }); + + test("opting in with autoInheritTools: { file: true } spreads only autoInheritable tools", async () => { + const registry: ToolRegistry = { + query: defineTool({ + description: "read-only query", + schema: z.object({ sql: z.string() }), + autoInheritable: true, + handler: () => "ok", + }), + destructive: defineTool({ + description: "mutation", + schema: z.object({}), + // autoInheritable left unset → skipped even when opted in + handler: () => "ok", + }), + }; + const provider = makeToolProvider("analytics", registry); + const ctx = fakeContext([{ name: "analytics", provider }]); + + writeMarkdownAgent(tmpDir, "assistant", "---\n---\nYou are helpful."); + + const plugin = instantiate( + { + dir: tmpDir, + defaultModel: stubAdapter(), + autoInheritTools: { file: true }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const fileAgent = api.get("assistant"); + const keys = Array.from(fileAgent?.toolIndex.keys() ?? []); + expect(keys).toEqual(["analytics.query"]); + }); + + test("autoInheritTools: true enables both origins but still filters by autoInheritable", async () => { + const registry: ToolRegistry = { + safe: defineTool({ + description: "safe", + schema: z.object({}), + autoInheritable: true, + handler: () => "ok", + }), + unsafe: defineTool({ + description: "unsafe", + schema: z.object({}), + handler: () => "ok", + }), + }; + const provider = makeToolProvider("p", registry); + const ctx = fakeContext([{ name: "p", provider }]); + + const plugin = instantiate( + { + dir: false, + defaultModel: stubAdapter(), + autoInheritTools: true, + agents: { + code1: { + instructions: "code agent", + model: stubAdapter(), + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const codeAgent = api.get("code1"); + const keys = Array.from(codeAgent?.toolIndex.keys() ?? []); + expect(keys).toEqual(["p.safe"]); + }); + + test("file-loaded agent respects explicit toolkits (skips auto-inherit)", async () => { + const registry: ToolRegistry = { + query: defineTool({ + description: "q", + schema: z.object({ sql: z.string() }), + handler: () => "ok", + }), + }; + const registry2: ToolRegistry = { + list: defineTool({ + description: "l", + schema: z.object({}), + handler: () => [], + }), + }; + const ctx = fakeContext([ + { name: "analytics", provider: makeToolProvider("analytics", registry) }, + { name: "files", provider: makeToolProvider("files", registry2) }, + ]); + + writeMarkdownAgent( + tmpDir, + "analyst", + "---\ntoolkits: [analytics]\n---\nAnalyst.", + ); + + const plugin = instantiate( + { dir: tmpDir, defaultModel: stubAdapter() }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const agent = api.get("analyst"); + const toolNames = Array.from(agent?.toolIndex.keys() ?? []); + expect(toolNames.some((n) => n.startsWith("analytics."))).toBe(true); + expect(toolNames.some((n) => n.startsWith("files."))).toBe(false); + }); + + test("registers sub-agents as agent- tools", async () => { + const plugin = instantiate({ + dir: false, + agents: { + supervisor: { + instructions: "Supervise", + model: stubAdapter(), + agents: { + worker: { + instructions: "Work", + model: stubAdapter(), + }, + }, + }, + }, + }); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const sup = api.get("supervisor"); + expect(sup?.toolIndex.has("agent-worker")).toBe(true); + }); + + test("isToolkitEntry type guard recognizes toolkit entries", () => { + const entry: ToolkitEntry = { + __toolkitRef: true, + pluginName: "x", + localName: "y", + def: { name: "x.y", description: "", parameters: { type: "object" } }, + }; + expect(isToolkitEntry(entry)).toBe(true); + expect(isToolkitEntry({ foo: 1 })).toBe(false); + expect(isToolkitEntry(null)).toBe(false); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/approval-route.test.ts b/packages/appkit/src/plugins/agents/tests/approval-route.test.ts new file mode 100644 index 00000000..6e090bd2 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/approval-route.test.ts @@ -0,0 +1,292 @@ +import type express from "express"; +import { beforeEach, describe, expect, test, vi } from "vitest"; +import { CacheManager } from "../../../cache"; +import { AgentsPlugin } from "../agents"; + +/** + * Focused tests for the `POST /approve` route and the associated + * ownership / error paths on `_handleApprove`. Covers: + * + * - Schema validation of the request body. + * - Ownership check: the user submitting the decision must be the same + * user who initiated the underlying chat stream. + * - 404 for unknown stream (already completed or never existed). + * - 404 for unknown approvalId even when the stream is active. + * - Happy-path resolution of a pending gate with `approve` and `deny`. + * - Cancel of an active stream denies every pending gate on that stream. + */ + +function mockReq(body: unknown, userId?: string): express.Request { + const headers: Record = {}; + if (userId) { + headers["x-forwarded-user"] = userId; + headers["x-forwarded-access-token"] = "fake-token"; + } + return { + body, + headers, + header: (name: string) => headers[name.toLowerCase()], + } as unknown as express.Request; +} + +function mockRes() { + const json = vi.fn(); + const end = vi.fn(); + let statusCode = 200; + const status = vi.fn((code: number) => { + statusCode = code; + return { json, end }; + }); + return { + res: { status, json, end } as unknown as express.Response, + get statusCode() { + return statusCode; + }, + json, + }; +} + +beforeEach(() => { + CacheManager.getInstanceSync = vi.fn(() => ({ + get: vi.fn(), + set: vi.fn(), + delete: vi.fn(), + getOrExecute: vi.fn(async (_k: unknown[], fn: () => Promise) => + fn(), + ), + generateKey: vi.fn(() => "test-key"), + })) as any; + process.env.NODE_ENV = "development"; +}); + +describe("POST /approve route handler", () => { + test("rejects invalid body shape with 400", async () => { + const plugin = new AgentsPlugin({ dir: false }); + const { res, json } = mockRes(); + await (plugin as any)._handleApprove(mockReq({}, "alice"), res); + expect(res.status).toHaveBeenCalledWith(400); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ error: "Invalid request" }), + ); + }); + + test("returns 404 when the streamId is unknown", async () => { + const plugin = new AgentsPlugin({ dir: false }); + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleApprove: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleApprove( + mockReq( + { streamId: "ghost", approvalId: "a1", decision: "approve" }, + "alice", + ), + res, + ); + expect(res.status).toHaveBeenCalledWith(404); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ error: expect.stringMatching(/not found/i) }), + ); + }); + + test("returns 403 when submitter is different from stream owner", async () => { + const plugin = new AgentsPlugin({ dir: false }); + (plugin as any).activeStreams.set("stream-x", { + controller: new AbortController(), + userId: "alice", + }); + const gate = (plugin as any).approvalGate; + const waiter = gate.wait({ + approvalId: "a1", + streamId: "stream-x", + userId: "alice", + timeoutMs: 60_000, + }); + + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleApprove: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleApprove( + mockReq( + { streamId: "stream-x", approvalId: "a1", decision: "approve" }, + "bob", + ), + res, + ); + expect(res.status).toHaveBeenCalledWith(403); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ error: "Forbidden" }), + ); + + // Settle the waiter to clean up. + gate.submit({ approvalId: "a1", userId: "alice", decision: "deny" }); + await expect(waiter).resolves.toBe("deny"); + }); + + test("returns 404 when approvalId is unknown on an active stream", async () => { + const plugin = new AgentsPlugin({ dir: false }); + (plugin as any).activeStreams.set("stream-y", { + controller: new AbortController(), + userId: "alice", + }); + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleApprove: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleApprove( + mockReq( + { streamId: "stream-y", approvalId: "unknown-a", decision: "approve" }, + "alice", + ), + res, + ); + expect(res.status).toHaveBeenCalledWith(404); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.stringMatching(/not found|already settled/i), + }), + ); + }); + + test("happy path: approve resolves pending gate with 'approve'", async () => { + const plugin = new AgentsPlugin({ dir: false }); + (plugin as any).activeStreams.set("stream-z", { + controller: new AbortController(), + userId: "alice", + }); + const gate = (plugin as any).approvalGate; + const waiter = gate.wait({ + approvalId: "a42", + streamId: "stream-z", + userId: "alice", + timeoutMs: 60_000, + }); + + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleApprove: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleApprove( + mockReq( + { streamId: "stream-z", approvalId: "a42", decision: "approve" }, + "alice", + ), + res, + ); + expect(res.status).not.toHaveBeenCalled(); + expect(json).toHaveBeenCalledWith({ decision: "approve" }); + await expect(waiter).resolves.toBe("approve"); + }); + + test("happy path: deny resolves pending gate with 'deny'", async () => { + const plugin = new AgentsPlugin({ dir: false }); + (plugin as any).activeStreams.set("stream-z", { + controller: new AbortController(), + userId: "alice", + }); + const gate = (plugin as any).approvalGate; + const waiter = gate.wait({ + approvalId: "a43", + streamId: "stream-z", + userId: "alice", + timeoutMs: 60_000, + }); + + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleApprove: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleApprove( + mockReq( + { streamId: "stream-z", approvalId: "a43", decision: "deny" }, + "alice", + ), + res, + ); + expect(json).toHaveBeenCalledWith({ decision: "deny" }); + await expect(waiter).resolves.toBe("deny"); + }); +}); + +describe("POST /cancel ownership + gate cleanup", () => { + test("cancelling a stream denies every pending approval on that stream", async () => { + const plugin = new AgentsPlugin({ dir: false }); + const controller = new AbortController(); + (plugin as any).activeStreams.set("stream-c", { + controller, + userId: "alice", + }); + const gate = (plugin as any).approvalGate; + const a = gate.wait({ + approvalId: "ca1", + streamId: "stream-c", + userId: "alice", + timeoutMs: 60_000, + }); + const b = gate.wait({ + approvalId: "ca2", + streamId: "stream-c", + userId: "alice", + timeoutMs: 60_000, + }); + + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleCancel: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleCancel(mockReq({ streamId: "stream-c" }, "alice"), res); + + expect(controller.signal.aborted).toBe(true); + expect(json).toHaveBeenCalledWith({ cancelled: true }); + await expect(a).resolves.toBe("deny"); + await expect(b).resolves.toBe("deny"); + }); + + test("cancel from a different user is refused with 403", async () => { + const plugin = new AgentsPlugin({ dir: false }); + const controller = new AbortController(); + (plugin as any).activeStreams.set("stream-d", { + controller, + userId: "alice", + }); + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleCancel: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleCancel(mockReq({ streamId: "stream-d" }, "bob"), res); + expect(res.status).toHaveBeenCalledWith(403); + expect(controller.signal.aborted).toBe(false); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ error: "Forbidden" }), + ); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/create-agent.test.ts b/packages/appkit/src/plugins/agents/tests/create-agent.test.ts new file mode 100644 index 00000000..3822897f --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/create-agent.test.ts @@ -0,0 +1,75 @@ +import { describe, expect, test } from "vitest"; +import { z } from "zod"; +import { createAgent } from "../../../core/create-agent-def"; +import { tool } from "../tools/tool"; +import type { AgentDefinition } from "../types"; + +describe("createAgent", () => { + test("returns the definition unchanged for a simple agent", () => { + const def: AgentDefinition = { + name: "support", + instructions: "You help customers.", + model: "endpoint-x", + }; + const result = createAgent(def); + expect(result).toBe(def); + }); + + test("accepts tools as a keyed record", () => { + const get_weather = tool({ + name: "get_weather", + description: "Get the weather", + schema: z.object({ city: z.string() }), + execute: async ({ city }) => `Sunny in ${city}`, + }); + + const def = createAgent({ + instructions: "...", + tools: { get_weather }, + }); + + expect(def.tools?.get_weather).toBe(get_weather); + }); + + test("accepts sub-agents in a keyed record", () => { + const researcher = createAgent({ instructions: "Research." }); + const supervisor = createAgent({ + instructions: "Supervise.", + agents: { researcher }, + }); + expect(supervisor.agents?.researcher).toBe(researcher); + }); + + test("throws on a direct self-cycle", () => { + const a: AgentDefinition = { instructions: "a" }; + // biome-ignore lint/suspicious/noExplicitAny: intentional cycle setup for test + (a as any).agents = { self: a }; + expect(() => createAgent(a)).toThrow(/cycle/i); + }); + + test("throws on an indirect cycle", () => { + const a: AgentDefinition = { instructions: "a" }; + const b: AgentDefinition = { instructions: "b" }; + a.agents = { b }; + b.agents = { a }; + expect(() => createAgent(a)).toThrow(/cycle/i); + }); + + test("accepts a DAG of sub-agents without throwing", () => { + const leaf: AgentDefinition = { instructions: "leaf" }; + const branchA: AgentDefinition = { + instructions: "a", + agents: { leaf }, + }; + const branchB: AgentDefinition = { + instructions: "b", + agents: { leaf }, + }; + const root = createAgent({ + instructions: "root", + agents: { branchA, branchB }, + }); + expect(root.agents?.branchA.agents?.leaf).toBe(leaf); + expect(root.agents?.branchB.agents?.leaf).toBe(leaf); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts b/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts new file mode 100644 index 00000000..935fa240 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts @@ -0,0 +1,299 @@ +import type express from "express"; +import { beforeEach, describe, expect, test, vi } from "vitest"; +import { CacheManager } from "../../../cache"; +import { AgentsPlugin } from "../agents"; +import { chatRequestSchema, invocationsRequestSchema } from "../schemas"; + +/** + * Exercises the four DoS caps landed for MVP: + * + * - `chatRequestSchema.message.max(64_000)` — body cap on `POST /chat`. + * - Per-user `maxConcurrentStreamsPerUser` — 429 with Retry-After. + * - Per-run `maxToolCalls` — aborts stream and throws in `executeTool`. + * - Per-delegation `maxSubAgentDepth` — rejects in `runSubAgent`. + * + * Route-level tests exercise the schemas + `_handleChat` directly via the + * mocked req/res pattern already used by approval-route.test.ts. + */ + +function mockReq(body: unknown, userId?: string): express.Request { + const headers: Record = {}; + if (userId) { + headers["x-forwarded-user"] = userId; + headers["x-forwarded-access-token"] = "fake-token"; + } + return { + body, + headers, + header: (name: string) => headers[name.toLowerCase()], + } as unknown as express.Request; +} + +function mockRes() { + const json = vi.fn(); + const setHeader = vi.fn(); + let statusCode = 200; + const status = vi.fn((code: number) => { + statusCode = code; + return { json }; + }); + return { + res: { status, json, setHeader } as unknown as express.Response, + get statusCode() { + return statusCode; + }, + json, + setHeader, + }; +} + +beforeEach(() => { + CacheManager.getInstanceSync = vi.fn(() => ({ + get: vi.fn(), + set: vi.fn(), + delete: vi.fn(), + getOrExecute: vi.fn(async (_k: unknown[], fn: () => Promise) => + fn(), + ), + generateKey: vi.fn(() => "test-key"), + // biome-ignore lint/suspicious/noExplicitAny: test mock + })) as any; + process.env.NODE_ENV = "development"; +}); + +describe("chatRequestSchema — body cap", () => { + test("accepts messages up to 64_000 characters", () => { + const result = chatRequestSchema.safeParse({ + message: "a".repeat(64_000), + }); + expect(result.success).toBe(true); + }); + + test("rejects messages over 64_000 characters", () => { + const result = chatRequestSchema.safeParse({ + message: "a".repeat(64_001), + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(JSON.stringify(result.error.flatten())).toMatch(/64000/); + } + }); + + test("rejects empty message (existing contract)", () => { + expect(chatRequestSchema.safeParse({ message: "" }).success).toBe(false); + }); +}); + +describe("invocationsRequestSchema — input caps", () => { + test("accepts string input up to 64_000 characters", () => { + const result = invocationsRequestSchema.safeParse({ + input: "a".repeat(64_000), + }); + expect(result.success).toBe(true); + }); + + test("rejects string input over 64_000 characters", () => { + const result = invocationsRequestSchema.safeParse({ + input: "a".repeat(64_001), + }); + expect(result.success).toBe(false); + }); + + test("accepts array input up to 100 items", () => { + const items = Array.from({ length: 100 }, (_, i) => ({ + role: "user" as const, + content: `m${i}`, + })); + expect(invocationsRequestSchema.safeParse({ input: items }).success).toBe( + true, + ); + }); + + test("rejects array input over 100 items", () => { + const items = Array.from({ length: 101 }, (_, i) => ({ + role: "user" as const, + content: `m${i}`, + })); + const result = invocationsRequestSchema.safeParse({ input: items }); + expect(result.success).toBe(false); + }); + + test("rejects per-item content over 64_000 characters", () => { + const result = invocationsRequestSchema.safeParse({ + input: [{ role: "user", content: "a".repeat(64_001) }], + }); + expect(result.success).toBe(false); + }); +}); + +describe("POST /chat — per-user concurrent-stream limit", () => { + function seedPlugin( + overrides: ConstructorParameters[0] = { dir: false }, + ): AgentsPlugin { + const plugin = new AgentsPlugin(overrides); + // Seed the agents map directly so _handleChat can resolve "hello" + // without running setup() (which would require a live model). + // biome-ignore lint/suspicious/noExplicitAny: seeding private state + (plugin as any).agents.set("hello", { + name: "hello", + instructions: "hi", + adapter: { async *run() {} }, + toolIndex: new Map(), + }); + // biome-ignore lint/suspicious/noExplicitAny: seeding private state + (plugin as any).defaultAgentName = "hello"; + return plugin; + } + + test("rejects with 429 + Retry-After when user is at-limit (default 5)", async () => { + const plugin = seedPlugin(); + for (let i = 0; i < 5; i++) { + // biome-ignore lint/suspicious/noExplicitAny: seeding + (plugin as any).activeStreams.set(`s${i}`, { + controller: new AbortController(), + userId: "alice", + }); + } + + const { res, setHeader, json } = mockRes(); + await ( + plugin as unknown as { + _handleChat: (r: express.Request, w: express.Response) => Promise; + } + )._handleChat(mockReq({ message: "hi" }, "alice"), res); + + expect(res.status).toHaveBeenCalledWith(429); + expect(setHeader).toHaveBeenCalledWith("Retry-After", "5"); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.stringMatching(/Too many concurrent streams/), + }), + ); + }); + + test("does not reject when another user is at-limit (per-user, not global)", async () => { + const plugin = seedPlugin(); + for (let i = 0; i < 5; i++) { + // biome-ignore lint/suspicious/noExplicitAny: seeding + (plugin as any).activeStreams.set(`s${i}`, { + controller: new AbortController(), + userId: "alice", + }); + } + + // Carol's request must not see a 429 even though alice is at-limit. + // Don't bother running the full stream — we assert only that 429 is + // not the response status. + const { res } = mockRes(); + // biome-ignore lint/suspicious/noExplicitAny: stub _streamAgent to avoid needing a real adapter + (plugin as any)._streamAgent = vi.fn(async () => undefined); + + await ( + plugin as unknown as { + _handleChat: (r: express.Request, w: express.Response) => Promise; + } + )._handleChat(mockReq({ message: "hi" }, "carol"), res); + + expect(res.status).not.toHaveBeenCalledWith(429); + }); + + test("honours agents({ limits: { maxConcurrentStreamsPerUser } })", async () => { + const plugin = seedPlugin({ + dir: false, + limits: { maxConcurrentStreamsPerUser: 2 }, + }); + for (let i = 0; i < 2; i++) { + // biome-ignore lint/suspicious/noExplicitAny: seeding + (plugin as any).activeStreams.set(`s${i}`, { + controller: new AbortController(), + userId: "alice", + }); + } + + const { res } = mockRes(); + await ( + plugin as unknown as { + _handleChat: (r: express.Request, w: express.Response) => Promise; + } + )._handleChat(mockReq({ message: "hi" }, "alice"), res); + + expect(res.status).toHaveBeenCalledWith(429); + }); +}); + +describe("resolvedLimits — default values", () => { + test("exposes the documented MVP defaults when unconfigured", () => { + const plugin = new AgentsPlugin({ dir: false }); + // biome-ignore lint/suspicious/noExplicitAny: read private getter + const limits = (plugin as any).resolvedLimits; + expect(limits).toEqual({ + maxConcurrentStreamsPerUser: 5, + maxToolCalls: 50, + maxSubAgentDepth: 3, + }); + }); + + test("lets callers override any subset", () => { + const plugin = new AgentsPlugin({ + dir: false, + limits: { maxToolCalls: 100 }, + }); + // biome-ignore lint/suspicious/noExplicitAny: read private + const limits = (plugin as any).resolvedLimits; + expect(limits.maxToolCalls).toBe(100); + expect(limits.maxConcurrentStreamsPerUser).toBe(5); + expect(limits.maxSubAgentDepth).toBe(3); + }); +}); + +describe("runSubAgent — depth guard", () => { + test("rejects when depth exceeds the configured maximum", async () => { + const plugin = new AgentsPlugin({ + dir: false, + limits: { maxSubAgentDepth: 2 }, + }); + // biome-ignore lint/suspicious/noExplicitAny: call private method directly + await expect( + (plugin as any).runSubAgent( + mockReq({}, "alice"), + { name: "child", toolIndex: new Map() }, + {}, + new AbortController().signal, + 3, // exceeds limit 2 + ), + ).rejects.toThrow(/Sub-agent depth exceeded \(limit 2\)/); + }); + + test("accepts at the boundary (depth === limit)", async () => { + // Use a stub adapter so we don't need a real model. + const plugin = new AgentsPlugin({ + dir: false, + limits: { maxSubAgentDepth: 3 }, + agents: {}, + }); + + const stubAdapter = { + // biome-ignore lint/suspicious/noExplicitAny: adapter shape not under test + async *run(): any { + yield { type: "message", content: "hello from depth-3" }; + }, + }; + const child = { + name: "child", + instructions: "test", + // biome-ignore lint/suspicious/noExplicitAny: stub shape + adapter: stubAdapter as any, + toolIndex: new Map(), + }; + + // biome-ignore lint/suspicious/noExplicitAny: call private + const result = await (plugin as any).runSubAgent( + mockReq({}, "alice"), + child, + { input: "test" }, + new AbortController().signal, + 3, // at the limit, not over + ); + expect(result).toBe("hello from depth-3"); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/event-channel.test.ts b/packages/appkit/src/plugins/agents/tests/event-channel.test.ts new file mode 100644 index 00000000..d80d788d --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/event-channel.test.ts @@ -0,0 +1,78 @@ +import { describe, expect, test } from "vitest"; +import { EventChannel } from "../event-channel"; + +async function collect(ch: EventChannel): Promise { + const out: T[] = []; + for await (const v of ch) out.push(v); + return out; +} + +describe("EventChannel", () => { + test("yields pushed values in order", async () => { + const ch = new EventChannel(); + const p = collect(ch); + ch.push(1); + ch.push(2); + ch.push(3); + ch.close(); + await expect(p).resolves.toEqual([1, 2, 3]); + }); + + test("pushes before iteration start are buffered", async () => { + const ch = new EventChannel(); + ch.push("a"); + ch.push("b"); + ch.close(); + await expect(collect(ch)).resolves.toEqual(["a", "b"]); + }); + + test("waiting iterator is unblocked by subsequent push", async () => { + const ch = new EventChannel(); + const promise = collect(ch); + await new Promise((r) => setTimeout(r, 5)); + ch.push(42); + ch.close(); + await expect(promise).resolves.toEqual([42]); + }); + + test("close with no pending values terminates iteration", async () => { + const ch = new EventChannel(); + const p = collect(ch); + ch.close(); + await expect(p).resolves.toEqual([]); + }); + + test("push after close is a no-op (channel is closed)", async () => { + const ch = new EventChannel(); + ch.close(); + ch.push(1); + await expect(collect(ch)).resolves.toEqual([]); + }); + + test("close with error rejects the waiting iterator", async () => { + const ch = new EventChannel(); + const promise = collect(ch); + await new Promise((r) => setTimeout(r, 5)); + ch.close(new Error("boom")); + await expect(promise).rejects.toThrow(/boom/); + }); + + test("interleaved pushes and reads stream through", async () => { + const ch = new EventChannel(); + const received: number[] = []; + const reader = (async () => { + for await (const v of ch) { + received.push(v); + if (received.length === 3) break; + } + })(); + ch.push(1); + await new Promise((r) => setTimeout(r, 0)); + ch.push(2); + await new Promise((r) => setTimeout(r, 0)); + ch.push(3); + await reader; + expect(received).toEqual([1, 2, 3]); + ch.close(); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/event-translator.test.ts b/packages/appkit/src/plugins/agents/tests/event-translator.test.ts new file mode 100644 index 00000000..050af001 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/event-translator.test.ts @@ -0,0 +1,332 @@ +import type { ResponseStreamEvent } from "shared"; +import { describe, expect, test } from "vitest"; +import { AgentEventTranslator } from "../event-translator"; + +describe("AgentEventTranslator", () => { + test("translates message_delta to output_item.added + output_text.delta on first delta", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "message_delta", + content: "Hello", + }); + + expect(events).toHaveLength(2); + expect(events[0].type).toBe("response.output_item.added"); + expect(events[1].type).toBe("response.output_text.delta"); + + if (events[1].type === "response.output_text.delta") { + expect(events[1].delta).toBe("Hello"); + } + }); + + test("subsequent message_delta only produces output_text.delta", () => { + const translator = new AgentEventTranslator(); + translator.translate({ type: "message_delta", content: "Hello" }); + const events = translator.translate({ + type: "message_delta", + content: " world", + }); + + expect(events).toHaveLength(1); + expect(events[0].type).toBe("response.output_text.delta"); + }); + + test("sequence_number is monotonically increasing", () => { + const translator = new AgentEventTranslator(); + const e1 = translator.translate({ type: "message_delta", content: "a" }); + const e2 = translator.translate({ type: "message_delta", content: "b" }); + const e3 = translator.finalize(); + + const allSeqs = [...e1, ...e2, ...e3].map((e) => + "sequence_number" in e ? e.sequence_number : -1, + ); + + for (let i = 1; i < allSeqs.length; i++) { + expect(allSeqs[i]).toBeGreaterThan(allSeqs[i - 1]); + } + }); + + test("translates tool_call to paired output_item.added + output_item.done", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "tool_call", + callId: "call_1", + name: "analytics.query", + args: { sql: "SELECT 1" }, + }); + + expect(events).toHaveLength(2); + expect(events[0].type).toBe("response.output_item.added"); + expect(events[1].type).toBe("response.output_item.done"); + + if (events[0].type === "response.output_item.added") { + expect(events[0].item.type).toBe("function_call"); + if (events[0].item.type === "function_call") { + expect(events[0].item.name).toBe("analytics.query"); + expect(events[0].item.call_id).toBe("call_1"); + } + } + }); + + test("translates tool_result to paired output_item events", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "tool_result", + callId: "call_1", + result: { rows: 42 }, + }); + + expect(events).toHaveLength(2); + expect(events[0].type).toBe("response.output_item.added"); + + if (events[0].type === "response.output_item.added") { + expect(events[0].item.type).toBe("function_call_output"); + } + }); + + test("translates tool_result error", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "tool_result", + callId: "call_1", + result: null, + error: "Query failed", + }); + + if ( + events[0].type === "response.output_item.added" && + events[0].item.type === "function_call_output" + ) { + expect(events[0].item.output).toBe("Query failed"); + } + }); + + test("translates thinking to appkit.thinking extension event", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "thinking", + content: "Let me think about this...", + }); + + expect(events).toHaveLength(1); + expect(events[0].type).toBe("appkit.thinking"); + if (events[0].type === "appkit.thinking") { + expect(events[0].content).toBe("Let me think about this..."); + } + }); + + test("translates metadata to appkit.metadata extension event", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "metadata", + data: { threadId: "t-123" }, + }); + + expect(events).toHaveLength(1); + expect(events[0].type).toBe("appkit.metadata"); + if (events[0].type === "appkit.metadata") { + expect(events[0].data.threadId).toBe("t-123"); + } + }); + + test("status:complete triggers finalize with response.completed", () => { + const translator = new AgentEventTranslator(); + translator.translate({ type: "message_delta", content: "Hi" }); + const events = translator.translate({ type: "status", status: "complete" }); + + const types = events.map((e) => e.type); + expect(types).toContain("response.output_item.done"); + expect(types).toContain("response.completed"); + }); + + test("status:error emits error + response.failed", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "status", + status: "error", + error: "Something broke", + }); + + expect(events).toHaveLength(2); + expect(events[0].type).toBe("error"); + expect(events[1].type).toBe("response.failed"); + + if (events[0].type === "error") { + expect(events[0].error).toBe("Something broke"); + } + }); + + test("finalize produces response.completed", () => { + const translator = new AgentEventTranslator(); + const events = translator.finalize(); + + expect(events.some((e) => e.type === "response.completed")).toBe(true); + }); + + test("finalize with accumulated message text produces output_item.done", () => { + const translator = new AgentEventTranslator(); + translator.translate({ type: "message_delta", content: "Hello " }); + translator.translate({ type: "message_delta", content: "world" }); + const events = translator.finalize(); + + const doneEvent = events.find( + (e) => e.type === "response.output_item.done", + ); + expect(doneEvent).toBeDefined(); + if ( + doneEvent?.type === "response.output_item.done" && + doneEvent.item.type === "message" + ) { + expect(doneEvent.item.content[0].text).toBe("Hello world"); + } + }); + + test("output_index increments for tool calls", () => { + const translator = new AgentEventTranslator(); + const e1 = translator.translate({ + type: "tool_call", + callId: "c1", + name: "tool1", + args: {}, + }); + const e2 = translator.translate({ + type: "tool_result", + callId: "c1", + result: "ok", + }); + + if ( + e1[0].type === "response.output_item.added" && + e2[0].type === "response.output_item.added" + ) { + expect(e2[0].output_index).toBeGreaterThan(e1[0].output_index); + } + }); +}); + +describe("AgentEventTranslator — monotonic output_index", () => { + /** + * Helper: every emitted `response.output_item.added`/`output_item.done` + * event's `output_index` must be >= every prior add/done `output_index`. + * This is the strict contract Responses-API clients (OpenAI's own SDK + * parser) enforce. + */ + function assertMonotonic(events: ResponseStreamEvent[]) { + let last = -1; + for (const ev of events) { + if ( + ev.type === "response.output_item.added" || + ev.type === "response.output_item.done" + ) { + expect(ev.output_index).toBeGreaterThanOrEqual(last); + last = ev.output_index; + } + } + } + + test("tool_call followed by message_delta emits monotonic indices (regression)", () => { + // Before the fix this produced: tool_call at index 1, then + // message_delta.added at 0 — monotonicity violated. + const t = new AgentEventTranslator(); + const all: ResponseStreamEvent[] = []; + all.push( + ...t.translate({ + type: "tool_call", + callId: "c1", + name: "lookup", + args: { q: "x" }, + }), + ); + all.push( + ...t.translate({ type: "tool_result", callId: "c1", result: "ok" }), + ); + all.push(...t.translate({ type: "message_delta", content: "Result: " })); + all.push(...t.translate({ type: "message_delta", content: "ok." })); + all.push(...t.finalize()); + + assertMonotonic(all); + + const added = all.filter((e) => e.type === "response.output_item.added"); + // Three items: tool_call, tool_result, message. Indices 0/1/2. + expect(added.map((e) => e.output_index)).toEqual([0, 1, 2]); + }); + + test("message interrupted by tool_call is closed before the tool_call opens", () => { + const t = new AgentEventTranslator(); + const all: ResponseStreamEvent[] = []; + all.push(...t.translate({ type: "message_delta", content: "thinking..." })); + all.push( + ...t.translate({ + type: "tool_call", + callId: "c1", + name: "lookup", + args: {}, + }), + ); + all.push( + ...t.translate({ type: "tool_result", callId: "c1", result: "ok" }), + ); + all.push(...t.translate({ type: "message_delta", content: "final" })); + all.push(...t.finalize()); + + assertMonotonic(all); + + // Structure: msg0.added, msg0.delta, msg0.done (closed before tool), + // tool_call.added/done, tool_result.added/done, msg1.added, msg1.delta, + // msg1.done (from finalize), response.completed. + const addedDone = all.filter( + (e) => + e.type === "response.output_item.added" || + e.type === "response.output_item.done", + ); + expect(addedDone.map((e) => `${e.type}@${e.output_index}`)).toEqual([ + "response.output_item.added@0", + "response.output_item.done@0", + "response.output_item.added@1", + "response.output_item.done@1", + "response.output_item.added@2", + "response.output_item.done@2", + "response.output_item.added@3", + "response.output_item.done@3", + ]); + }); + + test("full `message` event after deltas does not double-emit output_item.added", () => { + const t = new AgentEventTranslator(); + const all: ResponseStreamEvent[] = []; + all.push(...t.translate({ type: "message_delta", content: "partial" })); + all.push( + ...t.translate({ type: "message", content: "full final content" }), + ); + all.push(...t.finalize()); + + const added = all.filter((e) => e.type === "response.output_item.added"); + const done = all.filter((e) => e.type === "response.output_item.done"); + // Exactly one added (from the first delta) and one done (from the full + // message). finalize() must not emit a second done for the same item. + expect(added).toHaveLength(1); + expect(done).toHaveLength(1); + if (done[0].type === "response.output_item.done") { + const item = done[0].item; + if (item.type === "message") { + expect(item.content[0].text).toBe("full final content"); + } + } + }); + + test("tool_result coerces undefined result to empty-string output", () => { + const t = new AgentEventTranslator(); + const events = t.translate({ + type: "tool_result", + callId: "c1", + result: undefined, + }); + const done = events.find((e) => e.type === "response.output_item.done"); + if (done?.type === "response.output_item.done") { + const item = done.item; + if (item.type === "function_call_output") { + expect(item.output).toBe(""); + } + } + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/load-agents.test.ts b/packages/appkit/src/plugins/agents/tests/load-agents.test.ts new file mode 100644 index 00000000..1a5b9523 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/load-agents.test.ts @@ -0,0 +1,360 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "vitest"; +import { z } from "zod"; +import { buildToolkitEntries } from "../build-toolkit"; +import { + agentIdFromMarkdownPath, + loadAgentFromFile, + loadAgentsFromDir, + parseFrontmatter, +} from "../load-agents"; +import { defineTool, type ToolRegistry } from "../tools/define-tool"; +import { tool } from "../tools/tool"; +import type { AgentDefinition } from "../types"; + +let workDir: string; + +beforeEach(() => { + workDir = fs.mkdtempSync(path.join(os.tmpdir(), "agents-test-")); +}); + +afterEach(() => { + fs.rmSync(workDir, { recursive: true, force: true }); +}); + +/** Flat file under workDir (for legacy loadAgentFromFile tests). */ +function writeRoot(name: string, content: string) { + fs.writeFileSync(path.join(workDir, name), content, "utf-8"); + return path.join(workDir, name); +} + +/** Folder layout: `/agent.md`. */ +function writeAgent(id: string, content: string) { + const dir = path.join(workDir, id); + fs.mkdirSync(dir, { recursive: true }); + const p = path.join(dir, "agent.md"); + fs.writeFileSync(p, content, "utf-8"); + return p; +} + +describe("agentIdFromMarkdownPath", () => { + test("uses parent folder name when file is agent.md", () => { + expect(agentIdFromMarkdownPath("/foo/bar/assistant/agent.md")).toBe( + "assistant", + ); + }); + + test("uses file stem for other .md names", () => { + expect(agentIdFromMarkdownPath("/tmp/assistant.md")).toBe("assistant"); + }); +}); + +describe("parseFrontmatter", () => { + test("parses a simple object", () => { + const { data, content } = parseFrontmatter( + "---\nendpoint: foo\ndefault: true\n---\nHello body", + ); + expect(data).toEqual({ endpoint: "foo", default: true }); + expect(content).toBe("Hello body"); + }); + + test("parses nested arrays", () => { + const { data } = parseFrontmatter( + "---\ntoolkits:\n - analytics\n - files: [uploads.list]\n---\nbody", + ); + expect(data?.toolkits).toEqual(["analytics", { files: ["uploads.list"] }]); + }); + + test("returns null data when no frontmatter", () => { + const { data, content } = parseFrontmatter("No frontmatter here"); + expect(data).toBeNull(); + expect(content).toBe("No frontmatter here"); + }); + + test("throws on invalid YAML", () => { + expect(() => parseFrontmatter("---\nkey: : : bad\n---\n")).toThrow(/YAML/); + }); +}); + +describe("loadAgentFromFile", () => { + test("returns AgentDefinition with body as instructions", async () => { + const p = writeRoot( + "assistant.md", + "---\nendpoint: e-1\n---\nYou are helpful.", + ); + const def = await loadAgentFromFile(p, {}); + expect(def.name).toBe("assistant"); + expect(def.instructions).toBe("You are helpful."); + expect(def.model).toBe("e-1"); + }); + + test("derives agent id from folder when path ends with agent.md", async () => { + const p = writeAgent("router", "---\nendpoint: e-1\n---\nRoute traffic."); + const def = await loadAgentFromFile(p, {}); + expect(def.name).toBe("router"); + expect(def.instructions).toBe("Route traffic."); + }); +}); + +describe("loadAgentsFromDir", () => { + test("returns empty map when dir doesn't exist", async () => { + const res = await loadAgentsFromDir("/nonexistent-for-tests", {}); + expect(res.defs).toEqual({}); + expect(res.defaultAgent).toBeNull(); + }); + + test("loads each subdirectory with agent.md keyed by folder name", async () => { + writeAgent("support", "---\nendpoint: e-1\n---\nSupport prompt."); + writeAgent("sales", "---\nendpoint: e-2\n---\nSales prompt."); + const res = await loadAgentsFromDir(workDir, {}); + expect(Object.keys(res.defs).sort()).toEqual(["sales", "support"]); + }); + + test("throws when legacy top-level .md exists", async () => { + writeRoot("assistant.md", "---\nendpoint: e\n---\nLegacy flat file."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /unsupported top-level markdown file\(s\): assistant\.md.*assistant\/agent\.md/s, + ); + }); + + test("throws when a subdirectory lacks agent.md", async () => { + fs.mkdirSync(path.join(workDir, "broken"), { recursive: true }); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /must contain agent\.md/, + ); + }); + + test("ignores reserved skills directory without agent.md", async () => { + fs.mkdirSync(path.join(workDir, "skills"), { recursive: true }); + writeAgent("solo", "---\nendpoint: e\n---\nOnly real agent."); + const res = await loadAgentsFromDir(workDir, {}); + expect(Object.keys(res.defs)).toEqual(["solo"]); + }); + + test("picks up default: true from frontmatter (deterministic sorted ids)", async () => { + writeAgent("one", "---\nendpoint: a\n---\nOne."); + writeAgent("two", "---\nendpoint: b\ndefault: true\n---\nTwo."); + const res = await loadAgentsFromDir(workDir, {}); + expect(res.defaultAgent).toBe("two"); + }); + + test("throws when frontmatter references an unregistered plugin", async () => { + writeAgent( + "broken", + "---\nendpoint: e\ntoolkits: [missing]\n---\nBroken agent.", + ); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /references toolkit 'missing'/, + ); + }); + + test("throws when frontmatter references an unknown ambient tool", async () => { + writeAgent( + "broken", + "---\nendpoint: e\ntools: [unknown_tool]\n---\nBroken.", + ); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /references tool 'unknown_tool'/, + ); + }); + + test("resolves toolkits + ambient tools when provided", async () => { + const registry: ToolRegistry = { + query: defineTool({ + description: "q", + schema: z.object({ sql: z.string() }), + handler: () => "ok", + }), + }; + const plugins = new Map< + string, + { toolkit: (opts?: unknown) => Record } + >([ + [ + "analytics", + { + toolkit: (opts) => + buildToolkitEntries("analytics", registry, opts as never), + }, + ], + ]); + + const weather = tool({ + name: "get_weather", + description: "Weather", + schema: z.object({ city: z.string() }), + execute: async () => "sunny", + }); + + writeAgent( + "analyst", + "---\nendpoint: e\ntoolkits:\n - analytics\ntools:\n - get_weather\n---\nBody.", + ); + const res = await loadAgentsFromDir(workDir, { + plugins, + availableTools: { get_weather: weather }, + }); + expect(res.defs.analyst.tools).toBeDefined(); + expect(Object.keys(res.defs.analyst.tools ?? {}).sort()).toEqual([ + "analytics.query", + "get_weather", + ]); + }); + + describe("agents: sibling sub-agent references", () => { + test("resolves sibling references into def.agents regardless of folder order", async () => { + writeAgent( + "dispatcher", + "---\nendpoint: e\nagents:\n - analyst\n - writer\n---\nRoute work.", + ); + writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst."); + writeAgent("writer", "---\nendpoint: e\n---\nWriter."); + + const res = await loadAgentsFromDir(workDir, {}); + expect(Object.keys(res.defs.dispatcher.agents ?? {}).sort()).toEqual([ + "analyst", + "writer", + ]); + expect(res.defs.dispatcher.agents?.analyst).toBe(res.defs.analyst); + expect(res.defs.dispatcher.agents?.writer).toBe(res.defs.writer); + expect(res.defs.analyst.agents).toBeUndefined(); + expect(res.defs.writer.agents).toBeUndefined(); + }); + + test("mutual delegation is allowed (runtime depth cap handles cycles)", async () => { + writeAgent("a", "---\nendpoint: e\nagents:\n - b\n---\nA."); + writeAgent("b", "---\nendpoint: e\nagents:\n - a\n---\nB."); + + const res = await loadAgentsFromDir(workDir, {}); + expect(res.defs.a.agents?.b).toBe(res.defs.b); + expect(res.defs.b.agents?.a).toBe(res.defs.a); + }); + + test("throws with available list when a sibling is missing", async () => { + writeAgent("dispatcher", "---\nendpoint: e\nagents:\n - ghost\n---\nD."); + writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /references sub-agent\(s\) 'ghost'.*Available: analyst, dispatcher/s, + ); + }); + + test("reports every missing sibling in one error, not just the first", async () => { + writeAgent( + "dispatcher", + "---\nendpoint: e\nagents:\n - ghost1\n - ghost2\n---\nD.", + ); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /ghost1, ghost2/, + ); + }); + + test("throws on self-reference", async () => { + writeAgent("solo", "---\nendpoint: e\nagents:\n - solo\n---\nSolo."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /'solo'.*cannot reference itself/s, + ); + }); + + test("throws on non-array 'agents:' value", async () => { + writeAgent("bad", "---\nendpoint: e\nagents: analyst\n---\nBad."); + writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /invalid 'agents:' frontmatter/, + ); + }); + + test("throws on non-string entries in 'agents:'", async () => { + writeAgent("bad", "---\nendpoint: e\nagents:\n - 42\n---\nBad."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /invalid 'agents:' entry/, + ); + }); + + test("deduplicates repeated entries silently", async () => { + writeAgent( + "dispatcher", + "---\nendpoint: e\nagents:\n - analyst\n - analyst\n---\nD.", + ); + writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst."); + const res = await loadAgentsFromDir(workDir, {}); + expect(Object.keys(res.defs.dispatcher.agents ?? {})).toEqual([ + "analyst", + ]); + }); + + test("empty array yields no sub-agents (no-op)", async () => { + writeAgent("dispatcher", "---\nendpoint: e\nagents: []\n---\nD."); + const res = await loadAgentsFromDir(workDir, {}); + expect(res.defs.dispatcher.agents).toBeUndefined(); + }); + + test("resolves 'agents:' references against codeAgents when provided", async () => { + writeAgent( + "dispatcher", + "---\nendpoint: e\nagents:\n - support\n---\nD.", + ); + const support: AgentDefinition = { + name: "support", + instructions: "Code-defined support.", + }; + const res = await loadAgentsFromDir(workDir, { + codeAgents: { support }, + }); + expect(res.defs.dispatcher.agents?.support).toBe(support); + }); + + test("codeAgents takes precedence over markdown sibling with the same name", async () => { + writeAgent( + "dispatcher", + "---\nendpoint: e\nagents:\n - support\n---\nD.", + ); + writeAgent("support", "---\nendpoint: e\n---\nMarkdown support."); + const codeSupport: AgentDefinition = { + name: "support", + instructions: "Code support.", + }; + const res = await loadAgentsFromDir(workDir, { + codeAgents: { support: codeSupport }, + }); + expect(res.defs.dispatcher.agents?.support).toBe(codeSupport); + expect(res.defs.dispatcher.agents?.support.instructions).toBe( + "Code support.", + ); + }); + + test("missing-sibling error lists both markdown and code agent names", async () => { + writeAgent("dispatcher", "---\nendpoint: e\nagents:\n - ghost\n---\nD."); + writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst."); + const codeAgent: AgentDefinition = { + name: "writer", + instructions: "Writer.", + }; + await expect( + loadAgentsFromDir(workDir, { codeAgents: { writer: codeAgent } }), + ).rejects.toThrow(/Available: analyst, dispatcher, writer/); + }); + }); +}); + +describe("loadAgentFromFile — sub-agent refs rejected", () => { + test("throws when 'agents:' is non-empty in a single-file load", async () => { + const p = writeRoot( + "lonely.md", + "---\nendpoint: e\nagents:\n - ghost\n---\nLonely.", + ); + await expect(loadAgentFromFile(p, {})).rejects.toThrow( + /requires loadAgentsFromDir/, + ); + }); + + test("ignores empty 'agents:' array (treated as absent)", async () => { + const p = writeRoot( + "lonely.md", + "---\nendpoint: e\nagents: []\n---\nLonely.", + ); + const def = await loadAgentFromFile(p, {}); + expect(def.agents).toBeUndefined(); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/run-agent.test.ts b/packages/appkit/src/plugins/agents/tests/run-agent.test.ts new file mode 100644 index 00000000..1a974811 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/run-agent.test.ts @@ -0,0 +1,120 @@ +import type { + AgentAdapter, + AgentEvent, + AgentInput, + AgentRunContext, +} from "shared"; +import { describe, expect, test, vi } from "vitest"; +import { z } from "zod"; +import { createAgent } from "../../../core/create-agent-def"; +import { runAgent } from "../../../core/run-agent"; +import { tool } from "../tools/tool"; +import type { ToolkitEntry } from "../types"; + +function scriptedAdapter(events: AgentEvent[]): AgentAdapter { + return { + async *run(_input: AgentInput, _context: AgentRunContext) { + for (const event of events) { + yield event; + } + }, + }; +} + +describe("runAgent", () => { + test("drives the adapter and returns aggregated text", async () => { + const events: AgentEvent[] = [ + { type: "message_delta", content: "Hello " }, + { type: "message_delta", content: "world" }, + { type: "status", status: "complete" }, + ]; + const def = createAgent({ + instructions: "Say hello", + model: scriptedAdapter(events), + }); + + const result = await runAgent(def, { messages: "hi" }); + expect(result.text).toBe("Hello world"); + expect(result.events).toHaveLength(3); + }); + + test("prefers terminal 'message' event over deltas when present", async () => { + const events: AgentEvent[] = [ + { type: "message_delta", content: "partial" }, + { type: "message", content: "final answer" }, + ]; + const def = createAgent({ + instructions: "x", + model: scriptedAdapter(events), + }); + const result = await runAgent(def, { messages: "hi" }); + expect(result.text).toBe("final answer"); + }); + + test("invokes inline tools via executeTool callback", async () => { + const weatherFn = vi.fn(async () => "Sunny in NYC"); + const weather = tool({ + name: "get_weather", + description: "Weather", + schema: z.object({ city: z.string() }), + execute: weatherFn, + }); + + let capturedCtx: AgentRunContext | null = null; + const adapter: AgentAdapter = { + async *run(_input, context) { + capturedCtx = context; + yield { type: "message_delta", content: "" }; + }, + }; + + const def = createAgent({ + instructions: "x", + model: adapter, + tools: { get_weather: weather }, + }); + + await runAgent(def, { messages: "hi" }); + expect(capturedCtx).not.toBeNull(); + // biome-ignore lint/style/noNonNullAssertion: asserted above + const result = await capturedCtx!.executeTool("get_weather", { + city: "NYC", + }); + expect(result).toBe("Sunny in NYC"); + expect(weatherFn).toHaveBeenCalledWith({ city: "NYC" }); + }); + + test("throws a clear error when a ToolkitEntry is invoked", async () => { + const toolkitEntry: ToolkitEntry = { + __toolkitRef: true, + pluginName: "analytics", + localName: "query", + def: { + name: "analytics.query", + description: "SQL", + parameters: { type: "object", properties: {} }, + }, + }; + + let capturedCtx: AgentRunContext | null = null; + const adapter: AgentAdapter = { + async *run(_input, context) { + capturedCtx = context; + yield { type: "message_delta", content: "" }; + }, + }; + + const def = createAgent({ + instructions: "x", + model: adapter, + tools: { "analytics.query": toolkitEntry }, + }); + + await runAgent(def, { messages: "hi" }); + expect(capturedCtx).not.toBeNull(); + await expect( + // biome-ignore lint/style/noNonNullAssertion: asserted above + capturedCtx!.executeTool("analytics.query", {}), + ).rejects.toThrow(/only usable via createApp/); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/system-prompt.test.ts b/packages/appkit/src/plugins/agents/tests/system-prompt.test.ts new file mode 100644 index 00000000..25724259 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/system-prompt.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, test } from "vitest"; +import { buildBaseSystemPrompt, composeSystemPrompt } from "../system-prompt"; + +const emptyCtx = { + agentName: "a", + pluginNames: [] as string[], + toolNames: [] as string[], +}; + +describe("buildBaseSystemPrompt", () => { + test("includes plugin names", () => { + const prompt = buildBaseSystemPrompt({ + agentName: "assistant", + pluginNames: ["analytics", "files", "genie"], + toolNames: [], + }); + expect(prompt).toContain("Active AppKit plugins: analytics, files, genie"); + }); + + test("includes guidelines", () => { + const prompt = buildBaseSystemPrompt(emptyCtx); + expect(prompt).toContain("Guidelines:"); + expect(prompt).toContain("syntax, dialect, or path rules"); + expect(prompt).toContain("summarize what matters"); + }); + + test("works with no plugins", () => { + const prompt = buildBaseSystemPrompt(emptyCtx); + expect(prompt).toContain("AI assistant running on Databricks AppKit"); + expect(prompt).not.toContain("Active AppKit plugins:"); + }); + + test("does NOT include individual tool names", () => { + const prompt = buildBaseSystemPrompt({ + agentName: "a", + pluginNames: ["analytics"], + toolNames: ["analytics.query"], + }); + expect(prompt).not.toContain("analytics.query"); + expect(prompt).not.toContain("Available tools:"); + }); +}); + +describe("composeSystemPrompt", () => { + test("concatenates base + agent prompt with double newline", () => { + const composed = composeSystemPrompt("Base prompt.", "Agent prompt."); + expect(composed).toBe("Base prompt.\n\nAgent prompt."); + }); + + test("returns base prompt alone when no agent prompt", () => { + const composed = composeSystemPrompt("Base prompt."); + expect(composed).toBe("Base prompt."); + }); + + test("returns base prompt when agent prompt is empty string", () => { + const composed = composeSystemPrompt("Base prompt.", ""); + expect(composed).toBe("Base prompt."); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/thread-store.test.ts b/packages/appkit/src/plugins/agents/tests/thread-store.test.ts new file mode 100644 index 00000000..ed4f70ba --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/thread-store.test.ts @@ -0,0 +1,138 @@ +import { describe, expect, test } from "vitest"; +import { InMemoryThreadStore } from "../thread-store"; + +describe("InMemoryThreadStore", () => { + test("create() returns a new thread with the given userId", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + + expect(thread.id).toBeDefined(); + expect(thread.userId).toBe("user-1"); + expect(thread.messages).toEqual([]); + expect(thread.createdAt).toBeInstanceOf(Date); + expect(thread.updatedAt).toBeInstanceOf(Date); + }); + + test("get() returns the thread for the correct user", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + + const retrieved = await store.get(thread.id, "user-1"); + expect(retrieved).toEqual(thread); + }); + + test("get() returns null for wrong user", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + + const retrieved = await store.get(thread.id, "user-2"); + expect(retrieved).toBeNull(); + }); + + test("get() returns null for non-existent thread", async () => { + const store = new InMemoryThreadStore(); + const retrieved = await store.get("non-existent", "user-1"); + expect(retrieved).toBeNull(); + }); + + test("list() returns threads sorted by updatedAt desc", async () => { + const store = new InMemoryThreadStore(); + const t1 = await store.create("user-1"); + const t2 = await store.create("user-1"); + + // Make t1 more recently updated + await store.addMessage(t1.id, "user-1", { + id: "msg-1", + role: "user", + content: "hello", + createdAt: new Date(), + }); + + const threads = await store.list("user-1"); + expect(threads).toHaveLength(2); + expect(threads[0].id).toBe(t1.id); + expect(threads[1].id).toBe(t2.id); + }); + + test("list() returns empty for unknown user", async () => { + const store = new InMemoryThreadStore(); + await store.create("user-1"); + + const threads = await store.list("user-2"); + expect(threads).toEqual([]); + }); + + test("addMessage() appends to thread and updates timestamp", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + const originalUpdatedAt = thread.updatedAt; + + // Small delay to ensure timestamp differs + await new Promise((r) => setTimeout(r, 5)); + + await store.addMessage(thread.id, "user-1", { + id: "msg-1", + role: "user", + content: "hello", + createdAt: new Date(), + }); + + const updated = await store.get(thread.id, "user-1"); + expect(updated?.messages).toHaveLength(1); + expect(updated?.messages[0].content).toBe("hello"); + expect(updated?.updatedAt.getTime()).toBeGreaterThanOrEqual( + originalUpdatedAt.getTime(), + ); + }); + + test("addMessage() throws for non-existent thread", async () => { + const store = new InMemoryThreadStore(); + + await expect( + store.addMessage("non-existent", "user-1", { + id: "msg-1", + role: "user", + content: "hello", + createdAt: new Date(), + }), + ).rejects.toThrow("Thread non-existent not found"); + }); + + test("delete() removes a thread and returns true", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + + const deleted = await store.delete(thread.id, "user-1"); + expect(deleted).toBe(true); + + const retrieved = await store.get(thread.id, "user-1"); + expect(retrieved).toBeNull(); + }); + + test("delete() returns false for non-existent thread", async () => { + const store = new InMemoryThreadStore(); + const deleted = await store.delete("non-existent", "user-1"); + expect(deleted).toBe(false); + }); + + test("delete() returns false for wrong user", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + + const deleted = await store.delete(thread.id, "user-2"); + expect(deleted).toBe(false); + }); + + test("threads are isolated per user", async () => { + const store = new InMemoryThreadStore(); + await store.create("user-1"); + await store.create("user-1"); + await store.create("user-2"); + + const user1Threads = await store.list("user-1"); + const user2Threads = await store.list("user-2"); + + expect(user1Threads).toHaveLength(2); + expect(user2Threads).toHaveLength(1); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/tool-approval-gate.test.ts b/packages/appkit/src/plugins/agents/tests/tool-approval-gate.test.ts new file mode 100644 index 00000000..1e17ddf6 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/tool-approval-gate.test.ts @@ -0,0 +1,156 @@ +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { ToolApprovalGate } from "../tool-approval-gate"; + +describe("ToolApprovalGate", () => { + let gate: ToolApprovalGate; + + beforeEach(() => { + vi.useFakeTimers(); + gate = new ToolApprovalGate(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + test("resolves with 'approve' when a matching submit arrives", async () => { + const waiter = gate.wait({ + approvalId: "a1", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + expect(gate.size).toBe(1); + + const result = gate.submit({ + approvalId: "a1", + userId: "alice", + decision: "approve", + }); + + expect(result).toEqual({ ok: true }); + await expect(waiter).resolves.toBe("approve"); + expect(gate.size).toBe(0); + }); + + test("resolves with 'deny' on explicit deny", async () => { + const waiter = gate.wait({ + approvalId: "a2", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + gate.submit({ + approvalId: "a2", + userId: "alice", + decision: "deny", + }); + await expect(waiter).resolves.toBe("deny"); + }); + + test("auto-denies after timeoutMs with no submit", async () => { + const waiter = gate.wait({ + approvalId: "a3", + streamId: "s1", + userId: "alice", + timeoutMs: 1000, + }); + vi.advanceTimersByTime(1000); + await expect(waiter).resolves.toBe("deny"); + expect(gate.size).toBe(0); + }); + + test("refuses a submit from a different user (ownership check)", async () => { + const waiter = gate.wait({ + approvalId: "a4", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + const result = gate.submit({ + approvalId: "a4", + userId: "bob", + decision: "approve", + }); + expect(result).toEqual({ ok: false, reason: "forbidden" }); + expect(gate.size).toBe(1); + // Waiter is still pending; cleanup to let fake timers drain. + gate.submit({ + approvalId: "a4", + userId: "alice", + decision: "deny", + }); + await expect(waiter).resolves.toBe("deny"); + }); + + test("returns 'unknown' reason when approvalId is not registered", () => { + expect( + gate.submit({ approvalId: "nope", userId: "x", decision: "approve" }), + ).toEqual({ ok: false, reason: "unknown" }); + }); + + test("abortStream denies every pending gate for that stream", async () => { + const a = gate.wait({ + approvalId: "a5", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + const b = gate.wait({ + approvalId: "a6", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + const c = gate.wait({ + approvalId: "a7", + streamId: "s2", + userId: "alice", + timeoutMs: 60_000, + }); + gate.abortStream("s1"); + await expect(a).resolves.toBe("deny"); + await expect(b).resolves.toBe("deny"); + expect(gate.size).toBe(1); + // s2's waiter is still pending; settle it to clean up timers. + gate.submit({ approvalId: "a7", userId: "alice", decision: "deny" }); + await expect(c).resolves.toBe("deny"); + }); + + test("abortAll denies every pending gate", async () => { + const a = gate.wait({ + approvalId: "a8", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + const b = gate.wait({ + approvalId: "a9", + streamId: "s2", + userId: "bob", + timeoutMs: 60_000, + }); + gate.abortAll(); + await expect(a).resolves.toBe("deny"); + await expect(b).resolves.toBe("deny"); + expect(gate.size).toBe(0); + }); + + test("a timed-out approval cannot be resolved by a late submit", async () => { + const waiter = gate.wait({ + approvalId: "a10", + streamId: "s1", + userId: "alice", + timeoutMs: 500, + }); + vi.advanceTimersByTime(500); + await expect(waiter).resolves.toBe("deny"); + + const late = gate.submit({ + approvalId: "a10", + userId: "alice", + decision: "approve", + }); + expect(late).toEqual({ ok: false, reason: "unknown" }); + }); +}); diff --git a/packages/appkit/src/plugins/agents/thread-store.ts b/packages/appkit/src/plugins/agents/thread-store.ts new file mode 100644 index 00000000..7c4622cd --- /dev/null +++ b/packages/appkit/src/plugins/agents/thread-store.ts @@ -0,0 +1,66 @@ +import { randomUUID } from "node:crypto"; +import type { Message, Thread, ThreadStore } from "shared"; + +/** + * In-memory thread store backed by a nested Map. + * + * Outer key: userId, inner key: threadId. Thread history is retained for the + * lifetime of the process with no eviction, caps, or TTL — a chatty user will + * grow the in-memory footprint monotonically, and the server loses every + * thread on restart. **This implementation is intended for local development + * and single-process demos only.** + * + * For any real deployment, pass a persistent `ThreadStore` to `agents({ ... })` + * (e.g. a Lakebase- or Postgres-backed implementation). A bounded + * `InMemoryThreadStore` with eviction policies is tracked as a follow-up. + */ +export class InMemoryThreadStore implements ThreadStore { + private store = new Map>(); + + async create(userId: string): Promise { + const now = new Date(); + const thread: Thread = { + id: randomUUID(), + userId, + messages: [], + createdAt: now, + updatedAt: now, + }; + this.userMap(userId).set(thread.id, thread); + return thread; + } + + async get(threadId: string, userId: string): Promise { + return this.userMap(userId).get(threadId) ?? null; + } + + async list(userId: string): Promise { + return Array.from(this.userMap(userId).values()).sort( + (a, b) => b.updatedAt.getTime() - a.updatedAt.getTime(), + ); + } + + async addMessage( + threadId: string, + userId: string, + message: Message, + ): Promise { + const thread = this.userMap(userId).get(threadId); + if (!thread) throw new Error(`Thread ${threadId} not found`); + thread.messages.push(message); + thread.updatedAt = new Date(); + } + + async delete(threadId: string, userId: string): Promise { + return this.userMap(userId).delete(threadId); + } + + private userMap(userId: string): Map { + let map = this.store.get(userId); + if (!map) { + map = new Map(); + this.store.set(userId, map); + } + return map; + } +} diff --git a/packages/appkit/src/plugins/agents/tool-approval-gate.ts b/packages/appkit/src/plugins/agents/tool-approval-gate.ts new file mode 100644 index 00000000..669f30a9 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tool-approval-gate.ts @@ -0,0 +1,122 @@ +/** + * Server-side state for the human-in-the-loop approval gate on + * `destructive: true` agent tool calls. + * + * Lifecycle: + * + * 1. `wait(...)` is called from inside `executeTool` when a destructive tool + * is about to execute. A `Pending` record is registered and a timer is + * scheduled for auto-deny. The returned promise is what blocks the + * adapter until the decision arrives. + * 2. The client receives an `appkit.approval_pending` SSE event carrying the + * `approvalId` + `streamId` and posts a decision to `POST /chat/approve`. + * The route calls {@link ToolApprovalGate.submit} which resolves the + * pending promise and clears the timer. + * 3. If no submit arrives within `timeoutMs`, the timer fires and the + * promise resolves with `"deny"`. + * + * Security invariants: + * + * - `submit` verifies that the decider's user id matches the user who + * initiated the stream (set by `wait`). Mismatches are rejected without + * resolving the pending promise — this prevents a second user from + * approving (or denying) another user's destructive action. + * - `abort(streamId)` cancels every pending gate for a stream and denies + * each one. Used when the enclosing stream is cancelled or the plugin is + * shutting down. + */ +type ApprovalDecision = "approve" | "deny"; + +interface Pending { + resolve: (decision: ApprovalDecision) => void; + userId: string; + streamId: string; + timeout: ReturnType; +} + +type ApprovalSubmitResult = + | { ok: true } + | { ok: false; reason: "unknown" | "forbidden" }; + +export class ToolApprovalGate { + private pending = new Map(); + + /** + * Register a pending approval and return a promise that resolves with the + * user's decision or with `"deny"` when the timeout elapses. The returned + * promise never rejects. + */ + wait(args: { + approvalId: string; + streamId: string; + userId: string; + timeoutMs: number; + }): Promise { + const { approvalId, streamId, userId, timeoutMs } = args; + return new Promise((resolve) => { + const timeout = setTimeout(() => { + if (this.pending.delete(approvalId)) { + resolve("deny"); + } + }, timeoutMs); + this.pending.set(approvalId, { + resolve, + userId, + streamId, + timeout, + }); + }); + } + + /** + * Settle an approval with a user decision. Returns: + * - `{ ok: true }` if the pending record existed, the userId matched, and + * the promise was resolved. + * - `{ ok: false, reason: "unknown" }` if no pending record matches the id. + * - `{ ok: false, reason: "forbidden" }` if the userId does not match the + * user who initiated the stream. + */ + submit(args: { + approvalId: string; + userId: string; + decision: ApprovalDecision; + }): ApprovalSubmitResult { + const { approvalId, userId, decision } = args; + const p = this.pending.get(approvalId); + if (!p) return { ok: false, reason: "unknown" }; + if (p.userId !== userId) return { ok: false, reason: "forbidden" }; + clearTimeout(p.timeout); + this.pending.delete(approvalId); + p.resolve(decision); + return { ok: true }; + } + + /** + * Cancel all pending gates for a specific stream (e.g., when the user + * cancels the stream). Each gate resolves with `"deny"` so the adapter + * unwinds cleanly. + */ + abortStream(streamId: string): void { + for (const [id, p] of this.pending) { + if (p.streamId === streamId) { + clearTimeout(p.timeout); + this.pending.delete(id); + p.resolve("deny"); + } + } + } + + /** Cancel every pending gate. Used at plugin shutdown. */ + abortAll(): void { + for (const [id, p] of this.pending) { + clearTimeout(p.timeout); + this.pending.delete(id); + p.resolve("deny"); + } + } + + /** Number of pending approvals (test/diagnostic helper). */ + get size(): number { + return this.pending.size; + } +} diff --git a/packages/appkit/src/plugins/agents/types.ts b/packages/appkit/src/plugins/agents/types.ts index 086a0426..8d52d278 100644 --- a/packages/appkit/src/plugins/agents/types.ts +++ b/packages/appkit/src/plugins/agents/types.ts @@ -1,6 +1,13 @@ -import type { AgentToolDefinition, ToolAnnotations } from "shared"; +import type { + AgentAdapter, + AgentToolDefinition, + BasePluginConfig, + ThreadStore, + ToolAnnotations, +} from "shared"; import type { FunctionTool } from "./tools/function-tool"; import type { HostedTool } from "./tools/hosted-tools"; +import type { McpHostPolicyConfig } from "./tools/mcp-host-policy"; /** * A tool reference produced by a plugin's `.toolkit()` call. The agents plugin @@ -42,8 +49,172 @@ export interface ToolkitOptions { } /** - * Type guard for `ToolkitEntry` — used to differentiate toolkit references - * from inline tools in a mixed `tools` record. + * Context passed to `baseSystemPrompt` callbacks. + */ +export interface PromptContext { + agentName: string; + pluginNames: string[]; + toolNames: string[]; +} + +export type BaseSystemPromptOption = + | false + | string + | ((ctx: PromptContext) => string); + +export interface AgentDefinition { + /** Filled in from the enclosing key when used in `agents: { foo: def }`. */ + name?: string; + /** System prompt body. For markdown-loaded agents this is the file body. */ + instructions: string; + /** + * Model adapter (or endpoint-name string sugar for + * `DatabricksAdapter.fromServingEndpoint({ endpointName })`). Optional — + * falls back to the plugin's `defaultModel`. + */ + model?: AgentAdapter | Promise | string; + /** Per-agent tool record. Key is the LLM-visible tool-call name. */ + tools?: Record; + /** Sub-agents, exposed as `agent-` tools on this agent. */ + agents?: Record; + /** Override the plugin's baseSystemPrompt for this agent only. */ + baseSystemPrompt?: BaseSystemPromptOption; + maxSteps?: number; + maxTokens?: number; + /** + * When true, the thread used for a chat request against this agent is + * deleted from `ThreadStore` after the stream completes (success or + * failure). Use for stateless one-shot agents — e.g. autocomplete, where + * each request is independent and retaining history would both poison + * future calls and accumulate unbounded state in the default + * `InMemoryThreadStore`. Defaults to `false`. + */ + ephemeral?: boolean; +} + +/** + * Auto-inherit configuration. When enabled for a given agent origin, agents + * with no explicit `tools:` declaration receive every registered ToolProvider + * plugin tool whose author marked `autoInheritable: true`. Tools without that + * flag — destructive, state-mutating, or privilege-sensitive — never spread + * automatically and must be wired via `tools:`, `toolkits:`, or `fromPlugin`. + * + * Defaults are `false` for both origins (safe-by-default): developers must + * consciously opt an origin in to any auto-inherit behaviour. + */ +export interface AutoInheritToolsConfig { + /** Default for agents loaded from markdown files. Default: `false`. */ + file?: boolean; + /** Default for code-defined agents (via `agents: { foo: createAgent(...) }`). Default: `false`. */ + code?: boolean; +} + +export interface AgentsPluginConfig extends BasePluginConfig { + /** Directory of agent packages (`/agent.md` each). Default `./config/agents`. Set to `false` to disable. */ + dir?: string | false; + /** Code-defined agents, merged with file-loaded ones (code wins on key collision). */ + agents?: Record; + /** Agent used when clients don't specify one. Defaults to the first-registered agent or the file with `default: true` frontmatter. */ + defaultAgent?: string; + /** Default model for agents that don't specify their own (in code or frontmatter). */ + defaultModel?: AgentAdapter | Promise | string; + /** Ambient tool library. Keys may be referenced by markdown frontmatter via `tools: [key1, key2]`. */ + tools?: Record; + /** Whether to auto-inherit every ToolProvider plugin's toolkit. Accepts a boolean shorthand. */ + autoInheritTools?: boolean | AutoInheritToolsConfig; + /** Persistent thread store. Default: in-memory. */ + threadStore?: ThreadStore; + /** Customize or disable the AppKit base system prompt. */ + baseSystemPrompt?: BaseSystemPromptOption; + /** + * MCP server host policy. By default only same-origin Databricks workspace + * URLs may be used as MCP endpoints; custom hosts must be explicitly + * allowlisted here. Workspace credentials (SP / OBO) are never forwarded + * to non-workspace hosts. + */ + mcp?: McpHostPolicyConfig; + /** + * Human-in-the-loop approval gate for destructive tool calls. When enabled + * (the default), the agents plugin emits an `appkit.approval_pending` SSE + * event before executing any tool annotated `destructive: true` and waits + * for a `POST /chat/approve` decision from the same user who initiated the + * stream. A missing decision after `timeoutMs` auto-denies the call. + */ + approval?: { + /** Require human approval for tools annotated `destructive: true`. Default: `true`. */ + requireForDestructive?: boolean; + /** Milliseconds to wait before auto-denying. Default: 60_000. */ + timeoutMs?: number; + }; + /** + * Runtime resource limits applied during agent execution. Defaults are + * tuned to protect a single-instance deployment from a misbehaving user or + * a runaway prompt injection; tighten or relax as appropriate for the + * deployment's scale and trust model. Request-body caps (chat message + * size, invocations input size / length) are enforced statically by the + * Zod schemas and are not configurable here. + */ + limits?: { + /** + * Max concurrent chat streams a single user may have open. Subsequent + * `POST /chat` requests from that user while at-limit are rejected with + * HTTP 429. Default: `5`. + */ + maxConcurrentStreamsPerUser?: number; + /** + * Max tool invocations per agent run (across the full tool-call graph, + * including sub-agent invocations). A run that exceeds the budget is + * aborted with a terminal error event. Default: `50`. + */ + maxToolCalls?: number; + /** + * Max sub-agent recursion depth. Protects against a prompt-injected + * agent that delegates to a sub-agent which in turn delegates back to + * itself (directly or transitively). Default: `3`. + */ + maxSubAgentDepth?: number; + }; +} + +/** Internal tool-index entry after a tool record has been resolved to a dispatchable form. */ +export type ResolvedToolEntry = + | { + source: "toolkit"; + pluginName: string; + localName: string; + def: AgentToolDefinition; + } + | { + source: "function"; + functionTool: FunctionTool; + def: AgentToolDefinition; + } + | { + source: "mcp"; + mcpToolName: string; + def: AgentToolDefinition; + } + | { + source: "subagent"; + agentName: string; + def: AgentToolDefinition; + }; + +export interface RegisteredAgent { + name: string; + instructions: string; + adapter: AgentAdapter; + toolIndex: Map; + baseSystemPrompt?: BaseSystemPromptOption; + maxSteps?: number; + maxTokens?: number; + /** Mirrors `AgentDefinition.ephemeral` — skip thread persistence. */ + ephemeral?: boolean; +} + +/** + * Type guard for `ToolkitEntry` — used by the agents plugin to differentiate + * toolkit references from inline tools in a mixed `tools` record. */ export function isToolkitEntry(value: unknown): value is ToolkitEntry { return ( diff --git a/packages/shared/src/agent.ts b/packages/shared/src/agent.ts index c4f76b29..8e34d5fb 100644 --- a/packages/shared/src/agent.ts +++ b/packages/shared/src/agent.ts @@ -86,7 +86,21 @@ export type AgentEvent = status: "running" | "waiting" | "complete" | "error"; error?: string; } - | { type: "metadata"; data: Record }; + | { type: "metadata"; data: Record } + | { + /** + * Emitted by the agents plugin (not adapters) when a tool call annotated + * `destructive: true` is awaiting human approval. Clients should render + * an approval prompt and POST to `/chat/approve` with the matching + * `approvalId` and a `decision` of `approve` or `deny`. + */ + type: "approval_pending"; + approvalId: string; + streamId: string; + toolName: string; + args: unknown; + annotations?: ToolAnnotations; + }; // --------------------------------------------------------------------------- // Responses API types (OpenAI-compatible wire format for HTTP boundary) @@ -178,6 +192,23 @@ export interface AppKitMetadataEvent { sequence_number: number; } +/** + * Emitted when a destructive tool call is awaiting human approval. The client + * should render an approval UI and POST the decision to `/chat/approve` with + * `{ streamId, approvalId, decision: "approve" | "deny" }`. If no decision + * arrives before the server-side timeout, the call is auto-denied and the + * agent receives a denial string as the tool output. + */ +export interface AppKitApprovalPendingEvent { + type: "appkit.approval_pending"; + approval_id: string; + stream_id: string; + tool_name: string; + args: unknown; + annotations?: ToolAnnotations; + sequence_number: number; +} + export type ResponseStreamEvent = | ResponseOutputItemAddedEvent | ResponseOutputItemDoneEvent @@ -186,7 +217,8 @@ export type ResponseStreamEvent = | ResponseErrorEvent | ResponseFailedEvent | AppKitThinkingEvent - | AppKitMetadataEvent; + | AppKitMetadataEvent + | AppKitApprovalPendingEvent; // --------------------------------------------------------------------------- // Adapter contract diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 16079b1d..307f44cf 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -305,6 +305,9 @@ importers: express: specifier: 4.22.0 version: 4.22.0 + js-yaml: + specifier: ^4.1.1 + version: 4.1.1 obug: specifier: 2.1.1 version: 2.1.1 @@ -339,6 +342,9 @@ importers: '@types/express': specifier: 4.17.25 version: 4.17.25 + '@types/js-yaml': + specifier: ^4.0.9 + version: 4.0.9 '@types/json-schema': specifier: 7.0.15 version: 7.0.15 @@ -4989,6 +4995,9 @@ packages: '@types/istanbul-reports@3.0.4': resolution: {integrity: sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==} + '@types/js-yaml@4.0.9': + resolution: {integrity: sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==} + '@types/jsesc@2.5.1': resolution: {integrity: sha512-9VN+6yxLOPLOav+7PwjZbxiID2bVaeq0ED4qSQmdQTdjnXJSaCVKTR58t15oqH1H5t8Ng2ZX1SabJVoN9Q34bw==} @@ -17421,6 +17430,8 @@ snapshots: dependencies: '@types/istanbul-lib-report': 3.0.3 + '@types/js-yaml@4.0.9': {} + '@types/jsesc@2.5.1': {} '@types/json-schema@7.0.15': {} diff --git a/template/appkit.plugins.json b/template/appkit.plugins.json index d1420d2e..c8589f9a 100644 --- a/template/appkit.plugins.json +++ b/template/appkit.plugins.json @@ -2,6 +2,30 @@ "$schema": "https://databricks.github.io/appkit/schemas/template-plugins.schema.json", "version": "1.0", "plugins": { + "agents": { + "name": "agents", + "displayName": "Agents Plugin", + "description": "AI agents driven by markdown configs or code, with auto-tool-discovery from registered plugins", + "package": "@databricks/appkit", + "resources": { + "required": [], + "optional": [ + { + "type": "serving_endpoint", + "alias": "Model Serving (agents)", + "resourceKey": "agents-serving-endpoint", + "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_AGENT_ENDPOINT` when no per-agent model is configured. Omit when agents use only external adapters.", + "permission": "CAN_QUERY", + "fields": { + "name": { + "env": "DATABRICKS_AGENT_ENDPOINT", + "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`" + } + } + } + ] + } + }, "analytics": { "name": "analytics", "displayName": "Analytics Plugin",