From 076ed431d504689b80d20426b130af285bc57177 Mon Sep 17 00:00:00 2001 From: Georges Garnier Date: Mon, 27 Apr 2026 13:17:47 +0200 Subject: [PATCH 1/5] =?UTF-8?q?feat(p4):=20native=20tools=20for=20agents?= =?UTF-8?q?=20=E2=80=94=20bash=20+=20file-write=20inside=20the=20sandbox?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agents can now call two native tools by emitting fenced forge:* blocks in their reply. The runtime parses the first block per turn, executes it, feeds the structured result back as a user message, and loops up to maxTurns (capped at 10). Tools live in tools-core/src/runtime/ and stay distinct from the host-side FileWrite: the runtime write tool only accepts paths under /workspace and overwrites by default (in-sandbox iteration), while the bash tool starts in /workspace and surfaces stdout/stderr/exit code/timed-out to the LLM. DockerLaunch now bind-mounts a per-run host directory at /workspace so the agent has somewhere writable, and so artifacts survive the container (used later in P5 for extraction). Runtime mode switches automatically based on AGENT.md.maxTurns: maxTurns = 1 (the default) keeps the P3 one-shot path, maxTurns > 1 enables the tool loop and appends a TOOLS section to the system prompt explaining the protocol. Tool output is wrapped in [forge:tool] / [/forge:tool] markers on stdout so the host TUI can route it to its action card instead of mixing it with prose. Tests: - agent-side parser (none / tool / invalid / first-block-only) - runtime FileWrite path traversal, sandbox escape, overwrite - runtime Bash stdout/stderr/exit/timeout, cwd respected Tests use a FORGE_WORKSPACE override so they don't try to touch /workspace on the host. 
--- packages/runtime/src/index.ts | 146 +++++++++++++++--- packages/runtime/src/tool-protocol.ts | 135 ++++++++++++++++ packages/runtime/tests/tool-protocol.test.ts | 74 +++++++++ packages/tools-core/src/docker-launch.ts | 18 ++- packages/tools-core/src/index.ts | 17 ++ packages/tools-core/src/runtime/bash.ts | 104 +++++++++++++ packages/tools-core/src/runtime/file-write.ts | 76 +++++++++ .../tools-core/tests/runtime-bash.test.ts | 52 +++++++ .../tests/runtime-file-write.test.ts | 99 ++++++++++++ 9 files changed, 696 insertions(+), 25 deletions(-) create mode 100644 packages/runtime/src/tool-protocol.ts create mode 100644 packages/runtime/tests/tool-protocol.test.ts create mode 100644 packages/tools-core/src/runtime/bash.ts create mode 100644 packages/tools-core/src/runtime/file-write.ts create mode 100644 packages/tools-core/tests/runtime-bash.test.ts create mode 100644 packages/tools-core/tests/runtime-file-write.test.ts diff --git a/packages/runtime/src/index.ts b/packages/runtime/src/index.ts index 19d2732..d8b3ab1 100644 --- a/packages/runtime/src/index.ts +++ b/packages/runtime/src/index.ts @@ -4,19 +4,33 @@ // // 1. Standalone (P1) : reads a prompt from stdin, calls an OpenAI- // compatible LLM endpoint, streams the answer to stdout. No agent -// configuration required. +// configuration required, no tool loop. // -// 2. Agent mode (P3.4) : if an AGENT.md is mounted at /agent/AGENT.md, -// its frontmatter overrides the model and its body becomes the -// system prompt. The prompt from stdin is the user message. +// 2. Agent mode (P3+) : reads /agent/AGENT.md (frontmatter overrides +// the model, body becomes the system prompt). The user prompt comes +// from stdin. Native tools are available via fenced forge:* blocks +// (P4) — the runtime parses them, executes the tool, feeds the +// result back into the conversation, and loops up to maxTurns. // -// The output is STREAMED token by token to stdout so the host can render -// progress live in the TUI. 
+// Output is STREAMED token by token to stdout so the host can render +// progress live in the TUI. Tool results are also written to stdout +// inside [forge:tool] markers so the host can show them in Mission +// Control without re-running the parser. import { readFileSync } from 'node:fs' import { createOpenAI } from '@ai-sdk/openai' import { parseAgentMd } from '@agent-forge/core/types' -import { streamText } from 'ai' +import { + executeBash, + executeRuntimeFileWrite, +} from '@agent-forge/tools-core' +import { type CoreMessage, streamText } from 'ai' +import { + parseFirstToolBlock, + renderBashResult, + renderInvalid, + renderWriteResult, +} from './tool-protocol.ts' const AGENT_MD_PATH = '/agent/AGENT.md' @@ -25,16 +39,18 @@ const API_KEY = process.env.FORGE_API_KEY ?? 'not-needed' const ENV_MODEL = process.env.FORGE_MODEL ?? 'mlx-community/Qwen2.5-7B-Instruct-4bit' const MAX_TOKENS = Number(process.env.FORGE_MAX_TOKENS ?? '1024') +// Hard cap to prevent runaway loops even if AGENT.md says otherwise. +const MAX_TURNS_HARD_CAP = 10 type AgentConfig = { model: string systemPrompt?: string agentName?: string + maxTurns: number } function loadAgentConfig(): AgentConfig { - // Default config when no AGENT.md is mounted (standalone P1 mode). - let config: AgentConfig = { model: ENV_MODEL } + let config: AgentConfig = { model: ENV_MODEL, maxTurns: 1 } try { const raw = readFileSync(AGENT_MD_PATH, 'utf8') const parsed = parseAgentMd(raw) @@ -42,11 +58,9 @@ function loadAgentConfig(): AgentConfig { model: parsed.meta.model ?? ENV_MODEL, systemPrompt: parsed.body.length > 0 ? parsed.body : undefined, agentName: parsed.meta.name, + maxTurns: Math.min(parsed.meta.maxTurns ?? 1, MAX_TURNS_HARD_CAP), } } catch (err) { - // ENOENT means standalone mode, that is fine. Anything else is fatal : - // a malformed AGENT.md would otherwise silently fall back to the - // default model + no system prompt, which is misleading. 
const code = (err as NodeJS.ErrnoException).code if (code !== 'ENOENT') { console.error( @@ -68,28 +82,112 @@ async function readStdin(): Promise { return Buffer.concat(chunks).toString('utf8').trim() } +const TOOL_PROMPT = ` + +You have access to two native tools, callable by emitting a fenced block in your reply. + +## forge:bash — execute a shell command + +\`\`\`forge:bash +{ "command": "ls -la", "timeoutMs": 10000 } +\`\`\` + +The command runs via \`bash -lc\` inside /workspace. \`timeoutMs\` is optional (default 30000, max 120000). The result (stdout, stderr, exit code) will be injected back into the conversation on the next turn. + +## forge:write — write a file in /workspace + +\`\`\`forge:write +{ "path": "src/index.ts", "content": "export const x = 1\\n" } +\`\`\` + +\`path\` is relative to /workspace (or an absolute path under /workspace). Existing files are overwritten. The result (absolute path, bytes written, or an error) will be injected back into the conversation on the next turn. + +## Iteration + +- Emit at most ONE block per reply. Anything you write before the block is shown to the user. Anything after the block is discarded. +- After you receive a tool result, decide whether you need another tool call or whether you can produce the final answer. +- When you are done, reply with plain text (no fenced block). +` + +function buildSystem(config: AgentConfig, hasTools: boolean): string | undefined { + const base = config.systemPrompt ?? '' + if (!hasTools) return base.length > 0 ? base : undefined + return base.length > 0 ? 
`${base}${TOOL_PROMPT}` : TOOL_PROMPT.trim() +} + +async function streamOneTurn( + provider: ReturnType, + model: string, + system: string | undefined, + messages: CoreMessage[], +): Promise { + const result = streamText({ + model: provider(model), + system, + messages, + maxTokens: MAX_TOKENS, + }) + let acc = '' + for await (const chunk of result.textStream) { + process.stdout.write(chunk) + acc += chunk + } + return acc +} + +async function executeToolBlock( + parsed: Extract, { kind: 'tool' }>, +): Promise { + const tool = parsed.tool + if (tool.kind === 'bash') { + const result = await executeBash(tool.input) + return renderBashResult(tool.input, result) + } + // tool.kind === 'write' + const result = executeRuntimeFileWrite(tool.input) + return renderWriteResult(tool.input, result) +} + async function main(): Promise { const config = loadAgentConfig() - const prompt = await readStdin() - if (!prompt) { + const userPrompt = await readStdin() + if (!userPrompt) { console.error('✗ no prompt received on stdin') process.exit(1) } const provider = createOpenAI({ baseURL: BASE_URL, apiKey: API_KEY }) + const hasTools = config.maxTurns > 1 + const system = buildSystem(config, hasTools) - const result = streamText({ - model: provider(config.model), - system: config.systemPrompt, - prompt, - maxTokens: MAX_TOKENS, - }) + const messages: CoreMessage[] = [{ role: 'user', content: userPrompt }] - for await (const chunk of result.textStream) { - process.stdout.write(chunk) + for (let turn = 0; turn < config.maxTurns; turn += 1) { + const reply = await streamOneTurn(provider, config.model, system, messages) + process.stdout.write('\n') + + if (!hasTools) break + + const parsed = parseFirstToolBlock(reply) + if (parsed.kind === 'none') break + + // Record what the LLM just said (text + raw block) so the next turn + // sees it as a real assistant message. 
+ messages.push({ role: 'assistant', content: reply }) + + let toolReply: string + if (parsed.kind === 'invalid') { + toolReply = renderInvalid(parsed.error) + } else { + toolReply = await executeToolBlock(parsed) + } + + // Mark tool output for the host TUI so it can render it inside the + // Mission Control card instead of mixing it with prose. + process.stdout.write(`\n[forge:tool]\n${toolReply}\n[/forge:tool]\n`) + + messages.push({ role: 'user', content: toolReply }) } - // Trailing newline so the host can detect the end of the stream cleanly. - process.stdout.write('\n') } main().catch((err) => { diff --git a/packages/runtime/src/tool-protocol.ts b/packages/runtime/src/tool-protocol.ts new file mode 100644 index 0000000..5942b47 --- /dev/null +++ b/packages/runtime/src/tool-protocol.ts @@ -0,0 +1,135 @@ +// Agent-side tool protocol — fenced blocks the agent emits to invoke a +// native tool, and the rendering of tool results back to the LLM. +// +// We deliberately mirror the builder's text-structured protocol (forge:write +// and forge:run) instead of using OpenAI tool_calls for two reasons : +// 1. Local LLMs (MLX, llama.cpp) often don't honor tool_calls. +// 2. A consistent protocol across builder and agents simplifies debugging +// and lets users read the raw stream. +// +// Block grammar : +// +// ```forge:bash +// { "command": "ls -la" } +// ``` +// +// ```forge:write +// { "path": "src/index.ts", "content": "..." } +// ``` +// +// Only ONE block is parsed per turn (the first encountered). Everything +// before the block is treated as the agent's "thinking out loud" text and +// streamed to the host. Everything after the block is dropped — the agent +// will see the tool result on the next turn and continue from there. 
+ +import { z } from 'zod' +import { + BashInputSchema, + RuntimeFileWriteInputSchema, + type BashInput, + type BashResult, + type RuntimeFileWriteInput, + type RuntimeFileWriteResult, +} from '@agent-forge/tools-core' + +export type ParsedTool = + | { kind: 'bash'; input: BashInput; raw: string } + | { kind: 'write'; input: RuntimeFileWriteInput; raw: string } + +export type ParseOutcome = + | { kind: 'none'; text: string } + | { kind: 'invalid'; text: string; error: string; raw: string } + | { kind: 'tool'; text: string; tool: ParsedTool } + +const FENCE_RE = /```forge:(bash|write)\s*\n([\s\S]*?)```/ + +export function parseFirstToolBlock(stream: string): ParseOutcome { + const m = FENCE_RE.exec(stream) + if (!m) return { kind: 'none', text: stream } + + const tag = m[1] as 'bash' | 'write' + const body = m[2] ?? '' + const before = stream.slice(0, m.index) + + let parsed: unknown + try { + parsed = JSON.parse(body) + } catch (err) { + return { + kind: 'invalid', + text: before, + error: `forge:${tag} block is not valid JSON : ${ + err instanceof Error ? 
err.message : String(err) + }`, + raw: m[0], + } + } + + if (tag === 'bash') { + const result = BashInputSchema.safeParse(parsed) + if (!result.success) { + return { + kind: 'invalid', + text: before, + error: `forge:bash input failed validation : ${formatZodError(result.error)}`, + raw: m[0], + } + } + return { + kind: 'tool', + text: before, + tool: { kind: 'bash', input: result.data, raw: m[0] }, + } + } + + // tag === 'write' + const result = RuntimeFileWriteInputSchema.safeParse(parsed) + if (!result.success) { + return { + kind: 'invalid', + text: before, + error: `forge:write input failed validation : ${formatZodError(result.error)}`, + raw: m[0], + } + } + return { + kind: 'tool', + text: before, + tool: { kind: 'write', input: result.data, raw: m[0] }, + } +} + +function formatZodError(err: z.ZodError): string { + return err.errors + .map((e) => `${e.path.join('.') || '(root)'}: ${e.message}`) + .join(' ; ') +} + +// Render a tool result as the message we feed back to the LLM on the next +// turn. We use the same fenced format so the agent can recognize it as +// "the result of MY previous call". +export function renderBashResult( + input: BashInput, + result: BashResult, +): string { + const head = `[forge:bash result] command="${input.command}" exit=${result.exitCode.toString()}${ + result.timedOut ? ' (timed out)' : '' + }` + const stdout = result.stdout.length > 0 ? `\n--- stdout ---\n${result.stdout}` : '' + const stderr = result.stderr.length > 0 ? 
`\n--- stderr ---\n${result.stderr}` : '' + return `${head}${stdout}${stderr}` +} + +export function renderWriteResult( + input: RuntimeFileWriteInput, + result: RuntimeFileWriteResult, +): string { + if (result.ok) { + return `[forge:write result] wrote ${result.absolutePath} (${result.bytes.toString()} bytes)` + } + return `[forge:write result] FAILED on path="${input.path}" : ${result.error}` +} + +export function renderInvalid(error: string): string { + return `[forge:tool error] ${error}\n\nFix the JSON or schema and try again.` +} diff --git a/packages/runtime/tests/tool-protocol.test.ts b/packages/runtime/tests/tool-protocol.test.ts new file mode 100644 index 0000000..e699fd7 --- /dev/null +++ b/packages/runtime/tests/tool-protocol.test.ts @@ -0,0 +1,74 @@ +// Tests for the agent-side tool block parser. Pure : no FS, no spawn. + +import { describe, expect, test } from 'bun:test' +import { parseFirstToolBlock } from '../src/tool-protocol.ts' + +describe('parseFirstToolBlock', () => { + test('returns kind=none on plain text', () => { + const r = parseFirstToolBlock('just a sentence with no block') + expect(r.kind).toBe('none') + }) + + test('parses a forge:bash block with prose before it', () => { + const stream = [ + 'I will list the workspace contents.', + '', + '```forge:bash', + '{ "command": "ls -la" }', + '```', + '', + 'After the block — should be ignored.', + ].join('\n') + const r = parseFirstToolBlock(stream) + expect(r.kind).toBe('tool') + if (r.kind === 'tool') { + expect(r.text.startsWith('I will list')).toBe(true) + expect(r.tool.kind).toBe('bash') + if (r.tool.kind === 'bash') expect(r.tool.input.command).toBe('ls -la') + } + }) + + test('parses a forge:write block', () => { + const stream = [ + '```forge:write', + '{ "path": "notes.md", "content": "# hi\\n" }', + '```', + ].join('\n') + const r = parseFirstToolBlock(stream) + expect(r.kind).toBe('tool') + if (r.kind === 'tool' && r.tool.kind === 'write') { + 
expect(r.tool.input.path).toBe('notes.md') + expect(r.tool.input.content).toBe('# hi\n') + } + }) + + test('returns kind=invalid when JSON is malformed', () => { + const stream = '```forge:bash\n{ not json }\n```' + const r = parseFirstToolBlock(stream) + expect(r.kind).toBe('invalid') + if (r.kind === 'invalid') expect(r.error).toContain('not valid JSON') + }) + + test('returns kind=invalid when schema is wrong', () => { + const stream = '```forge:bash\n{ "command": "" }\n```' + const r = parseFirstToolBlock(stream) + expect(r.kind).toBe('invalid') + if (r.kind === 'invalid') expect(r.error).toContain('failed validation') + }) + + test('only the first block matters', () => { + const stream = [ + '```forge:bash', + '{ "command": "echo a" }', + '```', + '```forge:bash', + '{ "command": "echo b" }', + '```', + ].join('\n') + const r = parseFirstToolBlock(stream) + expect(r.kind).toBe('tool') + if (r.kind === 'tool' && r.tool.kind === 'bash') { + expect(r.tool.input.command).toBe('echo a') + } + }) +}) diff --git a/packages/tools-core/src/docker-launch.ts b/packages/tools-core/src/docker-launch.ts index c7f5594..5544e00 100644 --- a/packages/tools-core/src/docker-launch.ts +++ b/packages/tools-core/src/docker-launch.ts @@ -9,7 +9,7 @@ // agents can run in parallel without collision. import { spawn, spawnSync } from 'node:child_process' -import { existsSync } from 'node:fs' +import { existsSync, mkdirSync } from 'node:fs' import { join } from 'node:path' import { z } from 'zod' import { FORGE_HOME } from './file-write.ts' @@ -75,6 +75,16 @@ export function launchAgent(input: DockerLaunchInput): LaunchHandle { spawnSync('docker', ['rm', '-f', containerName], { stdio: 'ignore' }) } + // Per-run workspace on the host, bind-mounted RW into the container so + // tools (forge:bash, forge:write) have a sandbox they can scribble in. + // Kept after the container exits — useful for debugging and for P5 + // artifact extraction. 
+ const workspaceHostDir = join( + FORGE_HOME, + 'workspaces', + containerName, + ) + async function* run(): AsyncGenerator { if (!existsSync(agentMdPath)) { yield { type: 'error', error: `AGENT.md not found : ${agentMdPath}` } @@ -90,6 +100,8 @@ export function launchAgent(input: DockerLaunchInput): LaunchHandle { return } + mkdirSync(workspaceHostDir, { recursive: true }) + const args = [ 'run', '--rm', @@ -100,6 +112,10 @@ export function launchAgent(input: DockerLaunchInput): LaunchHandle { `${agentMdPath}:/agent/AGENT.md:ro`, '-v', `${RUNTIME_DIST_FROM_TOOLS}:/runtime:ro`, + '-v', + `${workspaceHostDir}:/workspace`, + '-w', + '/workspace', ...inheritEnv(), IMAGE, 'node', diff --git a/packages/tools-core/src/index.ts b/packages/tools-core/src/index.ts index 0fb06f6..ac20361 100644 --- a/packages/tools-core/src/index.ts +++ b/packages/tools-core/src/index.ts @@ -21,3 +21,20 @@ export { type DockerLaunchInput, type LaunchHandle, } from './docker-launch.ts' + +// Runtime-side tools — used INSIDE the agent's container, sandboxed to +// /workspace. Distinct from the host-side FileWrite above. +export { + BashInputSchema, + WORKSPACE_DIR, + executeBash, + type BashInput, + type BashResult, +} from './runtime/bash.ts' + +export { + RuntimeFileWriteInputSchema, + executeRuntimeFileWrite, + type RuntimeFileWriteInput, + type RuntimeFileWriteResult, +} from './runtime/file-write.ts' diff --git a/packages/tools-core/src/runtime/bash.ts b/packages/tools-core/src/runtime/bash.ts new file mode 100644 index 0000000..c039d42 --- /dev/null +++ b/packages/tools-core/src/runtime/bash.ts @@ -0,0 +1,104 @@ +// Bash — execute a shell command inside an agent's container. +// +// Runs INSIDE the container (called from @agent-forge/runtime). Wraps the +// command with `bash -lc` so simple shell features (pipes, &&, $VAR) just +// work. The cwd is locked to /workspace : the agent never sees anything +// outside its sandbox. 
A timeout (default 30s) prevents runaway commands +// from blocking the tool loop. +// +// Returns a structured result (stdout, stderr, exitCode, timedOut). The +// caller is responsible for formatting it back into a message the LLM will +// read on the next turn. + +import { spawn } from 'node:child_process' +import { z } from 'zod' + +export const WORKSPACE_DIR = '/workspace' + +// Tests on the host don't have /workspace. The runtime always uses +// WORKSPACE_DIR when running inside the container ; tests can point this +// at a temp dir via FORGE_WORKSPACE. +function bashCwd(): string { + return process.env.FORGE_WORKSPACE ?? WORKSPACE_DIR +} + +export const BashInputSchema = z.object({ + command: z + .string() + .min(1) + .describe( + 'Shell command to execute inside the agent sandbox. Run via `bash -lc`. The current directory is /workspace.', + ), + timeoutMs: z + .number() + .int() + .positive() + .max(120_000) + .optional() + .describe('Hard timeout in milliseconds. Defaults to 30000. Capped at 120000.'), +}) + +export type BashInput = z.infer<typeof BashInputSchema> + +export type BashResult = { + stdout: string + stderr: string + exitCode: number + timedOut: boolean +} + +const DEFAULT_TIMEOUT_MS = 30_000 +// Cap each captured stream, measured in UTF-8 bytes (not UTF-16 code units), so a runaway command can't blow the LLM context. +const MAX_OUTPUT_BYTES = 16_384 + +function clip(text: string): string { + if (Buffer.byteLength(text, 'utf8') <= MAX_OUTPUT_BYTES) return text + const head = Buffer.from(text, 'utf8').subarray(0, MAX_OUTPUT_BYTES).toString('utf8').replace(/\uFFFD$/, '') + return `${head}\n…[output truncated at ${MAX_OUTPUT_BYTES.toString()} bytes]` +} + +export async function executeBash(input: BashInput): Promise<BashResult> { + const timeoutMs = input.timeoutMs ?? 
DEFAULT_TIMEOUT_MS + return await new Promise<BashResult>((resolve) => { + const child = spawn('bash', ['-lc', input.command], { + cwd: bashCwd(), + stdio: ['ignore', 'pipe', 'pipe'], + }) + + let stdout = '' + let stderr = '' + let timedOut = false + + const timer = setTimeout(() => { + timedOut = true + child.kill('SIGKILL') + }, timeoutMs) + + child.stdout.on('data', (b: Buffer) => { + stdout += b.toString('utf8') + }) + child.stderr.on('data', (b: Buffer) => { + stderr += b.toString('utf8') + }) + + child.on('error', (err) => { + clearTimeout(timer) + resolve({ + stdout: clip(stdout), + stderr: clip(`${stderr}${err.message}`), + exitCode: -1, + timedOut, + }) + }) + + child.on('close', (code) => { + clearTimeout(timer) + resolve({ + stdout: clip(stdout), + stderr: clip(stderr), + exitCode: code ?? -1, + timedOut, + }) + }) + }) +} diff --git a/packages/tools-core/src/runtime/file-write.ts b/packages/tools-core/src/runtime/file-write.ts new file mode 100644 index 0000000..f568eee --- /dev/null +++ b/packages/tools-core/src/runtime/file-write.ts @@ -0,0 +1,76 @@ +// FileWrite (runtime) — write a file under /workspace from inside the +// agent's container. +// +// Distinct from packages/tools-core/src/file-write.ts which writes under +// the host's ~/.agent-forge/. The runtime version is sandboxed to +// /workspace : the agent has no way to escape its container's mount. +// +// Path traversal (..), null bytes, and absolute paths outside /workspace +// are refused. Existing files are overwritten by default — unlike the +// host tool which is strict — because in-sandbox iteration is expected +// (agents often rewrite their own files mid-loop). +// +// The sandbox root defaults to /workspace (the in-container mount) but +// can be overridden via FORGE_WORKSPACE — useful for tests that want to +// run on the host without touching /workspace. 
+ +import { mkdirSync, writeFileSync } from 'node:fs' +import { dirname, isAbsolute, join, resolve } from 'node:path' +import { z } from 'zod' +import { WORKSPACE_DIR } from './bash.ts' + +function sandboxRoot(): string { + return process.env.FORGE_WORKSPACE ?? WORKSPACE_DIR +} + +export const RuntimeFileWriteInputSchema = z.object({ + path: z + .string() + .min(1) + .describe( + 'Path inside the agent sandbox (/workspace). Either relative ("notes.md") or absolute under /workspace ("/workspace/src/index.ts"). Paths outside /workspace are rejected.', + ), + content: z.string().describe('Full file content to write.'), +}) + +export type RuntimeFileWriteInput = z.infer + +export type RuntimeFileWriteResult = + | { ok: true; absolutePath: string; bytes: number } + | { ok: false; error: string } + +export function resolveSandboxedPath(rawPath: string): + | { ok: true; absolutePath: string } + | { ok: false; error: string } { + if (rawPath.includes('\0')) { + return { ok: false, error: 'path contains a null byte' } + } + const root = sandboxRoot() + const target = isAbsolute(rawPath) ? rawPath : join(root, rawPath) + const resolved = resolve(target) + if (resolved !== root && !resolved.startsWith(`${root}/`)) { + return { + ok: false, + error: `path escapes the agent sandbox (${root})`, + } + } + return { ok: true, absolutePath: resolved } +} + +export function executeRuntimeFileWrite( + input: RuntimeFileWriteInput, +): RuntimeFileWriteResult { + const safe = resolveSandboxedPath(input.path) + if (!safe.ok) return safe + try { + mkdirSync(dirname(safe.absolutePath), { recursive: true }) + writeFileSync(safe.absolutePath, input.content, 'utf8') + return { + ok: true, + absolutePath: safe.absolutePath, + bytes: Buffer.byteLength(input.content, 'utf8'), + } + } catch (err) { + return { ok: false, error: err instanceof Error ? 
err.message : String(err) } + } +} diff --git a/packages/tools-core/tests/runtime-bash.test.ts b/packages/tools-core/tests/runtime-bash.test.ts new file mode 100644 index 0000000..b0b0d10 --- /dev/null +++ b/packages/tools-core/tests/runtime-bash.test.ts @@ -0,0 +1,52 @@ +// Round-trip tests for the runtime-side Bash tool. +// Uses FORGE_WORKSPACE so the cwd is a temp dir, not /workspace. + +import { afterAll, beforeAll, describe, expect, test } from 'bun:test' +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-bash-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +const { executeBash } = await import('../src/runtime/bash.ts') + +describe('executeBash', () => { + test('captures stdout from a simple command', async () => { + const r = await executeBash({ command: 'echo hello' }) + expect(r.exitCode).toBe(0) + expect(r.stdout.trim()).toBe('hello') + expect(r.stderr).toBe('') + expect(r.timedOut).toBe(false) + }) + + test('captures stderr and a non-zero exit code', async () => { + const r = await executeBash({ command: 'echo oops 1>&2 ; exit 7' }) + expect(r.exitCode).toBe(7) + expect(r.stderr.trim()).toBe('oops') + }) + + test('runs in the sandbox cwd', async () => { + writeFileSync(join(TMP_WORKSPACE, 'marker.txt'), 'present') + const r = await executeBash({ command: 'cat marker.txt' }) + expect(r.exitCode).toBe(0) + expect(r.stdout).toBe('present') + }) + + test('honors a tight timeout', async () => { + const r = await executeBash({ command: 'sleep 5', timeoutMs: 200 }) + expect(r.timedOut).toBe(true) + 
expect(r.exitCode).not.toBe(0) + }) +}) diff --git a/packages/tools-core/tests/runtime-file-write.test.ts b/packages/tools-core/tests/runtime-file-write.test.ts new file mode 100644 index 0000000..8087566 --- /dev/null +++ b/packages/tools-core/tests/runtime-file-write.test.ts @@ -0,0 +1,99 @@ +// Security and round-trip tests for the runtime-side FileWrite tool. +// Uses FORGE_WORKSPACE to point the sandbox at a temp dir so the tests +// don't try to write to /workspace on the host. + +import { afterAll, afterEach, beforeAll, describe, expect, test } from 'bun:test' +import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-fw-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +// Late import so module-level reads of process.env happen after we set it. +const { + executeRuntimeFileWrite, + resolveSandboxedPath, +} = await import('../src/runtime/file-write.ts') + +afterEach(() => { + // Wipe contents but keep the dir itself so the env var stays valid. 
+ for (const entry of [ + 'a.txt', + 'sub/b.txt', + 'sub', + 'overwrite-me.txt', + ]) { + const p = join(TMP_WORKSPACE, entry) + if (existsSync(p)) rmSync(p, { recursive: true, force: true }) + } +}) + +describe('resolveSandboxedPath (runtime)', () => { + test('accepts a relative path under the sandbox', () => { + const r = resolveSandboxedPath('a.txt') + expect(r.ok).toBe(true) + if (r.ok) expect(r.absolutePath).toBe(join(TMP_WORKSPACE, 'a.txt')) + }) + + test('rejects path traversal', () => { + const r = resolveSandboxedPath('../escape.txt') + expect(r.ok).toBe(false) + }) + + test('rejects absolute path outside the sandbox', () => { + const r = resolveSandboxedPath('/etc/passwd') + expect(r.ok).toBe(false) + }) + + test('rejects null byte', () => { + const r = resolveSandboxedPath('foo\0bar') + expect(r.ok).toBe(false) + }) +}) + +describe('executeRuntimeFileWrite', () => { + test('writes a file in the sandbox', () => { + const r = executeRuntimeFileWrite({ path: 'a.txt', content: 'hi' }) + expect(r.ok).toBe(true) + if (r.ok) { + expect(readFileSync(r.absolutePath, 'utf8')).toBe('hi') + expect(r.bytes).toBe(2) + } + }) + + test('creates parent directories', () => { + const r = executeRuntimeFileWrite({ + path: 'sub/b.txt', + content: 'nested', + }) + expect(r.ok).toBe(true) + if (r.ok) expect(readFileSync(r.absolutePath, 'utf8')).toBe('nested') + }) + + test('overwrites an existing file', () => { + executeRuntimeFileWrite({ path: 'overwrite-me.txt', content: 'v1' }) + const r = executeRuntimeFileWrite({ path: 'overwrite-me.txt', content: 'v2' }) + expect(r.ok).toBe(true) + if (r.ok) expect(readFileSync(r.absolutePath, 'utf8')).toBe('v2') + }) + + test('refuses path escaping the sandbox', () => { + const r = executeRuntimeFileWrite({ + path: '../evil.txt', + content: 'x', + }) + expect(r.ok).toBe(false) + }) +}) From cc720f060924bd1422267fc3522c872b826cff7f Mon Sep 17 00:00:00 2001 From: Georges Garnier Date: Mon, 27 Apr 2026 14:02:10 +0200 Subject: [PATCH 2/5] 
=?UTF-8?q?feat(p4):=20add=20four=20remaining=20native?= =?UTF-8?q?=20tools=20=E2=80=94=20read,=20edit,=20grep,=20glob?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the P4 tool catalog. Agents now have the full six : bash, write, read, edit, grep, glob. All sandboxed to /workspace, all callable via fenced forge:* blocks, all validated by Zod and capped on output size to protect the LLM context. read — line-based offset/limit, 16 KB clip, fails on missing or non-regular files edit — exact substring patch, refuses ambiguous matches unless replaceAll=true, refuses identical old/new grep — pure JS regex over a glob filter, skips binary files (NUL bytes), 200 hits cap, line clipped at 400 chars glob — hand-rolled matcher for *, **, ? (no dep), 200 results cap, walk bounded at 5000 nodes Tool dispatcher in the runtime is now a switch over six branches. System prompt lists all six with their JSON shape. Tests added for each tool plus four new parser cases (forge:read / edit / grep / glob, and a refine-rule violation on edit). resolveSandboxedPath is now exported so tools that don't write but still need the sandbox root (grep, glob) reuse it instead of duplicating the FORGE_WORKSPACE override logic. 
--- packages/runtime/src/index.ts | 81 +++++++-- packages/runtime/src/tool-protocol.ts | 154 ++++++++++++++---- packages/runtime/tests/tool-protocol.test.ts | 51 ++++++ packages/tools-core/src/index.ts | 30 ++++ packages/tools-core/src/runtime/file-edit.ts | 89 ++++++++++ packages/tools-core/src/runtime/file-read.ts | 93 +++++++++++ packages/tools-core/src/runtime/glob.ts | 123 ++++++++++++++ packages/tools-core/src/runtime/grep.ts | 106 ++++++++++++ .../tests/runtime-file-edit.test.ts | 86 ++++++++++ .../tests/runtime-file-read.test.ts | 55 +++++++ .../tools-core/tests/runtime-glob.test.ts | 53 ++++++ .../tools-core/tests/runtime-grep.test.ts | 62 +++++++ 12 files changed, 938 insertions(+), 45 deletions(-) create mode 100644 packages/tools-core/src/runtime/file-edit.ts create mode 100644 packages/tools-core/src/runtime/file-read.ts create mode 100644 packages/tools-core/src/runtime/glob.ts create mode 100644 packages/tools-core/src/runtime/grep.ts create mode 100644 packages/tools-core/tests/runtime-file-edit.test.ts create mode 100644 packages/tools-core/tests/runtime-file-read.test.ts create mode 100644 packages/tools-core/tests/runtime-glob.test.ts create mode 100644 packages/tools-core/tests/runtime-grep.test.ts diff --git a/packages/runtime/src/index.ts b/packages/runtime/src/index.ts index d8b3ab1..a473609 100644 --- a/packages/runtime/src/index.ts +++ b/packages/runtime/src/index.ts @@ -22,13 +22,21 @@ import { createOpenAI } from '@ai-sdk/openai' import { parseAgentMd } from '@agent-forge/core/types' import { executeBash, + executeRuntimeFileEdit, + executeRuntimeFileRead, executeRuntimeFileWrite, + executeRuntimeGlob, + executeRuntimeGrep, } from '@agent-forge/tools-core' import { type CoreMessage, streamText } from 'ai' import { parseFirstToolBlock, renderBashResult, + renderEditResult, + renderGlobResult, + renderGrepResult, renderInvalid, + renderReadResult, renderWriteResult, } from './tool-protocol.ts' @@ -84,7 +92,7 @@ async function readStdin(): 
Promise { const TOOL_PROMPT = ` -You have access to two native tools, callable by emitting a fenced block in your reply. +You have access to six native tools, each callable by emitting a fenced block in your reply. ## forge:bash — execute a shell command @@ -92,19 +100,51 @@ You have access to two native tools, callable by emitting a fenced block in your { "command": "ls -la", "timeoutMs": 10000 } \`\`\` -The command runs via \`bash -lc\` inside /workspace. \`timeoutMs\` is optional (default 30000, max 120000). The result (stdout, stderr, exit code) will be injected back into the conversation on the next turn. +Runs via \`bash -lc\` inside /workspace. \`timeoutMs\` defaults to 30000, capped at 120000. -## forge:write — write a file in /workspace +## forge:write — create or overwrite a file \`\`\`forge:write { "path": "src/index.ts", "content": "export const x = 1\\n" } \`\`\` -\`path\` is relative to /workspace (or an absolute path under /workspace). Existing files are overwritten. The result (absolute path, bytes written, or an error) will be injected back into the conversation on the next turn. +\`path\` is relative to /workspace (or absolute under /workspace). Existing files are overwritten. + +## forge:read — read a file + +\`\`\`forge:read +{ "path": "src/index.ts", "offset": 0, "limit": 200 } +\`\`\` + +\`offset\` and \`limit\` are line-based, both optional. Default limit 200, max 2000. Output is clipped at 16 KB ; use offset/limit to walk a long file. + +## forge:edit — patch a file by exact substring replacement + +\`\`\`forge:edit +{ "path": "src/index.ts", "oldString": "const x = 1", "newString": "const x = 2" } +\`\`\` + +\`oldString\` must match exactly once unless you set \`replaceAll\` true. If it matches multiple times, widen the surrounding context until it's unique. 
+ +## forge:grep — regex search across files + +\`\`\`forge:grep +{ "pattern": "TODO|FIXME", "glob": "src/**/*.ts", "ignoreCase": false } +\`\`\` + +\`pattern\` is a JavaScript RegExp source. \`glob\` is optional (defaults to \`**/*\`). Returns up to 200 hits with path:line:text. + +## forge:glob — list files by pattern + +\`\`\`forge:glob +{ "pattern": "src/**/*.ts" } +\`\`\` + +Supports \`*\`, \`**\`, and \`?\`. Returns up to 200 paths relative to /workspace. ## Iteration -- Emit at most ONE block per reply. Anything you write before the block is shown to the user. Anything after the block is discarded. +- Emit at most ONE block per reply. Text before the block is shown to the user. Text after the block is discarded. - After you receive a tool result, decide whether you need another tool call or whether you can produce the final answer. - When you are done, reply with plain text (no fenced block). ` @@ -139,13 +179,32 @@ async function executeToolBlock( parsed: Extract, { kind: 'tool' }>, ): Promise { const tool = parsed.tool - if (tool.kind === 'bash') { - const result = await executeBash(tool.input) - return renderBashResult(tool.input, result) + switch (tool.kind) { + case 'bash': { + const result = await executeBash(tool.input) + return renderBashResult(tool.input, result) + } + case 'write': { + const result = executeRuntimeFileWrite(tool.input) + return renderWriteResult(tool.input, result) + } + case 'read': { + const result = executeRuntimeFileRead(tool.input) + return renderReadResult(tool.input, result) + } + case 'edit': { + const result = executeRuntimeFileEdit(tool.input) + return renderEditResult(tool.input, result) + } + case 'grep': { + const result = executeRuntimeGrep(tool.input) + return renderGrepResult(tool.input, result) + } + case 'glob': { + const result = executeRuntimeGlob(tool.input) + return renderGlobResult(tool.input, result) + } } - // tool.kind === 'write' - const result = executeRuntimeFileWrite(tool.input) - return 
renderWriteResult(tool.input, result) } async function main(): Promise { diff --git a/packages/runtime/src/tool-protocol.ts b/packages/runtime/src/tool-protocol.ts index 5942b47..b9ea1a0 100644 --- a/packages/runtime/src/tool-protocol.ts +++ b/packages/runtime/src/tool-protocol.ts @@ -7,7 +7,7 @@ // 2. A consistent protocol across builder and agents simplifies debugging // and lets users read the raw stream. // -// Block grammar : +// Six tools wired today : bash, write, read, edit, grep, glob. // // ```forge:bash // { "command": "ls -la" } @@ -17,37 +17,81 @@ // { "path": "src/index.ts", "content": "..." } // ``` // +// ```forge:read +// { "path": "src/index.ts", "offset": 0, "limit": 200 } +// ``` +// +// ```forge:edit +// { "path": "src/index.ts", "oldString": "...", "newString": "..." } +// ``` +// +// ```forge:grep +// { "pattern": "TODO", "glob": "**/*.ts", "ignoreCase": true } +// ``` +// +// ```forge:glob +// { "pattern": "src/**/*.ts" } +// ``` +// // Only ONE block is parsed per turn (the first encountered). Everything -// before the block is treated as the agent's "thinking out loud" text and -// streamed to the host. Everything after the block is dropped — the agent -// will see the tool result on the next turn and continue from there. +// before the block is treated as the agent's "thinking out loud" text +// and streamed to the host. Everything after the block is dropped — the +// agent will see the tool result on the next turn and continue from there. 
import { z } from 'zod' import { BashInputSchema, + RuntimeFileEditInputSchema, + RuntimeFileReadInputSchema, RuntimeFileWriteInputSchema, + RuntimeGlobInputSchema, + RuntimeGrepInputSchema, type BashInput, type BashResult, + type GrepHit, + type RuntimeFileEditInput, + type RuntimeFileEditResult, + type RuntimeFileReadInput, + type RuntimeFileReadResult, type RuntimeFileWriteInput, type RuntimeFileWriteResult, + type RuntimeGlobInput, + type RuntimeGlobResult, + type RuntimeGrepInput, + type RuntimeGrepResult, } from '@agent-forge/tools-core' +export type ToolKind = 'bash' | 'write' | 'read' | 'edit' | 'grep' | 'glob' + export type ParsedTool = | { kind: 'bash'; input: BashInput; raw: string } | { kind: 'write'; input: RuntimeFileWriteInput; raw: string } + | { kind: 'read'; input: RuntimeFileReadInput; raw: string } + | { kind: 'edit'; input: RuntimeFileEditInput; raw: string } + | { kind: 'grep'; input: RuntimeGrepInput; raw: string } + | { kind: 'glob'; input: RuntimeGlobInput; raw: string } export type ParseOutcome = | { kind: 'none'; text: string } | { kind: 'invalid'; text: string; error: string; raw: string } | { kind: 'tool'; text: string; tool: ParsedTool } -const FENCE_RE = /```forge:(bash|write)\s*\n([\s\S]*?)```/ +const SCHEMAS: Record = { + bash: BashInputSchema, + write: RuntimeFileWriteInputSchema, + read: RuntimeFileReadInputSchema, + edit: RuntimeFileEditInputSchema, + grep: RuntimeGrepInputSchema, + glob: RuntimeGlobInputSchema, +} + +const FENCE_RE = /```forge:(bash|write|read|edit|grep|glob)\s*\n([\s\S]*?)```/ export function parseFirstToolBlock(stream: string): ParseOutcome { const m = FENCE_RE.exec(stream) if (!m) return { kind: 'none', text: stream } - const tag = m[1] as 'bash' | 'write' + const tag = m[1] as ToolKind const body = m[2] ?? 
'' const before = stream.slice(0, m.index) @@ -65,37 +109,23 @@ export function parseFirstToolBlock(stream: string): ParseOutcome { } } - if (tag === 'bash') { - const result = BashInputSchema.safeParse(parsed) - if (!result.success) { - return { - kind: 'invalid', - text: before, - error: `forge:bash input failed validation : ${formatZodError(result.error)}`, - raw: m[0], - } - } - return { - kind: 'tool', - text: before, - tool: { kind: 'bash', input: result.data, raw: m[0] }, - } - } - - // tag === 'write' - const result = RuntimeFileWriteInputSchema.safeParse(parsed) + const schema = SCHEMAS[tag] + const result = schema.safeParse(parsed) if (!result.success) { return { kind: 'invalid', text: before, - error: `forge:write input failed validation : ${formatZodError(result.error)}`, + error: `forge:${tag} input failed validation : ${formatZodError(result.error)}`, raw: m[0], } } + + // Narrow to the right ParsedTool variant by tag — the schema guarantees + // the data shape matches. return { kind: 'tool', text: before, - tool: { kind: 'write', input: result.data, raw: m[0] }, + tool: { kind: tag, input: result.data, raw: m[0] } as ParsedTool, } } @@ -105,13 +135,11 @@ function formatZodError(err: z.ZodError): string { .join(' ; ') } -// Render a tool result as the message we feed back to the LLM on the next -// turn. We use the same fenced format so the agent can recognize it as -// "the result of MY previous call". -export function renderBashResult( - input: BashInput, - result: BashResult, -): string { +// ── Result renderers : turn each tool's structured result into the +// message we feed back to the LLM on the next turn. Same `[forge:X result]` +// header so the agent recognizes it as the answer to its previous call. + +export function renderBashResult(input: BashInput, result: BashResult): string { const head = `[forge:bash result] command="${input.command}" exit=${result.exitCode.toString()}${ result.timedOut ? 
' (timed out)' : '' }` @@ -130,6 +158,64 @@ export function renderWriteResult( return `[forge:write result] FAILED on path="${input.path}" : ${result.error}` } +export function renderReadResult( + input: RuntimeFileReadInput, + result: RuntimeFileReadResult, +): string { + if (!result.ok) { + return `[forge:read result] FAILED on path="${input.path}" : ${result.error}` + } + const head = `[forge:read result] ${result.absolutePath} · lines ${(input.offset ?? 0).toString()}..${( + (input.offset ?? 0) + result.returnedLines + ).toString()} of ${result.totalLines.toString()}${result.truncatedBytes ? ' (clipped)' : ''}` + return `${head}\n--- content ---\n${result.content}` +} + +export function renderEditResult( + input: RuntimeFileEditInput, + result: RuntimeFileEditResult, +): string { + if (result.ok) { + return `[forge:edit result] ${result.absolutePath} · ${result.replacements.toString()} replacement${ + result.replacements === 1 ? '' : 's' + }` + } + return `[forge:edit result] FAILED on path="${input.path}" : ${result.error}` +} + +export function renderGlobResult( + input: RuntimeGlobInput, + result: RuntimeGlobResult, +): string { + if (!result.ok) { + return `[forge:glob result] FAILED on pattern="${input.pattern}" : ${result.error}` + } + const head = `[forge:glob result] ${result.matches.length.toString()} match${ + result.matches.length === 1 ? '' : 'es' + }${result.truncated ? ' (truncated)' : ''}` + if (result.matches.length === 0) return head + return `${head}\n${result.matches.join('\n')}` +} + +export function renderGrepResult( + input: RuntimeGrepInput, + result: RuntimeGrepResult, +): string { + if (!result.ok) { + return `[forge:grep result] FAILED on pattern="${input.pattern}" : ${result.error}` + } + const head = `[forge:grep result] ${result.hits.length.toString()} hit${ + result.hits.length === 1 ? '' : 's' + } across ${result.scanned.toString()} file${result.scanned === 1 ? '' : 's'}${ + result.truncated ? 
' (truncated)' : '' + }` + if (result.hits.length === 0) return head + const body = result.hits + .map((h: GrepHit) => `${h.path}:${h.line.toString()}: ${h.text}`) + .join('\n') + return `${head}\n${body}` +} + export function renderInvalid(error: string): string { return `[forge:tool error] ${error}\n\nFix the JSON or schema and try again.` } diff --git a/packages/runtime/tests/tool-protocol.test.ts b/packages/runtime/tests/tool-protocol.test.ts index e699fd7..b05cae1 100644 --- a/packages/runtime/tests/tool-protocol.test.ts +++ b/packages/runtime/tests/tool-protocol.test.ts @@ -71,4 +71,55 @@ describe('parseFirstToolBlock', () => { expect(r.tool.input.command).toBe('echo a') } }) + + test('parses forge:read', () => { + const r = parseFirstToolBlock( + '```forge:read\n{ "path": "src/x.ts", "offset": 10, "limit": 50 }\n```', + ) + expect(r.kind).toBe('tool') + if (r.kind === 'tool' && r.tool.kind === 'read') { + expect(r.tool.input.path).toBe('src/x.ts') + expect(r.tool.input.offset).toBe(10) + expect(r.tool.input.limit).toBe(50) + } + }) + + test('parses forge:edit', () => { + const r = parseFirstToolBlock( + '```forge:edit\n{ "path": "a.ts", "oldString": "x", "newString": "y" }\n```', + ) + expect(r.kind).toBe('tool') + if (r.kind === 'tool' && r.tool.kind === 'edit') { + expect(r.tool.input.oldString).toBe('x') + expect(r.tool.input.newString).toBe('y') + } + }) + + test('parses forge:grep', () => { + const r = parseFirstToolBlock( + '```forge:grep\n{ "pattern": "TODO", "glob": "**/*.ts", "ignoreCase": true }\n```', + ) + expect(r.kind).toBe('tool') + if (r.kind === 'tool' && r.tool.kind === 'grep') { + expect(r.tool.input.pattern).toBe('TODO') + expect(r.tool.input.ignoreCase).toBe(true) + } + }) + + test('parses forge:glob', () => { + const r = parseFirstToolBlock( + '```forge:glob\n{ "pattern": "src/**/*.ts" }\n```', + ) + expect(r.kind).toBe('tool') + if (r.kind === 'tool' && r.tool.kind === 'glob') { + expect(r.tool.input.pattern).toBe('src/**/*.ts') + } + 
}) + + test('rejects invalid forge:edit (oldString equals newString)', () => { + const r = parseFirstToolBlock( + '```forge:edit\n{ "path": "a.ts", "oldString": "x", "newString": "x" }\n```', + ) + expect(r.kind).toBe('invalid') + }) }) diff --git a/packages/tools-core/src/index.ts b/packages/tools-core/src/index.ts index ac20361..c38bac8 100644 --- a/packages/tools-core/src/index.ts +++ b/packages/tools-core/src/index.ts @@ -35,6 +35,36 @@ export { export { RuntimeFileWriteInputSchema, executeRuntimeFileWrite, + resolveSandboxedPath, type RuntimeFileWriteInput, type RuntimeFileWriteResult, } from './runtime/file-write.ts' + +export { + RuntimeFileReadInputSchema, + executeRuntimeFileRead, + type RuntimeFileReadInput, + type RuntimeFileReadResult, +} from './runtime/file-read.ts' + +export { + RuntimeFileEditInputSchema, + executeRuntimeFileEdit, + type RuntimeFileEditInput, + type RuntimeFileEditResult, +} from './runtime/file-edit.ts' + +export { + RuntimeGlobInputSchema, + executeRuntimeGlob, + type RuntimeGlobInput, + type RuntimeGlobResult, +} from './runtime/glob.ts' + +export { + RuntimeGrepInputSchema, + executeRuntimeGrep, + type GrepHit, + type RuntimeGrepInput, + type RuntimeGrepResult, +} from './runtime/grep.ts' diff --git a/packages/tools-core/src/runtime/file-edit.ts b/packages/tools-core/src/runtime/file-edit.ts new file mode 100644 index 0000000..99e7cdf --- /dev/null +++ b/packages/tools-core/src/runtime/file-edit.ts @@ -0,0 +1,89 @@ +// FileEdit (runtime) — patch a file under /workspace by replacing one +// exact substring with another. Same shape as Claude Code's Edit tool. +// +// The match must be unique unless `replaceAll: true`. This forces the +// LLM to widen its `oldString` window when it's ambiguous, instead of +// guessing which occurrence it meant. 
+ +import { readFileSync, writeFileSync } from 'node:fs' +import { z } from 'zod' +import { resolveSandboxedPath } from './file-write.ts' + +export const RuntimeFileEditInputSchema = z + .object({ + path: z.string().min(1).describe('File path under /workspace.'), + oldString: z + .string() + .min(1) + .describe( + 'Exact substring to find. Must match exactly once unless replaceAll is true.', + ), + newString: z.string().describe('Replacement substring.'), + replaceAll: z + .boolean() + .optional() + .describe('Replace every occurrence. Default false.'), + }) + .refine((v) => v.oldString !== v.newString, { + message: 'oldString and newString must differ', + path: ['newString'], + }) + +export type RuntimeFileEditInput = z.infer + +export type RuntimeFileEditResult = + | { ok: true; absolutePath: string; replacements: number } + | { ok: false; error: string } + +function countOccurrences(haystack: string, needle: string): number { + if (needle.length === 0) return 0 + let count = 0 + let i = 0 + while (true) { + const at = haystack.indexOf(needle, i) + if (at === -1) return count + count += 1 + i = at + needle.length + } +} + +export function executeRuntimeFileEdit( + input: RuntimeFileEditInput, +): RuntimeFileEditResult { + const safe = resolveSandboxedPath(input.path) + if (!safe.ok) return safe + + let original: string + try { + original = readFileSync(safe.absolutePath, 'utf8') + } catch (err) { + return { ok: false, error: err instanceof Error ? err.message : String(err) } + } + + const occurrences = countOccurrences(original, input.oldString) + if (occurrences === 0) { + return { ok: false, error: 'oldString not found in file' } + } + if (occurrences > 1 && !input.replaceAll) { + return { + ok: false, + error: `oldString matches ${occurrences.toString()} times — widen the context or set replaceAll=true`, + } + } + + const updated = input.replaceAll + ? 
original.split(input.oldString).join(input.newString) + : original.replace(input.oldString, input.newString) + + try { + writeFileSync(safe.absolutePath, updated, 'utf8') + } catch (err) { + return { ok: false, error: err instanceof Error ? err.message : String(err) } + } + + return { + ok: true, + absolutePath: safe.absolutePath, + replacements: input.replaceAll ? occurrences : 1, + } +} diff --git a/packages/tools-core/src/runtime/file-read.ts b/packages/tools-core/src/runtime/file-read.ts new file mode 100644 index 0000000..7e15ba9 --- /dev/null +++ b/packages/tools-core/src/runtime/file-read.ts @@ -0,0 +1,93 @@ +// FileRead (runtime) — read a file under /workspace. +// +// Offset/limit are line-based (matches what an LLM expects when reading +// source files). Output is clipped at 16 KB to protect the LLM context ; +// any further reading should use offset. + +import { readFileSync, statSync } from 'node:fs' +import { z } from 'zod' +import { resolveSandboxedPath } from './file-write.ts' + +export const RuntimeFileReadInputSchema = z.object({ + path: z + .string() + .min(1) + .describe( + 'Path inside the agent sandbox (/workspace). Relative or absolute under /workspace.', + ), + offset: z + .number() + .int() + .min(0) + .optional() + .describe('Line offset (1-based first line of the slice). Default 0.'), + limit: z + .number() + .int() + .positive() + .max(2000) + .optional() + .describe('Max number of lines to return. 
Default 200, max 2000.'), +}) + +export type RuntimeFileReadInput = z.infer + +export type RuntimeFileReadResult = + | { + ok: true + absolutePath: string + content: string + totalLines: number + returnedLines: number + truncatedBytes: boolean + } + | { ok: false; error: string } + +const DEFAULT_LIMIT = 200 +const MAX_BYTES = 16_384 + +export function executeRuntimeFileRead( + input: RuntimeFileReadInput, +): RuntimeFileReadResult { + const safe = resolveSandboxedPath(input.path) + if (!safe.ok) return safe + + let raw: string + try { + const st = statSync(safe.absolutePath) + if (!st.isFile()) { + return { ok: false, error: `not a regular file : ${safe.absolutePath}` } + } + raw = readFileSync(safe.absolutePath, 'utf8') + } catch (err) { + return { ok: false, error: err instanceof Error ? err.message : String(err) } + } + + const allLines = raw.split('\n') + // Drop the trailing empty element when the file ends with \n so totalLines + // reflects the human count, not split() artifact. + if (allLines.length > 0 && allLines[allLines.length - 1] === '') { + allLines.pop() + } + const totalLines = allLines.length + + const offset = input.offset ?? 0 + const limit = input.limit ?? DEFAULT_LIMIT + const slice = allLines.slice(offset, offset + limit) + let content = slice.join('\n') + + let truncatedBytes = false + if (Buffer.byteLength(content, 'utf8') > MAX_BYTES) { + truncatedBytes = true + content = `${content.slice(0, MAX_BYTES)}\n…[output truncated at ${MAX_BYTES.toString()} bytes — use offset/limit for the rest]` + } + + return { + ok: true, + absolutePath: safe.absolutePath, + content, + totalLines, + returnedLines: slice.length, + truncatedBytes, + } +} diff --git a/packages/tools-core/src/runtime/glob.ts b/packages/tools-core/src/runtime/glob.ts new file mode 100644 index 0000000..7692037 --- /dev/null +++ b/packages/tools-core/src/runtime/glob.ts @@ -0,0 +1,123 @@ +// Glob (runtime) — find files matching a glob pattern under /workspace. 
+// +// Hand-rolled to avoid adding a dependency to the in-container bundle. +// Supports the patterns LLMs actually use : `*`, `**`, `?`. No braces, +// no character classes — those rarely appear in agent-emitted patterns +// and would just bloat the parser. +// +// Returns relative paths (from the sandbox root) sorted alphabetically. +// Capped at 200 results. + +import { readdirSync, statSync } from 'node:fs' +import { join, relative, resolve, sep } from 'node:path' +import { z } from 'zod' +import { resolveSandboxedPath } from './file-write.ts' + +export const RuntimeGlobInputSchema = z.object({ + pattern: z + .string() + .min(1) + .describe( + 'Glob pattern relative to /workspace. Supports *, **, and ?. Example : "src/**/*.ts".', + ), +}) + +export type RuntimeGlobInput = z.infer + +export type RuntimeGlobResult = + | { ok: true; matches: string[]; truncated: boolean } + | { ok: false; error: string } + +const MAX_MATCHES = 200 +const MAX_WALK_NODES = 5000 + +// Convert a glob to a RegExp anchored at the start, allowing partial +// path-segment matches. Each segment is converted independently and +// joined with `/`. +function globToRegex(pattern: string): RegExp { + // Normalize : split on / and process per segment. + const parts = pattern.split('/') + const out: string[] = [] + for (const part of parts) { + if (part === '**') { + out.push('(?:.*?)') + continue + } + let segment = '' + for (const ch of part) { + if (ch === '*') segment += '[^/]*' + else if (ch === '?') segment += '[^/]' + else if (/[.+^${}()|[\]\\]/.test(ch)) segment += `\\${ch}` + else segment += ch + } + out.push(segment) + } + // Glue : `/` between regular segments, but `**` already swallows separators. 
+ let glued = '' + for (let i = 0; i < out.length; i += 1) { + const part = out[i] as string + if (i === 0) { + glued = part + continue + } + const prev = out[i - 1] + if (prev === '(?:.*?)' || part === '(?:.*?)') glued += part + else glued += `/${part}` + } + return new RegExp(`^${glued}$`) +} + +// Walk a directory tree and return relative POSIX paths of all FILES. +// Bounded by MAX_WALK_NODES to protect against pathological trees. +function walk(root: string): string[] { + const out: string[] = [] + const stack: string[] = [root] + let visited = 0 + while (stack.length > 0 && visited < MAX_WALK_NODES) { + const dir = stack.pop() as string + let entries: string[] + try { + entries = readdirSync(dir) + } catch { + continue + } + for (const name of entries) { + visited += 1 + if (visited >= MAX_WALK_NODES) break + const full = join(dir, name) + let st: ReturnType + try { + st = statSync(full) + } catch { + continue + } + if (st.isDirectory()) { + stack.push(full) + } else if (st.isFile()) { + const rel = relative(root, full).split(sep).join('/') + out.push(rel) + } + } + } + return out +} + +export function executeRuntimeGlob( + input: RuntimeGlobInput, +): RuntimeGlobResult { + // Resolve sandbox root via a dummy path : ensures we use the same + // FORGE_WORKSPACE override as the other runtime tools. + const safeRoot = resolveSandboxedPath('.') + if (!safeRoot.ok) return safeRoot + const root = resolve(safeRoot.absolutePath) + + const re = globToRegex(input.pattern) + const all = walk(root) + const matched = all.filter((p) => re.test(p)).sort() + const truncated = matched.length > MAX_MATCHES + return { + ok: true, + matches: truncated ? 
matched.slice(0, MAX_MATCHES) : matched, + truncated, + } +} diff --git a/packages/tools-core/src/runtime/grep.ts b/packages/tools-core/src/runtime/grep.ts new file mode 100644 index 0000000..b7ae6e8 --- /dev/null +++ b/packages/tools-core/src/runtime/grep.ts @@ -0,0 +1,106 @@ +// Grep (runtime) — regex search across files under /workspace. +// +// Pure JS, no ripgrep dependency : the alpine container doesn't ship rg +// by default and we don't want to bloat the image just for this. For a +// POC the trade-off is fine ; if it becomes a bottleneck we'll bind-mount +// rg later. +// +// The pattern is a JavaScript RegExp source. Files are filtered by an +// optional glob to keep the scan bounded. Binary-looking content +// (NUL bytes in the first 4 KB) is skipped. + +import { readFileSync } from 'node:fs' +import { join } from 'node:path' +import { z } from 'zod' +import { resolveSandboxedPath } from './file-write.ts' +import { executeRuntimeGlob } from './glob.ts' + +export const RuntimeGrepInputSchema = z.object({ + pattern: z + .string() + .min(1) + .describe('JavaScript RegExp source. Example : "TODO|FIXME".'), + glob: z + .string() + .optional() + .describe( + 'Optional file pattern relative to /workspace (e.g. "src/**/*.ts"). Defaults to "**/*".', + ), + ignoreCase: z.boolean().optional().describe('Case-insensitive match. 
Default false.'), +}) + +export type RuntimeGrepInput = z.infer + +export type GrepHit = { path: string; line: number; text: string } + +export type RuntimeGrepResult = + | { ok: true; hits: GrepHit[]; truncated: boolean; scanned: number } + | { ok: false; error: string } + +const MAX_HITS = 200 +const MAX_LINE_LEN = 400 // clip long lines so a minified file doesn't blow context +const MAX_FILE_BYTES = 1_048_576 // skip files > 1 MB + +function looksBinary(buf: Buffer): boolean { + const limit = Math.min(buf.length, 4096) + for (let i = 0; i < limit; i += 1) { + if (buf[i] === 0) return true + } + return false +} + +export function executeRuntimeGrep( + input: RuntimeGrepInput, +): RuntimeGrepResult { + let re: RegExp + try { + re = new RegExp(input.pattern, input.ignoreCase ? 'i' : undefined) + } catch (err) { + return { ok: false, error: `invalid regex : ${err instanceof Error ? err.message : String(err)}` } + } + + const safeRoot = resolveSandboxedPath('.') + if (!safeRoot.ok) return safeRoot + + const filesResult = executeRuntimeGlob({ pattern: input.glob ?? '**/*' }) + if (!filesResult.ok) return filesResult + + const hits: GrepHit[] = [] + let truncated = false + let scanned = 0 + + for (const rel of filesResult.matches) { + if (hits.length >= MAX_HITS) { + truncated = true + break + } + const abs = join(safeRoot.absolutePath, rel) + let buf: Buffer + try { + buf = readFileSync(abs) + } catch { + continue + } + if (buf.length > MAX_FILE_BYTES) continue + if (looksBinary(buf)) continue + scanned += 1 + const text = buf.toString('utf8') + const lines = text.split('\n') + for (let i = 0; i < lines.length; i += 1) { + const line = lines[i] as string + if (re.test(line)) { + hits.push({ + path: rel, + line: i + 1, + text: line.length > MAX_LINE_LEN ? 
`${line.slice(0, MAX_LINE_LEN)}…` : line, + }) + if (hits.length >= MAX_HITS) { + truncated = true + break + } + } + } + } + + return { ok: true, hits, truncated, scanned } +} diff --git a/packages/tools-core/tests/runtime-file-edit.test.ts b/packages/tools-core/tests/runtime-file-edit.test.ts new file mode 100644 index 0000000..37d0ddb --- /dev/null +++ b/packages/tools-core/tests/runtime-file-edit.test.ts @@ -0,0 +1,86 @@ +import { afterAll, beforeAll, describe, expect, test } from 'bun:test' +import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-fe-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +const { executeRuntimeFileEdit } = await import('../src/runtime/file-edit.ts') + +describe('executeRuntimeFileEdit', () => { + test('replaces a unique substring', () => { + const path = join(TMP_WORKSPACE, 'a.ts') + writeFileSync(path, 'const x = 1\nconst y = 2\n') + const r = executeRuntimeFileEdit({ + path: 'a.ts', + oldString: 'const x = 1', + newString: 'const x = 42', + }) + expect(r.ok).toBe(true) + if (r.ok) { + expect(r.replacements).toBe(1) + expect(readFileSync(path, 'utf8')).toBe('const x = 42\nconst y = 2\n') + } + }) + + test('refuses ambiguous match without replaceAll', () => { + const path = join(TMP_WORKSPACE, 'b.ts') + writeFileSync(path, 'foo\nfoo\n') + const r = executeRuntimeFileEdit({ + path: 'b.ts', + oldString: 'foo', + newString: 'bar', + }) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.error).toContain('matches 2 times') + }) + + test('replaceAll handles every occurrence', () => { + const path = 
join(TMP_WORKSPACE, 'c.ts') + writeFileSync(path, 'foo\nfoo\nfoo\n') + const r = executeRuntimeFileEdit({ + path: 'c.ts', + oldString: 'foo', + newString: 'bar', + replaceAll: true, + }) + expect(r.ok).toBe(true) + if (r.ok) { + expect(r.replacements).toBe(3) + expect(readFileSync(path, 'utf8')).toBe('bar\nbar\nbar\n') + } + }) + + test('returns an error when oldString is missing', () => { + const path = join(TMP_WORKSPACE, 'd.ts') + writeFileSync(path, 'hello') + const r = executeRuntimeFileEdit({ + path: 'd.ts', + oldString: 'goodbye', + newString: 'bye', + }) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.error).toContain('not found') + }) + + test('refuses path outside the sandbox', () => { + const r = executeRuntimeFileEdit({ + path: '../escape', + oldString: 'a', + newString: 'b', + }) + expect(r.ok).toBe(false) + }) +}) diff --git a/packages/tools-core/tests/runtime-file-read.test.ts b/packages/tools-core/tests/runtime-file-read.test.ts new file mode 100644 index 0000000..e1c6374 --- /dev/null +++ b/packages/tools-core/tests/runtime-file-read.test.ts @@ -0,0 +1,55 @@ +import { afterAll, beforeAll, describe, expect, test } from 'bun:test' +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-fr-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +const { executeRuntimeFileRead } = await import('../src/runtime/file-read.ts') + +describe('executeRuntimeFileRead', () => { + test('reads the full file when no offset/limit', () => { + writeFileSync(join(TMP_WORKSPACE, 'a.txt'), 'one\ntwo\nthree\n') + const r = executeRuntimeFileRead({ 
path: 'a.txt' }) + expect(r.ok).toBe(true) + if (r.ok) { + expect(r.content).toBe('one\ntwo\nthree') + expect(r.totalLines).toBe(3) + expect(r.returnedLines).toBe(3) + } + }) + + test('honors offset and limit', () => { + const lines = Array.from({ length: 10 }, (_, i) => `line${(i + 1).toString()}`).join('\n') + writeFileSync(join(TMP_WORKSPACE, 'b.txt'), lines) + const r = executeRuntimeFileRead({ path: 'b.txt', offset: 3, limit: 4 }) + expect(r.ok).toBe(true) + if (r.ok) { + expect(r.content).toBe('line4\nline5\nline6\nline7') + expect(r.totalLines).toBe(10) + expect(r.returnedLines).toBe(4) + } + }) + + test('rejects path outside the sandbox', () => { + const r = executeRuntimeFileRead({ path: '../escape.txt' }) + expect(r.ok).toBe(false) + }) + + test('returns an error for missing files', () => { + const r = executeRuntimeFileRead({ path: 'nope.txt' }) + expect(r.ok).toBe(false) + }) +}) diff --git a/packages/tools-core/tests/runtime-glob.test.ts b/packages/tools-core/tests/runtime-glob.test.ts new file mode 100644 index 0000000..cdfe6ec --- /dev/null +++ b/packages/tools-core/tests/runtime-glob.test.ts @@ -0,0 +1,53 @@ +import { afterAll, beforeAll, describe, expect, test } from 'bun:test' +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-gl-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE + mkdirSync(join(TMP_WORKSPACE, 'src/sub'), { recursive: true }) + writeFileSync(join(TMP_WORKSPACE, 'src/index.ts'), '') + writeFileSync(join(TMP_WORKSPACE, 'src/sub/util.ts'), '') + writeFileSync(join(TMP_WORKSPACE, 'src/sub/util.test.ts'), '') + writeFileSync(join(TMP_WORKSPACE, 'README.md'), '') +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = 
ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +const { executeRuntimeGlob } = await import('../src/runtime/glob.ts') + +describe('executeRuntimeGlob', () => { + test('matches all .ts files recursively with **/*.ts', () => { + const r = executeRuntimeGlob({ pattern: '**/*.ts' }) + expect(r.ok).toBe(true) + if (r.ok) { + expect(r.matches).toEqual(['src/index.ts', 'src/sub/util.test.ts', 'src/sub/util.ts']) + } + }) + + test('matches a single segment with src/*.ts', () => { + const r = executeRuntimeGlob({ pattern: 'src/*.ts' }) + expect(r.ok).toBe(true) + if (r.ok) expect(r.matches).toEqual(['src/index.ts']) + }) + + test('matches with ? for single char', () => { + const r = executeRuntimeGlob({ pattern: 'README.m?' }) + expect(r.ok).toBe(true) + if (r.ok) expect(r.matches).toEqual(['README.md']) + }) + + test('returns empty when nothing matches', () => { + const r = executeRuntimeGlob({ pattern: '**/*.rs' }) + expect(r.ok).toBe(true) + if (r.ok) expect(r.matches).toEqual([]) + }) +}) diff --git a/packages/tools-core/tests/runtime-grep.test.ts b/packages/tools-core/tests/runtime-grep.test.ts new file mode 100644 index 0000000..4e711ba --- /dev/null +++ b/packages/tools-core/tests/runtime-grep.test.ts @@ -0,0 +1,62 @@ +import { afterAll, beforeAll, describe, expect, test } from 'bun:test' +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-gr-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE + mkdirSync(join(TMP_WORKSPACE, 'src'), { recursive: true }) + writeFileSync( + join(TMP_WORKSPACE, 'src/index.ts'), + '// TODO: implement\nexport const x = 1\n// fixme later\n', + ) + writeFileSync(join(TMP_WORKSPACE, 'src/util.ts'), 'export const todo = "x"\n') + writeFileSync(join(TMP_WORKSPACE, 
'README.md'), '# project\nTODO: write docs\n') +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +const { executeRuntimeGrep } = await import('../src/runtime/grep.ts') + +describe('executeRuntimeGrep', () => { + test('finds case-sensitive matches across files', () => { + const r = executeRuntimeGrep({ pattern: 'TODO' }) + expect(r.ok).toBe(true) + if (r.ok) { + const paths = r.hits.map((h) => h.path).sort() + expect(paths).toEqual(['README.md', 'src/index.ts']) + } + }) + + test('honors ignoreCase', () => { + const r = executeRuntimeGrep({ pattern: 'todo', ignoreCase: true }) + expect(r.ok).toBe(true) + if (r.ok) { + const paths = r.hits.map((h) => h.path).sort() + // util.ts matches via "const todo", index.ts via TODO, README.md via TODO. + expect(paths).toEqual(['README.md', 'src/index.ts', 'src/util.ts']) + } + }) + + test('respects the glob filter', () => { + const r = executeRuntimeGrep({ pattern: 'TODO', glob: '**/*.md' }) + expect(r.ok).toBe(true) + if (r.ok) { + const paths = r.hits.map((h) => h.path) + expect(paths).toEqual(['README.md']) + } + }) + + test('returns an error for an invalid regex', () => { + const r = executeRuntimeGrep({ pattern: '(' }) + expect(r.ok).toBe(false) + }) +}) From b38f18492a2240c9420303fc7ec06d1dcf7891dc Mon Sep 17 00:00:00 2001 From: Georges Garnier Date: Mon, 27 Apr 2026 14:08:22 +0200 Subject: [PATCH 3/5] fix(cli,core): auto-quote AGENT.md description when it contains a colon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mistral Small (and likely most small models) regularly emits an AGENT.md where the `description` value embeds a colon — typically when listing steps ("Step 1: ..., Step 2: ...") or quoting another key (`maxTurns: 8`, `timeout: 60s`). YAML reads that as a nested mapping and rejects the whole frontmatter. 
// Wraps an unquoted `description` value of the YAML frontmatter in double
// quotes when it contains a colon.
//
// Small models commonly write a `description` value containing a colon
// (e.g. "Étape 1 : ..." or "...timeout: 60s..."), which YAML mis-parses
// as a nested mapping and chokes the whole frontmatter. Once wrapped, the
// parser reads the value as a plain string.
//
// Only lines between the first and the second `---` fence are inspected;
// everything before the opening fence and after the closing fence is
// returned unchanged. Values already wrapped in matching single or double
// quotes, empty values, and values without a colon are left alone.
function quoteUnsafeDescription(content: string): string {
  const descriptionLine = /^(\s*description\s*:\s*)(.*)$/
  const lines = content.split('\n')
  let fenceCount = 0
  for (let i = 0; i < lines.length; i += 1) {
    const line = lines[i] ?? ''
    if (line.trim() === '---') {
      fenceCount += 1
      // The second fence closes the frontmatter : nothing after it is YAML.
      if (fenceCount === 2) break
      continue
    }
    // Not inside the frontmatter yet (no opening fence seen so far).
    if (fenceCount !== 1) continue
    const m = descriptionLine.exec(line)
    if (!m) continue
    const prefix = m[1] ?? ''
    const value = (m[2] ?? '').trim()
    if (value.length === 0) continue
    // Already quoted ? leave it alone.
    const alreadyQuoted =
      (value.startsWith('"') && value.endsWith('"')) ||
      (value.startsWith("'") && value.endsWith("'"))
    if (alreadyQuoted || !value.includes(':')) continue
    // Inside a YAML double-quoted scalar the backslash introduces an escape
    // sequence, so literal backslashes must be doubled BEFORE the embedded
    // double quotes are escaped — otherwise `C:\data` would turn into an
    // invalid (or silently altered) escape once wrapped.
    const safe = value.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
    lines[i] = `${prefix}"${safe}"`
  }
  return lines.join('\n')
}

// Repairs the two frontmatter mistakes small models make most often in an
// AGENT.md, then returns the fixed content.
//
// 1. Small models often confuse the protocol separator (`---` between path
//    and content) with the YAML frontmatter opener and forget to write a
//    leading `---`. If the content does not open with `---` but contains a
//    line starting with a recognized key, the leading whitespace is dropped
//    and `---` is prepended so it parses cleanly.
// 2. The `description` value is quoted when it holds an unquoted colon
//    (see quoteUnsafeDescription).
function normalizeAgentMd(content: string): string {
  const trimmed = content.replace(/^\s+/, '')
  const looksLikeRawFrontmatter =
    !trimmed.startsWith('---') &&
    /^(name|description|model|sandbox|maxTurns)\s*:/m.test(trimmed)
  const normalized = looksLikeRawFrontmatter ? `---\n${trimmed}` : content
  return quoteUnsafeDescription(normalized)
}
+sandbox: + image: agent-forge/base:latest + timeout: 60s +maxTurns: 1 +--- + +body` + const exec = executeAction({ + kind: 'write', + path: `agents/${TEST_AGENT}/AGENT.md`, + content: safe, + raw: '', + }) + expect(exec.kind).toBe('write') + if (exec.kind === 'write') expect(exec.result.ok).toBe(true) + }) + test('run action passes through pre-flight (actual launch is async)', () => { const exec = executeAction({ kind: 'run', diff --git a/packages/core/src/builder/system-prompt.ts b/packages/core/src/builder/system-prompt.ts index 81ab986..8d3d869 100644 --- a/packages/core/src/builder/system-prompt.ts +++ b/packages/core/src/builder/system-prompt.ts @@ -35,6 +35,7 @@ You are a haiku poet. Answer with exactly three lines, syllables 5-7-5. ABSOLUTE rules — failing any of these IS A BUG : - The path MUST be exactly \`agents//AGENT.md\`. The filename MUST be the literal string \`AGENT.md\`. Never invent variants like \`haiku-writer.md\` or \`HAIKU-WRITER.md\`. - The file content MUST start with a YAML frontmatter block : a line \`---\`, then the YAML keys (name, description, sandbox, maxTurns), then a closing \`---\`, then the body. Look at the example above carefully — there are TWO \`---\` after the \`path:\` line : the first one separates the path from the content, the second one OPENS the frontmatter. +- The \`description\` value MUST be a single line of plain prose, with NO colon (\`:\`), NO YAML-looking syntax (\`key: value\`), NO line break, NO unbalanced quote. If you cannot write it cleanly without a colon, wrap the whole value in double quotes : \`description: "Audits the project. Step 1: list files. Step 2: fix TODOs."\`. Never repeat the values of the other keys (\`maxTurns\`, \`timeout\`) inside \`description\` — they go in the body of the AGENT.md instead. - The block opens with three backticks + \`forge:write\` and CLOSES with three backticks on their own line. - Replace placeholders with real values. Do not keep angle brackets. 
- Always propose the block first and ask the user to confirm with "yes" / "go" / "ok" before re-emitting it. @@ -83,6 +84,7 @@ Tu es un poète haïku. Réponds par exactement trois lignes, syllabes 5-7-5. Règles ABSOLUES — toute violation EST UN BUG : - Le chemin DOIT être exactement \`agents//AGENT.md\`. Le nom de fichier DOIT être la chaîne littérale \`AGENT.md\`. N'invente jamais de variante comme \`haiku-writer.md\` ou \`HAIKU-WRITER.md\`. - Le contenu du fichier DOIT commencer par un bloc YAML frontmatter : une ligne \`---\`, puis les clés YAML (name, description, sandbox, maxTurns), puis un \`---\` de fermeture, puis le corps. Regarde bien l'exemple ci-dessus — il y a DEUX \`---\` après la ligne \`path:\` : le premier sépare le path du contenu, le second OUVRE le frontmatter. +- La valeur de \`description\` DOIT être une seule ligne de prose simple, SANS deux-points (\`:\`), SANS syntaxe ressemblant à du YAML (\`clé: valeur\`), SANS retour à la ligne, SANS guillemet non fermé. Si tu ne peux pas écrire la valeur proprement sans deux-points, encadre toute la valeur entre guillemets doubles : \`description: "Audite le projet. Étape 1 : lister les fichiers. Étape 2 : corriger les TODO."\`. Ne répète JAMAIS les valeurs des autres clés (\`maxTurns\`, \`timeout\`) dans la \`description\` — elles vont dans le corps de l'AGENT.md. - Le bloc s'ouvre par trois backticks + \`forge:write\` et se FERME par trois backticks sur leur propre ligne. - Remplace les placeholders par des vraies valeurs. Ne laisse pas les chevrons. - Propose toujours le bloc d'abord et demande la confirmation (« oui » / « ok » / « go ») avant de le ré-émettre. 
From 2bbf412af90336b97a2e1da013bfa5e790e5c7a9 Mon Sep 17 00:00:00 2001 From: Georges Garnier Date: Mon, 27 Apr 2026 14:22:53 +0200 Subject: [PATCH 4/5] feat(cli): Tab to focus a Mission Control card, Enter to open it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cards in Mission Control are now keyboard-navigable : - Tab cycle focus forward (lands on the most recent card the first time) - Shift+Tab cycle focus backward - Enter open the focused card in a full-screen detail view - Esc / q close the detail view The detail view uses the entire terminal, shows the action's full content (the AGENT.md body for write actions ; prompt + streamed output for run actions) with line numbers, and supports scrolling with arrow keys / PgUp / PgDn / g / G. Tab/Enter are only captured when there are actions, no permission dialog is up, the detail view is closed, and the prompt input is empty — so typing in the prompt always wins. The prompt draft is now lifted into useChat so App can read it for that guard. Visual cues : the focused card switches to a brighter "double" border and gains a leading triangle ; the Mission Control header changes its hint line depending on whether anything is focused. 
--- packages/cli/src/components/App.tsx | 44 ++++- packages/cli/src/components/CardDetail.tsx | 157 ++++++++++++++++++ .../cli/src/components/MissionControl.tsx | 63 +++++-- packages/cli/src/components/Welcome.tsx | 11 +- packages/cli/src/hooks/useCardFocus.ts | 85 ++++++++++ packages/cli/src/hooks/useChat.ts | 11 ++ 6 files changed, 348 insertions(+), 23 deletions(-) create mode 100644 packages/cli/src/components/CardDetail.tsx create mode 100644 packages/cli/src/hooks/useCardFocus.ts diff --git a/packages/cli/src/components/App.tsx b/packages/cli/src/components/App.tsx index 4687356..fbfe321 100644 --- a/packages/cli/src/components/App.tsx +++ b/packages/cli/src/components/App.tsx @@ -9,11 +9,16 @@ // └──────────────┘ ← terminal bottom (FIXED) // // PgUp / PgDn / Ctrl+E scroll the chat transcript inside Welcome. +// Tab / Shift+Tab cycle focus through Mission Control cards (only when +// the prompt input is empty so it doesn't fight TextInput). Enter on a +// focused card opens a full-screen CardDetail view ; Esc closes it. import { Box, useInput, useStdin } from 'ink' import React from 'react' import { useChatContext } from '../hooks/useChatContext.tsx' +import { useCardFocus } from '../hooks/useCardFocus.ts' import { useLanguage } from '../i18n/LanguageContext.tsx' +import { CardDetail } from './CardDetail.tsx' import { MissionControl } from './MissionControl.tsx' import { ProviderLogo } from './ProviderLogo.tsx' import { Splash } from './Splash.tsx' @@ -22,25 +27,54 @@ import { Welcome } from './Welcome.tsx' export function App(): React.JSX.Element { const { lang } = useLanguage() const { isRawModeSupported } = useStdin() - const { scrollUp, scrollDown, scrollToBottom, pending, state } = useChatContext() + const { scrollUp, scrollDown, scrollToBottom, pending, state, promptDraft } = + useChatContext() + const focus = useCardFocus(state.actions) const rows = process.stdout.rows ?? 30 const cols = process.stdout.columns ?? 
80 const hasPending = pending !== null const hasActions = state.actions.length > 0 + const promptIsEmpty = promptDraft.length === 0 + + // Tab/Enter is only meaningful when there are actions, the prompt is + // empty (so TextInput doesn't lose its keystrokes), and no permission + // dialog is showing. + const cardKeysActive = + isRawModeSupported && + lang !== null && + !focus.detailOpen && + !hasPending && + hasActions && + promptIsEmpty useInput( - (_, key) => { + (input, key) => { if (key.pageUp) scrollUp() else if (key.pageDown) scrollDown() - else if (key.ctrl && _ === 'e') scrollToBottom() + else if (key.ctrl && input === 'e') scrollToBottom() + else if (cardKeysActive && key.tab && key.shift) focus.cycleBack() + else if (cardKeysActive && key.tab) focus.cycle() + else if (cardKeysActive && key.return) focus.open() }, - { isActive: isRawModeSupported && lang !== null }, + { isActive: isRawModeSupported && lang !== null && !focus.detailOpen }, ) + // Detail view : modal full-screen replacement. + if (focus.detailOpen && focus.focusedId !== null) { + const action = state.actions.find((a) => a.id === focus.focusedId) + if (action) { + return + } + } + return ( - {hasActions ? : } + {hasActions ? ( + + ) : ( + + )} {/* Spacer pushes Welcome to the bottom AND parks the provider logo at the bottom-right of the top zone (just above the Welcome diff --git a/packages/cli/src/components/CardDetail.tsx b/packages/cli/src/components/CardDetail.tsx new file mode 100644 index 0000000..986480f --- /dev/null +++ b/packages/cli/src/components/CardDetail.tsx @@ -0,0 +1,157 @@ +// Full-screen detail view for a single Mission Control action. +// +// Mounted by App when useCardFocus reports detailOpen=true. Replaces +// both Mission Control AND Welcome — the user gets the entire screen +// to read the full content of the action they pressed Enter on. +// +// Scrolls line-by-line with PgUp / PgDn / arrow up/down. Esc closes. 
+ +import { Box, Text, useInput } from 'ink' +import React, { useState } from 'react' +import type { Action, ActionStatus, RunAction, WriteAction } from '../actions/types.ts' +import { C } from '../theme/colors.ts' +import { + type HighlightedLine, + type Segment, + highlightPlain, + highlightYamlText, +} from './syntax.ts' + +const STATUS_LABEL: Record = { + proposed: 'PROPOSED', + approved: 'APPROVED', + running: 'RUNNING', + done: 'DONE', + failed: 'FAILED', + declined: 'DECLINED', +} + +const STATUS_COLOR: Record = { + proposed: C.orange, + approved: C.orangeBright, + running: C.yellow, + done: C.green, + failed: C.red, + declined: C.grey, +} + +function buildLines(action: Action): HighlightedLine[] { + if (action.kind === 'write') { + return highlightYamlText(action.content) + } + // run : prompt then output + const out: HighlightedLine[] = [] + out.push([{ text: '── prompt ──', color: C.grey, dim: true }]) + out.push(...highlightPlain(action.prompt)) + out.push([{ text: '' }]) + out.push([{ text: '── output ──', color: C.grey, dim: true }]) + if (action.output.length > 0) { + out.push(...highlightPlain(action.output)) + } else { + out.push([{ text: '(empty)', color: C.grey, dim: true }]) + } + if (action.status === 'failed' && action.error) { + out.push([{ text: '' }]) + out.push([{ text: `✗ ${action.error}`, color: C.red }]) + } + return out +} + +function headerFor(action: Action): string { + if (action.kind === 'write') return `write ${action.path}` + return `run ${action.agent}` +} + +export function CardDetail({ + action, + onClose, +}: { + action: Action + onClose: () => void +}): React.JSX.Element { + const rows = process.stdout.rows ?? 30 + const cols = process.stdout.columns ?? 80 + const lines = buildLines(action) + + // Reserve : 2 rows for the title bar, 2 rows for the footer hint, 1 + // separator. Body gets the rest. 
+ const bodyHeight = Math.max(5, rows - 5) + const [offset, setOffset] = useState(0) + const maxOffset = Math.max(0, lines.length - bodyHeight) + + useInput((input, key) => { + if (key.escape || input === 'q') { + onClose() + return + } + if (key.pageUp) setOffset((o) => Math.max(0, o - bodyHeight)) + else if (key.pageDown) setOffset((o) => Math.min(maxOffset, o + bodyHeight)) + else if (key.upArrow) setOffset((o) => Math.max(0, o - 1)) + else if (key.downArrow) setOffset((o) => Math.min(maxOffset, o + 1)) + else if (input === 'g') setOffset(0) + else if (input === 'G') setOffset(maxOffset) + }) + + const visible = lines.slice(offset, offset + bodyHeight) + const totalLines = lines.length + const lastShown = Math.min(totalLines, offset + bodyHeight) + + return ( + + {/* Title bar */} + + + {`[${STATUS_LABEL[action.status]}]`} + + + {' detail '} + + {headerFor(action)} + + + {'─'.repeat(cols)} + + + {/* Body */} + + {visible.map((segments: HighlightedLine, i: number) => { + const lineNo = offset + i + 1 + return ( + + + {`${lineNo.toString().padStart(4, ' ')} `} + + {segments.map((seg: Segment, j: number) => ( + + {seg.text} + + ))} + + ) + })} + + + {/* Footer */} + + {'─'.repeat(cols)} + + + + + {`lines ${(offset + 1).toString()}..${lastShown.toString()} of ${totalLines.toString()}`} + + + + + {'[↑↓ / PgUp/PgDn] scroll [g/G] top/bottom [Esc / q] close'} + + + + + ) +} diff --git a/packages/cli/src/components/MissionControl.tsx b/packages/cli/src/components/MissionControl.tsx index 9edc30f..0dbd0d3 100644 --- a/packages/cli/src/components/MissionControl.tsx +++ b/packages/cli/src/components/MissionControl.tsx @@ -84,7 +84,8 @@ function StatusBadge({ status }: { status: ActionStatus }): React.JSX.Element { ) } -function borderColorFor(status: ActionStatus): string { +function borderColorFor(status: ActionStatus, focused: boolean): string { + if (focused) return C.orangeBright if (status === 'done') return C.green if (status === 'failed') return C.red if (status === 
'declined') return C.grey @@ -94,16 +95,18 @@ function borderColorFor(status: ActionStatus): string { function CardFrame({ status, + focused, children, }: { status: ActionStatus + focused: boolean children: React.ReactNode }): React.JSX.Element { return ( + {focused ? '▸ ' : ' '} + + ) +} + +function WriteCard({ + action, + focused, +}: { + action: WriteAction + focused: boolean +}): React.JSX.Element { const lines = highlightYamlText(action.content) return ( - + + {' write '} {action.path} @@ -140,12 +158,19 @@ function WriteCard({ action }: { action: WriteAction }): React.JSX.Element { ) } -function RunCard({ action }: { action: RunAction }): React.JSX.Element { +function RunCard({ + action, + focused, +}: { + action: RunAction + focused: boolean +}): React.JSX.Element { const promptLines = highlightPlain(action.prompt) const outputLines = action.output.length > 0 ? highlightPlain(action.output) : [] return ( - + + {' run '} {action.agent} @@ -173,8 +198,10 @@ function RunCard({ action }: { action: RunAction }): React.JSX.Element { export function MissionControl({ actions, + focusedId, }: { actions: Action[] + focusedId: string | null }): React.JSX.Element { const cols = process.stdout.columns ?? 80 return ( @@ -191,14 +218,24 @@ export function MissionControl({ {` ${actions.length.toString()} action${actions.length === 1 ? '' : 's'}`} + {focusedId === null ? ( + + {' [Tab] focus a card · [Enter] open detail'} + + ) : ( + + {' [Enter] open detail · [Tab/Shift+Tab] cycle'} + + )} - {actions.map((a) => - a.kind === 'write' ? ( - + {actions.map((a) => { + const focused = a.id === focusedId + return a.kind === 'write' ? 
( + ) : ( - - ), - )} + + ) + })} ) } diff --git a/packages/cli/src/components/Welcome.tsx b/packages/cli/src/components/Welcome.tsx index e1dcee4..3bb709d 100644 --- a/packages/cli/src/components/Welcome.tsx +++ b/packages/cli/src/components/Welcome.tsx @@ -14,7 +14,7 @@ import { Box, Text, useApp, useStdin } from 'ink' import TextInput from 'ink-text-input' -import React, { useState } from 'react' +import React from 'react' import { getCurrentModelName } from '@agent-forge/core/builder' import { isCommand, runCommand } from '../commands.ts' import { useChatContext } from '../hooks/useChatContext.tsx' @@ -39,7 +39,6 @@ export function Welcome(): React.JSX.Element { const { lang, setLang } = useLanguage() const { exit } = useApp() const { isRawModeSupported } = useStdin() - const [input, setInput] = useState('') const { state, send, @@ -51,6 +50,8 @@ export function Welcome(): React.JSX.Element { pending, approvePending, declinePending, + promptDraft, + setPromptDraft, } = useChatContext() const hasMessages = state.messages.length > 0 || state.streaming !== null @@ -59,7 +60,7 @@ export function Welcome(): React.JSX.Element { const handleSubmit = (value: string): void => { const trimmed = value.trim() if (!trimmed || busy) return - setInput('') + setPromptDraft('') if (isCommand(trimmed)) { addSystemMessage(trimmed) @@ -116,8 +117,8 @@ export function Welcome(): React.JSX.Element { {' ❯ '} {isRawModeSupported ? ( diff --git a/packages/cli/src/hooks/useCardFocus.ts b/packages/cli/src/hooks/useCardFocus.ts new file mode 100644 index 0000000..a40bd4a --- /dev/null +++ b/packages/cli/src/hooks/useCardFocus.ts @@ -0,0 +1,85 @@ +// Mission Control card focus + detail view state. +// +// Kept separate from useChat so the chat hook stays focused on +// conversation/action state. 
Exposes : +// - focusedId : id of the action currently highlighted (or null) +// - detailOpen : whether the full-screen detail panel is mounted +// - cycle / cycleBack / open / close : the actions wired to Tab keys +// +// Behaviour : +// - Tab from "no focus" → focus the LAST action (most recent on top +// of Mission Control reads as bottom of the list, so we land on +// what the user just saw). +// - Tab again → walk forward; wraps around. +// - Shift+Tab → walk backward; wraps around. +// - When the focused action disappears (cleared, etc.), focus resets. + +import { useCallback, useEffect, useState } from 'react' +import type { Action } from '../actions/types.ts' + +export type CardFocusApi = { + focusedId: string | null + detailOpen: boolean + cycle: () => void + cycleBack: () => void + open: () => void + close: () => void + clearFocus: () => void +} + +export function useCardFocus(actions: Action[]): CardFocusApi { + const [focusedId, setFocusedId] = useState(null) + const [detailOpen, setDetailOpen] = useState(false) + + // If the focused action disappears (e.g. /clear), drop focus and the + // detail panel together so we never display a stale card. + useEffect(() => { + if (focusedId === null) return + const stillThere = actions.some((a) => a.id === focusedId) + if (!stillThere) { + setFocusedId(null) + setDetailOpen(false) + } + }, [actions, focusedId]) + + const cycle = useCallback(() => { + if (actions.length === 0) return + setFocusedId((current) => { + if (current === null) { + return actions[actions.length - 1]?.id ?? null + } + const idx = actions.findIndex((a) => a.id === current) + if (idx === -1) return actions[actions.length - 1]?.id ?? null + const next = (idx + 1) % actions.length + return actions[next]?.id ?? null + }) + }, [actions]) + + const cycleBack = useCallback(() => { + if (actions.length === 0) return + setFocusedId((current) => { + if (current === null) { + return actions[0]?.id ?? 
null + } + const idx = actions.findIndex((a) => a.id === current) + if (idx === -1) return actions[0]?.id ?? null + const prev = (idx - 1 + actions.length) % actions.length + return actions[prev]?.id ?? null + }) + }, [actions]) + + const open = useCallback(() => { + if (focusedId !== null) setDetailOpen(true) + }, [focusedId]) + + const close = useCallback(() => { + setDetailOpen(false) + }, []) + + const clearFocus = useCallback(() => { + setFocusedId(null) + setDetailOpen(false) + }, []) + + return { focusedId, detailOpen, cycle, cycleBack, open, close, clearFocus } +} diff --git a/packages/cli/src/hooks/useChat.ts b/packages/cli/src/hooks/useChat.ts index 199272f..13dce72 100644 --- a/packages/cli/src/hooks/useChat.ts +++ b/packages/cli/src/hooks/useChat.ts @@ -116,6 +116,8 @@ export function useChat(lang: Lang): { pending: Action | null approvePending: () => void declinePending: () => void + promptDraft: string + setPromptDraft: (value: string) => void } { const [state, setState] = useState({ messages: [], @@ -125,6 +127,13 @@ export function useChat(lang: Lang): { }) const [busy, setBusy] = useState(false) const [scrollOffset, setScrollOffset] = useState(0) + // Lifted out of Welcome so App can know when the input is empty (and + // thus capture Tab for Mission Control focus without stealing keys + // from the prompt). + const [promptDraft, setPromptDraftState] = useState('') + const setPromptDraft = useCallback((value: string) => { + setPromptDraftState(value) + }, []) // Buffer des messages cachés mais toujours envoyés au LLM dans `send`. // `/clear` y déplace les messages visibles (vue vide, contexte préservé) ; // `/reset` le purge. Stocké en ref pour ne pas redéclencher de rendu. 
@@ -366,5 +375,7 @@ export function useChat(lang: Lang): { pending: headPending, approvePending, declinePending, + promptDraft, + setPromptDraft, } } From 3bd47c5ce804834d78f3305f6a35e3baaef21ced Mon Sep 17 00:00:00 2001 From: Georges Garnier Date: Mon, 27 Apr 2026 14:35:11 +0200 Subject: [PATCH 5/5] feat(cli): Esc clears the Mission Control card focus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a card is focused but the detail view isn't open, pressing Esc now drops the focus without opening anything. Guarded so it only fires when the prompt is empty and no permission dialog is up — Esc keeps its meaning everywhere else. Header hint updated accordingly. --- packages/cli/src/components/App.tsx | 11 +++++++++++ packages/cli/src/components/MissionControl.tsx | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/components/App.tsx b/packages/cli/src/components/App.tsx index fbfe321..c85a8c9 100644 --- a/packages/cli/src/components/App.tsx +++ b/packages/cli/src/components/App.tsx @@ -55,6 +55,17 @@ export function App(): React.JSX.Element { else if (cardKeysActive && key.tab && key.shift) focus.cycleBack() else if (cardKeysActive && key.tab) focus.cycle() else if (cardKeysActive && key.return) focus.open() + // Esc clears the card focus (only when something is focused and + // the prompt is empty, so we never swallow an Esc the user meant + // for cancelling input). 
+ else if ( + key.escape && + promptIsEmpty && + !hasPending && + focus.focusedId !== null + ) { + focus.clearFocus() + } }, { isActive: isRawModeSupported && lang !== null && !focus.detailOpen }, ) diff --git a/packages/cli/src/components/MissionControl.tsx b/packages/cli/src/components/MissionControl.tsx index 0dbd0d3..999b350 100644 --- a/packages/cli/src/components/MissionControl.tsx +++ b/packages/cli/src/components/MissionControl.tsx @@ -224,7 +224,7 @@ export function MissionControl({ ) : ( - {' [Enter] open detail · [Tab/Shift+Tab] cycle'} + {' [Enter] open detail · [Tab/Shift+Tab] cycle · [Esc] unfocus'} )}