/**
 * Wraps an unquoted `description` value of the YAML frontmatter in double
 * quotes when it contains a colon.
 *
 * Small models commonly write a description such as "Étape 1 : ..." or
 * "...timeout: 60s...". YAML reads the colon as the start of a nested
 * mapping and rejects the whole frontmatter. Quoting the value makes the
 * parser read it as a plain string.
 *
 * Only lines between the first and the second `---` fence are inspected;
 * everything else (including the body) is returned unchanged. Values that
 * are empty, already fully quoted, or colon-free are left alone.
 *
 * @param content Raw AGENT.md text (LF or CRLF line endings).
 * @returns The same text with an unsafe `description` value double-quoted.
 */
function quoteUnsafeDescription(content: string): string {
  const descriptionRe = /^(\s*description\s*:\s*)(.*)$/

  // A value is "already quoted" only when it has an opening AND a closing
  // quote, i.e. at least two characters. A lone `"` must not qualify.
  const isFullyQuoted = (value: string): boolean =>
    value.length >= 2 &&
    ((value.startsWith('"') && value.endsWith('"')) ||
      (value.startsWith("'") && value.endsWith("'")))

  const lines = content.split('\n')
  let fenceCount = 0

  for (let i = 0; i < lines.length; i += 1) {
    const rawLine = lines[i] ?? ''

    // With CRLF input every line still carries a trailing `\r` after the
    // split. `.` never matches `\r`, so the regex below would silently fail
    // on such lines. Peel the `\r` off and re-attach it on write.
    const hasCarriageReturn = rawLine.endsWith('\r')
    const line = hasCarriageReturn ? rawLine.slice(0, -1) : rawLine

    if (line.trim() === '---') {
      fenceCount += 1
      // Second fence closes the frontmatter : nothing left to inspect.
      if (fenceCount === 2) break
      continue
    }
    // Before the first fence we are not inside the frontmatter yet.
    if (fenceCount !== 1) continue

    const match = descriptionRe.exec(line)
    if (match === null) continue

    const prefix = match[1] ?? ''
    const value = (match[2] ?? '').trim()
    if (value.length === 0) continue
    if (isFullyQuoted(value)) continue
    if (!value.includes(':')) continue

    // Inside a YAML double-quoted scalar the backslash introduces an escape
    // sequence, so it must be doubled BEFORE the embedded quotes are
    // escaped (otherwise `C:\temp` would turn into a tab, and an unknown
    // escape such as `\d` would break the parse again).
    const safe = value.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
    lines[i] = `${prefix}"${safe}"${hasCarriageReturn ? '\r' : ''}`
  }

  return lines.join('\n')
}
const trimmed = content.replace(/^\s+/, '') - if (trimmed.startsWith('---')) return content - if (/^(name|description|model|sandbox|maxTurns)\s*:/m.test(trimmed)) { - return `---\n${content.replace(/^\s+/, '')}` + let normalized = content + if (!trimmed.startsWith('---')) { + if (/^(name|description|model|sandbox|maxTurns)\s*:/m.test(trimmed)) { + normalized = `---\n${content.replace(/^\s+/, '')}` + } } - return content + return quoteUnsafeDescription(normalized) } const AGENT_PATH_RE = /^(agents\/[a-z][a-z0-9-]*)\/[^/]+$/ diff --git a/packages/cli/src/components/App.tsx b/packages/cli/src/components/App.tsx index 4687356..c85a8c9 100644 --- a/packages/cli/src/components/App.tsx +++ b/packages/cli/src/components/App.tsx @@ -9,11 +9,16 @@ // └──────────────┘ ← terminal bottom (FIXED) // // PgUp / PgDn / Ctrl+E scroll the chat transcript inside Welcome. +// Tab / Shift+Tab cycle focus through Mission Control cards (only when +// the prompt input is empty so it doesn't fight TextInput). Enter on a +// focused card opens a full-screen CardDetail view ; Esc closes it. import { Box, useInput, useStdin } from 'ink' import React from 'react' import { useChatContext } from '../hooks/useChatContext.tsx' +import { useCardFocus } from '../hooks/useCardFocus.ts' import { useLanguage } from '../i18n/LanguageContext.tsx' +import { CardDetail } from './CardDetail.tsx' import { MissionControl } from './MissionControl.tsx' import { ProviderLogo } from './ProviderLogo.tsx' import { Splash } from './Splash.tsx' @@ -22,25 +27,65 @@ import { Welcome } from './Welcome.tsx' export function App(): React.JSX.Element { const { lang } = useLanguage() const { isRawModeSupported } = useStdin() - const { scrollUp, scrollDown, scrollToBottom, pending, state } = useChatContext() + const { scrollUp, scrollDown, scrollToBottom, pending, state, promptDraft } = + useChatContext() + const focus = useCardFocus(state.actions) const rows = process.stdout.rows ?? 30 const cols = process.stdout.columns ?? 
80 const hasPending = pending !== null const hasActions = state.actions.length > 0 + const promptIsEmpty = promptDraft.length === 0 + + // Tab/Enter is only meaningful when there are actions, the prompt is + // empty (so TextInput doesn't lose its keystrokes), and no permission + // dialog is showing. + const cardKeysActive = + isRawModeSupported && + lang !== null && + !focus.detailOpen && + !hasPending && + hasActions && + promptIsEmpty useInput( - (_, key) => { + (input, key) => { if (key.pageUp) scrollUp() else if (key.pageDown) scrollDown() - else if (key.ctrl && _ === 'e') scrollToBottom() + else if (key.ctrl && input === 'e') scrollToBottom() + else if (cardKeysActive && key.tab && key.shift) focus.cycleBack() + else if (cardKeysActive && key.tab) focus.cycle() + else if (cardKeysActive && key.return) focus.open() + // Esc clears the card focus (only when something is focused and + // the prompt is empty, so we never swallow an Esc the user meant + // for cancelling input). + else if ( + key.escape && + promptIsEmpty && + !hasPending && + focus.focusedId !== null + ) { + focus.clearFocus() + } }, - { isActive: isRawModeSupported && lang !== null }, + { isActive: isRawModeSupported && lang !== null && !focus.detailOpen }, ) + // Detail view : modal full-screen replacement. + if (focus.detailOpen && focus.focusedId !== null) { + const action = state.actions.find((a) => a.id === focus.focusedId) + if (action) { + return + } + } + return ( - {hasActions ? : } + {hasActions ? ( + + ) : ( + + )} {/* Spacer pushes Welcome to the bottom AND parks the provider logo at the bottom-right of the top zone (just above the Welcome diff --git a/packages/cli/src/components/CardDetail.tsx b/packages/cli/src/components/CardDetail.tsx new file mode 100644 index 0000000..986480f --- /dev/null +++ b/packages/cli/src/components/CardDetail.tsx @@ -0,0 +1,157 @@ +// Full-screen detail view for a single Mission Control action. 
+// +// Mounted by App when useCardFocus reports detailOpen=true. Replaces +// both Mission Control AND Welcome — the user gets the entire screen +// to read the full content of the action they pressed Enter on. +// +// Scrolls line-by-line with PgUp / PgDn / arrow up/down. Esc closes. + +import { Box, Text, useInput } from 'ink' +import React, { useState } from 'react' +import type { Action, ActionStatus, RunAction, WriteAction } from '../actions/types.ts' +import { C } from '../theme/colors.ts' +import { + type HighlightedLine, + type Segment, + highlightPlain, + highlightYamlText, +} from './syntax.ts' + +const STATUS_LABEL: Record = { + proposed: 'PROPOSED', + approved: 'APPROVED', + running: 'RUNNING', + done: 'DONE', + failed: 'FAILED', + declined: 'DECLINED', +} + +const STATUS_COLOR: Record = { + proposed: C.orange, + approved: C.orangeBright, + running: C.yellow, + done: C.green, + failed: C.red, + declined: C.grey, +} + +function buildLines(action: Action): HighlightedLine[] { + if (action.kind === 'write') { + return highlightYamlText(action.content) + } + // run : prompt then output + const out: HighlightedLine[] = [] + out.push([{ text: '── prompt ──', color: C.grey, dim: true }]) + out.push(...highlightPlain(action.prompt)) + out.push([{ text: '' }]) + out.push([{ text: '── output ──', color: C.grey, dim: true }]) + if (action.output.length > 0) { + out.push(...highlightPlain(action.output)) + } else { + out.push([{ text: '(empty)', color: C.grey, dim: true }]) + } + if (action.status === 'failed' && action.error) { + out.push([{ text: '' }]) + out.push([{ text: `✗ ${action.error}`, color: C.red }]) + } + return out +} + +function headerFor(action: Action): string { + if (action.kind === 'write') return `write ${action.path}` + return `run ${action.agent}` +} + +export function CardDetail({ + action, + onClose, +}: { + action: Action + onClose: () => void +}): React.JSX.Element { + const rows = process.stdout.rows ?? 
30 + const cols = process.stdout.columns ?? 80 + const lines = buildLines(action) + + // Reserve : 2 rows for the title bar, 2 rows for the footer hint, 1 + // separator. Body gets the rest. + const bodyHeight = Math.max(5, rows - 5) + const [offset, setOffset] = useState(0) + const maxOffset = Math.max(0, lines.length - bodyHeight) + + useInput((input, key) => { + if (key.escape || input === 'q') { + onClose() + return + } + if (key.pageUp) setOffset((o) => Math.max(0, o - bodyHeight)) + else if (key.pageDown) setOffset((o) => Math.min(maxOffset, o + bodyHeight)) + else if (key.upArrow) setOffset((o) => Math.max(0, o - 1)) + else if (key.downArrow) setOffset((o) => Math.min(maxOffset, o + 1)) + else if (input === 'g') setOffset(0) + else if (input === 'G') setOffset(maxOffset) + }) + + const visible = lines.slice(offset, offset + bodyHeight) + const totalLines = lines.length + const lastShown = Math.min(totalLines, offset + bodyHeight) + + return ( + + {/* Title bar */} + + + {`[${STATUS_LABEL[action.status]}]`} + + + {' detail '} + + {headerFor(action)} + + + {'─'.repeat(cols)} + + + {/* Body */} + + {visible.map((segments: HighlightedLine, i: number) => { + const lineNo = offset + i + 1 + return ( + + + {`${lineNo.toString().padStart(4, ' ')} `} + + {segments.map((seg: Segment, j: number) => ( + + {seg.text} + + ))} + + ) + })} + + + {/* Footer */} + + {'─'.repeat(cols)} + + + + + {`lines ${(offset + 1).toString()}..${lastShown.toString()} of ${totalLines.toString()}`} + + + + + {'[↑↓ / PgUp/PgDn] scroll [g/G] top/bottom [Esc / q] close'} + + + + + ) +} diff --git a/packages/cli/src/components/MissionControl.tsx b/packages/cli/src/components/MissionControl.tsx index 9edc30f..999b350 100644 --- a/packages/cli/src/components/MissionControl.tsx +++ b/packages/cli/src/components/MissionControl.tsx @@ -84,7 +84,8 @@ function StatusBadge({ status }: { status: ActionStatus }): React.JSX.Element { ) } -function borderColorFor(status: ActionStatus): string { +function 
borderColorFor(status: ActionStatus, focused: boolean): string { + if (focused) return C.orangeBright if (status === 'done') return C.green if (status === 'failed') return C.red if (status === 'declined') return C.grey @@ -94,16 +95,18 @@ function borderColorFor(status: ActionStatus): string { function CardFrame({ status, + focused, children, }: { status: ActionStatus + focused: boolean children: React.ReactNode }): React.JSX.Element { return ( + {focused ? '▸ ' : ' '} + + ) +} + +function WriteCard({ + action, + focused, +}: { + action: WriteAction + focused: boolean +}): React.JSX.Element { const lines = highlightYamlText(action.content) return ( - + + {' write '} {action.path} @@ -140,12 +158,19 @@ function WriteCard({ action }: { action: WriteAction }): React.JSX.Element { ) } -function RunCard({ action }: { action: RunAction }): React.JSX.Element { +function RunCard({ + action, + focused, +}: { + action: RunAction + focused: boolean +}): React.JSX.Element { const promptLines = highlightPlain(action.prompt) const outputLines = action.output.length > 0 ? highlightPlain(action.output) : [] return ( - + + {' run '} {action.agent} @@ -173,8 +198,10 @@ function RunCard({ action }: { action: RunAction }): React.JSX.Element { export function MissionControl({ actions, + focusedId, }: { actions: Action[] + focusedId: string | null }): React.JSX.Element { const cols = process.stdout.columns ?? 80 return ( @@ -191,14 +218,24 @@ export function MissionControl({ {` ${actions.length.toString()} action${actions.length === 1 ? '' : 's'}`} + {focusedId === null ? ( + + {' [Tab] focus a card · [Enter] open detail'} + + ) : ( + + {' [Enter] open detail · [Tab/Shift+Tab] cycle · [Esc] unfocus'} + + )} - {actions.map((a) => - a.kind === 'write' ? ( - + {actions.map((a) => { + const focused = a.id === focusedId + return a.kind === 'write' ? 
( + ) : ( - - ), - )} + + ) + })} ) } diff --git a/packages/cli/src/components/Welcome.tsx b/packages/cli/src/components/Welcome.tsx index e1dcee4..3bb709d 100644 --- a/packages/cli/src/components/Welcome.tsx +++ b/packages/cli/src/components/Welcome.tsx @@ -14,7 +14,7 @@ import { Box, Text, useApp, useStdin } from 'ink' import TextInput from 'ink-text-input' -import React, { useState } from 'react' +import React from 'react' import { getCurrentModelName } from '@agent-forge/core/builder' import { isCommand, runCommand } from '../commands.ts' import { useChatContext } from '../hooks/useChatContext.tsx' @@ -39,7 +39,6 @@ export function Welcome(): React.JSX.Element { const { lang, setLang } = useLanguage() const { exit } = useApp() const { isRawModeSupported } = useStdin() - const [input, setInput] = useState('') const { state, send, @@ -51,6 +50,8 @@ export function Welcome(): React.JSX.Element { pending, approvePending, declinePending, + promptDraft, + setPromptDraft, } = useChatContext() const hasMessages = state.messages.length > 0 || state.streaming !== null @@ -59,7 +60,7 @@ export function Welcome(): React.JSX.Element { const handleSubmit = (value: string): void => { const trimmed = value.trim() if (!trimmed || busy) return - setInput('') + setPromptDraft('') if (isCommand(trimmed)) { addSystemMessage(trimmed) @@ -116,8 +117,8 @@ export function Welcome(): React.JSX.Element { {' ❯ '} {isRawModeSupported ? ( diff --git a/packages/cli/src/hooks/useCardFocus.ts b/packages/cli/src/hooks/useCardFocus.ts new file mode 100644 index 0000000..a40bd4a --- /dev/null +++ b/packages/cli/src/hooks/useCardFocus.ts @@ -0,0 +1,85 @@ +// Mission Control card focus + detail view state. +// +// Kept separate from useChat so the chat hook stays focused on +// conversation/action state. 
Exposes : +// - focusedId : id of the action currently highlighted (or null) +// - detailOpen : whether the full-screen detail panel is mounted +// - cycle / cycleBack / open / close : the actions wired to Tab keys +// +// Behaviour : +// - Tab from "no focus" → focus the LAST action (most recent on top +// of Mission Control reads as bottom of the list, so we land on +// what the user just saw). +// - Tab again → walk forward; wraps around. +// - Shift+Tab → walk backward; wraps around. +// - When the focused action disappears (cleared, etc.), focus resets. + +import { useCallback, useEffect, useState } from 'react' +import type { Action } from '../actions/types.ts' + +export type CardFocusApi = { + focusedId: string | null + detailOpen: boolean + cycle: () => void + cycleBack: () => void + open: () => void + close: () => void + clearFocus: () => void +} + +export function useCardFocus(actions: Action[]): CardFocusApi { + const [focusedId, setFocusedId] = useState(null) + const [detailOpen, setDetailOpen] = useState(false) + + // If the focused action disappears (e.g. /clear), drop focus and the + // detail panel together so we never display a stale card. + useEffect(() => { + if (focusedId === null) return + const stillThere = actions.some((a) => a.id === focusedId) + if (!stillThere) { + setFocusedId(null) + setDetailOpen(false) + } + }, [actions, focusedId]) + + const cycle = useCallback(() => { + if (actions.length === 0) return + setFocusedId((current) => { + if (current === null) { + return actions[actions.length - 1]?.id ?? null + } + const idx = actions.findIndex((a) => a.id === current) + if (idx === -1) return actions[actions.length - 1]?.id ?? null + const next = (idx + 1) % actions.length + return actions[next]?.id ?? null + }) + }, [actions]) + + const cycleBack = useCallback(() => { + if (actions.length === 0) return + setFocusedId((current) => { + if (current === null) { + return actions[0]?.id ?? 
null + } + const idx = actions.findIndex((a) => a.id === current) + if (idx === -1) return actions[0]?.id ?? null + const prev = (idx - 1 + actions.length) % actions.length + return actions[prev]?.id ?? null + }) + }, [actions]) + + const open = useCallback(() => { + if (focusedId !== null) setDetailOpen(true) + }, [focusedId]) + + const close = useCallback(() => { + setDetailOpen(false) + }, []) + + const clearFocus = useCallback(() => { + setFocusedId(null) + setDetailOpen(false) + }, []) + + return { focusedId, detailOpen, cycle, cycleBack, open, close, clearFocus } +} diff --git a/packages/cli/src/hooks/useChat.ts b/packages/cli/src/hooks/useChat.ts index 199272f..13dce72 100644 --- a/packages/cli/src/hooks/useChat.ts +++ b/packages/cli/src/hooks/useChat.ts @@ -116,6 +116,8 @@ export function useChat(lang: Lang): { pending: Action | null approvePending: () => void declinePending: () => void + promptDraft: string + setPromptDraft: (value: string) => void } { const [state, setState] = useState({ messages: [], @@ -125,6 +127,13 @@ export function useChat(lang: Lang): { }) const [busy, setBusy] = useState(false) const [scrollOffset, setScrollOffset] = useState(0) + // Lifted out of Welcome so App can know when the input is empty (and + // thus capture Tab for Mission Control focus without stealing keys + // from the prompt). + const [promptDraft, setPromptDraftState] = useState('') + const setPromptDraft = useCallback((value: string) => { + setPromptDraftState(value) + }, []) // Buffer des messages cachés mais toujours envoyés au LLM dans `send`. // `/clear` y déplace les messages visibles (vue vide, contexte préservé) ; // `/reset` le purge. Stocké en ref pour ne pas redéclencher de rendu. 
@@ -366,5 +375,7 @@ export function useChat(lang: Lang): { pending: headPending, approvePending, declinePending, + promptDraft, + setPromptDraft, } } diff --git a/packages/cli/tests/builder-actions.test.ts b/packages/cli/tests/builder-actions.test.ts index 25c58d1..18259cf 100644 --- a/packages/cli/tests/builder-actions.test.ts +++ b/packages/cli/tests/builder-actions.test.ts @@ -198,6 +198,48 @@ body` if (exec.kind === 'write') expect(exec.result.ok).toBe(true) }) + test('quotes a description that contains an unquoted colon', () => { + const unsafe = `--- +name: ${TEST_AGENT} +description: Audits the project. Step 1: list files. Step 2: fix TODOs. +sandbox: + image: agent-forge/base:latest + timeout: 60s +maxTurns: 1 +--- + +body` + const exec = executeAction({ + kind: 'write', + path: `agents/${TEST_AGENT}/AGENT.md`, + content: unsafe, + raw: '', + }) + expect(exec.kind).toBe('write') + if (exec.kind === 'write') expect(exec.result.ok).toBe(true) + }) + + test('leaves an already-quoted description untouched', () => { + const safe = `--- +name: ${TEST_AGENT} +description: "Step 1: do this. Step 2: do that." +sandbox: + image: agent-forge/base:latest + timeout: 60s +maxTurns: 1 +--- + +body` + const exec = executeAction({ + kind: 'write', + path: `agents/${TEST_AGENT}/AGENT.md`, + content: safe, + raw: '', + }) + expect(exec.kind).toBe('write') + if (exec.kind === 'write') expect(exec.result.ok).toBe(true) + }) + test('run action passes through pre-flight (actual launch is async)', () => { const exec = executeAction({ kind: 'run', diff --git a/packages/core/src/builder/system-prompt.ts b/packages/core/src/builder/system-prompt.ts index 81ab986..8d3d869 100644 --- a/packages/core/src/builder/system-prompt.ts +++ b/packages/core/src/builder/system-prompt.ts @@ -35,6 +35,7 @@ You are a haiku poet. Answer with exactly three lines, syllables 5-7-5. ABSOLUTE rules — failing any of these IS A BUG : - The path MUST be exactly \`agents//AGENT.md\`. 
The filename MUST be the literal string \`AGENT.md\`. Never invent variants like \`haiku-writer.md\` or \`HAIKU-WRITER.md\`. - The file content MUST start with a YAML frontmatter block : a line \`---\`, then the YAML keys (name, description, sandbox, maxTurns), then a closing \`---\`, then the body. Look at the example above carefully — there are TWO \`---\` after the \`path:\` line : the first one separates the path from the content, the second one OPENS the frontmatter. +- The \`description\` value MUST be a single line of plain prose, with NO colon (\`:\`), NO YAML-looking syntax (\`key: value\`), NO line break, NO unbalanced quote. If you cannot write it cleanly without a colon, wrap the whole value in double quotes : \`description: "Audits the project. Step 1: list files. Step 2: fix TODOs."\`. Never repeat the values of the other keys (\`maxTurns\`, \`timeout\`) inside \`description\` — they go in the body of the AGENT.md instead. - The block opens with three backticks + \`forge:write\` and CLOSES with three backticks on their own line. - Replace placeholders with real values. Do not keep angle brackets. - Always propose the block first and ask the user to confirm with "yes" / "go" / "ok" before re-emitting it. @@ -83,6 +84,7 @@ Tu es un poète haïku. Réponds par exactement trois lignes, syllabes 5-7-5. Règles ABSOLUES — toute violation EST UN BUG : - Le chemin DOIT être exactement \`agents//AGENT.md\`. Le nom de fichier DOIT être la chaîne littérale \`AGENT.md\`. N'invente jamais de variante comme \`haiku-writer.md\` ou \`HAIKU-WRITER.md\`. - Le contenu du fichier DOIT commencer par un bloc YAML frontmatter : une ligne \`---\`, puis les clés YAML (name, description, sandbox, maxTurns), puis un \`---\` de fermeture, puis le corps. Regarde bien l'exemple ci-dessus — il y a DEUX \`---\` après la ligne \`path:\` : le premier sépare le path du contenu, le second OUVRE le frontmatter. 
+- La valeur de \`description\` DOIT être une seule ligne de prose simple, SANS deux-points (\`:\`), SANS syntaxe ressemblant à du YAML (\`clé: valeur\`), SANS retour à la ligne, SANS guillemet non fermé. Si tu ne peux pas écrire la valeur proprement sans deux-points, encadre toute la valeur entre guillemets doubles : \`description: "Audite le projet. Étape 1 : lister les fichiers. Étape 2 : corriger les TODO."\`. Ne répète JAMAIS les valeurs des autres clés (\`maxTurns\`, \`timeout\`) dans la \`description\` — elles vont dans le corps de l'AGENT.md. - Le bloc s'ouvre par trois backticks + \`forge:write\` et se FERME par trois backticks sur leur propre ligne. - Remplace les placeholders par des vraies valeurs. Ne laisse pas les chevrons. - Propose toujours le bloc d'abord et demande la confirmation (« oui » / « ok » / « go ») avant de le ré-émettre. diff --git a/packages/runtime/src/index.ts b/packages/runtime/src/index.ts index 19d2732..a473609 100644 --- a/packages/runtime/src/index.ts +++ b/packages/runtime/src/index.ts @@ -4,19 +4,41 @@ // // 1. Standalone (P1) : reads a prompt from stdin, calls an OpenAI- // compatible LLM endpoint, streams the answer to stdout. No agent -// configuration required. +// configuration required, no tool loop. // -// 2. Agent mode (P3.4) : if an AGENT.md is mounted at /agent/AGENT.md, -// its frontmatter overrides the model and its body becomes the -// system prompt. The prompt from stdin is the user message. +// 2. Agent mode (P3+) : reads /agent/AGENT.md (frontmatter overrides +// the model, body becomes the system prompt). The user prompt comes +// from stdin. Native tools are available via fenced forge:* blocks +// (P4) — the runtime parses them, executes the tool, feeds the +// result back into the conversation, and loops up to maxTurns. // -// The output is STREAMED token by token to stdout so the host can render -// progress live in the TUI. 
+// Output is STREAMED token by token to stdout so the host can render +// progress live in the TUI. Tool results are also written to stdout +// inside [forge:tool] markers so the host can show them in Mission +// Control without re-running the parser. import { readFileSync } from 'node:fs' import { createOpenAI } from '@ai-sdk/openai' import { parseAgentMd } from '@agent-forge/core/types' -import { streamText } from 'ai' +import { + executeBash, + executeRuntimeFileEdit, + executeRuntimeFileRead, + executeRuntimeFileWrite, + executeRuntimeGlob, + executeRuntimeGrep, +} from '@agent-forge/tools-core' +import { type CoreMessage, streamText } from 'ai' +import { + parseFirstToolBlock, + renderBashResult, + renderEditResult, + renderGlobResult, + renderGrepResult, + renderInvalid, + renderReadResult, + renderWriteResult, +} from './tool-protocol.ts' const AGENT_MD_PATH = '/agent/AGENT.md' @@ -25,16 +47,18 @@ const API_KEY = process.env.FORGE_API_KEY ?? 'not-needed' const ENV_MODEL = process.env.FORGE_MODEL ?? 'mlx-community/Qwen2.5-7B-Instruct-4bit' const MAX_TOKENS = Number(process.env.FORGE_MAX_TOKENS ?? '1024') +// Hard cap to prevent runaway loops even if AGENT.md says otherwise. +const MAX_TURNS_HARD_CAP = 10 type AgentConfig = { model: string systemPrompt?: string agentName?: string + maxTurns: number } function loadAgentConfig(): AgentConfig { - // Default config when no AGENT.md is mounted (standalone P1 mode). - let config: AgentConfig = { model: ENV_MODEL } + let config: AgentConfig = { model: ENV_MODEL, maxTurns: 1 } try { const raw = readFileSync(AGENT_MD_PATH, 'utf8') const parsed = parseAgentMd(raw) @@ -42,11 +66,9 @@ function loadAgentConfig(): AgentConfig { model: parsed.meta.model ?? ENV_MODEL, systemPrompt: parsed.body.length > 0 ? parsed.body : undefined, agentName: parsed.meta.name, + maxTurns: Math.min(parsed.meta.maxTurns ?? 1, MAX_TURNS_HARD_CAP), } } catch (err) { - // ENOENT means standalone mode, that is fine. 
Anything else is fatal : - // a malformed AGENT.md would otherwise silently fall back to the - // default model + no system prompt, which is misleading. const code = (err as NodeJS.ErrnoException).code if (code !== 'ENOENT') { console.error( @@ -68,28 +90,163 @@ async function readStdin(): Promise { return Buffer.concat(chunks).toString('utf8').trim() } +const TOOL_PROMPT = ` + +You have access to six native tools, each callable by emitting a fenced block in your reply. + +## forge:bash — execute a shell command + +\`\`\`forge:bash +{ "command": "ls -la", "timeoutMs": 10000 } +\`\`\` + +Runs via \`bash -lc\` inside /workspace. \`timeoutMs\` defaults to 30000, capped at 120000. + +## forge:write — create or overwrite a file + +\`\`\`forge:write +{ "path": "src/index.ts", "content": "export const x = 1\\n" } +\`\`\` + +\`path\` is relative to /workspace (or absolute under /workspace). Existing files are overwritten. + +## forge:read — read a file + +\`\`\`forge:read +{ "path": "src/index.ts", "offset": 0, "limit": 200 } +\`\`\` + +\`offset\` and \`limit\` are line-based, both optional. Default limit 200, max 2000. Output is clipped at 16 KB ; use offset/limit to walk a long file. + +## forge:edit — patch a file by exact substring replacement + +\`\`\`forge:edit +{ "path": "src/index.ts", "oldString": "const x = 1", "newString": "const x = 2" } +\`\`\` + +\`oldString\` must match exactly once unless you set \`replaceAll\` true. If it matches multiple times, widen the surrounding context until it's unique. + +## forge:grep — regex search across files + +\`\`\`forge:grep +{ "pattern": "TODO|FIXME", "glob": "src/**/*.ts", "ignoreCase": false } +\`\`\` + +\`pattern\` is a JavaScript RegExp source. \`glob\` is optional (defaults to \`**/*\`). Returns up to 200 hits with path:line:text. + +## forge:glob — list files by pattern + +\`\`\`forge:glob +{ "pattern": "src/**/*.ts" } +\`\`\` + +Supports \`*\`, \`**\`, and \`?\`. Returns up to 200 paths relative to /workspace. 
+ +## Iteration + +- Emit at most ONE block per reply. Text before the block is shown to the user. Text after the block is discarded. +- After you receive a tool result, decide whether you need another tool call or whether you can produce the final answer. +- When you are done, reply with plain text (no fenced block). +` + +function buildSystem(config: AgentConfig, hasTools: boolean): string | undefined { + const base = config.systemPrompt ?? '' + if (!hasTools) return base.length > 0 ? base : undefined + return base.length > 0 ? `${base}${TOOL_PROMPT}` : TOOL_PROMPT.trim() +} + +async function streamOneTurn( + provider: ReturnType, + model: string, + system: string | undefined, + messages: CoreMessage[], +): Promise { + const result = streamText({ + model: provider(model), + system, + messages, + maxTokens: MAX_TOKENS, + }) + let acc = '' + for await (const chunk of result.textStream) { + process.stdout.write(chunk) + acc += chunk + } + return acc +} + +async function executeToolBlock( + parsed: Extract, { kind: 'tool' }>, +): Promise { + const tool = parsed.tool + switch (tool.kind) { + case 'bash': { + const result = await executeBash(tool.input) + return renderBashResult(tool.input, result) + } + case 'write': { + const result = executeRuntimeFileWrite(tool.input) + return renderWriteResult(tool.input, result) + } + case 'read': { + const result = executeRuntimeFileRead(tool.input) + return renderReadResult(tool.input, result) + } + case 'edit': { + const result = executeRuntimeFileEdit(tool.input) + return renderEditResult(tool.input, result) + } + case 'grep': { + const result = executeRuntimeGrep(tool.input) + return renderGrepResult(tool.input, result) + } + case 'glob': { + const result = executeRuntimeGlob(tool.input) + return renderGlobResult(tool.input, result) + } + } +} + async function main(): Promise { const config = loadAgentConfig() - const prompt = await readStdin() - if (!prompt) { + const userPrompt = await readStdin() + if (!userPrompt) { 
console.error('✗ no prompt received on stdin') process.exit(1) } const provider = createOpenAI({ baseURL: BASE_URL, apiKey: API_KEY }) + const hasTools = config.maxTurns > 1 + const system = buildSystem(config, hasTools) - const result = streamText({ - model: provider(config.model), - system: config.systemPrompt, - prompt, - maxTokens: MAX_TOKENS, - }) + const messages: CoreMessage[] = [{ role: 'user', content: userPrompt }] - for await (const chunk of result.textStream) { - process.stdout.write(chunk) + for (let turn = 0; turn < config.maxTurns; turn += 1) { + const reply = await streamOneTurn(provider, config.model, system, messages) + process.stdout.write('\n') + + if (!hasTools) break + + const parsed = parseFirstToolBlock(reply) + if (parsed.kind === 'none') break + + // Record what the LLM just said (text + raw block) so the next turn + // sees it as a real assistant message. + messages.push({ role: 'assistant', content: reply }) + + let toolReply: string + if (parsed.kind === 'invalid') { + toolReply = renderInvalid(parsed.error) + } else { + toolReply = await executeToolBlock(parsed) + } + + // Mark tool output for the host TUI so it can render it inside the + // Mission Control card instead of mixing it with prose. + process.stdout.write(`\n[forge:tool]\n${toolReply}\n[/forge:tool]\n`) + + messages.push({ role: 'user', content: toolReply }) } - // Trailing newline so the host can detect the end of the stream cleanly. - process.stdout.write('\n') } main().catch((err) => { diff --git a/packages/runtime/src/tool-protocol.ts b/packages/runtime/src/tool-protocol.ts new file mode 100644 index 0000000..b9ea1a0 --- /dev/null +++ b/packages/runtime/src/tool-protocol.ts @@ -0,0 +1,221 @@ +// Agent-side tool protocol — fenced blocks the agent emits to invoke a +// native tool, and the rendering of tool results back to the LLM. 
+// +// We deliberately mirror the builder's text-structured protocol (forge:write +// and forge:run) instead of using OpenAI tool_calls for two reasons : +// 1. Local LLMs (MLX, llama.cpp) often don't honor tool_calls. +// 2. A consistent protocol across builder and agents simplifies debugging +// and lets users read the raw stream. +// +// Six tools wired today : bash, write, read, edit, grep, glob. +// +// ```forge:bash +// { "command": "ls -la" } +// ``` +// +// ```forge:write +// { "path": "src/index.ts", "content": "..." } +// ``` +// +// ```forge:read +// { "path": "src/index.ts", "offset": 0, "limit": 200 } +// ``` +// +// ```forge:edit +// { "path": "src/index.ts", "oldString": "...", "newString": "..." } +// ``` +// +// ```forge:grep +// { "pattern": "TODO", "glob": "**/*.ts", "ignoreCase": true } +// ``` +// +// ```forge:glob +// { "pattern": "src/**/*.ts" } +// ``` +// +// Only ONE block is parsed per turn (the first encountered). Everything +// before the block is treated as the agent's "thinking out loud" text +// and streamed to the host. Everything after the block is dropped — the +// agent will see the tool result on the next turn and continue from there. 
// Extract and validate the first `forge:<tool>` fenced block in `stream`.
//
// Returns :
//   - kind 'none'    when no forge fence is present (text = whole stream)
//   - kind 'invalid' when the body is not JSON or fails the tool's schema
//   - kind 'tool'    with the validated input otherwise
// In the last two cases `text` is everything that precedes the block.
//
// FENCE_RE stops at the FIRST "```" after the opener. That is wrong when a
// JSON string value itself contains a code fence (typical for a forge:write
// of a markdown file). To cope, the closing fence found by the regex is only
// the first candidate : if the body up to it is not valid JSON, each later
// "```" is tried in turn and the first body that parses wins. When no
// candidate parses, the error and raw text of the first candidate are
// reported, which is what the regex alone would have produced.
export function parseFirstToolBlock(stream: string): ParseOutcome {
  const m = FENCE_RE.exec(stream)
  if (!m) return { kind: 'none', text: stream }

  const tag = m[1] as ToolKind
  const before = stream.slice(0, m.index)

  const fence = '```'
  const firstBody = m[2] ?? ''
  // m[0] is `<opener><body>```, so the body starts this far into the stream.
  const bodyStart = m.index + m[0].length - fence.length - firstBody.length

  let parsed: unknown
  let raw = m[0]
  let firstError: unknown
  let parsedOk = false
  for (
    let close = bodyStart + firstBody.length;
    close !== -1;
    close = stream.indexOf(fence, close + 1)
  ) {
    try {
      parsed = JSON.parse(stream.slice(bodyStart, close))
      raw = stream.slice(m.index, close + fence.length)
      parsedOk = true
      break
    } catch (err) {
      // Keep the error of the first candidate : it is the most useful one
      // to show the agent when nothing parses.
      if (firstError === undefined) firstError = err
    }
  }

  if (!parsedOk) {
    return {
      kind: 'invalid',
      text: before,
      error: `forge:${tag} block is not valid JSON : ${
        firstError instanceof Error ? firstError.message : String(firstError)
      }`,
      raw: m[0],
    }
  }

  const schema = SCHEMAS[tag]
  const result = schema.safeParse(parsed)
  if (!result.success) {
    return {
      kind: 'invalid',
      text: before,
      error: `forge:${tag} input failed validation : ${formatZodError(result.error)}`,
      raw,
    }
  }

  // Narrow to the right ParsedTool variant by tag — the schema guarantees
  // the data shape matches.
  return {
    kind: 'tool',
    text: before,
    tool: { kind: tag, input: result.data, raw } as ParsedTool,
  }
}
// Format a forge:glob outcome as the message fed back to the LLM.
// Failure : a single line naming the pattern and the error.
// Success : a header with the match count (plus a truncation marker when
// the result was capped), followed by one matched path per line.
export function renderGlobResult(
  input: RuntimeGlobInput,
  result: RuntimeGlobResult,
): string {
  if (result.ok === false) {
    return `[forge:glob result] FAILED on pattern="${input.pattern}" : ${result.error}`
  }

  const { matches, truncated } = result
  const count = matches.length
  const plural = count === 1 ? '' : 'es'
  const marker = truncated ? ' (truncated)' : ''

  // With zero matches the array holds only the header, so join() yields
  // the header alone — no trailing newline.
  const lines = [`[forge:glob result] ${count.toString()} match${plural}${marker}`, ...matches]
  return lines.join('\n')
}
// Message returned to the LLM when its tool block could not be parsed or
// validated : the error on the first line, a blank line, then a retry hint.
export function renderInvalid(error: string): string {
  const header = `[forge:tool error] ${error}`
  const hint = 'Fix the JSON or schema and try again.'
  return [header, '', hint].join('\n')
}
parseFirstToolBlock(stream) + expect(r.kind).toBe('invalid') + if (r.kind === 'invalid') expect(r.error).toContain('not valid JSON') + }) + + test('returns kind=invalid when schema is wrong', () => { + const stream = '```forge:bash\n{ "command": "" }\n```' + const r = parseFirstToolBlock(stream) + expect(r.kind).toBe('invalid') + if (r.kind === 'invalid') expect(r.error).toContain('failed validation') + }) + + test('only the first block matters', () => { + const stream = [ + '```forge:bash', + '{ "command": "echo a" }', + '```', + '```forge:bash', + '{ "command": "echo b" }', + '```', + ].join('\n') + const r = parseFirstToolBlock(stream) + expect(r.kind).toBe('tool') + if (r.kind === 'tool' && r.tool.kind === 'bash') { + expect(r.tool.input.command).toBe('echo a') + } + }) + + test('parses forge:read', () => { + const r = parseFirstToolBlock( + '```forge:read\n{ "path": "src/x.ts", "offset": 10, "limit": 50 }\n```', + ) + expect(r.kind).toBe('tool') + if (r.kind === 'tool' && r.tool.kind === 'read') { + expect(r.tool.input.path).toBe('src/x.ts') + expect(r.tool.input.offset).toBe(10) + expect(r.tool.input.limit).toBe(50) + } + }) + + test('parses forge:edit', () => { + const r = parseFirstToolBlock( + '```forge:edit\n{ "path": "a.ts", "oldString": "x", "newString": "y" }\n```', + ) + expect(r.kind).toBe('tool') + if (r.kind === 'tool' && r.tool.kind === 'edit') { + expect(r.tool.input.oldString).toBe('x') + expect(r.tool.input.newString).toBe('y') + } + }) + + test('parses forge:grep', () => { + const r = parseFirstToolBlock( + '```forge:grep\n{ "pattern": "TODO", "glob": "**/*.ts", "ignoreCase": true }\n```', + ) + expect(r.kind).toBe('tool') + if (r.kind === 'tool' && r.tool.kind === 'grep') { + expect(r.tool.input.pattern).toBe('TODO') + expect(r.tool.input.ignoreCase).toBe(true) + } + }) + + test('parses forge:glob', () => { + const r = parseFirstToolBlock( + '```forge:glob\n{ "pattern": "src/**/*.ts" }\n```', + ) + expect(r.kind).toBe('tool') + if (r.kind === 
'tool' && r.tool.kind === 'glob') { + expect(r.tool.input.pattern).toBe('src/**/*.ts') + } + }) + + test('rejects invalid forge:edit (oldString equals newString)', () => { + const r = parseFirstToolBlock( + '```forge:edit\n{ "path": "a.ts", "oldString": "x", "newString": "x" }\n```', + ) + expect(r.kind).toBe('invalid') + }) +}) diff --git a/packages/tools-core/src/docker-launch.ts b/packages/tools-core/src/docker-launch.ts index c7f5594..5544e00 100644 --- a/packages/tools-core/src/docker-launch.ts +++ b/packages/tools-core/src/docker-launch.ts @@ -9,7 +9,7 @@ // agents can run in parallel without collision. import { spawn, spawnSync } from 'node:child_process' -import { existsSync } from 'node:fs' +import { existsSync, mkdirSync } from 'node:fs' import { join } from 'node:path' import { z } from 'zod' import { FORGE_HOME } from './file-write.ts' @@ -75,6 +75,16 @@ export function launchAgent(input: DockerLaunchInput): LaunchHandle { spawnSync('docker', ['rm', '-f', containerName], { stdio: 'ignore' }) } + // Per-run workspace on the host, bind-mounted RW into the container so + // tools (forge:bash, forge:write) have a sandbox they can scribble in. + // Kept after the container exits — useful for debugging and for P5 + // artifact extraction. 
+ const workspaceHostDir = join( + FORGE_HOME, + 'workspaces', + containerName, + ) + async function* run(): AsyncGenerator { if (!existsSync(agentMdPath)) { yield { type: 'error', error: `AGENT.md not found : ${agentMdPath}` } @@ -90,6 +100,8 @@ export function launchAgent(input: DockerLaunchInput): LaunchHandle { return } + mkdirSync(workspaceHostDir, { recursive: true }) + const args = [ 'run', '--rm', @@ -100,6 +112,10 @@ export function launchAgent(input: DockerLaunchInput): LaunchHandle { `${agentMdPath}:/agent/AGENT.md:ro`, '-v', `${RUNTIME_DIST_FROM_TOOLS}:/runtime:ro`, + '-v', + `${workspaceHostDir}:/workspace`, + '-w', + '/workspace', ...inheritEnv(), IMAGE, 'node', diff --git a/packages/tools-core/src/index.ts b/packages/tools-core/src/index.ts index 0fb06f6..c38bac8 100644 --- a/packages/tools-core/src/index.ts +++ b/packages/tools-core/src/index.ts @@ -21,3 +21,50 @@ export { type DockerLaunchInput, type LaunchHandle, } from './docker-launch.ts' + +// Runtime-side tools — used INSIDE the agent's container, sandboxed to +// /workspace. Distinct from the host-side FileWrite above. 
+export { + BashInputSchema, + WORKSPACE_DIR, + executeBash, + type BashInput, + type BashResult, +} from './runtime/bash.ts' + +export { + RuntimeFileWriteInputSchema, + executeRuntimeFileWrite, + resolveSandboxedPath, + type RuntimeFileWriteInput, + type RuntimeFileWriteResult, +} from './runtime/file-write.ts' + +export { + RuntimeFileReadInputSchema, + executeRuntimeFileRead, + type RuntimeFileReadInput, + type RuntimeFileReadResult, +} from './runtime/file-read.ts' + +export { + RuntimeFileEditInputSchema, + executeRuntimeFileEdit, + type RuntimeFileEditInput, + type RuntimeFileEditResult, +} from './runtime/file-edit.ts' + +export { + RuntimeGlobInputSchema, + executeRuntimeGlob, + type RuntimeGlobInput, + type RuntimeGlobResult, +} from './runtime/glob.ts' + +export { + RuntimeGrepInputSchema, + executeRuntimeGrep, + type GrepHit, + type RuntimeGrepInput, + type RuntimeGrepResult, +} from './runtime/grep.ts' diff --git a/packages/tools-core/src/runtime/bash.ts b/packages/tools-core/src/runtime/bash.ts new file mode 100644 index 0000000..c039d42 --- /dev/null +++ b/packages/tools-core/src/runtime/bash.ts @@ -0,0 +1,104 @@ +// Bash — execute a shell command inside an agent's container. +// +// Runs INSIDE the container (called from @agent-forge/runtime). Wraps the +// command with `bash -lc` so simple shell features (pipes, &&, $VAR) just +// work. The cwd is locked to /workspace : the agent never sees anything +// outside its sandbox. A timeout (default 30s) prevents runaway commands +// from blocking the tool loop. +// +// Returns a structured result (stdout, stderr, exitCode, timedOut). The +// caller is responsible for formatting it back into a message the LLM will +// read on the next turn. + +import { spawn } from 'node:child_process' +import { z } from 'zod' + +export const WORKSPACE_DIR = '/workspace' + +// Tests on the host don't have /workspace. 
// Run `input.command` through `bash -lc` in the sandbox cwd and collect the
// outcome. Never rejects : spawn failures are reported as exitCode -1 with
// the error message appended to stderr.
//
// Resolves with :
//   stdout / stderr  captured output, clipped by clip()
//   exitCode         the process exit code, or -1 when there is none
//                    (killed by a signal, or the spawn itself failed)
//   timedOut         true when the timeout fired before the command ended
export async function executeBash(input: BashInput): Promise<BashResult> {
  const timeoutMs = input.timeoutMs ?? DEFAULT_TIMEOUT_MS
  return await new Promise<BashResult>((resolve) => {
    const child = spawn('bash', ['-lc', input.command], {
      cwd: bashCwd(),
      stdio: ['ignore', 'pipe', 'pipe'],
      // Own process group, so the timeout can kill everything the command
      // spawned. Killing only bash leaves children of a pipeline alive ;
      // they keep the stdio pipes open and 'close' would not fire until
      // they exit on their own, defeating the timeout.
      detached: true,
    })

    let stdout = ''
    let stderr = ''
    let timedOut = false

    const timer = setTimeout(() => {
      timedOut = true
      try {
        // A negative pid targets the whole process group.
        if (child.pid === undefined) child.kill('SIGKILL')
        else process.kill(-child.pid, 'SIGKILL')
      } catch {
        // Group already gone or group kill unsupported : fall back to
        // killing the direct child.
        child.kill('SIGKILL')
      }
    }, timeoutMs)

    // Decode at the stream level so a multi-byte UTF-8 character split
    // across two chunks is not turned into replacement characters.
    child.stdout.setEncoding('utf8')
    child.stderr.setEncoding('utf8')

    // Stop accumulating once past the cap : clip() only keeps the first
    // MAX_OUTPUT_BYTES characters, so the final result is unchanged while
    // memory stays bounded for commands that print without end.
    child.stdout.on('data', (chunk: string) => {
      if (stdout.length <= MAX_OUTPUT_BYTES) stdout += chunk
    })
    child.stderr.on('data', (chunk: string) => {
      if (stderr.length <= MAX_OUTPUT_BYTES) stderr += chunk
    })

    child.on('error', (err) => {
      clearTimeout(timer)
      resolve({
        stdout: clip(stdout),
        stderr: clip(`${stderr}${err.message}`),
        exitCode: -1,
        timedOut,
      })
    })

    child.on('close', (code) => {
      clearTimeout(timer)
      resolve({
        stdout: clip(stdout),
        stderr: clip(stderr),
        exitCode: code ?? -1,
        timedOut,
      })
    })
  })
}
// Replace `oldString` with `newString` in a sandboxed file.
//
// Fails (ok: false) when the path escapes the sandbox, the file cannot be
// read or written, `oldString` is absent, or it occurs more than once while
// `replaceAll` is not set. On success, reports the resolved path and the
// number of replacements performed.
export function executeRuntimeFileEdit(
  input: RuntimeFileEditInput,
): RuntimeFileEditResult {
  const safe = resolveSandboxedPath(input.path)
  if (!safe.ok) return safe

  let original: string
  try {
    original = readFileSync(safe.absolutePath, 'utf8')
  } catch (err) {
    return { ok: false, error: err instanceof Error ? err.message : String(err) }
  }

  const occurrences = countOccurrences(original, input.oldString)
  if (occurrences === 0) {
    return { ok: false, error: 'oldString not found in file' }
  }
  if (occurrences > 1 && !input.replaceAll) {
    return {
      ok: false,
      error: `oldString matches ${occurrences.toString()} times — widen the context or set replaceAll=true`,
    }
  }

  // The single-replacement path passes a replacer FUNCTION on purpose :
  // with a string replacement, String.prototype.replace expands `$&`,
  // `$1`, `$$`, … inside newString, which silently corrupts edits whose
  // replacement text contains a dollar sign (shell, templates, regexes).
  const updated = input.replaceAll
    ? original.split(input.oldString).join(input.newString)
    : original.replace(input.oldString, () => input.newString)

  try {
    writeFileSync(safe.absolutePath, updated, 'utf8')
  } catch (err) {
    return { ok: false, error: err instanceof Error ? err.message : String(err) }
  }

  return {
    ok: true,
    absolutePath: safe.absolutePath,
    replacements: input.replaceAll ? occurrences : 1,
  }
}
err.message : String(err) } + } + + const allLines = raw.split('\n') + // Drop the trailing empty element when the file ends with \n so totalLines + // reflects the human count, not split() artifact. + if (allLines.length > 0 && allLines[allLines.length - 1] === '') { + allLines.pop() + } + const totalLines = allLines.length + + const offset = input.offset ?? 0 + const limit = input.limit ?? DEFAULT_LIMIT + const slice = allLines.slice(offset, offset + limit) + let content = slice.join('\n') + + let truncatedBytes = false + if (Buffer.byteLength(content, 'utf8') > MAX_BYTES) { + truncatedBytes = true + content = `${content.slice(0, MAX_BYTES)}\n…[output truncated at ${MAX_BYTES.toString()} bytes — use offset/limit for the rest]` + } + + return { + ok: true, + absolutePath: safe.absolutePath, + content, + totalLines, + returnedLines: slice.length, + truncatedBytes, + } +} diff --git a/packages/tools-core/src/runtime/file-write.ts b/packages/tools-core/src/runtime/file-write.ts new file mode 100644 index 0000000..f568eee --- /dev/null +++ b/packages/tools-core/src/runtime/file-write.ts @@ -0,0 +1,76 @@ +// FileWrite (runtime) — write a file under /workspace from inside the +// agent's container. +// +// Distinct from packages/tools-core/src/file-write.ts which writes under +// the host's ~/.agent-forge/. The runtime version is sandboxed to +// /workspace : the agent has no way to escape its container's mount. +// +// Path traversal (..), null bytes, and absolute paths outside /workspace +// are refused. Existing files are overwritten by default — unlike the +// host tool which is strict — because in-sandbox iteration is expected +// (agents often rewrite their own files mid-loop). +// +// The sandbox root defaults to /workspace (the in-container mount) but +// can be overridden via FORGE_WORKSPACE — useful for tests that want to +// run on the host without touching /workspace. 
// Resolve `rawPath` against the sandbox root and refuse anything that lands
// outside it.
//
// Relative paths are joined to the root ; absolute paths are taken as-is.
// The result is normalized with resolve(), so `..` segments are collapsed
// before the containment check. Paths containing a NUL byte are rejected.
//
// NOTE : the check is purely lexical — symlinks are not resolved here.
export function resolveSandboxedPath(rawPath: string):
  | { ok: true; absolutePath: string }
  | { ok: false; error: string } {
  if (rawPath.includes('\0')) {
    return { ok: false, error: 'path contains a null byte' }
  }
  // Normalize the root too. It can come from FORGE_WORKSPACE, and a value
  // with a trailing slash or a relative value would otherwise never be a
  // prefix of the resolved target, so every path would be rejected.
  const root = resolve(sandboxRoot())
  const target = isAbsolute(rawPath) ? rawPath : join(root, rawPath)
  const resolved = resolve(target)
  // After resolve() the root only ends with a slash when it is "/" itself.
  const prefix = root.endsWith('/') ? root : `${root}/`
  if (resolved !== root && !resolved.startsWith(prefix)) {
    return {
      ok: false,
      error: `path escapes the agent sandbox (${root})`,
    }
  }
  return { ok: true, absolutePath: resolved }
}
// Convert a glob into a RegExp that must match a whole relative POSIX path.
//
// Supported syntax, one path segment at a time :
//   *   any run of characters inside one segment (never crosses a `/`)
//   ?   exactly one character inside one segment
//   **  as a whole segment : zero or more complete directories ; as the
//       last segment : everything below that point
// Every other character is matched literally.
//
// A `**` segment always consumes WHOLE directories, including their
// trailing `/`. Gluing it to its neighbours without a separator would let
// `src/**/*.ts` match `srcfoo/a.ts` and `**/foo.ts` match `barfoo.ts`.
function globToRegex(pattern: string): RegExp {
  const parts = pattern.split('/')
  const lastIndex = parts.length - 1
  let source = ''

  parts.forEach((part, index) => {
    const isLast = index === lastIndex
    if (part === '**') {
      // Trailing `**` : anything below. Elsewhere : zero or more
      // `dir/` groups, so the next segment still starts on a boundary.
      source += isLast ? '.*' : '(?:[^/]+/)*'
      return
    }
    for (const ch of part) {
      if (ch === '*') source += '[^/]*'
      else if (ch === '?') source += '[^/]'
      else if (/[.+^${}()|[\]\\]/.test(ch)) source += `\\${ch}`
      else source += ch
    }
    if (!isLast) source += '/'
  })

  return new RegExp(`^${source}$`)
}
+ const safeRoot = resolveSandboxedPath('.') + if (!safeRoot.ok) return safeRoot + const root = resolve(safeRoot.absolutePath) + + const re = globToRegex(input.pattern) + const all = walk(root) + const matched = all.filter((p) => re.test(p)).sort() + const truncated = matched.length > MAX_MATCHES + return { + ok: true, + matches: truncated ? matched.slice(0, MAX_MATCHES) : matched, + truncated, + } +} diff --git a/packages/tools-core/src/runtime/grep.ts b/packages/tools-core/src/runtime/grep.ts new file mode 100644 index 0000000..b7ae6e8 --- /dev/null +++ b/packages/tools-core/src/runtime/grep.ts @@ -0,0 +1,106 @@ +// Grep (runtime) — regex search across files under /workspace. +// +// Pure JS, no ripgrep dependency : the alpine container doesn't ship rg +// by default and we don't want to bloat the image just for this. For a +// POC the trade-off is fine ; if it becomes a bottleneck we'll bind-mount +// rg later. +// +// The pattern is a JavaScript RegExp source. Files are filtered by an +// optional glob to keep the scan bounded. Binary-looking content +// (NUL bytes in the first 4 KB) is skipped. + +import { readFileSync } from 'node:fs' +import { join } from 'node:path' +import { z } from 'zod' +import { resolveSandboxedPath } from './file-write.ts' +import { executeRuntimeGlob } from './glob.ts' + +export const RuntimeGrepInputSchema = z.object({ + pattern: z + .string() + .min(1) + .describe('JavaScript RegExp source. Example : "TODO|FIXME".'), + glob: z + .string() + .optional() + .describe( + 'Optional file pattern relative to /workspace (e.g. "src/**/*.ts"). Defaults to "**/*".', + ), + ignoreCase: z.boolean().optional().describe('Case-insensitive match. 
// Search sandbox files line by line for a JavaScript regular expression.
//
// Candidate files come from executeRuntimeGlob (pattern `input.glob`, or
// "**/*" when omitted). Files larger than MAX_FILE_BYTES and files that
// look binary are skipped and do not count towards `scanned`. Reported
// lines longer than MAX_LINE_LEN are clipped. At most MAX_HITS hits are
// returned.
//
// `truncated` is true when the hit cap was reached OR when the file list
// itself was capped by the glob. In the second case some files were never
// looked at, and the caller must not read the result as exhaustive.
//
// Fails (ok: false) on an invalid regex or when the sandbox root or the
// glob cannot be resolved.
export function executeRuntimeGrep(
  input: RuntimeGrepInput,
): RuntimeGrepResult {
  let re: RegExp
  try {
    re = new RegExp(input.pattern, input.ignoreCase ? 'i' : undefined)
  } catch (err) {
    return { ok: false, error: `invalid regex : ${err instanceof Error ? err.message : String(err)}` }
  }

  const safeRoot = resolveSandboxedPath('.')
  if (!safeRoot.ok) return safeRoot

  const filesResult = executeRuntimeGlob({ pattern: input.glob ?? '**/*' })
  if (!filesResult.ok) return filesResult

  const hits: GrepHit[] = []
  // Start from the glob's own flag : a capped file list means an
  // incomplete scan even when the hit cap is never reached.
  let truncated = filesResult.truncated
  let scanned = 0

  for (const rel of filesResult.matches) {
    if (hits.length >= MAX_HITS) {
      truncated = true
      break
    }
    const abs = join(safeRoot.absolutePath, rel)
    let buf: Buffer
    try {
      buf = readFileSync(abs)
    } catch {
      // Unreadable file (removed meanwhile, permissions) : skip it.
      continue
    }
    if (buf.length > MAX_FILE_BYTES) continue
    if (looksBinary(buf)) continue
    scanned += 1
    const text = buf.toString('utf8')
    const lines = text.split('\n')
    for (let i = 0; i < lines.length; i += 1) {
      const line = lines[i] as string
      // `re` has no g/y flag, so test() keeps no state between lines.
      if (re.test(line)) {
        hits.push({
          path: rel,
          line: i + 1,
          text: line.length > MAX_LINE_LEN ? `${line.slice(0, MAX_LINE_LEN)}…` : line,
        })
        if (hits.length >= MAX_HITS) {
          truncated = true
          break
        }
      }
    }
  }

  return { ok: true, hits, truncated, scanned }
}
() => { + const r = await executeBash({ command: 'sleep 5', timeoutMs: 200 }) + expect(r.timedOut).toBe(true) + expect(r.exitCode).not.toBe(0) + }) +}) diff --git a/packages/tools-core/tests/runtime-file-edit.test.ts b/packages/tools-core/tests/runtime-file-edit.test.ts new file mode 100644 index 0000000..37d0ddb --- /dev/null +++ b/packages/tools-core/tests/runtime-file-edit.test.ts @@ -0,0 +1,86 @@ +import { afterAll, beforeAll, describe, expect, test } from 'bun:test' +import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-fe-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +const { executeRuntimeFileEdit } = await import('../src/runtime/file-edit.ts') + +describe('executeRuntimeFileEdit', () => { + test('replaces a unique substring', () => { + const path = join(TMP_WORKSPACE, 'a.ts') + writeFileSync(path, 'const x = 1\nconst y = 2\n') + const r = executeRuntimeFileEdit({ + path: 'a.ts', + oldString: 'const x = 1', + newString: 'const x = 42', + }) + expect(r.ok).toBe(true) + if (r.ok) { + expect(r.replacements).toBe(1) + expect(readFileSync(path, 'utf8')).toBe('const x = 42\nconst y = 2\n') + } + }) + + test('refuses ambiguous match without replaceAll', () => { + const path = join(TMP_WORKSPACE, 'b.ts') + writeFileSync(path, 'foo\nfoo\n') + const r = executeRuntimeFileEdit({ + path: 'b.ts', + oldString: 'foo', + newString: 'bar', + }) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.error).toContain('matches 2 times') + }) + + test('replaceAll handles every occurrence', () => { + const path = join(TMP_WORKSPACE, 
'c.ts') + writeFileSync(path, 'foo\nfoo\nfoo\n') + const r = executeRuntimeFileEdit({ + path: 'c.ts', + oldString: 'foo', + newString: 'bar', + replaceAll: true, + }) + expect(r.ok).toBe(true) + if (r.ok) { + expect(r.replacements).toBe(3) + expect(readFileSync(path, 'utf8')).toBe('bar\nbar\nbar\n') + } + }) + + test('returns an error when oldString is missing', () => { + const path = join(TMP_WORKSPACE, 'd.ts') + writeFileSync(path, 'hello') + const r = executeRuntimeFileEdit({ + path: 'd.ts', + oldString: 'goodbye', + newString: 'bye', + }) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.error).toContain('not found') + }) + + test('refuses path outside the sandbox', () => { + const r = executeRuntimeFileEdit({ + path: '../escape', + oldString: 'a', + newString: 'b', + }) + expect(r.ok).toBe(false) + }) +}) diff --git a/packages/tools-core/tests/runtime-file-read.test.ts b/packages/tools-core/tests/runtime-file-read.test.ts new file mode 100644 index 0000000..e1c6374 --- /dev/null +++ b/packages/tools-core/tests/runtime-file-read.test.ts @@ -0,0 +1,55 @@ +import { afterAll, beforeAll, describe, expect, test } from 'bun:test' +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-fr-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +const { executeRuntimeFileRead } = await import('../src/runtime/file-read.ts') + +describe('executeRuntimeFileRead', () => { + test('reads the full file when no offset/limit', () => { + writeFileSync(join(TMP_WORKSPACE, 'a.txt'), 'one\ntwo\nthree\n') + const r = executeRuntimeFileRead({ path: 'a.txt' }) + 
expect(r.ok).toBe(true) + if (r.ok) { + expect(r.content).toBe('one\ntwo\nthree') + expect(r.totalLines).toBe(3) + expect(r.returnedLines).toBe(3) + } + }) + + test('honors offset and limit', () => { + const lines = Array.from({ length: 10 }, (_, i) => `line${(i + 1).toString()}`).join('\n') + writeFileSync(join(TMP_WORKSPACE, 'b.txt'), lines) + const r = executeRuntimeFileRead({ path: 'b.txt', offset: 3, limit: 4 }) + expect(r.ok).toBe(true) + if (r.ok) { + expect(r.content).toBe('line4\nline5\nline6\nline7') + expect(r.totalLines).toBe(10) + expect(r.returnedLines).toBe(4) + } + }) + + test('rejects path outside the sandbox', () => { + const r = executeRuntimeFileRead({ path: '../escape.txt' }) + expect(r.ok).toBe(false) + }) + + test('returns an error for missing files', () => { + const r = executeRuntimeFileRead({ path: 'nope.txt' }) + expect(r.ok).toBe(false) + }) +}) diff --git a/packages/tools-core/tests/runtime-file-write.test.ts b/packages/tools-core/tests/runtime-file-write.test.ts new file mode 100644 index 0000000..8087566 --- /dev/null +++ b/packages/tools-core/tests/runtime-file-write.test.ts @@ -0,0 +1,99 @@ +// Security and round-trip tests for the runtime-side FileWrite tool. +// Uses FORGE_WORKSPACE to point the sandbox at a temp dir so the tests +// don't try to write to /workspace on the host. 
+ +import { afterAll, afterEach, beforeAll, describe, expect, test } from 'bun:test' +import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-fw-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +// Late import so module-level reads of process.env happen after we set it. +const { + executeRuntimeFileWrite, + resolveSandboxedPath, +} = await import('../src/runtime/file-write.ts') + +afterEach(() => { + // Wipe contents but keep the dir itself so the env var stays valid. + for (const entry of [ + 'a.txt', + 'sub/b.txt', + 'sub', + 'overwrite-me.txt', + ]) { + const p = join(TMP_WORKSPACE, entry) + if (existsSync(p)) rmSync(p, { recursive: true, force: true }) + } +}) + +describe('resolveSandboxedPath (runtime)', () => { + test('accepts a relative path under the sandbox', () => { + const r = resolveSandboxedPath('a.txt') + expect(r.ok).toBe(true) + if (r.ok) expect(r.absolutePath).toBe(join(TMP_WORKSPACE, 'a.txt')) + }) + + test('rejects path traversal', () => { + const r = resolveSandboxedPath('../escape.txt') + expect(r.ok).toBe(false) + }) + + test('rejects absolute path outside the sandbox', () => { + const r = resolveSandboxedPath('/etc/passwd') + expect(r.ok).toBe(false) + }) + + test('rejects null byte', () => { + const r = resolveSandboxedPath('foo\0bar') + expect(r.ok).toBe(false) + }) +}) + +describe('executeRuntimeFileWrite', () => { + test('writes a file in the sandbox', () => { + const r = executeRuntimeFileWrite({ path: 'a.txt', content: 'hi' }) + expect(r.ok).toBe(true) + if (r.ok) { + 
expect(readFileSync(r.absolutePath, 'utf8')).toBe('hi') + expect(r.bytes).toBe(2) + } + }) + + test('creates parent directories', () => { + const r = executeRuntimeFileWrite({ + path: 'sub/b.txt', + content: 'nested', + }) + expect(r.ok).toBe(true) + if (r.ok) expect(readFileSync(r.absolutePath, 'utf8')).toBe('nested') + }) + + test('overwrites an existing file', () => { + executeRuntimeFileWrite({ path: 'overwrite-me.txt', content: 'v1' }) + const r = executeRuntimeFileWrite({ path: 'overwrite-me.txt', content: 'v2' }) + expect(r.ok).toBe(true) + if (r.ok) expect(readFileSync(r.absolutePath, 'utf8')).toBe('v2') + }) + + test('refuses path escaping the sandbox', () => { + const r = executeRuntimeFileWrite({ + path: '../evil.txt', + content: 'x', + }) + expect(r.ok).toBe(false) + }) +}) diff --git a/packages/tools-core/tests/runtime-glob.test.ts b/packages/tools-core/tests/runtime-glob.test.ts new file mode 100644 index 0000000..cdfe6ec --- /dev/null +++ b/packages/tools-core/tests/runtime-glob.test.ts @@ -0,0 +1,53 @@ +import { afterAll, beforeAll, describe, expect, test } from 'bun:test' +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-gl-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE + mkdirSync(join(TMP_WORKSPACE, 'src/sub'), { recursive: true }) + writeFileSync(join(TMP_WORKSPACE, 'src/index.ts'), '') + writeFileSync(join(TMP_WORKSPACE, 'src/sub/util.ts'), '') + writeFileSync(join(TMP_WORKSPACE, 'src/sub/util.test.ts'), '') + writeFileSync(join(TMP_WORKSPACE, 'README.md'), '') +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +const { executeRuntimeGlob } = 
await import('../src/runtime/glob.ts') + +describe('executeRuntimeGlob', () => { + test('matches all .ts files recursively with **/*.ts', () => { + const r = executeRuntimeGlob({ pattern: '**/*.ts' }) + expect(r.ok).toBe(true) + if (r.ok) { + expect(r.matches).toEqual(['src/index.ts', 'src/sub/util.test.ts', 'src/sub/util.ts']) + } + }) + + test('matches a single segment with src/*.ts', () => { + const r = executeRuntimeGlob({ pattern: 'src/*.ts' }) + expect(r.ok).toBe(true) + if (r.ok) expect(r.matches).toEqual(['src/index.ts']) + }) + + test('matches with ? for single char', () => { + const r = executeRuntimeGlob({ pattern: 'README.m?' }) + expect(r.ok).toBe(true) + if (r.ok) expect(r.matches).toEqual(['README.md']) + }) + + test('returns empty when nothing matches', () => { + const r = executeRuntimeGlob({ pattern: '**/*.rs' }) + expect(r.ok).toBe(true) + if (r.ok) expect(r.matches).toEqual([]) + }) +}) diff --git a/packages/tools-core/tests/runtime-grep.test.ts b/packages/tools-core/tests/runtime-grep.test.ts new file mode 100644 index 0000000..4e711ba --- /dev/null +++ b/packages/tools-core/tests/runtime-grep.test.ts @@ -0,0 +1,62 @@ +import { afterAll, beforeAll, describe, expect, test } from 'bun:test' +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +let TMP_WORKSPACE: string +const ORIGINAL_ENV = process.env.FORGE_WORKSPACE + +beforeAll(() => { + TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-gr-')) + process.env.FORGE_WORKSPACE = TMP_WORKSPACE + mkdirSync(join(TMP_WORKSPACE, 'src'), { recursive: true }) + writeFileSync( + join(TMP_WORKSPACE, 'src/index.ts'), + '// TODO: implement\nexport const x = 1\n// fixme later\n', + ) + writeFileSync(join(TMP_WORKSPACE, 'src/util.ts'), 'export const todo = "x"\n') + writeFileSync(join(TMP_WORKSPACE, 'README.md'), '# project\nTODO: write docs\n') +}) + +afterAll(() => { + if (ORIGINAL_ENV === undefined) delete 
process.env.FORGE_WORKSPACE + else process.env.FORGE_WORKSPACE = ORIGINAL_ENV + rmSync(TMP_WORKSPACE, { recursive: true, force: true }) +}) + +const { executeRuntimeGrep } = await import('../src/runtime/grep.ts') + +describe('executeRuntimeGrep', () => { + test('finds case-sensitive matches across files', () => { + const r = executeRuntimeGrep({ pattern: 'TODO' }) + expect(r.ok).toBe(true) + if (r.ok) { + const paths = r.hits.map((h) => h.path).sort() + expect(paths).toEqual(['README.md', 'src/index.ts']) + } + }) + + test('honors ignoreCase', () => { + const r = executeRuntimeGrep({ pattern: 'todo', ignoreCase: true }) + expect(r.ok).toBe(true) + if (r.ok) { + const paths = r.hits.map((h) => h.path).sort() + // util.ts matches via "const todo", index.ts via TODO, README.md via TODO. + expect(paths).toEqual(['README.md', 'src/index.ts', 'src/util.ts']) + } + }) + + test('respects the glob filter', () => { + const r = executeRuntimeGrep({ pattern: 'TODO', glob: '**/*.md' }) + expect(r.ok).toBe(true) + if (r.ok) { + const paths = r.hits.map((h) => h.path) + expect(paths).toEqual(['README.md']) + } + }) + + test('returns an error for an invalid regex', () => { + const r = executeRuntimeGrep({ pattern: '(' }) + expect(r.ok).toBe(false) + }) +})