diff --git a/packages/cli/src/builder-actions.ts b/packages/cli/src/builder-actions.ts
index 8858c2a..889ce4c 100644
--- a/packages/cli/src/builder-actions.ts
+++ b/packages/cli/src/builder-actions.ts
@@ -144,17 +144,57 @@ export type RunActionExecution = {
export type ActionExecution = WriteActionExecution | RunActionExecution
+/**
+ * Double-quotes an unquoted frontmatter `description` value that contains a
+ * colon.
+ *
+ * Small models commonly write a `description` such as "Step 1: ..." or
+ * "...timeout: 60s...", which YAML reads as a nested mapping and which then
+ * breaks the whole frontmatter. Wrapping the value in double quotes makes the
+ * parser read it as a plain string.
+ *
+ * Only lines between the first and second `---` fence are considered. Values
+ * that are empty, already wrapped in matching quotes, or free of colons are
+ * left untouched. Line endings (LF or CRLF) are preserved.
+ *
+ * @param content Full file content, frontmatter included.
+ * @returns The content with the offending `description` value quoted.
+ */
+function quoteUnsafeDescription(content: string): string {
+  const descriptionLine = /^(\s*description\s*:\s*)(.*)$/
+  const lines = content.split('\n')
+  let fenceCount = 0
+  for (const [i, rawLine] of lines.entries()) {
+    // `.` does not match `\r` and `$` only matches at the very end, so on
+    // CRLF input the pattern above would never match. Peel the `\r` off here
+    // and put it back when the line is rewritten.
+    const eol = rawLine.endsWith('\r') ? '\r' : ''
+    const line = eol === '' ? rawLine : rawLine.slice(0, -1)
+    if (line.trim() === '---') {
+      fenceCount += 1
+      // The second fence closes the frontmatter; the body is never touched.
+      if (fenceCount === 2) break
+      continue
+    }
+    if (fenceCount !== 1) continue
+    const m = descriptionLine.exec(line)
+    if (!m) continue
+    const [, prefix = '', rawValue = ''] = m
+    const value = rawValue.trim()
+    if (value.length === 0) continue
+    // Already quoted? Leave it alone.
+    const alreadyQuoted =
+      (value.startsWith('"') && value.endsWith('"')) ||
+      (value.startsWith("'") && value.endsWith("'"))
+    if (alreadyQuoted || !value.includes(':')) continue
+    // Backslash is the escape character inside a double-quoted YAML scalar,
+    // so escape it first, then the embedded double quotes.
+    const safe = value.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
+    lines[i] = `${prefix}"${safe}"${eol}`
+  }
+  return lines.join('\n')
+}
+
function normalizeAgentMd(content: string): string {
// Small models often confuse the protocol separator (`---` between path
// and content) with the YAML frontmatter opener and forget to write a
// leading `---`. If the content looks like raw frontmatter (starts with a
// recognized key), prepend `---` so it parses cleanly.
const trimmed = content.replace(/^\s+/, '')
- if (trimmed.startsWith('---')) return content
- if (/^(name|description|model|sandbox|maxTurns)\s*:/m.test(trimmed)) {
- return `---\n${content.replace(/^\s+/, '')}`
+ // Start from the content unchanged; it is replaced only when an opening
+ // fence has to be added.
+ let normalized = content
+ if (!trimmed.startsWith('---')) {
+ // NOTE(review): the `m` flag lets `^` match at the start of any line of
+ // the trimmed content, not only the first one. Confirm that is intended.
+ if (/^(name|description|model|sandbox|maxTurns)\s*:/m.test(trimmed)) {
+ normalized = `---\n${content.replace(/^\s+/, '')}`
+ }
}
- return content
+ // The description-quoting pass runs in every case, whether or not a fence
+ // was prepended.
+ return quoteUnsafeDescription(normalized)
}
const AGENT_PATH_RE = /^(agents\/[a-z][a-z0-9-]*)\/[^/]+$/
diff --git a/packages/cli/src/components/App.tsx b/packages/cli/src/components/App.tsx
index 4687356..c85a8c9 100644
--- a/packages/cli/src/components/App.tsx
+++ b/packages/cli/src/components/App.tsx
@@ -9,11 +9,16 @@
// └──────────────┘ ← terminal bottom (FIXED)
//
// PgUp / PgDn / Ctrl+E scroll the chat transcript inside Welcome.
+// Tab / Shift+Tab cycle focus through Mission Control cards (only when
+// the prompt input is empty so it doesn't fight TextInput). Enter on a
+// focused card opens a full-screen CardDetail view ; Esc closes it.
import { Box, useInput, useStdin } from 'ink'
import React from 'react'
import { useChatContext } from '../hooks/useChatContext.tsx'
+import { useCardFocus } from '../hooks/useCardFocus.ts'
import { useLanguage } from '../i18n/LanguageContext.tsx'
+import { CardDetail } from './CardDetail.tsx'
import { MissionControl } from './MissionControl.tsx'
import { ProviderLogo } from './ProviderLogo.tsx'
import { Splash } from './Splash.tsx'
@@ -22,25 +27,65 @@ import { Welcome } from './Welcome.tsx'
export function App(): React.JSX.Element {
const { lang } = useLanguage()
const { isRawModeSupported } = useStdin()
- const { scrollUp, scrollDown, scrollToBottom, pending, state } = useChatContext()
+ const { scrollUp, scrollDown, scrollToBottom, pending, state, promptDraft } =
+ useChatContext()
+ const focus = useCardFocus(state.actions)
const rows = process.stdout.rows ?? 30
const cols = process.stdout.columns ?? 80
const hasPending = pending !== null
const hasActions = state.actions.length > 0
+ const promptIsEmpty = promptDraft.length === 0
+
+ // Tab/Enter is only meaningful when there are actions, the prompt is
+ // empty (so TextInput doesn't lose its keystrokes), and no permission
+ // dialog is showing.
+ const cardKeysActive =
+ isRawModeSupported &&
+ lang !== null &&
+ !focus.detailOpen &&
+ !hasPending &&
+ hasActions &&
+ promptIsEmpty
useInput(
- (_, key) => {
+ (input, key) => {
if (key.pageUp) scrollUp()
else if (key.pageDown) scrollDown()
- else if (key.ctrl && _ === 'e') scrollToBottom()
+ else if (key.ctrl && input === 'e') scrollToBottom()
+ else if (cardKeysActive && key.tab && key.shift) focus.cycleBack()
+ else if (cardKeysActive && key.tab) focus.cycle()
+ else if (cardKeysActive && key.return) focus.open()
+ // Esc clears the card focus (only when something is focused and
+ // the prompt is empty, so we never swallow an Esc the user meant
+ // for cancelling input).
+ else if (
+ key.escape &&
+ promptIsEmpty &&
+ !hasPending &&
+ focus.focusedId !== null
+ ) {
+ focus.clearFocus()
+ }
},
- { isActive: isRawModeSupported && lang !== null },
+ { isActive: isRawModeSupported && lang !== null && !focus.detailOpen },
)
+ // Detail view : modal full-screen replacement.
+ if (focus.detailOpen && focus.focusedId !== null) {
+ const action = state.actions.find((a) => a.id === focus.focusedId)
+ if (action) {
+ return
+ }
+ }
+
return (
- {hasActions ? : }
+ {hasActions ? (
+
+ ) : (
+
+ )}
{/* Spacer pushes Welcome to the bottom AND parks the provider logo
at the bottom-right of the top zone (just above the Welcome
diff --git a/packages/cli/src/components/CardDetail.tsx b/packages/cli/src/components/CardDetail.tsx
new file mode 100644
index 0000000..986480f
--- /dev/null
+++ b/packages/cli/src/components/CardDetail.tsx
@@ -0,0 +1,157 @@
+// Full-screen detail view for a single Mission Control action.
+//
+// Mounted by App when useCardFocus reports detailOpen=true. Replaces
+// both Mission Control AND Welcome — the user gets the entire screen
+// to read the full content of the action they pressed Enter on.
+//
+// Scrolls by line with arrow up/down and by page with PgUp / PgDn; g / G
+// jump to the top / bottom. Esc or q closes.
+
+import { Box, Text, useInput } from 'ink'
+import React, { useState } from 'react'
+import type { Action, ActionStatus, RunAction, WriteAction } from '../actions/types.ts'
+import { C } from '../theme/colors.ts'
+import {
+ type HighlightedLine,
+ type Segment,
+ highlightPlain,
+ highlightYamlText,
+} from './syntax.ts'
+
+// Upper-case badge text for each action status; rendered between square
+// brackets in the detail title bar.
+const STATUS_LABEL: Record<ActionStatus, string> = {
+  proposed: 'PROPOSED',
+  approved: 'APPROVED',
+  running: 'RUNNING',
+  done: 'DONE',
+  failed: 'FAILED',
+  declined: 'DECLINED',
+}
+
+// Theme colour associated with each action status.
+const STATUS_COLOR: Record<ActionStatus, string> = {
+  proposed: C.orange,
+  approved: C.orangeBright,
+  running: C.yellow,
+  done: C.green,
+  failed: C.red,
+  declined: C.grey,
+}
+
+/**
+ * Produces the highlighted lines shown in the detail body.
+ *
+ * A write action is rendered as its highlighted YAML content. A run action is
+ * rendered as a "prompt" section, a blank line, an "output" section (or an
+ * "(empty)" placeholder), and, for a failed run that carries an error, a
+ * blank line followed by the error text.
+ */
+function buildLines(action: Action): HighlightedLine[] {
+  if (action.kind === 'write') return highlightYamlText(action.content)
+
+  // Small factories so every pushed line is a fresh array.
+  const dimmed = (text: string): HighlightedLine => [{ text, color: C.grey, dim: true }]
+  const blank = (): HighlightedLine => [{ text: '' }]
+
+  const promptBody = highlightPlain(action.prompt)
+  const outputBody = action.output.length > 0 ? highlightPlain(action.output) : [dimmed('(empty)')]
+  const failureTail: HighlightedLine[] =
+    action.status === 'failed' && action.error
+      ? [blank(), [{ text: `✗ ${action.error}`, color: C.red }]]
+      : []
+
+  return [
+    dimmed('── prompt ──'),
+    ...promptBody,
+    blank(),
+    dimmed('── output ──'),
+    ...outputBody,
+    ...failureTail,
+  ]
+}
+
+// Title-bar text for an action: "write <path>" or "run <agent>".
+function headerFor(action: Action): string {
+  return action.kind === 'write' ? `write ${action.path}` : `run ${action.agent}`
+}
+
+/**
+ * Scrollable detail view for a single action.
+ *
+ * @param action  The action whose lines are displayed.
+ * @param onClose Called when the user presses Esc or `q`.
+ */
+export function CardDetail({
+ action,
+ onClose,
+}: {
+ action: Action
+ onClose: () => void
+}): React.JSX.Element {
+ const rows = process.stdout.rows ?? 30
+ const cols = process.stdout.columns ?? 80
+ // Rebuilt on every render from the current action.
+ const lines = buildLines(action)
+
+ // Reserve : 2 rows for the title bar, 2 rows for the footer hint, 1
+ // separator. Body gets the rest.
+ const bodyHeight = Math.max(5, rows - 5)
+ // Index of the first visible line.
+ const [offset, setOffset] = useState(0)
+ // Largest offset that still fills the body; 0 when everything fits.
+ // NOTE(review): offset is clamped only inside the key handlers below. If
+ // `lines` shrinks or `rows` grows while scrolled down, offset can exceed
+ // maxOffset until the next key press. Confirm whether it should also be
+ // clamped at render time.
+ const maxOffset = Math.max(0, lines.length - bodyHeight)
+
+ useInput((input, key) => {
+ // Esc or `q` closes the view; nothing else is handled for that key.
+ if (key.escape || input === 'q') {
+ onClose()
+ return
+ }
+ // PgUp / PgDn move by one body height, the arrows by one line, and
+ // `g` / `G` jump to the first / last page. All stay within [0, maxOffset].
+ if (key.pageUp) setOffset((o) => Math.max(0, o - bodyHeight))
+ else if (key.pageDown) setOffset((o) => Math.min(maxOffset, o + bodyHeight))
+ else if (key.upArrow) setOffset((o) => Math.max(0, o - 1))
+ else if (key.downArrow) setOffset((o) => Math.min(maxOffset, o + 1))
+ else if (input === 'g') setOffset(0)
+ else if (input === 'G') setOffset(maxOffset)
+ })
+
+ // Window of lines currently on screen, plus the numbers used in the footer.
+ const visible = lines.slice(offset, offset + bodyHeight)
+ const totalLines = lines.length
+ const lastShown = Math.min(totalLines, offset + bodyHeight)
+
+ return (
+
+ {/* Title bar */}
+
+
+ {`[${STATUS_LABEL[action.status]}]`}
+
+
+ {' detail '}
+
+ {headerFor(action)}
+
+
+ {'─'.repeat(cols)}
+
+
+ {/* Body */}
+
+ {visible.map((segments: HighlightedLine, i: number) => {
+ const lineNo = offset + i + 1
+ return (
+
+
+ {`${lineNo.toString().padStart(4, ' ')} `}
+
+ {segments.map((seg: Segment, j: number) => (
+
+ {seg.text}
+
+ ))}
+
+ )
+ })}
+
+
+ {/* Footer */}
+
+ {'─'.repeat(cols)}
+
+
+
+
+ {`lines ${(offset + 1).toString()}..${lastShown.toString()} of ${totalLines.toString()}`}
+
+
+
+
+ {'[↑↓ / PgUp/PgDn] scroll [g/G] top/bottom [Esc / q] close'}
+
+
+
+
+ )
+}
diff --git a/packages/cli/src/components/MissionControl.tsx b/packages/cli/src/components/MissionControl.tsx
index 9edc30f..999b350 100644
--- a/packages/cli/src/components/MissionControl.tsx
+++ b/packages/cli/src/components/MissionControl.tsx
@@ -84,7 +84,8 @@ function StatusBadge({ status }: { status: ActionStatus }): React.JSX.Element {
)
}
-function borderColorFor(status: ActionStatus): string {
+function borderColorFor(status: ActionStatus, focused: boolean): string {
+ if (focused) return C.orangeBright
if (status === 'done') return C.green
if (status === 'failed') return C.red
if (status === 'declined') return C.grey
@@ -94,16 +95,18 @@ function borderColorFor(status: ActionStatus): string {
function CardFrame({
status,
+ focused,
children,
}: {
status: ActionStatus
+ focused: boolean
children: React.ReactNode
}): React.JSX.Element {
return (
+ {focused ? '▸ ' : ' '}
+
+ )
+}
+
+function WriteCard({
+ action,
+ focused,
+}: {
+ action: WriteAction
+ focused: boolean
+}): React.JSX.Element {
const lines = highlightYamlText(action.content)
return (
-
+
+
{' write '}
{action.path}
@@ -140,12 +158,19 @@ function WriteCard({ action }: { action: WriteAction }): React.JSX.Element {
)
}
-function RunCard({ action }: { action: RunAction }): React.JSX.Element {
+function RunCard({
+ action,
+ focused,
+}: {
+ action: RunAction
+ focused: boolean
+}): React.JSX.Element {
const promptLines = highlightPlain(action.prompt)
const outputLines = action.output.length > 0 ? highlightPlain(action.output) : []
return (
-
+
+
{' run '}
{action.agent}
@@ -173,8 +198,10 @@ function RunCard({ action }: { action: RunAction }): React.JSX.Element {
export function MissionControl({
actions,
+ focusedId,
}: {
actions: Action[]
+ focusedId: string | null
}): React.JSX.Element {
const cols = process.stdout.columns ?? 80
return (
@@ -191,14 +218,24 @@ export function MissionControl({
{` ${actions.length.toString()} action${actions.length === 1 ? '' : 's'}`}
+ {focusedId === null ? (
+
+ {' [Tab] focus a card · [Enter] open detail'}
+
+ ) : (
+
+ {' [Enter] open detail · [Tab/Shift+Tab] cycle · [Esc] unfocus'}
+
+ )}
- {actions.map((a) =>
- a.kind === 'write' ? (
-
+ {actions.map((a) => {
+ const focused = a.id === focusedId
+ return a.kind === 'write' ? (
+
) : (
-
- ),
- )}
+
+ )
+ })}
)
}
diff --git a/packages/cli/src/components/Welcome.tsx b/packages/cli/src/components/Welcome.tsx
index e1dcee4..3bb709d 100644
--- a/packages/cli/src/components/Welcome.tsx
+++ b/packages/cli/src/components/Welcome.tsx
@@ -14,7 +14,7 @@
import { Box, Text, useApp, useStdin } from 'ink'
import TextInput from 'ink-text-input'
-import React, { useState } from 'react'
+import React from 'react'
import { getCurrentModelName } from '@agent-forge/core/builder'
import { isCommand, runCommand } from '../commands.ts'
import { useChatContext } from '../hooks/useChatContext.tsx'
@@ -39,7 +39,6 @@ export function Welcome(): React.JSX.Element {
const { lang, setLang } = useLanguage()
const { exit } = useApp()
const { isRawModeSupported } = useStdin()
- const [input, setInput] = useState('')
const {
state,
send,
@@ -51,6 +50,8 @@ export function Welcome(): React.JSX.Element {
pending,
approvePending,
declinePending,
+ promptDraft,
+ setPromptDraft,
} = useChatContext()
const hasMessages = state.messages.length > 0 || state.streaming !== null
@@ -59,7 +60,7 @@ export function Welcome(): React.JSX.Element {
const handleSubmit = (value: string): void => {
const trimmed = value.trim()
if (!trimmed || busy) return
- setInput('')
+ setPromptDraft('')
if (isCommand(trimmed)) {
addSystemMessage(trimmed)
@@ -116,8 +117,8 @@ export function Welcome(): React.JSX.Element {
{' ❯ '}
{isRawModeSupported ? (
diff --git a/packages/cli/src/hooks/useCardFocus.ts b/packages/cli/src/hooks/useCardFocus.ts
new file mode 100644
index 0000000..a40bd4a
--- /dev/null
+++ b/packages/cli/src/hooks/useCardFocus.ts
@@ -0,0 +1,85 @@
+// Mission Control card focus + detail view state.
+//
+// Kept separate from useChat so the chat hook stays focused on
+// conversation/action state. Exposes :
+// - focusedId : id of the action currently highlighted (or null)
+// - detailOpen : whether the full-screen detail panel is mounted
+// - cycle / cycleBack / open / close / clearFocus : callbacks for the host
+// component to bind to keys (Tab, Shift+Tab, Enter, Esc)
+//
+// Behaviour :
+// - Tab from "no focus" → focus the LAST action (most recent on top
+// of Mission Control reads as bottom of the list, so we land on
+// what the user just saw).
+// - Tab again → walk forward; wraps around.
+// - Shift+Tab → walk backward; wraps around.
+// - When the focused action disappears (cleared, etc.), focus resets.
+
+import { useCallback, useEffect, useState } from 'react'
+import type { Action } from '../actions/types.ts'
+
+// State and callbacks returned by useCardFocus.
+export type CardFocusApi = {
+ // Id of the focused action, or null when no card has focus.
+ focusedId: string | null
+ // True while the detail panel is open.
+ detailOpen: boolean
+ // Focus the next action, wrapping around; from no focus, the last action.
+ cycle: () => void
+ // Focus the previous action, wrapping around; from no focus, the first one.
+ cycleBack: () => void
+ // Open the detail panel; does nothing when no card is focused.
+ open: () => void
+ // Close the detail panel and keep the focus where it is.
+ close: () => void
+ // Drop the focus and close the detail panel.
+ clearFocus: () => void
+}
+
+/**
+ * Tracks which action card is focused and whether its detail panel is open.
+ *
+ * @param actions Current list of actions. When the focused action is no
+ *                longer in this list, the focus and the detail panel are
+ *                both reset.
+ * @returns The focus state plus the callbacks that change it.
+ */
+export function useCardFocus(actions: Action[]): CardFocusApi {
+  // The explicit type argument is required: inferred from `null` alone, the
+  // state type would be `null` and the setters below would reject any id.
+  const [focusedId, setFocusedId] = useState<string | null>(null)
+  const [detailOpen, setDetailOpen] = useState(false)
+
+  // If the focused action disappears (e.g. /clear), drop focus and the
+  // detail panel together so we never display a stale card.
+  useEffect(() => {
+    if (focusedId === null) return
+    const stillThere = actions.some((a) => a.id === focusedId)
+    if (!stillThere) {
+      setFocusedId(null)
+      setDetailOpen(false)
+    }
+  }, [actions, focusedId])
+
+  // Forward step. With no focus (or a focus that is not in the list) the
+  // LAST action is picked; otherwise the next one, wrapping to the first.
+  const cycle = useCallback(() => {
+    if (actions.length === 0) return
+    setFocusedId((current) => {
+      if (current === null) {
+        return actions[actions.length - 1]?.id ?? null
+      }
+      const idx = actions.findIndex((a) => a.id === current)
+      if (idx === -1) return actions[actions.length - 1]?.id ?? null
+      const next = (idx + 1) % actions.length
+      return actions[next]?.id ?? null
+    })
+  }, [actions])
+
+  // Backward step. With no focus (or a focus that is not in the list) the
+  // FIRST action is picked; otherwise the previous one, wrapping to the last.
+  const cycleBack = useCallback(() => {
+    if (actions.length === 0) return
+    setFocusedId((current) => {
+      if (current === null) {
+        return actions[0]?.id ?? null
+      }
+      const idx = actions.findIndex((a) => a.id === current)
+      if (idx === -1) return actions[0]?.id ?? null
+      // Adding the length before the modulo keeps the result non-negative.
+      const prev = (idx - 1 + actions.length) % actions.length
+      return actions[prev]?.id ?? null
+    })
+  }, [actions])
+
+  // The detail panel can only open on a focused card.
+  const open = useCallback(() => {
+    if (focusedId !== null) setDetailOpen(true)
+  }, [focusedId])
+
+  // Closes the panel but leaves the focus in place.
+  const close = useCallback(() => {
+    setDetailOpen(false)
+  }, [])
+
+  // Resets both pieces of state.
+  const clearFocus = useCallback(() => {
+    setFocusedId(null)
+    setDetailOpen(false)
+  }, [])
+
+  return { focusedId, detailOpen, cycle, cycleBack, open, close, clearFocus }
+}
diff --git a/packages/cli/src/hooks/useChat.ts b/packages/cli/src/hooks/useChat.ts
index 199272f..13dce72 100644
--- a/packages/cli/src/hooks/useChat.ts
+++ b/packages/cli/src/hooks/useChat.ts
@@ -116,6 +116,8 @@ export function useChat(lang: Lang): {
pending: Action | null
approvePending: () => void
declinePending: () => void
+ promptDraft: string
+ setPromptDraft: (value: string) => void
} {
const [state, setState] = useState({
messages: [],
@@ -125,6 +127,13 @@ export function useChat(lang: Lang): {
})
const [busy, setBusy] = useState(false)
const [scrollOffset, setScrollOffset] = useState(0)
+ // Lifted out of Welcome so App can know when the input is empty (and
+ // thus capture Tab for Mission Control focus without stealing keys
+ // from the prompt).
+ const [promptDraft, setPromptDraftState] = useState('')
+ const setPromptDraft = useCallback((value: string) => {
+ setPromptDraftState(value)
+ }, [])
// Buffer des messages cachés mais toujours envoyés au LLM dans `send`.
// `/clear` y déplace les messages visibles (vue vide, contexte préservé) ;
// `/reset` le purge. Stocké en ref pour ne pas redéclencher de rendu.
@@ -366,5 +375,7 @@ export function useChat(lang: Lang): {
pending: headPending,
approvePending,
declinePending,
+ promptDraft,
+ setPromptDraft,
}
}
diff --git a/packages/cli/tests/builder-actions.test.ts b/packages/cli/tests/builder-actions.test.ts
index 25c58d1..18259cf 100644
--- a/packages/cli/tests/builder-actions.test.ts
+++ b/packages/cli/tests/builder-actions.test.ts
@@ -198,6 +198,48 @@ body`
if (exec.kind === 'write') expect(exec.result.ok).toBe(true)
})
+ // Regression: a frontmatter whose unquoted description contains
+ // "Step 1: ..." colons must still be accepted by executeAction.
+ // NOTE(review): only `result.ok` is asserted; the quoted form of the
+ // description is not checked here.
+ test('quotes a description that contains an unquoted colon', () => {
+ const unsafe = `---
+name: ${TEST_AGENT}
+description: Audits the project. Step 1: list files. Step 2: fix TODOs.
+sandbox:
+ image: agent-forge/base:latest
+ timeout: 60s
+maxTurns: 1
+---
+
+body`
+ const exec = executeAction({
+ kind: 'write',
+ path: `agents/${TEST_AGENT}/AGENT.md`,
+ content: unsafe,
+ raw: '',
+ })
+ expect(exec.kind).toBe('write')
+ if (exec.kind === 'write') expect(exec.result.ok).toBe(true)
+ })
+
+ // A description that is already wrapped in double quotes must still be
+ // accepted by executeAction.
+ // NOTE(review): the test name says "untouched", but only `result.ok` is
+ // asserted; the content is not compared against the input.
+ test('leaves an already-quoted description untouched', () => {
+ const safe = `---
+name: ${TEST_AGENT}
+description: "Step 1: do this. Step 2: do that."
+sandbox:
+ image: agent-forge/base:latest
+ timeout: 60s
+maxTurns: 1
+---
+
+body`
+ const exec = executeAction({
+ kind: 'write',
+ path: `agents/${TEST_AGENT}/AGENT.md`,
+ content: safe,
+ raw: '',
+ })
+ expect(exec.kind).toBe('write')
+ if (exec.kind === 'write') expect(exec.result.ok).toBe(true)
+ })
+
test('run action passes through pre-flight (actual launch is async)', () => {
const exec = executeAction({
kind: 'run',
diff --git a/packages/core/src/builder/system-prompt.ts b/packages/core/src/builder/system-prompt.ts
index 81ab986..8d3d869 100644
--- a/packages/core/src/builder/system-prompt.ts
+++ b/packages/core/src/builder/system-prompt.ts
@@ -35,6 +35,7 @@ You are a haiku poet. Answer with exactly three lines, syllables 5-7-5.
ABSOLUTE rules — failing any of these IS A BUG :
- The path MUST be exactly \`agents//AGENT.md\`. The filename MUST be the literal string \`AGENT.md\`. Never invent variants like \`haiku-writer.md\` or \`HAIKU-WRITER.md\`.
- The file content MUST start with a YAML frontmatter block : a line \`---\`, then the YAML keys (name, description, sandbox, maxTurns), then a closing \`---\`, then the body. Look at the example above carefully — there are TWO \`---\` after the \`path:\` line : the first one separates the path from the content, the second one OPENS the frontmatter.
+- The \`description\` value MUST be a single line of plain prose, with NO colon (\`:\`), NO YAML-looking syntax (\`key: value\`), NO line break, NO unbalanced quote. If you cannot write it cleanly without a colon, wrap the whole value in double quotes : \`description: "Audits the project. Step 1: list files. Step 2: fix TODOs."\`. Never repeat the values of the other keys (\`maxTurns\`, \`timeout\`) inside \`description\` — they go in the body of the AGENT.md instead.
- The block opens with three backticks + \`forge:write\` and CLOSES with three backticks on their own line.
- Replace placeholders with real values. Do not keep angle brackets.
- Always propose the block first and ask the user to confirm with "yes" / "go" / "ok" before re-emitting it.
@@ -83,6 +84,7 @@ Tu es un poète haïku. Réponds par exactement trois lignes, syllabes 5-7-5.
Règles ABSOLUES — toute violation EST UN BUG :
- Le chemin DOIT être exactement \`agents//AGENT.md\`. Le nom de fichier DOIT être la chaîne littérale \`AGENT.md\`. N'invente jamais de variante comme \`haiku-writer.md\` ou \`HAIKU-WRITER.md\`.
- Le contenu du fichier DOIT commencer par un bloc YAML frontmatter : une ligne \`---\`, puis les clés YAML (name, description, sandbox, maxTurns), puis un \`---\` de fermeture, puis le corps. Regarde bien l'exemple ci-dessus — il y a DEUX \`---\` après la ligne \`path:\` : le premier sépare le path du contenu, le second OUVRE le frontmatter.
+- La valeur de \`description\` DOIT être une seule ligne de prose simple, SANS deux-points (\`:\`), SANS syntaxe ressemblant à du YAML (\`clé: valeur\`), SANS retour à la ligne, SANS guillemet non fermé. Si tu ne peux pas écrire la valeur proprement sans deux-points, encadre toute la valeur entre guillemets doubles : \`description: "Audite le projet. Étape 1 : lister les fichiers. Étape 2 : corriger les TODO."\`. Ne répète JAMAIS les valeurs des autres clés (\`maxTurns\`, \`timeout\`) dans la \`description\` — elles vont dans le corps de l'AGENT.md.
- Le bloc s'ouvre par trois backticks + \`forge:write\` et se FERME par trois backticks sur leur propre ligne.
- Remplace les placeholders par des vraies valeurs. Ne laisse pas les chevrons.
- Propose toujours le bloc d'abord et demande la confirmation (« oui » / « ok » / « go ») avant de le ré-émettre.
diff --git a/packages/runtime/src/index.ts b/packages/runtime/src/index.ts
index 19d2732..a473609 100644
--- a/packages/runtime/src/index.ts
+++ b/packages/runtime/src/index.ts
@@ -4,19 +4,41 @@
//
// 1. Standalone (P1) : reads a prompt from stdin, calls an OpenAI-
// compatible LLM endpoint, streams the answer to stdout. No agent
-// configuration required.
+// configuration required, no tool loop.
//
-// 2. Agent mode (P3.4) : if an AGENT.md is mounted at /agent/AGENT.md,
-// its frontmatter overrides the model and its body becomes the
-// system prompt. The prompt from stdin is the user message.
+// 2. Agent mode (P3+) : reads /agent/AGENT.md (frontmatter overrides
+// the model, body becomes the system prompt). The user prompt comes
+// from stdin. Native tools are available via fenced forge:* blocks
+// (P4) — the runtime parses them, executes the tool, feeds the
+// result back into the conversation, and loops up to maxTurns.
//
-// The output is STREAMED token by token to stdout so the host can render
-// progress live in the TUI.
+// Output is STREAMED token by token to stdout so the host can render
+// progress live in the TUI. Tool results are also written to stdout
+// inside [forge:tool] markers so the host can show them in Mission
+// Control without re-running the parser.
import { readFileSync } from 'node:fs'
import { createOpenAI } from '@ai-sdk/openai'
import { parseAgentMd } from '@agent-forge/core/types'
-import { streamText } from 'ai'
+import {
+ executeBash,
+ executeRuntimeFileEdit,
+ executeRuntimeFileRead,
+ executeRuntimeFileWrite,
+ executeRuntimeGlob,
+ executeRuntimeGrep,
+} from '@agent-forge/tools-core'
+import { type CoreMessage, streamText } from 'ai'
+import {
+ parseFirstToolBlock,
+ renderBashResult,
+ renderEditResult,
+ renderGlobResult,
+ renderGrepResult,
+ renderInvalid,
+ renderReadResult,
+ renderWriteResult,
+} from './tool-protocol.ts'
const AGENT_MD_PATH = '/agent/AGENT.md'
@@ -25,16 +47,18 @@ const API_KEY = process.env.FORGE_API_KEY ?? 'not-needed'
const ENV_MODEL =
process.env.FORGE_MODEL ?? 'mlx-community/Qwen2.5-7B-Instruct-4bit'
const MAX_TOKENS = Number(process.env.FORGE_MAX_TOKENS ?? '1024')
+// Hard cap to prevent runaway loops even if AGENT.md says otherwise.
+const MAX_TURNS_HARD_CAP = 10
type AgentConfig = {
model: string
systemPrompt?: string
agentName?: string
+ // Upper bound on LLM turns for one run. Tool use is enabled only when this
+ // is greater than 1.
+ maxTurns: number
}
function loadAgentConfig(): AgentConfig {
- // Default config when no AGENT.md is mounted (standalone P1 mode).
- let config: AgentConfig = { model: ENV_MODEL }
+ let config: AgentConfig = { model: ENV_MODEL, maxTurns: 1 }
try {
const raw = readFileSync(AGENT_MD_PATH, 'utf8')
const parsed = parseAgentMd(raw)
@@ -42,11 +66,9 @@ function loadAgentConfig(): AgentConfig {
model: parsed.meta.model ?? ENV_MODEL,
systemPrompt: parsed.body.length > 0 ? parsed.body : undefined,
agentName: parsed.meta.name,
+ maxTurns: Math.min(parsed.meta.maxTurns ?? 1, MAX_TURNS_HARD_CAP),
}
} catch (err) {
- // ENOENT means standalone mode, that is fine. Anything else is fatal :
- // a malformed AGENT.md would otherwise silently fall back to the
- // default model + no system prompt, which is misleading.
const code = (err as NodeJS.ErrnoException).code
if (code !== 'ENOENT') {
console.error(
@@ -68,28 +90,163 @@ async function readStdin(): Promise {
return Buffer.concat(chunks).toString('utf8').trim()
}
+// Tool instructions added to the system prompt when tools are enabled (see
+// buildSystem). The text starts with blank lines so it can be concatenated
+// directly after an agent's own prompt; buildSystem trims it when it is used
+// on its own.
+const TOOL_PROMPT = `
+
+You have access to six native tools, each callable by emitting a fenced block in your reply.
+
+## forge:bash — execute a shell command
+
+\`\`\`forge:bash
+{ "command": "ls -la", "timeoutMs": 10000 }
+\`\`\`
+
+Runs via \`bash -lc\` inside /workspace. \`timeoutMs\` defaults to 30000, capped at 120000.
+
+## forge:write — create or overwrite a file
+
+\`\`\`forge:write
+{ "path": "src/index.ts", "content": "export const x = 1\\n" }
+\`\`\`
+
+\`path\` is relative to /workspace (or absolute under /workspace). Existing files are overwritten.
+
+## forge:read — read a file
+
+\`\`\`forge:read
+{ "path": "src/index.ts", "offset": 0, "limit": 200 }
+\`\`\`
+
+\`offset\` and \`limit\` are line-based, both optional. Default limit 200, max 2000. Output is clipped at 16 KB ; use offset/limit to walk a long file.
+
+## forge:edit — patch a file by exact substring replacement
+
+\`\`\`forge:edit
+{ "path": "src/index.ts", "oldString": "const x = 1", "newString": "const x = 2" }
+\`\`\`
+
+\`oldString\` must match exactly once unless you set \`replaceAll\` true. If it matches multiple times, widen the surrounding context until it's unique.
+
+## forge:grep — regex search across files
+
+\`\`\`forge:grep
+{ "pattern": "TODO|FIXME", "glob": "src/**/*.ts", "ignoreCase": false }
+\`\`\`
+
+\`pattern\` is a JavaScript RegExp source. \`glob\` is optional (defaults to \`**/*\`). Returns up to 200 hits with path:line:text.
+
+## forge:glob — list files by pattern
+
+\`\`\`forge:glob
+{ "pattern": "src/**/*.ts" }
+\`\`\`
+
+Supports \`*\`, \`**\`, and \`?\`. Returns up to 200 paths relative to /workspace.
+
+## Iteration
+
+- Emit at most ONE block per reply. Text before the block is shown to the user. Text after the block is discarded.
+- After you receive a tool result, decide whether you need another tool call or whether you can produce the final answer.
+- When you are done, reply with plain text (no fenced block).
+`
+
+/**
+ * Assembles the system prompt for a run.
+ *
+ * Without tools: the agent's own prompt, or `undefined` when it has none.
+ * With tools: the agent's prompt followed by the tool instructions, or the
+ * trimmed tool instructions alone when the agent has no prompt.
+ */
+function buildSystem(config: AgentConfig, hasTools: boolean): string | undefined {
+  const agentPrompt = config.systemPrompt ?? ''
+  const hasAgentPrompt = agentPrompt.length > 0
+  if (hasTools) {
+    return hasAgentPrompt ? agentPrompt + TOOL_PROMPT : TOOL_PROMPT.trim()
+  }
+  return hasAgentPrompt ? agentPrompt : undefined
+}
+
+/**
+ * Runs one LLM call and streams the reply to stdout as it arrives.
+ *
+ * @param provider Provider factory returned by `createOpenAI`.
+ * @param model    Model identifier handed to the provider.
+ * @param system   System prompt, or `undefined` for none.
+ * @param messages Conversation so far.
+ * @returns The full reply text, i.e. every streamed chunk concatenated.
+ */
+async function streamOneTurn(
+  provider: ReturnType<typeof createOpenAI>,
+  model: string,
+  system: string | undefined,
+  messages: CoreMessage[],
+): Promise<string> {
+  const result = streamText({
+    model: provider(model),
+    system,
+    messages,
+    maxTokens: MAX_TOKENS,
+  })
+  // Each chunk is echoed immediately and also accumulated, because the
+  // caller needs the complete reply to look for a tool block.
+  let acc = ''
+  for await (const chunk of result.textStream) {
+    process.stdout.write(chunk)
+    acc += chunk
+  }
+  return acc
+}
+
+/**
+ * Executes the tool named by a successfully parsed block and renders its
+ * result as text.
+ *
+ * @param parsed The `kind: 'tool'` variant returned by `parseFirstToolBlock`.
+ * @returns The rendered tool result.
+ */
+async function executeToolBlock(
+  parsed: Extract<ReturnType<typeof parseFirstToolBlock>, { kind: 'tool' }>,
+): Promise<string> {
+  const tool = parsed.tool
+  // One case per tool kind; each pairs an executor with its renderer. Only
+  // the bash executor is awaited.
+  switch (tool.kind) {
+    case 'bash': {
+      const result = await executeBash(tool.input)
+      return renderBashResult(tool.input, result)
+    }
+    case 'write': {
+      const result = executeRuntimeFileWrite(tool.input)
+      return renderWriteResult(tool.input, result)
+    }
+    case 'read': {
+      const result = executeRuntimeFileRead(tool.input)
+      return renderReadResult(tool.input, result)
+    }
+    case 'edit': {
+      const result = executeRuntimeFileEdit(tool.input)
+      return renderEditResult(tool.input, result)
+    }
+    case 'grep': {
+      const result = executeRuntimeGrep(tool.input)
+      return renderGrepResult(tool.input, result)
+    }
+    case 'glob': {
+      const result = executeRuntimeGlob(tool.input)
+      return renderGlobResult(tool.input, result)
+    }
+  }
+}
+
+// Entry point: reads the user prompt from stdin, then runs up to
+// `config.maxTurns` LLM turns, executing at most one tool block per turn.
async function main(): Promise<void> {
const config = loadAgentConfig()
- const prompt = await readStdin()
- if (!prompt) {
+ const userPrompt = await readStdin()
+ if (!userPrompt) {
console.error('✗ no prompt received on stdin')
process.exit(1)
}
const provider = createOpenAI({ baseURL: BASE_URL, apiKey: API_KEY })
+ // Tools (and the tool instructions in the system prompt) are enabled only
+ // when the agent is allowed more than one turn.
+ const hasTools = config.maxTurns > 1
+ const system = buildSystem(config, hasTools)
- const result = streamText({
- model: provider(config.model),
- system: config.systemPrompt,
- prompt,
- maxTokens: MAX_TOKENS,
- })
+ const messages: CoreMessage[] = [{ role: 'user', content: userPrompt }]
- for await (const chunk of result.textStream) {
- process.stdout.write(chunk)
+ // NOTE(review): when the loop ends because maxTurns is reached, the tool
+ // requested on the last turn is still executed and its result appended to
+ // `messages`, but no further turn sends it to the model. Confirm that is
+ // intended.
+ for (let turn = 0; turn < config.maxTurns; turn += 1) {
+ const reply = await streamOneTurn(provider, config.model, system, messages)
+ // Newline after every streamed reply.
+ process.stdout.write('\n')
+
+ if (!hasTools) break
+
+ // A reply without a tool block is the final answer.
+ const parsed = parseFirstToolBlock(reply)
+ if (parsed.kind === 'none') break
+
+ // Record what the LLM just said (text + raw block) so the next turn
+ // sees it as a real assistant message.
+ messages.push({ role: 'assistant', content: reply })
+
+ // An invalid block is not executed; its error text is fed back instead.
+ let toolReply: string
+ if (parsed.kind === 'invalid') {
+ toolReply = renderInvalid(parsed.error)
+ } else {
+ toolReply = await executeToolBlock(parsed)
+ }
+
+ // Mark tool output for the host TUI so it can render it inside the
+ // Mission Control card instead of mixing it with prose.
+ process.stdout.write(`\n[forge:tool]\n${toolReply}\n[/forge:tool]\n`)
+
+ // The tool result goes back to the model as a user message.
+ messages.push({ role: 'user', content: toolReply })
}
- // Trailing newline so the host can detect the end of the stream cleanly.
- process.stdout.write('\n')
}
main().catch((err) => {
diff --git a/packages/runtime/src/tool-protocol.ts b/packages/runtime/src/tool-protocol.ts
new file mode 100644
index 0000000..b9ea1a0
--- /dev/null
+++ b/packages/runtime/src/tool-protocol.ts
@@ -0,0 +1,221 @@
+// Agent-side tool protocol — fenced blocks the agent emits to invoke a
+// native tool, and the rendering of tool results back to the LLM.
+//
+// We deliberately mirror the builder's text-structured protocol (forge:write
+// and forge:run) instead of using OpenAI tool_calls for two reasons :
+// 1. Local LLMs (MLX, llama.cpp) often don't honor tool_calls.
+// 2. A consistent protocol across builder and agents simplifies debugging
+// and lets users read the raw stream.
+//
+// Six tools wired today : bash, write, read, edit, grep, glob.
+//
+// ```forge:bash
+// { "command": "ls -la" }
+// ```
+//
+// ```forge:write
+// { "path": "src/index.ts", "content": "..." }
+// ```
+//
+// ```forge:read
+// { "path": "src/index.ts", "offset": 0, "limit": 200 }
+// ```
+//
+// ```forge:edit
+// { "path": "src/index.ts", "oldString": "...", "newString": "..." }
+// ```
+//
+// ```forge:grep
+// { "pattern": "TODO", "glob": "**/*.ts", "ignoreCase": true }
+// ```
+//
+// ```forge:glob
+// { "pattern": "src/**/*.ts" }
+// ```
+//
+// Only ONE block is parsed per turn (the first encountered). Everything
+// before the block is treated as the agent's "thinking out loud" text
+// and streamed to the host. Everything after the block is dropped — the
+// agent will see the tool result on the next turn and continue from there.
+
+import { z } from 'zod'
+import {
+ BashInputSchema,
+ RuntimeFileEditInputSchema,
+ RuntimeFileReadInputSchema,
+ RuntimeFileWriteInputSchema,
+ RuntimeGlobInputSchema,
+ RuntimeGrepInputSchema,
+ type BashInput,
+ type BashResult,
+ type GrepHit,
+ type RuntimeFileEditInput,
+ type RuntimeFileEditResult,
+ type RuntimeFileReadInput,
+ type RuntimeFileReadResult,
+ type RuntimeFileWriteInput,
+ type RuntimeFileWriteResult,
+ type RuntimeGlobInput,
+ type RuntimeGlobResult,
+ type RuntimeGrepInput,
+ type RuntimeGrepResult,
+} from '@agent-forge/tools-core'
+
+// Name of a tool the agent can invoke ; it is the `<kind>` part of a
+// `forge:<kind>` fence tag.
+export type ToolKind = 'bash' | 'write' | 'read' | 'edit' | 'grep' | 'glob'
+
+// A tool call whose JSON body passed schema validation. `raw` is the text
+// of the fenced block exactly as it appeared in the reply.
+export type ParsedTool =
+ | { kind: 'bash'; input: BashInput; raw: string }
+ | { kind: 'write'; input: RuntimeFileWriteInput; raw: string }
+ | { kind: 'read'; input: RuntimeFileReadInput; raw: string }
+ | { kind: 'edit'; input: RuntimeFileEditInput; raw: string }
+ | { kind: 'grep'; input: RuntimeGrepInput; raw: string }
+ | { kind: 'glob'; input: RuntimeGlobInput; raw: string }
+
+// Result of scanning one reply. `text` is the prose that precedes the
+// block, or the whole reply when no block was found.
+export type ParseOutcome =
+ | { kind: 'none'; text: string }
+ | { kind: 'invalid'; text: string; error: string; raw: string }
+ | { kind: 'tool'; text: string; tool: ParsedTool }
+
+// One zod schema per tool kind, used to validate the JSON body of a block.
+// Keyed by ToolKind so that adding a kind without a schema fails to compile.
+const SCHEMAS: Record<ToolKind, z.ZodTypeAny> = {
+  bash: BashInputSchema,
+  write: RuntimeFileWriteInputSchema,
+  read: RuntimeFileReadInputSchema,
+  edit: RuntimeFileEditInputSchema,
+  grep: RuntimeGrepInputSchema,
+  glob: RuntimeGlobInputSchema,
+}
+
+// Opening fence of a tool block : ```forge:<kind> followed by a newline.
+const OPEN_FENCE_RE = /```forge:(bash|write|read|edit|grep|glob)\s*\n/
+const CLOSE_FENCE = '```'
+
+// Find and validate the first `forge:<kind>` fenced block of an LLM reply.
+//
+// Returns :
+//   - kind 'none'    : no complete block ; `text` is the whole reply.
+//   - kind 'invalid' : a block was found but its body is not valid JSON or
+//                      fails the tool's schema ; `error` explains why.
+//   - kind 'tool'    : a validated call, ready to execute.
+// In the last two cases `text` is the prose preceding the block.
+//
+// The closing fence is the first ``` after which the body parses as JSON.
+// Simply taking the first ``` would cut the body short whenever a JSON
+// string itself contains a fence (e.g. a forge:write of a markdown file),
+// and the agent could then never emit such content.
+export function parseFirstToolBlock(stream: string): ParseOutcome {
+  const open = OPEN_FENCE_RE.exec(stream)
+  if (!open) return { kind: 'none', text: stream }
+
+  const tag = open[1] as ToolKind
+  const before = stream.slice(0, open.index)
+  const bodyStart = open.index + open[0].length
+
+  let closeAt = stream.indexOf(CLOSE_FENCE, bodyStart)
+  // An opener that is never closed is not a block at all.
+  if (closeAt === -1) return { kind: 'none', text: stream }
+
+  // Remember the failure at the FIRST candidate : if no later fence yields
+  // valid JSON either, that is the error the agent needs to see.
+  let firstFailure: { error: string; raw: string } | undefined
+
+  while (closeAt !== -1) {
+    const raw = stream.slice(open.index, closeAt + CLOSE_FENCE.length)
+
+    let parsed: unknown
+    try {
+      parsed = JSON.parse(stream.slice(bodyStart, closeAt))
+    } catch (err) {
+      if (firstFailure === undefined) {
+        firstFailure = {
+          error: `forge:${tag} block is not valid JSON : ${
+            err instanceof Error ? err.message : String(err)
+          }`,
+          raw,
+        }
+      }
+      closeAt = stream.indexOf(CLOSE_FENCE, closeAt + CLOSE_FENCE.length)
+      continue
+    }
+
+    const result = SCHEMAS[tag].safeParse(parsed)
+    if (!result.success) {
+      return {
+        kind: 'invalid',
+        text: before,
+        error: `forge:${tag} input failed validation : ${formatZodError(result.error)}`,
+        raw,
+      }
+    }
+
+    // Narrow to the right ParsedTool variant by tag — the schema guarantees
+    // the data shape matches.
+    return {
+      kind: 'tool',
+      text: before,
+      tool: { kind: tag, input: result.data, raw } as ParsedTool,
+    }
+  }
+
+  const failure = firstFailure ?? {
+    error: `forge:${tag} block is not valid JSON`,
+    raw: stream.slice(open.index),
+  }
+  return { kind: 'invalid', text: before, error: failure.error, raw: failure.raw }
+}
+
+// Flatten a ZodError into one line : "<path>: <message>" per issue, joined
+// with " ; ". Issues raised on the top-level value are labelled "(root)".
+function formatZodError(err: z.ZodError): string {
+  // `issues` is the canonical list ; `errors` is only a legacy alias of it.
+  return err.issues
+    .map((issue) => `${issue.path.join('.') || '(root)'}: ${issue.message}`)
+    .join(' ; ')
+}
+
+// ── Result renderers : turn each tool's structured result into the
+// message we feed back to the LLM on the next turn. Same `[forge:X result]`
+// header so the agent recognizes it as the answer to its previous call.
+
+// Render a bash outcome for the LLM : a header with the command and exit
+// code, then the stdout / stderr sections when they are non-empty.
+export function renderBashResult(input: BashInput, result: BashResult): string {
+  let header = `[forge:bash result] command="${input.command}" exit=${String(result.exitCode)}`
+  if (result.timedOut) header += ' (timed out)'
+
+  const sections = [header]
+  if (result.stdout !== '') sections.push(`--- stdout ---\n${result.stdout}`)
+  if (result.stderr !== '') sections.push(`--- stderr ---\n${result.stderr}`)
+  return sections.join('\n')
+}
+
+// Render a write outcome : the resolved path and byte count on success,
+// the requested path and the error otherwise.
+export function renderWriteResult(
+  input: RuntimeFileWriteInput,
+  result: RuntimeFileWriteResult,
+): string {
+  return result.ok
+    ? `[forge:write result] wrote ${result.absolutePath} (${String(result.bytes)} bytes)`
+    : `[forge:write result] FAILED on path="${input.path}" : ${result.error}`
+}
+
+// Render a read outcome : a header giving the resolved path, the line
+// window (offset .. offset + returned lines) and the file's total line
+// count, followed by the content itself.
+export function renderReadResult(
+  input: RuntimeFileReadInput,
+  result: RuntimeFileReadResult,
+): string {
+  if (!result.ok) {
+    return `[forge:read result] FAILED on path="${input.path}" : ${result.error}`
+  }
+  const first = input.offset ?? 0
+  const last = first + result.returnedLines
+  const clipped = result.truncatedBytes ? ' (clipped)' : ''
+  return [
+    `[forge:read result] ${result.absolutePath} · lines ${String(first)}..${String(last)} of ${String(result.totalLines)}${clipped}`,
+    '--- content ---',
+    result.content,
+  ].join('\n')
+}
+
+// Render an edit outcome : the resolved path and how many replacements
+// were made, or the requested path and the error.
+export function renderEditResult(
+  input: RuntimeFileEditInput,
+  result: RuntimeFileEditResult,
+): string {
+  if (!result.ok) {
+    return `[forge:edit result] FAILED on path="${input.path}" : ${result.error}`
+  }
+  const plural = result.replacements === 1 ? '' : 's'
+  return `[forge:edit result] ${result.absolutePath} · ${String(result.replacements)} replacement${plural}`
+}
+
+// Render a glob outcome : a count header, then one matched path per line.
+export function renderGlobResult(
+  input: RuntimeGlobInput,
+  result: RuntimeGlobResult,
+): string {
+  if (!result.ok) {
+    return `[forge:glob result] FAILED on pattern="${input.pattern}" : ${result.error}`
+  }
+  const count = result.matches.length
+  const noun = count === 1 ? 'match' : 'matches'
+  const suffix = result.truncated ? ' (truncated)' : ''
+  // With no matches the join leaves the header on its own.
+  return [`[forge:glob result] ${String(count)} ${noun}${suffix}`, ...result.matches].join('\n')
+}
+
+// Render a grep outcome : a header with hit and scanned-file counts, then
+// one "path:line: text" row per hit.
+export function renderGrepResult(
+  input: RuntimeGrepInput,
+  result: RuntimeGrepResult,
+): string {
+  if (!result.ok) {
+    return `[forge:grep result] FAILED on pattern="${input.pattern}" : ${result.error}`
+  }
+  const hitCount = result.hits.length
+  const hitNoun = hitCount === 1 ? 'hit' : 'hits'
+  const fileNoun = result.scanned === 1 ? 'file' : 'files'
+  const suffix = result.truncated ? ' (truncated)' : ''
+  const header = `[forge:grep result] ${String(hitCount)} ${hitNoun} across ${String(result.scanned)} ${fileNoun}${suffix}`
+
+  const rows = result.hits.map(
+    (hit: GrepHit) => `${hit.path}:${String(hit.line)}: ${hit.text}`,
+  )
+  // With no hits the join leaves the header on its own.
+  return [header, ...rows].join('\n')
+}
+
+// Message sent back to the LLM when its tool block could not be parsed or
+// validated : the error, a blank line, then a retry hint.
+export function renderInvalid(error: string): string {
+  const lines = [`[forge:tool error] ${error}`, '', 'Fix the JSON or schema and try again.']
+  return lines.join('\n')
+}
diff --git a/packages/runtime/tests/tool-protocol.test.ts b/packages/runtime/tests/tool-protocol.test.ts
new file mode 100644
index 0000000..b05cae1
--- /dev/null
+++ b/packages/runtime/tests/tool-protocol.test.ts
@@ -0,0 +1,125 @@
+// Tests for the agent-side tool block parser. Pure : no FS, no spawn.
+
+import { describe, expect, test } from 'bun:test'
+import { parseFirstToolBlock } from '../src/tool-protocol.ts'
+
+// Covers the three outcomes of the parser (none / tool / invalid) and one
+// happy-path block per tool kind.
+describe('parseFirstToolBlock', () => {
+ test('returns kind=none on plain text', () => {
+ const r = parseFirstToolBlock('just a sentence with no block')
+ expect(r.kind).toBe('none')
+ })
+
+ // Prose before the block is kept in `text` ; the block itself is parsed.
+ test('parses a forge:bash block with prose before it', () => {
+ const stream = [
+ 'I will list the workspace contents.',
+ '',
+ '```forge:bash',
+ '{ "command": "ls -la" }',
+ '```',
+ '',
+ 'After the block — should be ignored.',
+ ].join('\n')
+ const r = parseFirstToolBlock(stream)
+ expect(r.kind).toBe('tool')
+ if (r.kind === 'tool') {
+ expect(r.text.startsWith('I will list')).toBe(true)
+ expect(r.tool.kind).toBe('bash')
+ if (r.tool.kind === 'bash') expect(r.tool.input.command).toBe('ls -la')
+ }
+ })
+
+ // The JSON escape `\n` in the body must come out as a real newline.
+ test('parses a forge:write block', () => {
+ const stream = [
+ '```forge:write',
+ '{ "path": "notes.md", "content": "# hi\\n" }',
+ '```',
+ ].join('\n')
+ const r = parseFirstToolBlock(stream)
+ expect(r.kind).toBe('tool')
+ if (r.kind === 'tool' && r.tool.kind === 'write') {
+ expect(r.tool.input.path).toBe('notes.md')
+ expect(r.tool.input.content).toBe('# hi\n')
+ }
+ })
+
+ test('returns kind=invalid when JSON is malformed', () => {
+ const stream = '```forge:bash\n{ not json }\n```'
+ const r = parseFirstToolBlock(stream)
+ expect(r.kind).toBe('invalid')
+ if (r.kind === 'invalid') expect(r.error).toContain('not valid JSON')
+ })
+
+ // Valid JSON, but the empty command fails the bash schema.
+ test('returns kind=invalid when schema is wrong', () => {
+ const stream = '```forge:bash\n{ "command": "" }\n```'
+ const r = parseFirstToolBlock(stream)
+ expect(r.kind).toBe('invalid')
+ if (r.kind === 'invalid') expect(r.error).toContain('failed validation')
+ })
+
+ test('only the first block matters', () => {
+ const stream = [
+ '```forge:bash',
+ '{ "command": "echo a" }',
+ '```',
+ '```forge:bash',
+ '{ "command": "echo b" }',
+ '```',
+ ].join('\n')
+ const r = parseFirstToolBlock(stream)
+ expect(r.kind).toBe('tool')
+ if (r.kind === 'tool' && r.tool.kind === 'bash') {
+ expect(r.tool.input.command).toBe('echo a')
+ }
+ })
+
+ // One happy-path block for each remaining tool kind.
+ test('parses forge:read', () => {
+ const r = parseFirstToolBlock(
+ '```forge:read\n{ "path": "src/x.ts", "offset": 10, "limit": 50 }\n```',
+ )
+ expect(r.kind).toBe('tool')
+ if (r.kind === 'tool' && r.tool.kind === 'read') {
+ expect(r.tool.input.path).toBe('src/x.ts')
+ expect(r.tool.input.offset).toBe(10)
+ expect(r.tool.input.limit).toBe(50)
+ }
+ })
+
+ test('parses forge:edit', () => {
+ const r = parseFirstToolBlock(
+ '```forge:edit\n{ "path": "a.ts", "oldString": "x", "newString": "y" }\n```',
+ )
+ expect(r.kind).toBe('tool')
+ if (r.kind === 'tool' && r.tool.kind === 'edit') {
+ expect(r.tool.input.oldString).toBe('x')
+ expect(r.tool.input.newString).toBe('y')
+ }
+ })
+
+ test('parses forge:grep', () => {
+ const r = parseFirstToolBlock(
+ '```forge:grep\n{ "pattern": "TODO", "glob": "**/*.ts", "ignoreCase": true }\n```',
+ )
+ expect(r.kind).toBe('tool')
+ if (r.kind === 'tool' && r.tool.kind === 'grep') {
+ expect(r.tool.input.pattern).toBe('TODO')
+ expect(r.tool.input.ignoreCase).toBe(true)
+ }
+ })
+
+ test('parses forge:glob', () => {
+ const r = parseFirstToolBlock(
+ '```forge:glob\n{ "pattern": "src/**/*.ts" }\n```',
+ )
+ expect(r.kind).toBe('tool')
+ if (r.kind === 'tool' && r.tool.kind === 'glob') {
+ expect(r.tool.input.pattern).toBe('src/**/*.ts')
+ }
+ })
+
+ // Exercises a cross-field rule rather than a per-field one.
+ test('rejects invalid forge:edit (oldString equals newString)', () => {
+ const r = parseFirstToolBlock(
+ '```forge:edit\n{ "path": "a.ts", "oldString": "x", "newString": "x" }\n```',
+ )
+ expect(r.kind).toBe('invalid')
+ })
+})
diff --git a/packages/tools-core/src/docker-launch.ts b/packages/tools-core/src/docker-launch.ts
index c7f5594..5544e00 100644
--- a/packages/tools-core/src/docker-launch.ts
+++ b/packages/tools-core/src/docker-launch.ts
@@ -9,7 +9,7 @@
// agents can run in parallel without collision.
import { spawn, spawnSync } from 'node:child_process'
-import { existsSync } from 'node:fs'
+import { existsSync, mkdirSync } from 'node:fs'
import { join } from 'node:path'
import { z } from 'zod'
import { FORGE_HOME } from './file-write.ts'
@@ -75,6 +75,16 @@ export function launchAgent(input: DockerLaunchInput): LaunchHandle {
spawnSync('docker', ['rm', '-f', containerName], { stdio: 'ignore' })
}
+ // Per-run workspace on the host, bind-mounted RW into the container so
+ // tools (forge:bash, forge:write) have a sandbox they can scribble in.
+ // Kept after the container exits — useful for debugging and for P5
+ // artifact extraction.
+ const workspaceHostDir = join(
+ FORGE_HOME,
+ 'workspaces',
+ containerName,
+ )
+
async function* run(): AsyncGenerator {
if (!existsSync(agentMdPath)) {
yield { type: 'error', error: `AGENT.md not found : ${agentMdPath}` }
@@ -90,6 +100,8 @@ export function launchAgent(input: DockerLaunchInput): LaunchHandle {
return
}
+ mkdirSync(workspaceHostDir, { recursive: true })
+
const args = [
'run',
'--rm',
@@ -100,6 +112,10 @@ export function launchAgent(input: DockerLaunchInput): LaunchHandle {
`${agentMdPath}:/agent/AGENT.md:ro`,
'-v',
`${RUNTIME_DIST_FROM_TOOLS}:/runtime:ro`,
+ '-v',
+ `${workspaceHostDir}:/workspace`,
+ '-w',
+ '/workspace',
...inheritEnv(),
IMAGE,
'node',
diff --git a/packages/tools-core/src/index.ts b/packages/tools-core/src/index.ts
index 0fb06f6..c38bac8 100644
--- a/packages/tools-core/src/index.ts
+++ b/packages/tools-core/src/index.ts
@@ -21,3 +21,50 @@ export {
type DockerLaunchInput,
type LaunchHandle,
} from './docker-launch.ts'
+
+// Runtime-side tools — used INSIDE the agent's container, sandboxed to
+// /workspace. Distinct from the host-side FileWrite above.
+export {
+ BashInputSchema,
+ WORKSPACE_DIR,
+ executeBash,
+ type BashInput,
+ type BashResult,
+} from './runtime/bash.ts'
+
+export {
+ RuntimeFileWriteInputSchema,
+ executeRuntimeFileWrite,
+ resolveSandboxedPath,
+ type RuntimeFileWriteInput,
+ type RuntimeFileWriteResult,
+} from './runtime/file-write.ts'
+
+export {
+ RuntimeFileReadInputSchema,
+ executeRuntimeFileRead,
+ type RuntimeFileReadInput,
+ type RuntimeFileReadResult,
+} from './runtime/file-read.ts'
+
+export {
+ RuntimeFileEditInputSchema,
+ executeRuntimeFileEdit,
+ type RuntimeFileEditInput,
+ type RuntimeFileEditResult,
+} from './runtime/file-edit.ts'
+
+export {
+ RuntimeGlobInputSchema,
+ executeRuntimeGlob,
+ type RuntimeGlobInput,
+ type RuntimeGlobResult,
+} from './runtime/glob.ts'
+
+export {
+ RuntimeGrepInputSchema,
+ executeRuntimeGrep,
+ type GrepHit,
+ type RuntimeGrepInput,
+ type RuntimeGrepResult,
+} from './runtime/grep.ts'
diff --git a/packages/tools-core/src/runtime/bash.ts b/packages/tools-core/src/runtime/bash.ts
new file mode 100644
index 0000000..c039d42
--- /dev/null
+++ b/packages/tools-core/src/runtime/bash.ts
@@ -0,0 +1,104 @@
+// Bash — execute a shell command inside an agent's container.
+//
+// Runs INSIDE the container (called from @agent-forge/runtime). Wraps the
+// command with `bash -lc` so simple shell features (pipes, &&, $VAR) just
+// work. The command starts with /workspace as its cwd ; nothing here stops
+// it from changing directory, so isolation relies on the container itself.
+// A timeout (default 30s) prevents runaway commands from blocking the tool
+// loop.
+//
+// Returns a structured result (stdout, stderr, exitCode, timedOut). The
+// caller is responsible for formatting it back into a message the LLM will
+// read on the next turn.
+
+import { spawn } from 'node:child_process'
+import { z } from 'zod'
+
+export const WORKSPACE_DIR = '/workspace'
+
+// Tests on the host don't have /workspace. The runtime always uses
+// WORKSPACE_DIR when running inside the container ; tests can point this
+// at a temp dir via FORGE_WORKSPACE.
+function bashCwd(): string {
+  // FORGE_WORKSPACE, when set, takes precedence over the in-container mount.
+  const override = process.env.FORGE_WORKSPACE
+  return override ?? WORKSPACE_DIR
+}
+
+// Input accepted by the forge:bash tool.
+export const BashInputSchema = z.object({
+  command: z
+    .string()
+    .min(1)
+    .describe(
+      'Shell command to execute inside the agent sandbox. Run via `bash -lc`. The current directory is /workspace.',
+    ),
+  timeoutMs: z
+    .number()
+    .int()
+    .positive()
+    .max(120_000)
+    .optional()
+    .describe('Hard timeout in milliseconds. Defaults to 30000. Capped at 120000.'),
+})
+
+// Derived from the schema so the static type can never drift from the
+// runtime validation.
+export type BashInput = z.infer<typeof BashInputSchema>
+
+// Outcome of one command. `exitCode` is -1 when the process could not be
+// spawned or ended without an exit code ; `timedOut` is true when the
+// timeout fired and the process was killed.
+export type BashResult = {
+  stdout: string
+  stderr: string
+  exitCode: number
+  timedOut: boolean
+}
+
+const DEFAULT_TIMEOUT_MS = 30_000
+// Cap captured streams so a runaway command can't blow the LLM context.
+const MAX_OUTPUT_BYTES = 16_384
+
+// Clip `text` to at most MAX_OUTPUT_BYTES of UTF-8 and append a marker when
+// something was dropped. Both the limit and the cut are measured in BYTES :
+// cutting by string index would let multi-byte text exceed the cap, or flag
+// it as truncated without removing anything.
+function clip(text: string): string {
+  const bytes = Buffer.from(text, 'utf8')
+  if (bytes.length <= MAX_OUTPUT_BYTES) return text
+
+  // Step back over UTF-8 continuation bytes (0b10xxxxxx) so the cut never
+  // lands in the middle of a character.
+  let end = MAX_OUTPUT_BYTES
+  while (end > 0 && ((bytes[end] ?? 0) & 0xc0) === 0x80) end -= 1
+
+  const head = bytes.subarray(0, end).toString('utf8')
+  return `${head}\n…[output truncated at ${MAX_OUTPUT_BYTES.toString()} bytes]`
+}
+
+// Run `input.command` through `bash -lc` in the sandbox cwd and resolve
+// with its captured output. Never rejects : a spawn failure resolves with
+// exitCode -1 and the error message appended to stderr, and a timeout
+// resolves with timedOut=true after the process has been SIGKILLed.
+export async function executeBash(input: BashInput): Promise<BashResult> {
+  const timeoutMs = input.timeoutMs ?? DEFAULT_TIMEOUT_MS
+  return await new Promise<BashResult>((resolve) => {
+    const child = spawn('bash', ['-lc', input.command], {
+      cwd: bashCwd(),
+      stdio: ['ignore', 'pipe', 'pipe'],
+    })
+
+    // Let the streams decode UTF-8 themselves : a multi-byte character that
+    // straddles two chunks is then reassembled instead of being corrupted.
+    child.stdout.setEncoding('utf8')
+    child.stderr.setEncoding('utf8')
+
+    let stdout = ''
+    let stderr = ''
+    let timedOut = false
+
+    // Stop accumulating once a stream holds more than clip() will keep, so
+    // a chatty command cannot exhaust memory before the timeout fires. The
+    // pipe is still drained (chunks are just dropped) so the child never
+    // blocks on a full buffer, and clip() still appends its marker because
+    // the kept text is over the cap.
+    const append = (acc: string, chunk: string): string =>
+      acc.length > MAX_OUTPUT_BYTES ? acc : acc + chunk
+
+    const timer = setTimeout(() => {
+      timedOut = true
+      child.kill('SIGKILL')
+    }, timeoutMs)
+
+    child.stdout.on('data', (chunk: string) => {
+      stdout = append(stdout, chunk)
+    })
+    child.stderr.on('data', (chunk: string) => {
+      stderr = append(stderr, chunk)
+    })
+
+    child.on('error', (err) => {
+      clearTimeout(timer)
+      resolve({
+        stdout: clip(stdout),
+        stderr: clip(`${stderr}${err.message}`),
+        exitCode: -1,
+        timedOut,
+      })
+    })
+
+    child.on('close', (code) => {
+      clearTimeout(timer)
+      resolve({
+        stdout: clip(stdout),
+        stderr: clip(stderr),
+        // `code` is null when the process was ended by a signal.
+        exitCode: code ?? -1,
+        timedOut,
+      })
+    })
+  })
+}
diff --git a/packages/tools-core/src/runtime/file-edit.ts b/packages/tools-core/src/runtime/file-edit.ts
new file mode 100644
index 0000000..99e7cdf
--- /dev/null
+++ b/packages/tools-core/src/runtime/file-edit.ts
@@ -0,0 +1,89 @@
+// FileEdit (runtime) — patch a file under /workspace by replacing one
+// exact substring with another. Same shape as Claude Code's Edit tool.
+//
+// The match must be unique unless `replaceAll: true`. This forces the
+// LLM to widen its `oldString` window when it's ambiguous, instead of
+// guessing which occurrence it meant.
+
+import { readFileSync, writeFileSync } from 'node:fs'
+import { z } from 'zod'
+import { resolveSandboxedPath } from './file-write.ts'
+
+// Input accepted by the forge:edit tool. The refinement rejects a no-op
+// edit (identical old and new strings) at validation time.
+export const RuntimeFileEditInputSchema = z
+  .object({
+    path: z.string().min(1).describe('File path under /workspace.'),
+    oldString: z
+      .string()
+      .min(1)
+      .describe(
+        'Exact substring to find. Must match exactly once unless replaceAll is true.',
+      ),
+    newString: z.string().describe('Replacement substring.'),
+    replaceAll: z
+      .boolean()
+      .optional()
+      .describe('Replace every occurrence. Default false.'),
+  })
+  .refine((v) => v.oldString !== v.newString, {
+    message: 'oldString and newString must differ',
+    path: ['newString'],
+  })
+
+// Derived from the schema so the static type can never drift from the
+// runtime validation.
+export type RuntimeFileEditInput = z.infer<typeof RuntimeFileEditInputSchema>
+
+// Success carries the resolved path and the number of replacements made.
+export type RuntimeFileEditResult =
+  | { ok: true; absolutePath: string; replacements: number }
+  | { ok: false; error: string }
+
+// Count non-overlapping occurrences of `needle` in `haystack`, scanning
+// left to right. An empty needle counts as zero occurrences.
+function countOccurrences(haystack: string, needle: string): number {
+  if (needle === '') return 0
+  let total = 0
+  for (
+    let at = haystack.indexOf(needle);
+    at !== -1;
+    at = haystack.indexOf(needle, at + needle.length)
+  ) {
+    total += 1
+  }
+  return total
+}
+
+// Replace `oldString` with `newString` in a sandboxed file.
+//
+// Fails (ok: false) when the path escapes the sandbox, the file cannot be
+// read or written, `oldString` is absent, or it occurs more than once and
+// `replaceAll` is not set. On success returns the resolved path and the
+// number of replacements performed.
+export function executeRuntimeFileEdit(
+  input: RuntimeFileEditInput,
+): RuntimeFileEditResult {
+  const safe = resolveSandboxedPath(input.path)
+  if (!safe.ok) return safe
+
+  let original: string
+  try {
+    original = readFileSync(safe.absolutePath, 'utf8')
+  } catch (err) {
+    return { ok: false, error: err instanceof Error ? err.message : String(err) }
+  }
+
+  const occurrences = countOccurrences(original, input.oldString)
+  if (occurrences === 0) {
+    return { ok: false, error: 'oldString not found in file' }
+  }
+  if (occurrences > 1 && !input.replaceAll) {
+    return {
+      ok: false,
+      error: `oldString matches ${occurrences.toString()} times — widen the context or set replaceAll=true`,
+    }
+  }
+
+  // Both branches insert `newString` verbatim. The single replacement goes
+  // through a replacer FUNCTION : with a plain string, String.replace would
+  // expand `$&`, `$$`, `$1`… inside newString and corrupt code containing `$`.
+  const updated = input.replaceAll
+    ? original.split(input.oldString).join(input.newString)
+    : original.replace(input.oldString, () => input.newString)
+
+  try {
+    writeFileSync(safe.absolutePath, updated, 'utf8')
+  } catch (err) {
+    return { ok: false, error: err instanceof Error ? err.message : String(err) }
+  }
+
+  return {
+    ok: true,
+    absolutePath: safe.absolutePath,
+    replacements: input.replaceAll ? occurrences : 1,
+  }
+}
diff --git a/packages/tools-core/src/runtime/file-read.ts b/packages/tools-core/src/runtime/file-read.ts
new file mode 100644
index 0000000..7e15ba9
--- /dev/null
+++ b/packages/tools-core/src/runtime/file-read.ts
@@ -0,0 +1,93 @@
+// FileRead (runtime) — read a file under /workspace.
+//
+// Offset/limit are line-based (matches what an LLM expects when reading
+// source files). Output is clipped at 16 KB to protect the LLM context ;
+// any further reading should use offset.
+
+import { readFileSync, statSync } from 'node:fs'
+import { z } from 'zod'
+import { resolveSandboxedPath } from './file-write.ts'
+
+// Input accepted by the forge:read tool.
+export const RuntimeFileReadInputSchema = z.object({
+  path: z
+    .string()
+    .min(1)
+    .describe(
+      'Path inside the agent sandbox (/workspace). Relative or absolute under /workspace.',
+    ),
+  // The reader slices with this value directly, so it is a 0-based index :
+  // offset 0 starts at the first line of the file.
+  offset: z
+    .number()
+    .int()
+    .min(0)
+    .optional()
+    .describe('Number of lines to skip (0-based index of the first line returned). Default 0.'),
+  limit: z
+    .number()
+    .int()
+    .positive()
+    .max(2000)
+    .optional()
+    .describe('Max number of lines to return. Default 200, max 2000.'),
+})
+
+// Derived from the schema so the static type can never drift from the
+// runtime validation.
+export type RuntimeFileReadInput = z.infer<typeof RuntimeFileReadInputSchema>
+
+// On success : `totalLines` counts the whole file, `returnedLines` the
+// slice actually selected, and `truncatedBytes` tells whether `content`
+// was clipped for size.
+export type RuntimeFileReadResult =
+  | {
+      ok: true
+      absolutePath: string
+      content: string
+      totalLines: number
+      returnedLines: number
+      truncatedBytes: boolean
+    }
+  | { ok: false; error: string }
+
+const DEFAULT_LIMIT = 200
+const MAX_BYTES = 16_384
+
+// Read a line window of a sandboxed file.
+//
+// `offset` is the 0-based index of the first line returned and `limit` the
+// maximum number of lines (DEFAULT_LIMIT when omitted). Fails (ok: false)
+// when the path escapes the sandbox, is not a regular file, or cannot be
+// read. Content larger than MAX_BYTES of UTF-8 is clipped and flagged via
+// `truncatedBytes`.
+export function executeRuntimeFileRead(
+  input: RuntimeFileReadInput,
+): RuntimeFileReadResult {
+  const safe = resolveSandboxedPath(input.path)
+  if (!safe.ok) return safe
+
+  let raw: string
+  try {
+    const st = statSync(safe.absolutePath)
+    if (!st.isFile()) {
+      return { ok: false, error: `not a regular file : ${safe.absolutePath}` }
+    }
+    raw = readFileSync(safe.absolutePath, 'utf8')
+  } catch (err) {
+    return { ok: false, error: err instanceof Error ? err.message : String(err) }
+  }
+
+  const allLines = raw.split('\n')
+  // Drop the trailing empty element when the file ends with \n so totalLines
+  // reflects the human count, not split() artifact.
+  if (allLines.length > 0 && allLines[allLines.length - 1] === '') {
+    allLines.pop()
+  }
+  const totalLines = allLines.length
+
+  const offset = input.offset ?? 0
+  const limit = input.limit ?? DEFAULT_LIMIT
+  const slice = allLines.slice(offset, offset + limit)
+  let content = slice.join('\n')
+
+  // Measure AND cut in bytes : cutting by string index would let multi-byte
+  // text exceed the cap, or flag it as clipped without removing anything.
+  const encoded = Buffer.from(content, 'utf8')
+  const truncatedBytes = encoded.length > MAX_BYTES
+  if (truncatedBytes) {
+    // Step back over UTF-8 continuation bytes (0b10xxxxxx) so the cut never
+    // lands in the middle of a character.
+    let end = MAX_BYTES
+    while (end > 0 && ((encoded[end] ?? 0) & 0xc0) === 0x80) end -= 1
+    const head = encoded.subarray(0, end).toString('utf8')
+    content = `${head}\n…[output truncated at ${MAX_BYTES.toString()} bytes — use offset/limit for the rest]`
+  }
+
+  return {
+    ok: true,
+    absolutePath: safe.absolutePath,
+    content,
+    totalLines,
+    returnedLines: slice.length,
+    truncatedBytes,
+  }
+}
diff --git a/packages/tools-core/src/runtime/file-write.ts b/packages/tools-core/src/runtime/file-write.ts
new file mode 100644
index 0000000..f568eee
--- /dev/null
+++ b/packages/tools-core/src/runtime/file-write.ts
@@ -0,0 +1,76 @@
+// FileWrite (runtime) — write a file under /workspace from inside the
+// agent's container.
+//
+// Distinct from packages/tools-core/src/file-write.ts which writes under
+// the host's ~/.agent-forge/. The runtime version is sandboxed to
+// /workspace : the agent has no way to escape its container's mount.
+//
+// Path traversal (..), null bytes, and absolute paths outside /workspace
+// are refused. Existing files are overwritten by default — unlike the
+// host tool which is strict — because in-sandbox iteration is expected
+// (agents often rewrite their own files mid-loop).
+//
+// The sandbox root defaults to /workspace (the in-container mount) but
+// can be overridden via FORGE_WORKSPACE — useful for tests that want to
+// run on the host without touching /workspace.
+
+import { mkdirSync, writeFileSync } from 'node:fs'
+import { dirname, isAbsolute, join, resolve } from 'node:path'
+import { z } from 'zod'
+import { WORKSPACE_DIR } from './bash.ts'
+
+function sandboxRoot(): string {
+  // FORGE_WORKSPACE, when set, takes precedence over the in-container mount.
+  const override = process.env.FORGE_WORKSPACE
+  return override ?? WORKSPACE_DIR
+}
+
+// Input accepted by the forge:write tool.
+export const RuntimeFileWriteInputSchema = z.object({
+  path: z
+    .string()
+    .min(1)
+    .describe(
+      'Path inside the agent sandbox (/workspace). Either relative ("notes.md") or absolute under /workspace ("/workspace/src/index.ts"). Paths outside /workspace are rejected.',
+    ),
+  content: z.string().describe('Full file content to write.'),
+})
+
+// Derived from the schema so the static type can never drift from the
+// runtime validation.
+export type RuntimeFileWriteInput = z.infer<typeof RuntimeFileWriteInputSchema>
+
+// Success carries the resolved path and the UTF-8 size of what was written.
+export type RuntimeFileWriteResult =
+  | { ok: true; absolutePath: string; bytes: number }
+  | { ok: false; error: string }
+
+// Turn a tool-supplied path into an absolute path guaranteed to sit at or
+// below the sandbox root, or explain why it is refused.
+//
+// Relative paths are taken from the root ; absolute paths are accepted only
+// when they already point inside it. The check is purely lexical (`..` is
+// collapsed, symlinks are NOT followed).
+export function resolveSandboxedPath(rawPath: string):
+  | { ok: true; absolutePath: string }
+  | { ok: false; error: string } {
+  if (rawPath.includes('\0')) {
+    return { ok: false, error: 'path contains a null byte' }
+  }
+  // Normalize the root first : FORGE_WORKSPACE may be relative or carry a
+  // trailing slash, and comparing against the raw value would then reject
+  // every path, including legitimate ones.
+  const root = resolve(sandboxRoot())
+  const target = isAbsolute(rawPath) ? rawPath : join(root, rawPath)
+  const resolved = resolve(target)
+  // Compare against "<root>/" so a sibling such as /workspace-old is not
+  // mistaken for a child of /workspace.
+  const prefix = root.endsWith('/') ? root : `${root}/`
+  if (resolved !== root && !resolved.startsWith(prefix)) {
+    return {
+      ok: false,
+      error: `path escapes the agent sandbox (${root})`,
+    }
+  }
+  return { ok: true, absolutePath: resolved }
+}
+
+// Write `input.content` to a sandboxed path, creating missing parent
+// directories and overwriting any existing file. Filesystem errors are
+// returned as `{ ok: false }` rather than thrown.
+export function executeRuntimeFileWrite(
+  input: RuntimeFileWriteInput,
+): RuntimeFileWriteResult {
+  const target = resolveSandboxedPath(input.path)
+  if (!target.ok) return target
+
+  const { absolutePath } = target
+  try {
+    mkdirSync(dirname(absolutePath), { recursive: true })
+    writeFileSync(absolutePath, input.content, 'utf8')
+  } catch (err) {
+    const error = err instanceof Error ? err.message : String(err)
+    return { ok: false, error }
+  }
+
+  const bytes = Buffer.byteLength(input.content, 'utf8')
+  return { ok: true, absolutePath, bytes }
+}
diff --git a/packages/tools-core/src/runtime/glob.ts b/packages/tools-core/src/runtime/glob.ts
new file mode 100644
index 0000000..7692037
--- /dev/null
+++ b/packages/tools-core/src/runtime/glob.ts
@@ -0,0 +1,123 @@
+// Glob (runtime) — find files matching a glob pattern under /workspace.
+//
+// Hand-rolled to avoid adding a dependency to the in-container bundle.
+// Supports the patterns LLMs actually use : `*`, `**`, `?`. No braces,
+// no character classes — those rarely appear in agent-emitted patterns
+// and would just bloat the parser.
+//
+// Returns relative paths (from the sandbox root) sorted alphabetically.
+// Capped at 200 results.
+
+import { readdirSync, statSync } from 'node:fs'
+import { join, relative, resolve, sep } from 'node:path'
+import { z } from 'zod'
+import { resolveSandboxedPath } from './file-write.ts'
+
+// Input accepted by the forge:glob tool.
+export const RuntimeGlobInputSchema = z.object({
+  pattern: z
+    .string()
+    .min(1)
+    .describe(
+      'Glob pattern relative to /workspace. Supports *, **, and ?. Example : "src/**/*.ts".',
+    ),
+})
+
+// Derived from the schema so the static type can never drift from the
+// runtime validation.
+export type RuntimeGlobInput = z.infer<typeof RuntimeGlobInputSchema>
+
+// `truncated` is true when more paths matched than were returned.
+export type RuntimeGlobResult =
+  | { ok: true; matches: string[]; truncated: boolean }
+  | { ok: false; error: string }
+
+const MAX_MATCHES = 200
+const MAX_WALK_NODES = 5000
+
+// Convert a glob to a RegExp matching a whole relative POSIX path.
+//
+//   *   any run of characters within one path segment
+//   ?   exactly one character within one path segment
+//   **  as a full segment : zero or more whole directories (or, in last
+//       position, everything below)
+//
+// Every other character is matched literally. `**` keeps the separators
+// around it, so "src/**/*.ts" matches "src/a.ts" and "src/x/y/a.ts" but not
+// "srcfoo/a.ts".
+function globToRegex(pattern: string): RegExp {
+  const segments = pattern.split('/')
+  const lastIndex = segments.length - 1
+  let source = ''
+
+  segments.forEach((segment, index) => {
+    if (segment === '**') {
+      // Trailing `**` swallows the rest of the path ; elsewhere it stands
+      // for any number of complete "dir/" prefixes, including none.
+      source += index === lastIndex ? '.*' : '(?:[^/]+/)*'
+      return
+    }
+    for (const ch of segment) {
+      if (ch === '*') source += '[^/]*'
+      else if (ch === '?') source += '[^/]'
+      else if (/[.+^${}()|[\]\\]/.test(ch)) source += `\\${ch}`
+      else source += ch
+    }
+    if (index !== lastIndex) source += '/'
+  })
+
+  return new RegExp(`^${source}$`)
+}
+
+// Walk a directory tree and return relative POSIX paths of all FILES.
+// Bounded by MAX_WALK_NODES to protect against pathological trees.
+// Collect the regular files under `root` as "/"-separated paths relative
+// to it. Directories that cannot be listed and entries that cannot be
+// stat'ed are skipped silently. The walk stops once MAX_WALK_NODES entries
+// have been visited, so the result may be partial on very large trees.
+function walk(root: string): string[] {
+  const files: string[] = []
+  const pending: string[] = [root]
+  let visited = 0
+
+  while (pending.length > 0 && visited < MAX_WALK_NODES) {
+    const dir = pending.pop() as string
+    let names: string[]
+    try {
+      names = readdirSync(dir)
+    } catch {
+      continue
+    }
+
+    for (const name of names) {
+      visited += 1
+      if (visited >= MAX_WALK_NODES) break
+
+      const full = join(dir, name)
+      // Classify inside the try so the stat object never needs a declared
+      // type of its own.
+      let isDirectory: boolean
+      let isFile: boolean
+      try {
+        const st = statSync(full)
+        isDirectory = st.isDirectory()
+        isFile = st.isFile()
+      } catch {
+        continue
+      }
+
+      if (isDirectory) {
+        pending.push(full)
+      } else if (isFile) {
+        files.push(relative(root, full).split(sep).join('/'))
+      }
+    }
+  }
+  return files
+}
+
+// List the sandbox files whose relative path matches `input.pattern`,
+// sorted, keeping at most MAX_MATCHES of them.
+export function executeRuntimeGlob(
+  input: RuntimeGlobInput,
+): RuntimeGlobResult {
+  // Resolving "." yields the sandbox root with the same FORGE_WORKSPACE
+  // override the other runtime tools honour.
+  const sandbox = resolveSandboxedPath('.')
+  if (!sandbox.ok) return sandbox
+  const root = resolve(sandbox.absolutePath)
+
+  const matcher = globToRegex(input.pattern)
+  const hits = walk(root)
+    .filter((candidate) => matcher.test(candidate))
+    .sort()
+
+  if (hits.length <= MAX_MATCHES) {
+    return { ok: true, matches: hits, truncated: false }
+  }
+  return { ok: true, matches: hits.slice(0, MAX_MATCHES), truncated: true }
+}
diff --git a/packages/tools-core/src/runtime/grep.ts b/packages/tools-core/src/runtime/grep.ts
new file mode 100644
index 0000000..b7ae6e8
--- /dev/null
+++ b/packages/tools-core/src/runtime/grep.ts
@@ -0,0 +1,106 @@
+// Grep (runtime) — regex search across files under /workspace.
+//
+// Pure JS, no ripgrep dependency : the alpine container doesn't ship rg
+// by default and we don't want to bloat the image just for this. For a
+// POC the trade-off is fine ; if it becomes a bottleneck we'll bind-mount
+// rg later.
+//
+// The pattern is a JavaScript RegExp source. Files are filtered by an
+// optional glob to keep the scan bounded. Binary-looking content
+// (NUL bytes in the first 4 KB) is skipped.
+
+import { readFileSync } from 'node:fs'
+import { join } from 'node:path'
+import { z } from 'zod'
+import { resolveSandboxedPath } from './file-write.ts'
+import { executeRuntimeGlob } from './glob.ts'
+
+// Input accepted by the forge:grep tool.
+export const RuntimeGrepInputSchema = z.object({
+  pattern: z
+    .string()
+    .min(1)
+    .describe('JavaScript RegExp source. Example : "TODO|FIXME".'),
+  glob: z
+    .string()
+    .optional()
+    .describe(
+      'Optional file pattern relative to /workspace (e.g. "src/**/*.ts"). Defaults to "**/*".',
+    ),
+  ignoreCase: z.boolean().optional().describe('Case-insensitive match. Default false.'),
+})
+
+// Derived from the schema so the static type can never drift from the
+// runtime validation.
+export type RuntimeGrepInput = z.infer<typeof RuntimeGrepInputSchema>
+
+// One matching line : sandbox-relative path, 1-based line number, and the
+// (possibly clipped) line text.
+export type GrepHit = { path: string; line: number; text: string }
+
+// `scanned` counts the files whose content was actually searched.
+export type RuntimeGrepResult =
+  | { ok: true; hits: GrepHit[]; truncated: boolean; scanned: number }
+  | { ok: false; error: string }
+
+const MAX_HITS = 200
+const MAX_LINE_LEN = 400 // clip long lines so a minified file doesn't blow context
+const MAX_FILE_BYTES = 1_048_576 // skip files > 1 MB
+
+// Heuristic : a NUL byte within the first 4096 bytes marks the buffer as
+// binary.
+function looksBinary(buf: Buffer): boolean {
+  return buf.subarray(0, 4096).includes(0)
+}
+
+// Search the sandbox files selected by `input.glob` (default "**/*") for
+// lines matching `input.pattern`.
+//
+// Fails (ok: false) on an invalid regex. Unreadable files, files larger
+// than MAX_FILE_BYTES and binary-looking files are skipped. `truncated` is
+// true when the result is known to be incomplete : either MAX_HITS was
+// reached or the file list itself had been cut short by the glob step.
+export function executeRuntimeGrep(
+  input: RuntimeGrepInput,
+): RuntimeGrepResult {
+  let re: RegExp
+  try {
+    re = new RegExp(input.pattern, input.ignoreCase ? 'i' : undefined)
+  } catch (err) {
+    return { ok: false, error: `invalid regex : ${err instanceof Error ? err.message : String(err)}` }
+  }
+
+  const safeRoot = resolveSandboxedPath('.')
+  if (!safeRoot.ok) return safeRoot
+
+  const filesResult = executeRuntimeGlob({ pattern: input.glob ?? '**/*' })
+  if (!filesResult.ok) return filesResult
+
+  const hits: GrepHit[] = []
+  // Start from the glob's own flag : if the file list was capped, files
+  // beyond the cap are never searched and the caller must be told.
+  let truncated = filesResult.truncated
+  let scanned = 0
+
+  for (const rel of filesResult.matches) {
+    if (hits.length >= MAX_HITS) {
+      truncated = true
+      break
+    }
+    const abs = join(safeRoot.absolutePath, rel)
+    let buf: Buffer
+    try {
+      buf = readFileSync(abs)
+    } catch {
+      continue
+    }
+    if (buf.length > MAX_FILE_BYTES) continue
+    if (looksBinary(buf)) continue
+    scanned += 1
+
+    const lines = buf.toString('utf8').split('\n')
+    for (let i = 0; i < lines.length; i += 1) {
+      const line = lines[i] as string
+      if (!re.test(line)) continue
+      hits.push({
+        path: rel,
+        line: i + 1,
+        // Clip long lines so a minified file doesn't blow the context.
+        text: line.length > MAX_LINE_LEN ? `${line.slice(0, MAX_LINE_LEN)}…` : line,
+      })
+      if (hits.length >= MAX_HITS) {
+        truncated = true
+        break
+      }
+    }
+  }
+
+  return { ok: true, hits, truncated, scanned }
+}
diff --git a/packages/tools-core/tests/runtime-bash.test.ts b/packages/tools-core/tests/runtime-bash.test.ts
new file mode 100644
index 0000000..b0b0d10
--- /dev/null
+++ b/packages/tools-core/tests/runtime-bash.test.ts
@@ -0,0 +1,52 @@
+// Round-trip tests for the runtime-side Bash tool.
+// Uses FORGE_WORKSPACE so the cwd is a temp dir, not /workspace.
+
+import { afterAll, beforeAll, describe, expect, test } from 'bun:test'
+import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+
+let TMP_WORKSPACE: string // temp directory created in beforeAll and exported through FORGE_WORKSPACE
+const ORIGINAL_ENV = process.env.FORGE_WORKSPACE // remembered so afterAll can restore the prior value
+
+beforeAll(() => {
+ TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-bash-'))
+ process.env.FORGE_WORKSPACE = TMP_WORKSPACE
+})
+
+afterAll(() => {
+ if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE // variable was unset before this file ran
+ else process.env.FORGE_WORKSPACE = ORIGINAL_ENV
+ rmSync(TMP_WORKSPACE, { recursive: true, force: true }) // remove the temp workspace and everything in it
+})
+
+const { executeBash } = await import('../src/runtime/bash.ts') // dynamic import of the module under test
+
+describe('executeBash', () => { // covers stdout/stderr capture, exit codes, working directory and timeout
+ test('captures stdout from a simple command', async () => {
+ const r = await executeBash({ command: 'echo hello' })
+ expect(r.exitCode).toBe(0)
+ expect(r.stdout.trim()).toBe('hello') // trim() drops the newline echo appends
+ expect(r.stderr).toBe('')
+ expect(r.timedOut).toBe(false)
+ })
+
+ test('captures stderr and a non-zero exit code', async () => {
+ const r = await executeBash({ command: 'echo oops 1>&2 ; exit 7' }) // write to stderr, then exit with status 7
+ expect(r.exitCode).toBe(7)
+ expect(r.stderr.trim()).toBe('oops')
+ })
+
+ test('runs in the sandbox cwd', async () => {
+ writeFileSync(join(TMP_WORKSPACE, 'marker.txt'), 'present') // created in the temp workspace, read below via a relative path
+ const r = await executeBash({ command: 'cat marker.txt' })
+ expect(r.exitCode).toBe(0)
+ expect(r.stdout).toBe('present')
+ })
+
+ test('honors a tight timeout', async () => {
+ const r = await executeBash({ command: 'sleep 5', timeoutMs: 200 }) // 200 ms limit against a 5 s sleep
+ expect(r.timedOut).toBe(true)
+ expect(r.exitCode).not.toBe(0) // only non-zero is asserted; the exact code after a timeout is not pinned
+ })
+})
diff --git a/packages/tools-core/tests/runtime-file-edit.test.ts b/packages/tools-core/tests/runtime-file-edit.test.ts
new file mode 100644
index 0000000..37d0ddb
--- /dev/null
+++ b/packages/tools-core/tests/runtime-file-edit.test.ts
@@ -0,0 +1,86 @@
+import { afterAll, beforeAll, describe, expect, test } from 'bun:test'
+import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+
+let TMP_WORKSPACE: string // temp directory created in beforeAll and exported through FORGE_WORKSPACE
+const ORIGINAL_ENV = process.env.FORGE_WORKSPACE // remembered so afterAll can restore the prior value
+
+beforeAll(() => {
+ TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-fe-'))
+ process.env.FORGE_WORKSPACE = TMP_WORKSPACE
+})
+
+afterAll(() => {
+ if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE // variable was unset before this file ran
+ else process.env.FORGE_WORKSPACE = ORIGINAL_ENV
+ rmSync(TMP_WORKSPACE, { recursive: true, force: true }) // remove the temp workspace and everything in it
+})
+
+const { executeRuntimeFileEdit } = await import('../src/runtime/file-edit.ts') // dynamic import of the module under test
+
+describe('executeRuntimeFileEdit', () => { // unique / ambiguous / replaceAll / missing-match / sandbox-escape cases
+ test('replaces a unique substring', () => {
+ const path = join(TMP_WORKSPACE, 'a.ts')
+ writeFileSync(path, 'const x = 1\nconst y = 2\n')
+ const r = executeRuntimeFileEdit({
+ path: 'a.ts',
+ oldString: 'const x = 1',
+ newString: 'const x = 42',
+ })
+ expect(r.ok).toBe(true)
+ if (r.ok) {
+ expect(r.replacements).toBe(1)
+ expect(readFileSync(path, 'utf8')).toBe('const x = 42\nconst y = 2\n') // the rest of the file must be untouched
+ }
+ })
+
+ test('refuses ambiguous match without replaceAll', () => {
+ const path = join(TMP_WORKSPACE, 'b.ts')
+ writeFileSync(path, 'foo\nfoo\n') // two occurrences of the search string
+ const r = executeRuntimeFileEdit({
+ path: 'b.ts',
+ oldString: 'foo',
+ newString: 'bar',
+ })
+ expect(r.ok).toBe(false)
+ if (!r.ok) expect(r.error).toContain('matches 2 times') // the error text is expected to state the match count
+ })
+
+ test('replaceAll handles every occurrence', () => {
+ const path = join(TMP_WORKSPACE, 'c.ts')
+ writeFileSync(path, 'foo\nfoo\nfoo\n')
+ const r = executeRuntimeFileEdit({
+ path: 'c.ts',
+ oldString: 'foo',
+ newString: 'bar',
+ replaceAll: true,
+ })
+ expect(r.ok).toBe(true)
+ if (r.ok) {
+ expect(r.replacements).toBe(3)
+ expect(readFileSync(path, 'utf8')).toBe('bar\nbar\nbar\n')
+ }
+ })
+
+ test('returns an error when oldString is missing', () => {
+ const path = join(TMP_WORKSPACE, 'd.ts')
+ writeFileSync(path, 'hello') // does not contain the search string used below
+ const r = executeRuntimeFileEdit({
+ path: 'd.ts',
+ oldString: 'goodbye',
+ newString: 'bye',
+ })
+ expect(r.ok).toBe(false)
+ if (!r.ok) expect(r.error).toContain('not found')
+ })
+
+ test('refuses path outside the sandbox', () => { // no file is created: the path itself must be rejected
+ const r = executeRuntimeFileEdit({
+ path: '../escape',
+ oldString: 'a',
+ newString: 'b',
+ })
+ expect(r.ok).toBe(false)
+ })
+})
diff --git a/packages/tools-core/tests/runtime-file-read.test.ts b/packages/tools-core/tests/runtime-file-read.test.ts
new file mode 100644
index 0000000..e1c6374
--- /dev/null
+++ b/packages/tools-core/tests/runtime-file-read.test.ts
@@ -0,0 +1,55 @@
+import { afterAll, beforeAll, describe, expect, test } from 'bun:test'
+import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+
+let TMP_WORKSPACE: string // temp directory created in beforeAll and exported through FORGE_WORKSPACE
+const ORIGINAL_ENV = process.env.FORGE_WORKSPACE // remembered so afterAll can restore the prior value
+
+beforeAll(() => {
+ TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-fr-'))
+ process.env.FORGE_WORKSPACE = TMP_WORKSPACE
+})
+
+afterAll(() => {
+ if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE // variable was unset before this file ran
+ else process.env.FORGE_WORKSPACE = ORIGINAL_ENV
+ rmSync(TMP_WORKSPACE, { recursive: true, force: true }) // remove the temp workspace and everything in it
+})
+
+const { executeRuntimeFileRead } = await import('../src/runtime/file-read.ts') // dynamic import of the module under test
+
+describe('executeRuntimeFileRead', () => { // full read, offset/limit window, sandbox escape, missing file
+ test('reads the full file when no offset/limit', () => {
+ writeFileSync(join(TMP_WORKSPACE, 'a.txt'), 'one\ntwo\nthree\n')
+ const r = executeRuntimeFileRead({ path: 'a.txt' })
+ expect(r.ok).toBe(true)
+ if (r.ok) {
+ expect(r.content).toBe('one\ntwo\nthree') // the file's trailing newline is not expected in the returned content
+ expect(r.totalLines).toBe(3)
+ expect(r.returnedLines).toBe(3)
+ }
+ })
+
+ test('honors offset and limit', () => {
+ const lines = Array.from({ length: 10 }, (_, i) => `line${(i + 1).toString()}`).join('\n') // "line1" … "line10"
+ writeFileSync(join(TMP_WORKSPACE, 'b.txt'), lines)
+ const r = executeRuntimeFileRead({ path: 'b.txt', offset: 3, limit: 4 }) // per the expectation below, offset 3 skips three lines
+ expect(r.ok).toBe(true)
+ if (r.ok) {
+ expect(r.content).toBe('line4\nline5\nline6\nline7')
+ expect(r.totalLines).toBe(10) // total is for the whole file, not the returned window
+ expect(r.returnedLines).toBe(4)
+ }
+ })
+
+ test('rejects path outside the sandbox', () => {
+ const r = executeRuntimeFileRead({ path: '../escape.txt' })
+ expect(r.ok).toBe(false)
+ })
+
+ test('returns an error for missing files', () => { // 'nope.txt' is never created in this file
+ const r = executeRuntimeFileRead({ path: 'nope.txt' })
+ expect(r.ok).toBe(false)
+ })
+})
diff --git a/packages/tools-core/tests/runtime-file-write.test.ts b/packages/tools-core/tests/runtime-file-write.test.ts
new file mode 100644
index 0000000..8087566
--- /dev/null
+++ b/packages/tools-core/tests/runtime-file-write.test.ts
@@ -0,0 +1,99 @@
+// Security and round-trip tests for the runtime-side FileWrite tool.
+// Uses FORGE_WORKSPACE to point the sandbox at a temp dir so the tests
+// don't try to write to /workspace on the host.
+
+import { afterAll, afterEach, beforeAll, describe, expect, test } from 'bun:test'
+import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+
+let TMP_WORKSPACE: string // temp directory created in beforeAll and exported through FORGE_WORKSPACE
+const ORIGINAL_ENV = process.env.FORGE_WORKSPACE // remembered so afterAll can restore the prior value
+
+beforeAll(() => {
+ TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-fw-'))
+ process.env.FORGE_WORKSPACE = TMP_WORKSPACE
+})
+
+afterAll(() => {
+ if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE // variable was unset before this file ran
+ else process.env.FORGE_WORKSPACE = ORIGINAL_ENV
+ rmSync(TMP_WORKSPACE, { recursive: true, force: true }) // remove the temp workspace and everything in it
+})
+
+// Dynamic import of the module under test. NOTE(review): a top-level await runs while this file loads, i.e. before the beforeAll hook above sets FORGE_WORKSPACE, so this only works if the module reads the env var at call time — confirm.
+const {
+ executeRuntimeFileWrite,
+ resolveSandboxedPath,
+} = await import('../src/runtime/file-write.ts')
+
+afterEach(() => {
+ // Wipe contents but keep the dir itself so the env var stays valid.
+ for (const entry of [ // fixed list of the paths the tests in this file may create
+ 'a.txt',
+ 'sub/b.txt',
+ 'sub',
+ 'overwrite-me.txt',
+ ]) {
+ const p = join(TMP_WORKSPACE, entry)
+ if (existsSync(p)) rmSync(p, { recursive: true, force: true }) // entries a test did not create are skipped
+ }
+})
+
+describe('resolveSandboxedPath (runtime)', () => { // path validation only; nothing is written to disk here
+ test('accepts a relative path under the sandbox', () => {
+ const r = resolveSandboxedPath('a.txt')
+ expect(r.ok).toBe(true)
+ if (r.ok) expect(r.absolutePath).toBe(join(TMP_WORKSPACE, 'a.txt')) // resolved against the FORGE_WORKSPACE directory
+ })
+
+ test('rejects path traversal', () => {
+ const r = resolveSandboxedPath('../escape.txt')
+ expect(r.ok).toBe(false)
+ })
+
+ test('rejects absolute path outside the sandbox', () => {
+ const r = resolveSandboxedPath('/etc/passwd')
+ expect(r.ok).toBe(false)
+ })
+
+ test('rejects null byte', () => {
+ const r = resolveSandboxedPath('foo\0bar') // NUL character embedded in the path
+ expect(r.ok).toBe(false)
+ })
+})
+
+describe('executeRuntimeFileWrite', () => { // write, nested write, overwrite, and sandbox-escape cases
+ test('writes a file in the sandbox', () => {
+ const r = executeRuntimeFileWrite({ path: 'a.txt', content: 'hi' })
+ expect(r.ok).toBe(true)
+ if (r.ok) {
+ expect(readFileSync(r.absolutePath, 'utf8')).toBe('hi') // read back from disk through the reported absolute path
+ expect(r.bytes).toBe(2) // 'hi' is two bytes
+ }
+ })
+
+ test('creates parent directories', () => { // 'sub/' does not exist beforehand: afterEach removes it between tests
+ const r = executeRuntimeFileWrite({
+ path: 'sub/b.txt',
+ content: 'nested',
+ })
+ expect(r.ok).toBe(true)
+ if (r.ok) expect(readFileSync(r.absolutePath, 'utf8')).toBe('nested')
+ })
+
+ test('overwrites an existing file', () => {
+ executeRuntimeFileWrite({ path: 'overwrite-me.txt', content: 'v1' }) // first write; its result is deliberately not checked
+ const r = executeRuntimeFileWrite({ path: 'overwrite-me.txt', content: 'v2' })
+ expect(r.ok).toBe(true)
+ if (r.ok) expect(readFileSync(r.absolutePath, 'utf8')).toBe('v2')
+ })
+
+ test('refuses path escaping the sandbox', () => {
+ const r = executeRuntimeFileWrite({
+ path: '../evil.txt',
+ content: 'x',
+ })
+ expect(r.ok).toBe(false)
+ })
+})
diff --git a/packages/tools-core/tests/runtime-glob.test.ts b/packages/tools-core/tests/runtime-glob.test.ts
new file mode 100644
index 0000000..cdfe6ec
--- /dev/null
+++ b/packages/tools-core/tests/runtime-glob.test.ts
@@ -0,0 +1,53 @@
+import { afterAll, beforeAll, describe, expect, test } from 'bun:test'
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+
+let TMP_WORKSPACE: string // temp directory created in beforeAll and exported through FORGE_WORKSPACE
+const ORIGINAL_ENV = process.env.FORGE_WORKSPACE // remembered so afterAll can restore the prior value
+
+beforeAll(() => {
+ TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-gl-'))
+ process.env.FORGE_WORKSPACE = TMP_WORKSPACE
+ mkdirSync(join(TMP_WORKSPACE, 'src/sub'), { recursive: true }) // fixture tree: src/, src/sub/ and four empty files
+ writeFileSync(join(TMP_WORKSPACE, 'src/index.ts'), '')
+ writeFileSync(join(TMP_WORKSPACE, 'src/sub/util.ts'), '')
+ writeFileSync(join(TMP_WORKSPACE, 'src/sub/util.test.ts'), '')
+ writeFileSync(join(TMP_WORKSPACE, 'README.md'), '')
+})
+
+afterAll(() => {
+ if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE // variable was unset before this file ran
+ else process.env.FORGE_WORKSPACE = ORIGINAL_ENV
+ rmSync(TMP_WORKSPACE, { recursive: true, force: true }) // remove the temp workspace and everything in it
+})
+
+const { executeRuntimeGlob } = await import('../src/runtime/glob.ts') // dynamic import of the module under test
+
+describe('executeRuntimeGlob', () => { // patterns are matched against the fixture tree built in beforeAll
+ test('matches all .ts files recursively with **/*.ts', () => {
+ const r = executeRuntimeGlob({ pattern: '**/*.ts' })
+ expect(r.ok).toBe(true)
+ if (r.ok) {
+ expect(r.matches).toEqual(['src/index.ts', 'src/sub/util.test.ts', 'src/sub/util.ts']) // toEqual is order-sensitive: this exact order is required
+ }
+ })
+
+ test('matches a single segment with src/*.ts', () => {
+ const r = executeRuntimeGlob({ pattern: 'src/*.ts' })
+ expect(r.ok).toBe(true)
+ if (r.ok) expect(r.matches).toEqual(['src/index.ts']) // files under src/sub/ must not be matched by a single `*`
+ })
+
+ test('matches with ? for single char', () => {
+ const r = executeRuntimeGlob({ pattern: 'README.m?' })
+ expect(r.ok).toBe(true)
+ if (r.ok) expect(r.matches).toEqual(['README.md'])
+ })
+
+ test('returns empty when nothing matches', () => { // the fixture tree contains no .rs file
+ const r = executeRuntimeGlob({ pattern: '**/*.rs' })
+ expect(r.ok).toBe(true) // no match is a successful, empty result rather than an error
+ if (r.ok) expect(r.matches).toEqual([])
+ })
+})
diff --git a/packages/tools-core/tests/runtime-grep.test.ts b/packages/tools-core/tests/runtime-grep.test.ts
new file mode 100644
index 0000000..4e711ba
--- /dev/null
+++ b/packages/tools-core/tests/runtime-grep.test.ts
@@ -0,0 +1,62 @@
+import { afterAll, beforeAll, describe, expect, test } from 'bun:test'
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+
+let TMP_WORKSPACE: string // temp directory created in beforeAll and exported through FORGE_WORKSPACE
+const ORIGINAL_ENV = process.env.FORGE_WORKSPACE // remembered so afterAll can restore the prior value
+
+beforeAll(() => {
+ TMP_WORKSPACE = mkdtempSync(join(tmpdir(), 'forge-rt-gr-'))
+ process.env.FORGE_WORKSPACE = TMP_WORKSPACE
+ mkdirSync(join(TMP_WORKSPACE, 'src'), { recursive: true })
+ writeFileSync( // fixture with one upper-case TODO line and one lower-case "fixme" line
+ join(TMP_WORKSPACE, 'src/index.ts'),
+ '// TODO: implement\nexport const x = 1\n// fixme later\n',
+ )
+ writeFileSync(join(TMP_WORKSPACE, 'src/util.ts'), 'export const todo = "x"\n') // only a lower-case "todo"
+ writeFileSync(join(TMP_WORKSPACE, 'README.md'), '# project\nTODO: write docs\n')
+})
+
+afterAll(() => {
+ if (ORIGINAL_ENV === undefined) delete process.env.FORGE_WORKSPACE // variable was unset before this file ran
+ else process.env.FORGE_WORKSPACE = ORIGINAL_ENV
+ rmSync(TMP_WORKSPACE, { recursive: true, force: true }) // remove the temp workspace and everything in it
+})
+
+const { executeRuntimeGrep } = await import('../src/runtime/grep.ts') // dynamic import of the module under test
+
+describe('executeRuntimeGrep', () => { // searches the three fixture files written in beforeAll
+ test('finds case-sensitive matches across files', () => {
+ const r = executeRuntimeGrep({ pattern: 'TODO' })
+ expect(r.ok).toBe(true)
+ if (r.ok) {
+ const paths = r.hits.map((h) => h.path).sort() // sorted so the assertion does not depend on hit order
+ expect(paths).toEqual(['README.md', 'src/index.ts']) // src/util.ts only has lower-case "todo" and must not match
+ }
+ })
+
+ test('honors ignoreCase', () => {
+ const r = executeRuntimeGrep({ pattern: 'todo', ignoreCase: true })
+ expect(r.ok).toBe(true)
+ if (r.ok) {
+ const paths = r.hits.map((h) => h.path).sort()
+ // util.ts matches via "const todo", index.ts via TODO, README.md via TODO.
+ expect(paths).toEqual(['README.md', 'src/index.ts', 'src/util.ts'])
+ }
+ })
+
+ test('respects the glob filter', () => {
+ const r = executeRuntimeGrep({ pattern: 'TODO', glob: '**/*.md' }) // the glob restricts the search to Markdown files
+ expect(r.ok).toBe(true)
+ if (r.ok) {
+ const paths = r.hits.map((h) => h.path)
+ expect(paths).toEqual(['README.md'])
+ }
+ })
+
+ test('returns an error for an invalid regex', () => {
+ const r = executeRuntimeGrep({ pattern: '(' }) // unbalanced parenthesis cannot be compiled as a RegExp
+ expect(r.ok).toBe(false)
+ })
+})