Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions packages/opencode/src/session/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ import { NamedError } from "@opencode-ai/util/error"
import { fn } from "@/util/fn"
import { SessionProcessor } from "./processor"
import { TaskTool } from "@/tool/task"
// altimate_change start — critic gate
import { Critic } from "@/tool/critic"
// altimate_change end
import { Tool } from "@/tool/tool"
import { PermissionNext } from "@/permission/next"
import { SessionStatus } from "./status"
Expand Down Expand Up @@ -1501,6 +1504,28 @@ export namespace SessionPrompt {
args,
},
)
// altimate_change start — critic gate (pre-execution safety check)
// Flag-gated (ALTIMATE_CRITIC_GATE), default off. On a hard verdict we
// skip execution and feed the reason back so the model can retry, while
// still emitting tool.execute.after so observability sees the blocked call.
if (Critic.enabled()) {
const verdict = await Critic.gate(item.id, args as Record<string, any>, Critic.basicSafetyVerifier)
if (!verdict.allow) {
const blocked = {
title: `Blocked: ${item.id}`,
metadata: { critic: { blocked: true, reason: verdict.feedback } } as any,
output: verdict.feedback ?? "Blocked by altimate verifier.",
attachments: [] as never[],
}
await Plugin.trigger(
"tool.execute.after",
{ tool: item.id, sessionID: ctx.sessionID, callID: ctx.callID, args },
blocked,
)
return blocked
}
}
// altimate_change end
const result = await item.execute(args, ctx)
const output = {
...result,
Expand Down
262 changes: 262 additions & 0 deletions packages/opencode/src/tool/critic.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
/**
* Pre-execution critic gate.
*
* Before a SIDE-EFFECTING tool runs (bash, write, edit, sql_execute, dbt_*), a
* verifier checks the proposed args; on hard failure the call is denied and the
* reason is fed back so the model can retry — instead of executing a bad action.
*
* The judgment plugs in via the `Verifier` interface. Two impls ship here:
* - `ALLOW_ALL` — ungated (open); for tests / opt-out.
* - `basicSafetyVerifier` — the wired default: a conservative, dependency-free
* heuristic that blocks catastrophic, unambiguous
* host-destructive bash (e.g. `rm -rf /`, fork bombs,
* `mkfs`/`dd` on a raw device).
*
* This is a best-effort safety NET, NOT a security boundary or sandbox: a
* determined caller can obfuscate around literal patterns (command substitution,
* base64-decode-pipe-sh, etc.). Defense in depth lives elsewhere (OS sandbox, the
* permission system, and a richer pluggable verifier the caller may inject).
*
* Pure + testable. Wired into session/prompt.ts, just before `item.execute(args, ctx)`.
*/

export namespace Critic {
/**
* Side-effecting tools worth gating. Reads (glob/grep/read) are never gated.
* NOTE: the gate is wired into the native ToolRegistry execute wrapper only;
* MCP-provided tools run through a separate wrapper and are NOT gated. The
* shipped `basicSafetyVerifier` is bash-only (a native tool), so this is a
* no-op gap today — but a product injecting a verifier for `sql_execute`/
* `dbt_run` must confirm those are native, not MCP, tools.
*/
export const DEFAULT_GATED = ["bash", "write", "edit", "sql_execute", "dbt_run", "patch"]
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: DEFAULT_GATED uses patch instead of the real apply_patch tool id, so patch edits are not gated.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At packages/opencode/src/tool/critic.ts, line 32:

<comment>`DEFAULT_GATED` uses `patch` instead of the real `apply_patch` tool id, so patch edits are not gated.</comment>

<file context>
@@ -0,0 +1,262 @@
+   * no-op gap today — but a product injecting a verifier for `sql_execute`/
+   * `dbt_run` must confirm those are native, not MCP, tools.
+   */
+  export const DEFAULT_GATED = ["bash", "write", "edit", "sql_execute", "dbt_run", "patch"]
+
+  export interface Verdict {
</file context>


export interface Verdict {
ok: boolean
reason?: string
}

/** The judgment interface. Default impl allows all (open). Product plugs a richer verifier. */
export interface Verifier {
verify(toolName: string, args: Record<string, any>): Verdict | Promise<Verdict>
}

export const ALLOW_ALL: Verifier = { verify: () => ({ ok: true }) }

export function enabled(): boolean {
return process.env["ALTIMATE_CRITIC_GATE"] === "1"
}

export function isGated(toolName: string, gated: string[] = DEFAULT_GATED): boolean {
return gated.includes(toolName)
}

// ── Heuristic bash safety ──────────────────────────────────────────────────
//
// Targets that, combined with a recursive+force `rm`, mean catastrophic loss.
// Deliberately ABSOLUTE only — `.`, `./` and bare `*` are NOT here: clearing
// the working/build directory (`rm -rf *`, `rm -rf .`) is routine and safe in a
// sandboxed workspace, and we have no `workdir` context to judge them.
const RM_FATAL_TARGETS = new Set(["/", "/*", "/.", "/..", "~", "~/", "~/*", "$home", "$home/", "$home/*"])
// Top-level system paths whose recursive deletion bricks the machine.
const RM_FATAL_TOPLEVEL = [
"/etc",
"/usr",
"/var",
"/bin",
"/sbin",
"/boot",
"/lib",
"/lib64",
"/sys",
"/dev",
"/proc",
"/root",
"/home",
"/opt",
]

// Transparent command prefixes: words that may precede the real command without
// changing which command runs. Lets `sudo rm -rf /` and `bash -c "rm -rf /"`
// still be seen as an `rm` in command position, while `git commit -m "rm -rf /"`
// (rm buried in an argument) is not.
const TRANSPARENT_PREFIX = new Set([
"sudo",
"doas",
"nohup",
"time",
"env",
"exec",
"command",
"builtin",
"ionice",
"nice",
"setsid",
"stdbuf",
"then",
"do",
"else",
"bash",
"sh",
"zsh",
"dash",
"ksh",
])

function isFatalRmTarget(tok: string): boolean {
// Normalize bash brace expansion: ${home} -> $home, ${home}/ -> $home/
const norm = tok.replace(/^\$\{([a-z_]+)\}(\/.*)?$/, (_m, v, rest) => "$" + v + (rest ?? ""))
if (RM_FATAL_TARGETS.has(tok) || RM_FATAL_TARGETS.has(norm)) return true
// Catch a system path itself, its trailing-slash form, OR a glob wipe of its
// contents (`/var/*`) — but NOT a scoped subpath like `/home/user/project`.
return RM_FATAL_TOPLEVEL.some((p) => tok === p || tok === p + "/" || tok === p + "/*")
}

/**
* Is the token at index `i` in COMMAND position (the start of a pipeline
* segment), versus an argument to some other command? Walks left skipping
* flags and transparent prefixes; a separator or the start means command
* position, any other bareword means it's an argument.
*/
function isCommandPosition(tokens: string[], i: number, sep: Set<string>): boolean {
for (let j = i - 1; j >= 0; j--) {
const t = tokens[j]
if (sep.has(t)) return true
if (t.startsWith("-")) continue // a flag (e.g. `sudo -E`, `bash -c`)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: isCommandPosition misses rm after env assignments (for example env FOO=1 rm -rf /), allowing dangerous commands to evade detection.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At packages/opencode/src/tool/critic.ts, line 125:

<comment>`isCommandPosition` misses `rm` after `env` assignments (for example `env FOO=1 rm -rf /`), allowing dangerous commands to evade detection.</comment>

<file context>
@@ -0,0 +1,262 @@
+    for (let j = i - 1; j >= 0; j--) {
+      const t = tokens[j]
+      if (sep.has(t)) return true
+      if (t.startsWith("-")) continue // a flag (e.g. `sudo -E`, `bash -c`)
+      if (TRANSPARENT_PREFIX.has(t)) continue
+      return false // a real preceding word -> rm is an argument, not the command
</file context>

if (TRANSPARENT_PREFIX.has(t)) continue
return false // a real preceding word -> rm is an argument, not the command
}
return true // reached the start through only flags/prefixes
Comment on lines +121 to +129
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Skip shell assignment words when deciding command position.

isCommandPosition() currently treats assignment words as normal arguments, so catastrophic commands like FOO=1 rm -rf / or env HOME=/ rm -rf / bypass the detector because rm is no longer seen in command position. That is a simple false negative for the default safety gate.

Suggested fix
+  function isAssignmentWord(token: string): boolean {
+    return /^[a-z_][a-z0-9_]*=.*/.test(token)
+  }
+
   function isCommandPosition(tokens: string[], i: number, sep: Set<string>): boolean {
     for (let j = i - 1; j >= 0; j--) {
       const t = tokens[j]
       if (sep.has(t)) return true
       if (t.startsWith("-")) continue // a flag (e.g. `sudo -E`, `bash -c`)
+      if (isAssignmentWord(t)) continue
       if (TRANSPARENT_PREFIX.has(t)) continue
       return false // a real preceding word -> rm is an argument, not the command
     }
     return true // reached the start through only flags/prefixes
   }

Also applies to: 180-184

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@packages/opencode/src/tool/critic.ts` around lines 121 - 129, The
isCommandPosition function treats shell assignment words as normal arguments;
update its backward token-scan to also skip assignment words by recognizing
tokens that match a shell variable assignment pattern (e.g.
/^[A-Za-z_][A-Za-z0-9_]*=.*$/) and treat them like flags/prefixes (similar to
TRANSPARENT_PREFIX), so tokens like FOO=1 or HOME=/ don't stop the scan; apply
the same change to the other identical token-scan logic elsewhere in the file
(the analogous loop that decides command position further down) so both checks
skip assignment words.

}

/**
* Detect a catastrophic, unambiguous host-destructive bash command. Returns a
* human reason when dangerous, else null. Conservative on purpose — it only
* fires on the few patterns that are almost never legitimate. Quotes are
* stripped so `bash -c "rm -rf /"` is still seen; this can over-match a command
* that merely mentions such a string (acceptable: the gate is opt-in/off by default).
*/
export function detectDangerousBash(raw: string): string | null {
if (!raw || typeof raw !== "string") return null
if (raw.length > 1_000_000) return null // cap — don't scan pathologically huge input
// Normalize: drop quotes, collapse whitespace, lowercase for keyword/path matching.
const norm = raw.replace(/['"`]/g, "").replace(/\s+/g, " ").trim().toLowerCase()
if (!norm) return null

// Fork bomb: :(){ :|:& };:
if (norm.replace(/\s+/g, "").includes(":|:&") || /:\s*\(\s*\)\s*\{/.test(norm)) {
return "fork bomb"
}
// Explicit intent to remove the root filesystem.
if (norm.includes("--no-preserve-root")) {
return "rm with --no-preserve-root targets the root filesystem"
}
// mkfs on a block device.
if (/\bmkfs(\.\w+)?\b/.test(norm) && /\/dev\//.test(norm)) {
return "mkfs on a block device"
}
// dd writing to a raw disk.
if (/\bdd\b/.test(norm) && /\bof=\/dev\/(sd|nvme|disk|hd|vd|mmcblk)/.test(norm)) {
return "dd writing to a raw block device"
}
// Redirect over a raw disk.
if (/>\s*\/dev\/(sd|nvme|disk|hd|vd|mmcblk)/.test(norm)) {
return "redirect over a raw block device"
}
// Recursive chmod/chown of the root filesystem (short `-R`/`-rf` or long
// `--recursive`, against bare `/` or a `/*` glob wipe).
if (/\bch(mod|own)\b/.test(norm) && /(^|\s)(-[a-z]*r|--recursive)\b/.test(norm) && /\s\/\*?(\s|$)/.test(norm)) {
return "recursive permission/ownership change on /"
}
// Recursive + force rm of a fatal target. Split shell separators into their
// own tokens so they don't glue to a target (`/;`) and so EVERY `rm` in a
// compound command is inspected — not just the last one.
const SEP = new Set([";", "|", "&", ">", "<"])
const tokens = norm
.replace(/([;|&><])/g, " $1 ")
.replace(/\s+/g, " ")
.trim()
.split(" ")
for (let i = 0; i < tokens.length; i++) {
// Match `rm` or a fully-qualified `/bin/rm`, but only in command position
// (not when `rm -rf /` merely appears inside another command's argument).
if (tokens[i] !== "rm" && !tokens[i].endsWith("/rm")) continue
if (!isCommandPosition(tokens, i, SEP)) continue
let recursive = false
let force = false
const targets: string[] = []
for (let j = i + 1; j < tokens.length; j++) {
const t = tokens[j]
if (SEP.has(t)) break // end of this rm invocation
if (t.startsWith("-")) {
if (t === "--recursive" || (/^-[a-z]+$/.test(t) && t.includes("r"))) recursive = true
if (t === "--force" || (/^-[a-z]+$/.test(t) && t.includes("f"))) force = true
} else {
targets.push(t)
}
}
if (recursive && force) {
for (const t of targets) {
if (isFatalRmTarget(t)) return `recursive force-delete of "${t}"`
}
}
}
return null
}

/**
* The wired default verifier. Blocks only catastrophic bash; everything else
* (other gated tools, non-bash) is allowed. A product may inject a richer one.
*/
export const basicSafetyVerifier: Verifier = {
verify(toolName, args) {
if (toolName !== "bash") return { ok: true }
const reason = detectDangerousBash(String(args?.["command"] ?? ""))
return reason ? { ok: false, reason } : { ok: true }
},
}

export interface GateResult {
allow: boolean
/** when blocked, the message to feed back to the model in place of execution. */
feedback?: string
}

/**
* Decide whether a proposed tool call may execute. Non-gated tools always pass.
* Gated tools are checked by the verifier; a not-ok verdict blocks with feedback.
* NEVER throws — a critic failure must not break the agent (fail-open on error).
*/
/** Max time to wait on a verifier before failing open. A hung verifier must
* never hang the agent. */
export const VERIFIER_TIMEOUT_MS = 5000

export async function gate(
toolName: string,
args: Record<string, any>,
verifier: Verifier = ALLOW_ALL,
gated: string[] = DEFAULT_GATED,
timeoutMs: number = VERIFIER_TIMEOUT_MS,
): Promise<GateResult> {
if (!enabled() || !isGated(toolName, gated)) return { allow: true }
try {
// async IIFE so a synchronous throw in verify() rejects the promise (and is
// caught below) rather than escaping the Promise.race.
const verifyPromise = (async () => verifier.verify(toolName, args))()
const timeout = new Promise<Verdict>((resolve) => {
const t = setTimeout(() => resolve({ ok: true, reason: "__timeout__" }), timeoutMs)
// don't keep the event loop alive for this guard timer
;(t as any)?.unref?.()
})
const v = await Promise.race([verifyPromise, timeout])
if (v.ok) return { allow: true }
return {
allow: false,
feedback: `Blocked by altimate verifier before execution: ${v.reason ?? "failed validation"}. Fix and retry.`,
}
} catch {
// Fail-open: observability/governance must never break core functionality.
return { allow: true }
}
}
}
97 changes: 97 additions & 0 deletions packages/opencode/test/tool/critic-adversarial.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/**
* Adversarial probes for the critic gate's default safety heuristic.
*
* Two purposes:
* 1. Confirm the heuristic catches the obfuscations it CLAIMS to catch
* (whitespace, quote-wrapping, flag reordering, sudo/compound prefixes).
* 2. Honestly DOCUMENT the obfuscations it does NOT catch. The default verifier
* is a best-effort safety net, not a security boundary — a determined caller
* can always evade literal pattern matching. These "known bypass" tests are
* guardrails against over-claiming: if one ever starts being caught, great,
* but we never depend on it. Real isolation is the OS sandbox + permission
* system + a richer pluggable verifier.
*/
import { describe, expect, test } from "bun:test"
import { Critic } from "../../src/tool/critic"

describe("critic adversarial — obfuscations that ARE caught", () => {
const caught = [
"rm -rf /", // arbitrary whitespace
"\trm -rf /\n", // surrounding control whitespace
"RM -RF /", // case (heuristic lowercases; harmless over-match)
'bash -c "rm -rf /"', // shell wrapper, quotes stripped
"sh -c 'rm -rf /'", // single-quote wrapper
'r""m -rf /', // quote splicing inside the command name
"sudo rm -rf /", // privilege prefix
"nohup rm -rf / &", // backgrounded
"true; rm -rf /", // statement separator
"ls && rm -rf ~", // conjunction, home target
"rm -rf /var", // system path
"/bin/rm -rf /", // fully-qualified rm path
"/usr/bin/rm -rf /", // fully-qualified rm in /usr/bin
"sudo -E rm -rf /", // privilege prefix with a flag
"rm -rf ${HOME}", // brace-expanded $HOME
]
for (const cmd of caught) {
test(`caught: ${JSON.stringify(cmd)}`, () => {
expect(Critic.detectDangerousBash(cmd)).not.toBeNull()
})
}
})

describe("critic adversarial — KNOWN bypasses (documented, not depended upon)", () => {
// These evade literal matching. We assert current behavior so the limitation is
// explicit and reviewed, NOT because allowing them is desirable.
const knownBypasses = [
"$(echo rm) -rf /", // command substitution splits the rm token
"`echo rm` -rf /", // backtick substitution — `echo` is not a transparent prefix, so rm isn't in command position
"echo cm0gLXJmIC8K | base64 -d | sh", // base64-encoded `rm -rf /`
"find / -delete", // destructive without rm
"rm -rf $TARGET", // target hidden behind a variable
"alias x='rm -rf /'; x", // aliased
"xargs rm -rf < /tmp/list", // target via stdin (no literal fatal target token)
]
for (const cmd of knownBypasses) {
test(`known bypass (returns null today): ${JSON.stringify(cmd)}`, () => {
expect(Critic.detectDangerousBash(cmd)).toBeNull()
})
}
})

describe("critic adversarial — gate never throws, regardless of input", () => {
test("malformed/pathological args do not crash the gate (fail-open)", async () => {
process.env["ALTIMATE_CRITIC_GATE"] = "1"
const inputs: any[] = [
undefined,
null,
{},
{ command: 123 },
{ command: { nested: true } },
{ command: "x".repeat(100_000) },
{ command: Array(1000).fill("rm -rf /").join(" && ") },
]
for (const args of inputs) {
const g = await Critic.gate("bash", args ?? {}, Critic.basicSafetyVerifier)
expect(typeof g.allow).toBe("boolean")
}
delete process.env["ALTIMATE_CRITIC_GATE"]
})

test("a verifier that returns a malformed verdict is tolerated", async () => {
process.env["ALTIMATE_CRITIC_GATE"] = "1"
const weird: Critic.Verifier = { verify: () => ({}) as any }
// ok is undefined -> falsy -> treated as a block (safe direction), never throws.
const g = await Critic.gate("bash", { command: "ls" }, weird)
expect(typeof g.allow).toBe("boolean")
delete process.env["ALTIMATE_CRITIC_GATE"]
})

test("ReDoS guard: a huge whitespace-heavy command returns quickly", () => {
const big = "rm" + " ".repeat(200_000) + "-rf /"
const start = performance.now()
const r = Critic.detectDangerousBash(big)
const ms = performance.now() - start
expect(r).not.toBeNull() // collapsed whitespace still matches
expect(ms).toBeLessThan(250) // no catastrophic backtracking
})
})
Loading
Loading