diff --git a/esbuild.config.mjs b/esbuild.config.mjs index ffa200da..70a9f629 100644 --- a/esbuild.config.mjs +++ b/esbuild.config.mjs @@ -89,6 +89,9 @@ const codexHooks = [ { entry: "dist/src/hooks/codex/stop.js", out: "stop" }, { entry: "dist/src/hooks/codex/wiki-worker.js", out: "wiki-worker" }, { entry: "dist/src/skillify/skillify-worker.js", out: "skillify-worker" }, + // SkillOpt worker — codex's capture spawns it on a user reaction to judge + improve a + // recently-used org skill (judging runs on the codex CLI). Same shared module CC uses. + { entry: "dist/src/skillify/skillopt-worker.js", out: "skillopt-worker" }, { entry: "dist/src/hooks/graph-pull-worker.js", out: "graph-pull-worker" }, // G3: code-graph auto-build parity for Codex (same shared hook as CC/Cursor). { entry: "dist/src/hooks/graph-on-stop.js", out: "graph-on-stop" }, @@ -161,6 +164,9 @@ const hermesHooks = [ { entry: "dist/src/hooks/hermes/pre-tool-use.js", out: "pre-tool-use" }, { entry: "dist/src/hooks/hermes/wiki-worker.js", out: "wiki-worker" }, { entry: "dist/src/skillify/skillify-worker.js", out: "skillify-worker" }, + // SkillOpt worker — hermes capture spawns it on a reaction to judge + improve a recently-used + // org skill (judging runs on the hermes CLI). Same shared module CC uses. + { entry: "dist/src/skillify/skillopt-worker.js", out: "skillopt-worker" }, { entry: "dist/src/hooks/graph-pull-worker.js", out: "graph-pull-worker" }, // G3: code-graph auto-build parity for Hermes (registered on on_session_end). { entry: "dist/src/hooks/graph-on-stop.js", out: "graph-on-stop" }, @@ -267,6 +273,9 @@ const piWorker = [ { entry: "dist/src/hooks/pi/wiki-worker.js", out: "wiki-worker" }, { entry: "dist/src/skillify/skillify-worker.js", out: "skillify-worker" }, { entry: "dist/src/skillify/autopull-worker.js", out: "autopull-worker" }, + // SkillOpt worker — pi spawns it on a user reaction (the extension can't import the + // raw-.ts trigger, so it shells this bundle like the others). Same shared module CC uses. + { entry: "dist/src/skillify/skillopt-worker.js", out: "skillopt-worker" }, ]; await build({ entryPoints: Object.fromEntries(piWorker.map(h => [h.out, h.entry])), diff --git a/pi/extension-source/hivemind.ts b/pi/extension-source/hivemind.ts index 2f6c8f4c..b8026a4e 100644 --- a/pi/extension-source/hivemind.ts +++ b/pi/extension-source/hivemind.ts @@ -466,6 +466,94 @@ function runAutopullWorker(): void { } } +// ---------- SkillOpt: arm on org-skill use, react on the next user message ------------ +// Mirrors the CC PreToolUse/UserPromptSubmit wiring, inlined because this extension is raw +// .ts with zero non-builtin deps (it can't import the skillify trigger). pi has no first-class +// `Skill` tool — it USES a skill by READING its SKILL.md — so we arm on a tool_result whose +// path is .../skills//SKILL.md, then on the next user prompt (the reaction) spawn +// the bundled skillopt-worker to judge + improve. Env-var names are the cross-process contract +// with the worker (SKILLOPT_ENV in src/skillify/skillopt-env.ts) — kept as literals here since +// the extension can't import. Fully swallowed; never blocks pi. Both call sites sit AFTER the +// handler's captureEnabled check, so the worker's own pi-judge subprocess (HIVEMIND_CAPTURE=false) +// can't re-arm/re-react — that's the recursion guard. +const PI_SKILLOPT_WORKER_PATH = join(homedir(), ".pi", "agent", "hivemind", "skillopt-worker.js"); +// Mirror getStateDir()'s contract: a non-empty (trimmed) HIVEMIND_STATE_DIR overrides the default +// ~/.deeplake/state/skillify root, so pi's pending state co-locates with the rest of Skillify +// (and test-isolation overrides apply here too, not just in the shared trigger). +const SKILLOPT_STATE_ROOT = (typeof process.env.HIVEMIND_STATE_DIR === "string" && process.env.HIVEMIND_STATE_DIR.trim()) + ? process.env.HIVEMIND_STATE_DIR.trim() + : join(homedir(), ".deeplake", "state", "skillify"); +const SKILLOPT_PENDING_DIR = join(SKILLOPT_STATE_ROOT, "skillopt", "pending"); +const SKILLOPT_JUDGE_WINDOW = 3; // K reactions to keep judging after a skill use (DEFAULT_JUDGE_WINDOW) + +/** Recover an org-skill ref (name--author) from a path that loads a skill's SKILL.md, else null. */ +function skilloptRefFromPath(p: unknown): string | null { + if (typeof p !== "string") return null; + const m = p.match(/\/skills\/([^/]+)\/SKILL\.md$/); + if (!m) return null; + const ref = m[1]; + // org shape only: name--author, no plugin namespace / path separators / traversal. + if (ref.includes(":") || ref.includes("/") || ref.includes("\\") || ref.includes("..")) return null; + const i = ref.lastIndexOf("--"); + return i > 0 && i + 2 < ref.length ? ref : null; +} + +function skilloptPendingFile(sessionId: string): string { + const safe = sessionId.replace(/[^A-Za-z0-9_-]/g, "_").slice(0, 200); + return join(SKILLOPT_PENDING_DIR, `${safe}.json`); +} + +/** tool_result: pi read an org skill's SKILL.md → open a K-message judgment window. */ +function skilloptArm(sessionId: string, toolName: unknown, toolInput: any, toolCallId: unknown): void { + try { + if (process.env.HIVEMIND_SKILLOPT_DISABLED === "1") return; + // Arm only on a READ of the SKILL.md — USING a skill is reading it. An edit/write of a + // SKILL.md (even one whose input carries a matching path) must NOT open a judgment window. + if (!/^read/i.test(String(toolName ?? ""))) return; + const ref = skilloptRefFromPath(toolInput?.path ?? toolInput?.file ?? toolInput?.filePath); + if (!ref) return; + mkdirSync(SKILLOPT_PENDING_DIR, { recursive: true }); + const f = skilloptPendingFile(sessionId); + const tmp = `${f}.${process.pid}.tmp`; + writeFileSync(tmp, JSON.stringify({ skill: ref, budget: SKILLOPT_JUDGE_WINDOW, toolUseId: typeof toolCallId === "string" ? toolCallId : undefined })); + renameSync(tmp, f); + logHm(`skillopt: armed ${ref} for ${sessionId}`); + } catch (e: any) { logHm(`skillopt arm swallowed: ${e?.message ?? e}`); } +} + +/** input: the user's reaction → spawn the detached worker to judge the pending skill; spend budget. */ +function skilloptReact(sessionId: string, reaction: string): void { + try { + if (process.env.HIVEMIND_SKILLOPT_DISABLED === "1" || process.env.HIVEMIND_WIKI_WORKER === "1") return; + if (!reaction.trim()) return; + const f = skilloptPendingFile(sessionId); + let p: { skill?: string; budget?: number; toolUseId?: string }; + try { p = JSON.parse(readFileSync(f, "utf8")); } catch { return; } // no window open → no-op + if (!p?.skill || typeof p.budget !== "number") return; + if (!existsSync(PI_SKILLOPT_WORKER_PATH)) { logHm(`skillopt: worker bundle missing at ${PI_SKILLOPT_WORKER_PATH} — run 'hivemind pi install'`); return; } + // Spend one message of the budget; close the window when exhausted. + try { + if (p.budget - 1 <= 0) { unlinkSync(f); } + else { const tmp = `${f}.${process.pid}.tmp`; writeFileSync(tmp, JSON.stringify({ ...p, budget: p.budget - 1 })); renameSync(tmp, f); } + } catch { /* best-effort */ } + const child = spawn(process.execPath, [PI_SKILLOPT_WORKER_PATH], { + detached: true, + stdio: "ignore", + env: { + ...process.env, + HIVEMIND_SKILLOPT_WORKER: "1", // recursion guard (worker won't re-fire the trigger) + HIVEMIND_SKILLOPT_SESSION: sessionId, + HIVEMIND_SKILLOPT_SKILL: p.skill, + HIVEMIND_SKILLOPT_REACTION: reaction.slice(0, 8000), + HIVEMIND_SKILLOPT_AGENT: "pi", // judge/proposer run on pi (the user's own agent) + ...(p.toolUseId ? { HIVEMIND_SKILLOPT_TOOL_USE_ID: p.toolUseId } : {}), + }, + }); + child.unref(); + logHm(`skillopt: spawned worker for ${p.skill} in ${sessionId} (agent=pi)`); + } catch (e: any) { logHm(`skillopt react swallowed: ${e?.message ?? e}`); } +} + interface SummaryState { lastSummaryAt: number; lastSummaryCount: number; @@ -1255,6 +1343,8 @@ export default function hivemindExtension(pi: ExtensionAPI): void { } catch (e: any) { logHm(`input: writeSessionRow swallowed: ${e?.message ?? e}`); } + // SkillOpt: this prompt is the user's reaction to a recently-used org skill. Swallowed. + skilloptReact(sessionId, text); maybeTriggerPeriodicSummary(creds, sessionId, cwd); }); @@ -1286,6 +1376,9 @@ export default function hivemindExtension(pi: ExtensionAPI): void { } catch (e: any) { logHm(`tool_result: writeSessionRow swallowed: ${e?.message ?? e}`); } + // SkillOpt: pi USES an org skill by reading its SKILL.md — arm the judgment window on + // a successful such read (skip errored reads). Swallowed. + if (event.isError !== true) skilloptArm(sessionId, event.toolName, event.input, event.toolCallId); maybeTriggerPeriodicSummary(creds, sessionId, cwd); }); diff --git a/src/cli/install-pi.ts b/src/cli/install-pi.ts index 1ca4fba3..b344ea7f 100644 --- a/src/cli/install-pi.ts +++ b/src/cli/install-pi.ts @@ -48,6 +48,10 @@ const SKILLIFY_WORKER_PATH = join(WIKI_WORKER_DIR, "skillify-worker.js"); // shared autoPullSkills() codex/cursor/hermes call directly; pi can't // import the TS module so it routes through this child process. const AUTOPULL_WORKER_PATH = join(WIKI_WORKER_DIR, "autopull-worker.js"); +// SkillOpt worker bundle, spawned by the pi extension on a user reaction to judge a +// recently-used org skill and publish an improvement. Same shared module CC ships; pi +// can't import the raw-.ts trigger so it shells this bundle. Sibling of the others. +const SKILLOPT_WORKER_PATH = join(WIKI_WORKER_DIR, "skillopt-worker.js"); const HIVEMIND_BLOCK_START = ""; const HIVEMIND_BLOCK_END = ""; @@ -148,6 +152,14 @@ export function installPi(): void { copyFileSync(srcAutopullWorker, AUTOPULL_WORKER_PATH); } + // 6. SkillOpt-worker bundle (spawned by extension on a user reaction to judge + + // improve a recently-used org skill). Same dir, same cleanup. + const srcSkilloptWorker = join(pkgRoot(), "pi", "bundle", "skillopt-worker.js"); + if (existsSync(srcSkilloptWorker)) { + ensureDir(WIKI_WORKER_DIR); + copyFileSync(srcSkilloptWorker, SKILLOPT_WORKER_PATH); + } + ensureDir(VERSION_DIR); writeVersionStamp(VERSION_DIR, getVersion()); @@ -162,6 +174,9 @@ export function installPi(): void { if (existsSync(AUTOPULL_WORKER_PATH)) { log(` pi autopull-worker installed -> ${AUTOPULL_WORKER_PATH}`); } + if (existsSync(SKILLOPT_WORKER_PATH)) { + log(` pi skillopt-worker installed -> ${SKILLOPT_WORKER_PATH}`); + } } export function uninstallPi(): void { diff --git a/src/hooks/codex/capture.ts b/src/hooks/codex/capture.ts index c095ecc4..cf43c645 100644 --- a/src/hooks/codex/capture.ts +++ b/src/hooks/codex/capture.ts @@ -34,6 +34,7 @@ import { } from "../summary-state.js"; import { bundleDirFromImportMeta, spawnCodexWikiWorker, wikiLog } from "./spawn-wiki-worker.js"; import { getInstalledVersion } from "../../utils/version-check.js"; +import { reactSkillOpt } from "../shared/skillopt-hook.js"; const log = (msg: string) => _log("codex-capture", msg); function resolveEmbedDaemonPath(): string { @@ -151,6 +152,10 @@ async function main(): Promise { log("capture ok"); + // SkillOpt: a UserPromptSubmit prompt is the user's reaction to a recently-used org skill. + // Swallowed; no-op unless a judgment window is open for this session. + reactSkillOpt(input.session_id, input.prompt, "codex"); + maybeTriggerPeriodicSummary(input.session_id, input.cwd ?? "", config); } diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index 9c1a9095..cbab5ab1 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -34,6 +34,7 @@ import { import { log as _log } from "../../utils/debug.js"; import { isDirectRun } from "../../utils/direct-run.js"; import { isSafe, touchesMemory, rewritePaths } from "../memory-path-utils.js"; +import { armSkillOptOnSkillUse } from "../shared/skillopt-hook.js"; export { isSafe, touchesMemory, rewritePaths }; @@ -368,6 +369,12 @@ export async function processCodexPreToolUse( /* c8 ignore start */ async function main(): Promise { const input = await readStdin(); + // SkillOpt: codex USES an org skill by shelling a read of its SKILL.md — arm the judgment + // window on that command. Guarded at the call site too (armSkillOptOnSkillUse is already + // internally swallowed): a throw here must NOT short-circuit the memory-path gate below, whose + // top-level catch exits 0 (fail-open). Fail-closed for the SkillOpt side-effect. + try { armSkillOptOnSkillUse(input.session_id, input.tool_name, input.tool_input, input.tool_use_id); } + catch { /* never let the SkillOpt arm affect the tool decision */ } const decision = await processCodexPreToolUse(input); if (decision.action === "pass") return; diff --git a/src/hooks/hermes/capture.ts b/src/hooks/hermes/capture.ts index 9d4c39da..4a134eec 100644 --- a/src/hooks/hermes/capture.ts +++ b/src/hooks/hermes/capture.ts @@ -38,6 +38,7 @@ import { bundleDirFromImportMeta, spawnHermesWikiWorker, wikiLog } from "./spawn import { tryStopCounterTrigger } from "../../skillify/triggers.js"; import type { Config } from "../../config.js"; import { getInstalledVersion } from "../../utils/version-check.js"; +import { reactSkillOpt } from "../shared/skillopt-hook.js"; const log = (msg: string) => _log("hermes-capture", msg); function resolveEmbedDaemonPath(): string { @@ -96,12 +97,14 @@ async function main(): Promise { }; let entry: Record | null = null; + let reactPrompt: string | undefined; // the user's prompt = the SkillOpt reaction (fired after capture) if (event === "pre_llm_call") { const prompt = pickString(extra.prompt, extra.user_message, (extra.message as Record | undefined)?.content); if (!prompt) { log(`pre_llm_call: no prompt found in extra`); return; } log(`user session=${sessionId}`); entry = { id: crypto.randomUUID(), ...meta, type: "user_message", content: prompt }; + reactPrompt = prompt; } else if (event === "post_tool_call" && typeof input.tool_name === "string") { const toolResponse = extra.tool_result ?? extra.tool_output ?? extra.result ?? extra.output; log(`tool=${input.tool_name} session=${sessionId}`); @@ -158,6 +161,10 @@ async function main(): Promise { log("capture ok → cloud"); + // SkillOpt: a pre_llm_call prompt is the user's reaction to a recently-used org skill. + // Swallowed; no-op unless a judgment window is open for this session. + reactSkillOpt(sessionId, reactPrompt, "hermes"); + maybeTriggerPeriodicSummary(sessionId, cwd, config); // Skillify Stop counter — post_llm_call is the assistant-complete event. diff --git a/src/hooks/hermes/pre-tool-use.ts b/src/hooks/hermes/pre-tool-use.ts index 87239764..2deb8975 100644 --- a/src/hooks/hermes/pre-tool-use.ts +++ b/src/hooks/hermes/pre-tool-use.ts @@ -27,6 +27,7 @@ import { log as _log } from "../../utils/debug.js"; import { parseBashGrep, handleGrepDirect } from "../grep-direct.js"; import { touchesMemory, rewritePaths } from "../memory-path-utils.js"; import { tryGraphRead } from "../../graph/graph-command.js"; +import { armSkillOptOnSkillUse } from "../shared/skillopt-hook.js"; const log = (msg: string) => _log("hermes-pre-tool-use", msg); interface HermesPreToolUseInput { @@ -40,6 +41,9 @@ interface HermesPreToolUseInput { async function main(): Promise { const input = await readStdin(); + // SkillOpt: hermes USES an org skill by shelling a read of its SKILL.md (the path is in the + // terminal command). Arm the judgment window on it. Swallowed; never affects the decision below. + armSkillOptOnSkillUse(input.session_id ?? "", input.tool_name ?? "", input.tool_input); // Hermes' shell-hook tool name for terminal commands is "terminal". if (input.tool_name !== "terminal") return; diff --git a/src/hooks/shared/skillopt-hook.ts b/src/hooks/shared/skillopt-hook.ts index 5134228c..4ef46ee3 100644 --- a/src/hooks/shared/skillopt-hook.ts +++ b/src/hooks/shared/skillopt-hook.ts @@ -5,17 +5,39 @@ * whether a tool runs or a prompt is captured. */ import { markSkillPending, runEventTrigger } from "../../skillify/skillopt-trigger.js"; +import { pathToSkillRef } from "../../skillify/skill-invocations.js"; import { SKILLOPT_ENV } from "../../skillify/skillopt-env.js"; /** - * PreToolUse: if the agent invoked a `Skill` tool on an ORG skill (`name--author`), - * open its K-message judgment window. Non-org skills (bare / plugin) are ignored. + * Recover an org-skill ref from a tool call that LOADS a skill's SKILL.md — how agents without + * a first-class `Skill` tool use skills: pi `read`s `.../skills//SKILL.md` (structured + * `path`), codex/hermes SHELL a read of it (path inside `command`). The `` segment is the + * ref. Returns null when it isn't a SKILL.md load. markSkillPending still gates the ref + * (org-shape + manifest), so a bare/non-org dir is rejected there. + */ +export function skillRefFromSkillFileRead(toolName: string, toolInput: unknown): string | null { + // read tool with a structured path (pi) + if (/^read$/i.test(toolName)) return pathToSkillRef((toolInput as { path?: unknown })?.path); + // shell tool with the path inside the command (codex Bash, hermes terminal) + return pathToSkillRef((toolInput as { command?: unknown })?.command); +} + +/** + * PreToolUse: open a skill's K-message judgment window when the agent USES an org skill — + * either via a first-class `Skill` tool (claude) or by reading its SKILL.md file (pi/codex). + * Org-skill gating (shape + pull manifest) happens in markSkillPending. */ export function armSkillOptOnSkillUse(sessionId: string, toolName: string, toolInput: unknown, toolUseId?: string): void { try { - if (toolName !== "Skill" || process.env[SKILLOPT_ENV.DISABLED] === "1") return; - const ref = (toolInput as { skill?: unknown })?.skill; - if (typeof ref === "string") markSkillPending(sessionId, ref, toolUseId); + if (process.env[SKILLOPT_ENV.DISABLED] === "1") return; + let ref: string | null = null; + if (toolName === "Skill") { + const s = (toolInput as { skill?: unknown })?.skill; + ref = typeof s === "string" ? s : null; + } else { + ref = skillRefFromSkillFileRead(toolName, toolInput); // pi/codex: read of …/skills//SKILL.md + } + if (ref) markSkillPending(sessionId, ref, toolUseId); } catch { /* never break PreToolUse */ } } diff --git a/src/skillify/skill-invocations.ts b/src/skillify/skill-invocations.ts index 794af39c..e1430951 100644 --- a/src/skillify/skill-invocations.ts +++ b/src/skillify/skill-invocations.ts @@ -44,13 +44,36 @@ export function parseMessage(m: unknown): ParsedMsg | null { return null; } -/** The skill ref invoked by a tool_call message (e.g. "name--author"), else null. */ +/** Match a path that loads a skill's SKILL.md anywhere in `s` → the `` ref (name--author), + * else null. Works on a bare path (pi `read` tool_input.path) or inside a shell command string + * (codex/hermes `cat …/SKILL.md`). The dir class excludes whitespace/quotes so a command's + * trailing args don't get swallowed into the ref. */ +export function pathToSkillRef(s: unknown): string | null { + if (typeof s !== "string") return null; + // `/skills/` then any intermediate dirs (codex nests org skills under `.system/`, + // e.g. …/skills/.system//SKILL.md), capturing the dir right before + // SKILL.md. markSkillPending still gates org-shape (name--author), so `.system` / + // bare system-skill dirs are rejected there. + const m = s.match(/\/skills\/(?:[^/\s"'`]+\/)*([^/\s"'`]+)\/SKILL\.md/); + return m ? m[1] : null; +} + +/** + * The skill ref invoked by a tool_call message (e.g. "name--author"), else null. Recognises: + * - claude's first-class `Skill` tool (tool_input.skill) + * - pi/codex/hermes loading a skill by reading its SKILL.md — a `read` tool_input.path, or a + * shell tool_input.command that cats it (the worker windows around whichever it finds). + */ export function invokedSkillRef(msg: ParsedMsg): string | null { - if (msg.type !== "tool_call" || msg.tool_name !== "Skill") return null; + if (msg.type !== "tool_call") return null; let input: unknown = msg.tool_input; - if (typeof input === "string") { try { input = JSON.parse(input); } catch { return null; } } - const skill = (input as { skill?: unknown })?.skill; - return typeof skill === "string" && skill.length > 0 ? skill : null; + if (typeof input === "string") { try { input = JSON.parse(input); } catch { input = msg.tool_input; } } + if (msg.tool_name === "Skill") { + const skill = (input as { skill?: unknown })?.skill; + return typeof skill === "string" && skill.length > 0 ? skill : null; + } + const io = input as { path?: unknown; command?: unknown } | undefined; + return pathToSkillRef(io?.path) ?? pathToSkillRef(io?.command); } /** Split "--" → parts. null for plugin-namespaced / bare / malformed refs. */ @@ -66,16 +89,18 @@ export function splitOrgSkill(skill: string): { name: string; author: string } | } /** - * Org-skill invocations across captured sessions, newest first. Coarse prefilter - * on `"Skill"` (robust to JSONB colon-spacing) then a precise in-code check, so a - * stray "Skill" in prose can't slip through as a real invocation. + * Org-skill invocations across captured sessions, newest first. Coarse prefilter then a precise + * in-code check (invokedSkillRef), so a stray match in prose can't slip through. The prefilter + * matches EITHER a first-class `Skill` tool_call OR a `SKILL.md` load (the read/shell path that + * pi/codex/hermes use) — otherwise those newly-supported invocations get dropped before + * invokedSkillRef can evaluate them. */ export async function listSkillInvocations( query: QueryFn, sessionsTable: string, opts: { sinceIso?: string; untilIso?: string; limit?: number } = {}, ): Promise { - const where = [`CAST(message AS TEXT) LIKE '%"Skill"%'`]; + const where = [`(CAST(message AS TEXT) LIKE '%"Skill"%' OR CAST(message AS TEXT) LIKE '%/SKILL.md%')`]; if (opts.sinceIso) where.push(`last_update_date >= '${sqlStr(opts.sinceIso)}'`); if (opts.untilIso) where.push(`last_update_date < '${sqlStr(opts.untilIso)}'`); const limit = opts.limit && opts.limit > 0 ? ` LIMIT ${Math.floor(opts.limit)}` : ""; diff --git a/src/skillify/skillopt-worker.ts b/src/skillify/skillopt-worker.ts index a5a0b552..7f84f515 100644 --- a/src/skillify/skillopt-worker.ts +++ b/src/skillify/skillopt-worker.ts @@ -11,7 +11,7 @@ * Inputs come via env: HIVEMIND_SKILLOPT_{SESSION,SKILL,REACTION,AGENT}. */ import path from "node:path"; -import { execSync } from "node:child_process"; +import { accessSync, constants as fsConstants } from "node:fs"; import { log as _log } from "../utils/debug.js"; import { loadConfig } from "../config.js"; import { DeeplakeApi } from "../deeplake-api.js"; @@ -25,18 +25,26 @@ import { SKILLOPT_ENV } from "./skillopt-env.js"; const log = (m: string) => _log("skillopt-worker", m); /** - * Resolve an agent's CLI via `command -v` — finds nvm/volta/fnm installs that - * gate-runner's static-path findAgentBin misses (it deliberately avoids PATH for - * the openclaw bundle). undefined → agentModel falls back to findAgentBin. Mirrors - * the wiki spawn helpers' findXBin pattern (this worker is a standalone detached - * process, not the openclaw-scanned skillify-worker, so `command -v` is fine here). + * Resolve a known agent's CLI by walking $PATH — finds nvm/volta/fnm installs that + * gate-runner's static-path findAgentBin misses (it deliberately avoids PATH for the + * openclaw bundle). undefined → agentModel falls back to findAgentBin. Done in Node + * (no shell / subprocess) so an env-derived agent name can't reach a command line. */ const AGENT_CMD: Record = { claude_code: "claude", codex: "codex", cursor: "cursor-agent", hermes: "hermes", pi: "pi" }; function resolveAgentBin(agent: string): string | undefined { - try { - const p = execSync(`command -v ${AGENT_CMD[agent] ?? agent}`, { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] }).trim(); - return p || undefined; - } catch { return undefined; } + // Only resolve KNOWN agents — no `?? agent` fallback. `agent` traces back to the + // HIVEMIND_SKILLOPT_AGENT env var; feeding an arbitrary value to a command is a + // command-injection sink (CodeQL: indirect uncontrolled command line). Resolve the + // whitelisted binary by walking PATH ourselves — no shell, no subprocess — which still + // finds nvm/volta/fnm installs (they're on PATH), the reason the old `command -v` existed. + const cmd = AGENT_CMD[agent]; + if (!cmd) return undefined; + for (const dir of (process.env.PATH ?? "").split(path.delimiter)) { + if (!dir) continue; + const full = path.join(dir, cmd); + try { accessSync(full, fsConstants.X_OK); return full; } catch { /* not here or not executable */ } + } + return undefined; } async function main(): Promise { diff --git a/tests/pi/pi-extension-source.test.ts b/tests/pi/pi-extension-source.test.ts index 3fadcb60..9994f87b 100644 --- a/tests/pi/pi-extension-source.test.ts +++ b/tests/pi/pi-extension-source.test.ts @@ -192,3 +192,39 @@ describe("pi extension — embedding wiring", () => { expect(PI_SRC).toMatch(/return\s+"NULL"/); }); }); + +describe("pi extension — SkillOpt wiring", () => { + it("arms on a SKILL.md READ in the tool_result handler (non-error reads only, read tools only)", () => { + // pi has no first-class Skill tool — it USES a skill by reading its SKILL.md, so arming hangs + // off tool_result, gated on a successful read. The toolName is passed so arming is restricted + // to read tools (an edit/write of a SKILL.md must not arm). + expect(PI_SRC).toContain("skilloptArm(sessionId, event.toolName, event.input, event.toolCallId)"); + expect(PI_SRC).toContain("event.isError !== true"); + expect(PI_SRC).toMatch(/\/\^read\/i\.test/); // arm restricted to read tools + expect(PI_SRC).toContain("/skills/"); // the ref-from-path matcher targets …/skills//SKILL.md + }); + + it("reacts on the user prompt in the input handler", () => { + expect(PI_SRC).toContain("skilloptReact(sessionId, text)"); + }); + + it("spawns the bundled skillopt-worker with the cross-process env contract", () => { + expect(PI_SRC).toContain('"skillopt-worker.js"'); + // these env-var names MUST match SKILLOPT_ENV — the worker reads the literals back + for (const v of [ + "HIVEMIND_SKILLOPT_WORKER", "HIVEMIND_SKILLOPT_SESSION", "HIVEMIND_SKILLOPT_SKILL", + "HIVEMIND_SKILLOPT_REACTION", "HIVEMIND_SKILLOPT_AGENT", + ]) expect(PI_SRC).toContain(v); + expect(PI_SRC).toContain('HIVEMIND_SKILLOPT_AGENT: "pi"'); // judge/proposer run on pi + }); + + it("honors the kill switch + the in-worker recursion guard", () => { + expect(PI_SRC).toContain('process.env.HIVEMIND_SKILLOPT_DISABLED === "1"'); + expect(PI_SRC).toContain('process.env.HIVEMIND_WIKI_WORKER === "1"'); + }); + + it("only arms org-shaped refs (name--author), not bare/plugin skills", () => { + expect(PI_SRC).toContain('ref.includes(":")'); // reject plugin-namespaced + expect(PI_SRC).toContain('ref.lastIndexOf("--")'); // require name--author + }); +}); diff --git a/tests/shared/agent-model.test.ts b/tests/shared/agent-model.test.ts index 95715c86..79edb012 100644 --- a/tests/shared/agent-model.test.ts +++ b/tests/shared/agent-model.test.ts @@ -92,6 +92,13 @@ describe("agentModel — per-agent no-tools dispatch", () => { expect(argVal(calls[0].args, "-m")).toBe("us.anthropic.claude-haiku-4-5-20251001-v1:0"); }); + it("codex applies an explicit model override (-m) when one is configured", async () => { + const { spawnImpl, calls } = fakeSpawn("raw"); + const env = { HIVEMIND_SKILLOPT_CODEX_JUDGE_MODEL: "o3" } as unknown as NodeJS.ProcessEnv; + await agentModel({ agent: "codex", role: "judge", bin: "/x/codex", spawnImpl, env })("S", "U"); + expect(argVal(calls[0].args, "-m")).toBe("o3"); // the `model ? ["-m", model] : []` present-branch + }); + it("rejects a hermes/pi provider override with NO model (the default id wouldn't match)", async () => { const { spawnImpl } = fakeSpawn("x"); const env = { HIVEMIND_SKILLOPT_HERMES_PROVIDER: "bedrock" } as unknown as NodeJS.ProcessEnv; diff --git a/tests/shared/skill-invocations.test.ts b/tests/shared/skill-invocations.test.ts index 8e8e1bfd..1ba3fb06 100644 --- a/tests/shared/skill-invocations.test.ts +++ b/tests/shared/skill-invocations.test.ts @@ -4,6 +4,7 @@ import { splitOrgSkill, listSkillInvocations, windowAroundInvocation, + parseMessage, type SkillInvocation, } from "../../src/skillify/skill-invocations.js"; @@ -17,6 +18,17 @@ const toolCall = (skill: string, sessionId = "S1", ts = "t", asString = false) = return { message: asString ? JSON.stringify(msg) : msg, last_update_date: ts }; }; +describe("parseMessage", () => { + it("returns null for non-string/non-object inputs and bad JSON; parses strings + passes objects", () => { + expect(parseMessage(123)).toBeNull(); // number → null (the typeof-object miss) + expect(parseMessage(true)).toBeNull(); // boolean → null + expect(parseMessage(null)).toBeNull(); // null → null + expect(parseMessage("not json")).toBeNull(); // unparseable string → null + expect(parseMessage('{"a":1}')).toEqual({ a: 1 }); // JSON string → object + expect(parseMessage({ a: 1 })).toEqual({ a: 1 }); // object → passthrough + }); +}); + describe("invokedSkillRef", () => { it("returns the skill ref for a Skill tool_call (object or stringified input)", () => { expect(invokedSkillRef({ type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill: "a--b" }) })).toBe("a--b"); @@ -27,6 +39,15 @@ describe("invokedSkillRef", () => { expect(invokedSkillRef({ type: "assistant_message", content: "use the Skill tool" })).toBeNull(); expect(invokedSkillRef({ type: "tool_call", tool_name: "Skill", tool_input: "not json" })).toBeNull(); }); + it("recognizes a SKILL.md load via a read path or a shell command (pi/codex/hermes)", () => { + expect(invokedSkillRef({ type: "tool_call", tool_name: "read", tool_input: { path: "/x/skills/a--b/SKILL.md" } as unknown })).toBe("a--b"); + expect(invokedSkillRef({ type: "tool_call", tool_name: "Bash", tool_input: JSON.stringify({ command: "sed -n '1,5p' /x/.agents/skills/a--b/SKILL.md" }) })).toBe("a--b"); + expect(invokedSkillRef({ type: "tool_call", tool_name: "Bash", tool_input: JSON.stringify({ command: "ls /tmp" }) })).toBeNull(); + }); + it("handles codex's nested .system path (intermediate dirs before the ref)", () => { + // codex reads org skills as `sed -n '1,220p' ~/.codex/skills/.system//SKILL.md` + expect(invokedSkillRef({ type: "tool_call", tool_name: "Bash", tool_input: JSON.stringify({ command: "sed -n '1,220p' /home/e/.codex/skills/.system/posthog--kamo/SKILL.md" }) })).toBe("posthog--kamo"); + }); }); describe("splitOrgSkill", () => { @@ -58,6 +79,9 @@ describe("listSkillInvocations", () => { ]); const got = await listSkillInvocations(fn, TABLE, { sinceIso: "2026-06-01", limit: 100 }); expect(calls[0]).toContain(`CAST(message AS TEXT) LIKE '%"Skill"%'`); + // prefilter must ALSO match SKILL.md loads (pi/codex/hermes read/shell invocations), else they + // get dropped before invokedSkillRef can evaluate them. + expect(calls[0]).toContain(`CAST(message AS TEXT) LIKE '%/SKILL.md%'`); expect(calls[0]).toContain("last_update_date >= '2026-06-01'"); expect(calls[0]).toContain("LIMIT 100"); expect(got).toEqual([ @@ -96,6 +120,20 @@ describe("windowAroundInvocation", () => { expect(out).toBe("USER: hi\n\nASSISTANT: bye"); // whole (short) transcript }); + it("drops rows from a different session_id (path-LIKE collision) and empty-content turns", async () => { + const { fn } = mockQuery([ + { message: { type: "user_message", content: "real", session_id: "S1" } }, + { message: { type: "user_message", content: "from another session", session_id: "OTHER" } }, // collision → dropped + { message: { type: "assistant_message", content: "", session_id: "S1" } }, // empty text → skipped + { message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill: "posthog-smoke--kamo" }), timestamp: "t5", session_id: "S1" } }, + { message: { type: "user_message", content: "pushback", session_id: "S1" } }, + ]); + const out = await windowAroundInvocation(fn, TABLE, inv, { before: 5, after: 5 }); + expect(out).not.toContain("another session"); // OTHER session_id dropped + expect(out).toContain("real"); + expect(out).toContain("pushback"); + }); + it("elides a window longer than maxChars", async () => { const big = "x".repeat(400); const { fn } = mockQuery([ diff --git a/tests/shared/skill-org-publish.test.ts b/tests/shared/skill-org-publish.test.ts index 5b349f4f..a8845a3c 100644 --- a/tests/shared/skill-org-publish.test.ts +++ b/tests/shared/skill-org-publish.test.ts @@ -37,6 +37,25 @@ describe("readCurrentSkillRow", () => { ); expect(row).toMatchObject({ contributors: [], sourceSessions: [], version: 2, install: "project", scope: "me" }); }); + + it("parses the column fallbacks: install/scope/version ternaries + array vs JSON-string vs non-JSON vs object", async () => { + // contributors as a JSON-array STRING → parsed; source_sessions as a real array → mapped; + // install != global → project; scope != team → me; version 0 → 1. + const r1 = await readCurrentSkillRow( + async () => [{ name: "x", author: "a", install: "local", scope: "weird", body: "b", + contributors: JSON.stringify(["u1", "u2"]), source_sessions: ["s1", "s2"], version: 0 }], + "skills", "x", "a", + ); + expect(r1).toMatchObject({ install: "project", scope: "me", contributors: ["u1", "u2"], sourceSessions: ["s1", "s2"], version: 1 }); + + // contributors as a non-JSON string → []; a JSON OBJECT (not array) → []; install=global; scope=team; version 5 kept. + const r2 = await readCurrentSkillRow( + async () => [{ name: "x", author: "a", install: "global", scope: "team", body: "b", + contributors: "not json at all", source_sessions: JSON.stringify({ not: "array" }), version: 5 }], + "skills", "x", "a", + ); + expect(r2).toMatchObject({ install: "global", scope: "team", contributors: [], sourceSessions: [], version: 5 }); + }); }); describe("publishImprovedSkill", () => { diff --git a/tests/shared/skillopt-hook.test.ts b/tests/shared/skillopt-hook.test.ts index 52df482c..92b34ac7 100644 --- a/tests/shared/skillopt-hook.test.ts +++ b/tests/shared/skillopt-hook.test.ts @@ -6,7 +6,7 @@ vi.mock("../../src/skillify/skillopt-trigger.js", () => ({ })); import { markSkillPending, runEventTrigger } from "../../src/skillify/skillopt-trigger.js"; -import { armSkillOptOnSkillUse, reactSkillOpt } from "../../src/hooks/shared/skillopt-hook.js"; +import { armSkillOptOnSkillUse, reactSkillOpt, skillRefFromSkillFileRead } from "../../src/hooks/shared/skillopt-hook.js"; beforeEach(() => { vi.clearAllMocks(); delete process.env.HIVEMIND_SKILLOPT_DISABLED; delete process.env.HIVEMIND_WIKI_WORKER; }); @@ -26,6 +26,34 @@ describe("armSkillOptOnSkillUse", () => { armSkillOptOnSkillUse("s1", "Skill", {}); expect(markSkillPending).not.toHaveBeenCalled(); }); + + it("arms on a SKILL.md read (pi style), recovering the ref from the path", () => { + armSkillOptOnSkillUse("s1", "read", { path: "/home/u/.pi/agent/skills/posthog--kamo/SKILL.md" }, "tu2"); + expect(markSkillPending).toHaveBeenCalledWith("s1", "posthog--kamo", "tu2"); + }); + + it("arms on a SHELL command that reads SKILL.md (codex/hermes style — path in the command)", () => { + armSkillOptOnSkillUse("s1", "Bash", { command: 'cat "/home/u/.agents/skills/posthog--kamo/SKILL.md"' }, "tu3"); + expect(markSkillPending).toHaveBeenCalledWith("s1", "posthog--kamo", "tu3"); + }); + + it("does not arm on a non-SKILL.md read, nor on an EDIT of a SKILL.md (use, not edit)", () => { + armSkillOptOnSkillUse("s1", "read", { path: "/home/u/notes.md" }); + armSkillOptOnSkillUse("s1", "Edit", { path: "/home/u/.pi/agent/skills/x--a/SKILL.md" }); + expect(markSkillPending).not.toHaveBeenCalled(); + }); +}); + +describe("skillRefFromSkillFileRead", () => { + it("extracts the dir segment as the ref from a skills SKILL.md read", () => { + expect(skillRefFromSkillFileRead("read", { path: "/x/.pi/agent/skills/posthog--kamo/SKILL.md" })).toBe("posthog--kamo"); + expect(skillRefFromSkillFileRead("Read", { path: "/a/skills/bare/SKILL.md" })).toBe("bare"); // returned; markSkillPending rejects bare + }); + it("returns null for non-read tools, non-SKILL.md paths, or missing path", () => { + expect(skillRefFromSkillFileRead("Edit", { path: "/x/skills/y--z/SKILL.md" })).toBeNull(); + expect(skillRefFromSkillFileRead("read", { path: "/x/notes.md" })).toBeNull(); + expect(skillRefFromSkillFileRead("read", {})).toBeNull(); + }); }); describe("reactSkillOpt", () => { diff --git a/tests/shared/skillopt-improve.test.ts b/tests/shared/skillopt-improve.test.ts index 1107ce44..73082a14 100644 --- a/tests/shared/skillopt-improve.test.ts +++ b/tests/shared/skillopt-improve.test.ts @@ -57,6 +57,13 @@ describe("findInvocation", () => { expect((await findInvocation(two, "sessions", "s", "x", "a", "nope"))?.ts).toBe("t2"); // unknown id → latest fallback expect((await findInvocation(two, "sessions", "s", "x", "a"))?.ts).toBe("t2"); // no pin → latest }); + + it("skips rows whose recorded session_id differs from the queried one (path-LIKE collision guard)", async () => { + const q: QueryFn = async () => [ + { message: { type: "tool_call", tool_name: "Skill", tool_input: JSON.stringify({ skill: "x--a" }), session_id: "OTHER", timestamp: "t1" } }, + ] as Array>; + expect(await findInvocation(q, "sessions", "s1", "x", "a")).toBeNull(); // row.session_id !== s1 → skipped + }); }); describe("improveSkillIfFailed", () => { @@ -78,6 +85,27 @@ describe("improveSkillIfFailed", () => { expect(inserts).toHaveLength(0); }); + it("an empty reaction still judges + improves (no reaction appended to the window)", async () => { + const { query, inserts } = makeQuery(); + const r = await improveSkillIfFailed(base(query, { reaction: " " })); // whitespace → the append branch is skipped + expect(r).toMatchObject({ judged: true, failed: true, improved: true }); + expect(inserts).toHaveLength(1); + }); + + it("failed + proposer makes no change → judged, not improved", async () => { + const { query, inserts } = makeQuery(); + const r = await improveSkillIfFailed(base(query, { proposerModel: async () => "[]" })); // no edits + expect(r).toMatchObject({ judged: true, failed: true, improved: false, reason: "proposer made no change" }); + expect(inserts).toHaveLength(0); + }); + + it("failed + edit already proposed (dedup) → judged, not improved", async () => { + const { query, inserts } = makeQuery(); + const r = await improveSkillIfFailed(base(query, { alreadyProposed: () => true })); + expect(r).toMatchObject({ judged: true, failed: true, improved: false, reason: "edit already proposed (dedup)" }); + expect(inserts).toHaveLength(0); + }); + it("not an org skill (bare/plugin) → not judged", async () => { const { query } = makeQuery(); expect(await improveSkillIfFailed(base(query, { skillRef: "bare" }))).toMatchObject({ judged: false }); diff --git a/tests/shared/skillopt-meta.test.ts b/tests/shared/skillopt-meta.test.ts index f92d8516..88118784 100644 --- a/tests/shared/skillopt-meta.test.ts +++ b/tests/shared/skillopt-meta.test.ts @@ -31,6 +31,14 @@ describe("alreadyProposed / priorEditSummaries", () => { expect(prior.join(" ")).toContain("append"); expect(priorEditSummaries(meta, "posthog", "kamo").join(" ")).not.toContain('append: z'); // other skill's edit }); + + it("summarizes a delete edit (target, NO content) and a targetless append (content, NO target)", () => { + const m = [metaEntryFor("p", "k", [{ op: "delete", target: "old rule" }, { op: "append", content: "x" }], "t")]; + const prior = priorEditSummaries(m, "p", "k"); + // delete → target anchor + no content preview; append → content preview + no anchor (both ternary halves) + expect(prior.join(" ")).toMatch(/delete @"old rule"/); + expect(prior.join(" ")).toMatch(/append: x/); + }); }); describe("loadMeta / appendMeta", () => { diff --git a/tests/shared/skillopt-trigger.test.ts b/tests/shared/skillopt-trigger.test.ts index 498405bc..9391f7fd 100644 --- a/tests/shared/skillopt-trigger.test.ts +++ b/tests/shared/skillopt-trigger.test.ts @@ -1,6 +1,13 @@ -import { describe, it, expect, vi } from "vitest"; +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { join } from "node:path"; +import { mkdtempSync, existsSync } from "node:fs"; +import { tmpdir } from "node:os"; import { markSkillPending, runEventTrigger, judgeWindow, DEFAULT_JUDGE_WINDOW, type PendingSkill, type PendingStore } from "../../src/skillify/skillopt-trigger.js"; +// Mock node:child_process so the REAL spawnWorker can run without a detached process. +const { spawnMock } = vi.hoisted(() => ({ spawnMock: vi.fn(() => ({ unref: () => {}, pid: 4242 })) })); +vi.mock("node:child_process", () => ({ spawn: spawnMock })); + /** In-memory per-session store standing in for the per-session files. */ function memStore(initial: Record = {}) { const m = new Map(Object.entries(initial)); @@ -106,3 +113,61 @@ describe("judgeWindow", () => { expect(judgeWindow({ HIVEMIND_SKILLOPT_JUDGE_WINDOW: "x" } as never)).toBe(3); }); }); + +// Exercise the REAL default fileStore (per-session files) + the REAL spawnWorker, which the +// dependency-injected tests above never touch — these are the trigger's actual side effects. +describe("default fileStore + spawnWorker (real implementations)", () => { + let tmp: string; + beforeEach(() => { tmp = mkdtempSync(join(tmpdir(), "skillopt-trig-")); process.env.HIVEMIND_STATE_DIR = tmp; spawnMock.mockClear(); }); + afterEach(() => { delete process.env.HIVEMIND_STATE_DIR; }); + + it("persists the pending window to a per-session file and reads it back across mark→react", () => { + // arm via the REAL fileStore (no injected store), org-gate injected + expect(markSkillPending("fs-a", "posthog--kamo", "tuF", { isOrgSkill: () => true, env: {} as never })).toBe(true); + expect(existsSync(join(tmp, "skillopt", "pending", "fs-a.json"))).toBe(true); + // react via the REAL fileStore (load → decrement → save) + REAL spawnWorker (mocked spawn) + const r = runEventTrigger("fs-a", "no, mocking hides the failure", { agent: "hermes", deps: { canFire: () => true, env: {} as never } }); + expect(r).toEqual({ fired: true, reason: "spawned" }); + expect(spawnMock).toHaveBeenCalledTimes(1); + }); + + it("spawnWorker passes the SKILLOPT_* env contract the worker reads back", () => { + markSkillPending("fs-b", "x--a", "tuZ", { isOrgSkill: () => true, env: {} as never }); + runEventTrigger("fs-b", "you broke it", { agent: "codex", deps: { canFire: () => true, env: {} as never } }); + const [bin, args, opts] = spawnMock.mock.calls[0] as unknown as [string, string[], { env: Record }]; + expect(args[0]).toContain("skillopt-worker.js"); + expect(opts.env.HIVEMIND_SKILLOPT_WORKER).toBe("1"); + expect(opts.env.HIVEMIND_SKILLOPT_SESSION).toBe("fs-b"); + expect(opts.env.HIVEMIND_SKILLOPT_SKILL).toBe("x--a"); + expect(opts.env.HIVEMIND_SKILLOPT_REACTION).toBe("you broke it"); + expect(opts.env.HIVEMIND_SKILLOPT_TOOL_USE_ID).toBe("tuZ"); + expect(opts.env.HIVEMIND_SKILLOPT_AGENT).toBe("codex"); + }); + + it("react closes the window (deletes the file) when the last budget is spent", () => { + markSkillPending("fs-c", "y--b", undefined, { isOrgSkill: () => true, env: { HIVEMIND_SKILLOPT_JUDGE_WINDOW: "1" } as never }); + runEventTrigger("fs-c", "still wrong", { deps: { canFire: () => true, env: {} as never } }); + expect(existsSync(join(tmp, "skillopt", "pending", "fs-c.json"))).toBe(false); // budget 1 → cleared + }); + + it("react is a no-op when no file exists for the session", () => { + expect(runEventTrigger("never-armed", "x", { deps: { canFire: () => true, env: {} as never } })) + .toEqual({ fired: false, reason: "no-skill" }); + expect(spawnMock).not.toHaveBeenCalled(); + }); + + it("real defaultIsOrgSkill (pull manifest) rejects a ref absent from the manifest", () => { + // no injected isOrgSkill → the real defaultIsOrgSkill(loadManifest) runs against the empty + // temp state dir → the ref isn't a pulled org skill → not armed (no shared-row edit risk). + const s = memStore(); + expect(markSkillPending("real-org", "x--a", "tu", { store: s.store, env: {} as never })).toBe(false); + expect(s.get("real-org")).toBeNull(); + }); + + it("real defaultHasCreds runs when canFire isn't injected (spawns iff creds resolve)", () => { + // no injected canFire → the real defaultHasCreds(loadConfig) runs. Either outcome exercises it. + const s = memStore({ s1: { skill: "x--a", budget: 2 } }); + const r = runEventTrigger("s1", "reaction", { deps: { store: s.store, env: {} as never } }); + expect(["spawned", "no-creds"]).toContain(r.reason); + }); +});