Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 44 additions & 15 deletions src/skillify/skill-proposer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,36 +26,65 @@ export interface ProposeConfig {
}

/**
 * Instruction prompt handed to the proposer model (the tests read it back as
 * the model's first argument).
 *
 * It asks the model to:
 *  1. diagnose the single recurring weakness behind the confirmed failures;
 *  2. propose a small set of structured edits that are concrete, anchored to
 *     the relevant existing section (not appended at the end), and traceable
 *     to a listed failure;
 *  3. leave everything between `SU_START` and `SU_END` untouched;
 *  4. reply with one prose line naming the weakness, followed by a JSON array
 *     of `{op, target, content}` edits.
 *
 * Only this text is assigned: the earlier "reply with ONLY a JSON array"
 * wording contradicted the two-part reply format and has been removed.
 */
const SYSTEM =
  "You improve an engineering SKILL document that produced repeated, confirmed " +
  "failures. Work in two steps: (1) diagnose the SINGLE recurring weakness — the " +
  "specific thing the doc told the agent to do, or failed to tell it, that caused " +
  "the pushback; (2) propose a SMALL set of structured edits that fix exactly that.\n" +
  "Every edit MUST be:\n" +
  "- CONCRETE and OPERATIONAL: a specific rule, check, command, or step the agent " +
  "can follow — never vague advice ('be careful', 'communicate clearly').\n" +
  "- PLACED WHERE IT FIRES: anchor it to the RELEVANT existing section " +
  "(insert_after that heading), or REPLACE the weak existing instruction that " +
  "allowed the failure. Do NOT just append a paragraph at the end of a long doc — " +
  "that gets skipped.\n" +
  "- TRACEABLE: each edit must directly prevent a specific failure listed below.\n" +
  "Prefer strengthening/replacing an existing instruction over adding new text. Do " +
  `NOT rewrite the whole doc, and do NOT touch anything between ${SU_START} and ` +
  `${SU_END}. Reply in TWO parts: first ONE line naming the single recurring ` +
  "weakness, then a JSON array of edits (the array is what gets parsed), each: " +
  '{"op":"append|insert_after|replace|delete","target":"<exact existing text to ' +
  'anchor on; required for insert_after/replace/delete>","content":"<new text; ' +
  'required for append/insert_after/replace>"}. Prefer the smallest change that fixes the weakness.';

/**
 * Build the user-turn prompt for the proposer model.
 *
 * @param body       Current text of the skill document, embedded verbatim.
 * @param failures   Confirmed failure descriptions; only the first 8 are
 *                   included, numbered from 1.
 * @param priorEdits Summaries of edits already tried on earlier runs; only the
 *                   first 12 are listed. When empty, the "ALREADY TRIED"
 *                   section is omitted entirely.
 * @returns The prompt: skill body, numbered failures, optional prior-edit
 *          list, and a closing instruction to name the weakness in one line
 *          before emitting the JSON array of edits.
 */
function buildUserPrompt(body: string, failures: string[], priorEdits: string[]): string {
  const cases = failures.slice(0, 8).map((f, i) => `${i + 1}. ${f}`).join("\n");
  const prior = priorEdits.length
    ? `\n\nALREADY TRIED for this skill on earlier runs (do NOT repeat these — propose something different, or nothing):\n${priorEdits.slice(0, 12).map((p) => `- ${p}`).join("\n")}`
    : "";
  // Single return: a stale "JSON array only" return used to sit above this one
  // and made the weakness-first instruction unreachable.
  return `CURRENT SKILL:\n${body}\n\nCONFIRMED FAILURES it produced (user pushed back AND a judge confirmed the task was not accomplished):\n${cases}${prior}\n\nFirst name the single recurring weakness in one line, then output the JSON array of edits that anchor the fix into the relevant existing section.`;
}

/**
 * The four recognised edit operation names.
 * NOTE(review): presumably used to validate each parsed edit's `op` — the
 * consuming code is outside this view; confirm.
 */
const OPS: Set<EditOp> = new Set<EditOp>([
  "append",
  "insert_after",
  "replace",
  "delete",
]);

/**
 * Extract the JSON array from noisy model output.
 *
 * The array is assumed to end at the LAST `]` in `s`. Candidate opening
 * brackets are tried in this order, returning the first slice that parses as
 * a JSON array:
 *  1. the `[` that balances the last `]` in a bracket-counting back-scan
 *     (skips a leading prose line even if that line contains brackets);
 *  2. the first `[` in the string (covers string values holding an unbalanced
 *     `]`, which throws the back-scan's count off);
 *  3. every remaining `[`, from right to left (covers both problems at once:
 *     bracketed prose AND an unbalanced bracket inside a string value).
 *
 * @param s Raw text that may contain a JSON array among other content.
 * @returns The parsed array, or `null` when no candidate slice parses as one.
 */
function extractArray(s: string): unknown[] | null {
  const end = s.lastIndexOf("]");
  if (end === -1) return null;

  // Parse s[start..end]; null on a syntax error or a non-array value.
  const tryParse = (start: number): unknown[] | null => {
    try {
      const parsed: unknown = JSON.parse(s.slice(start, end + 1));
      return Array.isArray(parsed) ? parsed : null;
    } catch {
      return null;
    }
  };

  // Pass 1: walk back from the last `]` to the `[` that balances it. The count
  // is not string-aware, so brackets inside JSON strings can mislead it.
  let depth = 0;
  for (let i = end; i >= 0; i--) {
    const ch = s[i];
    if (ch === "]") depth++;
    else if (ch === "[" && --depth === 0) {
      const balanced = tryParse(i);
      if (balanced) return balanced;
      break;
    }
  }

  // Pass 2: widest possible span, first `[` to last `]`.
  const first = s.indexOf("[");
  if (first === -1 || first > end) return null;
  const widest = tryParse(first);
  if (widest) return widest;

  // Pass 3: remaining `[` candidates, nearest the end first. Each attempt is a
  // full parse, which is fine for the short replies this handles.
  for (let i = s.lastIndexOf("[", end); i > first; i = s.lastIndexOf("[", i - 1)) {
    const candidate = tryParse(i);
    if (candidate) return candidate;
  }
  return null;
}

/** Tolerant parse of a JSON array of edits (handles ```fences / surrounding prose). */
export function parseEdits(raw: string): Edit[] {
let s = raw.trim();
const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/);
if (fence) s = fence[1].trim();
const a = s.indexOf("[");
const b = s.lastIndexOf("]");
if (a === -1 || b <= a) return [];
let arr: unknown;
try { arr = JSON.parse(s.slice(a, b + 1)); } catch { return []; }
if (!Array.isArray(arr)) return [];
const arr = extractArray(s);
if (!arr) return [];
const out: Edit[] = [];
for (const e of arr) {
if (!e || typeof e !== "object") continue;
Expand Down
30 changes: 30 additions & 0 deletions tests/shared/skill-proposer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ describe("parseEdits", () => {
// Output without any JSON array (e.g. a refusal) must parse to zero edits.
it("returns [] when there's no array", () => {
  const refusal = "the model refused";
  const edits = parseEdits(refusal);
  expect(edits).toEqual([]);
});
// The bracketed phrase in the prose line must not be mistaken for the edits array.
it("extracts the edits array past a weakness line that itself contains brackets", () => {
  const weaknessLine = 'Weakness: the doc never says to verify [the real client] before reporting.\n';
  const editsJson = '[{"op":"append","content":"verify via API"}]';
  const edits = parseEdits(weaknessLine + editsJson);
  expect(edits).toEqual([{ op: "append", content: "verify via API" }]);
});
// A lone "]" inside a string value throws off the bracket-counting back-scan,
// so the first-"[" to last-"]" fallback has to recover the array.
it("falls back to first-[..last-] when content holds an unbalanced bracket", () => {
  const expected = [{ op: "append", content: "use arr] carefully" }];
  const modelOutput = '[{"op":"append","content":"use arr] carefully"}]';
  expect(parseEdits(modelOutput)).toEqual(expected);
});
});

describe("proposeSkillEdit", () => {
Expand All @@ -35,6 +45,26 @@ describe("proposeSkillEdit", () => {
expect(model.mock.calls[0][1]).toContain("CONFIRMED FAILURES");
});

// Pins the prompt wording that pushes the model toward concrete edits
// anchored in an existing section, for both model arguments.
it("steers toward concrete, anchored edits — not append-at-end", async () => {
  const reply =
    '[{"op":"insert_after","target":"## Rules","content":"0. verify on the real client first"}]';
  const model = vi.fn(async (_s: string, _u: string) => reply);

  const proposal = await proposeSkillEdit(body, failures, { model });
  expect(proposal.changed).toBe(true);

  const firstCall = model.mock.calls[0];
  const system = firstCall[0];
  expect(system).toContain("anchor it to the RELEVANT existing section");
  expect(system).toContain("CONCRETE and OPERATIONAL");
  expect(system).toContain("REPLACE the weak existing instruction");
  expect(firstCall[1]).toContain("First name the single recurring weakness in one line");
});

// Previously tried edits must be echoed into the user prompt under the
// "ALREADY TRIED" heading.
it("includes prior edits so the proposer doesn't repeat them", async () => {
  const priorEdits = ["append: verify via API"];
  const model = vi.fn(async (_s: string, _u: string) => "[]");

  await proposeSkillEdit(body, failures, { model, priorEdits });

  const userPrompt = model.mock.calls[0][1];
  expect(userPrompt).toContain("ALREADY TRIED");
  expect(userPrompt).toContain("append: verify via API");
});

it("enforces the edit budget", async () => {
const model = vi.fn(async (_s: string, _u: string) =>
'[{"op":"append","content":"a"},{"op":"append","content":"b"},{"op":"append","content":"c"}]');
Expand Down
Loading