activeloopai · khustup2 · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026
diff --git a/src/skillify/skill-proposer.ts b/src/skillify/skill-proposer.ts
@@ -26,36 +26,65 @@ export interface ProposeConfig {
 }
 
 const SYSTEM =
-  "You improve an engineering SKILL document that has been producing repeated, " +
-  "confirmed failures. Diagnose the SINGLE recurring weakness behind the failures " +
-  "and propose a SMALL set of structured edits that fix it. Do NOT rewrite the " +
-  `whole doc, and do NOT touch anything between ${SU_START} and ${SU_END}. Reply ` +
-  'with ONLY a JSON array of edits, each: {"op":"append|insert_after|replace|' +
-  'delete","target":"<exact existing text to anchor on; required for ' +
-  'insert_after/replace/delete>","content":"<new text; required for ' +
-  'append/insert_after/replace>"}. Prefer the smallest change that fixes the weakness.';
+  "You improve an engineering SKILL document that produced repeated, confirmed " +
+  "failures. Work in two steps: (1) diagnose the SINGLE recurring weakness — the " +
+  "specific thing the doc told the agent to do, or failed to tell it, that caused " +
+  "the pushback; (2) propose a SMALL set of structured edits that fix exactly that.\n" +
+  "Every edit MUST be:\n" +
+  "- CONCRETE and OPERATIONAL: a specific rule, check, command, or step the agent " +
+  "can follow — never vague advice ('be careful', 'communicate clearly').\n" +
+  "- PLACED WHERE IT FIRES: anchor it to the RELEVANT existing section " +
+  "(insert_after that heading), or REPLACE the weak existing instruction that " +
+  "allowed the failure. Do NOT just append a paragraph at the end of a long doc — " +
+  "that gets skipped.\n" +
+  "- TRACEABLE: each edit must directly prevent a specific failure listed below.\n" +
+  "Prefer strengthening/replacing an existing instruction over adding new text. Do " +
+  `NOT rewrite the whole doc, and do NOT touch anything between ${SU_START} and ` +
+  `${SU_END}. Reply in TWO parts: first ONE line naming the single recurring ` +
+  "weakness, then a JSON array of edits (the array is what gets parsed), each: " +
+  '{"op":"append|insert_after|replace|delete","target":"<exact existing text to ' +
+  'anchor on; required for insert_after/replace/delete>","content":"<new text; ' +
+  'required for append/insert_after/replace>"}. Prefer the smallest change that fixes the weakness.';
 
 function buildUserPrompt(body: string, failures: string[], priorEdits: string[]): string {
   const cases = failures.slice(0, 8).map((f, i) => `${i + 1}. ${f}`).join("\n");
   const prior = priorEdits.length
     ? `\n\nALREADY TRIED for this skill on earlier runs (do NOT repeat these — propose something different, or nothing):\n${priorEdits.slice(0, 12).map((p) => `- ${p}`).join("\n")}`
     : "";
-  return `CURRENT SKILL:\n${body}\n\nCONFIRMED FAILURES it produced (user pushed back AND a judge confirmed the task was not accomplished):\n${cases}${prior}\n\nPropose the bounded edits. JSON array only.`;
+  return `CURRENT SKILL:\n${body}\n\nCONFIRMED FAILURES it produced (user pushed back AND a judge confirmed the task was not accomplished):\n${cases}${prior}\n\nFirst name the single recurring weakness in one line, then output the JSON array of edits that anchor the fix into the relevant existing section.`;
 }
 
 const OPS = new Set<EditOp>(["append", "insert_after", "replace", "delete"]);
 
+/**
+ * Extract the JSON array from noisy model output. Robust to a leading prose line
+ * (the "weakness" the proposer is asked to state first) even if it contains
+ * brackets: scan back from the last `]` to its balanced `[`, and fall back to
+ * first-`[`..last-`]` if that doesn't parse.
+ */
+function extractArray(s: string): unknown[] | null {
+  const b = s.lastIndexOf("]");
+  if (b === -1) return null;
+  let depth = 0;
+  for (let i = b; i >= 0; i--) {
+    if (s[i] === "]") depth++;
+    else if (s[i] === "[" && --depth === 0) {
+      try { const a = JSON.parse(s.slice(i, b + 1)); if (Array.isArray(a)) return a; } catch { /* fall through */ }
+      break;
+    }
+  }
+  const first = s.indexOf("[");
+  if (first !== -1 && b > first) { try { const a = JSON.parse(s.slice(first, b + 1)); if (Array.isArray(a)) return a; } catch { /* none */ } }
+  return null;
+}
+
 /** Tolerant parse of a JSON array of edits (handles ```fences / surrounding prose). */
 export function parseEdits(raw: string): Edit[] {
   let s = raw.trim();
   const fence = s.match(/```(?:json)?\s*([\s\S]*?)```/);
   if (fence) s = fence[1].trim();
-  const a = s.indexOf("[");
-  const b = s.lastIndexOf("]");
-  if (a === -1 || b <= a) return [];
-  let arr: unknown;
-  try { arr = JSON.parse(s.slice(a, b + 1)); } catch { return []; }
-  if (!Array.isArray(arr)) return [];
+  const arr = extractArray(s);
+  if (!arr) return [];
   const out: Edit[] = [];
   for (const e of arr) {
     if (!e || typeof e !== "object") continue;

diff --git a/tests/shared/skill-proposer.test.ts b/tests/shared/skill-proposer.test.ts
@@ -18,6 +18,16 @@ describe("parseEdits", () => {
   it("returns [] when there's no array", () => {
     expect(parseEdits("the model refused")).toEqual([]);
   });
+  it("extracts the edits array past a weakness line that itself contains brackets", () => {
+    const raw = 'Weakness: the doc never says to verify [the real client] before reporting.\n' +
+      '[{"op":"append","content":"verify via API"}]';
+    expect(parseEdits(raw)).toEqual([{ op: "append", content: "verify via API" }]);
+  });
+  it("falls back to first-[..last-] when content holds an unbalanced bracket", () => {
+    // the inner "]" defeats the balanced back-scan; the fallback still parses it
+    const raw = '[{"op":"append","content":"use arr] carefully"}]';
+    expect(parseEdits(raw)).toEqual([{ op: "append", content: "use arr] carefully" }]);
+  });
 });
 
 describe("proposeSkillEdit", () => {
@@ -35,6 +45,26 @@ describe("proposeSkillEdit", () => {
     expect(model.mock.calls[0][1]).toContain("CONFIRMED FAILURES");
   });
 
+  it("steers toward concrete, anchored edits — not append-at-end", async () => {
+    const model = vi.fn(async (_s: string, _u: string) =>
+      '[{"op":"insert_after","target":"## Rules","content":"0. verify on the real client first"}]');
+    const p = await proposeSkillEdit(body, failures, { model });
+    expect(p.changed).toBe(true);
+    const system = model.mock.calls[0][0];
+    expect(system).toContain("anchor it to the RELEVANT existing section");
+    expect(system).toContain("CONCRETE and OPERATIONAL");
+    expect(system).toContain("REPLACE the weak existing instruction");
+    expect(model.mock.calls[0][1]).toContain("First name the single recurring weakness in one line");
+  });
+
+  it("includes prior edits so the proposer doesn't repeat them", async () => {
+    const model = vi.fn(async (_s: string, _u: string) => "[]");
+    await proposeSkillEdit(body, failures, { model, priorEdits: ["append: verify via API"] });
+    const user = model.mock.calls[0][1];
+    expect(user).toContain("ALREADY TRIED");
+    expect(user).toContain("append: verify via API");
+  });
+
   it("enforces the edit budget", async () => {
     const model = vi.fn(async (_s: string, _u: string) =>
       '[{"op":"append","content":"a"},{"op":"append","content":"b"},{"op":"append","content":"c"}]');