diff --git a/packages/cli/src/core/init.ts b/packages/cli/src/core/init.ts index f245ead..c886ec5 100644 --- a/packages/cli/src/core/init.ts +++ b/packages/cli/src/core/init.ts @@ -84,6 +84,7 @@ export interface ExecutePhaseContext { state_path: string; roadmap_path: string; config_path: string; + skill_paths: string; } export interface PlanPhaseContext { @@ -393,6 +394,8 @@ export function cmdInitExecutePhase(cwd: string, phase: string | undefined, raw: const milestone = getMilestoneInfo(cwd); const phase_req_ids = extractReqIds(cwd, phase!); + const skillPaths = path.join(os.homedir(), '.claude', 'skills'); + const result: ExecutePhaseContext = { executor_model: resolveModelInternal(cwd, 'maxsim-executor'), verifier_model: resolveModelInternal(cwd, 'maxsim-verifier'), @@ -431,6 +434,7 @@ export function cmdInitExecutePhase(cwd: string, phase: string | undefined, raw: state_path: '.planning/STATE.md', roadmap_path: '.planning/ROADMAP.md', config_path: '.planning/config.json', + skill_paths: skillPaths, }; output(result, raw); diff --git a/templates/agents/maxsim-executor.md b/templates/agents/maxsim-executor.md index 0663e14..fee7376 100644 --- a/templates/agents/maxsim-executor.md +++ b/templates/agents/maxsim-executor.md @@ -23,7 +23,7 @@ Before executing, discover project context: **Self-improvement lessons:** Read `.planning/LESSONS.md` if it exists — accumulated lessons from past executions on this codebase. Apply them proactively to avoid known mistakes before they become deviations. -**Project skills:** Check `.agents/skills/` directory if it exists: +**Project skills:** Check `~/.claude/skills/` directory if it exists (also check `.claude/skills/` in the project root as a fallback): 1. List available skills (subdirectories) 2. Read `SKILL.md` for each skill (lightweight index ~130 lines) 3. 
Load specific `rules/*.md` files as needed during implementation @@ -80,22 +80,57 @@ grep -n "type=\"checkpoint" [plan-path] **Pattern C: Continuation** — Check `` in prompt, verify commits exist, resume from specified task. + +## Task-Based Context Loading (EXEC-03) + +For each task, load ONLY the files listed in the task's `Files:` field — not the entire codebase. + +1. Call `skill-context` or read the plan to get the task's file list +2. Use the `Read` tool to load only those specific files +3. If the task has no `Files:` field, load files referenced in the task description +4. Do NOT speculatively read the entire `src/` directory or similar broad paths + +This keeps executor context lean and focused per task. + + -For each task: +For each task, follow the Execute → Simplify → Verify → Commit cycle: 1. **If `type="auto"`:** - - Check for `tdd="true"` → follow TDD execution flow - - Execute task, apply deviation rules as needed - - Handle auth errors as authentication gates - - Run verification, confirm done criteria - - Commit (see task_commit_protocol) - - Track completion + commit hash for Summary + - **Execute:** Check for `tdd="true"` → follow TDD execution flow. Otherwise implement task, apply deviation rules as needed. Handle auth errors as authentication gates. + - **Simplify:** Run a simplification pass on files modified by this task — check for duplication, dead code, complexity. Make only behavior-preserving simplifications. Skip if the task is config/docs only or changes fewer than 10 lines. + - **Verify:** Run verification, confirm done criteria. If simplification broke something, revert simplification and re-verify. + - **Commit:** Commit (see task_commit_protocol). Track completion + commit hash for Summary. + - **Update progress table** (see progress_tracking). 2. **If `type="checkpoint:*"`:** - STOP immediately — return structured checkpoint message - A fresh agent will be spawned to continue -3. 
After all tasks: run overall verification, confirm success criteria, document deviations +3. After all tasks in a wave: run **wave code review** (see wave_review_protocol). +4. After all waves: run overall verification, confirm success criteria, document deviations. + + + +## Orchestrator Status Tracking (EXEC-02) + +Maintain a progress table throughout execution. Update after each task state change: + +```markdown +| Wave | Task | Status | Stage | +|------|------|--------|-------| +| 1 | Task 1 | Complete | Committed | +| 1 | Task 2 | In Progress | Simplifying | +| 2 | Task 3 | Blocked | Waiting for Wave 1 | +``` + +**Stages:** Executing → Simplifying → Verifying → Committed → Reviewed + +**Rules:** +- Update the table in your working state after each task stage transition +- Include the table in checkpoint returns so continuation agents have full state +- Include the final table in the SUMMARY.md under `## Execution Progress` +- If a task is blocked or failed, record the reason in the Status column @@ -612,11 +647,11 @@ Do not rely on memory of the skill content — always read the file fresh. 
| Skill | Read | Trigger | |-------|------|---------| -| TDD Enforcement | `.agents/skills/tdd/SKILL.md` | Before writing implementation code for a new feature, bug fix, or when plan type is `tdd` | -| Systematic Debugging | `.agents/skills/systematic-debugging/SKILL.md` | When encountering any bug, test failure, or unexpected behavior during execution | -| Verification Before Completion | `.agents/skills/verification-before-completion/SKILL.md` | Before claiming any task is done, fixed, or passing | +| TDD Enforcement | `~/.claude/skills/tdd/SKILL.md` | Before writing implementation code for a new feature, bug fix, or when plan type is `tdd` | +| Systematic Debugging | `~/.claude/skills/systematic-debugging/SKILL.md` | When encountering any bug, test failure, or unexpected behavior during execution | +| Verification Before Completion | `~/.claude/skills/verification-before-completion/SKILL.md` | Before claiming any task is done, fixed, or passing | -**Project skills override built-in skills.** If a skill with the same name exists in `.agents/skills/` in the project, load that one instead. +**Project skills override built-in skills.** If a skill with the same name exists in `.claude/skills/` in the project root, load that one instead of the copy in `~/.claude/skills/`. diff --git a/templates/workflows/execute-phase.md b/templates/workflows/execute-phase.md index b9e29cf..e77edae 100644 --- a/templates/workflows/execute-phase.md +++ b/templates/workflows/execute-phase.md @@ -152,7 +152,7 @@ Execute each wave in sequence. 
Within a wave: parallel if `PARALLELIZATION=true` - .planning/STATE.md (State) - .planning/config.json (Config, if exists) - ./CLAUDE.md (Project instructions, if exists — follow project-specific guidelines and coding conventions) - - .agents/skills/ (Project skills, if exists — list skills, read SKILL.md for each, follow relevant rules during implementation) + - ~/.claude/skills/ (Skills, if exists — list skills, read SKILL.md for each, follow relevant rules during implementation) @@ -177,7 +177,21 @@ Execute each wave in sequence. Within a wave: parallel if `PARALLELIZATION=true` If ANY spot-check fails: report which plan failed, route to failure handler — ask "Retry plan?" or "Continue with remaining waves?" - If pass — **emit plan-complete lifecycle event** (if `DASHBOARD_ACTIVE`): + If pass — **verify wave results with code review:** + + Review the wave's combined changes for spec compliance and code quality: + ```bash + # Get all files changed in this wave + WAVE_FIRST_COMMIT=$(git log --oneline --all --grep="{phase}-{first_plan_in_wave}" --reverse | head -1 | cut -d' ' -f1) + git diff ${WAVE_FIRST_COMMIT}^..HEAD --name-only + ``` + + - **Spec compliance:** Cross-check each plan's `` criteria against actual implementation + - **Code quality:** Scan for inconsistent patterns, missing error handling, hardcoded values + - If blocking issues are found, fix them before proceeding to the next wave + - Record review verdict: `Wave {N} Review: PASS` or `Wave {N} Review: PASS after fixes ({K} fixes)`, where `{K}` is the number of fixes applied + + **Emit plan-complete lifecycle event** (if `DASHBOARD_ACTIVE`): ``` mcp__maxsim-dashboard__submit_lifecycle_event( event_type: "plan-complete", @@ -186,6 +200,15 @@ Execute each wave in sequence. 
Within a wave: parallel if `PARALLELIZATION=true` ) ``` + **Update progress table** (maintain throughout execution): + ```markdown + | Wave | Plan | Status | Review | + |------|------|--------|--------| + | 1 | 01-01 | Complete | Passed | + | 1 | 01-02 | Complete | Passed | + | 2 | 01-03 | In Progress | Pending | + ``` + Then report: ``` --- @@ -194,13 +217,14 @@ Execute each wave in sequence. Within a wave: parallel if `PARALLELIZATION=true` **{Plan ID}: {Plan Name}** {What was built — from SUMMARY.md} {Notable deviations, if any} + {Wave review verdict} {If more waves: what this enables for next wave} --- ``` - Bad: "Wave 2 complete. Proceeding to Wave 3." - - Good: "Terrain system complete — 3 biome types, height-based texturing, physics collision meshes. Vehicle physics (Wave 3) can now reference ground surfaces." + - Good: "Terrain system complete — 3 biome types, height-based texturing, physics collision meshes. Wave review: PASS. Vehicle physics (Wave 3) can now reference ground surfaces." 5. **Handle failures:** @@ -265,19 +289,31 @@ After all waves: **Waves:** {N} | **Plans:** {M}/{total} complete -| Wave | Plans | Status | -|------|-------|--------| -| 1 | plan-01, plan-02 | ✓ Complete | -| CP | plan-03 | ✓ Verified | -| 2 | plan-04 | ✓ Complete | +| Wave | Plans | Status | Review | +|------|-------|--------|--------| +| 1 | plan-01, plan-02 | Complete | Passed | +| CP | plan-03 | Verified | Passed | +| 2 | plan-04 | Complete | Passed after 1 fix | ### Plan Details 1. **03-01**: [one-liner from SUMMARY.md] 2. **03-02**: [one-liner from SUMMARY.md] +### Wave Reviews +| Wave | Spec Review | Code Review | Fixes Applied | +|------|------------|-------------|---------------| +| 1 | Pass | Pass | 0 | +| 2 | Pass | Pass after fix | 1 | + ### Issues Encountered [Aggregate from SUMMARYs, or "None"] ``` + +Aggregate task results from all executor agents. 
For each plan's SUMMARY.md, extract: +- One-liner description +- Deviation count and categories +- Wave review verdicts +- Any deferred issues diff --git a/templates/workflows/execute-plan.md b/templates/workflows/execute-plan.md index 43a6517..9a27619 100644 --- a/templates/workflows/execute-plan.md +++ b/templates/workflows/execute-plan.md @@ -144,13 +144,32 @@ Deviations are normal — handle via rules below. 1. Read @context files from prompt 2. Per task: - - `type="auto"`: if `tdd="true"` → TDD execution. Implement with deviation rules + auth gates. Verify done criteria. Commit (see task_commit). Track hash for Summary. + - `type="auto"`: if `tdd="true"` → TDD execution. Implement with deviation rules + auth gates. Verify done criteria. **Simplify** (see simplify_pass). Re-verify. Commit (see task_commit). Track hash for Summary. - `type="checkpoint:*"`: STOP → checkpoint_protocol → wait for user → continue only after confirmation. 3. Run `` checks 4. Confirm `` met 5. Document deviations in Summary + +## Post-Task Simplification + +After each task's implementation passes tests but BEFORE committing, run a simplification pass on the files modified by that task: + +1. **Duplication check:** Scan modified files for copy-pasted blocks, near-identical functions, repeated patterns. Extract shared helpers where 3+ lines repeat. +2. **Dead code removal:** Remove unused imports, unreachable branches, commented-out code, unused variables/functions introduced by this task. +3. **Complexity reduction:** Simplify nested conditionals (early returns), flatten callback chains, replace verbose patterns with idiomatic equivalents. 
+ +**Rules:** +- Only simplify files touched by the current task — do NOT refactor unrelated code +- Changes must be behavior-preserving (no new features, no bug fixes) +- If no simplification opportunities found, skip — do not force changes +- After applying simplifications, re-run the task's verification to confirm nothing broke +- Track simplifications as part of the task (not as separate deviations) + +**Skip if:** Task only modifies config files, documentation, or has fewer than 10 lines of code changes. + + ## Authentication Gates @@ -270,6 +289,51 @@ TASK_COMMITS+=("Task ${TASK_NUM}: ${TASK_COMMIT}") + +## Post-Wave Code Review Gate + +After ALL tasks in a wave complete (all committed), run a 2-stage code review on the wave's changes before proceeding to the next wave. + +**1. Identify wave changes:** +```bash +# Get the diff for all commits in this wave +WAVE_FIRST_COMMIT=$(echo "${TASK_COMMITS[0]}" | awk '{print $NF}') +git diff ${WAVE_FIRST_COMMIT}^..HEAD --name-only +``` + +**2. Stage 1 — Spec Compliance:** +Review each task's implementation against its `` criteria from the plan: +- Are all done criteria actually met (not just claimed)? +- Do implementations match the task specifications? +- Are there gaps between what was specified and what was built? + +**On PASS:** Proceed to Stage 2. +**On FAIL:** Fix blocking issues inline, re-run affected task verification, re-commit fixes. + +**3. Stage 2 — Code Quality:** +Review the wave's changed files for: +- Consistent naming conventions and code style +- Proper error handling on all new code paths +- No hardcoded values that should be configurable +- No security issues (exposed secrets, injection vectors, missing auth checks) + +**On PASS:** Wave complete — proceed to next wave. +**On FAIL:** Fix issues inline, re-verify, re-commit fixes. + +**4. 
Record review verdict in wave notes:** +``` +Wave {N} Review: PASS (spec: pass, quality: pass) +``` +Or with issues: +``` +Wave {N} Review: PASS after fixes (spec: 1 fix, quality: 2 fixes) +``` + +**Max retries:** 2 per stage. If a stage still fails after 2 retries, flag it in SUMMARY.md under "Wave Review Issues" and continue to the next wave. + +**Skip if:** Wave contains only a single documentation or config task. + + On `type="checkpoint:*"`: automate everything possible first. Checkpoints are for verification/decisions only.