diff --git a/CLAUDE.md b/CLAUDE.md index adccc31..b663829 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -79,24 +79,26 @@ Protocol: stdin receives JSON payload, exit codes control behavior (0=allow, 2=b The expected feature lifecycle within a single session is: ``` -brainstorming → spec-feature → writing-plans → lock-tests → executing-plans → finish-feature +/devflow:brainstorming → /devflow:spec-feature → /devflow:writing-plans → /devflow:lock-tests → /devflow:executing-plans → /devflow:finish-feature ``` -- `new-feature` sets up context, recalls memories, runs scope-check, and starts brainstorming. -- `spec-feature` writes the spec document. -- `writing-plans` writes the implementation plan. -- `lock-tests` writes the full failing-test inventory and gates on user approval before implementation begins. -- `executing-plans` drives per-task red-green-refactor. -- `finish-feature` runs verification, creates the PR/MR, retains learnings. +- `/devflow:new-feature` sets up context, recalls memories, runs scope-check, and starts brainstorming. +- `/devflow:spec-feature` writes the spec document. +- `/devflow:writing-plans` writes the implementation plan (devflow wrapper around the upstream writing-plans skill). +- `/devflow:lock-tests` writes the full failing-test inventory and gates on user approval before implementation begins. +- `/devflow:executing-plans` drives per-task red-green-refactor (devflow wrapper that forces the post-implementation handoff to `/devflow:finish-feature`). +- `/devflow:finish-feature` runs verification, creates the PR/MR, retains learnings. -Each phase ends by invoking `devflow:phase-handoff`, which writes a frozen-state file +**Convention:** all callers always go through `/devflow:*` wrappers — never reach past them to the upstream skill directly. The wrappers are the canonical surface devflow callers see; the upstream skills are an implementation detail of the wrappers. 
+ +Each phase ends by invoking `/devflow:phase-handoff`, which writes a frozen-state file at `.devflow/state//.md` (with worktree-relative artefact paths), gates on a one-click `AskUserQuestion`, then spawns a new Claude Desktop session via `mcp__ccd_session__spawn_task`. The spawned session's title is deterministic: `[] [MR#] ` (e.g. `[MES-4282] [MR#29] Implementation`); the MR# slot is omitted when no MR/PR exists yet for the branch. Its initial prompt hands it ABSOLUTE paths to the frozen-state file + spec + plan + test inventory as the only -authoritative inputs — the brainstorming context does not bleed into implementation +authoritative inputs — the prior-phase context does not bleed into implementation because the spawned session has zero conversational memory of the prior phase. On feature branches, always complete work with `/devflow:finish-feature` before ending diff --git a/config/agent-deck/README.md b/config/agent-deck/README.md deleted file mode 100644 index 7da3172..0000000 --- a/config/agent-deck/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Agent Deck Configuration - -Configuration template for [Agent Deck](https://github.com/asheshgoplani/agent-deck) — a unified manager for AI coding agents. - -## Setup - -1. **Install Agent Deck** (if not already installed): - - ```bash - brew install asheshgoplani/tap/agent-deck - ``` - -2. **Initialize in your project** (handled automatically by `devflow init`): - - ```bash - devflow init - ``` - - This copies `config.toml.tmpl` to your project as `.agent-deck/config.toml` with placeholders replaced. - -3. 
**Manual setup** (if configuring without devflow): - ```bash - cp config.toml.tmpl ~/.config/agent-deck/config.toml - # Edit the file to match your environment - ``` - -## What's Configured - -| Section | Purpose | -| ---------- | ----------------------------------------------------------------------------- | -| `tools` | Agent commands (Claude Code, OpenCode) — set `default_tool` to your preferred | -| `mcps` | MCP servers managed by Agent Deck — Hindsight is pre-configured for memory | -| `docker` | Docker container settings for sandboxed sessions — disabled by default | -| `worktree` | Git worktree integration — `subdirectory` places worktrees under project root | -| `claude` | Claude Code-specific settings (e.g., dangerous mode for shell access) | -| `logs` | Session logging for review and debugging | - -## Customization - -- **Add tool profiles**: Add `[tools.]` blocks with `command` and optional `args` -- **Add MCP servers**: Add `[mcps.]` blocks with `type` and `url` (http) or `command`/`args` (stdio) -- **Enable Docker**: Set `default_enabled = true` under `[docker]` for sandboxed agent sessions -- **Change worktree location**: Set `default_location` to `"sibling"` to place worktrees next to the project instead of inside it diff --git a/config/agent-deck/config.toml.tmpl b/config/agent-deck/config.toml.tmpl deleted file mode 100644 index 0a95b82..0000000 --- a/config/agent-deck/config.toml.tmpl +++ /dev/null @@ -1,37 +0,0 @@ -# Devflow — Agent Deck Configuration -# Generated by: devflow init -# Docs: https://github.com/asheshgoplani/agent-deck - -# ── Tool profiles ───────────────────────────────────────────────── -default_tool = "claude" - -[tools.claude] -command = "claude" -args = ["--profile", "devflow"] - -[tools.opencode] -command = "opencode" - -# ── MCP Servers ─────────────────────────────────────────────────── -[mcps.hindsight] -type = "http" -url = "http://localhost:8888/mcp/" - -# ── Docker / SSH 
────────────────────────────────────────────────── -[docker] -default_enabled = false -mount_ssh = true -auto_cleanup = true -environment = ["ANTHROPIC_API_KEY", "GITHUB_TOKEN"] - -# ── Worktrees ───────────────────────────────────────────────────── -[worktree] -default_location = "subdirectory" - -# ── Claude-specific ─────────────────────────────────────────────── -[claude] -allow_dangerous_mode = true - -# ── Logs ────────────────────────────────────────────────────────── -[logs] -enabled = true diff --git a/config/agent-deck/tmux-notifications-fix.md b/config/agent-deck/tmux-notifications-fix.md deleted file mode 100644 index 7285ec2..0000000 --- a/config/agent-deck/tmux-notifications-fix.md +++ /dev/null @@ -1,104 +0,0 @@ -# Fixing Terminal Notifications in Agent-Deck (tmux) - -## Problem - -When Claude Code or OpenCode runs inside agent-deck's tmux sessions, desktop/terminal -notifications for "needs input" events are silently swallowed. The user never knows -a session is waiting unless they manually check. - -## Root Cause (Multi-Layer) - -1. **Claude Code sends OSC 9 escape sequences** (`\033]9;...`) for desktop notifications. - tmux does not forward raw OSC sequences to the outer terminal — they need DCS - passthrough wrapping. This is a known issue: [claude-code#19976](https://github.com/anthropics/claude-code/issues/19976). - -2. **Agent-deck does not configure tmux bell options.** The tmux options `monitor-bell`, - `bell-action`, and `visual-bell` are not set by agent-deck on the sessions it creates - (see `/internal/tmux/tmux.go` `Start()` function). While tmux defaults _should_ work, - the programmatic session creation may not inherit them reliably. - -3. **`terminal_bell` kills macOS notification banners.** Setting `preferredNotifChannel` - to `terminal_bell` replaces OSC 9 with just a BEL character. This gives you the dock - badge (red circle on Ghostty icon) and title bell icon, but kills the macOS - notification banner in the top-right corner. 
The correct setting is `auto`. - -4. **OpenCode's "question" tool** is not detected as a "waiting" state by agent-deck - ([agent-deck#255](https://github.com/asheshgoplani/agent-deck/issues/255)). - -5. **Agent-deck only sends macOS notifications for selector-type questions** (permission - prompts, elicitation dialogs) but NOT for general "awaiting input" state after the - agent finishes answering. This is an agent-deck limitation. - -## What This Fix Does - -### 1. Keep `auto` notification channel (`~/.claude.json`) - -```json -"preferredNotifChannel": "auto" -``` - -This preserves Claude Code's native OSC 9 desktop notifications outside tmux. Do NOT -set to `terminal_bell` — it kills macOS notification banners. - -### 2. Add tmux bell options to agent-deck (`~/.agent-deck/config.toml`) - -```toml -[tmux] - [tmux.options] - monitor-bell = "on" - bell-action = "any" - visual-bell = "off" -``` - -This ensures agent-deck's tmux sessions explicitly forward bell events to the outer -terminal. `bell-action = "any"` means bells from any window are forwarded, not just -the current one. - -### 3. 
Add Notification hook (`~/.claude/settings.json`) - -```json -{ - "hooks": [{ - "type": "command", - "command": "bash -c 'if [ -n \"$TMUX\" ]; then printf \"\\033Ptmux;\\033\\033]9;Claude Code needs input\\007\\033\\\\\" > /dev/tty; fi; printf \"\\a\" > /dev/tty'", - "async": true - }] -} -``` - -This hook does two things on every Notification event: -- **Inside tmux**: sends a DCS-wrapped OSC 9 notification so the outer terminal - (Ghostty/iTerm2/Kitty) shows a macOS notification banner -- **Always**: sends a BEL character for the dock badge (red circle on terminal icon) - -### What you get after this fix - -| Scenario | macOS banner (top-right) | Dock badge (red circle) | Title bell icon | -|----------|-------------------------|------------------------|----------------| -| Raw Claude Code (no tmux) | Yes (native OSC 9) | Yes (hook BEL) | Yes | -| Claude Code in agent-deck | Yes (DCS-wrapped OSC 9) | Yes (hook BEL) | Yes | - -## Known Limitations - -- **Agent-deck only notifies for selector questions**: macOS notification banners from - agent-deck's own hook-handler only fire for permission_prompt/elicitation_dialog - events, not for the general "waiting for input" state after answering. -- **OpenCode question tool not detected**: agent-deck doesn't recognize OpenCode's - "question" tool as a waiting state (#255). 
- -## Relevant Upstream Issues - -| Issue | Status | Description | -|-------|--------|-------------| -| [claude-code#19976](https://github.com/anthropics/claude-code/issues/19976) | Open | Claude Code OSC notifications don't work in tmux | -| [agent-deck#211](https://github.com/asheshgoplani/agent-deck/issues/211) | Open | Native notification bridge (Slack/Telegram/desktop) | -| [agent-deck#255](https://github.com/asheshgoplani/agent-deck/issues/255) | Open | OpenCode waiting status not detected for question tool | -| [agent-deck#150](https://github.com/asheshgoplani/agent-deck/issues/150) | Closed | Added `[tmux] options` config override mechanism | - -## Ideal Upstream Fixes - -- **Agent-deck**: Add `monitor-bell on` + `bell-action any` as defaults in `Start()` in - `/internal/tmux/tmux.go` — a 2-line change alongside existing options. -- **Claude Code**: Auto-detect `$TMUX` and DCS-wrap OSC 9 notifications (#19976). -- **Agent-deck**: Fix OpenCode question tool detection (#255). -- **Agent-deck**: Send macOS notifications for ALL waiting states, not just selector questions. 
diff --git a/config/bootstrap-snippet.sh b/config/bootstrap-snippet.sh index 000b4e5..90cf36e 100644 --- a/config/bootstrap-snippet.sh +++ b/config/bootstrap-snippet.sh @@ -14,8 +14,9 @@ : <<'BREWFILE_SNIPPET' # AI Dev Environment (devflow) -brew "agent-deck" # Session wrapper (if available via brew) brew "worktrunk" # Git worktree manager +brew "gh" # GitHub CLI (for /devflow:create-pr and devflow review) +brew "glab" # GitLab CLI (for /devflow:create-pr and devflow review) # devflow itself installed via local tap or make install BREWFILE_SNIPPET diff --git a/devflow-plugin/.claude-plugin/plugin.json b/devflow-plugin/.claude-plugin/plugin.json index d59eea8..d33ac87 100644 --- a/devflow-plugin/.claude-plugin/plugin.json +++ b/devflow-plugin/.claude-plugin/plugin.json @@ -14,6 +14,7 @@ "./skills/reflect-session/SKILL.md", "./skills/pre-push-check/SKILL.md", "./skills/review/SKILL.md", + "./skills/review-document/SKILL.md", "./skills/new-feature/SKILL.md", "./skills/finish-feature/SKILL.md", "./skills/create-pr/SKILL.md", diff --git a/devflow-plugin/commands/review-document.md b/devflow-plugin/commands/review-document.md new file mode 100644 index 0000000..5c71707 --- /dev/null +++ b/devflow-plugin/commands/review-document.md @@ -0,0 +1,165 @@ +--- +description: Use when reviewing a prose document — KB article, RFC, spike, runbook, PRD, design doc, knowledge-base page — hosted on Google Docs, Confluence, a local file path, or an arbitrary URL. Checks correctness, internal consistency, audience-fit, prose clarity, and external-claim verification; cross-checks against existing platform comments to avoid re-flagging; returns severity-tagged findings with anchor + quote + concrete fix. Use when asked to "review this doc / KB / RFC / spike / runbook / PRD" and the target is prose, not a code diff. Counterpart to /devflow:write-spike. NOT for code diffs — use /devflow:review for those. 
+--- + +# /devflow:review-document — Multi-perspective prose document review + +You are a thorough, multi-perspective document reviewer. Counterpart to `/devflow:write-spike`. Reviews prose docs on any platform with deep context gathering and parallel review agents. Sibling to `/devflow:review` (which reviews code diffs). + +## Scope guardrails (read before doing anything) + +1. **NEVER flag pure styling issues.** Broken markdown table rows, header-level inconsistencies, missing alignment chars, font weight, colour, whitespace — none. Only substance: factual correctness, internal consistency, completeness, prose clarity, audience-fit, external-claim verification. Renderer quirks belong in a linter, not a doc review. +2. **NEVER re-flag findings already raised in existing platform comments.** Phase 1c mirrors `/devflow:review` Phase 1e — read all comments first, cross-check every finding. Re-flagging a colleague's existing comment erodes trust. +3. **NEVER trust the raw fetched text without handling source-format quirks** (Google Docs suggestion mode, Confluence change-tracking spans, Word `.docx` track-changes all leak phantom text). Always fetch the clean post-suggestion view — see Phase 0b for the authoritative recipe. + +## Input + +`$ARGUMENTS` may contain any combination of: + +- **Google Doc URL or ID** — `https://docs.google.com/document/d//...` or just the 44-char `` +- **Confluence page URL or ID** — `https://.atlassian.net/wiki/spaces/.../pages/` or the bare ID +- **Local file path** — `.md` / `.txt` / `.adoc` / `.rst` (absolute or `~`-relative) +- **Arbitrary URL** — blog, Medium, Substack, Notion-public, etc. 
Fetched via the `defuddle` skill (preferred) or `WebFetch` fallback +- **Nothing** — fall back to the most-recently-edited prose file in the current working directory +- **Optional doc-type hint** — `--type kb|spike|rfc|runbook|prd|design|generic` (auto-detected via Phase 0d if omitted) +- **Optional `quick` keyword** — single-pass review, skip subagents + +Parse to determine `SOURCE_TYPE`, `URL_OR_PATH`, `DOC_TYPE`, `MODE` (`quick` | `thorough`, default `thorough`). + +## Phase 0 — Source detection + clean fetch + anchor selection + +### 0a. Identify source platform + +| Input shape | Platform | Fetch tool | +|---|---|---| +| `docs.google.com/document/d/` or a bare 44-char base64-ish ID | Google Doc | Drive MCP (`mcp__bf06f3e8-*`) | +| `.atlassian.net/wiki/.../pages/` or bare numeric ID | Confluence | Atlassian MCP (`mcp__5ebcd1ed-*`) | +| Path starting `/`, `.`, `~` | Local file | `Read` | +| Any other `http(s)://` URL | Web | `defuddle` skill (preferred), `WebFetch` fallback | + +### 0b. Fetch CLEAN text (mandatory — strip suggestion / track-change artifacts) + +| Platform | Primary call | Strikethrough / track-change handling | +|---|---|---| +| Google Doc | `mcp__bf06f3e8-*__download_file_content` with `exportMimeType: "text/plain"` — strips strikethrough at export time AND embeds inline comments as `[a]/[b]/[c]` markers. Avoid `read_file_content` for review (concatenates strikethrough + replacement → phantom typos like `"version 1Phase 1"` when live text is `"Phase 1"`). | Use the export path. | +| Confluence | `mcp__5ebcd1ed-*__getConfluencePage` with `bodyFormat=storage` | Strip `` spans before passing to agents | +| Local file | `Read` | None (assume final) | +| Web (defuddle preferred) | invoke the `defuddle` skill | None (defuddle returns clean rendered text) | +| Web (fallback) | `WebFetch` | None | + +**Defuddle is a soft dependency** of devflow. 
If `defuddle` skill / CLI is absent, fall back to `WebFetch` and warn the user in the output's Context section: `"Web URL fetched via WebFetch (defuddle not installed — see devflow README for install steps)"`. Never block on missing defuddle. + +### 0c. Auto-pick anchor format per source + +| Source | Anchor format used in output | +|---|---| +| Local file | `<file>:<line>` | +| Hosted (Google Doc / Confluence / arbitrary URL) | `§"<heading>" → "<quote>"` (so the user can Ctrl+F to the location in the platform UI) | + +Skill orchestrator sets `ANCHOR_TYPE` once and passes it to all agents so their findings are emitted in the right shape. + +### 0d. Detect `DOC_TYPE` if not explicit + +Apply in order: +1. `--type` hint → use it +2. Filename / URL slug contains `spike` / `rfc` / `runbook` / `prd` / `design` / `kb` → that type +3. Top-of-doc frontmatter `type:` field → use it +4. Audience signal in first ~500 chars: "customers" / "agents" / "admins" / "support team" → `kb`; "engineers" / "architecture" / "service" / "system" → `spike` or `rfc`; step-by-step ops → `runbook`; user-stories / "as a … I want" → `prd` +5. Default → `generic` + +`DOC_TYPE` drives which agents activate in Phase 2. + +## Phase 1 — Context + comments + memory + +### 1a. Linked source-of-truth pull + +- Extract Jira-style task IDs from doc body (`[A-Z][A-Z0-9]+-\d+`). For each hit, fetch via `mcp__5ebcd1ed-*__getJiraIssue` (cap 3 tickets). +- Extract URLs from doc body. Flag any pointing at code repos / merge requests / external vendor docs as cross-check candidates for Phase 2 agents. +- For technical doc types (`spike` / `rfc` / `runbook`): identify referenced code paths in the doc → `Read` them so the `verifier` / `architect` agents can ground their findings. + +### 1b. Hindsight recall + +- Call `mcp__hindsight__recall` with `(doc title + extracted feature keywords)`. Cap top 5 hits. +- Skip silently if Hindsight MCP not configured. + +### 1c. 
Existing platform comments — MANDATORY cross-check input + +| Platform | Comment fetch | Notes | +|---|---|---| +| Confluence | `mcp__5ebcd1ed-*__getConfluencePageFooterComments` + `getConfluencePageInlineComments` (both paginated fully) | Inline comments include `anchor_text` (the quoted snippet they attach to) → perfect for cross-check | +| Google Doc | `mcp__bf06f3e8-*__download_file_content` with `exportMimeType: "text/plain"` embeds inline comments as `[a]`, `[b]`, `[c]` markers at the anchor position AND lists the comment bodies in `[a]author-text` / `[b]author-text` blocks AT THE END of the exported text. Parse those after the last article paragraph. The anchor for finding `[a]` is the markdown text immediately preceding the marker. | +| Local file | None possible (no platform) — skip silently | +| Arbitrary URL | None possible — skip silently | + +For each fetched comment, extract `(author, body, anchor_text_if_inline, resolved_state if available)`. Store as `EXISTING_COMMENTS`. Phase 3b uses this to mark every finding NEW vs RAISED-*. + +**Google Docs caveat:** the `text/plain` export does NOT carry `resolved_state` for inline `[a]/[b]/[c]` comments. Treat all extracted Google Doc comments as `RAISED-OPEN` by default. Only downgrade to `RAISED-RESOLVED-FIXED` if you can see in the cleaned body that the issue is no longer present (manual heuristic). The Confluence path does carry true `resolved_state`. + +### 1d. Cleaned-text sanity check + +If the Phase 0b clean fetch differs from the raw fetch by >10% in length, warn the user and ask whether to proceed (heavy track-changes can legitimately strip a lot — confirm intent). + +## Phase 2 — Review agents + +Agent set depends on `DOC_TYPE` and `MODE`. See `AGENTS.md` (sibling file) for full definitions, prompts, and activation rules. + +### Quick Mode + +Single in-context pass covering correctness + clarity + a sanity-check of factual claims. Skip subagents entirely. Still apply the Phase 3b cross-check against `EXISTING_COMMENTS` (guardrail #2 holds in every mode), then go to Phase 4. 
+ +### Thorough Mode + +**Read `AGENTS.md` for the full agent set per doc type.** Dispatch all active agents in ONE message with multiple `Task` tool calls (each with `subagent_type: <role>` per the AGENTS.md role table — e.g. `subagent_type: critic`, `subagent_type: writer`) so they run in parallel. Each agent receives the context packet: + +- Cleaned doc text (Phase 0b output) +- `DOC_TYPE`, `ANCHOR_TYPE`, audience signal +- Linked tickets + external sources (Phase 1a) +- Hindsight memories (Phase 1b) +- `EXISTING_COMMENTS` (Phase 1c) — for the agent to avoid duplicating where possible +- Relevant code excerpts for technical doc types (Phase 1a tail) + +## Phase 3 — Severity scoring + dedup + cross-check + +### 3a. Confidence scoring +Apply the rubric in `TEMPLATES.md` (`0–100`, drop below 50). Cap any "recurring pattern" finding (e.g. passive voice repeated across the doc) at one entry — not one per occurrence. + +### 3b. Cross-check against `EXISTING_COMMENTS` +Mark every surviving finding as one of: + +- **NEW** — no prior comment overlaps anchor + issue class. Include normally. +- **RAISED-OPEN** — comment covers this anchor + issue class, unresolved. Include with tag `(also raised by @<author> — open)`. +- **RAISED-RESOLVED-FIXED** — comment marked resolved AND latest doc text confirms fix landed. **Move to 🟢 Strengths section under "Already addressed in review comments". Do NOT re-flag.** +- **RAISED-RESOLVED-NOT-FIXED** — comment marked resolved by author but issue still present in current text. Re-flag with tag `(thread marked resolved but issue still present)`. High signal. + +### 3c. Cross-agent dedup +Same anchor + same issue class flagged by multiple agents → keep highest-confidence version, append `(N/<total> agents)`. 
+ +## Phase 4 — Output + +**Read `TEMPLATES.md` (sibling) for the structured Markdown output template.** Format mirrors `/devflow:review`: emoji severity (🔴 🟠 🟡 🟢 🔵), bold key terms, ≤3 lines per finding in 🔴/🟠, one-line for 🟡, TL;DR block with severity counts + top-3 fixes + one-line verdict. + +Differences from `/devflow:review`: +- Anchor auto-picks per Phase 0c (`file:line` for local, `§heading + quote` for hosted) — do not emit `file:line` for a Google Doc, it is useless. +- Verdict labels: `✅ APPROVED` / `⚠️ NEEDS FIXES` / `❓ NEEDS DISCUSSION` (no merge-state semantics). +- No 🟠 "external-unmodified" category (no diff-scope concept for prose). +- Code-suggestion fenced blocks are replaced with inline prose rewrites quoted verbatim. + +## Phase 5 — Retain learnings + +If any factual nuance, gotcha, or convention surfaced during review (e.g. `"Meta BSUID docs: phone number only hides after 30 days of no interaction"`), call `mcp__hindsight__retain` to persist. Tag by `(doc-type, topic, source)`. Skip silently if Hindsight unavailable. + +## Phase 6 — Posting comments back to the platform + +**Not in v1.** Output is chat-only by default (per the user's `default-to-draft` rule). If the user later asks "draft these as Confluence inline comments" or similar, that is a separate follow-up — do not auto-post. + +## Rationalizations — STOP + +| Thought | Counter | +|---|---| +| "Skip comment fetch, doc looks new" | New docs often have half-resolved threads. Fetch (Phase 1c). | +| "Skip the strikethrough strip" | Produces phantom typos. Always strip (Phase 0b). | +| "Markdown render is broken" | Out of scope (guardrail #1). Drop. | +| "Critic alone is enough" | Writer catches tone, document-specialist catches factual drift. Run the full set per doc type. | +| "Vendor claim looks standard" | Vendor docs drift. Verify. 
| + +$ARGUMENTS diff --git a/devflow-plugin/commands/update-visualizations.md b/devflow-plugin/commands/update-visualizations.md index 781d856..eead491 100644 --- a/devflow-plugin/commands/update-visualizations.md +++ b/devflow-plugin/commands/update-visualizations.md @@ -39,8 +39,8 @@ For each changed file, determine which visualization(s) it might affect. Use the **For devflow projects:** - `lib/services.sh`, `docker/docker-compose.yml` → `architecture/runtime-architecture.md` - `lib/sync.sh`, MCP/skill config changes → `architecture/sync-architecture.md` -- `lib/work.sh`, `lib/worktree.sh`, skill workflow changes → `workflows/development-workflow.md` -- `lib/init.sh`, agent-deck config changes → `integrations/agent-deck-integration.md` +- `lib/worktree.sh`, skill workflow changes → `workflows/development-workflow.md` +- `lib/init.sh` setup changes → `architecture/devflow-ecosystem.md` - `bin/devflow` (new commands) → `architecture/devflow-ecosystem.md` - New skill/plugin → May need a new diagram in `integrations/` diff --git a/devflow-plugin/skills/update-visualizations/SKILL.md b/devflow-plugin/skills/update-visualizations/SKILL.md index be96a5f..2a7c94a 100644 --- a/devflow-plugin/skills/update-visualizations/SKILL.md +++ b/devflow-plugin/skills/update-visualizations/SKILL.md @@ -40,8 +40,8 @@ For each changed file, determine which visualization(s) it might affect. 
Use the **For devflow projects:** - `lib/services.sh`, `docker/docker-compose.yml` → `architecture/runtime-architecture.md` - `lib/sync.sh`, MCP/skill config changes → `architecture/sync-architecture.md` -- `lib/work.sh`, `lib/worktree.sh`, skill workflow changes → `workflows/development-workflow.md` -- `lib/init.sh`, agent-deck config changes → `integrations/agent-deck-integration.md` +- `lib/worktree.sh`, skill workflow changes → `workflows/development-workflow.md` +- `lib/init.sh` setup changes → `architecture/devflow-ecosystem.md` - `bin/devflow` (new commands) → `architecture/devflow-ecosystem.md` - New skill/plugin → May need a new diagram in `integrations/` diff --git a/skills/update-visualizations/SKILL.md b/skills/update-visualizations/SKILL.md index be96a5f..2a7c94a 100644 --- a/skills/update-visualizations/SKILL.md +++ b/skills/update-visualizations/SKILL.md @@ -40,8 +40,8 @@ For each changed file, determine which visualization(s) it might affect. Use the **For devflow projects:** - `lib/services.sh`, `docker/docker-compose.yml` → `architecture/runtime-architecture.md` - `lib/sync.sh`, MCP/skill config changes → `architecture/sync-architecture.md` -- `lib/work.sh`, `lib/worktree.sh`, skill workflow changes → `workflows/development-workflow.md` -- `lib/init.sh`, agent-deck config changes → `integrations/agent-deck-integration.md` +- `lib/worktree.sh`, skill workflow changes → `workflows/development-workflow.md` +- `lib/init.sh` setup changes → `architecture/devflow-ecosystem.md` - `bin/devflow` (new commands) → `architecture/devflow-ecosystem.md` - New skill/plugin → May need a new diagram in `integrations/` diff --git a/tasks/P0/ARCH-skills-mcp-sync.md b/tasks/P0/ARCH-skills-mcp-sync.md deleted file mode 100644 index cd101ee..0000000 --- a/tasks/P0/ARCH-skills-mcp-sync.md +++ /dev/null @@ -1,265 +0,0 @@ ---- -id: ARCH-skills-mcp-sync -title: "Single Source of Truth Sync for Skills, MCPs, and Config Across All Layers" -priority: P0 -category: architecture 
-status: open -depends_on: [ARCH-update-actual-configs, ARCH-skills-registry-global-sources] -estimated_effort: XL -files_to_touch: - - lib/sync.sh (new) - - bin/devflow - - lib/init.sh - - lib/services.sh - - lib/check.sh - - config/agent-deck/config.toml.tmpl - - config/worktrunk/ (new configs) - - config/review/ (new configs) - - config/langfuse/ (new export scripts) ---- - -# Single Source of Truth Sync for Skills, MCPs, and Config Across All Layers - -## Context - -The devflow ecosystem has 6 layers + 2 AI providers, each with its own config for skills, MCPs, hooks, and rules. Today these are configured independently with significant drift and gaps: - -- **Agent Deck** (`~/.agent-deck/config.toml`): Template defines `[mcps.hindsight]` but live config has empty `[mcps]`. Skill pool dir doesn't exist. MCP pool disabled. -- **Claude Code**: 4 skill sources, 3 MCP sources, hooks in settings.json, 4 plugins, 5 agents — partially synced (only CLAUDE.md). -- **OpenCode**: Skills via git clone, plugin manually placed, `opencode.json` barely configured, no MCPs. -- **Code Review**: The `devflow check` command wraps the code review backend, accepts check rules per invocation — no separate CLI install needed. -- **Worktrunk**: `~/.config/worktrunk/config.toml` doesn't exist despite rich hook system (10 types). -- **Langfuse**: Prompt templates, eval configs, dashboards all in Postgres DB — not version-controlled. -- **Cross-agent**: `~/.agents/skills/` managed by `npx skills` — separate from all other systems. - -**Agent-deck's TUI Skill Manager (`s` key) and MCP Manager (`m` key) are currently useless** because nothing is registered in its pool. - -## Problem Statement - -There is no single source of truth. Skills, MCPs, and config are scattered across 8+ locations with no sync mechanism. Adding a new MCP or skill requires manually configuring it in 4+ places. 
- -## Desired Outcome - -A `devflow sync` command that propagates config from agent-deck (source of truth) to all consumers, plus a `devflow sync --status` that shows drift. - -### Architecture - -``` -agent-deck config.toml ← SOURCE OF TRUTH for MCPs, tools, pools -agent-deck skills/pool/ ← SOURCE OF TRUTH for skills - │ - ▼ devflow sync - ┌────┴──────────────────────────────────────────────┐ - │ │ │ │ │ - ▼ ▼ ▼ ▼ ▼ -Claude Code OpenCode Code Review Worktrunk Langfuse -├ ~/.claude.json ├ opencode.json ├ wrapped by ├ config.toml ├ (export -│ (MCPs) │ (MCPs) │ devflow check │ (hooks) │ prompts -├ ~/.claude/ ├ ~/.config/ │ --rule └────────── │ via API) -│ skills/ │ opencode/ └──────────── └───────── -│ settings.json │ skills/ -│ plugins │ plugins/ -└───────────── └─────────── -``` - -## Implementation Guide - -### Phase 1: Populate Agent-Deck as Source of Truth - -**Step 1.1 — Create agent-deck skill pool and register sources:** - -```bash -# Create pool directory -mkdir -p ~/.agent-deck/skills/pool - -# Register devflow skills as a source -agent-deck skill source add devflow ~/dev/devflow/devflow-plugin/skills 2>/dev/null || true - -# Register superpowers as a source (Claude Code has these as a plugin, but agent-deck needs them) -agent-deck skill source add superpowers ~/.claude/plugins/cache/claude-plugins-official/superpowers/*/skills 2>/dev/null || true - -# Symlink devflow plugin skills into the pool -ln -sf ~/dev/devflow/devflow-plugin/skills/* ~/.agent-deck/skills/pool/ 2>/dev/null || true -ln -sf ~/dev/devflow/devflow-plugin/commands ~/.agent-deck/skills/pool/devflow-commands 2>/dev/null || true -``` - -**Step 1.2 — Populate agent-deck MCP config:** - -Add to `~/.agent-deck/config.toml`: -```toml -[mcps.hindsight] -type = "http" -url = "http://localhost:8888/mcp/" - -[mcp_pool] -enabled = true -auto_start = true -pool_all = true -exclude_mcps = [] -fallback_to_stdio = true -show_pool_status = true -``` - -**Step 1.3 — Enable MCP pool:** - -The pool shares MCP 
processes across sessions via Unix sockets. With `pool_all = true`, all MCPs defined in `[mcps.*]` are pooled. This reduces N sessions × M MCPs = N×M processes → M shared processes. - -### Phase 2: Create `devflow sync` Command - -**Step 2.1 — Create `lib/sync.sh`:** - -```bash -devflow_sync() { - section "Syncing devflow config across layers" - - # ── Read agent-deck as source of truth ────────────────────── - local ad_config="${HOME}/.agent-deck/config.toml" - [[ -f "$ad_config" ]] || die "Agent-deck config not found: $ad_config" - - # ── Sync MCPs ─────────────────────────────────────────────── - sync_mcps_to_claude_code - sync_mcps_to_opencode - - # ── Sync Skills ───────────────────────────────────────────── - sync_skills_to_claude_code - sync_skills_to_opencode - # ── Sync Worktrunk config ─────────────────────────────────── - sync_worktrunk_config - - # ── Report ────────────────────────────────────────────────── - sync_status -} -``` - -**Step 2.2 — Implement sync functions:** - -`sync_mcps_to_claude_code()`: -- Read `[mcps.*]` from agent-deck config.toml -- Parse each MCP entry (type, url/command, args) -- Use `claude mcp add -s user ` for HTTP MCPs -- Use `claude mcp add -s user -- ` for stdio MCPs -- Skip MCPs already registered (check `claude mcp list`) - -`sync_mcps_to_opencode()`: -- Read `[mcps.*]` from agent-deck config.toml -- Write to `~/.config/opencode/opencode.json` `mcpServers` section -- Use `jq` or `python3` for JSON manipulation -- Preserve existing opencode.json fields - -`sync_skills_to_claude_code()`: -- Read agent-deck skill pool at `~/.agent-deck/skills/pool/` -- Symlink each skill directory to `~/.claude/skills/` (if not already there) -- Skip skills that are already delivered via plugins (superpowers) - -`sync_skills_to_opencode()`: -- Read agent-deck skill pool -- Symlink to `~/.config/opencode/skills/` (if not already there) - -`sync_worktrunk_config()`: -- If `~/.config/worktrunk/config.toml` doesn't exist, generate from devflow 
template -- Set defaults: worktree-path pattern, LLM commit command, merge behavior -- Register devflow hooks: post-create (copy-ignored), post-remove (cleanup) - -**Step 2.3 — `sync_status` function:** - -Show a matrix of what's synced and what's drifted: -``` -Layer Skills MCPs Hooks Config -───────────── ────── ────── ────── ────── -Agent Deck ✓ 11 ✓ 1 ✓ ✓ -Claude Code ✓ 4 ✓ 1 ✓ ✓ -OpenCode ✗ 0 ✗ 0 — ✗ -Code Review — — — ✓ -Worktrunk — — ✗ 0 ✗ -Langfuse — — — n/a -``` - -### Phase 3: Wire Into Devflow - -**Step 3.1 — Add to `bin/devflow`:** -```bash -sync) devflow_sync "$@" ;; -``` - -**Step 3.2 — Add `--sync` step to `devflow init`:** -After all layers are configured, run `devflow sync` as the final step. - -**Step 3.3 — Add sync to `devflow up`:** -After services are healthy, run `devflow sync --quiet` to ensure everything is aligned. - -**Step 3.4 — Wrap `devflow check` for code review:** -Update `lib/check.sh` to pass review flags: -```bash -devflow_check() { - local review_flags="" - # Read check rules from devflow config - # ... parse checks config ... 
- devflow check $review_flags "$@" -} -``` - -### Phase 4: Langfuse Config Export (Optional Enhancement) - -If Langfuse has prompt templates or eval configs configured via the web UI: -- Add `devflow sync --export-langfuse` that calls Langfuse API to export: - - Prompt templates → `config/langfuse/prompts/` - - Evaluation configs → `config/langfuse/evaluations/` -- Add `devflow sync --import-langfuse` to restore from exported files -- This enables version-controlling Langfuse config in the devflow repo - -## Acceptance Criteria - -- [ ] `~/.agent-deck/skills/pool/` exists and contains devflow skills -- [ ] Agent-deck skill sources include `devflow` and `superpowers` -- [ ] Agent-deck `[mcps.hindsight]` is configured in live config.toml -- [ ] Agent-deck `[mcp_pool]` is enabled with `pool_all = true` -- [ ] `devflow sync` propagates MCPs to Claude Code (`~/.claude.json`) -- [ ] `devflow sync` propagates MCPs to OpenCode (`opencode.json`) -- [ ] `devflow sync` propagates skills to Claude Code (`~/.claude/skills/`) -- [ ] `devflow sync` propagates skills to OpenCode (`~/.config/opencode/skills/`) -- [ ] `devflow sync --status` shows sync state matrix -- [ ] `~/.config/worktrunk/config.toml` is generated with devflow defaults -- [ ] `devflow check` passes appropriate flags for code review with Hindsight access -- [ ] `devflow init` runs sync as final step -- [ ] `devflow up` runs quiet sync after services are healthy -- [ ] Agent-deck TUI Skill Manager (`s` key) shows devflow skills -- [ ] Agent-deck TUI MCP Manager (`m` key) shows Hindsight - -## Technical Notes - -- **Agent-deck cannot write to OpenCode or Claude Code configs natively.** The sync command bridges this gap. -- **Code Review has no persistent MCP config.** Wrapping `devflow check` with flags is the only reliable approach. -- **Worktrunk hooks use Jinja2 template variables** (`{{ branch }}`, `{{ repo }}`, etc.). Leverage these for automatic setup. 
-- **Cross-agent skills at `~/.agents/skills/`** are managed by the skills CLI with `.skill-lock.json`. Don't overwrite — only add missing ones. -- **Langfuse export is optional** — many users won't have custom prompts/evals yet. -- **Idempotent:** `devflow sync` must be safe to run repeatedly. Use symlinks where possible (single source, multiple targets). Check before writing. -- **Drift detection:** Compare timestamps or checksums of synced files to detect when manual changes have diverged from agent-deck source. - -## Verification - -```bash -# After running devflow sync: - -# 1. Check agent-deck skill pool -ls ~/.agent-deck/skills/pool/ - -# 2. Check agent-deck MCP config -grep -A2 '\[mcps.hindsight\]' ~/.agent-deck/config.toml - -# 3. Check Claude Code MCPs -claude mcp list 2>&1 | grep hindsight - -# 4. Check OpenCode MCPs -cat ~/.config/opencode/opencode.json | python3 -c "import json,sys; print(json.load(sys.stdin).get('mcpServers',{}).keys())" - -# 5. Check skills in all targets -ls ~/.claude/skills/ -ls ~/.config/opencode/skills/ -# (Code review checks are managed via devflow check — no separate skills dir needed) - -# 6. Check worktrunk config -cat ~/.config/worktrunk/config.toml - -# 7. 
Full status -devflow sync --status -``` diff --git a/tasks/P0/ARCH-visualization-update-hook.md b/tasks/P0/ARCH-visualization-update-hook.md index d0fc2e3..d134f52 100644 --- a/tasks/P0/ARCH-visualization-update-hook.md +++ b/tasks/P0/ARCH-visualization-update-hook.md @@ -74,10 +74,8 @@ The visualizations live in the devflow project as the source of truth: │ └── sync-architecture.md # Skills/MCP sync flow (future) ├── workflows/ # Process flow diagrams │ ├── development-workflow.md # Full SDD workflow: idea → MR -│ ├── devflow-work-flow.md # The devflow work command flow (future) -│ └── session-lifecycle.md # Session create → conductor → done → cleanup (future) +│ └── session-lifecycle.md # Session create → phase-handoff spawn → done → cleanup (future) ├── integrations/ # Tool-specific integration diagrams -│ ├── agent-deck-integration.md # How agent-deck connects to everything (future) │ ├── hindsight-data-flow.md # Memory recall/retain/reflect patterns (future) │ └── langfuse-trace-flow.md # What gets traced and where (future) └── decisions/ # Visual ADRs @@ -153,12 +151,11 @@ Session ending. 
Consider running /devflow:update-visualizations to update archit | Component | Color | Hex | |-----------|-------|-----| | Hindsight | Purple | `#7c3aed` | -| Agent Deck | Blue | `#3b82f6` | | Worktrunk | Green | `#059669` | | Code Review | Amber | `#d97706` | | Skills/Marketplace | Pink | `#be185d` | | Langfuse | Cyan | `#0891b2` | -| Conductor | Amber variant | `#f59e0b` | +| Phase-handoff / spawn | Amber variant | `#f59e0b` | | CLI/Terminal | Gray | `#374151` | **Init block** (every mermaid diagram): diff --git a/tasks/P1/ARCH-agent-spawning-consistency.md b/tasks/P1/ARCH-agent-spawning-consistency.md deleted file mode 100644 index f45a1c6..0000000 --- a/tasks/P1/ARCH-agent-spawning-consistency.md +++ /dev/null @@ -1,154 +0,0 @@ ---- -id: ARCH-agent-spawning-consistency -title: "Agent Spawning Consistency Through Agent-Deck" -priority: P1 -category: architecture -status: open -depends_on: [] -estimated_effort: M -files_to_touch: - - bin/devflow - - lib/worktree.sh - - lib/init.sh - - skills/worktree-flow/new-feature.md - - skills/worktree-flow/finish-feature.md - - templates/CLAUDE.md.tmpl ---- - -# Agent Spawning Consistency Through Agent-Deck - -## Context - -Devflow spawns AI agents from multiple places in the codebase — the `devflow worktree` command with its `--agent` flag, direct invocations in `bin/devflow`, and potentially from init scripts. Currently these spawn points use raw `claude` or `opencode` commands directly, bypassing agent-deck entirely. This means spawned sessions are invisible to agent-deck's tracking, miss memory/observability layers (Hindsight, Langfuse), and aren't organized into groups. - -Agent-deck is the session management layer (Layer 3) that provides visibility, group organization, MCP connection pooling, and lifecycle management. Every agent session MUST pass through it. - -## Problem Statement - -Agent spawning is inconsistent and fragmented: - -1. 
Some spawn points call `claude` or `opencode` directly instead of going through `agent-deck add` -2. The `--agent ` flag on `devflow worktree` uses opt-in logic — agents should be the DEFAULT, with `--no-agent` as the opt-out -3. Spawned agents don't get assigned to the correct agent-deck group (project/features, project/bugfixes, etc.) -4. Sessions launched outside agent-deck are invisible — no tracking, no memory injection, no observability - -## Desired Outcome - -- **Every** agent spawn in devflow goes through `agent-deck add -c -g ` -- `devflow worktree` launches an agent by default; the flag becomes `--no-agent` (inverse logic) -- Each launch auto-detects the correct group from the branch prefix (`feat/` → features, `fix/` → bugfixes, `chore/` → chores, etc.) -- Sessions are visible in `agent-deck list`, receive MCP connections (Hindsight), and are logged - -## Implementation Guide - -### Step 1: Audit all agent spawn points - -Search these files for any direct invocation of `claude`, `opencode`, or any AI tool binary: - -- `bin/devflow` — look for `exec`, `command`, or subprocess calls that launch an AI tool -- `lib/worktree.sh` — the `--agent` flag handling -- `lib/init.sh` — any post-init agent launch -- `skills/worktree-flow/new-feature.md` — instructions that tell the agent to spawn sub-agents -- `skills/worktree-flow/finish-feature.md` — same - -Document every spawn point found. - -### Step 2: Create a shared spawn function - -In `lib/worktree.sh` (or a new `lib/agent.sh` if separation is cleaner), create: - -```bash -# Spawns an agent through agent-deck. Never call claude/opencode directly. -# Usage: devflow_spawn_agent -devflow_spawn_agent() { - local path="$1" - local tool="${2:-claude}" - local group="$3" - - if ! command -v agent-deck &>/dev/null; then - echo "ERROR: agent-deck not found. Install it first." 
>&2 - return 1 - fi - - agent-deck add "$path" -c "$tool" ${group:+-g "$group"} -} -``` - -### Step 3: Invert the agent flag logic - -In `lib/worktree.sh`, change: - -- **Before:** `--agent ` opts IN to launching an agent -- **After:** Agent launch is DEFAULT. Add `--no-agent` flag that opts OUT - -```bash -# Old: -# devflow worktree feat/foo --agent claude -# New: -# devflow worktree feat/foo # launches agent by default -# devflow worktree feat/foo --no-agent # skips agent launch -``` - -### Step 4: Auto-detect group from branch prefix - -```bash -detect_group() { - local branch="$1" - local project="$2" # e.g., "messaging" - - case "$branch" in - feat/*|feature/*) echo "${project}/features" ;; - fix/*|bugfix/*) echo "${project}/bugfixes" ;; - review/*|cr/*) echo "${project}/reviews" ;; - chore/*|refactor/*) echo "${project}/chores" ;; - *) echo "${project}" ;; # fallback to project root group - esac -} -``` - -### Step 5: Replace all raw spawns - -Go through every spawn point found in Step 1 and replace with calls to `devflow_spawn_agent`. Ensure every call passes the correct group. - -### Step 6: Update skill documents - -Update `new-feature.md` and `finish-feature.md` to reference the new default behavior and `--no-agent` flag. 
- -## Acceptance Criteria - -- [ ] `grep -r 'claude\|opencode' bin/ lib/` returns ZERO direct invocations outside of the spawn function and tool-detection logic -- [ ] `devflow worktree feat/test-branch` creates a worktree AND launches an agent-deck session without any `--agent` flag -- [ ] `devflow worktree feat/test-branch --no-agent` creates a worktree WITHOUT launching an agent -- [ ] The launched session appears in `agent-deck list` with the correct group assignment -- [ ] Branch prefix `feat/` maps to `/features` group -- [ ] Branch prefix `fix/` maps to `/bugfixes` group -- [ ] Skills documents (`new-feature.md`, `finish-feature.md`) reference the updated flag behavior -- [ ] A session launched through devflow has Hindsight MCP available (verify with `agent-deck inspect `) - -## Technical Notes - -- agent-deck's `add` command syntax: `agent-deck add -c [-g ] [-n ]` -- The tool flag (`-c`) should default to whatever the user configured in `devflow init` or fall back to `claude` -- If agent-deck is not installed, the command should fail loudly with an install instruction — never silently fall back to raw `claude` -- Consider storing the default tool preference in `~/.config/devflow/config.toml` or similar - -## Verification - -```bash -# 1. Create a test worktree with default agent -devflow worktree feat/test-spawn-001 - -# 2. Verify session exists in agent-deck -agent-deck list | grep "test-spawn-001" - -# 3. Verify group assignment -agent-deck inspect | grep "features" - -# 4. Verify no-agent flag works -devflow worktree feat/test-spawn-002 --no-agent -agent-deck list | grep -v "test-spawn-002" # should NOT appear - -# 5. 
Cleanup -wt drop feat/test-spawn-001 -wt drop feat/test-spawn-002 -``` diff --git a/tasks/P1/ARCH-detached-head-worktrees.md b/tasks/P1/ARCH-detached-head-worktrees.md index 9cbabb3..8cffad3 100644 --- a/tasks/P1/ARCH-detached-head-worktrees.md +++ b/tasks/P1/ARCH-detached-head-worktrees.md @@ -19,7 +19,7 @@ Git only allows one worktree per branch. If any worktree has `main` checked out, no other worktree can use it. This blocks operations like `git checkout main` in other worktrees and can interfere with worktree creation flows. -Current state: `devflow.feat-wrap-superpowers-skills` has `main` checked out, +Current state: a feature worktree (e.g. `~/dev/.worktrees/devflow/feat-some-feature`) has `main` checked out, blocking all other worktrees from using main directly. ## Industry Pattern diff --git a/tasks/P1/ARCH-devflow-work-entry-point.md b/tasks/P1/ARCH-devflow-work-entry-point.md deleted file mode 100644 index 0b37638..0000000 --- a/tasks/P1/ARCH-devflow-work-entry-point.md +++ /dev/null @@ -1,325 +0,0 @@ ---- -id: ARCH-devflow-work-entry-point -title: "Development Workflow Entry Point (devflow work)" -priority: P1 -category: architecture -status: open -depends_on: - - ARCH-agent-spawning-consistency - - ARCH-forgotten-items-previous-impl -estimated_effort: L -files_to_touch: - - lib/work.sh - - bin/devflow ---- - -# Development Workflow Entry Point (`devflow work`) - -## Context - -Devflow currently has `devflow init` for one-time project setup but lacks a daily-driver command for starting feature work. Developers need a single command that creates an isolated worktree, launches an AI agent session through agent-deck, assigns it to the correct group, and bootstraps the session with context — all in one step. - -This is the most frequently used command in the devflow workflow. It must be fast, reliable, and ergonomic. 
- -### `devflow init` vs `devflow work` — clear separation: - -| Aspect | `devflow init` | `devflow work` | -| ----------------- | ------------------------------------------------------------------ | -------------------------------------------- | -| **Frequency** | Once per project | Every feature/task | -| **Purpose** | Install tools, configure CLAUDE.md, create groups, install plugins | Create worktree, launch agent, start working | -| **Scope** | Project-level setup | Branch-level work session | -| **Prerequisites** | None | `devflow init` already run | - -## Problem Statement - -Starting a new feature currently requires multiple manual steps: - -1. `wt step -c feat/MES-1234` — create worktree with copy-ignored -2. `agent-deck add -c claude -g /features` — create session -3. Navigate to the session or open it -4. Manually tell the agent what to work on - -This should be ONE command. - -## Desired Outcome - -`devflow work feat/MES-1234` does everything: creates worktree, launches tracked agent session in the correct group, and seeds it with initial context. The developer goes from "I have a ticket" to "agent is working on it" in one command. - -## Implementation Guide - -### Step 1: Create `lib/work.sh` - -```bash -#!/usr/bin/env bash -# devflow work — primary entry point for starting feature work - -set -euo pipefail - -DEVFLOW_DIR="$(dirname "$(dirname "$(realpath "$0")")")" -source "$DEVFLOW_DIR/lib/utils.sh" # shared utilities - -# Defaults -DEFAULT_TOOL="claude" -NO_AGENT=false -TOOL="$DEFAULT_TOOL" -GROUP="" -INITIAL_MESSAGE="" - -usage() { - cat < [options] - -Start a new work session: creates worktree, launches agent, assigns to group. - -Arguments: - Branch name or ticket ID. Examples: - feat/MES-1234 → creates feat/MES-1234 branch - MES-1234 → auto-prefixed to feat/MES-1234 - fix/MES-5678 → creates fix/MES-5678 branch - -Options: - --no-agent Create worktree without launching an agent - --tool AI tool to use (default: claude). 
Options: claude, opencode - --group Override auto-detected group - --message Custom initial message for the agent - -h, --help Show this help - -Examples: - devflow work feat/MES-1234 - devflow work MES-1234 # auto-prefixed to feat/ - devflow work fix/MES-5678 --tool opencode - devflow work feat/MES-1234 --no-agent - devflow work feat/MES-1234 --message "Focus on the API layer only" -EOF -} - -# Parse the branch name and auto-prefix if needed -normalize_branch() { - local input="$1" - case "$input" in - feat/*|feature/*|fix/*|bugfix/*|chore/*|refactor/*|review/*|cr/*) - echo "$input" - ;; - *) - # Auto-prefix with feat/ if no recognized prefix - echo "feat/$input" - ;; - esac -} - -# Detect the agent-deck group from branch prefix -detect_group() { - local branch="$1" - local project - project="$(basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)")" - - case "$branch" in - feat/*|feature/*) echo "${project}/features" ;; - fix/*|bugfix/*) echo "${project}/bugfixes" ;; - review/*|cr/*) echo "${project}/reviews" ;; - chore/*|refactor/*) echo "${project}/chores" ;; - *) echo "${project}" ;; - esac -} - -# Generate the initial agent message -generate_initial_message() { - local branch="$1" - local project="$2" - local custom_msg="$3" - - if [[ -n "$custom_msg" ]]; then - echo "$custom_msg" - else - cat </dev/null || pwd)")" - - echo "→ Starting work session: $branch" - - # Step 1: Create worktree - echo "→ Creating worktree..." 
- wt step -c "$branch" - local worktree_path - worktree_path="$(wt path "$branch" 2>/dev/null || git worktree list | grep "$branch" | awk '{print $1}')" - - if [[ -z "$worktree_path" ]]; then - echo "ERROR: Could not determine worktree path for $branch" >&2 - exit 1 - fi - - echo " Worktree created at: $worktree_path" - - # Step 2: Launch agent (unless --no-agent) - if [[ "$NO_AGENT" == "true" ]]; then - echo "→ Skipping agent launch (--no-agent)" - echo "" - echo "Worktree ready at: $worktree_path" - echo "To launch an agent later: agent-deck add $worktree_path -c $TOOL" - return 0 - fi - - # Detect group - local group="${GROUP:-$(detect_group "$branch")}" - - echo "→ Launching agent session..." - echo " Tool: $TOOL" - echo " Group: $group" - - # Launch through agent-deck - agent-deck add "$worktree_path" -c "$TOOL" -g "$group" - - # Step 3: Send initial message (if supported) - local msg - msg="$(generate_initial_message "$branch" "$project" "$INITIAL_MESSAGE")" - - echo "" - echo "Session started. Initial context:" - echo " $msg" - echo "" - echo "Use 'agent-deck list' to see active sessions." - echo "Use 'devflow done $branch' when finished." -} - -main "$@" -``` - -### Step 2: Register in `bin/devflow` - -Add the `work` subcommand to the main devflow dispatcher: - -```bash -# In bin/devflow, in the case statement: -work) - shift - source "$DEVFLOW_DIR/lib/work.sh" - main "$@" - ;; -``` - -Also update the help text to list `work` as a command: - -``` -Commands: - init One-time project setup - work Start a new work session (create worktree + launch agent) - done Finish a work session (cleanup + PR) - ... -``` - -### Step 3: Handle edge cases - -1. **Branch already exists**: If the worktree/branch already exists, `wt step` will fail. Detect this and offer to hop to the existing worktree instead: - - ```bash - if wt list | grep -q "$branch"; then - echo "Worktree for $branch already exists. Hopping to it..." 
- wt hop "$branch" - # Still launch agent if needed - fi - ``` - -2. **Agent-deck not running**: If agent-deck is not running, start it or give a clear error: - - ```bash - if ! agent-deck status &>/dev/null; then - echo "Starting agent-deck..." - agent-deck start - fi - ``` - -3. **Not in a git repo**: Fail early with a clear message if not in a git repository. - -4. **devflow init not run**: Check for devflow initialization markers and suggest running `devflow init` first. - -### Step 4: Auto-detection heuristics - -For ticket-ID-only input (e.g., `devflow work MES-1234`): - -- Default prefix is `feat/` -- Could be configurable in project config: `default_branch_prefix = "feat/"` -- Consider looking up the ticket type from Jira/Linear to auto-detect fix vs feat (future enhancement) - -## Acceptance Criteria - -- [ ] `devflow work feat/MES-1234` creates a worktree AND launches an agent-deck session -- [ ] `devflow work MES-1234` auto-prefixes to `feat/MES-1234` -- [ ] `devflow work fix/MES-5678` correctly assigns to `/bugfixes` group -- [ ] `devflow work feat/MES-1234 --no-agent` creates worktree only, no agent -- [ ] `devflow work feat/MES-1234 --tool opencode` launches with opencode instead of claude -- [ ] `devflow work feat/MES-1234 --group custom/group` overrides auto-detected group -- [ ] `devflow work feat/MES-1234 --message "Focus on tests"` passes custom message -- [ ] Running `devflow work` with no arguments shows help -- [ ] If branch worktree already exists, the command hops to it instead of erroring -- [ ] Session appears in `agent-deck list` with correct group and path -- [ ] `bin/devflow --help` lists the `work` command with a description - -## Technical Notes - -- `wt step -c` creates a worktree AND copies gitignored files (node_modules, .env, etc.) 
— this is critical for the worktree to be immediately functional -- `wt path ` may not exist in all worktrunk versions — fall back to parsing `git worktree list` -- agent-deck's `add` command syntax may accept the initial message — check `agent-deck help add`. If not, the message may need to be sent via `agent-deck send ` or `agent-deck exec -- echo ` -- The `detect_group` function assumes the project name is the git root directory name. This may not match the agent-deck project name if it was configured differently during `devflow init`. -- Consider storing the project name mapping in `.devflow/config.toml` or deriving from `agent-deck project list` - -## Verification - -```bash -# 1. Basic work session -devflow work feat/test-work-001 -agent-deck list | grep "test-work-001" -# Expected: session visible in features group - -# 2. Auto-prefix -devflow work TEST-002 -git worktree list | grep "feat/TEST-002" -# Expected: branch auto-prefixed - -# 3. No-agent mode -devflow work feat/test-work-003 --no-agent -agent-deck list | grep -v "test-work-003" -git worktree list | grep "test-work-003" -# Expected: worktree exists, no session - -# 4. Custom tool -devflow work feat/test-work-004 --tool opencode -agent-deck inspect | grep "opencode" -# Expected: session uses opencode - -# 5. 
Cleanup -for b in feat/test-work-001 feat/TEST-002 feat/test-work-003 feat/test-work-004; do - wt drop "$b" 2>/dev/null -done -``` diff --git a/tasks/P1/ARCH-forgotten-items-previous-impl.md b/tasks/P1/ARCH-forgotten-items-previous-impl.md deleted file mode 100644 index 9f93de4..0000000 --- a/tasks/P1/ARCH-forgotten-items-previous-impl.md +++ /dev/null @@ -1,181 +0,0 @@ ---- -id: ARCH-forgotten-items-previous-impl -title: "Forgotten Items From Previous Implementation" -priority: P1 -category: architecture -status: open -depends_on: [] -estimated_effort: L -files_to_touch: - - bin/devflow - - lib/init.sh - - lib/worktree.sh - - devflow-plugin/skills/** - - visualizations/** ---- - -# Forgotten Items From Previous Implementation - -## Context - -The previous implementation round delivered the core devflow structure but left several items incomplete or incorrectly implemented. These are not new features — they are gaps in what was already specified and partially built. Three distinct areas need attention: agent-deck group/session wiring, Layer 5 marketplace identity, and visualization file organization. - -## Problem Statement - -### 1. Groups and Sessions — Not Wired End-to-End - -`devflow init` may create agent-deck groups (project, project/features, project/bugfixes, project/reviews), but it's unverified whether: - -- The groups actually get created successfully -- `devflow worktree --agent` assigns sessions to the correct group based on branch prefix -- The group hierarchy works as expected in `agent-deck list` - -### 2. 
Marketplace as Layer 5 — Incomplete Identity - -The generated plugin at `devflow-plugin/` IS Layer 5 (Plugin Marketplace), but this identity isn't reflected everywhere: - -- `bin/devflow` help text may still say "CLAUDE.md" for Layer 5 instead of "Plugin Marketplace" -- Visualization files don't reference the marketplace -- `devflow skills convert` should produce the canonical Layer 5 artifact -- `devflow init` doesn't offer to install the plugin via `claude plugin install` - -### 3. Visualization Files — Wrong Location and Naming - -Visualization files currently live in `~/dev/aircall/visualizations/` mixed with numbered messaging knowledge-base files. Devflow visualizations should: - -- Live in `/Users/andrejorgelopes/dev/devflow/visualizations/` as source of truth -- Be symlinked to `~/dev/aircall/visualizations/devflow/` (a dedicated subfolder) -- NOT use numbered prefixes (`10-`, `11-`) — those are a messaging convention -- Use descriptive names (`devflow-ecosystem.md`, `development-workflow.md`) - -## Desired Outcome - -1. `devflow init` reliably creates agent-deck groups, and sessions are correctly assigned -2. Layer 5 is consistently referred to as "Plugin Marketplace" across all devflow artifacts -3. Visualization files live in the devflow repo with descriptive names and are symlinked to the shared location - -## Implementation Guide - -### Part 1: Groups and Sessions Audit - -#### Step 1: Test group creation in `devflow init` - -Run `devflow init` in a test project and verify: - -```bash -agent-deck group list | grep "" -``` - -If groups aren't created, find the relevant code in `lib/init.sh` and fix it. - -#### Step 2: Verify session-to-group assignment - -In `lib/worktree.sh`, trace the code path from branch creation to `agent-deck add`. 
Confirm the `-g` flag is passed with the correct group derived from the branch prefix: - -- `feat/MES-123` → `-g /features` -- `fix/MES-456` → `-g /bugfixes` -- `review/MES-789` → `-g /reviews` - -#### Step 3: Test end-to-end - -```bash -devflow worktree feat/test-groups --agent claude -agent-deck list # should show session in /features -``` - -### Part 2: Marketplace as Layer 5 - -#### Step 1: Update `bin/devflow` help text - -Find the layer listing in `bin/devflow` (likely in a help or info subcommand). Change Layer 5 from whatever it currently says to: - -``` -Layer 5: Plugin Marketplace (devflow-plugin/) -``` - -#### Step 2: Update visualization references - -Any visualization file that depicts the layer stack should reference "Plugin Marketplace" for Layer 5. - -#### Step 3: Update `devflow init` to offer plugin install - -At the end of `devflow init`, add: - -```bash -echo "Install devflow plugin for Claude Code?" -read -r answer -if [[ "$answer" =~ ^[Yy] ]]; then - claude plugin install ./devflow-plugin -fi -``` - -#### Step 4: Ensure `devflow skills convert` output is Layer 5 artifact - -The convert command should output skills in the format expected by the plugin marketplace, not just raw markdown. - -### Part 3: Visualization File Relocation - -#### Step 1: Create the devflow visualizations directory - -```bash -mkdir -p /Users/andrejorgelopes/dev/devflow/visualizations -``` - -#### Step 2: Move/rename visualization files - -Move any devflow-related visualization files from `~/dev/aircall/visualizations/` to `/Users/andrejorgelopes/dev/devflow/visualizations/`. 
Rename them: - -- Remove numbered prefixes -- Use descriptive kebab-case names: `devflow-ecosystem.md`, `development-workflow.md`, `layer-architecture.md` - -#### Step 3: Create symlinks - -```bash -mkdir -p ~/dev/aircall/visualizations/devflow -ln -sf /Users/andrejorgelopes/dev/devflow/visualizations/*.md ~/dev/aircall/visualizations/devflow/ -``` - -#### Step 4: Update any references - -Search the devflow codebase for hardcoded paths to the old visualization locations and update them. - -## Acceptance Criteria - -- [ ] `devflow init` creates agent-deck groups: ``, `/features`, `/bugfixes`, `/reviews` -- [ ] `agent-deck group list` shows the groups after init -- [ ] Sessions created via `devflow worktree` appear in the correct group in `agent-deck list` -- [ ] `bin/devflow` help text references "Plugin Marketplace" for Layer 5 -- [ ] `devflow init` offers to install the devflow plugin -- [ ] Visualization files live in `/Users/andrejorgelopes/dev/devflow/visualizations/` with descriptive names (no numbered prefixes) -- [ ] Symlinks exist at `~/dev/aircall/visualizations/devflow/` pointing to the source files -- [ ] No devflow visualization files remain directly in `~/dev/aircall/visualizations/` (only in the `devflow/` subfolder) - -## Technical Notes - -- agent-deck group creation syntax: `agent-deck group create ` — verify exact syntax from `agent-deck help group` -- The `claude plugin install` command may not exist yet or may have different syntax — check Claude Code docs. If it doesn't exist, use whatever the current plugin installation mechanism is. 
-- When moving visualization files, check git history to preserve authorship if they're tracked -- The numbered prefix convention (`10-`, `11-`) is specific to the messaging knowledge base visualization system — devflow should NOT adopt it - -## Verification - -```bash -# Part 1: Groups -devflow init # in a test project -agent-deck group list -# Expected: project groups visible - -# Part 2: Layer 5 -devflow --help | grep -i "marketplace\|layer 5" -# Expected: "Plugin Marketplace" mentioned - -# Part 3: Visualizations -ls /Users/andrejorgelopes/dev/devflow/visualizations/ -# Expected: descriptive names, no numbered prefixes - -ls -la ~/dev/aircall/visualizations/devflow/ -# Expected: symlinks pointing to devflow/visualizations/ - -ls ~/dev/aircall/visualizations/ | grep -E "^[0-9]+-.*devflow" -# Expected: no results (no numbered devflow files in root) -``` diff --git a/tasks/P1/ARCH-session-task-completion.md b/tasks/P1/ARCH-session-task-completion.md deleted file mode 100644 index e54038b..0000000 --- a/tasks/P1/ARCH-session-task-completion.md +++ /dev/null @@ -1,340 +0,0 @@ ---- -id: ARCH-session-task-completion -title: "Session/Task Completion Command" -priority: P1 -category: architecture -status: open -depends_on: - - ARCH-devflow-work-entry-point -estimated_effort: L -files_to_touch: - - lib/done.sh - - bin/devflow - - skills/process-discipline/done.md - - skills/registry.json ---- - -# Session/Task Completion Command - -## Context - -Devflow has a clear entry point for starting work (`devflow work`) but no corresponding exit point. When a task is finished, the developer currently has to manually: run verification, commit, push, create a PR, retain learnings, close the agent session, and clean up the worktree. This should be streamlined into two complementary pieces: - -1. **Agent-side skill** (`done.md`) — the agent runs verification, commits, pushes, creates a PR, and retains learnings -2. 
**CLI-side command** (`devflow done`) — the developer runs cleanup: merge worktree, delete session, remove temp files - -The skill handles everything INSIDE the agent session. The CLI handles everything OUTSIDE (terminal-level cleanup after the session ends). - -## Problem Statement - -Task completion is fragmented and error-prone: - -- Developers forget to run verification before pushing -- Code review checks (`devflow check`) are skipped -- Session learnings aren't retained to Hindsight (knowledge is lost) -- Worktrees accumulate because cleanup is manual -- Agent sessions linger in agent-deck after the work is done -- No summary is logged to Langfuse for observability - -## Desired Outcome - -A clean, repeatable task completion flow: - -1. Agent runs the `done` skill → verification, commit, push, PR, retain learnings -2. Developer runs `devflow done` → merge worktree, close session, cleanup - -## Implementation Guide - -### Part 1: The Agent-Side Skill (`done.md`) - -Create `skills/process-discipline/done.md`: - -````markdown ---- -name: done -description: "Complete a task: verify, commit, push, create PR, retain learnings" -trigger: "When the user says the task is done, or asks to finish/complete/wrap up" -type: rigid ---- - -# Task Completion Skill - -When the user indicates a task is complete, follow this sequence exactly. -Do NOT skip steps. Do NOT claim completion until all steps pass. - -## Step 1: Verification - -Run all relevant verification commands. ALL must pass before proceeding. - -1. **Tests**: Run the project's test command (e.g., `yarn test`, `npm test`, `pytest`) - - Only run tests for affected files if the project supports targeted testing - - If tests fail: fix them. Do not proceed until they pass. - -2. **Lint**: Run the project's lint command (e.g., `yarn lint`, `npm run lint`) - - Auto-fix if possible. Manual fix if auto-fix fails. - -3. 
**Type check / Build**: Run the build command (e.g., `yarn build`, `tsc --noEmit`) - - Fix all type errors before proceeding. - -4. **Code review checks**: Run `devflow check` if available - - Address any findings. These are automated review rules. - -If ANY verification step fails, fix the issue and re-run. Do not skip. - -## Step 2: Commit - -1. Stage all changes: `git add -A` -2. Review what's staged: `git status` and `git diff --cached` -3. Ensure no secrets or credentials are staged (check for `.env`, `credentials.*`, API keys) -4. Write a clear commit message following the project's conventions -5. Commit: `git commit -m ""` - -If there are multiple logical changes, create multiple commits (one per concern). - -## Step 3: Push - -```bash -git push -u origin HEAD -``` -```` - -If the push is rejected (e.g., force push needed), STOP and ask the developer. Never force push without explicit permission. - -## Step 4: Create Pull Request - -Detect the VCS provider from the git remote and create a PR: - -- **GitHub**: `gh pr create --title "" --body "<body>"` -- **GitLab**: `glab mr create --title "<title>" --description "<body>"` - -The PR body should include: - -- Summary of changes (2-3 bullet points) -- Link to the ticket/issue if identifiable from the branch name -- Any notable decisions or trade-offs - -If a PR already exists for this branch, skip creation and provide the URL. - -## Step 5: Retain Learnings - -Use Hindsight to retain what was learned during this session: - -``` -retain("<project>: <what was done and any important decisions>", tags=["<project>", "feature"]) -retain("<project>: <any gotchas or non-obvious patterns discovered>", tags=["<project>", "gotcha"]) -``` - -Only retain things that would be useful in future sessions. Skip if nothing novel was learned. 
- -## Step 6: Summary - -Output a completion summary: - -``` -## Task Complete ✓ - -**Branch**: feat/MES-1234 -**PR**: https://github.com/org/repo/pull/123 -**Commits**: 3 -**Verification**: All passed (tests, lint, build, devflow check) - -### To clean up the worktree, run: -devflow done feat/MES-1234 -``` - -Always tell the developer the `devflow done` command to run for cleanup. - -```` - -### Part 2: The CLI Command (`devflow done`) - -Create `lib/done.sh`: - -```bash -#!/usr/bin/env bash -# devflow done — clean up a completed work session - -set -euo pipefail - -usage() { - cat <<EOF -Usage: devflow done <branch-name> [options] - -Clean up a completed work session: close agent session, merge/remove worktree. - -Arguments: - <branch-name> The branch to clean up (e.g., feat/MES-1234) - -Options: - --keep-branch Don't delete the remote branch after merge - --no-merge Remove worktree without merging (discard changes) - --force Force cleanup even if there are uncommitted changes - -h, --help Show this help - -Examples: - devflow done feat/MES-1234 - devflow done feat/MES-1234 --keep-branch - devflow done feat/MES-1234 --no-merge # discard work -EOF -} - -main() { - if [[ $# -eq 0 ]]; then - usage - exit 1 - fi - - local branch="$1" - shift - - local keep_branch=false - local no_merge=false - local force=false - - while [[ $# -gt 0 ]]; do - case "$1" in - --keep-branch) keep_branch=true; shift ;; - --no-merge) no_merge=true; shift ;; - --force) force=true; shift ;; - -h|--help) usage; exit 0 ;; - *) echo "Unknown option: $1"; usage; exit 1 ;; - esac - done - - echo "→ Cleaning up session: $branch" - - # Step 1: Close agent-deck session (if exists) - local session_id - session_id="$(agent-deck list --json 2>/dev/null | jq -r ".[] | select(.branch == \"$branch\") | .id" 2>/dev/null || true)" - - if [[ -n "$session_id" ]]; then - echo "→ Closing agent-deck session: $session_id" - agent-deck remove "$session_id" 2>/dev/null || true - else - echo " No active agent-deck 
session found for $branch" - fi - - # Step 2: Handle worktree - if [[ "$no_merge" == "true" ]]; then - echo "→ Removing worktree (no merge)..." - if [[ "$force" == "true" ]]; then - wt drop "$branch" --force 2>/dev/null || git worktree remove "$branch" --force - else - wt drop "$branch" 2>/dev/null || git worktree remove "$branch" - fi - else - echo "→ Parking worktree and removing..." - # wt park stashes changes and returns to main - wt park 2>/dev/null || true - wt drop "$branch" 2>/dev/null || git worktree remove "$branch" 2>/dev/null || true - fi - - # Step 3: Clean up local branch (if worktree is gone) - if ! git worktree list | grep -q "$branch"; then - echo "→ Cleaning up local branch..." - git branch -d "$branch" 2>/dev/null || git branch -D "$branch" 2>/dev/null || true - fi - - # Step 4: Clean up remote branch (unless --keep-branch) - if [[ "$keep_branch" == "false" && "$no_merge" == "false" ]]; then - echo "→ Note: Remote branch will be cleaned up when PR is merged." - echo " To delete now: git push origin --delete $branch" - fi - - echo "" - echo "✓ Session cleaned up: $branch" -} - -main "$@" -```` - -### Step 3: Register in `bin/devflow` - -```bash -# In bin/devflow case statement: -done) - shift - source "$DEVFLOW_DIR/lib/done.sh" - main "$@" - ;; -``` - -Update help text: - -``` -Commands: - init One-time project setup - work Start a new work session - done Clean up a completed work session -``` - -### Step 4: Register the skill - -Update `skills/registry.json` (or wherever skills are registered) to include the done skill: - -```json -{ - "process-discipline/done": { - "name": "done", - "description": "Complete a task: verify, commit, push, create PR, retain learnings", - "trigger": "task completion", - "type": "rigid" - } -} -``` - -## Acceptance Criteria - -- [ ] `skills/process-discipline/done.md` exists and follows the rigid skill format -- [ ] The skill runs verification (tests, lint, build, devflow check) before committing -- [ ] The skill 
commits, pushes, and creates a PR -- [ ] The skill retains learnings to Hindsight -- [ ] The skill outputs a summary with the `devflow done` command -- [ ] `devflow done feat/MES-1234` closes the agent-deck session -- [ ] `devflow done feat/MES-1234` removes the worktree -- [ ] `devflow done feat/MES-1234` cleans up the local branch -- [ ] `devflow done feat/MES-1234 --no-merge` removes worktree without merging -- [ ] `devflow done feat/MES-1234 --keep-branch` preserves the remote branch -- [ ] `devflow done` with no arguments shows help -- [ ] `bin/devflow --help` lists the `done` command -- [ ] The skill is registered and discoverable by agent-deck - -## Technical Notes - -- The skill is type `rigid` — agents must follow it exactly, no shortcuts -- The skill should detect the project's test/lint/build commands from `package.json`, `Makefile`, `pyproject.toml`, etc. A hardcoded command won't work across projects. -- `devflow check` may not be available in all projects — the skill should check availability before running -- The `devflow done` CLI command should be safe to run even if the agent-side cleanup already happened (idempotent) -- agent-deck's `remove` command may have different syntax — check `agent-deck help remove` -- `wt drop` removes the worktree AND deletes the branch. `git worktree remove` only removes the worktree. Be careful about which to use. -- The skill's PR creation should use the VCS detection from ARCH-P1-008 once that's implemented - -## Verification - -```bash -# 1. Create a test work session -devflow work feat/test-done-001 -# ... make some changes in the worktree ... - -# 2. In the agent session, trigger the done skill -# (say "task is done" or equivalent) -# Expected: agent runs verification, commits, pushes, creates PR - -# 3. Run CLI cleanup -devflow done feat/test-done-001 -# Expected: session closed, worktree removed, branch cleaned up - -# 4. 
Verify cleanup -agent-deck list | grep -v "test-done-001" # should not appear -git worktree list | grep -v "test-done-001" # should not appear -git branch | grep -v "test-done-001" # should not appear - -# 5. Test --no-merge -devflow work feat/test-done-002 -devflow done feat/test-done-002 --no-merge -# Expected: worktree removed, no merge attempted - -# 6. Test idempotency -devflow done feat/test-done-001 # already cleaned up -# Expected: no errors, graceful no-op -``` diff --git a/tasks/P1/ARCH-skills-registry-global-sources.md b/tasks/P1/ARCH-skills-registry-global-sources.md deleted file mode 100644 index 6f28d0f..0000000 --- a/tasks/P1/ARCH-skills-registry-global-sources.md +++ /dev/null @@ -1,192 +0,0 @@ ---- -id: ARCH-skills-registry-global-sources -title: "Skills Registry to Global Sources + MCP Pool" -priority: P1 -category: architecture -status: open -depends_on: - - ARCH-update-actual-configs -estimated_effort: M -files_to_touch: - - ~/.agent-deck/config.toml - - ~/.agent-deck/skills/sources.toml - - lib/init.sh - - devflow-plugin/skills/** ---- - -# Skills Registry to Global Sources + MCP Pool - -## Context - -Agent-deck has a dedicated skills registry system that discovers skills from "global sources" — not from `config.toml` and not from per-project `.claude/skills/` directories. Skills are discovered from registered source directories and can be attached to projects via managed manifests. The current devflow implementation may be placing skills in the wrong locations, making them invisible to agent-deck's registry. - -Agent-deck's skill discovery model: - -- **Global source registry**: `~/.agent-deck/skills/sources.toml` — lists directories where agent-deck looks for skills -- **Default sources**: `pool` → `~/.agent-deck/skills/pool`, `claude-global` → `~/.claude/skills` -- **Project attachment**: `<project>/.agent-deck/skills.toml` — a managed manifest that references global skills - -Skills should NEVER be copied into per-project directories. 
They should live in global registries and be referenced by projects. - -## Problem Statement - -1. Devflow skills may not be registered as a global source in agent-deck -2. Skills might be getting copied to project `.claude/skills/` directories instead of being globally available -3. The MCP pool (which enables shared MCP connections across sessions) may not be properly configured -4. `lib/init.sh` may be copying skills to project directories instead of registering global sources - -## Desired Outcome - -- Devflow skills are registered as a global source in agent-deck's skill registry -- Agent-deck discovers devflow skills automatically for all sessions -- Skills are NEVER copied to per-project directories — only global registries -- MCP pool is enabled and configured for connection sharing across sessions -- `devflow init` registers the skill source (idempotently), never copies skills - -## Implementation Guide - -### Step 1: Verify agent-deck skill registry structure - -Check what currently exists: - -```bash -# Check if sources.toml exists -cat ~/.agent-deck/skills/sources.toml 2>/dev/null || echo "Not found" - -# Check the pool directory -ls ~/.agent-deck/skills/pool/ 2>/dev/null || echo "Not found" - -# Check registered sources -agent-deck skill source list 2>/dev/null || echo "Command not available" -``` - -### Step 2: Register devflow skills as a global source - -The devflow plugin's skills directory should be registered as a source: - -```bash -# Register the devflow skills source -agent-deck skill source add devflow /Users/andrejorgelopes/dev/devflow/devflow-plugin/skills -``` - -If the CLI command doesn't exist, manually create/update `~/.agent-deck/skills/sources.toml`: - -```toml -[sources.devflow] -path = "/Users/andrejorgelopes/dev/devflow/devflow-plugin/skills" -type = "directory" -auto_discover = true -``` - -### Step 3: Symlink to the pool directory (belt and suspenders) - -As a fallback for discovery, also symlink individual skills to the pool: - 
-```bash -mkdir -p ~/.agent-deck/skills/pool -for skill_dir in /Users/andrejorgelopes/dev/devflow/devflow-plugin/skills/*/; do - skill_name=$(basename "$skill_dir") - ln -sf "$skill_dir" ~/.agent-deck/skills/pool/"$skill_name" -done -``` - -### Step 4: Remove any per-project skill copies - -Search for and remove any devflow skills that were copied to project directories: - -```bash -# Find any project-level skill copies -find ~/dev -path "*/.claude/skills/*" -name "*.md" | head -20 - -# Remove devflow-managed skills from project directories -# (Be careful — only remove devflow skills, not project-specific ones) -``` - -### Step 5: Update `lib/init.sh` - -Find the skill installation code in `lib/init.sh` and replace it: - -**Before** (copying skills): - -```bash -# DON'T DO THIS -cp -r skills/ "$PROJECT_DIR/.claude/skills/" -``` - -**After** (registering global source): - -```bash -# Register devflow skills as global source (idempotent) -if ! agent-deck skill source list 2>/dev/null | grep -q "devflow"; then - agent-deck skill source add devflow "$DEVFLOW_DIR/devflow-plugin/skills" -fi - -# Attach skills to project if needed -# agent-deck skill attach <project> <skill-name> -``` - -### Step 6: Enable MCP Pool - -Ensure `~/.agent-deck/config.toml` has the MCP pool configuration: - -```toml -[mcp_pool] -enabled = true -auto_start = true -pool_all = true -exclude_mcps = [] -fallback_to_stdio = true -show_pool_status = true -``` - -This enables: - -- **Connection pooling**: MCP servers (like Hindsight) are shared across sessions instead of each session starting its own -- **Auto-start**: Pool starts when agent-deck starts -- **Pool all**: All configured MCPs are pooled by default -- **Fallback**: If pooling fails, sessions fall back to direct stdio connections - -## Acceptance Criteria - -- [ ] `agent-deck skill source list` (or equivalent) shows "devflow" as a registered source -- [ ] `~/.agent-deck/skills/sources.toml` exists and contains the devflow source entry -- [ 
] Skills in `devflow-plugin/skills/` are discoverable by agent-deck without any per-project copies -- [ ] `find ~/dev -path "*/.claude/skills/devflow*"` returns NO results (no per-project copies) -- [ ] `lib/init.sh` registers the global source, does NOT copy skills to project directories -- [ ] `[mcp_pool]` section in `~/.agent-deck/config.toml` has `enabled = true` and `pool_all = true` -- [ ] Running `agent-deck skill list` shows devflow skills available -- [ ] `devflow init` can be run multiple times without creating duplicate source registrations (idempotent) - -## Technical Notes - -- Agent-deck's skill source system may vary by version. Check `agent-deck --version` and `agent-deck help skill` for exact syntax. -- The `sources.toml` file format may differ from what's documented above — inspect any existing file first and match its format. -- MCP pool requires agent-deck to be running as a daemon/service. Verify with `agent-deck status` or `agent-deck pool status`. -- Symlinks in the pool directory should point to DIRECTORIES (one per skill), not individual `.md` files. -- The `auto_discover = true` flag means agent-deck will watch the source directory for new skills — no manual refresh needed. -- If skills have a `SKILL.md` or `skill.toml` manifest file, ensure the devflow skills follow that convention. - -## Verification - -```bash -# 1. Verify source registration -agent-deck skill source list -# Expected: "devflow" source listed pointing to devflow-plugin/skills - -# 2. Verify skill discovery -agent-deck skill list | grep -i "devflow\|worktree\|process" -# Expected: devflow skills visible - -# 3. Verify no per-project copies -find ~/dev -path "*/.claude/skills/*" -name "*.md" -exec grep -l "devflow" {} \; -# Expected: no results - -# 4. Verify MCP pool -agent-deck pool status 2>/dev/null || agent-deck status -# Expected: pool running, MCPs listed - -# 5. 
Verify idempotency -devflow init && devflow init # run twice -agent-deck skill source list | grep -c "devflow" -# Expected: exactly 1 -``` diff --git a/tasks/P1/ARCH-stop-hook-finish-feature-removal.md b/tasks/P1/ARCH-stop-hook-finish-feature-removal.md index 601a446..3a855c8 100644 --- a/tasks/P1/ARCH-stop-hook-finish-feature-removal.md +++ b/tasks/P1/ARCH-stop-hook-finish-feature-removal.md @@ -20,7 +20,7 @@ The stop hook (`stop-finish-prompt.sh`) is fundamentally flawed as a mechanism f prompting finish-feature because: 1. **Fires on ALL agent stops** — including subagents spawned by `devflow review`, - refactor flows, agent-deck sessions, etc. + refactor flows, sessions spawned by `/devflow:phase-handoff`, etc. 2. **Shows "Stop hook error:" in UI** — hardcoded by Claude Code, cannot be suppressed. 3. **PR/MR detection is fragile** — fails when the reviewed MR is for a different branch than the current one, or when `gh`/`glab` CLI has auth issues. diff --git a/tasks/P1/ARCH-testing-foundation.md b/tasks/P1/ARCH-testing-foundation.md index 0761020..68b20da 100644 --- a/tasks/P1/ARCH-testing-foundation.md +++ b/tasks/P1/ARCH-testing-foundation.md @@ -120,7 +120,7 @@ bats tests/unit/utils.bats ## Workflow Integration -This is the entry point for the entire testing effort. An agent session starts here with `/devflow:new-feature` → brainstorming → writing-plans → executing-plans. +This is the entry point for the entire testing effort. An agent session starts here with `/devflow:new-feature` → `/devflow:brainstorming` → `/devflow:spec-feature` → `/devflow:writing-plans` → `/devflow:lock-tests` → `/devflow:executing-plans` → `/devflow:finish-feature`. Once this phase lands, every subsequent testing task can run `make test-unit` in its verification step. The helpers created here (`common.bash`, `mocks.bash`, `assertions.bash`) are shared infrastructure used by all test files in Phases 2–4.
diff --git a/tasks/P1/ARCH-testing-infrastructure.md b/tasks/P1/ARCH-testing-infrastructure.md index 0f09f19..96af532 100644 --- a/tasks/P1/ARCH-testing-infrastructure.md +++ b/tasks/P1/ARCH-testing-infrastructure.md @@ -24,7 +24,7 @@ Devflow is a Bash CLI orchestrating 6 layers of AI dev tooling across 8 library - **No end-to-end tests** for multi-step workflows (init → check → status pipeline) - **No CI pipeline** (no GitHub Actions, no automated testing on push/PR) - **No test framework** installed (no bats-core, no shunit2) -- **No test fixtures** or mock infrastructure for external dependencies (Docker, claude, opencode, agent-deck, wt, Hindsight API) +- **No test fixtures** or mock infrastructure for external dependencies (Docker, claude, opencode, wt, gh, glab, Hindsight API) Every task file has a `## Verification` section with manual bash commands, but none are automated. As the codebase grows (28 tasks pending), we need confidence that changes don't break existing functionality — especially after large refactors like the recent Continue.dev → Code Review migration that touched 22+ files. @@ -44,7 +44,7 @@ Every task file has a `## Verification` section with manual bash commands, but n 4. **E2E workflow tests**: Test multi-step flows (init → check → status) in isolated environments. 5. **CI pipeline**: GitHub Actions workflow that runs tests on every push and PR. 6. **Test observability**: CI badges in README, test result summaries, failure notifications. -7. **Mock infrastructure**: Helper functions to mock external CLIs (docker, claude, opencode, wt, agent-deck) for deterministic tests. +7. **Mock infrastructure**: Helper functions to mock external CLIs (docker, claude, opencode, wt, gh, glab) for deterministic tests. 
## Architecture @@ -107,7 +107,7 @@ Mock targets: - `opencode` — mock `run` output (text for check fallback tests) - `docker` — mock `info`, `compose` for service tests - `wt` — mock worktree operations -- `agent-deck` — mock session/conductor/group operations +- `gh` / `glab` — mock PR/MR creation and comment fetching - `git` — selective mocking for diff/log output (or use real repos in fixtures) - `brew` — mock install operations - `uvx` — mock Hindsight operations diff --git a/tasks/P1/ARCH-update-actual-configs.md b/tasks/P1/ARCH-update-actual-configs.md deleted file mode 100644 index 46de879..0000000 --- a/tasks/P1/ARCH-update-actual-configs.md +++ /dev/null @@ -1,166 +0,0 @@ ---- -id: ARCH-update-actual-configs -title: "Update Actual Configs (Not Just Templates)" -priority: P1 -category: architecture -status: open -depends_on: [] -estimated_effort: M -files_to_touch: - - ~/.agent-deck/config.toml - - ~/.claude/CLAUDE.md - - templates/config.toml.tmpl - - templates/CLAUDE.md.tmpl ---- - -# Update Actual Configs (Not Just Templates) - -## Context - -The previous implementation round focused on updating template files (`templates/config.toml.tmpl`, `templates/CLAUDE.md.tmpl`) but never applied those changes to the ACTUAL running configuration files on this machine. Templates are useless if the live configs don't match — agent-deck reads `~/.agent-deck/config.toml`, not the template, and Claude Code reads `~/.claude/CLAUDE.md`, not the template. - -This machine is the primary development environment. The configs need to be correct NOW, not just for future `devflow init` runs. - -## Problem Statement - -1. **`~/.agent-deck/config.toml`** (the live config) may have incorrect syntax or missing sections compared to the updated template. Known issues from the template rewrite include: correct `[tools.*]` format, `[mcps.*]` sections, `[docker]` with `mount_ssh = true`, `[worktree]` section, `[claude]` section, `[mcp_pool]` with `enabled = true` and `pool_all = true`.
- -2. **`~/.claude/CLAUDE.md`** (the live user-scoped CLAUDE.md) may be missing the "Starting Feature Work" section and other updates that were added to the template. - -3. There's no verification that the actual configs match the templates — drift is invisible. - -## Desired Outcome - -- `~/.agent-deck/config.toml` is updated with all fixes from the template rewrite -- `~/.claude/CLAUDE.md` is updated with all additions from the template -- A diff confirms the actual configs match the templates (with expected project-specific differences) -- Future `devflow init` runs won't overwrite manual customizations (templates should be additive, not destructive) - -## Implementation Guide - -### Step 1: Read the current actual configs - -Read both files to understand their current state: - -```bash -cat ~/.agent-deck/config.toml -cat ~/.claude/CLAUDE.md -``` - -### Step 2: Read the templates - -Read the updated templates to understand what changes need to be applied: - -```bash -cat templates/config.toml.tmpl -cat templates/CLAUDE.md.tmpl -``` - -### Step 3: Diff and identify gaps - -For each config, identify: - -- Sections present in template but missing in actual config -- Sections present in actual config but with incorrect syntax -- Sections present in actual config that should NOT be overwritten (custom project-specific settings) - -### Step 4: Update `~/.agent-deck/config.toml` - -Apply changes carefully. 
The following sections MUST be correct: - -```toml -[tools.claude] -command = "claude" -args = [] - -[tools.opencode] -command = "opencode" -args = [] - -[mcps.hindsight] -command = "hindsight" -transport = "stdio" - -[docker] -enabled = false -mount_ssh = true - -[worktree] -auto_detect = true -isolation = true - -[claude] -model = "claude-sonnet-4-20250514" - -[mcp_pool] -enabled = true -auto_start = true -pool_all = true -exclude_mcps = [] -fallback_to_stdio = true -show_pool_status = true -``` - -Preserve any existing sections that are correct and not covered by the template. - -### Step 5: Update `~/.claude/CLAUDE.md` - -Add the "Starting Feature Work" section if missing. Ensure the devflow instructions block is present and up to date. Do NOT overwrite non-devflow content in the file — CLAUDE.md may contain other project instructions. - -Look for markers like `<!-- devflow -->` and `<!-- /devflow -->` to identify the devflow-managed section. Only update content within those markers. 
- -### Step 6: Verify consistency - -```bash -# For config.toml — diff template placeholders vs actual values -diff <(sed 's/{{[^}]*}}/PLACEHOLDER/g' templates/config.toml.tmpl) \ - <(cat ~/.agent-deck/config.toml) - -# For CLAUDE.md — check devflow section matches -grep -A 100 "<!-- devflow -->" ~/.claude/CLAUDE.md -grep -A 100 "<!-- devflow -->" templates/CLAUDE.md.tmpl -``` - -## Acceptance Criteria - -- [ ] `~/.agent-deck/config.toml` contains all required sections: `[tools.*]`, `[mcps.*]`, `[docker]`, `[worktree]`, `[claude]`, `[mcp_pool]` -- [ ] `[mcp_pool]` has `enabled = true` and `pool_all = true` -- [ ] `[docker]` has `mount_ssh = true` -- [ ] `[mcps.hindsight]` is configured with correct command and transport -- [ ] `~/.claude/CLAUDE.md` contains the "Starting Feature Work" section within the devflow markers -- [ ] agent-deck can parse the config without errors: `agent-deck config validate` (or `agent-deck list` doesn't error) -- [ ] A diff between templates and actual configs shows only expected differences (project-specific values vs placeholders) -- [ ] No content outside the `<!-- devflow -->` markers in CLAUDE.md was modified - -## Technical Notes - -- **BACKUP FIRST**: Before modifying either config, create backups: - ```bash - cp ~/.agent-deck/config.toml ~/.agent-deck/config.toml.bak.$(date +%s) - cp ~/.claude/CLAUDE.md ~/.claude/CLAUDE.md.bak.$(date +%s) - ``` -- The `config.toml` template uses `{{placeholder}}` syntax for variable substitution. The actual config will have real values instead. -- `~/.claude/CLAUDE.md` is read by Claude Code on every session start. Syntax errors or corruption will affect ALL Claude Code sessions. -- agent-deck may cache its config — after updating, restart any running agent-deck processes or run `agent-deck config reload` if available. -- The TOML spec requires that `[section.subsection]` syntax is used for nested tables (e.g., `[tools.claude]`), NOT `[tools] claude = ...`. 
Verify the actual config uses correct TOML syntax. - -## Verification - -```bash -# 1. Validate agent-deck config loads without errors -agent-deck list 2>&1 | grep -i "error\|invalid\|parse" -# Expected: no errors - -# 2. Verify MCP pool is enabled -grep -A 3 "mcp_pool" ~/.agent-deck/config.toml -# Expected: enabled = true, pool_all = true - -# 3. Verify CLAUDE.md has devflow section -grep "Starting Feature Work\|devflow" ~/.claude/CLAUDE.md -# Expected: section found - -# 4. Verify backups exist -ls ~/.agent-deck/config.toml.bak.* -ls ~/.claude/CLAUDE.md.bak.* -# Expected: backup files present -``` diff --git a/tasks/P3/SPIKE-dynamic-mcp-selection.md b/tasks/P3/SPIKE-dynamic-mcp-selection.md deleted file mode 100644 index ec78a56..0000000 --- a/tasks/P3/SPIKE-dynamic-mcp-selection.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -id: SPIKE-dynamic-mcp-selection -title: "Dynamic MCP Selection and Lazy-Loading" -priority: P3 -category: spikes -status: open -depends_on: [] -estimated_effort: L -files_to_touch: [] ---- - -# Dynamic MCP Selection and Lazy-Loading - -## Context - -Currently all MCP servers (Hindsight, etc.) are loaded at session start, consuming context tokens even when idle. Every MCP server registers its tool descriptions into the system prompt, which means unused MCPs still cost tokens on every turn. Research whether we can dynamically add/remove MCPs mid-task to reduce this overhead. - -## Research Questions - -1. Can you add an MCP server to a running Claude Code session via `agent-deck mcp` commands? -2. Does Claude Code support dynamic MCP registration mid-conversation? -3. Can agent-deck's MCP pool (`[mcp_pool]`) help here — if MCPs are pooled, is their tool description still loaded into context? -4. Could we create an "MCP broker" MCP that has one tool (`get_mcp`) which dynamically connects to other MCPs on demand? -5. Is there a way to reduce the context cost of idle MCP tools? (e.g., hiding tool descriptions until needed) -6. 
Could the agent request new MCPs from the internet (e.g., from MCP registries like mcp.so) with user permission? - -## Investigation Steps - -1. Read agent-deck MCP pool docs thoroughly — understand how `[mcp_pool]` works and whether pooled MCPs inject tool descriptions at startup or on demand. -2. Test: start a Claude Code session, then try `agent-deck mcp add` from another terminal — does the running session pick it up? -3. Check if Claude Code has a `/mcp` command or similar for mid-session MCP management. -4. Research MCP registries (mcp.so, Smithery, Glama) for auto-discovery protocols and whether they support dynamic connection. -5. Estimate context token cost per MCP server by counting tool descriptions in the system prompt for each registered MCP. -6. Prototype an "MCP broker" concept — a single MCP with one `get_mcp` tool that dynamically spawns and connects to other MCP servers on demand. -7. Test whether removing an MCP mid-session causes errors or graceful degradation. - -## Expected Deliverables - -- **Feasibility report**: Can we lazy-load MCPs mid-task? Document what works and what doesn't. -- **If yes**: Proposed architecture for MCP broker/lazy-loading, including sequence diagrams. -- **If no**: Alternative approaches to reduce idle MCP context cost (e.g., MCP rotation, session profiles). -- **Token cost estimates**: Per-MCP server token cost breakdown (tool descriptions, system prompt overhead). -- **Prototype**: If feasible, a minimal MCP broker that can connect to one other MCP on demand. - -## Decision Criteria - -- **Feasible** if we can add/remove MCPs without restarting the session AND the agent can invoke the newly added tools. -- **Partially feasible** if we can reduce context cost through pooling or description hiding, even if full dynamic loading isn't possible. -- **Not feasible** if MCP registration is strictly a session-start operation with no workaround. -- Token savings must be >20% to justify the added complexity. 
- -## Technical Notes - -- Claude Code's MCP integration is based on the Model Context Protocol spec — check the spec for dynamic server registration capabilities. -- agent-deck may have its own layer of MCP management that could be leveraged independently of Claude Code's native support. -- Consider the security implications of dynamically connecting to MCPs from registries — user permission flow is critical. -- The "MCP broker" pattern is similar to a service mesh sidecar — research prior art in that space. diff --git a/tasks/P3/SPIKE-hooks-improvement-opportunities.md b/tasks/P3/SPIKE-hooks-improvement-opportunities.md index 6fa9249..9f65025 100644 --- a/tasks/P3/SPIKE-hooks-improvement-opportunities.md +++ b/tasks/P3/SPIKE-hooks-improvement-opportunities.md @@ -35,7 +35,7 @@ Claude Code hooks (PreToolUse, PostToolUse, Stop, SessionStart, Notification, et 6. **Skill auto-invocation**: Can hooks detect patterns (e.g., "starting a new feature", "creating a PR") and auto-invoke the relevant skill if the agent hasn't already? - **Partially addressed:** Stop hook suggests `/devflow:finish-feature` when on feature branch with commits -7. **Agent-deck integration**: The global hooks already use `agent-deck hook-handler` — how can devflow plugin hooks complement these without conflicts? +7. **Plugin hook coordination**: How do devflow plugin hooks complement Claude Code's native hook system (`UserPromptSubmit`, `PostToolUse`, `Stop`) without conflicts or duplicate firing? 8. **Task completion automation**: When `devflow:finish-feature` or `devflow:create-pr` completes successfully, could a hook auto-invoke `devflow:task-complete`? 
- **Partially addressed:** Stop hook chains to finish-feature which includes task completion in its flow diff --git a/tasks/P3/SPIKE-kanban-board-integration.md b/tasks/P3/SPIKE-kanban-board-integration.md index 04a7b66..668b284 100644 --- a/tasks/P3/SPIKE-kanban-board-integration.md +++ b/tasks/P3/SPIKE-kanban-board-integration.md @@ -22,7 +22,7 @@ Research integrating a Kanban board that connects to Linear and Jira, shows open 3. Is the Linear CLI feature-complete enough to replace a Kanban UI, or is it query-only? 4. Can jira-cli trigger custom commands (like `devflow work`) on status transitions? 5. What is the latency of bidirectional sync for each approach? (Real-time vs polling) -6. Can agent-deck conductor notifications be triggered from Kanban column transitions? +6. Can OS-level notifications (terminal-notifier on macOS, notify-send on Linux) be triggered from Kanban column transitions? ## Investigation Steps @@ -54,7 +54,7 @@ Research integrating a Kanban board that connects to Linear and Jira, shows open 5. Test vendor-agnostic abstraction — can we create a common interface that works with both Linear and Jira? 6. Prototype: select a task from a board → extract ticket ID → run `devflow work <ticket-id>` → auto-create branch. -7. Test notification integration with agent-deck conductor. +7. Test OS-level notification integration (terminal-notifier / notify-send) for column transitions. 8. Evaluate mobile notification options (push notifications when agent needs attention). ## Expected Deliverables @@ -86,6 +86,6 @@ Research integrating a Kanban board that connects to Linear and Jira, shows open - Consider a layered architecture: abstract task source (Linear, Jira, local YAML) → common task model → Kanban renderer → devflow integration. - Bidirectional sync is notoriously hard — research conflict resolution strategies. Last-write-wins may be acceptable for status fields but dangerous for descriptions. 
-- agent-deck conductor notifications could use OS-level notifications (terminal-notifier on macOS) or a webhook to a mobile push service. +- Notification options for agent-needs-attention alerts: OS-level (terminal-notifier on macOS, notify-send on Linux) or a webhook to a mobile push service. - The local task board (`tasks/` directory with YAML frontmatter) is already a data source — it should be a first-class citizen alongside Linear/Jira. - This spike has a dependency relationship with SPIKE-P3-004 (Task Management Export Format) — coordinate findings. diff --git a/tasks/P3/SPIKE-telemetry-observability.md b/tasks/P3/SPIKE-telemetry-observability.md index c9d140a..3576173 100644 --- a/tasks/P3/SPIKE-telemetry-observability.md +++ b/tasks/P3/SPIKE-telemetry-observability.md @@ -20,10 +20,9 @@ How can we verify that the right skills, tools, plugins, and MCPs are being call 1. Can Langfuse traces tell us which skills were invoked per session? 2. Does Claude Code log skill/plugin invocations anywhere (logs, telemetry, debug output)? 3. Can we add instrumentation to our hooks that log to Langfuse when skills fire? -4. Can agent-deck's logging (`[logs]` section) capture skill invocations? -5. Is there a way to set up alerts when expected skills DON'T fire? (e.g., "brainstorming should fire before any implementation but didn't") -6. Can we distinguish between "skill was loaded" and "skill instructions were followed"? -7. What is the performance overhead of adding telemetry to every skill/tool invocation? +4. Is there a way to set up alerts when expected skills DON'T fire? (e.g., "/devflow:brainstorming should fire before any implementation but didn't") +5. Can we distinguish between "skill was loaded" and "skill instructions were followed"? +6. What is the performance overhead of adding telemetry to every skill/tool invocation? 
## Investigation Steps @@ -33,13 +32,7 @@ How can we verify that the right skills, tools, plugins, and MCPs are being call - Can we add custom metadata/tags to traces from hooks? - Check if Langfuse has a "spans" or "events" concept that could represent skill invocations. -2. **Check agent-deck session logs** - - What is logged per session in the `[logs]` section? - - Are MCP tool calls logged with timestamps and arguments? - - Is there a structured log format we can parse? - - Can we add custom log entries from hooks? - -3. **Check Claude Code native telemetry** +2. **Check Claude Code native telemetry** - Does Claude Code have debug/verbose logging that captures tool calls? - Is there a `--debug` or `--verbose` flag that exposes internal state? - Check if the Claude Code plugin API exposes invocation events. @@ -51,7 +44,7 @@ How can we verify that the right skills, tools, plugins, and MCPs are being call 5. **Research negative alerting (missing invocations)** - Can Langfuse alert on the absence of an expected event? - - Could we define "skill policies" (e.g., "brainstorming must fire before implementation") and check compliance post-session? + - Could we define "skill policies" (e.g., "/devflow:brainstorming must fire before /devflow:executing-plans") and check compliance post-session? - Research: is post-session analysis (batch) more practical than real-time alerting? 6. **Research existing observability tools for LLM agents** @@ -62,7 +55,6 @@ How can we verify that the right skills, tools, plugins, and MCPs are being call - **Report on current telemetry data availability**: - What data Langfuse captures today (with examples). - - What data agent-deck logs capture today (with examples). - What data Claude Code natively exposes (with examples). - **Gap analysis**: What telemetry data we NEED but DON'T HAVE, specifically: @@ -76,7 +68,7 @@ How can we verify that the right skills, tools, plugins, and MCPs are being call - Estimated effort per component. 
- **Proposed alert system for missing expected invocations**: - - Define "skill policies" format (e.g., YAML rules like `before: [implementation] require: [brainstorming]`). + - Define "skill policies" format (e.g., YAML rules like `before: [/devflow:executing-plans] require: [/devflow:brainstorming]`). - Compliance checker: post-session batch job or real-time monitor? - Alert channels: terminal notification, Slack, email, dashboard. @@ -96,5 +88,5 @@ How can we verify that the right skills, tools, plugins, and MCPs are being call - Consider using OpenTelemetry as the instrumentation standard — it would make the solution vendor-agnostic (swap Langfuse for any OTEL-compatible backend). - The "skill was loaded" vs "skill was followed" distinction is fundamental. Loading can be tracked mechanically; following requires either LLM self-reporting or output analysis. - For negative alerting, consider a state machine approach: define expected skill sequences as state machines, feed observed invocations through them, alert on invalid transitions or missing states. -- agent-deck hooks (`pre_session`, `post_session`, `pre_prompt`) are natural instrumentation points. +- Claude Code's `UserPromptSubmit`, `PostToolUse`, and `Stop` hooks (registered via `~/.claude/settings.json`) are natural instrumentation points. - Consider a lightweight local SQLite database for session telemetry that can be optionally synced to Langfuse — this enables offline analysis and reduces external API dependency. 
diff --git a/tasks/P4/POLISH-docker-sandbox-adr.md b/tasks/P4/POLISH-docker-sandbox-adr.md deleted file mode 100644 index e26a47e..0000000 --- a/tasks/P4/POLISH-docker-sandbox-adr.md +++ /dev/null @@ -1,213 +0,0 @@ ---- -id: POLISH-docker-sandbox-adr -title: "Document Docker Disabled by Default Decision (ADR)" -priority: P4 -category: polish -status: open -depends_on: [] -estimated_effort: S -files_to_touch: - - /Users/andrejorgelopes/dev/devflow/docs/decisions/001-docker-sandbox-disabled.md ---- - -# Document Docker Disabled by Default Decision (ADR) - -## Context - -Agent Deck supports running AI coding agents inside Docker sandboxed containers for security isolation. During devflow setup, the decision was made to keep Docker sandbox **disabled by default** (`default_enabled = false` in `config.toml`). This was a deliberate architectural choice based on performance, complexity, and threat model analysis — but it's not documented anywhere. If someone reviews the config and sees sandboxing disabled, they might think it was an oversight. - -Architecture Decision Records (ADRs) exist to capture the "why" behind non-obvious decisions so they survive beyond the original author's memory. - -## Problem Statement - -The reasoning behind disabling Docker sandbox by default in agent-deck is only in the developer's head (and possibly in Hindsight memory). It needs to be written down as a formal ADR so that: - -1. Future-self can recall the rationale without re-researching -2. Anyone contributing to devflow understands the tradeoff -3. The decision can be revisited with context when circumstances change (e.g., Docker on macOS gets faster, or threat model changes) - -## Desired Outcome - -A clean ADR document at `~/dev/devflow/docs/decisions/001-docker-sandbox-disabled.md` that follows standard ADR format and captures all 5 reasoning points. 
- -## Implementation Guide - -### Step 1: Create the directory structure - -```bash -mkdir -p ~/dev/devflow/docs/decisions -``` - -### Step 2: Write the ADR - -Create `~/dev/devflow/docs/decisions/001-docker-sandbox-disabled.md` with the following content: - -````markdown -# ADR-001: Docker Sandbox Disabled by Default - -**Status:** Accepted -**Date:** 2025-06-XX (replace with actual date of decision) -**Decision Makers:** Andre Jorge Lopes - -## Context - -Agent Deck (the TUI session wrapper in devflow's Layer 2) supports running AI coding agents inside Docker sandboxed containers. This provides security isolation — agents can't access the host filesystem, network, or credentials beyond what's explicitly mounted. - -The question: should Docker sandboxing be **enabled** or **disabled** by default in devflow's agent-deck configuration? - -## Decision - -Docker sandbox is **disabled by default** (`default_enabled = false`). SSH key mounting is enabled (`mount_ssh = true`) for when users opt in. Users can enable sandboxing per-session via the `--sandbox` flag for untrusted or experimental tasks. - -Relevant config in `~/.agent-deck/config.toml`: - -```toml -[sandbox] -default_enabled = false -mount_ssh = true -``` -```` - -## Rationale - -### 1. Performance Overhead on macOS - -Docker on macOS does not run natively — it runs inside a Linux VM (via Colima, OrbStack, or Docker Desktop). Every file operation and command execution from a sandboxed agent container crosses the VM boundary, adding measurable latency. For a coding agent that performs hundreds of file reads/writes per session, this compounds into significant slowdown. - -On Linux hosts, this concern is reduced since Docker runs natively. This decision may be revisited if devflow expands to Linux-primary users. - -### 2. Nested Docker Complexity - -The primary target project (messaging) uses Docker Compose extensively for integration tests — MySQL (Aurora), DynamoDB local, and other services run in containers. 
A sandboxed agent container would need to interact with these host-level Docker services, requiring either: - -- Docker-in-Docker (DinD) — complex, fragile, performance penalty -- Docker socket mounting (`/var/run/docker.sock`) — negates most sandboxing benefits -- Complex network bridging between sandbox and test containers - -None of these options are simple or reliable. They add failure modes without proportional security benefit. - -### 3. Memory Pressure - -Docker Desktop/Colima allocates a fixed memory budget to the Linux VM. Running multiple sandboxed agent sessions (Agent Deck supports concurrent sessions) alongside test database containers, Hindsight daemon, and Langfuse (also Docker) would strain this budget. Memory pressure leads to OOM kills, swap thrashing, and degraded agent performance. - -Typical memory landscape: - -- Colima VM: 4-8 GB allocated -- Langfuse (Postgres + web): ~500 MB -- MySQL test container: ~300 MB -- DynamoDB local: ~200 MB -- Each sandboxed agent session: ~500 MB-1 GB -- Leaves little headroom for concurrent sessions - -### 4. SSH and Git Credential Complexity - -Sandboxed containers need SSH key access for git operations (clone, push, pull). While `mount_ssh = true` mounts the host's `~/.ssh` into the container, this is an additional integration point that can fail due to: - -- SSH agent socket forwarding issues -- Key permission mismatches between host and container user -- GPG signing not available inside container -- Credential helpers (git-credential-manager) not available - -Each of these requires debugging when it breaks, adding friction to the development workflow. - -### 5. 
Low Threat Model in Target Environment - -In the primary use case — a professional developer working on known, trusted repositories with vetted AI agents (Claude Code, OpenCode) — the threat model is low: - -- The agent operates on code the developer controls -- The agent's actions are reviewed before commit/push -- The agent doesn't execute untrusted third-party code -- The host machine is a personal development machine, not a production server - -The security benefit of sandboxing is proportional to the trust level of the agent and codebase. In this high-trust environment, the overhead cost exceeds the security value. - -## Consequences - -### Positive - -- Zero Docker overhead for agent sessions — native filesystem performance -- No nested Docker complexity — test containers work without special configuration -- Lower memory footprint — more headroom for concurrent sessions and services -- Simpler debugging — fewer layers between agent and filesystem -- Faster cold starts — no container image pull/build required - -### Negative - -- Agents have full access to the host filesystem and network -- A compromised or misbehaving agent could read/modify files outside the project -- SSH keys, environment variables, and credentials are accessible to the agent -- Less defense-in-depth compared to sandboxed execution - -### Mitigations - -- Agent actions are reviewed via pre-push checks (Code Review) and self-review (devflow review) -- Git worktrees (Worktrunk) provide project-level isolation between concurrent tasks -- Hindsight memory provides audit trail of agent actions -- Users can opt-in to sandboxing via `--sandbox` flag when working with untrusted code -- Agent Deck's Conductor mode monitors agent behavior in real-time - -## Alternatives Considered - -1. **Sandbox enabled by default, opt-out**: Rejected due to performance and complexity costs being the common case, not the exception. -2. 
**Lightweight sandboxing (namespaces, bubblewrap)**: Not supported by Agent Deck. Would require custom implementation. -3. **Project-level sandboxing config**: Allow per-project `.agent-deck.toml` to enable sandboxing for specific repos. Viable future enhancement but not needed now. - -## Review Trigger - -Revisit this decision if: - -- Docker on macOS achieves native filesystem performance (e.g., via VirtioFS improvements) -- Agent Deck adds lightweight sandboxing options (namespaces, not full Docker) -- Devflow is used in lower-trust environments (open-source contributions, untrusted agents) -- A security incident occurs related to unsandboxed agent access - -```` - -### Step 3: Add to devflow git - -```bash -cd ~/dev/devflow -git add docs/decisions/001-docker-sandbox-disabled.md -git commit -m "docs: ADR-001 document Docker sandbox disabled by default decision" -```` - -## Acceptance Criteria - -- [ ] `~/dev/devflow/docs/decisions/` directory exists -- [ ] `001-docker-sandbox-disabled.md` exists with complete ADR content -- [ ] ADR covers all 5 reasoning points (performance, nested Docker, memory, SSH/git, threat model) -- [ ] ADR follows standard format: Context, Decision, Rationale, Consequences, Alternatives -- [ ] ADR includes the actual config snippet (`default_enabled = false`, `mount_ssh = true`) -- [ ] ADR includes "Review Trigger" section for when to revisit -- [ ] ADR includes both positive and negative consequences -- [ ] ADR includes mitigations for the negative consequences -- [ ] File is committed to the devflow git repo - -## Technical Notes - -- ADR numbering starts at 001. Future ADRs increment: 002, 003, etc. -- The `docs/decisions/` path follows common ADR conventions (adr-tools, MADR) -- The date in the ADR should reflect when the decision was actually made, not when the ADR was written. If unknown, use the approximate date devflow was set up. 
-- The ADR references `config.toml` fields — if the config format changes, the ADR should be updated -- This ADR is about the **default** setting. It explicitly preserves the ability to opt-in via `--sandbox` - -## Verification - -```bash -# Verify file exists -test -f ~/dev/devflow/docs/decisions/001-docker-sandbox-disabled.md && echo "OK" || echo "MISSING" - -# Verify key sections exist -grep -c "## Context\|## Decision\|## Rationale\|## Consequences\|## Alternatives" \ - ~/dev/devflow/docs/decisions/001-docker-sandbox-disabled.md -# Should return 5 - -# Verify all 5 reasoning points -grep -c "Performance\|Nested Docker\|Memory Pressure\|SSH\|Threat Model" \ - ~/dev/devflow/docs/decisions/001-docker-sandbox-disabled.md -# Should return >= 5 - -# Verify git tracking -cd ~/dev/devflow && git ls-files docs/decisions/ -# Should list 001-docker-sandbox-disabled.md -``` diff --git a/tasks/P4/POLISH-readme-improvement.md b/tasks/P4/POLISH-readme-improvement.md index 6f52f3b..340b9db 100644 --- a/tasks/P4/POLISH-readme-improvement.md +++ b/tasks/P4/POLISH-readme-improvement.md @@ -111,15 +111,11 @@ graph TB Hindsight[Hindsight MCP<br/>3-tier persistent memory] end - subgraph "Layer 2: Session Management" - AgentDeck[Agent Deck<br/>TUI + Conductor] - end - - subgraph "Layer 3: Git Isolation" + subgraph "Layer 2: Git Isolation" Worktrunk[Worktrunk<br/>Worktree lifecycle] end - subgraph "Layer 4: Code Review" + subgraph "Layer 3: Code Review" Review[Code Review<br/>Pre-push checks] end diff --git a/tasks/P4/POLISH-yadm-tracking.md b/tasks/P4/POLISH-yadm-tracking.md deleted file mode 100644 index fc660b7..0000000 --- a/tasks/P4/POLISH-yadm-tracking.md +++ /dev/null @@ -1,241 +0,0 @@ ---- -id: POLISH-yadm-tracking -title: "YADM Tracking for All Devflow-Related Configs" -priority: P4 -category: polish -status: open -depends_on: [] -estimated_effort: M -files_to_touch: - - ~/.config/zsh/init - - ~/.config/zsh/.zshrc - - ~/.config/zsh/shell - - ~/.config/zsh/envs - - 
~/.config/zsh/keybinds - - ~/.config/zsh/aliases - - ~/.config/zsh/functions - - ~/.claude/CLAUDE.md - - ~/.claude/AGENTS.md - - ~/.agent-deck/config.toml - - ~/.hindsight/profiles/main.env - - ~/.hindsight/profiles/metadata.json - - ~/.hindsight/active_profile - - ~/.config/opencode/skills/superpowers/ (all files) - - ~/.zshenv - - ~/dev/aircall/visualizations/README.md - - ~/dev/aircall/visualizations/devflow-ecosystem.md - - ~/dev/aircall/visualizations/development-workflow.md - - ~/dev/devflow/visualizations/devflow-ecosystem.md - - ~/dev/devflow/visualizations/development-workflow.md ---- - -# YADM Tracking for All Devflow-Related Configs - -## Context - -The developer uses [yadm](https://yadm.io/) for dotfile management across machines. Currently yadm tracks 0 files — none of the devflow-related configuration files are version-controlled. This means: - -- Losing the machine or reinstalling means manually recreating all agent configs, shell integrations, skill files, and Hindsight profiles -- No history of changes to critical files like `CLAUDE.md` or `AGENTS.md` -- No way to sync configs across machines - -Additionally, some devflow visualizations currently live in `~/dev/aircall/visualizations/` but logically belong in the devflow project repository. These should be moved to `~/dev/devflow/visualizations/` and symlinked back. - -## Problem Statement - -1. **No dotfile tracking**: Critical devflow configs (shell init, agent instructions, skills, Hindsight profiles) are not tracked by yadm -2. **Scattered visualizations**: Devflow architecture diagrams live in the aircall project's visualizations directory instead of the devflow repo -3. **Numbered prefixes**: Visualization files have numbered prefixes (`10-devflow-ecosystem.md`, `11-development-workflow.md`) that are vestiges of the aircall project's naming scheme — already renamed to clean names but need to be moved to proper home -4. 
**devflow repo gaps**: The devflow git repo may not track `devflow-plugin/`, `tasks/`, or `visualizations/` directories - -## Desired Outcome - -- All devflow-related dotfiles tracked by yadm with a clean initial commit -- Visualizations live in `~/dev/devflow/visualizations/` (canonical location) -- Symlinks from `~/dev/aircall/visualizations/devflow/` point to the canonical files -- The aircall visualizations README updated to reference new paths -- The devflow git repo tracks all generated/managed directories - -## Implementation Guide - -### Part 1: Add Files to YADM - -#### Step 1: Verify files exist - -```bash -# Check each file exists before adding -ls -la ~/.config/zsh/init -ls -la ~/.config/zsh/.zshrc -ls -la ~/.config/zsh/shell -ls -la ~/.config/zsh/envs -ls -la ~/.config/zsh/keybinds -ls -la ~/.config/zsh/aliases -ls -la ~/.config/zsh/functions -ls -la ~/.claude/CLAUDE.md -ls -la ~/.claude/AGENTS.md -ls -la ~/.agent-deck/config.toml -ls -la ~/.hindsight/profiles/main.env -ls -la ~/.hindsight/profiles/metadata.json -ls -la ~/.hindsight/active_profile -ls -la ~/.config/opencode/skills/superpowers/ -ls -la ~/.zshenv -``` - -#### Step 2: Add shell config files - -```bash -yadm add ~/.zshenv -yadm add ~/.config/zsh/init -yadm add ~/.config/zsh/.zshrc -yadm add ~/.config/zsh/shell -yadm add ~/.config/zsh/envs -yadm add ~/.config/zsh/keybinds -yadm add ~/.config/zsh/aliases -yadm add ~/.config/zsh/functions -``` - -#### Step 3: Add agent config files - -```bash -yadm add ~/.claude/CLAUDE.md -yadm add ~/.claude/AGENTS.md -yadm add ~/.agent-deck/config.toml -``` - -#### Step 4: Add Hindsight profile (NOT logs) - -```bash -yadm add ~/.hindsight/profiles/main.env -yadm add ~/.hindsight/profiles/metadata.json -yadm add ~/.hindsight/active_profile -``` - -**IMPORTANT**: Do NOT add `~/.hindsight/profiles/main.log` or any log/data files. Only configuration and profile metadata. 
- -#### Step 5: Add OpenCode skills - -```bash -# Add all superpowers skill files -yadm add ~/.config/opencode/skills/superpowers/ -``` - -#### Step 6: Commit to yadm - -```bash -yadm status # Review what's staged -yadm commit -m "feat: track devflow-related dotfiles - -Adds shell config, agent instructions, Hindsight profiles, -Agent Deck config, and OpenCode superpowers skills to yadm." -``` - -### Part 2: Move Visualizations to Devflow Repo - -#### Step 7: Create devflow visualizations directory - -```bash -mkdir -p ~/dev/devflow/visualizations -``` - -#### Step 8: Move files (they already have clean names) - -```bash -# Move from aircall visualizations to devflow repo -mv ~/dev/aircall/visualizations/devflow-ecosystem.md ~/dev/devflow/visualizations/devflow-ecosystem.md -mv ~/dev/aircall/visualizations/development-workflow.md ~/dev/devflow/visualizations/development-workflow.md -``` - -#### Step 9: Create symlink directory in aircall visualizations - -```bash -mkdir -p ~/dev/aircall/visualizations/devflow -``` - -#### Step 10: Create symlinks - -```bash -ln -s ~/dev/devflow/visualizations/devflow-ecosystem.md ~/dev/aircall/visualizations/devflow/devflow-ecosystem.md -ln -s ~/dev/devflow/visualizations/development-workflow.md ~/dev/aircall/visualizations/devflow/development-workflow.md -``` - -#### Step 11: Verify symlinks work - -```bash -ls -la ~/dev/aircall/visualizations/devflow/ -# Should show symlinks pointing to ~/dev/devflow/visualizations/ -cat ~/dev/aircall/visualizations/devflow/devflow-ecosystem.md | head -5 -# Should show content -``` - -#### Step 12: Update aircall visualizations README - -Edit `~/dev/aircall/visualizations/README.md` to reference the new paths: - -- Note that devflow visualizations now live in the devflow repo -- Reference `devflow/devflow-ecosystem.md` and `devflow/development-workflow.md` (symlinked) - -### Part 3: Ensure Devflow Repo Tracks All Directories - -#### Step 13: Add directories to devflow git - -```bash -cd 
~/dev/devflow - -# Check what's currently tracked -git status - -# Add new directories -git add visualizations/ -git add tasks/ - -# Check if devflow-plugin exists and add it -ls devflow-plugin/ && git add devflow-plugin/ || echo "devflow-plugin/ not found, skip" - -git commit -m "feat: track visualizations, tasks, and generated plugin directories" -``` - -## Acceptance Criteria - -- [ ] `yadm list` shows all files from the list above -- [ ] `yadm status` is clean (all files committed) -- [ ] `~/.hindsight/profiles/main.log` is NOT tracked by yadm -- [ ] `~/dev/devflow/visualizations/devflow-ecosystem.md` exists (not a symlink — canonical file) -- [ ] `~/dev/devflow/visualizations/development-workflow.md` exists (not a symlink — canonical file) -- [ ] `~/dev/aircall/visualizations/devflow/devflow-ecosystem.md` is a symlink to `~/dev/devflow/visualizations/devflow-ecosystem.md` -- [ ] `~/dev/aircall/visualizations/devflow/development-workflow.md` is a symlink to `~/dev/devflow/visualizations/development-workflow.md` -- [ ] `~/dev/aircall/visualizations/README.md` references the new devflow/ subdirectory -- [ ] `~/dev/devflow/` git repo tracks `visualizations/`, `tasks/` directories -- [ ] Original visualization files no longer exist at old paths (moved, not copied) - -## Technical Notes - -- **yadm** is a wrapper around git that uses `~` as the work tree. `yadm add` / `yadm commit` work like regular git commands. -- **yadm vs git**: yadm tracks user-scoped dotfiles. The devflow project repo (regular git) tracks project files. These are separate concerns. -- **Hindsight main.env**: Contains environment variables like LLM provider config. Safe to track. `main.log` contains memory data — do NOT track. -- **OpenCode skills**: The `~/.config/opencode/skills/superpowers/` directory contains SKILL.md files and bundled resources. All should be tracked since they're hand-authored. -- **Symlink direction**: The canonical file lives in `~/dev/devflow/visualizations/`. 
The symlink lives in `~/dev/aircall/visualizations/devflow/`. This means edits in either location update the same file. -- **No secrets**: None of the files listed contain secrets. `main.env` has LLM provider names (like `claude-code`) but no API keys — those come from environment variables. - -## Verification - -```bash -# Verify yadm tracking -yadm list | grep -E "(zsh|claude|agent-deck|hindsight|opencode|zshenv)" | wc -l -# Should be >= 15 files - -# Verify no logs tracked -yadm list | grep "main.log" -# Should return nothing - -# Verify symlinks -file ~/dev/aircall/visualizations/devflow/devflow-ecosystem.md -# Should say "symbolic link to ..." - -# Verify devflow repo -cd ~/dev/devflow && git ls-files visualizations/ -# Should list the two visualization files - -# Verify content accessible via symlink -diff ~/dev/devflow/visualizations/devflow-ecosystem.md ~/dev/aircall/visualizations/devflow/devflow-ecosystem.md -# Should show no differences -``` diff --git a/tasks/README.md b/tasks/README.md index 24c91cd..15dcee1 100644 --- a/tasks/README.md +++ b/tasks/README.md @@ -35,21 +35,23 @@ tasks/ | [BUGS-fix-docker-compose-warnings](P0/BUGS-fix-docker-compose-warnings.md) | Fix Docker Compose Warnings | bugs | M | open | | [BUGS-fix-hindsight-startup-timeout](P0/BUGS-fix-hindsight-startup-timeout.md) | Fix Hindsight Startup Timeout | bugs | L | open | | [BUGS-fix-docker-daemon-guidance](P0/BUGS-fix-docker-daemon-guidance.md) | Fix Docker Daemon Startup Guidance | bugs | M | open | -| [ARCH-skills-mcp-sync](P0/ARCH-skills-mcp-sync.md) | Single Source of Truth Sync for Skills, MCPs, Config | arch | XL | open | | [ARCH-visualization-update-hook](P0/ARCH-visualization-update-hook.md) | Auto-Update Visualizations After Task Completion | arch | L | open | ### P1 — Architecture (system correctness) | ID | Title | Effort | Depends On | Status | |----|-------|--------|------------|--------| -| [ARCH-agent-spawning-consistency](P1/ARCH-agent-spawning-consistency.md) | Agent 
Spawning Consistency Through Agent-Deck | M | — | open | -| [ARCH-forgotten-items-previous-impl](P1/ARCH-forgotten-items-previous-impl.md) | Forgotten Items From Previous Implementation | L | — | open | -| [ARCH-update-actual-configs](P1/ARCH-update-actual-configs.md) | Update Actual Configs (Not Just Templates) | M | — | open | -| [ARCH-skills-registry-global-sources](P1/ARCH-skills-registry-global-sources.md) | Skills Registry to Global Sources + MCP Pool | M | update-actual-configs | open | +| [ARCH-detached-head-worktrees](P1/ARCH-detached-head-worktrees.md) | Never Lock Main Branch in Worktrees | M | — | open | | [ARCH-global-vs-per-project-configs](P1/ARCH-global-vs-per-project-configs.md) | Global vs Per-Project Configs | M | — | open | -| [ARCH-devflow-work-entry-point](P1/ARCH-devflow-work-entry-point.md) | Development Workflow Entry Point (`devflow work`) | L | agent-spawning, forgotten-items | open | -| [ARCH-session-task-completion](P1/ARCH-session-task-completion.md) | Session/Task Completion Command (`devflow done`) | L | devflow-work | open | | [ARCH-pr-creation-vcs-detection](P1/ARCH-pr-creation-vcs-detection.md) | PR Creation With Correct VCS Tool | S | — | open | +| [ARCH-stop-hook-finish-feature-removal](P1/ARCH-stop-hook-finish-feature-removal.md) | Remove finish-feature From Stop Hook | S | — | open | +| [ARCH-testing-foundation](P1/ARCH-testing-foundation.md) | Testing Foundation (bats-core + helpers) | M | — | open | +| [ARCH-testing-infrastructure](P1/ARCH-testing-infrastructure.md) | Testing Infrastructure (mocks + fixtures) | M | testing-foundation | open | +| [ARCH-testing-unit-tests](P1/ARCH-testing-unit-tests.md) | Unit Test Coverage for lib/ | L | testing-infrastructure | open | +| [ARCH-testing-integration](P1/ARCH-testing-integration.md) | Integration Tests for CLI commands | L | testing-unit-tests | open | +| [ARCH-testing-e2e](P1/ARCH-testing-e2e.md) | End-to-End Workflow Tests | L | testing-integration | open | +| 
[ARCH-testing-ci-pipeline](P1/ARCH-testing-ci-pipeline.md) | CI Pipeline (GitHub Actions) | M | testing-e2e | open | +| [FEAT-lsp-integration-devflow-init](P1/FEAT-lsp-integration-devflow-init.md) | LSP Integration in devflow init | M | — | open | ### P2 — Features (new capabilities) @@ -57,7 +59,6 @@ tasks/ |----|-------|--------|--------| | [FEAT-interactive-hindsight-seeding](P2/FEAT-interactive-hindsight-seeding.md) | Interactive Hindsight Seeding on `devflow up` | M | open | | [FEAT-self-learning-mechanisms](P2/FEAT-self-learning-mechanisms.md) | Self-Learning Mechanisms (Agent Memory Hooks) | L | open | -| [FEAT-context-compaction-skill](P2/FEAT-context-compaction-skill.md) | Context Compaction Skill | M | open | | [FEAT-refactor-skill](P2/FEAT-refactor-skill.md) | Refactor Skill (Multi-Agent Refactoring) | XL | open | | [FEAT-langfuse-traces-tldr](P2/FEAT-langfuse-traces-tldr.md) | Langfuse Traces TLDR Skill | M | open | | [FEAT-lazygit-lazydocker-wrappers](P2/FEAT-lazygit-lazydocker-wrappers.md) | Lazygit and Lazydocker CLI Wrappers | S | open | @@ -66,19 +67,17 @@ tasks/ | ID | Title | Effort | Status | |----|-------|--------|--------| -| [SPIKE-dynamic-mcp-selection](P3/SPIKE-dynamic-mcp-selection.md) | Dynamic MCP Selection and Lazy-Loading | L | open | | [SPIKE-specialized-agent-projects](P3/SPIKE-specialized-agent-projects.md) | Specialized Agent Projects as New Layer | M | open | | [SPIKE-kanban-board-integration](P3/SPIKE-kanban-board-integration.md) | Kanban Board Integration (Vibe-Kanban) | L | open | | [SPIKE-task-management-export](P3/SPIKE-task-management-export.md) | Task Management Export Format | M | open | | [SPIKE-telemetry-observability](P3/SPIKE-telemetry-observability.md) | Telemetry for Skill/Tool Invocation | M | open | +| [SPIKE-hooks-improvement-opportunities](P3/SPIKE-hooks-improvement-opportunities.md) | Hooks Improvement Opportunities | M | open | ### P4 — Polish | ID | Title | Effort | Status | |----|-------|--------|--------| | 
[POLISH-readme-improvement](P4/POLISH-readme-improvement.md) | README Improvement | M | open | -| [POLISH-yadm-tracking](P4/POLISH-yadm-tracking.md) | YADM Tracking for All Configs | M | open | -| [POLISH-docker-sandbox-adr](P4/POLISH-docker-sandbox-adr.md) | Document Docker Disabled Decision (ADR) | S | open | --- @@ -86,18 +85,29 @@ tasks/ ``` BUGS-fix-help-escape-chars ──────────┐ -BUGS-fix-docker-compose-warnings ────┤ -BUGS-fix-hindsight-startup-timeout ──┤──→ ARCH-update-actual-configs ──→ ARCH-skills-registry-global-sources -BUGS-fix-docker-daemon-guidance ─────┘ │ - ▼ -ARCH-skills-mcp-sync ◄──────────────────────────────────────────────────────────┘ -ARCH-visualization-update-hook ── standalone - -ARCH-agent-spawning-consistency ─┐ -ARCH-forgotten-items-previous ───┼──→ ARCH-devflow-work-entry-point ──→ ARCH-session-task-completion - │ -ARCH-global-vs-per-project ──────┘ -ARCH-pr-creation-vcs-detection ── standalone +BUGS-fix-docker-compose-warnings ────┤── standalone bug fixes +BUGS-fix-hindsight-startup-timeout ──┤ (compose fix unblocks hindsight) +BUGS-fix-docker-daemon-guidance ─────┘ + +ARCH-visualization-update-hook ───── standalone +ARCH-detached-head-worktrees ─────── standalone +ARCH-global-vs-per-project-configs ─ standalone +ARCH-pr-creation-vcs-detection ───── standalone +ARCH-stop-hook-finish-feature-removal ─ standalone + +ARCH-testing-foundation + ↓ +ARCH-testing-infrastructure + ↓ +ARCH-testing-unit-tests + ↓ +ARCH-testing-integration + ↓ +ARCH-testing-e2e + ↓ +ARCH-testing-ci-pipeline + +FEAT-lsp-integration-devflow-init ── standalone FEAT-* ── all standalone, can be parallelized SPIKE-* ── all standalone research @@ -106,15 +116,16 @@ POLISH-* ── do last ## Execution Order (recommended) -1. **Batch 1 (parallel):** BUGS-fix-help-escape-chars, BUGS-fix-docker-compose-warnings, BUGS-fix-docker-daemon-guidance, ARCH-pr-creation-vcs-detection +1.
**Batch 1 (parallel):** BUGS-fix-help-escape-chars, BUGS-fix-docker-compose-warnings, BUGS-fix-docker-daemon-guidance, ARCH-pr-creation-vcs-detection, ARCH-stop-hook-finish-feature-removal 2. **Batch 2:** BUGS-fix-hindsight-startup-timeout (depends on compose fix) -3. **Batch 3 (parallel):** ARCH-agent-spawning-consistency, ARCH-forgotten-items, ARCH-update-actual-configs, ARCH-global-vs-per-project, ARCH-visualization-update-hook -4. **Batch 4:** ARCH-skills-registry-global-sources (after update-actual-configs), ARCH-devflow-work-entry-point (after agent-spawning + forgotten-items) -5. **Batch 5:** ARCH-skills-mcp-sync (after skills-registry), ARCH-session-task-completion (after devflow-work) -6. **Batch 6 (parallel):** All P2 features -7. **Batch 7 (parallel):** All P3 spikes -8. **Batch 8:** P4 polish +3. **Batch 3 (parallel):** ARCH-visualization-update-hook, ARCH-detached-head-worktrees, ARCH-global-vs-per-project-configs, ARCH-testing-foundation +4. **Batch 4 (testing chain):** testing-infrastructure → testing-unit-tests → testing-integration → testing-e2e → testing-ci-pipeline (each depends on previous) +5. **Batch 5 (parallel):** All P2 features + FEAT-lsp-integration-devflow-init +6. **Batch 6 (parallel):** All P3 spikes +7. 
**Batch 7:** P4 polish --- -_Total: 28 tickets | 6 P0 | 8 P1 | 6 P2 | 5 P3 | 3 P4_ +_Total: 27 tickets | 5 P0 | 11 P1 | 5 P2 | 5 P3 | 1 P4_ + +_Last revised: 2026-05-28 (deprecation cleanup — 10 stale agent-deck/superpowers-era tasks removed)_ diff --git a/visualizations/README.md b/visualizations/README.md index ecc2c5f..db95859 100644 --- a/visualizations/README.md +++ b/visualizations/README.md @@ -13,14 +13,12 @@ tags: [devflow, visualizations, index, mermaid, style-guide] | Category | File | What it shows | |----------|------|---------------| -| **Architecture** | [[devflow-ecosystem]] | The 6-layer tool ecosystem — Hindsight, Agent Deck, Worktrunk, Code Review, Skills, Langfuse | +| **Architecture** | [[devflow-ecosystem]] | The 5-layer tool ecosystem — Hindsight, Worktrunk, Code Review, Skills, Langfuse | | **Architecture** | [[code-review-architecture]] | Code review dispatch, check rules pipeline, devflow review dual-mode | | **Architecture** | runtime-architecture _(future)_ | Docker containers, Homebrew CLIs, config file locations | -| **Architecture** | sync-architecture _(future)_ | Skills/MCP sync flow across 7 targets | -| **Workflows** | [[development-workflow]] | Full SDD workflow: idea to merge request, TDD loop, review gates | -| **Workflows** | devflow-work-flow _(future)_ | The `devflow work` command flow | -| **Workflows** | session-lifecycle _(future)_ | Session create → conductor → done → cleanup | -| **Integrations** | agent-deck-integration _(future)_ | How agent-deck connects to everything | +| **Architecture** | sync-architecture _(future)_ | Skills/MCP sync flow across delivery targets | +| **Workflows** | [[development-workflow]] | Full SDD workflow: idea to merge request, TDD loop, review gates, phase-handoff spawns | +| **Workflows** | session-lifecycle _(future)_ | Session create → phase-handoff spawn → done → cleanup | | **Integrations** | hindsight-data-flow _(future)_ | Memory recall/retain/reflect patterns | | **Integrations** | 
langfuse-trace-flow _(future)_ | What gets traced and where | | **Decisions** | _(created as needed)_ | Visual ADRs when diagrams help explain decisions | @@ -57,12 +55,11 @@ Consistent across all diagrams — each devflow component has a fixed color: | Component | Color | Hex | classDef name | |-----------|-------|-----|---------------| | Hindsight | Purple | `#7c3aed` | `hindsightStyle` | -| Agent Deck | Blue | `#3b82f6` | `agentDeckStyle` | | Worktrunk | Green | `#059669` | `worktrunkStyle` | | Code Review | Amber | `#d97706` | `reviewStyle` | | Skills/Marketplace | Pink | `#be185d` | `skillsStyle` | | Langfuse | Cyan | `#0891b2` | `langfuseStyle` | -| Conductor | Amber variant | `#f59e0b` | `conductorStyle` | +| Phase-handoff / spawn | Amber variant | `#f59e0b` | `handoffStyle` | | CLI/Terminal | Gray | `#374151` | `cliStyle` | | Decision nodes | Dark gray | `#374151` | `decisionStyle` | | Terminal nodes | Medium gray | `#6b7280` | `terminalStyle` | @@ -73,12 +70,11 @@ Copy this block into every diagram and apply the relevant classes: ```mermaid classDef hindsightStyle fill:#7c3aed,color:#fff,stroke:#5b21b6 -classDef agentDeckStyle fill:#3b82f6,color:#fff,stroke:#1e40af classDef worktrunkStyle fill:#059669,color:#fff,stroke:#047857 classDef reviewStyle fill:#d97706,color:#fff,stroke:#b45309 classDef skillsStyle fill:#be185d,color:#fff,stroke:#9d174d classDef langfuseStyle fill:#0891b2,color:#fff,stroke:#0e7490 -classDef conductorStyle fill:#f59e0b,color:#fff,stroke:#d97706 +classDef handoffStyle fill:#f59e0b,color:#fff,stroke:#d97706 classDef cliStyle fill:#374151,color:#fff,stroke:#1f2937 classDef decisionStyle fill:#374151,color:#fff,stroke:#1f2937 classDef terminalStyle fill:#6b7280,color:#fff,stroke:#4b5563 @@ -131,4 +127,4 @@ The command will: --- -_Last updated: 2026-03-10_ +_Last updated: 2026-05-28_ diff --git a/visualizations/architecture/devflow-ecosystem.md b/visualizations/architecture/devflow-ecosystem.md index d5d8f47..f8507e8 100644 --- 
a/visualizations/architecture/devflow-ecosystem.md +++ b/visualizations/architecture/devflow-ecosystem.md @@ -5,17 +5,16 @@ tags: tooling, ai-development, hindsight, - agent-deck, worktrunk, code-review, langfuse, skills, - conductor, + phase-handoff, ] related: ["[[development-workflow]]"] --- -# Devflow Ecosystem — The 6-Layer AI Dev Environment +# Devflow Ecosystem — The 5-Layer AI Dev Environment > Local-first AI development orchestrator. Each layer is an independent tool; devflow composes them. > Related: [[development-workflow]] @@ -33,19 +32,13 @@ graph TD HS["Hindsight<br/>(MCP server)<br/>:8888 API · :9999 UI"] end - subgraph L2 [" Layer 2 — Sessions "] - AD["Agent Deck<br/>(TUI wrapper)<br/>MCP pooling · Groups"] - COND["Conductor<br/>(persistent session)<br/>auto-respond · escalate"] - WEB["Web UI<br/>(agent-deck web)<br/>:8420 dashboard"] - end - - subgraph L3 [" Layer 3 — Isolation "] + subgraph L2 [" Layer 2 — Isolation "] WT["Worktrunk<br/>(git worktrees)<br/>wt step · wt hop · wt park"] end - subgraph L4 [" Layer 4 — Code Review "] + subgraph L3 [" Layer 3 — Code Review "] CR["Code Review<br/>(devflow check)<br/>.devflow/checks/*.md"] - RV["Self / PR Review<br/>(devflow review)<br/>local diff or PR/MR URL"] + RV["Self / PR / MR Review<br/>(devflow review)<br/>local diff or PR/MR URL"] CR_CLAUDE["Claude Code CLI<br/>(claude --print)<br/>structured JSON output"] CR_OPENCODE["OpenCode CLI<br/>(opencode run)<br/>text output fallback"] CR -->|"primary"| CR_CLAUDE @@ -53,48 +46,42 @@ graph TD RV -->|"always"| CR_CLAUDE end - subgraph L5 [" Layer 5 — Process Discipline "] - SK["CLAUDE.md + Skills<br/>(11 skills · 6 categories)<br/>Slash commands"] + subgraph L4 [" Layer 4 — Process Discipline "] + SK["CLAUDE.md + Skills<br/>(/devflow:* wrappers ·<br/>phase-handoff via spawn_task)"] HK["Hooks<br/>(lib/hooks/)<br/>Stop · PostToolUse · UserPromptSubmit"] + PH["Phase Handoff<br/>(/devflow:phase-handoff)<br/>spawn_task → new session"] end - subgraph L6 [" Layer 
6 — Observability "] + subgraph L5 [" Layer 5 — Observability "] LF["Langfuse<br/>(self-hosted tracing)<br/>:3100 UI · Postgres"] end CLI -->|"up / down"| HS CLI -->|"up / down"| LF - CLI -->|"worktree --agent"| WT + CLI -->|"worktree <ticket>"| WT CLI -->|"check"| CR CLI -->|"review [url]"| RV - CLI -->|"skills install/remove"| SK + CLI -->|"skills install/remove/convert"| SK CLI -->|"init (registers)"| HK HK -->|"guards &<br/>nudges"| SK + SK --> PH CLI -->|"seed"| HS - CLI -->|"init"| AD - CLI -->|"conductor"| COND - CLI -->|"web"| WEB - AD --> COND - AD --> WEB classDef hindsightStyle fill:#7c3aed,color:#fff,stroke:#5b21b6 - classDef agentDeckStyle fill:#3b82f6,color:#fff,stroke:#1e40af classDef worktrunkStyle fill:#059669,color:#fff,stroke:#047857 classDef reviewStyle fill:#d97706,color:#fff,stroke:#b45309 classDef skillsStyle fill:#be185d,color:#fff,stroke:#9d174d classDef langfuseStyle fill:#0891b2,color:#fff,stroke:#0e7490 classDef cliStyle fill:#374151,color:#fff,stroke:#1f2937 - classDef conductorStyle fill:#f59e0b,color:#fff,stroke:#d97706 + classDef handoffStyle fill:#f59e0b,color:#fff,stroke:#d97706 class HS hindsightStyle - class AD agentDeckStyle class WT worktrunkStyle class CR,RV,CR_CLAUDE,CR_OPENCODE reviewStyle class SK,HK skillsStyle + class PH handoffStyle class LF langfuseStyle class CLI cliStyle - class COND conductorStyle - class WEB agentDeckStyle ``` --- @@ -104,52 +91,47 @@ graph TD ```mermaid %%{init: {'flowchart': {'rankSpacing': 50, 'nodeSpacing': 30, 'diagramPadding': 15}}}%% graph LR - AD["Agent Deck<br/>(Sessions)"] - COND["Conductor<br/>(Monitor + Auto-respond)"] HS["Hindsight<br/>(Memory)"] WT["Worktrunk<br/>(Worktrees)"] CR["Code Review<br/>(devflow check)"] - SK["Skills<br/>(Process)"] + SK["Skills<br/>(/devflow:* wrappers)"] LF["Langfuse<br/>(Traces)"] HK["Hooks<br/>(Process Guards)"] + PH["Phase-handoff<br/>(spawn_task)"] AGENT["AI Agent<br/>(Claude Code /<br/>OpenCode)"] - HUMAN["Developer<br/>(Escalation target)"] - - WT 
-->|"launches agent<br/>in worktree"| AD - AD -->|"pools MCP<br/>sockets"| HS - AD -->|"wraps session<br/>for"| AGENT - AD -->|"groups sessions<br/>by project/type"| AGENT - COND -->|"monitors all<br/>agent sessions"| AD - COND -->|"auto-responds to<br/>routine prompts"| AGENT - COND -->|"escalates to human<br/>via notifications"| HUMAN + HUMAN["Developer<br/>(at terminal /<br/>in Claude Desktop)"] + + WT -->|"isolated workspace<br/>per feature"| AGENT AGENT -->|"recall / retain /<br/>reflect via MCP"| HS AGENT -->|"follows process<br/>from"| SK SK -->|"orchestrates<br/>across layers"| HS SK -->|"triggers<br/>devflow check"| CR SK -->|"creates / cleans<br/>worktrees"| WT SK -->|"logs session<br/>summary"| LF + SK --> PH + PH -->|"spawns next-phase<br/>session in sidebar"| AGENT + PH -->|"writes frozen-state<br/>file as handoff"| WT HK -->|"blocks stop on<br/>unfinished features"| AGENT HK -->|"nudges continuation<br/>after PR creation"| AGENT LF -.->|"collects traces<br/>from agent"| AGENT + HUMAN -->|"invokes /devflow:* slash"| AGENT classDef hindsightStyle fill:#7c3aed,color:#fff,stroke:#5b21b6 - classDef agentDeckStyle fill:#3b82f6,color:#fff,stroke:#1e40af classDef worktrunkStyle fill:#059669,color:#fff,stroke:#047857 classDef reviewStyle fill:#d97706,color:#fff,stroke:#b45309 classDef skillsStyle fill:#be185d,color:#fff,stroke:#9d174d classDef langfuseStyle fill:#0891b2,color:#fff,stroke:#0e7490 classDef agentStyle fill:#374151,color:#fff,stroke:#1f2937 - classDef conductorStyle fill:#f59e0b,color:#fff,stroke:#d97706 + classDef handoffStyle fill:#f59e0b,color:#fff,stroke:#d97706 classDef humanStyle fill:#6b7280,color:#fff,stroke:#4b5563 class HS hindsightStyle - class AD agentDeckStyle class WT worktrunkStyle class CR reviewStyle class SK,HK skillsStyle class LF langfuseStyle class AGENT agentStyle - class COND conductorStyle + class PH handoffStyle class HUMAN humanStyle ``` @@ -157,21 +139,28 @@ graph LR ## 3. 
Skill-to-Layer Mapping -Each skill is a slash command that orchestrates across multiple layers: - -| Skill | Layer | Touches | What it does | -| ------------------------ | ----- | ------- | ----------------------------------------------------- | -| `/memory-recall` | 1 | L1 | Recall memories before starting a task | -| `/retain-learning` | 1 | L1 | Store a discovery into Hindsight | -| `/reflect-session` | 1 | L1 | End-of-session reflection and memory consolidation | -| `/new-feature` | 1 | L1 | POST-LAUNCH setup guide for new feature workspace | -| `/finish-feature` | 4 | L4 + L1 + L5 | devflow check + PR creation + viz check + retain learnings | -| `/pre-push-check` | 4 | L4 + L5 | devflow check + CLAUDE.md compliance self-review | -| `/create-pr` | 4 | L4 + L1 | Self-review + devflow check + gh pr create | -| `/spec-feature` | 5 | L1 + L5 | Architecture recall + spec doc + task breakdown | -| `/architecture-decision` | 5 | L1 + L5 | ADR + Hindsight retention + CLAUDE.md update | -| `/pr-strategy` | 5 | L1 + L5 | View or reset PR description strategy preference | -| `/session-summary` | 6 | L6 + L1 | Metrics, quality scores, Langfuse trace logging | +Each skill is a `/devflow:*` slash command that orchestrates across multiple layers. Wrappers (`brainstorming`, `writing-plans`, `executing-plans`) delegate to upstream skills internally — devflow callers never reach past the wrappers. 
+ +| Skill | Layer | Touches | What it does | +| --------------------------- | ----- | ------------------- | --------------------------------------------------------------------------------------- | +| `/devflow:recall-before-task` | 1 | L1 | Recall memories before starting a task | +| `/devflow:retain-learning` | 1 | L1 | Store a discovery into Hindsight | +| `/devflow:reflect-session` | 1 | L1 | End-of-session reflection and memory consolidation | +| `/devflow:new-feature` | 1 | L1 | POST-LAUNCH setup guide for new feature workspace + invokes /devflow:brainstorming | +| `/devflow:brainstorming` | 4 | L4 | Wrapper around upstream brainstorming; overrides terminal handoff to /devflow:spec-feature | +| `/devflow:spec-feature` | 4 | L1 + L4 | Architecture recall + spec doc + task breakdown + phase-handoff to plan | +| `/devflow:writing-plans` | 4 | L4 | Wrapper around upstream writing-plans; phase-handoff to lock-tests at end | +| `/devflow:lock-tests` | 4 | L4 | Batch failing test inventory + user-approval gate + phase-handoff to impl | +| `/devflow:executing-plans` | 4 | L4 | Wrapper around upstream executing-plans; forces terminal handoff to /devflow:finish-feature | +| `/devflow:phase-handoff` | 4 | L4 + L2 | Writes frozen-state file + spawn_task → new session with [TICKET] [MR#N] Phase title | +| `/devflow:finish-feature` | 3 | L3 + L1 + L4 | devflow check + PR/MR creation + viz check + retain learnings | +| `/devflow:pre-push-check` | 3 | L3 + L4 | devflow check + CLAUDE.md compliance self-review | +| `/devflow:create-pr` | 3 | L3 + L1 | Self-review + devflow check + gh/glab pr create | +| `/devflow:review` | 3 | L3 | Multi-perspective code review on local diff or PR/MR URL | +| `/devflow:review-document` | 3 | L3 | Prose-doc review (KB/RFC/spike/runbook/PRD) on Google Docs, Confluence, local files, URLs | +| `/devflow:architecture-decision` | 4 | L1 + L4 | ADR + Hindsight retention + CLAUDE.md update | +| `/devflow:session-summary` | 5 | L5 + L1 | Metrics, 
quality scores, Langfuse trace logging | +| `/devflow:update-visualizations` | 4 | L4 | Analyze diff + update affected diagrams | --- @@ -188,15 +177,12 @@ graph TD end subgraph CLI_Tools ["Homebrew CLIs"] - AD_C["agent-deck<br/>(brew install)"] - COND_C["Conductor<br/>(persistent session in Agent Deck)<br/>monitors · auto-responds · escalates"] - WEB_C["Web UI<br/>(agent-deck web)<br/>:8420 dashboard"] WT_C["worktrunk / wt<br/>(brew install)"] - AD_C --> COND_C - AD_C --> WEB_C + GH_C["gh<br/>(GitHub CLI)"] + GLAB_C["glab<br/>(GitLab CLI)"] end - subgraph Review ["Code Review (devflow check)"] + subgraph Review ["Code Review (devflow check + devflow review)"] CR_C["devflow check<br/>(CLI dispatch)"] CR_CL["claude --print<br/>(primary · structured JSON)"] CR_OC["opencode run<br/>(fallback · text output)"] @@ -206,45 +192,38 @@ graph TD subgraph Config ["Config Files"] CLAUDE["~/.claude/CLAUDE.md<br/>(user-scoped agent config)"] - AGENTS["~/.claude/AGENTS.md<br/>(multi-agent coordination)"] - TRUST["~/.claude.json<br/>(trust config)"] + AGENTS["~/.claude/AGENTS.md<br/>(symlink → CLAUDE.md)"] CHECKS[".devflow/checks/*.md<br/>(per-project review rules)"] TOML[".worktrunk.toml<br/>(per-project worktree config)"] - SKILLS["~/.claude/commands/*<br/>(installed skills)"] + SKILLS["~/.claude/commands/devflow/<br/>(symlinked plugin)"] HOOKS["lib/hooks/*.sh<br/>(Stop · PostToolUse ·<br/>UserPromptSubmit)"] SETTINGS["~/.claude/settings.json<br/>(hooks registration)"] + STATE[".devflow/state/<branch>/<br/>(frozen-state files, gitignored)"] end - AD_C -->|"MCP connection"| HS_C - COND_C -->|"monitors sessions via"| AD_C CR_C -->|"reads rules from"| CHECKS WT_C -->|"reads config from"| TOML - AD_C -->|"reads profiles"| CLAUDE - AD_C -->|"reads trust"| TRUST - SKILLS -->|"orchestrate"| AD_C SKILLS -->|"orchestrate"| WT_C SKILLS -->|"orchestrate"| CR_C SKILLS -->|"orchestrate"| HS_C SKILLS -->|"orchestrate"| LF_WEB + SKILLS -->|"write/read"| STATE + SKILLS -->|"PR/MR via"| GH_C + 
SKILLS -->|"PR/MR via"| GLAB_C HOOKS -->|"registered in"| SETTINGS classDef hindsightStyle fill:#7c3aed,color:#fff,stroke:#5b21b6 - classDef agentDeckStyle fill:#3b82f6,color:#fff,stroke:#1e40af classDef worktrunkStyle fill:#059669,color:#fff,stroke:#047857 classDef reviewStyle fill:#d97706,color:#fff,stroke:#b45309 classDef skillsStyle fill:#be185d,color:#fff,stroke:#9d174d classDef langfuseStyle fill:#0891b2,color:#fff,stroke:#0e7490 classDef configStyle fill:#6b7280,color:#fff,stroke:#4b5563 - classDef conductorStyle fill:#f59e0b,color:#fff,stroke:#d97706 class HS_C hindsightStyle - class AD_C agentDeckStyle - class COND_C conductorStyle - class WEB_C agentDeckStyle - class WT_C,TOML worktrunkStyle + class WT_C,TOML,GH_C,GLAB_C worktrunkStyle class CR_C,CR_CL,CR_OC,CHECKS reviewStyle class LF_DB,LF_WEB langfuseStyle - class CLAUDE,AGENTS,TRUST,SKILLS,HOOKS,SETTINGS skillsStyle + class CLAUDE,AGENTS,SKILLS,HOOKS,SETTINGS,STATE skillsStyle ``` --- @@ -253,24 +232,23 @@ graph TD | Command | What it orchestrates | Layers | | ----------------------------------- | ------------------------------------------------------------------------------------------------- | ------ | -| `devflow init [dir]` | Full setup: install 6 tools, configure CLAUDE.md, AGENTS.md, project config, MCP, plugins, skills | All 6 | -| `devflow up` | Start Docker services (Hindsight + Langfuse) | L1, L6 | -| `devflow down` | Stop Docker services | L1, L6 | -| `devflow restart` | Restart Docker services | L1, L6 | -| `devflow status` | Health check across all 6 layers | All 6 | +| `devflow init [dir]` | Full setup: install 5 tools, configure CLAUDE.md, AGENTS.md, project config, MCP, plugins, skills | All 5 | +| `devflow up` | Start Docker services (Hindsight + Langfuse) | L1, L5 | +| `devflow down` | Stop Docker services | L1, L5 | +| `devflow restart` | Restart Docker services | L1, L5 | +| `devflow status` | Health check across all 5 layers | All 5 | | `devflow seed [dir]` | Seed Hindsight memory 
from project files | L1 | -| `devflow worktree <name> [--agent]` | Create worktree + copy deps + optionally launch agent | L2, L3 | -| `devflow check` | Run code review against .devflow/checks/ (Claude Code primary, OpenCode fallback) | L4 | -| `devflow review` | Review local diff against CLAUDE.md conventions via Claude Code | L4, L5 | -| `devflow review <pr-url>` | Fetch PR/MR diff (gh/glab) and review via Claude Code | L4 | -| `/devflow:review-document <src>` | Multi-perspective prose-doc review (KB/RFC/spike/runbook/PRD) on Google Docs, Confluence, local files, or URLs. Soft dep: defuddle CLI for clean web fetches. | L4 | -| `devflow web` | Open agent-deck web dashboard (:8420) | L2 | -| `devflow conductor` | Manage conductors (start, stop, status) | L2 | -| `devflow skills list` | List all 10 skills from registry with install status | L5 | -| `devflow skills install <name>` | Copy skill to .claude/commands/ | L5 | -| `devflow skills remove <name>` | Delete skill from project | L5 | -| `devflow skills convert` | Convert skills to Claude Code plugin format | L5 | -| `devflow watch [setup\|remove]` | Sensitive file watchdog — cron + post-merge hook for staleness detection | L5 | -| `devflow check-version` | Verify version consistency across Makefile, utils.sh, plugin.json, command badges | L5 | -| `devflow version-bump <version>` | Bump version in all version-bearing files | L5 | -| `devflow release` | Preview next release (conventional commit analysis, dry-run) | L5 | +| `devflow worktree <ticket>` | Create worktree + branch-name enforcement + copy gitignored deps | L2 | +| `devflow done <branch>` | Cleanup worktree after PR/MR merge | L2 | +| `devflow check` | Run code review against .devflow/checks/ (Claude Code primary, OpenCode fallback) | L3 | +| `devflow review` | Review local diff against CLAUDE.md conventions via Claude Code | L3, L4 | +| `devflow review <pr-url>` | Fetch PR/MR diff (gh/glab) and review via Claude Code | L3 | +| `/devflow:review-document 
<src>` | Multi-perspective prose-doc review (KB/RFC/spike/runbook/PRD) | L3 | +| `devflow skills list` | List all skills from registry with install status | L4 | +| `devflow skills install <name>` | Copy skill to .claude/commands/ | L4 | +| `devflow skills remove <name>` | Delete skill from project | L4 | +| `devflow skills convert` | Convert skills to Claude Code plugin format (regenerate 3-tier mirrors) | L4 | +| `devflow watch [setup\|remove]` | Sensitive file watchdog — cron + post-merge hook for staleness detection | L4 | +| `devflow check-version` | Verify version consistency across Makefile, utils.sh, plugin.json, command badges | L4 | +| `devflow version-bump <version>` | Bump version in all version-bearing files | L4 | +| `devflow release` | Preview next release (conventional commit analysis, dry-run) | L4 | diff --git a/visualizations/workflows/development-workflow.md b/visualizations/workflows/development-workflow.md index 48af6ac..e37aaa3 100644 --- a/visualizations/workflows/development-workflow.md +++ b/visualizations/workflows/development-workflow.md @@ -9,14 +9,14 @@ tags: planning, code-review, merge-request, - conductor, + phase-handoff, ] related: ["[[devflow-ecosystem]]"] --- # Development Workflow — From Idea to Merge Request -> The full SDD (Subagent-Driven Development) workflow using devflow's 6-layer toolchain. +> The full SDD (Spec-Driven Development) workflow using devflow's 5-layer toolchain and the `/devflow:*` wrapper convention. 
> Related: [[devflow-ecosystem]] --- @@ -27,85 +27,69 @@ related: ["[[devflow-ecosystem]]"] %%{init: {'flowchart': {'rankSpacing': 50, 'nodeSpacing': 30, 'diagramPadding': 15}}}%% graph TD START(["Feature request /<br/>Bug report"]) - WORKTREE["Developer at terminal:<br/>devflow worktree feat/X --agent claude<br/>(CLI creates worktree + launches session)"] + WORKTREE["Developer at terminal:<br/>devflow worktree TICKET-N<br/>(CLI creates worktree + opens session)"] SESSION(["Agent session starts<br/>in worktree"]) - RECALL["Recall from Hindsight<br/>recall('project: topic')"] - BRAIN["Brainstorming<br/>(superpowers skill)"] - PLAN["Writing Plans<br/>(superpowers skill)"] - CHOOSE{"Execution<br/>approach?"} - SDD["Subagent-Driven Dev<br/>(same session)"] - EXEC["Executing Plans<br/>(parallel session)"] - LOOP["TDD Implementation Loop<br/>(per task)"] - VERIFY["Verification<br/>(superpowers skill)"] - CHECK["Pre-Push Check<br/>devflow check + self-review"] - COMMIT["Commit + Push"] - MR["Create Merge Request<br/>gh pr create"] - RETAIN["Retain learnings<br/>retain('project: discovery')"] - CLEANUP["Worktree Cleanup<br/>(agent-offered: delete or keep)"] + NEWFEAT["/devflow:new-feature<br/>(recall, scope-check, walkthrough?)"] + BRAIN["/devflow:brainstorming<br/>(wraps upstream brainstorming)"] + SPEC["/devflow:spec-feature<br/>(write spec doc + tasks)"] + HANDOFF1["/devflow:phase-handoff<br/>--phase spec --next-phase plan<br/>(spawn new session)"] + PLAN["/devflow:writing-plans<br/>(spawned session — wraps upstream)"] + HANDOFF2["/devflow:phase-handoff<br/>--phase plan --next-phase lock-tests"] + LOCK["/devflow:lock-tests<br/>(spawned session — batch failing tests + gate)"] + HANDOFF3["/devflow:phase-handoff<br/>--phase lock-tests --next-phase impl"] + EXEC["/devflow:executing-plans<br/>(spawned session — wraps upstream + forces finish)"] + LOOP["TDD Implementation Loop<br/>(per task — RED, GREEN, REFACTOR)"] + FINISH["/devflow:finish-feature<br/>(verify + PR + retain 
+ cleanup)"] DONE(["Done"]) - COND["Conductor<br/>(parallel process)<br/>monitors all sessions"] - START --> WORKTREE WORKTREE --> SESSION - SESSION --> RECALL - RECALL --> BRAIN - BRAIN --> PLAN - PLAN --> CHOOSE - CHOOSE -->|"Same session"| SDD - CHOOSE -->|"Separate session"| EXEC - SDD --> LOOP + SESSION --> NEWFEAT + NEWFEAT --> BRAIN + BRAIN --> SPEC + SPEC --> HANDOFF1 + HANDOFF1 --> PLAN + PLAN --> HANDOFF2 + HANDOFF2 --> LOCK + LOCK --> HANDOFF3 + HANDOFF3 --> EXEC EXEC --> LOOP - LOOP --> VERIFY - VERIFY --> CHECK - CHECK --> COMMIT - COMMIT --> MR - MR --> RETAIN - RETAIN --> CLEANUP - CLEANUP --> DONE - - COND -.->|"monitors"| SESSION - COND -.->|"auto-responds<br/>routine prompts"| LOOP - COND -.->|"escalates to human<br/>during brainstorming"| BRAIN - COND -.->|"escalates on<br/>repeated failures"| VERIFY + LOOP --> FINISH + FINISH --> DONE classDef hindsightStyle fill:#7c3aed,color:#fff,stroke:#5b21b6 classDef worktrunkStyle fill:#059669,color:#fff,stroke:#047857 classDef skillsStyle fill:#be185d,color:#fff,stroke:#9d174d classDef reviewStyle fill:#d97706,color:#fff,stroke:#b45309 + classDef handoffStyle fill:#f59e0b,color:#fff,stroke:#d97706 classDef decisionStyle fill:#374151,color:#fff,stroke:#1f2937 classDef terminalStyle fill:#6b7280,color:#fff,stroke:#4b5563 - classDef conductorStyle fill:#f59e0b,color:#fff,stroke:#d97706 - classDef agentDeckStyle fill:#3b82f6,color:#fff,stroke:#1e40af - - class RECALL,RETAIN hindsightStyle - class WORKTREE,CLEANUP worktrunkStyle - class BRAIN,PLAN,SDD,EXEC,LOOP,VERIFY skillsStyle - class CHECK reviewStyle - class MR,COMMIT reviewStyle - class CHOOSE decisionStyle - class START,DONE terminalStyle - class SESSION agentDeckStyle - class COND conductorStyle + + class WORKTREE worktrunkStyle + class NEWFEAT,BRAIN,SPEC,PLAN,LOCK,EXEC,LOOP,FINISH skillsStyle + class HANDOFF1,HANDOFF2,HANDOFF3 handoffStyle + class START,SESSION,DONE terminalStyle ``` +> **Phase-handoff spawns a new Claude Desktop session** 
(`mcp__ccd_session__spawn_task`) with a deterministic title `[<TICKET>] [MR#<N>] <Phase>`. The new session is cold — its only context is the prompt the handoff hands it (leading with the next-phase slash command + absolute artefact paths). The prior session stays open as an archive. + --- ## 2. Phase 1 — Brainstorming -> **Conductor note:** The Conductor escalates to the human during brainstorming — this phase is interactive and should not be auto-responded. +> `/devflow:brainstorming` is devflow's wrapper around the upstream brainstorming skill. It pass-through-delegates to the upstream HARD-GATE design loop, then OVERRIDES the upstream's terminal handoff so it leads to `/devflow:spec-feature` (devflow inserts spec-feature between brainstorming and writing-plans). ```mermaid %%{init: {'flowchart': {'rankSpacing': 50, 'nodeSpacing': 30, 'diagramPadding': 15}}}%% graph TD - B_START(["Brainstorming skill invoked"]) + B_START(["/devflow:brainstorming invoked"]) B_CTX["Explore project context<br/>(files, docs, recent commits)"] B_Q["Ask clarifying questions<br/>(one at a time, prefer multiple choice)"] B_APPROACH["Propose 2-3 approaches<br/>(with trade-offs + recommendation)"] B_DESIGN["Present design<br/>(section by section)"] B_OK{"User approves<br/>design?"} B_DOC["Write design doc<br/>docs/plans/YYYY-MM-DD-topic-design.md"] - B_NEXT(["Invoke writing-plans skill"]) + B_NEXT(["Wrapper directs user to /devflow:spec-feature<br/>(NOT /writing-plans — devflow inserts spec-feature)"]) B_START --> B_CTX B_CTX --> B_Q @@ -129,71 +113,105 @@ graph TD ## 3. Phase 2 — Writing Plans +> `/devflow:writing-plans` is devflow's wrapper around the upstream writing-plans skill. It runs in a fresh spawned session after `phase-handoff` from spec-feature. Its only context is the frozen-state file + spec absolute path handed by the handoff prompt. 
+ ```mermaid %%{init: {'flowchart': {'rankSpacing': 50, 'nodeSpacing': 30, 'diagramPadding': 15}}}%% graph TD - P_START(["Writing-plans skill invoked"]) + P_START(["/devflow:writing-plans invoked<br/>(fresh spawned session)"]) + P_READ["Read frozen-state file<br/>+ spec absolute path"] P_BREAK["Break design into<br/>bite-sized tasks (2-5 min each)"] P_STRUCT["Structure each task:<br/>files, test, implementation, command"] P_TDD["Embed TDD steps per task:<br/>1. Write failing test<br/>2. Verify fail<br/>3. Implement<br/>4. Verify pass<br/>5. Commit"] P_SAVE["Save plan to<br/>docs/plans/YYYY-MM-DD-feature.md"] - P_CHOOSE{"Execution<br/>approach?"} - P_SDD(["Subagent-Driven Dev<br/>(same session)"]) - P_EXEC(["Executing Plans<br/>(parallel session)"]) + P_HANDOFF(["/devflow:phase-handoff<br/>--phase plan --next-phase lock-tests"]) - P_START --> P_BREAK + P_START --> P_READ + P_READ --> P_BREAK P_BREAK --> P_STRUCT P_STRUCT --> P_TDD P_TDD --> P_SAVE - P_SAVE --> P_CHOOSE - P_CHOOSE -->|"Subagent-Driven"| P_SDD - P_CHOOSE -->|"Parallel Session"| P_EXEC + P_SAVE --> P_HANDOFF + + classDef skillsStyle fill:#be185d,color:#fff,stroke:#9d174d + classDef handoffStyle fill:#f59e0b,color:#fff,stroke:#d97706 + classDef terminalStyle fill:#6b7280,color:#fff,stroke:#4b5563 + + class P_READ,P_BREAK,P_STRUCT,P_TDD,P_SAVE skillsStyle + class P_HANDOFF handoffStyle + class P_START terminalStyle +``` + +--- + +## 4. Phase 3 — Lock Tests (Batch Failing Test Inventory) + +> `/devflow:lock-tests` reads spec + plan + AC, writes ALL failing tests up-front (one per AC + judgment-driven edge cases), emits a Test Inventory document with a `## Considered but not added` section, then gates with `AskUserQuestion` before any implementation begins. 
+ +```mermaid +%%{init: {'flowchart': {'rankSpacing': 50, 'nodeSpacing': 30, 'diagramPadding': 15}}}%% +graph TD + L_START(["/devflow:lock-tests invoked<br/>(fresh spawned session)"]) + L_READ["Read frozen-state + spec + plan + AC"] + L_TRIVIAL{"Trivial change?<br/>(escape hatch)"} + L_BATCH["Write ALL failing tests<br/>(one per AC + judgment edge cases)"] + L_VERIFY["Run tests — confirm each fails<br/>for the RIGHT reason"] + L_INV["Emit Test Inventory doc<br/>(AC→test map + 'Considered but not added')"] + L_GATE{"User approves<br/>inventory?"} + L_HANDOFF(["/devflow:phase-handoff<br/>--phase lock-tests --next-phase impl"]) + + L_START --> L_READ + L_READ --> L_TRIVIAL + L_TRIVIAL -->|"Skip gate"| L_HANDOFF + L_TRIVIAL -->|"No"| L_BATCH + L_BATCH --> L_VERIFY + L_VERIFY --> L_INV + L_INV --> L_GATE + L_GATE -->|"Add more"| L_BATCH + L_GATE -->|"Approve"| L_HANDOFF classDef skillsStyle fill:#be185d,color:#fff,stroke:#9d174d classDef decisionStyle fill:#374151,color:#fff,stroke:#1f2937 + classDef handoffStyle fill:#f59e0b,color:#fff,stroke:#d97706 classDef terminalStyle fill:#6b7280,color:#fff,stroke:#4b5563 - class P_BREAK,P_STRUCT,P_TDD,P_SAVE skillsStyle - class P_CHOOSE decisionStyle - class P_START,P_SDD,P_EXEC terminalStyle + class L_READ,L_BATCH,L_VERIFY,L_INV skillsStyle + class L_TRIVIAL,L_GATE decisionStyle + class L_HANDOFF handoffStyle + class L_START terminalStyle ``` --- -## 4. Phase 3 — TDD Implementation Loop (Per Task) +## 5. Phase 4 — TDD Implementation Loop (Per Task) -> **Conductor note:** The Conductor can auto-respond to routine prompts during this phase (e.g., confirming test runs, approving standard refactors). It escalates to the human when tests fail repeatedly. +> `/devflow:executing-plans` is devflow's wrapper around the upstream executing-plans skill. 
The wrapper delegates the per-task red-green-refactor loop to the upstream flow AND intercepts the terminal handoff so it goes to `/devflow:finish-feature` (NOT upstream `finishing-a-development-branch`). ```mermaid %%{init: {'flowchart': {'rankSpacing': 50, 'nodeSpacing': 30, 'diagramPadding': 15}}}%% graph TD - T_START(["Task N from plan"]) - T_RED["RED: Write failing test<br/>(exact test from plan)"] - T_RUN1["Run test<br/>Expected: FAIL"] - T_FAIL{"Test<br/>fails?"} + T_START(["Task N from plan<br/>(test already locked from Phase 3)"]) + T_VERIFY_RED["Verify test still fails<br/>(safety check)"] T_GREEN["GREEN: Write minimal<br/>implementation to pass"] - T_RUN2["Run test<br/>Expected: PASS"] + T_RUN["Run test<br/>Expected: PASS"] T_PASS{"Test<br/>passes?"} T_REFACTOR["REFACTOR: Clean up<br/>(no behavior change)"] - T_RUN3["Run full test suite<br/>Expected: ALL PASS"] + T_RUN_ALL["Run full test suite<br/>Expected: ALL PASS"] T_COMMIT["Commit<br/>(frequent, small commits)"] T_SPEC["Spec Review<br/>(does implementation match spec?)"] T_SPEC_OK{"Spec<br/>approved?"} T_QUALITY["Code Quality Review<br/>(dispatches code-reviewer)"] T_QUALITY_OK{"Quality<br/>approved?"} - T_DONE(["Task complete"]) - - T_START --> T_RED - T_RED --> T_RUN1 - T_RUN1 --> T_FAIL - T_FAIL -->|"No — fix test"| T_RED - T_FAIL -->|"Yes"| T_GREEN - T_GREEN --> T_RUN2 - T_RUN2 --> T_PASS + T_DONE(["Task complete<br/>→ next task or /devflow:finish-feature"]) + + T_START --> T_VERIFY_RED + T_VERIFY_RED --> T_GREEN + T_GREEN --> T_RUN + T_RUN --> T_PASS T_PASS -->|"No — fix impl"| T_GREEN T_PASS -->|"Yes"| T_REFACTOR - T_REFACTOR --> T_RUN3 - T_RUN3 --> T_COMMIT + T_REFACTOR --> T_RUN_ALL + T_RUN_ALL --> T_COMMIT T_COMMIT --> T_SPEC T_SPEC --> T_SPEC_OK T_SPEC_OK -->|"No — fix issues"| T_GREEN @@ -209,24 +227,24 @@ graph TD classDef decisionStyle fill:#374151,color:#fff,stroke:#1f2937 classDef terminalStyle fill:#6b7280,color:#fff,stroke:#4b5563 - class T_RED,T_RUN1 redStyle - class T_GREEN,T_RUN2 
greenStyle - class T_REFACTOR,T_RUN3,T_COMMIT refactorStyle + class T_VERIFY_RED redStyle + class T_GREEN,T_RUN greenStyle + class T_REFACTOR,T_RUN_ALL,T_COMMIT refactorStyle class T_SPEC,T_QUALITY reviewStyle - class T_FAIL,T_PASS,T_SPEC_OK,T_QUALITY_OK decisionStyle + class T_PASS,T_SPEC_OK,T_QUALITY_OK decisionStyle class T_START,T_DONE terminalStyle ``` --- -## 5. Phase 4 — Finishing & Merge Request +## 6. Phase 5 — Finishing & Merge Request -This phase runs entirely inside the agent session: verification, devflow check, commit, visualization updates, PR description strategy, PR/MR creation, retain learnings, and optional worktree cleanup. Post-PR continuation is enforced by hooks (PostToolUse nudge + Stop hook PR detection + explicit skill instruction). +This phase runs entirely inside the spawned implementation session: verification, devflow check, commit, visualization updates, PR description strategy, PR/MR creation, retain learnings, and optional worktree cleanup. Post-PR continuation is enforced by hooks (PostToolUse nudge + Stop hook PR detection + explicit skill instruction). ```mermaid %%{init: {'flowchart': {'rankSpacing': 50, 'nodeSpacing': 30, 'diagramPadding': 15}}}%% graph TD - F_START(["All tasks complete"]) + F_START(["All tasks complete<br/>(triggered by executing-plans wrapper)"]) subgraph AgentActions [" Agent Actions (inside session) "] F_VERIFY["Run full verification<br/>(tests, lint, build)"] @@ -283,7 +301,7 @@ graph TD classDef terminalStyle fill:#6b7280,color:#fff,stroke:#4b5563 classDef skillsStyle fill:#be185d,color:#fff,stroke:#9d174d - class F_VERIFY,F_PASS,F_FIX verifyStyle + class F_VERIFY,F_FIX verifyStyle class F_CN,F_SELF,F_PUSH,F_GH,F_COMMIT,F_CHECKPOINT reviewStyle class F_RETAIN hindsightStyle class F_SUMMARY langfuseStyle @@ -295,49 +313,31 @@ graph TD --- -## 6. 
Tool Active at Each Phase - -| Phase | Hindsight (L1) | Agent Deck (L2) | Conductor (L2) | Worktrunk (L3) | Code Review (L4) | Skills (L5) | Langfuse (L6) | -| ------------------ | :-----------------: | :-------------: | :------------: | :----------------: | :---------------: | :------------: | :-------------: | -| **Start (CLI)** | — | wraps session | — | create worktree | — | — | — | -| **Recall (Agent)** | recall | — | monitors | — | — | — | traces | -| **Brainstorming** | recall context | — | escalates | — | — | brainstorming | traces | -| **Writing Plans** | — | — | monitors | — | — | writing-plans | traces | -| **TDD Loop** | retain discoveries | — | auto-responds | isolated workspace | — | TDD, SDD | traces | -| **Spec Review** | — | — | auto-responds | — | — | spec-reviewer | traces | -| **Quality Review** | — | — | auto-responds | — | — | code-reviewer | traces | -| **Pre-Push** | — | — | monitors | — | devflow check | pre-push-check | traces | -| **Create MR** | context for PR body | — | monitors | — | — | create-pr | traces | -| **Viz Check** | — | — | monitors | — | — | finish-feature | — | -| **PR Strategy** | recall preference | — | monitors | — | — | finish-feature | — | -| **Create MR** | context for PR body | — | monitors | — | — | finish-feature | traces | -| **Finish (Agent)** | retain learnings | — | monitors | — | — | finish-feature | session-summary | -| **Cleanup (Agent)**| — | — | — | devflow done | — | finish-feature | — | +## 7. 
Tool Active at Each Phase ---- +| Phase | Hindsight (L1) | Worktrunk (L2) | Code Review (L3) | Skills (L4) | Langfuse (L5) | +| ----------------- | :-----------------: | :----------------: | :--------------: | :----------------------: | :-------------: | +| **Start (CLI)** | — | create worktree | — | — | — | +| **new-feature** | recall context | — | — | /devflow:new-feature | traces | +| **brainstorming** | recall context | — | — | /devflow:brainstorming | traces | +| **spec-feature** | recall + retain | — | — | /devflow:spec-feature | traces | +| **writing-plans** | retain decisions | — | — | /devflow:writing-plans | traces | +| **lock-tests** | retain decisions | — | — | /devflow:lock-tests | traces | +| **TDD Loop** | retain discoveries | isolated workspace | spec/quality | /devflow:executing-plans | traces | +| **Pre-Push** | — | — | devflow check | /devflow:pre-push-check | traces | +| **Finish + MR** | retain learnings | devflow done | — | /devflow:finish-feature | session-summary | -## 7. Entry Points +--- -There are two ways to start a devflow development session: +## 8. Entry Point -### Recommended: `devflow worktree` +There is one canonical way to start a devflow development session: ```bash -devflow worktree feat/X --agent claude +devflow worktree TICKET-N ``` - Uses **worktrunk** under the hood for worktree creation -- Runs `wt step copy-ignored` to copy `.env`, `node_modules`, and other gitignored files -- Launches an agent-deck session in the new worktree -- Single command from idea to working agent session - -### Alternative: `agent-deck add` - -```bash -agent-deck add . 
-c claude --worktree feat/X -b -``` - -- Atomic command — creates worktree + session in one step -- Does **not** run `copy-ignored` (no `.env`, `node_modules` in new worktree) -- Useful when you don't need gitignored files (e.g., pure documentation work) -- `-b` flag runs session in background +- Branch-name enforcement: ticket-shaped names pass through, free-form names are prefixed with `feat/` +- Worktree lands at `~/dev/.worktrees/<repo>/<branch-slug>` +- Open a Claude Code session in the new worktree → invoke `/devflow:new-feature` as the first command