diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 6f29b6b..26a101a 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -1670,6 +1670,24 @@ "security", "compliance" ] + }, + { + "name": "forkmind", + "source": "./plugins/forkmind", + "description": "Local-first LLM branching & debugging. Treat AI context windows like a Git repo — capture, branch, and regression-test LLM calls as a DAG. Free & local via Ollama, any OpenAI-compatible API, and MCP for agents.", + "version": "0.1.0", + "author": { + "name": "Medhovarsh Bayyapureddi", + "url": "https://github.com/Medhovarsh" + }, + "category": "Development Engineering", + "homepage": "https://github.com/ccplugins/awesome-claude-code-plugins/tree/main/plugins/forkmind", + "keywords": [ + "llm", + "debugging", + "observability", + "mcp" + ] } ] } \ No newline at end of file diff --git a/plugins/forkmind/.claude-plugin/plugin.json b/plugins/forkmind/.claude-plugin/plugin.json new file mode 100644 index 0000000..97349c2 --- /dev/null +++ b/plugins/forkmind/.claude-plugin/plugin.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://anthropic.com/claude-code/plugin.schema.json", + "name": "forkmind", + "version": "0.1.0", + "description": "Local-first LLM branching & debugging. Treat AI context windows like a Git repo — capture, branch, and regression-test LLM calls as a DAG. 
Teaches Claude when and how to drive ForkMind.", + "author": { + "name": "Medhovarsh Bayyapureddi", + "url": "https://github.com/Medhovarsh" + }, + "homepage": "https://medhovarsh.github.io/forkmind/", + "repository": "https://github.com/Medhovarsh/forkmind", + "license": "MIT", + "keywords": [ + "llm", + "debugging", + "observability", + "ollama", + "mcp", + "local-first" + ], + "mcpServers": { + "forkmind": { + "command": "npx", + "args": ["-y", "github:medhovarsh/forkmind", "mcp"] + } + } +} diff --git a/plugins/forkmind/README.md b/plugins/forkmind/README.md new file mode 100644 index 0000000..c758928 --- /dev/null +++ b/plugins/forkmind/README.md @@ -0,0 +1,13 @@ +# ForkMind 🧠 + +Local-first LLM branching & debugging. Treat AI context windows like a Git repo — +capture, branch, and regression-test LLM calls as a DAG. Free & local via Ollama, +any OpenAI-compatible API, and an MCP server so agents query their own history. + +- **Skill** `forkmind` — Claude reaches for it on debug/compare/branch/regression. +- **Command** `/forkmind` — start / branch / test / mcp. +- **Agent** `forkmind-debugger` — isolated model/prompt compares, compact verdict. +- **MCP** auto-wired — agents query their own `.forkmind/` history. + +Source & docs: https://github.com/Medhovarsh/forkmind +Homepage: https://medhovarsh.github.io/forkmind/ diff --git a/plugins/forkmind/agents/forkmind-debugger.md b/plugins/forkmind/agents/forkmind-debugger.md new file mode 100644 index 0000000..22ac07c --- /dev/null +++ b/plugins/forkmind/agents/forkmind-debugger.md @@ -0,0 +1,43 @@ +--- +name: forkmind-debugger +description: > + Drives ForkMind to debug, compare, or regression-test LLM / agent calls in an + isolated context. Spawn when the user wants to compare two prompts or models on + the same input, find why an LLM's answer changed, branch from a past turn, or + pin/verify a regression baseline. 
Returns a compact verdict (winner, diffs, + drift) — not raw transcripts — so main context stays small. +tools: [Read, Bash, Glob, Grep] +--- + +You drive **ForkMind** — a local-first proxy that records LLM calls into +`.forkmind/` as a branchable DAG. Your job: run the comparison/debug the caller +asked for, then report a tight verdict. + +## Operating rules + +- ForkMind is local, free (Ollama default), no cloud. Never send data anywhere. +- Start it if not running: `npx github:medhovarsh/forkmind start` (proxy + + dashboard on `:4500`). Clients hit `http://localhost:4500/v1`. +- Inspect captured runs from `.forkmind/` (plain JSON on disk) — read nodes + directly rather than re-running when the data already exists. +- If `forkmind mcp` history tools are available, use them to trace lineage. + +## Workflow + +1. Confirm what to compare: prompt A vs B, model X vs Y, or before/after a tweak. +2. Ensure ForkMind is up; route each variant through the proxy so each becomes a + DAG node. +3. Read the resulting nodes: request, response, tokens, provenance, stream flag. +4. For regression: compare against the pinned baseline; flag drift. + +## Report format (return this, nothing more) + +``` +VERDICT: +A (): · +B (): · +DIFF: +NEXT: +``` + +Keep it short. Caller wants the conclusion, not the logs. diff --git a/plugins/forkmind/commands/forkmind.md b/plugins/forkmind/commands/forkmind.md new file mode 100644 index 0000000..31f5749 --- /dev/null +++ b/plugins/forkmind/commands/forkmind.md @@ -0,0 +1,15 @@ +--- +description: Start ForkMind or branch/inspect/regression-test an LLM call +--- + +Invoke the `forkmind` skill. Based on `$ARGUMENTS`: + +- no args or `start` → tell the user how to launch ForkMind + (`npx github:medhovarsh/forkmind start`, proxy + dashboard on :4500) and how + to point their OpenAI-compatible client at `http://localhost:4500/v1`. 
+- `branch` / `fork` → explain forking an alternative prompt or model from a + historical node in the dashboard. +- `test` / `regression` → explain pinning baselines and catching drift in CI. +- `mcp` → show the MCP server config so an agent can query its own history. + +Keep it actionable: give the exact command(s) for what the user asked. diff --git a/plugins/forkmind/skills/forkmind/SKILL.md b/plugins/forkmind/skills/forkmind/SKILL.md new file mode 100644 index 0000000..83b0247 --- /dev/null +++ b/plugins/forkmind/skills/forkmind/SKILL.md @@ -0,0 +1,88 @@ +--- +name: forkmind +description: > + Use when debugging, comparing, or regression-testing LLM / agent calls — when + the user wants to capture LLM traffic, see a conversation as a branchable DAG, + fork an alternative prompt or model from a past turn, or pin good outputs as + baselines to catch drift. ForkMind is local-first (no cloud, no account) and + proxies any OpenAI-compatible API, defaulting to free Ollama models. + Trigger: "debug this prompt", "compare two models", "branch from that turn", + "why did the LLM change its answer", "regression test my prompt", "capture LLM + calls", "forkmind". +--- + +# ForkMind + +ForkMind treats AI context windows like a Git repo. It runs a local proxy that +records every LLM call into `.forkmind/` as a node in a DAG, serves a dashboard, +and lets you branch alternative prompts/models from any historical turn. + +## When to reach for it + +| Situation | Why ForkMind | +|---|---| +| "This prompt sometimes regresses — catch it" | Pin baselines, re-run, diff drift | +| "Compare llama3 vs gpt-4o on the same turn" | Fork the node, swap model, re-run | +| "Why did the agent change its answer?" 
| Inspect the DAG: request/response, tokens, lineage | +| "Capture all LLM traffic during this task" | Point client at the proxy; everything is logged | +| Agent needs to recall its own past attempts | ForkMind MCP server exposes the history | + +Do NOT use for: production traffic logging at scale, hosted/cloud observability, +or non-LLM HTTP debugging. + +## Run it + +ForkMind runs straight from the git link — no npm registry needed: + +```bash +# starts proxy + dashboard on :4500 +npx github:medhovarsh/forkmind start +``` + +Then point any OpenAI-compatible client at the proxy: +- `baseURL: http://localhost:4500/v1` +- dashboard: `http://localhost:4500` + +Free local default: install [Ollama](https://ollama.com), `ollama pull llama3`. +Any provider works (OpenAI, Anthropic, Groq, OpenRouter, Together, vLLM, LM Studio) +by passing that provider's base URL + key through the proxy. + +## Core moves + +- **Capture** — route calls through `:4500/v1`; each call becomes a DAG node. +- **Branch** — in the dashboard, "Fork from here" on any node → edit prompt / + swap model / change params → re-run only that subtree. +- **Inspect** — node inspector shows request, response, tokens, provenance, and + whether the response was streamed. +- **Regression test** — pin a known-good output as a baseline; re-run after a + prompt tweak; ForkMind flags drift (wire into CI). + +## MCP for agents + +Installed as the Claude Code plugin, ForkMind's MCP server is wired up +automatically (`forkmind` server, launched via `npx … forkmind mcp`). An agent +can query its own history mid-task — recall previous attempts, trace lineage, +self-correct. No manual config needed. 
+ +Outside the plugin, add it to any MCP client by hand: + +```json +{ + "mcpServers": { + "forkmind": { "command": "npx", "args": ["-y", "github:medhovarsh/forkmind", "mcp"] } + } +} +``` + +## Delegating heavy compares + +For "compare model X vs Y" or "why did the answer change" tasks that would dump +big transcripts into context, spawn the **`forkmind-debugger`** subagent. It runs +the comparison in isolation and returns a compact verdict (winner, diff, drift), +not raw logs. + +## Notes + +- Everything is plain JSON on disk under `.forkmind/`. No database, no telemetry. +- Add `.forkmind/` to `.gitignore` (the repo already does this for its own checkout). +- Full docs: https://github.com/Medhovarsh/forkmind#readme