diff --git a/.php-cs-fixer.dist.php b/.php-cs-fixer.dist.php index 86debc5..d594f38 100644 --- a/.php-cs-fixer.dist.php +++ b/.php-cs-fixer.dist.php @@ -18,6 +18,11 @@ 'single_quote' => true, 'trailing_comma_in_multiline' => true, 'declare_strict_types' => true, + // a blank line before these statements => a blank line after a control-structure's closing `}` + 'blank_line_before_statement' => ['statements' => [ + 'break', 'continue', 'declare', 'do', 'for', 'foreach', 'if', + 'return', 'switch', 'throw', 'try', 'while', 'yield', 'yield_from', + ]], 'phpdoc_align' => false, 'phpdoc_separation' => false, 'no_superfluous_phpdoc_tags' => false, diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index d62e3e3..203edb5 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -1,294 +1,294 @@ # php-claw — Architecture -A minimal personal AI agent (OpenClaw / NanoClaw style) built **entirely on PHP -TrueAsync**. You message it from Telegram; it runs a Claude agent loop that can -take real actions on the host (bash, files) and replies in the same chat. -"Claude Code whose terminal is Telegram." - -## Core idea: single thread, all coroutines - -Everything the agent does is **I/O-bound**: HTTP to Claude, HTTP to Telegram, -`bash` subprocesses, SQLite. Under TrueAsync all of these `await` and cost no CPU -while suspended. So there is **no ThreadPool and no threads** — one OS process, -one reactor, and a coroutine per concurrent unit of work. Hundreds of chats run -concurrently in a single thread. - -(ThreadPool is only worth adding later for genuinely CPU-bound work — there is -none in v1. Sub-agents, if added, are just more coroutines.) 
- -## Model - -``` - one process, one reactor - ┌──────────────────────────────────────────────────────────────────┐ - │ Telegram poll loop (async curl getUpdates, long-poll) │ - │ │ │ - │ ▼ │ - │ Router: authorize chat_id │ - │ ├─ message ──▶ session inbound Channel (per chat_id) │ - │ └─ callback ──▶ Approval registry (resolve pending Future) │ - │ │ - │ per chat_id: a session coroutine │ - │ while (msg = inbox.recv()): runTurn(msg) │ - │ │ - │ runTurn = agent loop (AgentInterface) + tool execution (await'd) │ - │ state persisted in per-session SQLite (PDO pool) │ - └──────────────────────────────────────────────────────────────────┘ -``` - -All boxes are coroutines in the same thread. `Channel` and `Future` here are the -in-thread coroutine primitives (no cross-thread copying). - -## One turn - -A *turn* = full handling of one user message until a final reply. Internally it -is the agentic loop (many Claude round-trips + actions), so it can take -seconds–minutes — but it only ever `await`s, never blocks the thread: - -``` -runTurn(text): - history = store.load(chatId) - loop: - resp = await agent.send(AgentRequest{system, history, tools, model}) # async, retry - history += Message(Assistant, resp.content) - if resp.stopReason == ToolUse: - foreach resp.toolCalls as call: - result = await executor.call(toToolCall(call)) # security + bash subprocess - history += Message(Tool, ToolResult(call.id, result)) - continue - else: - store.save(chatId, history) - return resp.text # delivered to Telegram -``` - -## Agent (LLM backend) interface - -The agent **only decides the next action; it never executes tools** — that is the -`Executor` chain's job (where security lives). So the interface is a single model -round-trip: given history + tool specs, return the model's next move (text or -tool-use requests). The turn loop owns the loop. +A per-issue **autonomous solver** for software projects, built entirely on PHP +**TrueAsync**. 
You register an external project folder, open issues against it, and +`claw run <id>` drives an issue to resolution. A second face — `claw serve` — is a +JSON + SSE dashboard API that runs and observes those issues live. + +## Core idea: a workflow that writes a workflow + +`claw run <id>` does **not** solve the issue directly. The default entry workflow, +`GenerateIssueWorkflow`, is a *workflow that writes a workflow*: it generates a PHP +**solver class** tailored to that one issue, saves it as the project's procedural +memory (via the `define_workflow` tool), a human approves the generated code, and +then that solver runs against the real project folder. A solver already generated +for an issue is reused. Crashes are auto-repaired (a supervisor rewrites the class) +and resumed from a durable snapshot. + +So claw's unit of work is a **run**: one execution of one issue's solver, with a +ledger row, a trace journal, and a resumable state snapshot tying it together. + +## Concurrency: single thread, all coroutines + +Everything claw does is **I/O-bound**: async curl to the model, `bash` over async +streams, SQLite. Under TrueAsync these `await` and cost no CPU while suspended, so +there is **one process, one reactor, a coroutine per unit of work — no ThreadPool**. + +- `claw run` is a single foreground run on the reactor. +- `claw serve` boots `TrueAsync\HttpServer`; **every request handler is a coroutine** + on the one event loop. `POST .../start` spawns the run as a *detached coroutine in + a long-lived `Async\Scope`*, so it outlives the request and a crash is contained. + Because runs, SSE streams, and the human gate all share one loop, the trace bus and + the gate are plain in-process `Async\Channel`s — no thread boundary. + +`Tracer` is synchronous and single-stack; parallel sub-workflows (a per-coroutine +span stack) are a known limitation. 
+ +## Projects & state + +A **project** is an external working tree (a folder, maybe a git repo) that lives +elsewhere on disk; claw never creates or mutates that folder. It owns only the +*application-side* state: **one SQLite file per project** under the app home +(`<home>/projects/<key>.db`), keyed by the folder's absolute path +(`ProjectStore::keyFor` slugifies it). `ProjectStore::discover` walks up from the cwd +to the nearest registered project, the way git finds the repo root. One open `\PDO` +is shared by the state store and the tracer. + +Tables (the source of truth is the code that creates them): + +| table | columns | role | +|---|---|---| +| `project` | `id, name, path, description, created_at` | the registered folder | +| `issues` | `id, title, description, status, created_at` | status = enum **name** (`Open\|InProgress\|WaitingHuman\|Done\|Closed`) | +| `runs` | `id, issue_id, workflow, status, created_at` | the ledger; status = enum **value** (`running\|generated\|done\|failed`) | +| `trace` | `seq, run_id, span_id, parent_id, depth, phase, type, level, data, created_at` | the journal; `seq` is the global resume cursor | +| `workflow_state` | `run_id, state, done, updated_at` | durable run snapshot (JSON) | +| `workflow_handoff` | `run_id, from_step, handoff, updated_at` | the step-to-step baton, persisted | +| `state_seq` | `id` | monotonic leaf-call ids | + +## CLI surface (`Claw\Cli\WorkflowMode`) + +- `claw -c [folder]` — register an external folder, create its `.db`. The only + command that does not resolve an existing project. +- `claw -i "<title>"` — open an issue in the resolved project. +- `claw run <id>` — generate/run the solver for an issue (loads the full `Config`; + needs an API key). Wires the console seams and delegates to `IssueRunner`. +- `claw log [runId]` — print a run's recorded trace tree via `TraceReader` + (read-only, no API key). Defaults to the latest run. 
+- `claw serve [--host H] [--port N]` — boot the dashboard `Server` + (default `127.0.0.1:8787`). Requires the TrueAsync server extension. + +Cross-cutting: `--project <dir>` / `-C` (also `CLAW_PROJECT`), `-q`/`-v` verbosity. +`--session` reaches the legacy chat mode. + +## The run pipeline (`Claw\Cli\IssueRunner`) + +`IssueRunner` is the shared headless engine behind **both** `claw run` and the +server's `POST start`. `IssueRunner::run(Issue)`: + +1. Build the environment against the **real project folder**: a `Workspace`, a + `Registry` of tools, a `SqliteStateStore` + `TraceStore` on the project PDO, + role models and budgets from `Config`. +2. Resolve the solver class (`Issue<id>Solver`). `ProjectStore::resumableRun` reuses + the runId of an interrupted run (still `running`), else `recordRun`. Issue → + `InProgress`. +3. `ensureSolver`: reuse the solver on disk, or run `GenerateIssueWorkflow` to write + it, then call the **`$approve`** seam. Declined → run `generated`, stop. +4. `runSolver`: instantiate the solver, `->run()`. `WorkflowFinished` (the `done` + tool) = clean finish. Any `\Throwable` → **repair-and-resume**: `SuperviseWorkflow` + (supervisor role) writes a fixed class `…R<n>`, the same runId resumes from its + snapshot (finished steps skipped), bounded by `MAX_REPAIRS = 2`. Success → run + + issue `Done`. 
+ +The pipeline holds no I/O opinion — four **seams** are injected, so console vs server +differ only here: + +| seam | type | console (`claw run`) | server (`POST start`) | +|---|---|---|---| +| `$human` | `\Closure(Tracer): SpeakerInterface` | `ConsoleSpeaker` | `HttpGateSpeaker` | +| `$approve` | `\Closure(path, code): bool` | show + `confirm` | auto-`true` | +| `$report` | `\Closure(msg, isError): void` | STDOUT/STDERR | discard (dashboard reads the journal) | +| `$liveSink` | `?TraceSinkInterface` | `ConsoleTraceSink` | `LiveTraceSink` | + +The human tier is a *factory* because the HTTP gate records through the run's tracer, +which only exists after the environment is built. + +## Workflows (`Claw\Workflow`) + +A workflow is a PHP class — `WorkflowAbstract` is a **helper, not an engine**. State is +the subclass's own typed fields; the base offers one narrow surface: + +- `ai(prompt, ?tools, ?agent)` — drive the model (the work happens here; the model + calls tools / `ask`s, never the PHP). +- `tool(name, params)` — call a tool (errors come back as a string, never thrown). +- `step()`, `ask()`, `artifact()`, `critique()`, `criticRules()`, `param()`, `log()`. + +Mechanics: + +- **`#[Step]`** marks a `protected` method (the validator rejects public/private). The + default `run()` drives them in declaration order; a subclass may override `run()` + with plain `if/while` for branching/looping phases. +- **Critics** — a step may declare `#[Step(critic: '<name>', maxRounds: N)]`. After the + method runs, its result is judged on the **reviewer** role against + `criticRules()['<name>']` (a name with no rule fails the run). The critic is a full + AI with every tool — it opens the artifacts, runs `php -l`/tests, and re-runs the + step until it passes (default cap `DEFAULT_MAX_ROUNDS = 50`, then escalates). +- **The ask channel** is a ladder, `EscalatingSpeaker(first, …rest)`: ask each tier, + first non-null wins, null passes up. 
In a run it is + `EscalatingSpeaker(supervisorAgent, human)` — a tool-less **supervisor agent** + settles `accept` / `stop` / guidance on its own judgement, and only `ESCALATE` + (→ null) reaches the human tier. +- **Durable resume is snapshot-based, not replay.** After each step the base saves + `{state, done}` to `workflow_state`; construction restores the fields and `done` + skips finished steps. The durability boundary is the **step edge** — a crash + mid-step re-runs that whole step. +- **Handoffs** — a selective context baton: after a step the model is asked, in that + step's own history, to summarise what the next step must watch for; it is persisted + to `workflow_handoff` so a resumed process restores it without re-asking. +- **`Environment` / `EnvKey`** — a scoped key→value with a parent link, so + project → issue → workflow → sub-workflow inherit (worker, registry, model, store, + tracer, ask channel, budgets…). + +`WorkflowStore` writes/loads generated classes; `WorkflowValidator` is the safety gate +before saving; `DefineWorkflowTool` is the `define_workflow` door. + +## Agents (`Claw\Agent`) + +- **`AgentInterface`** — one method, `send(AgentRequest): AgentResponse`: a single model + round-trip (text or tool-use). It never executes tools. +- **`AbstractAgent`** — `send()` wraps the provider-specific `attempt()` with cause-aware + retry (`BackoffAgentRetryPolicy`, typed exceptions classifying transient vs permanent), + suspending via `Async\delay`. `CurlHttpClient` is a single request (no retry). +- Concrete: **`ClaudeAgent`** (Anthropic Messages) and **`OpenAiCompatibleAgent`** (Chat + Completions — DeepSeek / Groq / Mistral / Qwen / Ollama / OpenRouter / OpenAI). A + `gemini` config value is accepted but **not yet wired** in `Cli::makeAgent`. +- **Role tiers** differ only by model. `SpeakerRole`: `Worker, Reviewer, Supervisor, + Planner, Human`, plus `*-smart` tiers. 
`CLAW_AGENT_<ROLE>=<model>` → `Config::$agents` + → `EnvKey::Agents`; `ai(…, agent: 'reviewer')` routes by name, an unknown role falls + back to the scope model. `DefaultTurnLoop` is the ReAct loop; `Budget` caps tokens + + time along the parent chain. + +## Tools (`Claw\Tool`) ```php -interface AgentInterface { - // One model round-trip. May await (async HTTP). No tool execution here. - public function send(AgentRequest $request): AgentResponse; +interface ToolInterface { + public function name(): string; + public function description(): string; + public function inputSchema(): array; // JSON Schema + public function risk(): Risk; // Safe | Mutating | Dangerous + public function handle(array $input): string; // tool_result text; may await } ``` -Provider-neutral value types (so any backend fits, not just Anthropic): +The run-path tool set wired in `IssueRunner`: **`bash`, `read_file`, `write_file`, +`list_files`, `define_workflow`, `done`** (`FinishTool`, throws `WorkflowFinished`), +and **`recall`** (the run's own journal + task brief, added once the tracer exists). +A workflow may also expose its own `#[Tool]`-annotated methods. `done` ends the whole +run — it means the deliverable exists and is verified, not "this step finished". 
-```php -AgentRequest { string system; Message[] messages; ToolSpec[] tools; - string model; int maxTokens; float temperature; } -AgentResponse { ?string text; ToolUseBlock[] toolCalls; StopReason stopReason; Usage usage; } - -Message { Role role; ContentBlock[] content; } // Role: User | Assistant | Tool -TextBlock { string text; } -ToolUseBlock { string id; string name; array input; } -ToolResultBlock { string toolUseId; string content; bool isError; } -ToolSpec { string name; string description; array inputSchema; } // from ToolInterface -enum StopReason { EndTurn, ToolUse, MaxTokens } -``` +## Tool execution & security (`Claw\Exec`, `Claw\Permission`) -Implementations (the persona is just `$system` — no class per persona; only per -provider): +`ChainExecutor` runs a middleware onion (`AuditMiddleware`, `PermissionMiddleware`, +`TimeoutMiddleware`) around a terminal that resolves and `await`s the tool. -- `ClaudeAgent` — native Anthropic. -- `OpenAiCompatibleAgent` — one class for DeepSeek / Groq / Mistral / Qwen / - Ollama / OpenRouter (differ only by base URL + model + key). -- `GeminiAgent` — native Google (or via its OpenAI-compatible endpoint). +**Honest note:** the *autonomous run path* (`Environment::executor()`) builds the chain +with an **empty middleware list** — no permission gate, no timeout, no audit. An +autonomous run is effectively allow-all; its safety story today is observability (the +`Tracer`) plus the human gate, not a permission layer. The middlewares + `Policy` +(deterministic denylist, then Safe→allow / Mutating→confirm / Dangerous→deny) are used +**only** by the legacy chat path. A real autonomous-bash policy is a known gap. -Cheap + capable defaults to start with: DeepSeek (`deepseek-chat`), Gemini Flash, -Groq-hosted Llama/Qwen. Exact model ids and pricing confirmed at implementation. 
+## Tracing (`Claw\Trace`) -## Chat interface (Telegram now, WhatsApp later) +`Tracer` is the single typed recorder per run: hierarchical `enterWorkflow / enterStep +/ enterAi / enterTurn` + `exit`, and point events `prompt / reply / toolCall / +toolResult / log / artifact / handoff / question / answer`. It holds a parent stack + +depth and fans each `TraceRecord` out to every `TraceSinkInterface` (a failing sink +never breaks the run). `reply` carries token usage, so the journal doubles as a cost +ledger. The span hierarchy is **workflow → step → ai → turn → reply / tool**. -The chat is an abstraction over the messenger — its only job is "what did the -human say?" and "send this to the human". A socket-style shape: the gateway -`accept()`s the next new chat and hands back a `Conversation` bound to it. +Sinks: `TraceStore` (durable `trace` table), `ConsoleTraceSink` (live stderr tree for +`claw run`), `LiveTraceSink` (publishes to the `TraceBus` for the server), +`ArrayTraceSink` (tests). `TraceReader` + `claw log` render the tree back. -```php -interface ChatInterface { // gateway - public function accept(): ConversationInterface; // await the next new conversation -} +## Dashboard server (`src/Server.php`) -interface ConversationInterface { // one chat, bound (no chatId) - public function receive(): ?string; // next message, null when closed - public function send(string $text): void; -} -``` - -Demultiplexing many chats over one connection (Telegram) is the gateway's -internal concern; the Session sees only its own `Conversation`. The main loop is -`while (true) { $c = $chat->accept(); spawn(fn () => new Session($c, ...)->run()); }`. -`ConsoleChat` yields a single stdin/stdout conversation. - -## Tools +Boots `TrueAsync\HttpServer` and routes every request through one `handle()` coroutine +(permissive CORS, `OPTIONS`→204). It holds a long-lived `Async\Scope`, a `TraceBus`, +an `$active` double-start guard, and `$gates` (issue id → answer `Channel`). 
-A tool is a typed function the model can call: - -```php -interface ToolInterface { - public function name(): string; - public function description(): string; - public function inputSchema(): array; // JSON Schema - public function risk(): Risk; // Safe | Mutating | Dangerous - public function handle(array $input): string; // tool_result text; may await -} +``` +GET /api/health +GET /api/projects +GET /api/projects/{key}/issues +GET /api/projects/{key}/issues/stream SSE — board (an `issue` event per change) +GET /api/projects/{key}/runs/{id}/stream SSE — live trace, keyed by seq +GET /api/projects/{key}/runs/{id}/trace?since=<seq> poll fallback for the run stream +GET /api/projects/{key}/runs/{id}/artifacts +POST /api/projects/{key}/issues/{id}/start launch the solver (202) +POST /api/projects/{key}/issues/{id}/answer reply to the run's open gate ``` -v1 set: -- `bash` — Mutating. Async subprocess in workspace dir, scrubbed env, timeout. -- `read_file` — Safe. Confined to workspace. -- `write_file` — Mutating. Confined to workspace. - -Tool errors are returned to the model as `tool_result(is_error)`; tools are never -auto-retried. Skills (markdown playbooks) are phase 2 — not tools. +**Run stream — push, not poll.** A server-started run's tracer fans out to a +`LiveTraceSink` that, after `TraceStore` persists a record, reads its `seq` and +publishes the formatted row to the `TraceBus`. The SSE handler subscribes to the bus +*before* replaying the journal gap (`since` from `Last-Event-ID`/`?since=`), then +blocks on `channel->recv(Async\timeout(10s))` — pure push, with a ~10s heartbeat on +timeout, seq de-dupe of the replay/live overlap, and a gap-heal from the DB on any seq +discontinuity. A pushed row is byte-identical to a replayed one. 
-## Tool execution: one `.call()`, a middleware chain inside +**Board stream — a deliberate poll.** The Kanban is low-frequency, so `issuesStream` +re-derives the issue snapshot every ~2s and emits an `issue` event per issue whose JSON +changed. Only the hot per-record path (the run stream) needed push. -The turn loop never touches a tool directly. It calls one transparent entry -point — `ExecutorInterface::call(toolCall): string` — and everything (security, -audit, timeout, transparency) is a middleware in an onion chain. Adding behavior = -adding a middleware, not editing the loop. +**Start / gate.** `start` rejects a concurrent run for the same issue (409), then +`scope->spawn`s the `IssueRunner` detached, with `HttpGateSpeaker` as the human tier. +When the supervisor escalates, the gate writes a `question` trace row, flips the issue +to `WaitingHuman`, and **parks the run coroutine** on the answer channel; `answer` +(valid only while `WaitingHuman`) sends the reply, the gate writes an `answer` row and +the run resumes. The question/answer rows are the durable record, the channel is the +live wakeup — so a restart keeps the gate visible and the run resumes from its snapshot. -```php -interface ExecutorInterface { - public function call(ToolCall $call): string; // runs the middleware chain -} +Run it with the server extension: -interface MiddlewareInterface { - // Wrap the next stage: inspect, short-circuit, modify, time, log. - public function handle(ToolCall $call, callable $next): string; // tool_result text -} +``` +php -d extension=/path/to/true_async_server.so bin/claw serve [--port 8787] [--host 127.0.0.1] ``` -Chain (outer → inner); the terminal stage resolves the tool and `await`s it: - -1. **Audit** — log intent before, result/verdict after (even denials). -2. **Permission** — the security layer; may short-circuit (see below). -3. **Transparency** — echo intent to chat ("running: `…`" + `[stop]`). -4. **Timeout** — wrap `next` in `\Async\timeout()`. -5. 
**terminal** — `await registry.get(call.name).handle(call.input)`. - -The whole pipeline is `await`-able end to end, so a middleware can suspend (e.g. -Permission awaiting a button) without blocking the thread. - -### Permission middleware (the security layer) - -Because every call funnels through `.call()`, the permission middleware -transparently sees the agent's full intent for *every* action and can stop it. -Ordered checks, first decisive wins: +## Config (`Claw\Config`, `.env`) -1. **Denylist** — hard rules (`rm -rf`, fork bombs, workspace escape, reading the - secrets file) → blocked, not unlockable. -2. **Rules** — persisted allow/deny rules from the session SQLite. -3. **Risk default** — `Safe` → allow, `Mutating` → confirm, `Dangerous` → deny. +- `CLAW_AGENT` (`claude` | `openai-compatible` | `gemini`), `CLAW_MODEL`, `CLAW_BASE_URL`. +- Keys: `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` / `GEMINI_API_KEY`, or `CLAW_API_KEY`. +- Role models: `CLAW_AGENT_<ROLE>=<model>` (e.g. `CLAW_AGENT_WORKER_SMART`). +- Budgets: `CLAW_BUDGET_TOKENS/SECONDS`, `CLAW_TURN_TOKENS/SECONDS`, `CLAW_BUDGET_POLICY` + (`stop`|`ask`), `CLAW_MAX_HISTORY`. +- Paths: `CLAW_WORKSPACE` (app home), `CLAW_PROJECT` (project override). -```php -public function handle(ToolCall $call, callable $next): string { - $verdict = $this->policy->check($call); // Allow | Deny | Confirm - if ($verdict->isDeny()) return "blocked: {$verdict->reason}"; // is_error - if ($verdict->isConfirm()) { - $choice = $this->chat->ask($call->chatId, $call->summary(), [Allow, Deny, Always]); - if ($choice === Deny) return "user denied"; - if ($choice === Always) $this->policy->persistRule($call); - } - return $next($call); // proceed to inner stages -} -``` +Secrets stay in memory on `Config` and are never exported to the environment, so `bash` +subprocesses do not inherit them. -**Interrupt while running.** Tools execute under the turn's cancellation token. 
-`/stop` from the user (or the Timeout middleware) cancels the turn coroutine; -TrueAsync cancellation propagates into the awaited `bash` subprocess and kills it. -So the layer blocks *before* and aborts *during*. - -## Memory - -One SQLite file per `chat_id` (one writer → no contention), via the PDO pool: -- `messages` — conversation history -- `rules` — persisted permission rules -- `audit` — every tool call - -Plus a host-level `CLAUDE.md` persona / system prompt. - -## Retry & guardrails - -- Retry is **cause-aware and lives at the agent level**, not in the HTTP client. - Each agent normalizes the failure into a typed exception (`AgentErrors`): - transport / overloaded / server / rate-limit are `TransientErrorInterface`; - auth / bad-request are permanent. Retry lives in the agent itself: `AbstractAgent::send()` - wraps the provider's one-shot `attempt()` and retries by the policy — transient - → backoff + jitter; `RateLimitException` → honor its - `retryAfterMs` if near, else give up so the bot reports the resume time; - permanent → never. `CurlHttpClient` is a single request (no retry). Sleeping - via `\Async\delay()` blocks nothing. A different consumer (Telegram) gets its - own retry at its own boundary — not stacked under the agent's. -- Tool errors: returned to the model, no transport retry. -- Per turn: max tool iterations + `\Async\timeout()`. -- `Session` reacts to the cause: rate-limit → "try again in N"; auth → config - error; otherwise a generic message — the conversation survives either way. - -## Security (honest) - -- **Authorization (must-have #1)**: Router drops any `chat_id` not in the - allowlist — otherwise a stranger gets a shell. -- A single-process agent runs `bash` with full process privileges. Real isolation - is an OS concern (low-priv user / container). Documented, not enforced in code. -- `TELEGRAM_BOT_TOKEN` and the agent API key live in the host; the `bash` tool - gets a scrubbed env. 
-- Workspace confinement + audit log of every tool call. - -## Config - -- `CLAW_AGENT` — which `AgentInterface` impl (`claude` | `openai-compatible` | `gemini`) -- agent key for the chosen provider (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY` / - `GEMINI_API_KEY` / …) + optional `CLAW_BASE_URL` for OpenAI-compatible providers -- `CLAW_MODEL` — model id -- `TELEGRAM_BOT_TOKEN`, `CLAW_ALLOWED_CHATS`, `CLAW_WORKSPACE` - -## File layout +## File layout (`src/`) ``` -php-claw/ - bin/claw entrypoint: autoload, then Cli->run(argv) - src/ - Cli/ Cli.php arg dispatch: pick a mode + agent factory - WorkflowMode.php default mode: create/issue/run/log (per-issue solver runs) - SessionMode.php --session: bootstrap reactor; accept() -> spawn Session - Config.php - Session.php conversation state + agentic loop (run/handle/execute) - Chat/ ChatInterface.php (accept) ConversationInterface.php - ConsoleChat.php ConsoleConversation.php TelegramChat.php (todo) - Agent/ AgentInterface.php ClaudeAgent.php OpenAiCompatibleAgent.php GeminiAgent.php - AgentRequest.php AgentResponse.php Message.php ContentBlock.php (+Text/ - ToolUse/ToolResult) Role.php StopReason.php Usage.php ToolSpec.php - AbstractAgent.php (send() = retry loop; attempt() = one request) - AgentErrors.php (classify) AgentRetryPolicyInterface.php BackoffAgentRetryPolicy.php - Http/ HttpClientInterface.php CurlHttpClient.php (one-shot) HttpResponse.php - Exec/ ExecutorInterface.php ChainExecutor.php MiddlewareInterface.php - AuditMiddleware.php PermissionMiddleware.php - TransparencyMiddleware.php TimeoutMiddleware.php - Tool/ ToolInterface.php Risk.php ToolCall.php Registry.php Workspace.php - ReadFileTool.php WriteFileTool.php ListFilesTool.php BashTool.php (proc_open) - DateTool.php (current time) PhpEvalTool.php (eval one expression; Dangerous) - ScheduleTool.php (one-shot reminder; spawns a delay coroutine) - Permission/ Policy.php - Store/ SessionStore.php Schema.php - Exceptions/ ClawException.php (base) 
ConfigException.php ChatException.php - HttpException.php ToolException.php - AgentException.php (base) TransientErrorInterface.php - RateLimitException.php OverloadedException.php ServerErrorException.php - TransportException.php AuthException.php BadRequestException.php - skills/ phase 2: *.md playbooks - workspace/ sandboxed working dir for tools - CLAUDE.md agent persona - README.md ARCHITECTURE.md +Config.php Server.php HttpGateSpeaker.php Session.php(legacy) +Cli/ Cli.php WorkflowMode.php IssueRunner.php RunContext.php SessionMode.php +Workflow/ WorkflowAbstract.php WorkflowInterface.php Step.php Tool.php MethodTool.php + Environment.php EnvKey.php GenerateIssueWorkflow.php SuperviseWorkflow.php + WorkflowStore.php WorkflowValidator.php SqliteStateStore.php + InMemoryStateStore.php WorkflowStateStoreInterface.php Artifact.php BudgetPolicy.php +Agent/ AgentInterface.php AbstractAgent.php ClaudeAgent.php OpenAiCompatibleAgent.php + DefaultTurnLoop.php AgentSpeaker.php ConsoleSpeaker.php EscalatingSpeaker.php + SpeakerInterface.php SpeakerRole.php Budget.php AgentRequest/Response … +Tool/ ToolInterface.php Registry.php Risk.php Workspace.php BashTool.php + ReadFileTool.php WriteFileTool.php ListFilesTool.php RecallTool.php + DefineWorkflowTool.php FinishTool.php (legacy: DateTool, PhpEvalTool, ScheduleTool) +Exec/ ExecutorInterface.php ChainExecutor.php MiddlewareInterface.php + AuditMiddleware.php PermissionMiddleware.php TimeoutMiddleware.php +Permission/ Policy.php Decision.php Verdict.php +Trace/ Tracer.php TraceStore.php ConsoleTraceSink.php LiveTraceSink.php TraceBus.php + ArrayTraceSink.php TraceReader.php TraceFormat.php TraceEvent/Record Level.php +Project/ Project.php ProjectStore.php Issue.php IssueStatus.php RunStatus.php +Http/ HttpClientInterface.php CurlHttpClient.php HttpResponse.php +Exceptions/ ClawException.php WorkflowFinished.php + typed agent/http/tool/config errors +Chat/ Store/ (legacy: Telegram + console chat, SessionStore) +Knowledge/ 
(forward-looking skeleton: declarative memory / KB, not yet wired) ``` -## Phasing +## Legacy -- **v1**: Telegram, session coroutines, AgentTurn + 3 tools, per-action approvals, - per-session SQLite, chat_id allowlist, retry + guardrails. -- **Phase 2**: skills, scheduler (proactive messages), plan-mode, WhatsApp, - sub-agents, ThreadPool for any CPU-bound work. +The original claw was an interactive **Telegram/console chat bot**; that code still +exists but is reached only via `claw --session` (`Cli\SessionMode`): `src/Chat/*`, +`src/Session.php`, `src/Store/SessionStore.php`, the `CLAW_CHANNEL=telegram` config and +its allowlist, the chat-only tools (`current_date`, `php_eval`, `schedule`), and the +`Exec` middleware chain + `Permission\Policy` (wired only on this path). The autonomous +workflow system above is the focus; the legacy chat path is left as-is. diff --git a/composer.json b/composer.json index d41476f..ff2e228 100644 --- a/composer.json +++ b/composer.json @@ -24,7 +24,7 @@ "psr/log": "^3.0", "phpstan/phpstan": "^2.2", "friendsofphp/php-cs-fixer": "^3.95", - "true-async/ide-helper": "^0.7.2" + "true-async/ide-helper": "^0.7.3" }, "autoload": { "psr-4": { diff --git a/composer.lock b/composer.lock index 43f6b41..4e96b17 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "f3461a6f1cf42c64eaae189640509def", + "content-hash": "b154077932d56629fc586408861ea928", "packages": [ { "name": "composer/ca-bundle", @@ -3899,16 +3899,16 @@ }, { "name": "true-async/ide-helper", - "version": "v0.7.2", + "version": "v0.7.3", "source": { "type": "git", "url": "https://github.com/true-async/ide-helper.git", - "reference": "3b2e280cf82819fffd1ea742c54a22393721d512" + "reference": "87e785fb936fbcd0bf2e001f1c37299ad985aae4" }, "dist": { "type": "zip", - "url": 
"https://api.github.com/repos/true-async/ide-helper/zipball/3b2e280cf82819fffd1ea742c54a22393721d512", - "reference": "3b2e280cf82819fffd1ea742c54a22393721d512", + "url": "https://api.github.com/repos/true-async/ide-helper/zipball/87e785fb936fbcd0bf2e001f1c37299ad985aae4", + "reference": "87e785fb936fbcd0bf2e001f1c37299ad985aae4", "shasum": "" }, "type": "library", @@ -3928,9 +3928,9 @@ ], "support": { "issues": "https://github.com/true-async/ide-helper/issues", - "source": "https://github.com/true-async/ide-helper/tree/v0.7.2" + "source": "https://github.com/true-async/ide-helper/tree/v0.7.3" }, - "time": "2026-06-20T17:30:13+00:00" + "time": "2026-06-27T04:31:30+00:00" }, { "name": "yiisoft/injector", diff --git a/docs/dashboard-server-plan.md b/docs/dashboard-server-plan.md new file mode 100644 index 0000000..e275812 --- /dev/null +++ b/docs/dashboard-server-plan.md @@ -0,0 +1,297 @@ +# Dashboard server: endpoints, DB, and the run engine + +Plan for finishing the `claw serve` HTTP API that backs **php-claw-ui**. Covers three +things, in order: (1) the endpoint surface, (2) what the API needs from the DB, and +(3) how a `start` runs an issue as a coroutine on the server's own event loop, with live +updates pushed over an in-process trace bus. + +Status today: `src/Server.php` is a **read-only, polling** JSON API. SSE and the two +write paths (`start`, `answer`) do not exist yet. The TrueAsync server now ships SSE +(`HttpResponse::sseStart/sseEvent/sseComment/sseRetry`), so the live stream is unblocked. + +**Architecture in one line:** the `Tracer` already fans out to several `TraceSinkInterface`s +(durable `TraceStore`, `ConsoleTraceSink`); we add one more — a `LiveTraceSink` that publishes +each record to an **in-process trace bus**. SSE handlers *subscribe* to the bus (push, no +poll). The DB stays the durable journal for replay/catch-up. 
Runs execute as coroutines via +`Async\spawn` on the server loop (not a thread pool), so publisher and subscribers share one +loop and the bus is a plain `Async\Channel`. + +--- + +## 0. The contract gap (decide this first) + +The UI (`php-claw-ui/src/data/client.ts`) is written **single-project**: + +``` +GET /api/project +GET /api/issues +GET /api/runs/{id}/stream (SSE — trace spans + status, keyed by trace.seq) + ↳ fallback: GET /api/runs/{id}/trace?since=seq +POST /api/issues/{id}/start +POST /api/issues/{id}/answer +``` + +The server (`src/Server.php`) is **multi-project**, keyed by the project's db filename: + +``` +GET /api/projects +GET /api/projects/{key}/issues +GET /api/projects/{key}/runs/{runId}/trace?since=<seq> +GET /api/projects/{key}/runs/{runId}/artifacts +``` + +**DECIDED: multi-project (B).** The server keeps serving every project under `projectsDir` +(`glob(*.db)`); routes keep the `{key}` segment. The UI's `HttpSseClient` gains a project +selector and threads `{key}` into every call — a prototype-client change, not a server +compromise. One server, many quest boards. + +Everything downstream keeps `{key}`: `/api/projects/{key}/issues`, +`/api/projects/{key}/issues/{id}/start|answer`, +`/api/projects/{key}/runs/{id}/stream|trace|artifacts`. `run_id` is unique within a project +db, so `(key, runId)` is the global handle; the trace bus topics and the gate channels key on it. + +--- + +## 1. 
Endpoint surface (target) + +| Method | Path | Kind | Purpose | +|---|---|---|---| +| GET | `/api/health` | read | liveness `{ok:true}` | +| GET | `/api/projects` | read | every project `[{key,name,path}]` (exists) | +| GET | `/api/projects/{key}/issues` | read | full `Issue[]` snapshot (board state) (exists) | +| GET | `/api/projects/{key}/issues/stream` | **SSE** | board-level live updates: an `issue` event per changed issue | +| GET | `/api/projects/{key}/runs/{id}/stream` | **SSE** | run-level live updates: `trace` events keyed by `seq` | +| GET | `/api/projects/{key}/runs/{id}/trace?since=<seq>` | read | polling fallback for the run stream (exists) | +| GET | `/api/projects/{key}/runs/{id}/artifacts` | read | run artifacts (exists) | +| POST | `/api/projects/{key}/issues/{id}/start` | **write** | enqueue the issue for execution | +| POST | `/api/projects/{key}/issues/{id}/answer` | **write** | resolve a pending human gate / reply to the ask-channel | + +Two split streams, on purpose: + +- **Board stream** (`/api/projects/{key}/issues/stream`) — low frequency. Carries whole-`Issue` + snapshots (status, `done`, tokens, gate) so the Kanban stays live without polling. + The UI's `subscribe(onChange: (issues) => void)` maps here. *Note:* the UI today expects + full `Issue[]`; the stream emits per-issue events, so the real `HttpSseClient.subscribe` + reassembles the array from `getIssues()` + patches. Small UI-client change, not a redesign. +- **Run stream** (`/api/projects/{key}/runs/{id}/stream`) — high frequency. The trace waterfall for the + one focused/expanded run. Opened on demand, not for every card. + +CORS stays permissive (vite dev on `:5173`). `OPTIONS` → 204 (already handled). + +### SSE mechanics: replay from DB, then subscribe to the bus + +Each handler is a coroutine on the server loop. It **catches up from the DB up to the live +edge, then attaches to the in-process bus** — so there is no polling and no gap across the +join.
The `seq` autoincrement is the cursor that stitches the two halves together. + +```php +$res->sseStart(); // commits text/event-stream headers, unblocks onopen +$since = (int) ($req->getHeader('Last-Event-ID') ?? $req->getQueryParam('since', 0)); + +$sub = $bus->subscribe($runId); // attach FIRST, so nothing published now is lost +foreach ($this->traceSince($pdo, $runId, $since) as $rec) { // 1) replay the gap from the journal + $res->sseEvent(data: json_encode($rec), event: 'trace', id: (string) $rec['seq']); + $since = $rec['seq']; +} +while (!$res->isClosed()) { // 2) live: block on the bus, wake exactly on an event + $rec = $sub->recv(); // no delay(), no SELECT — pure push + if ($rec->seq <= $since) continue; // de-dupe the replay/live overlap by seq + if (!$res->sendable()) { /* drop or coalesce for this slow client */ } + $res->sseEvent(data: json_encode($rec), event: 'trace', id: (string) $rec->seq); + $since = $rec->seq; +} +``` + +**Push, not poll.** Live delivery is an `Async\Channel`, fed by the `LiveTraceSink` the +`Tracer` writes to (§3). Wakeup is immediate (coroutine resume), not a ~250 ms tick, and there +are **zero SELECTs per stream while live** — the only DB read is the one-shot replay on connect. +`subscribe()` before `traceSince()` closes the race: anything published during replay is queued +on the channel and de-duped by `seq`. `Last-Event-ID` makes reconnect resume exactly where it +left off; the `?since=` endpoint is the same `traceSince()` query for clients that can't hold SSE. + +A heartbeat `sseComment()` still goes out on an idle timer (a separate `Async\delay` ticker, or +piggybacked) to defeat proxy idle timeouts. `sendable()` guards a slow client so it can't wedge +the loop or balloon the buffer. + +The board stream is the same shape over an `issue`-topic bus: `start`, status flips, gate +open/close, and token deltas publish an `issue` snapshot event; the handler replays current +`getIssues()` once, then pushes per-issue changes. 
+ +--- + +## 2. What the DB needs + +Per-project SQLite (`<key>.db`). Existing tables (all already written by the CLI run path): + +- `project(id, name, path, description, created_at)` — feeds `/api/projects`. +- `issues(id, title, description, status, created_at)` — status ∈ `Open|InProgress|WaitingHuman|Done|Closed` (enum name), mapped to UI `open|inprogress|waiting|done|closed`. +- `runs(id, issue_id, workflow, status, created_at)` — status ∈ `running|generated|done|failed`. +- `trace(seq, run_id, span_id, parent_id, depth, phase, type, level, data, created_at)` — **the journal.** Everything live flows through here. `type='reply'` carries `usage.in/out` (tokens), `type='artifact'` carries `{label,kind,value}`. +- `workflow_state(run_id, done JSON, …)` — durable snapshot; `count(done)` = completed steps for the progress bar. + +**The good news: almost nothing new is needed.** Tokens, artifacts, progress, and the +whole waterfall are already derivable from `trace` + `workflow_state`. The new endpoints add +exactly one piece of state the read-only API never had to model — **the human gate**: + +### New: pending question / gate + +When a run blocks on the ask-channel (the human tier), the dashboard must (a) show the +`gate` text on the `waiting` card, and (b) let `POST /answer` resolve it — durably, so a +server restart or a reconnecting browser doesn't lose it. + +Option (i) — **trace-only, no new table.** Record the question as a trace row +(`type='question'`, `data={prompt, answered:false}`) and the answer as `type='answer'`. +The gate text = the latest unanswered `question` for the run; `Issue.chat` = the +`question`/`answer`/`chat` rows in seq order. Resolution state lives in whether a matching +`answer` row exists. Zero schema change; chat falls out for free.
+ +Option (ii) — **a `pending_question(run_id, span_id, prompt, answer, status, created_at)` +table.** Explicit, easy to query "is this run waiting", but duplicates what trace already +records and adds a table to keep in sync. + +**Recommendation: (i).** It reuses the journal, gives `Issue.chat` (currently hardcoded +`[]` in `Server::issues()`) for free, and keeps the "trace is the source of truth" +invariant. The `question`/`answer` rows are the *durable* record (they survive restart and +feed chat); the *live* wakeup of the blocked run is a separate in-process channel, handled in +§3 — not the schema. So the gate is durable in the DB and instant over the channel, both. + +Also add, for correctness of `start`: + +- `runs.status` already distinguishes `running` — used to **reject a double-start** (a + running run for the issue → 409, don't enqueue a second). + +--- + +## 3. The run engine: `start` → `spawn` → bus / gate → `answer` + +This is the heart of it. `claw serve` boots the event-loop server and holds a long-lived +**runs scope** plus the **trace bus** and a **gate-channel registry**. `POST /start` spawns +the solver pipeline as a coroutine in that scope; the `Tracer`'s `LiveTraceSink` publishes +every record to the bus (SSE subscribers wake instantly); a human gate parks the run on an +`Async\Channel` that `POST /answer` feeds. + +Everything — runs, SSE handlers, the gate — lives on **one event loop in one thread**, so the +bus and gate channels are plain in-process `Async\Channel`s. No thread boundary, no +`ThreadChannel`, no per-stream polling. 
+ +### 3.1 Boot + +```php +$scope = new Async\Scope(); // owns every in-flight run; outlives each request +$bus = new TraceBus(); // runId → list of subscriber Channels (trace + issue topics) +$gates = new GateChannels(); // runId → Async\Channel for the pending human answer +$limit = new Semaphore($config->maxConcurrentRuns); // throttle (replaces ThreadPool worker count) +``` + +**Why `spawn` on the loop, not `ThreadPool`:** a run is almost all *waiting* — async curl to +the LLM, file I/O, bash over async streams — which cooperatively yields the loop, so coroutines +give the concurrency without a thread per run. Same loop = the trace bus and gate are plain +`Channel`s (the whole reason this is cleaner than the cross-thread alternative). The throttle +the pool's queue used to give becomes a `Semaphore`: `start` acquires a slot, the run releases +it on finish; excess `start`s await a slot. If a genuinely CPU-bound op ever blocks the loop +(a huge diff/JSON), offload **that op** to a `ThreadPool`, not the whole run. + +### 3.2 `POST /api/projects/{key}/issues/{id}/start` + +``` +1. load issue; if a run for it is already 'running' → 409 (no double-start). +2. set issue status InProgress; publish an `issue` event on the board bus. +3. Async\spawn(in $scope): $limit->acquire(); try { IssueRunner->run(); } finally { $limit->release(); } +4. respond 202 Accepted { runId }. // do NOT await the run +``` + +The dashboard never waits on the run — it watches the board stream (status → `inprogress`) +and the run stream (trace records pushed live). The spawn is detached into the server scope so +it survives the request handler returning; the scope contains any crash so one run can't take +the server down. + +### 3.3 `IssueRunner` — refactor `WorkflowMode::runIssue` + +The current `runIssue()` is console-coupled in three spots; the run logic itself is reusable. +Extract an **`IssueRunner`** (the run pipeline minus CLI plumbing) used by both `claw run` +and the server. 
The three couplings to break: + +| Console concern | Headless replacement | +|---|---| +| `ConsoleTraceSink(STDERR)` | **`LiveTraceSink($bus)`** — publishes each record to the bus *and* `TraceStore` still persists it (the Tracer fans out to both) | +| `ConsoleSpeaker(STDIN, STDOUT)` human tier | **`HttpGateSpeaker($gates, $store)`** (below) | +| `confirm("Run this workflow now?")` solver approval | a gate too: a `question`/`answer` round, surfaced as the UI's `workflow`-kind artifact + approve button | + +The key seam already exists: `Tracer` takes a list of `TraceSinkInterface`. `claw run` passes +`[TraceStore, ConsoleTraceSink]`; the server passes `[TraceStore, LiveTraceSink]`. Live delivery +is *just another sink* — no special path, and persistence (for replay/`?since=`) is unchanged. + +### 3.4 The human gate: durable in the DB, instant over a channel + +A gate has two needs and two mechanisms, cleanly split: + +- **Durable record** → `question`/`answer` trace rows keyed by `span_id` (§2). Survives restart, + feeds `Issue.chat` and the `gate` text, replays to a reconnecting browser. (Writing the + `question` row also publishes it on the bus, so the gate appears live with no extra work.) +- **Live wakeup** → an `Async\Channel` per run in `$gates`. The blocked run *awaits* it; the + `answer` request *sends* into it. Instant resume, no polling. 
+ +``` +HttpGateSpeaker::reply($prompt): // runs in the run's coroutine + write trace: type='question', span_id=S, {prompt} // durable + published on the bus → gate shows live + setIssueStatus(WaitingHuman); publish issue event // card → 'waiting' column + $answer = $gates->for($runId)->recv() // PARK the coroutine on the channel (no poll) + write trace: type='answer', span_id=S, {answer} // durable + chat row + setIssueStatus(InProgress); publish issue event + return $answer + +POST /api/projects/{key}/issues/{id}/answer { text }: // same loop, another coroutine + require an open question for this run (else 409) // no orphan answers + $gates->for($runId)->send($text) // wake the run instantly + respond 202 +``` + +`pending` = a `question` span_id with no `answer` row for the same span_id (the DB is the +source of truth for "is this run waiting", so a restart mid-gate is recoverable). + +Restart safety: if the server dies while a run waits, the in-memory channel and the run are +both gone — but the unanswered `question` row remains, so the gate is still visible, and the run +resumes via the existing `resumableRun` + `workflow_state` snapshot path (it re-enters the gate +and re-parks on a fresh channel). The channel is purely the live-wakeup optimization over the +durable record; losing it on restart costs nothing. + +This is the same `EscalatingSpeaker(supervisor, human)` ladder as the CLI — the **supervisor +agent tier is unchanged** (it settles most escalations inline in the run, no gate). Only the +*human* tier swaps `ConsoleSpeaker` → `HttpGateSpeaker`. + +--- + +## 4. Build order + +1. **SSE run stream, replay-only first** `/api/projects/{key}/runs/{id}/stream`: ship the + `traceSince()` replay + `Last-Event-ID` half against *existing* runs, before the bus exists. + *(no new state, immediately a live-ish view via a short reconnect/poll)* +2. 
**`TraceBus` + `LiveTraceSink`**: the in-process push layer; wire the run stream's second + half (subscribe-after-replay). Add the **board-stream** topic the same way. +3. **`IssueRunner`** extraction from `WorkflowMode::runIssue` (console → `LiveTraceSink` / + `HttpGateSpeaker` seams). +4. **`spawn` + `Semaphore` + `POST /start`**: detached run in the server scope, 202, + double-start guard, board event on status flip. +5. **Human gate + `POST /answer`**: `HttpGateSpeaker` (durable `question`/`answer` rows + + `$gates` channel wakeup), `WaitingHuman` flow, `Issue.chat` from the same rows. +6. **Solver-approval gate** (replace `confirm()`), surfaced as the `workflow`-kind artifact. + +Steps 1–2 are pure read/stream work and ship the live dashboard against existing runs. +Steps 3–6 add execution and the human-in-the-loop write paths. + +--- + +## 5. Open questions + +1. ~~Single vs multi-project API.~~ **DECIDED: multi-project** — routes keep `{key}`. +2. ~~Run engine.~~ **DECIDED: `Async\spawn` on the server loop** (not `ThreadPool`). Live + delivery is a push bus and the gate is a channel, both plain in-process `Async\Channel` + because run + SSE + gate share one loop. Throttle = a `Semaphore`. CPU-bound ops can be + offloaded to a `ThreadPool` per-op if they ever block the loop. +3. ~~Gate state.~~ **DECIDED: trace-only for durability** — `question`/`answer` rows keyed by + `span_id` (chat + restart-safe); the live wakeup is the `$gates` channel, not a poll. +4. ~~Live delivery.~~ **DECIDED: in-process trace bus** (`LiveTraceSink`, a third + `TraceSinkInterface`) — SSE handlers subscribe and are pushed to. The DB is the durable + journal for replay (`Last-Event-ID`) and the `?since=` fallback, not the live path. + +All design questions are settled. Next: build step 1 (SSE run stream, replay half). 
diff --git a/docs/workflow-gaps.md b/docs/workflow-gaps.md deleted file mode 100644 index 67b0c1e..0000000 --- a/docs/workflow-gaps.md +++ /dev/null @@ -1,174 +0,0 @@ -# Что мы упустили - -Разбор слабых мест нашей петли агента — на фоне того, что известно из исследований и -практики про автономные циклы (полный обзор источников — [`agentic-loops-survey.md`](agentic-loops-survey.md); -как устроена сама система — [`workflow-architecture.md`](workflow-architecture.md)). - -Сначала коротко о том, что у нас уже хорошо, — чтобы пробелы читались на верном фоне. Мы -сознательно построили **workflow, а не вольного агента**: порядок действий задаёт код -(`run()` и шаги `#[Step]`), а модель принимает лишь локальные решения внутри шага. Для работы -с кодом это правильная ставка. К этому добавлены сохраняемое состояние с возобновлением -прогона, библиотека готовых workflow как «процедурная память», принцип наименьших привилегий -у инструментов и одобрение человеком ключевых шагов. Всё это — сильные стороны. - -Ниже — то, чего не хватает. Разделы упорядочены по приоритету: сверху дешёвое и важное, -ниже — то, что требует больше работы. **Часть пробелов уже закрыта** — отмечено в колонке -«Статус» и пометками ✅/◐ в разделах. - -| # | Пробел | Срочно | Стоимость | Статус | -|---|--------|--------|-----------|--------| -| 1 | Петля ничем не ограничена | высокая | низкая | ✅ закрыт — бюджеты (токены+время) + `BudgetPolicy` | -| 2 | Результат проверяет сама модель | высокая | средняя | ◐ частично — критик (reviewer), но это LLM-judge, не тесты | -| 3 | Нет починки при сбое (супервизор) | высокая | высокая | ✅ закрыт — `SuperviseWorkflow` + супервизор-критик | -| 4 | Контекст не сжимается | средняя | средняя | — | -| 5 | Система не учится на прошлых прогонах | средняя | средняя | — | -| 6 | У автономного запуска нет ограничения прав | высокая | средняя | — | -| 7 | Нет защиты от преждевременного «готово» | средняя | низкая | ◐ частично — критик, но без прогона тестов | - ---- - -## 1. 
Петля ничем не ограничена - -**✅ Закрыто.** Введены **бюджеты** (`Claw\Agent\Budget`: токены + время, parent-цепочка -тотальный→turn) и `BudgetPolicy` (`stop` | pause-`ask` с прибавкой). `ai()`/`step()` проверяют -тотальный, `DefaultTurnLoop` — turn-бюджет. Лимиты из `.env`. Детектор «нет прогресса» как -таковой не делали — его роль играет бюджет (зацикленная петля жжёт токены и упирается в лимит). - -**В чём дело.** Внутренний цикл (`DefaultTurnLoop`) повторяет «спросить модель → выполнить -инструмент → вернуть результат» до тех пор, пока модель сама не перестанет вызывать -инструменты. Единственная преграда на этом пути — переполнение окна контекста: когда история -разрастается, запрос отвергает уже сам провайдер. Нет ни предела по числу ходов, ни лимита по -токенам или времени, ни простой проверки, что прогон вообще движется вперёд, а не топчется на -месте. - -**Что говорит практика.** Автономные циклы склонны застревать. Хрестоматийный пример — -AutoGPT, который снова и снова берётся за одну и ту же подзадачу, потому что не помнит, что -уже её делал. Поэтому базовое правило: ограничивать любую петлю — числом итераций, бюджетом и -детектором застоя. - -**Как у нас.** Предела нет. Если модель зациклится — например, правит файл, видит падающий -тест, правит снова, и так по кругу, — прогон будет жечь время и деньги, пока не упрётся в -окно контекста. - -**Что сделать.** Добавить в цикл потолок по числу ходов и лимит по токенам, а также простую -проверку «нет прогресса» (скажем, несколько ходов подряд не меняют состояние — пора -прерваться). Это дёшево: учёт токенов уже ведёт `Trace`, нужный материал под рукой. - -## 2. Результат проверяет сама модель - -**◐ Частично.** Появился **декларативный критик** `#[Step(critic: 'рубрика')]`: драйвер судит -результат шага на роли `reviewer`, недоволен → супервизор (`EscalatingSpeaker`: агент → человек) -направляет/принимает/останавливает. 
Это вынесло проверку из «молчаливой самооценки» в отдельный -суд с эскалацией к человеку. **Но** это всё ещё LLM-judge, а не внешний прогон тестов — -объективный гейт «готово по `composer qa`» (см. ниже, #7) пока не построен. - -**В чём дело.** Когда нам нужно оценить результат шага, мы делаем ещё один вызов `ai()` — то -есть та же модель судит собственную работу. И решение «задача выполнена» тоже принимает -модель: молча, тем, что перестаёт вызывать инструменты. - -**Что говорят исследования.** Это самое нагруженное место всего обзора. Главный вывод: модель -плохо находит и исправляет собственные ошибки рассуждения, когда судит только сама себя — и -нередко после «самопроверки» ответ становится не лучше, а хуже (DeepMind, 2023). Надёжной -проверка становится тогда, когда сигнал приходит **извне**: запуск кода, компилятор, тесты, -ответ инструмента. - -**Как у нас.** Правильный приём есть, но только на словах. Рецепт, по которому генерируется -солвер, требует для бага сначала написать падающий тест, а в конце прогнать `composer qa` до -зелёного. Но это лишь **текст инструкции** генератору. Сам каркас выполнение не контролирует: -сгенерированный солвер может рецепт не соблюсти, и поймать это будет нечем. - -**Что сделать.** Сделать внешнюю проверку частью каркаса, а не пожеланием. Признак «готово» -для шага и для прогона привязать к фактическому прогону тестов, а не к словам модели. - -## 3. Нет починки при сбое - -**✅ Закрыто.** На runtime-краше солвера run-path запускает `SuperviseWorkflow`: роль `supervisor` -пишет исправленную версию под НОВЫМ именем класса, сохраняет через `define_workflow`, и прогон -**резюмируется** тем же `runId` (снапшот пропускает готовые шаги). Ограничено `MAX_REPAIRS`. -Отдельно — критик+супервизор разруливают «работа плоха» на уровне шага (#2). - -**В чём дело.** Сгенерировать рабочий солвер с первого раза удаётся не всегда. 
Если -сгенерированный код падает при запуске, ошибку никто не подхватывает: прогон просто -помечается неудачным, и на этом всё. - -**Что задумано.** Здесь у нас есть проект решения — супервизор. Это «над-агент», который на -застрявшем шаге пытается разблокировать прогон сам, прежде чем звать человека, поднимаясь по -лестнице: подсказать исполнителю → сделать минимальную правку самому → передать более сильной -(и дорогой) модели → и лишь в крайнем случае человеку. Кирпич для разговора двух агентов между -собой (`Dialogue`) уже написан. - -**Как у нас.** Спроектировано, но не построено. - -**Что сделать.** На ошибке прогона передавать роли supervisor связку «ошибка + трасса + -код», чинить солвер и до-исполнять оставшийся хвост из сохранённого состояния. Это самый -крупный пробел по надёжности, и именно сюда стоит вложиться после первых двух пунктов. - -## 4. Контекст не сжимается - -**В чём дело.** На каждом ходу модель получает всю историю прогона целиком. Кэширование -повторяющегося начала экономит деньги, но не качество. - -**Что говорят исследования.** На длинном контексте качество падает ещё до формального лимита. -Модель хуже использует середину длинного ввода («потеряно в середине»), а с ростом объёма -надёжность плавно деградирует — это явление называют «гниением контекста» (Chroma, 2025). - -**Как у нас.** Сжатия и резюмирования нет — в архитектуре это честно отмечено как «более -поздний слой». - -**Что сделать.** На длинных прогонах сжимать историю: заменять завершённые шаги их кратким -резюме, выносить заметки за пределы окна и подмешивать обратно по необходимости — держа в -активном контексте только то, что действительно нужно сейчас. - -## 5. Система не учится на прошлых прогонах - -**В чём дело.** Между прогонами система не выносит для себя никаких уроков. Каждый новый -прогон начинается с чистого листа. 
- -**Что говорят исследования.** Процедурная память — наши workflow, сохранённые как код, — у нас -сильна; это прямой аналог «библиотеки навыков» из Voyager. Но нет памяти эпизодической и -рефлексии (подход Reflexion): неудача прошлого прогона не превращается в подсказку для -следующего. - -**Как у нас.** Нет. - -**Что сделать.** Сохранять короткие выводы по итогам прогона (что сработало, на чём -споткнулись) и подмешивать их в похожие будущие задачи. - -## 6. У автономного запуска нет ограничения прав - -**В чём дело.** Во время автономного прогона инструмент `bash` работает по реальной папке -проекта без какого-либо разрешительного гейта — фактически «разрешено всё». - -**Что говорят исследования.** В списке OWASP это «избыточные полномочия» (LLM06): опасное -действие выполняется в ответ на неожиданный или подменённый вывод модели. Необратимые -действия должны проходить независимую проверку и одобрение. - -**Как у нас.** Одобрение человека есть на генерацию солвера и на продвижение workflow в общую -область, но во время самого прогона `bash` ничем не ограничен. - -**Что сделать.** Ввести реальную политику прав для автономного `bash`: сначала чёрный список -заведомо опасных команд, позже — оценка риска перед конкретным действием. - -## 7. Нет защиты от преждевременного «готово» - -**В чём дело.** Ничто структурно не мешает прогону объявить задачу решённой раньше, чем -выполнены критерии приёмки. - -**Что говорят исследования.** Есть дешёвый приём: вести явный список критериев, изначально все -помечены как невыполненные, и не давать закрыть задачу, пока они «красные» (Anthropic, ноябрь -2025). - -**Как у нас.** Рецепт просит «закрепить» критерии приёмки, но контроля их выполнения нет. - -**Что сделать.** Вести список критериев приёмки как часть состояния прогона и привязать к нему -момент завершения. - ---- - -## С чего начать - -Разумный порядок — по таблице сверху вниз. 
Первым стоит закрыть пункт **1** (предел петли): -он дёшев, а застрахует от самого грубого сбоя — бесконечного прогона, жгущего токены. Затем — -пункты **2** и **3** в связке: вместе они закрывают главную дыру надёжности (проверять -результат снаружи и уметь чиниться после сбоя). Остальное можно подключать по мере того, как -прогоны станут длиннее и их станет больше. diff --git a/src/Agent/AbstractAgent.php b/src/Agent/AbstractAgent.php index ef799cb..53134f5 100644 --- a/src/Agent/AbstractAgent.php +++ b/src/Agent/AbstractAgent.php @@ -28,6 +28,7 @@ final public function send(AgentRequest $request): AgentResponse return $this->attempt($request); } catch (AgentException $e) { $delay = $this->retryPolicy->delayBeforeRetry($e, $attempt); + if ($delay === null) { throw $e; } @@ -56,6 +57,7 @@ protected function postJson(string $url, string $body, array $headers): array { try { $response = $this->http->post($url, $body, $headers); + if (!$response->isOk()) { throw AgentErrors::fromResponse($response); } diff --git a/src/Agent/AgentErrors.php b/src/Agent/AgentErrors.php index 7c8e86d..6cca0e4 100644 --- a/src/Agent/AgentErrors.php +++ b/src/Agent/AgentErrors.php @@ -79,6 +79,7 @@ public static function classify(int $status, ?string $errorType, string $message private static function isContextOverflow(string $message): bool { $message = strtolower($message); + foreach (['context length', 'context window', 'maximum context', 'prompt is too long', 'too many tokens', 'reduce the length'] as $needle) { if (str_contains($message, $needle)) { return true; diff --git a/src/Agent/AgentFactory.php b/src/Agent/AgentFactory.php new file mode 100644 index 0000000..a3f9ee1 --- /dev/null +++ b/src/Agent/AgentFactory.php @@ -0,0 +1,29 @@ +<?php + +declare(strict_types=1); + +namespace Claw\Agent; + +use Claw\Config; +use Claw\Http\HttpClientInterface; + +/** + * Builds the agent named by the config, or null if that agent is not wired yet. 
Agents retry internally + * (cause-aware), so callers pass a plain transport. Lives here, not in the CLI layer, so every entry + * point (CLI run, dashboard server) composes its agent the same way. + */ +final class AgentFactory +{ + public static function make(Config $config, HttpClientInterface $http): ?AgentInterface + { + return match ($config->agent) { + 'claude' => new ClaudeAgent($http, $config->apiKey), + 'openai-compatible' => new OpenAiCompatibleAgent( + $http, + $config->apiKey, + $config->baseUrl ?? 'https://api.deepseek.com', + ), + default => null, + }; + } +} diff --git a/src/Agent/Budget.php b/src/Agent/Budget.php index b2c90f4..5795181 100644 --- a/src/Agent/Budget.php +++ b/src/Agent/Budget.php @@ -70,6 +70,7 @@ public function isExhausted(): bool if ($this->tokenLimit > 0 && $this->tokens >= $this->tokenLimit) { return true; } + if ($this->secondsLimit > 0.0 && $this->elapsed() >= $this->secondsLimit) { return true; } @@ -83,6 +84,7 @@ public function reason(): string if ($this->tokenLimit > 0 && $this->tokens >= $this->tokenLimit) { return "token budget exhausted ({$this->tokens}/{$this->tokenLimit})"; } + if ($this->secondsLimit > 0.0 && $this->elapsed() >= $this->secondsLimit) { return 'time budget exhausted (' . round($this->elapsed(), 1) . "s/{$this->secondsLimit}s)"; } diff --git a/src/Agent/ClaudeAgent.php b/src/Agent/ClaudeAgent.php index 0c6ddbd..460e083 100644 --- a/src/Agent/ClaudeAgent.php +++ b/src/Agent/ClaudeAgent.php @@ -91,6 +91,7 @@ public static function decodeResponse(array $data): AgentResponse $textBlock = new TextBlock((string) ($block['text'] ?? 
'')); $content[] = $textBlock; $texts[] = $textBlock->text; + break; case 'tool_use': @@ -101,6 +102,7 @@ public static function decodeResponse(array $data): AgentResponse ); $content[] = $useBlock; $toolCalls[] = $useBlock; + break; } } diff --git a/src/Agent/DefaultTurnLoop.php b/src/Agent/DefaultTurnLoop.php index 23d98e0..5465753 100644 --- a/src/Agent/DefaultTurnLoop.php +++ b/src/Agent/DefaultTurnLoop.php @@ -122,6 +122,7 @@ public function run(array $history): TurnResult // is spent, stop the exchange here and return what we have, not another round-trip. if ($this->turnBudget !== null) { $this->turnBudget->spend($response->usage->inputTokens + $response->usage->outputTokens); + if ($this->turnBudget->isExhausted()) { $this->tracer?->exit($turn); @@ -143,8 +144,10 @@ public function run(array $history): TurnResult // the answer as the next user turn, and continue the same loop (context stays whole). if ($this->ask !== null) { $question = $this->extractQuestion($response->text ?? ''); + if ($question !== null) { $answer = $this->ask->reply($question); + if ($answer !== null) { // null = the chain passed up, no one answered $history[] = Message::userText($answer); @@ -157,6 +160,7 @@ public function run(array $history): TurnResult } $results = []; + foreach ($response->toolCalls as $call) { $this->tracer?->toolCall($call->name, $call->input); $result = $this->executor->call(new ToolCall($call->id, $call->name, $call->input)); diff --git a/src/Agent/EscalatingSpeaker.php b/src/Agent/EscalatingSpeaker.php index 55208a1..2a9ab14 100644 --- a/src/Agent/EscalatingSpeaker.php +++ b/src/Agent/EscalatingSpeaker.php @@ -33,6 +33,7 @@ public function reply(string $incoming): ?string { foreach ($this->tiers as $tier) { $answer = $tier->reply($incoming); + if ($answer !== null) { return $answer; // this tier handled it } diff --git a/src/Agent/OpenAiCompatibleAgent.php b/src/Agent/OpenAiCompatibleAgent.php index 5556147..42b9e29 100644 --- 
a/src/Agent/OpenAiCompatibleAgent.php +++ b/src/Agent/OpenAiCompatibleAgent.php @@ -49,6 +49,7 @@ protected function attempt(AgentRequest $request): AgentResponse public static function encodeRequest(AgentRequest $request): array { $messages = []; + if ($request->system !== '') { $messages[] = ['role' => 'system', 'content' => $request->system]; } @@ -98,6 +99,7 @@ public static function decodeResponse(array $data): AgentResponse $text = null; $contentText = $message['content'] ?? null; + if (is_string($contentText) && $contentText !== '') { $content[] = new TextBlock($contentText); $text = $contentText; @@ -151,6 +153,7 @@ private static function encodeMessage(Message $message): array if ($message->role === Role::Assistant) { $text = ''; $toolCalls = []; + foreach ($message->content as $block) { if ($block instanceof TextBlock) { $text .= $block->text; @@ -170,6 +173,7 @@ private static function encodeMessage(Message $message): array } $encoded = ['role' => 'assistant', 'content' => $text === '' ? null : $text]; + if ($toolCalls !== []) { $encoded['tool_calls'] = $toolCalls; } @@ -178,6 +182,7 @@ private static function encodeMessage(Message $message): array } $text = ''; + foreach ($message->content as $block) { if ($block instanceof TextBlock) { $text .= $block->text; diff --git a/src/Chat/AsyncConsoleConversation.php b/src/Chat/AsyncConsoleConversation.php index f8f5348..ba93d52 100644 --- a/src/Chat/AsyncConsoleConversation.php +++ b/src/Chat/AsyncConsoleConversation.php @@ -139,6 +139,7 @@ private function readLoop(): void if ($line === false) { $this->eof = true; // EOF (Ctrl-D / closed stdin) + return; } @@ -231,6 +232,7 @@ public function updateStatus(?Status $status): void if ($status === null) { $this->statusLabel = ''; $this->writeStatus(''); + return; } @@ -410,9 +412,11 @@ private function renderHistory(): void $height = $this->chatRows - $this->chatStart + 1; $lines = $this->history; + foreach ($this->deferred as $line) { $lines[] = self::C_DIM . 
'User: ' . $line . self::C_RESET . "\n"; } + if ($this->warning !== null) { $lines[] = $this->warning . "\n"; // its own block, below the dim deferred area } @@ -450,6 +454,7 @@ private function watchResize(): void private function syncSize(): void { [$rows, $cols] = self::detectSize(); + if ($rows !== $this->rows || $cols !== $this->cols) { $this->rows = $rows; $this->cols = $cols; @@ -489,6 +494,7 @@ private static function detectSize(): array // readline_info (static here), this refreshes on every resize. if (PHP_OS_FAMILY === 'Windows') { $size = self::winConsoleSize(); + if ($size !== null) { return $size; } @@ -501,6 +507,7 @@ private static function detectSize(): array // Linux/macOS: `stty size` prints "rows cols" for the tty — live on resize. if (($rows < 4 || $cols < 20) && PHP_OS_FAMILY !== 'Windows') { $out = @shell_exec('stty size 2>/dev/null'); + if (is_string($out) && preg_match('/^(\d+)\s+(\d+)/', trim($out), $m)) { $rows = (int) $m[1]; $cols = (int) $m[2]; @@ -548,11 +555,13 @@ private static function winConsoleSize(): ?array ); } $info = $k->new('CONSOLE_SCREEN_BUFFER_INFO'); + if (!$k->GetConsoleScreenBufferInfo($k->GetStdHandle(0xFFFFFFF5), \FFI::addr($info))) { return null; // STD_OUTPUT_HANDLE = (DWORD)-11 } $cols = $info->w->Right - $info->w->Left + 1; $rows = $info->w->Bottom - $info->w->Top + 1; + return ($cols < 20 || $rows < 4) ? 
null : [$rows, $cols]; } catch (\Throwable) { return null; diff --git a/src/Chat/ConsoleConversation.php b/src/Chat/ConsoleConversation.php index 8d4fedd..dbd5c13 100644 --- a/src/Chat/ConsoleConversation.php +++ b/src/Chat/ConsoleConversation.php @@ -56,6 +56,7 @@ public function receive(): ?string } $line = trim($line); + if ($line !== '') { return $line; } diff --git a/src/Chat/TelegramChat.php b/src/Chat/TelegramChat.php index f902912..c541871 100644 --- a/src/Chat/TelegramChat.php +++ b/src/Chat/TelegramChat.php @@ -71,6 +71,7 @@ public function accept(): ConversationInterface public function ingest(array $update): void { $callback = $update['callback_query'] ?? null; + if (\is_array($callback)) { $this->ingestCallback($callback); @@ -78,17 +79,20 @@ public function ingest(array $update): void } $message = $update['message'] ?? null; + if (!\is_array($message)) { return; } $chat = \is_array($message['chat'] ?? null) ? $message['chat'] : []; + if (($chat['type'] ?? null) !== 'private') { return; // DMs only for now } $from = \is_array($message['from'] ?? null) ? $message['from'] : []; $userId = (int) ($from['id'] ?? 0); + if (!($this->isAllowed)($userId)) { // Silent drop. Log the id so the owner can find it for the allowlist. fwrite(STDERR, "telegram: dropped message from unauthorized id {$userId}\n"); @@ -97,12 +101,14 @@ public function ingest(array $update): void } $text = $message['text'] ?? null; + if (!\is_string($text) || trim($text) === '') { return; } $chatId = (int) ($chat['id'] ?? $userId); $conversation = $this->conversations[$chatId] ?? null; + if ($conversation === null) { $conversation = new TelegramConversation($chatId, $this->client); $this->conversations[$chatId] = $conversation; @@ -121,11 +127,13 @@ public function ingest(array $update): void private function ingestCallback(array $callback): void { $from = \is_array($callback['from'] ?? null) ? $callback['from'] : []; + if (!($this->isAllowed)((int) ($from['id'] ?? 
0))) { return; } $id = (string) ($callback['id'] ?? ''); + if ($id !== '') { $this->client->answerCallbackQuery($id); } @@ -136,6 +144,7 @@ private function ingestCallback(array $callback): void $data = $callback['data'] ?? null; $conversation = $this->conversations[$chatId] ?? null; + if ($conversation !== null && \is_string($data)) { $conversation->deliver($data); } diff --git a/src/Chat/TelegramClient.php b/src/Chat/TelegramClient.php index 3e7f7bd..7b68d4d 100644 --- a/src/Chat/TelegramClient.php +++ b/src/Chat/TelegramClient.php @@ -39,16 +39,19 @@ public function getUpdates(int $offset, int $timeoutSeconds = 25): array ]); $data = $this->http->get($url)->json(); + if (($data['ok'] ?? false) !== true) { throw new HttpException('Telegram getUpdates failed: ' . json_encode($data['description'] ?? $data)); } $result = $data['result'] ?? []; + if (!\is_array($result)) { return []; } $updates = []; + foreach ($result as $item) { if (\is_array($item)) { /** @var array<string, mixed> $item */ @@ -65,6 +68,7 @@ public function getUpdates(int $offset, int $timeoutSeconds = 25): array public function sendMessage(int $chatId, string $text, ?array $replyMarkup = null): void { $payload = ['chat_id' => $chatId, 'text' => $text]; + if ($replyMarkup !== null) { $payload['reply_markup'] = $replyMarkup; } @@ -95,6 +99,7 @@ private function call(string $method, array $payload): void // (message too long, chat blocked, bad markup). Check ok like getUpdates so a failed write does // not silently look like success. $data = $this->http->post($this->base . $method, $body, ['Content-Type: application/json'])->json(); + if (($data['ok'] ?? false) !== true) { throw new HttpException("Telegram {$method} failed: " . json_encode($data['description'] ?? 
$data)); } diff --git a/src/Cli/Cli.php b/src/Cli/Cli.php index bd23e00..9f1bfb0 100644 --- a/src/Cli/Cli.php +++ b/src/Cli/Cli.php @@ -4,12 +4,6 @@ namespace Claw\Cli; -use Claw\Agent\AgentInterface; -use Claw\Agent\ClaudeAgent; -use Claw\Agent\OpenAiCompatibleAgent; -use Claw\Config; -use Claw\Http\HttpClientInterface; - /** * The command-line front door: parse argv, pick the mode, dispatch. * @@ -18,14 +12,11 @@ * `claw -c`, `claw -i`, `claw run`, `claw log`. See {@see WorkflowMode}. * - session (`--session` / `-s`): the original interactive chat. See {@see SessionMode}. * - * Each mode loads its own {@see Config} lazily — the setup commands (`-c`/`-i`/`log`) + * Each mode loads its own {@see \Claw\Config} lazily — the setup commands (`-c`/`-i`/`log`) * touch only local state and must run without an API key. */ final class Cli { - public const DEFAULT_SYSTEM = 'You are Claw, a helpful coding assistant. Be concise. ' - . 'Use the tools to inspect and change files and run commands in the workspace.'; - /** @param string $root the install root: holds .env, CLAUDE.md, vendor/ and the workspace. */ public function __construct(private readonly string $root) { @@ -45,23 +36,6 @@ public function run(array $argv): int return new WorkflowMode($this->root)->run($args); } - /** - * Build the agent named by the config, or null if that agent is not wired yet. - * Agents retry internally (cause-aware), so callers pass a plain transport. - */ - public static function makeAgent(Config $config, HttpClientInterface $http): ?AgentInterface - { - return match ($config->agent) { - 'claude' => new ClaudeAgent($http, $config->apiKey), - 'openai-compatible' => new OpenAiCompatibleAgent( - $http, - $config->apiKey, - $config->baseUrl ?? 'https://api.deepseek.com', - ), - default => null, - }; - } - /** * The first argument that is not an option flag, or null if there is none. 
* diff --git a/src/Cli/SessionMode.php b/src/Cli/SessionMode.php index 677af86..dd85b74 100644 --- a/src/Cli/SessionMode.php +++ b/src/Cli/SessionMode.php @@ -6,6 +6,7 @@ use function Async\spawn; +use Claw\Agent\AgentFactory; use Claw\Chat\ConsoleChat; use Claw\Chat\ConversationInterface; use Claw\Chat\TelegramChat; @@ -52,6 +53,7 @@ public function run(): int mkdir($config->workspace, 0o775, true); } $workspaceDir = realpath($config->workspace); + if ($workspaceDir === false) { fwrite(STDERR, "Cannot resolve workspace: {$config->workspace}\n"); @@ -61,7 +63,8 @@ public function run(): int // Transport is a single request; retries are cause-aware at the agent level. $http = new CurlHttpClient(); - $agent = Cli::makeAgent($config, $http); // agents retry internally (cause-aware) + $agent = AgentFactory::make($config, $http); // agents retry internally (cause-aware) + if ($agent === null) { fwrite(STDERR, "Agent '{$config->agent}' is not wired yet.\n"); @@ -71,7 +74,7 @@ public function run(): int $workspace = new Workspace($workspaceDir); $persona = $this->root . '/CLAUDE.md'; - $system = is_file($persona) ? (string) file_get_contents($persona) : Cli::DEFAULT_SYSTEM; + $system = is_file($persona) ? (string) file_get_contents($persona) : Config::DEFAULT_SYSTEM; $chat = match ($config->channel) { 'console' => new ConsoleChat(), @@ -81,6 +84,7 @@ public function run(): int ), default => null, }; + if ($chat === null) { fwrite(STDERR, "Channel '{$config->channel}' is not wired yet.\n"); @@ -89,6 +93,7 @@ public function run(): int // One SQLite file per conversation (keyed by its id), so history survives restarts. $sessionsDir = $workspaceDir . '/sessions'; + if (!is_dir($sessionsDir)) { mkdir($sessionsDir, 0o775, true); } @@ -123,6 +128,7 @@ public function run(): int if ($chat instanceof TelegramChat) { // Many chats: long-poll in the background, then one Session per authorized chat. 
spawn($chat->poll(...)); + for (;;) { $conversation = $chat->accept(); spawn(static fn () => $runSession($conversation)); diff --git a/src/Cli/WorkflowMode.php b/src/Cli/WorkflowMode.php index 2054826..7b404d7 100644 --- a/src/Cli/WorkflowMode.php +++ b/src/Cli/WorkflowMode.php @@ -4,44 +4,16 @@ namespace Claw\Cli; -use Claw\Agent\AgentInterface; -use Claw\Agent\AgentSpeaker; -use Claw\Agent\Budget; -use Claw\Agent\ConsoleSpeaker; -use Claw\Agent\DefaultTurnLoop; -use Claw\Agent\EscalatingSpeaker; -use Claw\Agent\SpeakerInterface; -use Claw\Agent\SpeakerRole; +use Claw\Agent\AgentFactory; use Claw\Config; use Claw\Exceptions\ClawException; -use Claw\Exceptions\WorkflowFinished; use Claw\Http\CurlHttpClient; -use Claw\Project\IssueStatus; use Claw\Project\ProjectStore; -use Claw\Project\RunStatus; -use Claw\Tool\BashTool; -use Claw\Tool\DefineWorkflowTool; -use Claw\Tool\FinishTool; -use Claw\Tool\ListFilesTool; -use Claw\Tool\ReadFileTool; -use Claw\Tool\RecallTool; -use Claw\Tool\Registry; -use Claw\Tool\Workspace; -use Claw\Tool\WriteFileTool; -use Claw\Trace\ConsoleTraceSink; +use Claw\Run\ConsoleRunFrontend; +use Claw\Run\IssueRunner; +use Claw\Server; use Claw\Trace\Level; -use Claw\Trace\Tracer; use Claw\Trace\TraceReader; -use Claw\Trace\TraceStore; -use Claw\Workflow\BudgetPolicy; -use Claw\Workflow\Environment; -use Claw\Workflow\EnvKey; -use Claw\Workflow\GenerateIssueWorkflow; -use Claw\Workflow\SqliteStateStore; -use Claw\Workflow\SuperviseWorkflow; -use Claw\Workflow\WorkflowAbstract; -use Claw\Workflow\WorkflowStore; -use Claw\Workflow\WorkflowValidator; /** * The default mode: drive a project's issues through generated solver workflows. @@ -62,31 +34,6 @@ */ final class WorkflowMode { - /** How many times the supervisor may repair-and-resume a crashing solver before giving up. */ - private const int MAX_REPAIRS = 2; - - /** The supervisor agent's standing role — it settles in-run escalations or defers to the human. 
*/ - private const string SUPERVISOR_SYSTEM = <<<'PROMPT' - You are the SUPERVISOR of an autonomous coding workflow. You are consulted when a step is stuck: - a worker pauses with a question, or a step's work failed review and the run asks whether to keep - going. Your job is to UNBLOCK with the smallest sound decision, so the run does not churn. - - How to answer (reply with ONLY the decision, no preamble): - - To resolve a "did not pass review / is this OK?" escalation, reply with exactly one of: - `accept` — the work is good enough as is, stop reworking; - `stop` — the goal cannot be reached here (e.g. a required tool is missing, the gate is - unsatisfiable in this environment) or it is looping with no progress — abort the step; - or a short, concrete GUIDANCE for ONE more attempt (only if a specific fix is likely to work). - - To answer a worker's question, give the briefest concrete answer that lets it proceed. - - Bias to ending churn: if a step has failed several times for the same reason, or the blocker is - environmental (a missing test runner, an absent dependency) and cannot change, choose `accept` - (if the actual work looks correct) or `stop` — do NOT keep saying "try again". - - Reply exactly `ESCALATE` only when the decision genuinely needs a human (a scope or product call - you must not make alone); it will then be passed up to the person. - PROMPT; - /** @param string $root the install root: anchors the app home (state db, generated workflows). */ public function __construct(private readonly string $root) { @@ -105,6 +52,7 @@ public function run(array $args): int if (\in_array('-c', $args, true) || \in_array('--create', $args, true)) { return $this->createProject($args); } + if (\in_array('-i', $args, true) || \in_array('--issue', $args, true)) { return $this->createIssue($args, $projectDir); } @@ -112,10 +60,47 @@ public function run(array $args): int return match ($args[0] ?? 
null) { 'run' => $this->runIssue(\array_slice($args, 1), $projectDir, $verbosity), 'log' => $this->showHistory(\array_slice($args, 1), $projectDir, $verbosity), + 'serve' => $this->serve(\array_slice($args, 1)), default => $this->usage(), }; } + /** + * Handle `claw serve [--port N] [--host H]`: start the read-only dashboard JSON API + * over every project's state db. Requires the TrueAsync server extension to be loaded: + * php -d extension=/path/to/true_async_server.so bin/claw serve + * + * @param list<string> $args + */ + private function serve(array $args): int + { + $host = '127.0.0.1'; + $port = 8787; + + foreach ($args as $i => $arg) { + if ($arg === '--host' && isset($args[$i + 1])) { + $host = $args[$i + 1]; + } elseif (\str_starts_with($arg, '--host=')) { + $host = \substr($arg, 7); + } elseif ($arg === '--port' && isset($args[$i + 1])) { + $port = (int) $args[$i + 1]; + } elseif (\str_starts_with($arg, '--port=')) { + $port = (int) \substr($arg, 7); + } + } + + if (!\class_exists('TrueAsync\\HttpServer')) { + fwrite(STDERR, "claw serve: the TrueAsync server extension is not loaded.\n"); + fwrite(STDERR, " php -d extension=/path/to/true_async_server.so bin/claw serve\n"); + + return 1; + } + + (new Server($this->projectsDir(), $this->root))->run($host, $port); + + return 0; + } + /** * Handle `claw -c [folder]`: register a project's state db under the app home. The target is * the first non-flag argument, defaulting to the current directory. This is the one command @@ -126,6 +111,7 @@ public function run(array $args): int private function createProject(array $args): int { $target = Cli::firstPositional($args) ?? 
getcwd(); + if ($target === false) { fwrite(STDERR, "claw -c: cannot determine the project folder\n"); @@ -156,6 +142,7 @@ private function createProject(array $args): int private function createIssue(array $args, ?string $projectDir): int { $title = Cli::firstPositional($args); + if ($title === null) { fwrite(STDERR, "claw -i: an issue title is required (usage: claw -i \"title\")\n"); @@ -190,6 +177,7 @@ private function createIssue(array $args, ?string $projectDir): int private function runIssue(array $args, ?string $projectDir, ?Level $verbosity): int { $issueId = Cli::firstPositional($args); + if ($issueId === null) { fwrite(STDERR, "claw run: an issue id is required (usage: claw run <id>)\n"); @@ -198,7 +186,6 @@ private function runIssue(array $args, ?string $projectDir, ?Level $verbosity): try { $store = $this->resolve($projectDir); - $project = $store->project(); $issue = $store->loadIssue($issueId); $config = Config::load($this->root . '/.env'); } catch (ClawException $e) { @@ -207,243 +194,16 @@ private function runIssue(array $args, ?string $projectDir, ?Level $verbosity): return 1; } - $agent = Cli::makeAgent($config, new CurlHttpClient()); + $agent = AgentFactory::make($config, new CurlHttpClient()); + if ($agent === null) { fwrite(STDERR, "claw run: agent '{$config->agent}' is not wired yet.\n"); return 1; } - // The palette acts on the REAL project folder: this run works on the user's repo. - $workspace = new Workspace($project->path); - $workflowStore = new WorkflowStore($this->projectsDir() . '/' . $project->id . 
'-workflows', $project->id); - $projectDb = $store->pdo(); // the one open connection: shared by the state store + trace - - $registry = new Registry(); - $registry->add(new BashTool($project->path)); - $registry->add(new ReadFileTool($workspace)); - $registry->add(new WriteFileTool($workspace)); - $registry->add(new ListFilesTool($workspace)); - $registry->add(new DefineWorkflowTool($workflowStore, new WorkflowValidator())); - $registry->add(new FinishTool()); // the model can declare the task solved and end the run - - $env = new Environment() - ->set(EnvKey::Worker, $agent) - ->set(EnvKey::Registry, $registry) - ->set(EnvKey::ModelId, $config->model) - ->set(EnvKey::SystemPrompt, Cli::DEFAULT_SYSTEM) - ->set(EnvKey::MaxHistory, $config->maxHistory) - ->set(EnvKey::Store, new SqliteStateStore($projectDb)) // durable: a killed run resumes here - ->set(EnvKey::Agents, $config->agents) // named roles share this access, override only the model - ->set(EnvKey::Budget, new Budget($config->budgetTokens, (float) $config->budgetSeconds)) // run total (0 = unlimited) - ->set(EnvKey::TurnTokenLimit, $config->turnTokens) // per-exchange caps (0 = unlimited) - ->set(EnvKey::TurnTimeLimit, (float) $config->turnSeconds) - ->set(EnvKey::BudgetPolicy, BudgetPolicy::from($config->budgetPolicy)); // stop | ask on the run total - - // The ask channel is a ladder: a SUPERVISOR AGENT first (it can unblock a stuck step or settle - // a critic escalation — accept / stop / guidance — on its own judgement), then the HUMAN console - // behind it. The supervisor passes a decision up to the human only when it replies ESCALATE. - $env->set(EnvKey::Ask, new EscalatingSpeaker( - $this->supervisorSpeaker($env, $agent, $config), - new ConsoleSpeaker(STDIN, STDOUT), - )); - - $solverName = 'Issue' . (string) preg_replace('/[^A-Za-z0-9]/', '', $issueId) . 
'Solver'; - $solverClass = $workflowStore->classFor($solverName, true); - - // Resume an interrupted run (status still 'running') for this issue's solver, else start a new - // one. The run id ties the ledger row, the trace, and the durable state snapshot together — so - // resuming reuses it: the solver restores its saved state and re-runs only the unfinished tail. - $runId = $store->resumableRun($issue->id, $solverName); - $resuming = $runId !== null; - if ($runId === null) { - $runId = $store->recordRun($issue->id, $solverName); - } - $store->setIssueStatus($issue->id, IssueStatus::InProgress); - $tracer = new Tracer($runId, new TraceStore($projectDb), new ConsoleTraceSink(STDERR, $verbosity ?? Level::Info)); - $env->set(EnvKey::Tracer, $tracer); - $taskBrief = "Title: {$issue->title}\n\nDescription: {$issue->description}"; - $registry->add(new RecallTool(new TraceReader($projectDb), $runId, $taskBrief)); // recall this run's own journal + task - if ($resuming) { - fwrite(STDOUT, "Resuming run #{$runId} for issue #{$issue->id}…\n"); - } - - $ctx = new RunContext($env, $tracer, $store, $workflowStore, $runId, $issue, $project, $solverName, $solverClass); - - $early = $this->ensureSolver($ctx); - if ($early !== null) { - return $early; // generation failed, or the human saved the solver without running it - } - - return $this->runSolver($ctx); - } - - /** - * Make sure a solver workflow exists for the run: reuse the one on disk, or generate one and have - * the human approve it. Returns null to proceed to running, or an exit code to stop here — a failed - * generation (1), or the human saving the solver without running it yet (0). 
- */ - private function ensureSolver(RunContext $ctx): ?int - { - $solverPath = $ctx->workflowStore->path($ctx->solverName, true); - if (is_file($solverPath)) { - fwrite(STDOUT, "Reusing solver {$ctx->solverClass}.\n"); - - return null; - } - - fwrite(STDOUT, "Generating a solver workflow for issue #{$ctx->issue->id}…\n"); - - $gen = $ctx->tracer->enterWorkflow('generate-issue-workflow'); - try { - new GenerateIssueWorkflow($ctx->env, $ctx->runId . '-gen', [ - 'solverName' => $ctx->solverName, - 'solverNamespace' => $ctx->workflowStore->namespaceFor(true), - 'solverTools' => ['read_file', 'write_file', 'list_files', 'bash'], - ], $ctx->issue, $ctx->project)->run(); - } catch (\Throwable $e) { - $ctx->tracer->exit($gen); - $ctx->store->setRunStatus($ctx->runId, RunStatus::Failed); - fwrite(STDERR, 'claw run: generation failed: ' . $e->getMessage() . "\n"); - - return 1; - } - $ctx->tracer->exit($gen); - - if (!is_file($solverPath)) { - $ctx->store->setRunStatus($ctx->runId, RunStatus::Failed); - fwrite(STDERR, "claw run: no solver workflow was produced\n"); - - return 1; - } - - fwrite(STDOUT, "\n--- {$solverPath} ---\n" . (string) file_get_contents($solverPath) . "\n--- end ---\n\n"); - if (!$this->confirm('Run this workflow now?')) { - $ctx->store->setRunStatus($ctx->runId, RunStatus::Generated); - fwrite(STDOUT, "Saved. Not run — review it, then `claw run {$ctx->issue->id}` again.\n"); - - return 0; - } - - return null; - } - - /** - * Run the solver to completion; on a runtime crash, ask the supervisor to repair it (a new class - * version) and resume the same runId — its snapshot skips the finished steps. Bounded by MAX_REPAIRS. 
- */ - private function runSolver(RunContext $ctx): int - { - $solverSpan = $ctx->tracer->enterWorkflow($ctx->solverName); - $currentClass = $ctx->solverClass; - $attempt = 0; - while (true) { - try { - $solver = new $currentClass($ctx->env, $ctx->runId, [], $ctx->issue, $ctx->project); - if (!$solver instanceof WorkflowAbstract) { - throw new ClawException("{$currentClass} is not a workflow"); - } - $solver->run(); - break; - } catch (WorkflowFinished) { - break; // the solver called `done`: a clean finish, not a crash to repair - } catch (\Throwable $e) { - if (++$attempt > self::MAX_REPAIRS) { - $ctx->tracer->exit($solverSpan); - $ctx->store->setRunStatus($ctx->runId, RunStatus::Failed); - fwrite(STDERR, "claw run: run #{$ctx->runId} failed after {$attempt} repair attempt(s): {$e->getMessage()}\n"); - - return 1; - } - - fwrite(STDOUT, "Run hit an error; asking the supervisor to repair (attempt {$attempt})…\n"); - $fixed = $this->repairSolver($ctx, $currentClass, $e->getMessage(), $attempt); - if ($fixed === null) { - $ctx->tracer->exit($solverSpan); - $ctx->store->setRunStatus($ctx->runId, RunStatus::Failed); - fwrite(STDERR, "claw run: the supervisor could not repair run #{$ctx->runId}\n"); - - return 1; - } - $currentClass = $fixed; // resume with the fixed class on the next loop turn - } - } - $ctx->tracer->exit($solverSpan); - - $ctx->store->setRunStatus($ctx->runId, RunStatus::Done); - $ctx->store->setIssueStatus($ctx->issue->id, IssueStatus::Done); // every step ran -> the issue is resolved - fwrite(STDOUT, "Run #{$ctx->runId} finished for issue #{$ctx->issue->id}.\n"); - - return 0; - } - - /** - * The supervisor tier of the ask channel: an agent on the `supervisor` model that settles in-run - * escalations (accept / stop / guidance) on its own judgement, so a stuck step does not wait on — - * or churn against — the human. It runs tool-less (it judges from the escalation text). 
Replying - * `ESCALATE` returns null, so {@see EscalatingSpeaker} passes the decision up to the human console. - */ - private function supervisorSpeaker(Environment $env, AgentInterface $agent, Config $config): SpeakerInterface - { - $configured = $config->agents['supervisor'] ?? null; - $model = \is_string($configured) && $configured !== '' ? $configured : $config->model; - - $loop = new DefaultTurnLoop($agent, $env->executor(), $model, self::SUPERVISOR_SYSTEM); - $supervisor = new AgentSpeaker(SpeakerRole::Supervisor, $loop); - - return new class ($supervisor) implements SpeakerInterface { - public function __construct(private readonly AgentSpeaker $supervisor) - { - } - - public function name(): SpeakerRole - { - return SpeakerRole::Supervisor; - } - - public function reply(string $incoming): ?string - { - $answer = trim($this->supervisor->reply($incoming)); - - // ESCALATE (or an empty answer) -> pass up to the next tier (the human). - return $answer === '' || str_contains(strtoupper($answer), 'ESCALATE') ? null : $answer; - } - }; - } - - /** - * Repair a crashed solver: read its source, hand it and the error to {@see SuperviseWorkflow} - * (the supervisor role), which writes a corrected version under a new class name. Returns that - * fully-qualified class name, or null if the repair produced nothing. - */ - private function repairSolver(RunContext $ctx, string $brokenClass, string $error, int $attempt): ?string - { - $fixedName = $ctx->solverName . 'R' . $attempt; - $fixedClass = $ctx->workflowStore->classFor($fixedName, true); - $fixedNamespace = $ctx->workflowStore->namespaceFor(true); - - $brokenShort = WorkflowStore::shortName($brokenClass); - $brokenPath = $ctx->workflowStore->path($brokenShort, true); - $brokenCode = is_file($brokenPath) ? (string) file_get_contents($brokenPath) : ''; - - $span = $ctx->tracer->enterWorkflow('supervise-run'); - try { - new SuperviseWorkflow($ctx->env, $ctx->runId . '-fix' . 
$attempt, [ - 'brokenName' => $brokenShort, - 'brokenCode' => $brokenCode, - 'error' => $error, - 'fixedName' => $fixedName, - 'fixedNamespace' => $fixedNamespace, - ], $ctx->issue, $ctx->project)->run(); - } catch (\Throwable $e) { - $ctx->tracer->exit($span); - fwrite(STDERR, 'claw run: repair failed: ' . $e->getMessage() . "\n"); - - return null; - } - $ctx->tracer->exit($span); - - return is_file($ctx->workflowStore->path($fixedName, true)) ? $fixedClass : null; + return new IssueRunner($this->projectsDir(), $store, $config, $agent, new ConsoleRunFrontend($verbosity)) + ->run($issue); } /** @@ -465,6 +225,7 @@ private function showHistory(array $args, ?string $projectDir, ?Level $verbosity $reader = new TraceReader($store->pdo()); $runs = $store->recentRuns(); + if ($runs === []) { fwrite(STDOUT, "No runs yet for this project.\n"); @@ -475,6 +236,7 @@ private function showHistory(array $args, ?string $projectDir, ?Level $verbosity fwrite(STDOUT, "Runs:\n"); $header = null; + foreach ($runs as $run) { if ($run['id'] === $runId) { $header = $run; @@ -526,11 +288,13 @@ private function usage(): int private function resolve(?string $projectDir): ProjectStore { $start = $projectDir ?? (getenv('CLAW_PROJECT') ?: getcwd()); + if ($start === false) { throw new ClawException('cannot determine the project folder'); } $store = ProjectStore::discover($this->projectsDir(), $start); + if ($store === null) { throw new ClawException("not inside a registered project: {$start} (run: claw -c)"); } @@ -550,15 +314,20 @@ private function extractProjectOption(array $args): array { $rest = []; $dir = null; + for ($i = 0, $n = \count($args); $i < $n; ++$i) { $arg = $args[$i]; + if ($arg === '--project' || $arg === '-C') { $dir = $args[$i + 1] ?? 
null; // value is the next token ++$i; // consume it + continue; } + if (str_starts_with($arg, '--project=')) { $dir = substr($arg, \strlen('--project=')); + continue; } $rest[] = $arg; @@ -580,6 +349,7 @@ private function extractVerbosity(array $args): array { $rest = []; $level = null; + foreach ($args as $arg) { if ($arg === '-q' || $arg === '--quiet') { $level = Level::Notice; @@ -609,13 +379,4 @@ private function appHome(): string return ($home === false || $home === '') ? $this->root . '/workspace' : $home; } - - /** Ask a yes/no question on the console; true only on an explicit yes. */ - private function confirm(string $question): bool - { - fwrite(STDOUT, $question . ' [y/N] '); - $line = fgets(STDIN); - - return $line !== false && \in_array(strtolower(trim($line)), ['y', 'yes'], true); - } } diff --git a/src/Config.php b/src/Config.php index d8d13ed..19c0a0f 100644 --- a/src/Config.php +++ b/src/Config.php @@ -15,6 +15,10 @@ */ final class Config { + /** The fallback system prompt when no project CLAUDE.md persona is present. */ + public const DEFAULT_SYSTEM = 'You are Claw, a helpful coding assistant. Be concise. ' + . 'Use the tools to inspect and change files and run commands in the workspace.'; + private const DEFAULT_CHANNEL = 'console'; private const CHANNELS = ['console', 'telegram']; @@ -77,6 +81,7 @@ public static function load(string $path = '.env'): self $get = static function (string $key) use ($file): ?string { $env = getenv($key); + if ($env !== false && $env !== '') { return $env; } @@ -86,6 +91,7 @@ public static function load(string $path = '.env'): self $require = static function (string $key) use ($get): string { $value = $get($key); + if ($value === null || $value === '') { throw new ConfigException("Missing required config: {$key}"); } @@ -94,6 +100,7 @@ public static function load(string $path = '.env'): self }; $channel = $get('CLAW_CHANNEL') ?? 
self::DEFAULT_CHANNEL; + if (!\in_array($channel, self::CHANNELS, true)) { throw new ConfigException( "Unknown CLAW_CHANNEL '{$channel}', expected one of: " . implode(', ', self::CHANNELS) @@ -101,6 +108,7 @@ public static function load(string $path = '.env'): self } $agent = $get('CLAW_AGENT') ?? self::DEFAULT_AGENT; + if (!\in_array($agent, self::AGENTS, true)) { throw new ConfigException( "Unknown CLAW_AGENT '{$agent}', expected one of: " . implode(', ', self::AGENTS) @@ -108,6 +116,7 @@ public static function load(string $path = '.env'): self } $budgetPolicy = strtolower($get('CLAW_BUDGET_POLICY') ?? self::DEFAULT_BUDGET_POLICY); + if (!\in_array($budgetPolicy, self::BUDGET_POLICIES, true)) { throw new ConfigException( "Unknown CLAW_BUDGET_POLICY '{$budgetPolicy}', expected one of: " . implode(', ', self::BUDGET_POLICIES) @@ -116,6 +125,7 @@ public static function load(string $path = '.env'): self $keyVar = self::API_KEY_VARS[$agent]; $apiKey = $get($keyVar) ?? $get('CLAW_API_KEY'); + if ($apiKey === null || $apiKey === '') { throw new ConfigException( "Missing API key: set {$keyVar} (or CLAW_API_KEY) for agent '{$agent}'" @@ -170,11 +180,13 @@ private static function parseAgents(array $file): array { $prefix = 'CLAW_AGENT_'; $merged = $file; // file is the base; real env overrides it + foreach (getenv() as $key => $value) { $merged[$key] = $value; } $agents = []; + foreach ($merged as $key => $value) { if (!str_starts_with($key, $prefix)) { continue; @@ -184,6 +196,7 @@ private static function parseAgents(array $file): array // so a workflow routes a call with the readable `ai(..., agent: 'worker-smart')`. 
$role = str_replace('_', '-', strtolower(substr($key, \strlen($prefix)))); $model = trim($value); + if ($role !== '' && $model !== '') { $agents[$role] = $model; } @@ -210,13 +223,16 @@ private static function readDotenv(string $path): array } $lines = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + if ($lines === false) { throw new ConfigException("Cannot read env file: {$path}"); } $out = []; + foreach ($lines as $line) { $line = trim($line); + if ($line === '' || $line[0] === '#') { continue; } @@ -226,6 +242,7 @@ private static function readDotenv(string $path): array } $eq = strpos($line, '='); + if ($eq === false) { continue; } @@ -241,6 +258,7 @@ private static function readDotenv(string $path): array private static function stripQuotes(string $value): string { $len = \strlen($value); + if ($len >= 2 && ($value[0] === '"' || $value[0] === "'") && $value[$len - 1] === $value[0]) { return substr($value, 1, $len - 2); } @@ -254,8 +272,10 @@ private static function stripQuotes(string $value): string private static function parseChatIds(string $raw): array { $ids = []; + foreach (explode(',', $raw) as $part) { $part = trim($part); + if ($part === '') { continue; } diff --git a/src/Exec/ChainExecutor.php b/src/Exec/ChainExecutor.php index 74739ac..d7fa35f 100644 --- a/src/Exec/ChainExecutor.php +++ b/src/Exec/ChainExecutor.php @@ -34,6 +34,7 @@ public function __construct(array $middlewares, \Closure $terminal) public function call(ToolCall $call): ToolResultBlock { $next = $this->terminal; + foreach (array_reverse($this->middlewares) as $middleware) { $next = static fn (ToolCall $c): ToolResultBlock => $middleware->handle($c, $next); } diff --git a/src/Http/CurlHttpClient.php b/src/Http/CurlHttpClient.php index 0b1283e..32fbc7c 100644 --- a/src/Http/CurlHttpClient.php +++ b/src/Http/CurlHttpClient.php @@ -52,6 +52,7 @@ private function send(string $method, string $url, ?string $body, array $headers CURLOPT_TIMEOUT_MS => $this->timeoutMs, 
CURLOPT_HEADERFUNCTION => static function ($_handle, string $header) use (&$responseHeaders): int { $colon = strpos($header, ':'); + if ($colon !== false) { $name = strtolower(trim(substr($header, 0, $colon))); $responseHeaders[$name] = trim(substr($header, $colon + 1)); diff --git a/src/Http/HttpResponse.php b/src/Http/HttpResponse.php index 5b118d6..8cc730c 100644 --- a/src/Http/HttpResponse.php +++ b/src/Http/HttpResponse.php @@ -33,6 +33,7 @@ public function isOk(): bool public function json(): array { $data = json_decode($this->body, true); + // A top-level JSON list (e.g. "[1,2,3]") is also an array but violates the // string-keyed contract callers rely on; reject it like any non-object. if (!is_array($data) || (array_is_list($data) && $data !== [])) { diff --git a/src/HttpGateSpeaker.php b/src/HttpGateSpeaker.php new file mode 100644 index 0000000..3cb3400 --- /dev/null +++ b/src/HttpGateSpeaker.php @@ -0,0 +1,54 @@ +<?php + +declare(strict_types=1); + +namespace Claw; + +use Async\Channel; +use Claw\Agent\SpeakerInterface; +use Claw\Agent\SpeakerRole; +use Claw\Project\IssueStatus; +use Claw\Project\ProjectStore; +use Claw\Trace\Tracer; + +/** + * The human tier of a server-driven run's ask channel. When the supervisor agent escalates + * (replies ESCALATE), this records the question in the trace journal — so the dashboard shows the + * gate and a chat row — flips the issue to WaitingHuman, then PARKS the run coroutine on a channel + * until {@see Server} feeds it the human's reply from `POST .../answer`. The reply is recorded as an + * `answer` row and handed back to the run, which resumes. + * + * Two mechanisms, split on purpose (see docs/dashboard-server-plan.md §3.4): the question/answer + * trace rows are the DURABLE record (they survive a restart and feed chat), the channel is only the + * LIVE wakeup. A restart loses the channel but not the journal, so the gate stays visible and the run + * resumes from its snapshot back into a fresh gate. 
+ */
+final readonly class HttpGateSpeaker implements SpeakerInterface
+{
+    /** @param Channel<string> $answers the live wakeup — POST .../answer sends the human reply here */
+    public function __construct(
+        private Tracer $tracer,
+        private ProjectStore $store,
+        private string $issueId,
+        private Channel $answers,
+    ) {
+    }
+
+    public function name(): SpeakerRole
+    {
+        return SpeakerRole::Human; // this speaker is the human tier
+    }
+
+    public function reply(string $incoming): string
+    {
+        $questionId = $this->tracer->question($incoming); // journal the question; its id pairs the answer below
+        $this->store->setIssueStatus($this->issueId, IssueStatus::WaitingHuman); // the issue now waits on a person
+
+        $text = (string) $this->answers->recv(); // park the run here until POST .../answer sends the reply
+
+        $this->store->setIssueStatus($this->issueId, IssueStatus::InProgress); // reply received: active again
+        $this->tracer->answer($questionId, $text); // journal the reply against its question
+
+        return $text;
+    }
+}
diff --git a/src/Permission/Policy.php b/src/Permission/Policy.php
index bc5a306..7f066f1 100644
--- a/src/Permission/Policy.php
+++ b/src/Permission/Policy.php
@@ -35,6 +35,7 @@ final class Policy
     public function check(ToolInterface $tool, array $input): Verdict
     {
         $command = isset($input['command']) && \is_string($input['command']) ? $input['command'] : '';
+
         if ($command !== '' && $this->isDenied($command)) {
             return Verdict::deny('command matches a hard-blocked pattern');
         }
@@ -49,6 +50,7 @@ public function check(ToolInterface $tool, array $input): Verdict
     private function isDenied(string $command): bool
     {
         $haystack = strtolower($command);
+
         foreach (self::DENYLIST as $needle) {
             if (str_contains($haystack, strtolower($needle))) {
                 return true;
diff --git a/src/Project/IssueStatus.php b/src/Project/IssueStatus.php
index 790e453..c3db0ee 100644
--- a/src/Project/IssueStatus.php
+++ b/src/Project/IssueStatus.php
@@ -6,14 +6,18 @@
 
 use Claw\Exceptions\ClawException;
 
-/** Lifecycle of an issue: tracked above the level of a single workflow run. */
-enum IssueStatus
+/**
+ * Lifecycle of an issue: tracked above the level of a single workflow run.
The case NAME is what the + * project db persists (see {@see ProjectStore}); the backing VALUE is the dashboard's lowercase form, + * so the API serializes a status straight from the enum with no second mapping. + */ +enum IssueStatus: string { - case Open; - case InProgress; - case WaitingHuman; - case Done; - case Closed; + case Open = 'open'; + case InProgress = 'inprogress'; + case WaitingHuman = 'waiting'; + case Done = 'done'; + case Closed = 'closed'; /** * Resolve a case by its name (how the status round-trips through the project db). An unrecognized diff --git a/src/Project/ProjectStore.php b/src/Project/ProjectStore.php index 1aad181..5e06c35 100644 --- a/src/Project/ProjectStore.php +++ b/src/Project/ProjectStore.php @@ -38,6 +38,7 @@ private function __construct( public static function init(string $projectsDir, string $projectPath): Project { $abs = realpath($projectPath); + if ($abs === false || !is_dir($abs)) { throw new ClawException("project folder does not exist: {$projectPath}"); } @@ -48,11 +49,13 @@ public static function init(string $projectsDir, string $projectPath): Project $id = self::keyFor($abs); $dbPath = self::dbPath($projectsDir, $id); + if (is_file($dbPath)) { throw new ClawException("project already initialized: {$abs} ({$dbPath})"); } $name = basename($abs); + try { $pdo = self::open($dbPath); self::ensureSchema($pdo); @@ -88,16 +91,19 @@ public static function init(string $projectsDir, string $projectPath): Project public static function discover(string $projectsDir, string $startDir): ?self { $dir = realpath($startDir); + if ($dir === false) { return null; } while (true) { $dbPath = self::dbPath($projectsDir, self::keyFor($dir)); + if (is_file($dbPath)) { return self::openHandle($dbPath); } $parent = \dirname($dir); + if ($parent === $dir) { // reached the filesystem root without a match return null; } @@ -105,12 +111,84 @@ public static function discover(string $projectsDir, string $startDir): ?self } } + /** Open a registered 
project by its key (the db filename), or null if there is no such db. */
+    public static function openByKey(string $projectsDir, string $key): ?self
+    {
+        $dbPath = self::dbPath($projectsDir, basename($key)); // basename(): drop any directory part of $key
+
+        return is_file($dbPath) ? self::openHandle($dbPath) : null;
+    }
+
+    /**
+     * Every registered project's metadata — for the dashboard's project list. A db that is not a valid
+     * project (no metadata row) is skipped rather than failing the whole listing.
+     *
+     * @return list<Project>
+     */
+    public static function all(string $projectsDir): array
+    {
+        $projects = [];
+
+        foreach (glob($projectsDir . '/*.db') ?: [] as $dbPath) {
+            try {
+                $projects[] = self::openHandle($dbPath)->project; // only the metadata is kept, not the handle
+            } catch (\Exception) {
+                continue; // skip this db and keep listing the others
+            }
+        }
+
+        return $projects;
+    }
+
     /** This project's metadata (id, name, the external folder path, description). */
     public function project(): Project
     {
         return $this->project;
     }
 
+    /**
+     * Every issue in the project, oldest first — for the dashboard board. Runs are read separately
+     * ({@see runsFor()}), so this stays a single cheap query.
+     *
+     * @return list<Issue>
+     */
+    public function allIssues(): array
+    {
+        $stmt = $this->pdo->query('SELECT id, title, description, status FROM issues ORDER BY id');
+        $rows = $stmt === false ? [] : $stmt->fetchAll(\PDO::FETCH_ASSOC); // a false statement yields no issues
+
+        return array_values(array_map(
+            fn (array $row): Issue => new Issue(
+                (string) $row['id'],
+                $this->project->id, // the owning project is this store's own
+                (string) $row['title'],
+                (string) $row['description'],
+                IssueStatus::fromName((string) $row['status']), // resolve the stored status by case name
+            ),
+            $rows,
+        ));
+    }
+
+    /**
+     * The runs spawned for an issue, oldest first, with their status — for the dashboard's run list.
+     *
+     * @return list<array{id: string, workflow: string, status: string}>
+     */
+    public function runsFor(string $issueId): array
+    {
+        $stmt = $this->pdo->prepare('SELECT id, workflow, status FROM runs WHERE issue_id = ?
ORDER BY id'); + $stmt->execute([$issueId]); + + return array_values(array_map( + static fn (array $row): array => [ + 'id' => (string) $row['id'], + 'workflow' => (string) $row['workflow'], + 'status' => (string) $row['status'], + ], + $stmt->fetchAll(\PDO::FETCH_ASSOC), + )); + } + /** The single open connection, shared with the run-state store and the tracer. */ public function pdo(): \PDO { @@ -126,6 +204,7 @@ public function pdo(): \PDO public function addIssue(string $title, string $description = ''): Issue { $title = trim($title); + if ($title === '') { throw new ClawException('issue title must not be empty'); } @@ -156,6 +235,7 @@ public function loadIssue(string $issueId): Issue $stmt = $this->pdo->prepare('SELECT title, description, status FROM issues WHERE id = :id'); $stmt->execute(['id' => $issueId]); $row = $stmt->fetch(\PDO::FETCH_ASSOC); + if (!\is_array($row)) { throw new ClawException("issue #{$issueId} not found in project {$this->project->id}"); } @@ -208,6 +288,7 @@ public function recentRuns(int $limit = 20): array $stmt->execute(); $runs = []; + foreach ($stmt->fetchAll(\PDO::FETCH_ASSOC) as $row) { $runs[] = [ 'id' => (string) ($row['id'] ?? ''), @@ -256,6 +337,7 @@ private static function openHandle(string $dbPath): self $stmt = $pdo->query('SELECT id, name, path, description FROM project LIMIT 1'); $row = $stmt === false ? false : $stmt->fetch(\PDO::FETCH_ASSOC); + if (!\is_array($row)) { throw new ClawException("project has no metadata: {$dbPath}"); } @@ -278,6 +360,7 @@ private static function open(string $dbPath): \PDO { $pdo = new \PDO('sqlite:' . 
$dbPath);
         $pdo->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION);
+        $pdo->exec('PRAGMA busy_timeout=4000'); // ride out a concurrent writer (the run) rather than fail
 
         return $pdo;
     }
diff --git a/src/Run/ConsoleRunFrontend.php b/src/Run/ConsoleRunFrontend.php
new file mode 100644
index 0000000..f587d04
--- /dev/null
+++ b/src/Run/ConsoleRunFrontend.php
@@ -0,0 +1,58 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Claw\Run;
+
+use Claw\Agent\ConsoleSpeaker;
+use Claw\Agent\SpeakerInterface;
+use Claw\Trace\ConsoleTraceSink;
+use Claw\Trace\Level;
+use Claw\Trace\Tracer;
+use Claw\Trace\TraceStore;
+
+/**
+ * The console front-end of a run (`claw run`): the human answers on the terminal, the generated solver is
+ * shown for a y/N confirm before it runs, progress prints to STDOUT/STDERR, and the live trace is the
+ * indented stderr tree.
+ */
+final readonly class ConsoleRunFrontend implements RunFrontendInterface
+{
+    public function __construct(private ?Level $verbosity = null)
+    {
+    }
+
+    public function human(Tracer $tracer): SpeakerInterface
+    {
+        return new ConsoleSpeaker(STDIN, STDOUT); // terminal I/O only; $tracer is not used here
+    }
+
+    public function approveSolver(string $solverPath, string $solverCode): bool
+    {
+        fwrite(STDOUT, "\n--- {$solverPath} ---\n{$solverCode}\n--- end ---\n\n"); // show the source, then ask
+
+        return $this->confirm('Run this workflow now?');
+    }
+
+    public function report(string $message, bool $isError): void
+    {
+        $isError ? fwrite(STDERR, "claw run: {$message}\n") : fwrite(STDOUT, "{$message}\n"); // errors -> STDERR
+    }
+
+    public function traceSinks(\PDO $projectDb): array
+    {
+        return [
+            new TraceStore($projectDb), // the sink over the project db connection
+            new ConsoleTraceSink(STDERR, $this->verbosity ?? Level::Info), // live tree on STDERR; Info by default
+        ];
+    }
+
+    /** Ask a yes/no question on the console; true only on an explicit yes.
 */
+    private function confirm(string $question): bool
+    {
+        fwrite(STDOUT, "{$question} [y/N] ");
+        $line = fgets(STDIN); // false when no line can be read -> counts as "no"
+
+        return $line !== false && \in_array(strtolower(trim($line)), ['y', 'yes'], true);
+    }
+}
diff --git a/src/Run/HttpRunFrontend.php b/src/Run/HttpRunFrontend.php
new file mode 100644
index 0000000..2590e3b
--- /dev/null
+++ b/src/Run/HttpRunFrontend.php
@@ -0,0 +1,54 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Claw\Run;
+
+use Async\Channel;
+use Claw\Agent\SpeakerInterface;
+use Claw\HttpGateSpeaker;
+use Claw\Project\ProjectStore;
+use Claw\Trace\LiveTraceSink;
+use Claw\Trace\TraceBus;
+use Claw\Trace\Tracer;
+use Claw\Trace\TraceStore;
+
+/**
+ * The HTTP front-end of a run (the dashboard server): the human answers over POST .../answer through a
+ * channel-backed gate, the generated solver is auto-approved (the approval gate is a later step),
+ * progress goes only to the trace journal the dashboard reads, and the live sink pushes each record to
+ * the SSE streams.
+ */
+final readonly class HttpRunFrontend implements RunFrontendInterface
+{
+    /** @param Channel<string> $answers the open gate's answer channel — POST .../answer sends the reply here */
+    public function __construct(
+        private ProjectStore $store,
+        private string $issueId,
+        private Channel $answers,
+        private TraceBus $bus,
+    ) {
+    }
+
+    public function human(Tracer $tracer): SpeakerInterface
+    {
+        return new HttpGateSpeaker($tracer, $this->store, $this->issueId, $this->answers);
+    }
+
+    public function approveSolver(string $solverPath, string $solverCode): bool
+    {
+        return true; // always approve: there is no approval gate on the HTTP side yet
+    }
+
+    public function report(string $message, bool $isError): void
+    {
+        // No console: the dashboard reads progress from the trace journal.
+ } + + public function traceSinks(\PDO $projectDb): array + { + // One sink that persists AND publishes: LiveTraceSink writes through the TraceStore, then pushes + // the persisted record (with its seq) to the bus — no separate TraceStore to order against. + return [new LiveTraceSink(new TraceStore($projectDb), $this->bus)]; + } +} diff --git a/src/Run/IssueRunner.php b/src/Run/IssueRunner.php new file mode 100644 index 0000000..1a2ba0e --- /dev/null +++ b/src/Run/IssueRunner.php @@ -0,0 +1,346 @@ +<?php + +declare(strict_types=1); + +namespace Claw\Run; + +use Claw\Agent\AgentInterface; +use Claw\Agent\AgentSpeaker; +use Claw\Agent\Budget; +use Claw\Agent\DefaultTurnLoop; +use Claw\Agent\EscalatingSpeaker; +use Claw\Agent\SpeakerInterface; +use Claw\Agent\SpeakerRole; +use Claw\Config; +use Claw\Exceptions\ClawException; +use Claw\Exceptions\WorkflowFinished; +use Claw\Project\Issue; +use Claw\Project\IssueStatus; +use Claw\Project\ProjectStore; +use Claw\Project\RunStatus; +use Claw\Tool\RecallTool; +use Claw\Tool\ToolFactory; +use Claw\Tool\Workspace; +use Claw\Trace\Tracer; +use Claw\Trace\TraceReader; +use Claw\Workflow\BudgetPolicy; +use Claw\Workflow\Environment; +use Claw\Workflow\EnvKey; +use Claw\Workflow\GenerateIssueWorkflow; +use Claw\Workflow\SqliteStateStore; +use Claw\Workflow\SuperviseWorkflow; +use Claw\Workflow\WorkflowAbstract; +use Claw\Workflow\WorkflowStore; + +/** + * Runs one issue's solver workflow to completion, headless. The shared run engine behind both + * `claw run` (console) and the dashboard server's POST /start (a coroutine on the event loop): it + * wires the run environment, generates or reuses the solver, runs it, and on a runtime crash asks + * the supervisor to repair it and resumes the same run from its durable snapshot. 
+ * + * Everything that differs between the console and the server is behind one {@see RunFrontendInterface} + * (the human tier, the solver-approval decision, progress reporting, the live trace sink), so the + * pipeline itself holds no I/O opinion. + */ +final readonly class IssueRunner +{ + /** How many times the supervisor may repair-and-resume a crashing solver before giving up. */ + private const int MAX_REPAIRS = 2; + + /** The supervisor agent's standing role — it settles in-run escalations or defers to the human. */ + private const string SUPERVISOR_SYSTEM = <<<'PROMPT' + You are the SUPERVISOR of an autonomous coding workflow. You are consulted when a step is stuck: + a worker pauses with a question, or a step's work failed review and the run asks whether to keep + going. Your job is to UNBLOCK with the smallest sound decision, so the run does not churn. + + How to answer (reply with ONLY the decision, no preamble): + - To resolve a "did not pass review / is this OK?" escalation, reply with exactly one of: + `accept` — the work is good enough as is, stop reworking; + `stop` — the goal cannot be reached here (e.g. a required tool is missing, the gate is + unsatisfiable in this environment) or it is looping with no progress — abort the step; + or a short, concrete GUIDANCE for ONE more attempt (only if a specific fix is likely to work). + - To answer a worker's question, give the briefest concrete answer that lets it proceed. + + Bias to ending churn: if a step has failed several times for the same reason, or the blocker is + environmental (a missing test runner, an absent dependency) and cannot change, choose `accept` + (if the actual work looks correct) or `stop` — do NOT keep saying "try again". + + Reply exactly `ESCALATE` only when the decision genuinely needs a human (a scope or product call + you must not make alone); it will then be passed up to the person. 
+ PROMPT; + + public function __construct( + private string $projectsDir, + private ProjectStore $store, + private Config $config, + private AgentInterface $agent, + private RunFrontendInterface $frontend, + ) { + } + + /** + * Generate/reuse the solver for the issue and run it. Returns a process-style exit code: 0 on a + * finished run (or a solver saved-but-not-run), 1 on a failed generation/run. The issue moves to + * InProgress at the start and Done when every step has run. + */ + public function run(Issue $issue): int + { + $project = $this->store->project(); + + // The palette acts on the REAL project folder: this run works on the user's repo. + $workspace = new Workspace($project->path); + $workflowStore = new WorkflowStore($this->projectsDir . '/' . $project->id . '-workflows', $project->id); + $projectDb = $this->store->pdo(); // the one open connection: shared by the state store + trace + $registry = ToolFactory::forRun($project, $workspace, $workflowStore); + + // The store is durable (a killed run resumes from its snapshot); budgets cap the run total and + // each exchange (0 = unlimited); named agent roles share the access and override only the model. + $env = new Environment() + ->set(EnvKey::Worker, $this->agent) + ->set(EnvKey::Registry, $registry) + ->set(EnvKey::ModelId, $this->config->model) + ->set(EnvKey::SystemPrompt, Config::DEFAULT_SYSTEM) + ->set(EnvKey::MaxHistory, $this->config->maxHistory) + ->set(EnvKey::Store, new SqliteStateStore($projectDb)) + ->set(EnvKey::Agents, $this->config->agents) + ->set(EnvKey::Budget, new Budget($this->config->budgetTokens, (float) $this->config->budgetSeconds)) + ->set(EnvKey::TurnTokenLimit, $this->config->turnTokens) + ->set(EnvKey::TurnTimeLimit, (float) $this->config->turnSeconds) + ->set(EnvKey::BudgetPolicy, BudgetPolicy::from($this->config->budgetPolicy)); + + $solverName = 'Issue' . (string) preg_replace('/[^A-Za-z0-9]/', '', $issue->id) . 
'Solver'; + $solverClass = $workflowStore->classFor($solverName, true); + + // Resume an interrupted run (status still 'running') for this issue's solver, else start a new + // one. The run id ties the ledger row, the trace, and the durable state snapshot together — so + // resuming reuses it: the solver restores its saved state and re-runs only the unfinished tail. + $runId = $this->store->resumableRun($issue->id, $solverName); + $resuming = $runId !== null; + + if ($runId === null) { + $runId = $this->store->recordRun($issue->id, $solverName); + } + $this->store->setIssueStatus($issue->id, IssueStatus::InProgress); + + $tracer = new Tracer($runId, ...$this->frontend->traceSinks($projectDb)); + $env->set(EnvKey::Tracer, $tracer); + + // The ask channel is a ladder: a SUPERVISOR AGENT first (it can unblock a stuck step or settle a + // critic escalation on its own judgement), then the human tier behind it. The human tier is built + // here, now the tracer exists, because the HTTP gate records its question/answer through it. + $env->set(EnvKey::Ask, new EscalatingSpeaker( + $this->supervisorSpeaker($env), + $this->frontend->human($tracer), + )); + + $taskBrief = "Title: {$issue->title}\n\nDescription: {$issue->description}"; + $registry->add(new RecallTool(new TraceReader($projectDb), $runId, $taskBrief)); + + if ($resuming) { + $this->frontend->report("Resuming run #{$runId} for issue #{$issue->id}…", false); + } + + $ctx = new RunContext( + $env, + $tracer, + $this->store, + $workflowStore, + $runId, + $issue, + $project, + $solverName, + $solverClass, + ); + + $early = $this->ensureSolver($ctx); + + if ($early !== null) { + return $early; // generation failed, or the solver was saved without running it + } + + return $this->runSolver($ctx); + } + + /** + * Make sure a solver workflow exists for the run: reuse the one on disk, or generate one and have + * the front-end decide whether to run it. 
Returns null to proceed to running, or an exit code to stop
+     * here — a failed generation (1), or the solver saved without being run yet (0).
+     */
+    private function ensureSolver(RunContext $ctx): ?int
+    {
+        $solverPath = $ctx->workflowStore->path($ctx->solverName, true);
+
+        if (is_file($solverPath)) {
+            $this->frontend->report("Reusing solver {$ctx->solverClass}.", false);
+
+            return null; // already on disk: nothing to generate or approve
+        }
+
+        $this->frontend->report("Generating a solver workflow for issue #{$ctx->issue->id}…", false);
+
+        $gen = $ctx->tracer->enterWorkflow('generate-issue-workflow'); // trace span around the generation
+
+        try {
+            new GenerateIssueWorkflow($ctx->env, $ctx->runId . '-gen', [
+                'solverName' => $ctx->solverName,
+                'solverNamespace' => $ctx->workflowStore->namespaceFor(true),
+                'solverTools' => ['read_file', 'write_file', 'list_files', 'bash'],
+            ], $ctx->issue, $ctx->project)->run();
+        } catch (\Cancellation $cancellation) {
+            throw $cancellation; // a cancelled run must stop, not be reported as a generation failure
+        } catch (\Throwable $e) {
+            $ctx->tracer->exit($gen); // NOTE(review): the cancellation branch rethrows without exit($gen) — confirm
+            $ctx->store->setRunStatus($ctx->runId, RunStatus::Failed);
+            $this->frontend->report('generation failed: ' . $e->getMessage(), true);
+
+            return 1;
+        }
+        $ctx->tracer->exit($gen); // success path: close the generation span
+
+        if (!is_file($solverPath)) {
+            $ctx->store->setRunStatus($ctx->runId, RunStatus::Failed); // the workflow ran but wrote no solver file
+            $this->frontend->report('no solver workflow was produced', true);
+
+            return 1;
+        }
+
+        if (!$this->frontend->approveSolver($solverPath, (string) file_get_contents($solverPath))) {
+            $ctx->store->setRunStatus($ctx->runId, RunStatus::Generated); // saved, deliberately not run
+            $this->frontend->report("Saved. Not run — review it, then run issue #{$ctx->issue->id} again.", false);
+
+            return 0;
+        }
+
+        return null; // approved: the caller goes on to run the solver
+    }
+
+    /**
+     * Run the solver to completion; on a runtime crash, ask the supervisor to repair it (a new class
+     * version) and resume the same runId — its snapshot skips the finished steps. Bounded by MAX_REPAIRS.
+     */
+    private function runSolver(RunContext $ctx): int
+    {
+        $solverSpan = $ctx->tracer->enterWorkflow($ctx->solverName);
+        $currentClass = $ctx->solverClass;
+        $attempt = 0; // repairs started so far
+
+        while (true) {
+            try {
+                $solver = new $currentClass($ctx->env, $ctx->runId, [], $ctx->issue, $ctx->project);
+
+                if (!$solver instanceof WorkflowAbstract) {
+                    throw new ClawException("{$currentClass} is not a workflow");
+                }
+                $solver->run();
+
+                break;
+            } catch (WorkflowFinished) {
+                break; // the solver called `done`: a clean finish, not a crash to repair
+            } catch (\Cancellation $cancellation) {
+                throw $cancellation; // a cancelled run must stop — never "repair" a cancellation
+            } catch (\Throwable $e) {
+                // a generated solver throws Error (TypeError, ParseError, ...) as readily as Exception, so catch
+                // the lot here; give up once the repair budget is spent, reporting the repairs actually made
+                if ($attempt >= self::MAX_REPAIRS) {
+                    $ctx->tracer->exit($solverSpan);
+                    $ctx->store->setRunStatus($ctx->runId, RunStatus::Failed);
+                    $message = "run #{$ctx->runId} failed after {$attempt} repair attempt(s): {$e->getMessage()}";
+                    $this->frontend->report($message, true);
+
+                    return 1;
+                }
+                ++$attempt; // budget left: this crash gets repair number $attempt
+                $this->frontend->report("Run hit an error; repairing (attempt {$attempt})…", false);
+                $fixed = $this->repairSolver($ctx, $currentClass, $e->getMessage(), $attempt);
+
+                if ($fixed === null) {
+                    $ctx->tracer->exit($solverSpan);
+                    $ctx->store->setRunStatus($ctx->runId, RunStatus::Failed);
+                    $this->frontend->report("the supervisor could not repair run #{$ctx->runId}", true);
+
+                    return 1;
+                }
+                $currentClass = $fixed; // resume with the fixed class on the next loop turn
+            }
+        }
+        $ctx->tracer->exit($solverSpan);
+
+        $ctx->store->setRunStatus($ctx->runId, RunStatus::Done);
+        $ctx->store->setIssueStatus($ctx->issue->id, IssueStatus::Done); // every step ran -> issue resolved
+        $this->frontend->report("Run #{$ctx->runId} finished for issue #{$ctx->issue->id}.", false);
+
+        return 0;
+    }
+
+    /**
+     * The supervisor tier of
the ask channel: an agent on the `supervisor` model that settles in-run
+     * escalations (accept / stop / guidance) on its own judgement, so a stuck step does not wait on —
+     * or churn against — the human. It runs tool-less (it judges from the escalation text). Replying
+     * `ESCALATE` returns null, so {@see EscalatingSpeaker} passes the decision up to the human tier.
+     */
+    private function supervisorSpeaker(Environment $env): SpeakerInterface
+    {
+        $configured = $this->config->agents['supervisor'] ?? null; // optional model for the 'supervisor' role
+        $model = \is_string($configured) && $configured !== '' ? $configured : $this->config->model;
+
+        $loop = new DefaultTurnLoop($this->agent, $env->executor(), $model, self::SUPERVISOR_SYSTEM);
+        $supervisor = new AgentSpeaker(SpeakerRole::Supervisor, $loop);
+
+        return new class ($supervisor) implements SpeakerInterface {
+            public function __construct(private readonly AgentSpeaker $supervisor)
+            {
+            }
+
+            public function name(): SpeakerRole
+            {
+                return SpeakerRole::Supervisor;
+            }
+
+            public function reply(string $incoming): ?string
+            {
+                $answer = trim($this->supervisor->reply($incoming));
+
+                // ESCALATE anywhere in the reply (case-insensitive) or an empty answer -> next tier (the human).
+                return $answer === '' || str_contains(strtoupper($answer), 'ESCALATE') ? null : $answer;
+            }
+        };
+    }
+
+    /**
+     * Repair a crashed solver: read its source, hand it and the error to {@see SuperviseWorkflow}
+     * (the supervisor role), which writes a corrected version under a new class name. Returns that
+     * fully-qualified class name, or null if the repair produced nothing.
+     */
+    private function repairSolver(RunContext $ctx, string $brokenClass, string $error, int $attempt): ?string
+    {
+        $fixedName = $ctx->solverName . 'R' .
$attempt; + $fixedClass = $ctx->workflowStore->classFor($fixedName, true); + $fixedNamespace = $ctx->workflowStore->namespaceFor(true); + + $brokenShort = WorkflowStore::shortName($brokenClass); + $brokenPath = $ctx->workflowStore->path($brokenShort, true); + $brokenCode = is_file($brokenPath) ? (string) file_get_contents($brokenPath) : ''; + + $span = $ctx->tracer->enterWorkflow('supervise-run'); + + try { + new SuperviseWorkflow($ctx->env, $ctx->runId . '-fix' . $attempt, [ + 'brokenName' => $brokenShort, + 'brokenCode' => $brokenCode, + 'error' => $error, + 'fixedName' => $fixedName, + 'fixedNamespace' => $fixedNamespace, + ], $ctx->issue, $ctx->project)->run(); + } catch (\Cancellation $cancellation) { + throw $cancellation; + } catch (\Throwable $e) { + $ctx->tracer->exit($span); + $this->frontend->report('repair failed: ' . $e->getMessage(), true); + + return null; + } + $ctx->tracer->exit($span); + + return is_file($ctx->workflowStore->path($fixedName, true)) ? $fixedClass : null; + } +} diff --git a/src/Cli/RunContext.php b/src/Run/RunContext.php similarity index 97% rename from src/Cli/RunContext.php rename to src/Run/RunContext.php index bd301fd..d3e04ea 100644 --- a/src/Cli/RunContext.php +++ b/src/Run/RunContext.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace Claw\Cli; +namespace Claw\Run; use Claw\Project\Issue; use Claw\Project\Project; diff --git a/src/Run/RunFrontendInterface.php b/src/Run/RunFrontendInterface.php new file mode 100644 index 0000000..05bfc4e --- /dev/null +++ b/src/Run/RunFrontendInterface.php @@ -0,0 +1,35 @@ +<?php + +declare(strict_types=1); + +namespace Claw\Run; + +use Claw\Agent\SpeakerInterface; +use Claw\Trace\Tracer; +use Claw\Trace\TraceSinkInterface; + +/** + * The outside-facing side of a run: how {@see \Claw\Run\IssueRunner} reaches a human, gets a generated + * solver approved, reports progress, and surfaces live trace — so the run pipeline itself holds no I/O + * opinion. 
The two implementations are {@see ConsoleRunFrontend} (`claw run`) and {@see HttpRunFrontend} + * (the dashboard server). + */ +interface RunFrontendInterface +{ + /** The ask channel's human tier, built once the run's tracer exists (the HTTP gate records through it). */ + public function human(Tracer $tracer): SpeakerInterface; + + /** Decide whether to run a freshly generated solver now. */ + public function approveSolver(string $solverPath, string $solverCode): bool; + + /** Emit one progress or error line. */ + public function report(string $message, bool $isError): void; + + /** + * The run's trace sinks, over the project's db connection: always persists (the console front-end + * adds the live stderr tree; the HTTP one persists-and-publishes through one {@see LiveTraceSink}). + * + * @return list<TraceSinkInterface> + */ + public function traceSinks(\PDO $projectDb): array; +} diff --git a/src/Server.php b/src/Server.php index f8aae51..7270b22 100644 --- a/src/Server.php +++ b/src/Server.php @@ -4,49 +4,91 @@ namespace Claw; +use Async\AsyncCancellation; +use Async\Channel; + +use function Async\spawn; + +use Claw\Agent\AgentFactory; +use Claw\Http\CurlHttpClient; +use Claw\Project\IssueStatus; +use Claw\Project\Project; +use Claw\Project\ProjectStore; +use Claw\Run\HttpRunFrontend; +use Claw\Run\IssueRunner; +use Claw\Trace\TraceBus; +use Claw\Trace\TraceReader; +use Claw\Trace\TraceRecordInterface; +use Claw\Workflow\SqliteStateStore; use TrueAsync\HttpRequest; use TrueAsync\HttpResponse; use TrueAsync\HttpServer; use TrueAsync\HttpServerConfig; /** - * Read-only JSON API over the project state databases, for the php-claw-ui dashboard. + * JSON + SSE API over the project state databases, for the php-claw-ui dashboard. * - * Runs on the TrueAsync HTTP server (true_async_server.so), so every request handler - * is a coroutine on the event loop. 
There is NO SSE yet: the dashboard polls, and the - * `trace.seq` autoincrement is the live cursor (`/runs/{id}/trace?since=<seq>`). The - * server only ever reads — it opens the same SQLite the CLI writes, never mutating it. + * Runs on the TrueAsync HTTP server (true_async_server.so), so every request handler is a coroutine on + * the event loop. Reads open the same SQLite the CLI writes; POST .../start runs an issue's solver as a + * detached coroutine ({@see IssueRunner}) and POST .../answer feeds its human gate. A run pushes each + * trace record to an in-process {@see TraceBus}, so the run stream is live with no polling; the durable + * `trace.seq` autoincrement is the resume cursor (`Last-Event-ID`/`?since=`). * * php -d extension=/path/to/true_async_server.so bin/claw serve [--port 8787] [--host 127.0.0.1] * * Endpoints: - * GET /api/health - * GET /api/projects - * GET /api/projects/{key}/issues - * GET /api/projects/{key}/runs/{runId}/trace?since=<seq> - * GET /api/projects/{key}/runs/{runId}/artifacts - * - * (SSE — a live /runs/{id}/stream — lands once the server extension grows it.) + * GET /api/health + * GET /api/projects + * GET /api/projects/{key}/issues + * GET /api/projects/{key}/issues/stream (SSE — board: an `issue` event per change) + * GET /api/projects/{key}/runs/{runId}/stream (SSE — live trace, keyed by seq) + * GET /api/projects/{key}/runs/{runId}/trace?since=<seq> (poll fallback for the run stream) + * GET /api/projects/{key}/runs/{runId}/artifacts + * POST /api/projects/{key}/issues/{id}/start (launch the solver, 202) + * POST /api/projects/{key}/issues/{id}/answer (reply to the run's open gate) */ final class Server { - public function __construct(private readonly string $projectsDir) - { + /** Flags for the SSE data payloads (the JSON the dashboard reads). 
*/ + private const int JSON = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR; + + /** Live trace pub/sub: a run's LiveTraceSink publishes here, SSE streams subscribe (push, no poll). */ + private TraceBus $bus; + + /** @var array<string, true> issue ids with an active run — guards against a concurrent double-start. */ + private array $active = []; + + /** @var array<string, Channel<string>> issue id → the open gate's answer channel ({@see answer()} feeds it). */ + private array $gates = []; + + /** @var array<string, ProjectStore> read handles, one per project key, opened once and reused. */ + private array $readStores = []; + + /** @var array<string, TraceReader> trace readers over the read handles, cached so a stream re-uses one. */ + private array $readers = []; + + /** @param string $root the install root: anchors the app home so a run can load its {@see Config}. */ + public function __construct( + private readonly string $projectsDir, + private readonly string $root, + ) { } public function run(string $host = '127.0.0.1', int $port = 8787): void { - $config = (new HttpServerConfig()) + $config = new HttpServerConfig() ->addListener($host, $port) ->setReadTimeout(15) ->setWriteTimeout(15) ->setKeepAliveTimeout(60); + $this->bus = new TraceBus(); // live trace push from the spawned runs to the SSE streams + $server = new HttpServer($config); $server->addHttpHandler($this->handle(...)); - \fwrite(STDOUT, "claw dashboard API → http://{$host}:{$port}\n"); - \fwrite(STDOUT, " projects: {$this->projectsDir}\n"); + echo "claw dashboard API → http://{$host}:{$port}\n"; + echo " projects: {$this->projectsDir}\n"; $server->start(); } @@ -66,240 +108,418 @@ public function handle(HttpRequest $req, HttpResponse $res): void return; } - if ($method !== 'GET') { - $this->json($res, 405, ['error' => 'method not allowed']); - - return; - } try { + if ($method === 'POST') { + if (\preg_match('#^/api/projects/([^/]+)/issues/([^/]+)/start$#', $path, 
$matches)) { + $this->start($res, $matches[1], $matches[2]); + + return; + } + + if (\preg_match('#^/api/projects/([^/]+)/issues/([^/]+)/answer$#', $path, $matches)) { + $this->answer($req, $res, $matches[1], $matches[2]); + + return; + } + $res->json(['error' => 'not found', 'path' => $path], 404); + + return; + } + + if ($method !== 'GET') { + $res->json(['error' => 'method not allowed'], 405); + + return; + } + if ($path === '/api/health') { - $this->json($res, 200, ['ok' => true]); + $res->json(['ok' => true]); return; } + if ($path === '/api/projects') { - $this->json($res, 200, $this->projects()); + $res->json($this->projects()); + + return; + } + + if (\preg_match('#^/api/projects/([^/]+)/issues/stream$#', $path, $matches)) { + $this->issuesStream($res, $matches[1]); + + return; + } + + if (\preg_match('#^/api/projects/([^/]+)/issues$#', $path, $matches)) { + $res->json($this->issues($matches[1])); return; } - if (\preg_match('#^/api/projects/([^/]+)/issues$#', $path, $m)) { - $this->json($res, 200, $this->issues($m[1])); + + if (\preg_match('#^/api/projects/([^/]+)/runs/([^/]+)/stream$#', $path, $matches)) { + $this->stream($req, $res, $matches[1], $matches[2]); return; } - if (\preg_match('#^/api/projects/([^/]+)/runs/([^/]+)/trace$#', $path, $m)) { + + if (\preg_match('#^/api/projects/([^/]+)/runs/([^/]+)/trace$#', $path, $matches)) { $since = (int) $req->getQueryParam('since', 0); - $this->json($res, 200, $this->trace($this->pdo($m[1]), $m[2], $since)); + $res->json($this->reader($matches[1])->tail($matches[2], $since)); return; } - if (\preg_match('#^/api/projects/([^/]+)/runs/([^/]+)/artifacts$#', $path, $m)) { - $this->json($res, 200, $this->artifacts($this->pdo($m[1]), $m[2])); + + if (\preg_match('#^/api/projects/([^/]+)/runs/([^/]+)/artifacts$#', $path, $matches)) { + $res->json($this->reader($matches[1])->artifactRecords($matches[2])); return; } - $this->json($res, 404, ['error' => 'not found', 'path' => $path]); - } catch (\Throwable $e) { - 
$this->json($res, 500, ['error' => $e->getMessage()]); + $res->json(['error' => 'not found', 'path' => $path], 404); + } catch (\Exception $e) { + $res->json(['error' => $e->getMessage()], 500); } } - /** @return list<array{key:string,name:string,path:string}> */ + /** @return list<array{key: string, name: string, path: string}> */ private function projects(): array { - $out = []; - foreach (\glob($this->projectsDir . '/*.db') ?: [] as $file) { - try { - $stmt = $this->open($file)->prepare('SELECT name, path FROM project LIMIT 1'); - $stmt->execute(); - $row = $stmt->fetch(\PDO::FETCH_ASSOC); - } catch (\Throwable) { - continue; // not a project db - } - if (\is_array($row)) { - $out[] = ['key' => \basename($file, '.db'), 'name' => (string) $row['name'], 'path' => (string) $row['path']]; - } - } - - return $out; + return array_map( + static fn (Project $project): array => [ + 'key' => $project->id, + 'name' => $project->name, + 'path' => $project->path, + ], + ProjectStore::all($this->projectsDir), + ); } /** - * Issues for a project, shaped to the UI's Issue model (status / progress / runs / - * tokens / artifacts), so the dashboard's HttpClient maps them with no translation. + * Issues for a project, shaped to the UI's Issue model (status / progress / runs / tokens / + * artifacts), assembled from the {@see ProjectStore} and the run's {@see TraceReader} — no SQL here. * - * @return list<array<string,mixed>> + * @return list<array<string, mixed>> */ private function issues(string $key): array { - $pdo = $this->pdo($key); - $stmt = $pdo->prepare('SELECT id, title, status FROM issues ORDER BY id'); - $stmt->execute(); - $rows = $stmt->fetchAll(\PDO::FETCH_ASSOC); + $store = $this->readStore($key); + $reader = $this->reader($key); + $state = new SqliteStateStore($store->pdo()); $issues = []; - foreach ($rows as $r) { - $runStmt = $pdo->prepare('SELECT id, workflow, status FROM runs WHERE issue_id = ? 
ORDER BY id'); - $runStmt->execute([(string) $r['id']]); - $runRows = $runStmt->fetchAll(\PDO::FETCH_ASSOC); - $latest = $runRows ? $runRows[\array_key_last($runRows)] : null; + + foreach ($store->allIssues() as $issue) { + $runs = $store->runsFor($issue->id); + $latest = $runs === [] ? null : $runs[array_key_last($runs)]; $done = 0; - $tin = 0; - $tout = 0; + $tokensIn = 0; + $tokensOut = 0; $artifacts = []; + if ($latest !== null) { - $rid = (string) $latest['id']; - $done = $this->doneCount($pdo, $rid); - [$tin, $tout] = $this->tokens($pdo, $rid); - $artifacts = $this->artifacts($pdo, $rid); + $runId = $latest['id']; + $done = \count($state->load($runId)['done']); + [$tokensIn, $tokensOut] = $reader->tokens($runId); + $artifacts = $reader->artifactRecords($runId); } - $status = $this->uiStatus((string) $r['status']); + $status = $issue->status->value; $issues[] = [ - 'id' => (int) $r['id'], - 'title' => (string) $r['title'], + 'id' => (int) $issue->id, + 'title' => $issue->title, 'status' => $status, 'done' => $done, - 'live' => $status === 'inprogress', - 'runs' => \array_map( - static fn (array $x): array => ['n' => (int) $x['id'], 'status' => (string) $x['status']], - $runRows, + 'live' => $status === IssueStatus::InProgress->value, + 'runs' => array_map( + static fn (array $run): array => ['n' => (int) $run['id'], 'status' => $run['status']], + $runs, ), - 'tokensIn' => $tin, - 'tokensOut' => $tout, + 'tokensIn' => $tokensIn, + 'tokensOut' => $tokensOut, 'artifacts' => $artifacts, - 'chat' => [], // ask-channel inbox is a write path — needs the SSE/answer work + 'chat' => [], // the gate conversation streams as question/answer trace events, not here ]; } return $issues; } - /** Completed-step count from the durable snapshot. */ - private function doneCount(\PDO $pdo, string $runId): int + /** A reused read handle for a project (the dashboard only SELECTs); opened once, cached by key. 
*/ + private function readStore(string $key): ProjectStore { - try { - $stmt = $pdo->prepare('SELECT done FROM workflow_state WHERE run_id = ?'); - $stmt->execute([$runId]); - $json = $stmt->fetchColumn(); - } catch (\Throwable) { - return 0; // table not created on this db yet - } - if (!\is_string($json)) { - return 0; - } - $done = \json_decode($json, true); - - return \is_array($done) ? \count($done) : 0; + return $this->readStores[$key] ??= ProjectStore::openByKey($this->projectsDir, $key) + ?? throw new \RuntimeException("unknown project: {$key}"); } - /** @return array{0:int,1:int} input/output tokens summed over the run's replies. */ - private function tokens(\PDO $pdo, string $runId): array + /** The trace reader over a project's read handle, cached so a stream does not re-open it per tail. */ + private function reader(string $key): TraceReader { - $stmt = $pdo->prepare( - "SELECT COALESCE(SUM(json_extract(data, '$.usage.in')), 0) AS tin, - COALESCE(SUM(json_extract(data, '$.usage.out')), 0) AS tout - FROM trace WHERE run_id = ? AND type = 'reply'", - ); - $stmt->execute([$runId]); - $row = $stmt->fetch(\PDO::FETCH_ASSOC) ?: ['tin' => 0, 'tout' => 0]; - - return [(int) $row['tin'], (int) $row['tout']]; + return $this->readers[$key] ??= new TraceReader($this->readStore($key)->pdo()); } - /** @return list<array<string,mixed>> */ - private function artifacts(\PDO $pdo, string $runId): array + /** + * Live trace for a run, as Server-Sent Events. Replays the journal tail past the client's cursor + * (`Last-Event-ID` header on an EventSource reconnect, or `?since=`), then subscribes to the trace + * bus and is PUSHED each new record — no polling while connected. Every event carries `id: <seq>`, + * so a dropped connection resumes with no gap. + * + * Only runs executing in this server publish to the bus; a stream over any other run replays the + * journal then idles (heartbeating). 
recv blocks until an event or a ~10s heartbeat tick, which also + * re-checks for client disconnect. A dropped live event shows up as a seq gap and is healed from the + * db on the spot. + */ + private function stream(HttpRequest $req, HttpResponse $res, string $key, string $runId): void { - $stmt = $pdo->prepare("SELECT data FROM trace WHERE run_id = ? AND type = 'artifact' ORDER BY seq"); - $stmt->execute([$runId]); - - $out = []; - foreach ($stmt->fetchAll(\PDO::FETCH_COLUMN) as $json) { - $d = \json_decode((string) $json, true); - if (!\is_array($d)) { - continue; - } - $out[] = [ - 'name' => (string) ($d['label'] ?? ''), - 'kind' => (string) ($d['kind'] ?? 'file'), - 'meta' => '', - 'body' => (string) ($d['value'] ?? ''), - ]; + try { + $reader = $this->reader($key); + } catch (\Exception $e) { + $res->json(['error' => $e->getMessage()], 404); // pre-stream: a normal JSON error is fine + + return; } - return $out; + $since = (int) ($req->getHeader('Last-Event-ID') ?? $req->getQueryParam('since', 0)); + $res->sseStart(); // commit text/event-stream headers now, so the browser's onopen fires + + // Subscribe BEFORE the replay so nothing published mid-replay is lost; the seq check de-dupes the + // overlap. The unsubscribe MUST run on every exit, so the topic does not leak. 
+ [$channel, $unsubscribe] = $this->bus->subscribe($runId); + + try { + foreach ($reader->tail($runId, $since) as $row) { // 1) replay the journal gap + $this->sseRow($res, $row); + $since = (int) $row['seq']; + } + + while (!$res->isClosed()) { // 2) live: pushed by the run's LiveTraceSink, no poll + try { + [$record, $seq] = $channel->recv(\Async\timeout(10000)); + } catch (AsyncCancellation) { + $res->sseComment(); // heartbeat (an empty SSE comment) — ping, then re-check the connection + + continue; + } catch (\Cancellation $cancellation) { + throw $cancellation; // a real coroutine cancellation must propagate, never be swallowed + } + + if ($seq <= $since) { + continue; // already sent during the replay/overlap + } + + if ($seq > $since + 1) { + // a dropped event left a gap — heal it from the durable journal, in order + foreach ($reader->tail($runId, $since) as $gapRow) { + $this->sseRow($res, $gapRow); + $since = (int) $gapRow['seq']; + } + + continue; + } + + if (!$res->sendable()) { + continue; // slow client: skip; a later gap-heal or a reconnect replays it + } + $this->sseRow($res, $this->liveRow($record, $seq)); + $since = $seq; + } + } catch (\Exception) { + // The client vanished mid-write — the connection is gone. + } finally { + $unsubscribe(); + } } /** - * Trace spans for a run past a seq cursor — the polling primitive that stands in - * for SSE. `seq` is a global monotonic autoincrement, so `seq > since` is a clean tail. + * Emit one trace row as an SSE `trace` event keyed by its seq. * - * @return list<array<string,mixed>> + * @param array<string, mixed> $row */ - private function trace(\PDO $pdo, string $runId, int $since): array + private function sseRow(HttpResponse $res, array $row): void { - $stmt = $pdo->prepare( - 'SELECT seq, span_id, parent_id, depth, phase, type, level, data - FROM trace WHERE run_id = ? AND seq > ? 
ORDER BY seq', + $res->sseEvent( + data: \json_encode($row, self::JSON), + event: 'trace', + id: (string) $row['seq'], ); - $stmt->execute([$runId, $since]); - - $out = []; - foreach ($stmt->fetchAll(\PDO::FETCH_ASSOC) as $r) { - $out[] = [ - 'seq' => (int) $r['seq'], - 'spanId' => (int) $r['span_id'], - 'parentId' => $r['parent_id'] !== null ? (int) $r['parent_id'] : null, - 'depth' => (int) $r['depth'], - 'phase' => (string) $r['phase'], - 'type' => (string) $r['type'], - 'level' => (int) $r['level'], - 'data' => \json_decode((string) $r['data'], true), - ]; - } + } - return $out; + /** + * Format a live (record, seq) into the same wire shape {@see TraceReader::tail()} produces from a db + * row, so a pushed event is indistinguishable from a replayed one. The wire shape belongs here, at the + * edge — not in the bus or the sink. + * + * @return array<string, mixed> + */ + private function liveRow(TraceRecordInterface $record, int $seq): array + { + $event = $record->event(); + + return [ + 'seq' => $seq, + 'spanId' => $record->id(), + 'parentId' => $record->parentId(), + 'depth' => $record->depth(), + 'phase' => $record->phase(), + 'type' => $event->type, + 'level' => $event->level->value, + 'data' => $event->data, + ]; } - private function uiStatus(string $name): string + /** + * Live board, as Server-Sent Events: an `issue` event per issue whose snapshot changed. This is the + * low-frequency Kanban feed (a card moving column, a token tick, a gate opening), so unlike the run + * stream it polls the issue snapshot on a slow tick and emits diffs — re-deriving a handful of issues + * every couple of seconds is cheap, and the hot per-record path is the run stream, not this. On + * connect (and on reconnect) every issue is emitted once, since the seen-set starts empty; the client + * keeps an id→issue map and applies each event. 
+ */ + private function issuesStream(HttpResponse $res, string $key): void { - return match ($name) { - 'Open' => 'open', - 'InProgress' => 'inprogress', - 'WaitingHuman' => 'waiting', - 'Done' => 'done', - 'Closed' => 'closed', - default => 'open', - }; + try { + $this->readStore($key); // resolve/validate the project before committing the stream + } catch (\Exception $e) { + $res->json(['error' => $e->getMessage()], 404); + + return; + } + + $res->sseStart(); + + $sentSnapshots = []; // issue id → the json it was last sent as + $idleTicks = 0; + + try { + while (!$res->isClosed()) { + $changed = false; + + foreach ($this->issues($key) as $issue) { + $id = (string) $issue['id']; + $snapshot = \json_encode($issue, self::JSON); + + if (($sentSnapshots[$id] ?? null) === $snapshot) { + continue; + } + + if (!$res->sendable()) { + continue; // slow client: leave it unseen so the next tick retries + } + $sentSnapshots[$id] = $snapshot; + $res->sseEvent(data: $snapshot, event: 'issue'); + $changed = true; + } + + if ($changed) { + $idleTicks = 0; + } elseif (++$idleTicks >= 5) { // ~10s of a still board → heartbeat past proxy idle timeouts + $res->sseComment(); // empty SSE comment = the canonical keepalive + $idleTicks = 0; + } + + \Async\delay(2000); + } + } catch (\Exception) { + // The client vanished mid-write — the connection is gone. + } } - private function pdo(string $key): \PDO + /** + * POST .../issues/{id}/start — launch the issue's solver as a detached coroutine and return at once. + * The dashboard watches progress on the run stream; the run records its own ledger row, trace and + * final status. At most one active run per issue (a concurrent start is rejected 409), and the run's + * gate parks on a per-issue answer channel that {@see answer()} feeds. + */ + private function start(HttpResponse $res, string $key, string $issueId): void { - $file = $this->projectsDir . '/' . \basename($key) . 
'.db'; - if (!\is_file($file)) { - throw new \RuntimeException("unknown project: {$key}"); + try { + $store = $this->storeFor($key); + $issue = $store->loadIssue($issueId); + } catch (\Exception $e) { + $res->json(['error' => $e->getMessage()], 404); + + return; + } + + if (isset($this->active[$issue->id])) { + $res->json(['error' => 'a run for this issue is already active'], 409); + + return; + } + + $config = Config::load($this->root . '/.env'); + $agent = AgentFactory::make($config, new CurlHttpClient()); + + if ($agent === null) { + $res->json(['error' => "agent '{$config->agent}' is not wired"], 500); + + return; } - return $this->open($file); + $this->active[$issue->id] = true; + /** @var Channel<string> $answers unbuffered: a gate's send() waits for the parked run's recv() */ + $answers = new Channel(); + $this->gates[$issue->id] = $answers; + + $frontend = new HttpRunFrontend($store, $issue->id, $answers, $this->bus); + + // Spawn detached so the run survives this handler returning; the dashboard watches the run stream. + // The run records its own final status, so there is nothing to catch — only the active/gate cleanup. + spawn(function () use ($store, $config, $agent, $issue, $frontend): void { + try { + new IssueRunner($this->projectsDir, $store, $config, $agent, $frontend)->run($issue); + } finally { + unset($this->active[$issue->id], $this->gates[$issue->id]); + } + }); + + $res->json(['ok' => true], 202); } - /** A read connection. The dashboard only SELECTs; busy_timeout rides out a concurrent writer. */ - private function open(string $file): \PDO + /** + * POST .../issues/{id}/answer — deliver the human's reply to the run parked at its gate. Valid only + * while the issue is WaitingHuman (a gate is actually open); otherwise there is nothing to answer and + * the unbuffered send would hang, so we reject with 409. + */ + private function answer(HttpRequest $req, HttpResponse $res, string $key, string $issueId): void { - $pdo = new \PDO('sqlite:' . 
$file, options: [\PDO::ATTR_ERRMODE => \PDO::ERRMODE_EXCEPTION]); - $pdo->exec('PRAGMA busy_timeout=4000'); + $channel = $this->gates[$issueId] ?? null; + + if ($channel === null) { + $res->json(['error' => 'no run is waiting for an answer on this issue'], 409); - return $pdo; + return; + } + + try { + $issue = $this->readStore($key)->loadIssue($issueId); + } catch (\Exception $e) { + $res->json(['error' => $e->getMessage()], 404); + + return; + } + + if ($issue->status !== IssueStatus::WaitingHuman) { + $res->json(['error' => 'the run is not waiting for an answer right now'], 409); + + return; + } + + $payload = \json_decode($req->getBody(), true); + $text = \is_array($payload) && isset($payload['text']) ? (string) $payload['text'] : ''; + + $channel->send($text); // wake the parked run; unbuffered, so this returns once the run takes it + $res->json(['ok' => true], 202); } - private function json(HttpResponse $res, int $status, mixed $data): void + /** + * Open a FRESH writable handle for a run (its own connection — a run mutates state across awaits, so + * it must not share the cached read handle). Not cached: each run gets its own. + */ + private function storeFor(string $key): ProjectStore { - $res->setStatusCode($status) - ->setHeader('Content-Type', 'application/json; charset=utf-8') - ->setBody(\json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR)) - ->end(); + return ProjectStore::openByKey($this->projectsDir, $key) + ?? 
throw new \RuntimeException("unknown project: {$key}"); } } diff --git a/src/Session.php b/src/Session.php index 28f8164..3480045 100644 --- a/src/Session.php +++ b/src/Session.php @@ -92,6 +92,7 @@ public function __construct( new AuditMiddleware($this->store), new PermissionMiddleware($this->policy, $this->tools, $this->conversation, $this->store), ]; + if ($this->toolTimeoutMs > 0) { $middlewares[] = new TimeoutMiddleware($this->toolTimeoutMs); // innermost: bound each tool run } @@ -278,6 +279,7 @@ private function turnLoop(): void } $results = []; + foreach ($response->toolCalls as $call) { $this->conversation->updateStatus(Status::toolCall($call->name)); $results[] = $this->execute($call); @@ -295,6 +297,7 @@ private function persist(): void } $new = \array_slice($this->history, $this->persisted); + if ($new !== []) { $this->store->append(...$new); $this->persisted = \count($this->history); diff --git a/src/Store/SessionStore.php b/src/Store/SessionStore.php index 34b5e2e..c9806f5 100644 --- a/src/Store/SessionStore.php +++ b/src/Store/SessionStore.php @@ -58,6 +58,7 @@ public function __construct(string $path) public function load(): array { $stmt = $this->pdo->query('SELECT role, content FROM messages ORDER BY seq'); + if ($stmt === false) { return []; // unreachable under ERRMODE_EXCEPTION; kept because query() is typed PDOStatement|false } @@ -112,6 +113,7 @@ public function logToolCall(string $call, string $result, bool $isError): void public function auditTrail(): array { $stmt = $this->pdo->query('SELECT call, is_error, result FROM audit ORDER BY id'); + if ($stmt === false) { return []; // unreachable under ERRMODE_EXCEPTION; kept because query() is typed PDOStatement|false } diff --git a/src/Tool/BashTool.php b/src/Tool/BashTool.php index d99a64c..54e49a5 100644 --- a/src/Tool/BashTool.php +++ b/src/Tool/BashTool.php @@ -48,6 +48,7 @@ public function risk(): Risk public function handle(array $input): string { $command = (string) ($input['command'] ?? 
''); + if (trim($command) === '') { throw new ToolException('bash: "command" is required'); } @@ -71,6 +72,7 @@ public function handle(array $input): string $shell = DIRECTORY_SEPARATOR === '\\' ? 'sh' : '/bin/sh'; $process = proc_open([$shell, '-c', $command], $descriptors, $pipes, $this->cwd, $env); + if (!\is_resource($process)) { throw new ToolException('bash: failed to start the command'); } diff --git a/src/Tool/ListFilesTool.php b/src/Tool/ListFilesTool.php index 7885e6f..93de393 100644 --- a/src/Tool/ListFilesTool.php +++ b/src/Tool/ListFilesTool.php @@ -46,33 +46,40 @@ public function risk(): Risk public function handle(array $input): string { $path = (string) ($input['path'] ?? '.'); + if ($path === '') { $path = '.'; } $real = $this->workspace->resolveExisting($path); + if (!is_dir($real)) { throw new ToolException("list_files: not a directory: {$path}"); } $entries = scandir($real); + if ($entries === false) { throw new ToolException("list_files: cannot read directory: {$path}"); } $entries = array_values(array_filter($entries, static fn (string $e): bool => $e !== '.' && $e !== '..')); + if ($entries === []) { return '(empty directory)'; } $lines = []; + foreach ($entries as $i => $entry) { if ($i >= $this->maxEntries) { $lines[] = "... [truncated at {$this->maxEntries} entries]"; + break; } $full = $real . DIRECTORY_SEPARATOR . $entry; + if (is_dir($full)) { $lines[] = $entry . '/'; } else { diff --git a/src/Tool/PhpEvalTool.php b/src/Tool/PhpEvalTool.php index f4d7975..e87e69c 100644 --- a/src/Tool/PhpEvalTool.php +++ b/src/Tool/PhpEvalTool.php @@ -43,6 +43,7 @@ public function risk(): Risk public function handle(array $input): string { $code = trim((string) ($input['code'] ?? 
'')); + if ($code === '') { throw new ToolException('php_eval: "code" is required'); } diff --git a/src/Tool/ReadFileTool.php b/src/Tool/ReadFileTool.php index 62bdfb5..90df0f4 100644 --- a/src/Tool/ReadFileTool.php +++ b/src/Tool/ReadFileTool.php @@ -49,6 +49,7 @@ public function risk(): Risk public function handle(array $input): string { $path = (string) ($input['path'] ?? ''); + if ($path === '') { throw new ToolException('read_file: "path" is required'); } @@ -56,6 +57,7 @@ public function handle(array $input): string $real = $this->workspace->resolveExisting($path); $data = file_get_contents($real, false, null, 0, $this->maxBytes + 1); + if ($data === false) { throw new ToolException("read_file: cannot read {$path}"); } diff --git a/src/Tool/Registry.php b/src/Tool/Registry.php index af59409..94ae842 100644 --- a/src/Tool/Registry.php +++ b/src/Tool/Registry.php @@ -50,6 +50,7 @@ public function all(): array public function specs(): array { $specs = []; + foreach ($this->tools as $tool) { $specs[] = new ToolSpec($tool->name(), $tool->description(), $tool->inputSchema()); } @@ -72,6 +73,7 @@ public function specs(): array public function only(array $names): self { $subset = new self(); + foreach ($names as $name) { $subset->add($this->get($name)); } @@ -88,6 +90,7 @@ public function only(array $names): self public function agents(): array { $agents = []; + foreach ($this->tools as $tool) { if ($tool instanceof AgentToolInterface) { $agents[] = $tool; diff --git a/src/Tool/ScheduleTool.php b/src/Tool/ScheduleTool.php index 8de5ae8..882a5be 100644 --- a/src/Tool/ScheduleTool.php +++ b/src/Tool/ScheduleTool.php @@ -62,6 +62,7 @@ public function handle(array $input): string if ($after <= 0) { throw new ToolException('schedule: "after_seconds" must be greater than zero'); } + if ($message === '') { throw new ToolException('schedule: "message" is required'); } diff --git a/src/Tool/ToolFactory.php b/src/Tool/ToolFactory.php new file mode 100644 index 0000000..b56aa78 
--- /dev/null +++ b/src/Tool/ToolFactory.php @@ -0,0 +1,31 @@ +<?php + +declare(strict_types=1); + +namespace Claw\Tool; + +use Claw\Project\Project; +use Claw\Workflow\WorkflowStore; +use Claw\Workflow\WorkflowValidator; + +/** + * Builds the tool palette a solver run works with — the file/shell tools plus the workflow-authoring and + * finish tools — against a project's real folder. Kept out of {@see \Claw\Run\IssueRunner} so the run + * pipeline does not own the tool wiring. The run's own RecallTool is added by the runner afterwards, once + * the tracer it reads from exists. + */ +final class ToolFactory +{ + public static function forRun(Project $project, Workspace $workspace, WorkflowStore $workflowStore): Registry + { + $registry = new Registry(); + $registry->add(new BashTool($project->path)); + $registry->add(new ReadFileTool($workspace)); + $registry->add(new WriteFileTool($workspace)); + $registry->add(new ListFilesTool($workspace)); + $registry->add(new DefineWorkflowTool($workflowStore, new WorkflowValidator())); + $registry->add(new FinishTool()); // the model can declare the task solved and end the run + + return $registry; + } +} diff --git a/src/Tool/Workspace.php b/src/Tool/Workspace.php index b411fc6..261f476 100644 --- a/src/Tool/Workspace.php +++ b/src/Tool/Workspace.php @@ -18,6 +18,7 @@ public function __construct(string $root) { $real = realpath($root); + if ($real === false) { throw new ToolException("Workspace directory does not exist: {$root}"); } @@ -34,6 +35,7 @@ public function root(): string public function resolveExisting(string $path): string { $real = realpath($this->join($path)); + if ($real === false) { throw new ToolException("No such file: {$path}"); } @@ -49,6 +51,7 @@ public function resolveForWrite(string $path): string $full = $this->join($path); $parent = realpath(\dirname($full)); + if ($parent === false) { throw new ToolException("Directory does not exist for: {$path}"); } diff --git a/src/Tool/WriteFileTool.php 
b/src/Tool/WriteFileTool.php index e27011a..fcd1a4c 100644 --- a/src/Tool/WriteFileTool.php +++ b/src/Tool/WriteFileTool.php @@ -45,6 +45,7 @@ public function risk(): Risk public function handle(array $input): string { $path = (string) ($input['path'] ?? ''); + if ($path === '') { throw new ToolException('write_file: "path" is required'); } diff --git a/src/Trace/ConsoleTraceSink.php b/src/Trace/ConsoleTraceSink.php index 9e50eed..5eb79e4 100644 --- a/src/Trace/ConsoleTraceSink.php +++ b/src/Trace/ConsoleTraceSink.php @@ -36,6 +36,7 @@ public function write(TraceRecordInterface $record): void } $event = $record->event(); + if (!$event->level->passes($this->threshold)) { return; } diff --git a/src/Trace/LiveTraceSink.php b/src/Trace/LiveTraceSink.php new file mode 100644 index 0000000..5bed26b --- /dev/null +++ b/src/Trace/LiveTraceSink.php @@ -0,0 +1,28 @@ +<?php + +declare(strict_types=1); + +namespace Claw\Trace; + +/** + * The live-delivery trace sink: persist through the {@see TraceStore} it composes, then publish the + * persisted record to the {@see TraceBus} so subscribed SSE streams are pushed to with no polling. + * + * It owns the persistence (so there is no separate TraceStore in the sink list to order against): it + * writes, then asks that same store for the row's `seq` — the autoincrement the record itself does not + * carry but the dashboard resumes on — and hands the bus the typed record together with that seq. 
+ */ +final readonly class LiveTraceSink implements TraceSinkInterface +{ + public function __construct( + private TraceStore $store, + private TraceBus $bus, + ) { + } + + public function write(TraceRecordInterface $record): void + { + $this->store->write($record); // persist first — the store owns the connection + seq + $this->bus->publish($record, $this->store->lastSeq()); // then notify, with the record and its persisted seq + } +} diff --git a/src/Trace/TraceBus.php b/src/Trace/TraceBus.php new file mode 100644 index 0000000..22599d4 --- /dev/null +++ b/src/Trace/TraceBus.php @@ -0,0 +1,63 @@ +<?php + +declare(strict_types=1); + +namespace Claw\Trace; + +use Async\Channel; + +/** + * An in-process pub/sub for live trace, one topic per run id. A run's {@see LiveTraceSink} publishes + * each persisted record here; the dashboard's SSE handlers subscribe and are pushed to — so the live + * stream needs no polling. Only runs executing IN this server process publish; a stream over any other + * run simply gets no live events (it still replays the journal from the db). + * + * The bus carries the typed {@see TraceRecordInterface} together with its persisted `seq` — the wire + * formatting belongs to the SSE edge ({@see \Claw\Server}), not here. Delivery is best-effort and + * non-blocking ({@see publish()} uses sendAsync), so a slow or vanished subscriber can never stall the + * run. A dropped event leaves a gap the SSE handler heals from the db by seq, and a reconnect replays + * from `Last-Event-ID` — the durable journal, not this bus, is the source of truth. + */ +final class TraceBus +{ + /** Per-subscriber buffer: a burst of records queues here rather than blocking the run. */ + private const int CAPACITY = 1024; + + /** @var array<string, array<int, Channel<array{0: TraceRecordInterface, 1: int}>>> runId → spl_object_id → channel */ + private array $subscribers = []; + + /** + * Subscribe to a run's live trace. 
Returns the channel to recv `[record, seq]` pairs on, plus an + * unsubscribe closure the caller MUST run when it stops listening (so the topic does not leak). + * + * @return array{0: Channel<array{0: TraceRecordInterface, 1: int}>, 1: \Closure(): void} + */ + public function subscribe(string $runId): array + { + /** @var Channel<array{0: TraceRecordInterface, 1: int}> $channel */ + $channel = new Channel(self::CAPACITY); + $id = spl_object_id($channel); // the channel is its own identity — no counter to keep + $this->subscribers[$runId][$id] = $channel; + + $unsubscribe = function () use ($runId, $id): void { + unset($this->subscribers[$runId][$id]); + + if (($this->subscribers[$runId] ?? []) === []) { + unset($this->subscribers[$runId]); + } + }; + + return [$channel, $unsubscribe]; + } + + /** + * Push one persisted record (with its db seq) to every subscriber of its run. Non-blocking: a full + * buffer (a slow client) drops it rather than suspend the run — the SSE handler heals the gap by seq. + */ + public function publish(TraceRecordInterface $record, int $seq): void + { + foreach ($this->subscribers[$record->runId()] ?? [] as $channel) { + $channel->sendAsync([$record, $seq]); + } + } +} diff --git a/src/Trace/TraceFormat.php b/src/Trace/TraceFormat.php index b753a27..358f6ab 100644 --- a/src/Trace/TraceFormat.php +++ b/src/Trace/TraceFormat.php @@ -95,6 +95,7 @@ private static function reply(array $data): string $names = []; $calls = $data['tool_calls'] ?? 
[]; + if (\is_array($calls)) { foreach ($calls as $call) { if (\is_array($call) && isset($call['name']) && \is_scalar($call['name'])) { diff --git a/src/Trace/TraceReader.php b/src/Trace/TraceReader.php index 8b16549..04d31fc 100644 --- a/src/Trace/TraceReader.php +++ b/src/Trace/TraceReader.php @@ -49,6 +49,7 @@ public function render(string $runId, Level $threshold = Level::Debug, bool $col public function stepHistory(string $runId, string $name): string { $bounds = $this->stepBounds($runId, $name); + if ($bounds === null) { return "No step '{$name}' has run in this workflow yet."; } @@ -77,6 +78,7 @@ public function artifacts(string $runId, string $step = ''): string { if ($step !== '') { $bounds = $this->stepBounds($runId, $step); + if ($bounds === null) { return "No step '{$step}' has run in this workflow yet."; } @@ -89,6 +91,7 @@ public function artifacts(string $runId, string $step = ''): string } $lines = []; + foreach ($stmt->fetchAll(\PDO::FETCH_ASSOC) as $row) { $data = json_decode(TraceFormat::str($row, 'data'), true); $data = \is_array($data) ? $data : []; @@ -98,6 +101,87 @@ public function artifacts(string $runId, string $step = ''): string return $lines === [] ? 'No artifacts have been recorded in this workflow yet.' : implode("\n", $lines); } + /** + * The run's trace rows past a seq cursor — the dashboard's replay and `?since=` poll. `seq` is a + * global monotonic autoincrement, so `seq > since` is a clean tail. + * + * @return list<array<string, mixed>> + */ + public function tail(string $runId, int $since): array + { + $stmt = $this->pdo->prepare( + 'SELECT seq, span_id, parent_id, depth, phase, type, level, data + FROM trace WHERE run_id = :r AND seq > :s ORDER BY seq', + ); + $stmt->execute(['r' => $runId, 's' => $since]); + + $rows = []; + + foreach ($stmt->fetchAll(\PDO::FETCH_ASSOC) as $row) { + $rows[] = [ + 'seq' => (int) $row['seq'], + 'spanId' => (int) $row['span_id'], + 'parentId' => $row['parent_id'] !== null ? 
(int) $row['parent_id'] : null, + 'depth' => (int) $row['depth'], + 'phase' => (string) $row['phase'], + 'type' => (string) $row['type'], + 'level' => (int) $row['level'], + 'data' => json_decode((string) $row['data'], true), + ]; + } + + return $rows; + } + + /** + * Input/output tokens summed over the run's model replies — the run's cost so far. + * + * @return array{0: int, 1: int} + */ + public function tokens(string $runId): array + { + $stmt = $this->pdo->prepare( + "SELECT COALESCE(SUM(json_extract(data, '$.usage.in')), 0) AS tokens_in, + COALESCE(SUM(json_extract(data, '$.usage.out')), 0) AS tokens_out + FROM trace WHERE run_id = ? AND type = 'reply'", + ); + $stmt->execute([$runId]); + $row = $stmt->fetch(\PDO::FETCH_ASSOC) ?: ['tokens_in' => 0, 'tokens_out' => 0]; + + return [(int) $row['tokens_in'], (int) $row['tokens_out']]; + } + + /** + * The run's artifacts as structured records (name / kind / body) — for the dashboard, distinct from + * {@see artifacts()} which renders them as a console string. + * + * @return list<array<string, mixed>> + */ + public function artifactRecords(string $runId): array + { + $stmt = $this->pdo->prepare("SELECT data FROM trace WHERE run_id = ? AND type = 'artifact' ORDER BY seq"); + $stmt->execute([$runId]); + + $records = []; + + foreach ($stmt->fetchAll(\PDO::FETCH_COLUMN) as $json) { + $artifact = json_decode((string) $json, true); + + if (!\is_array($artifact)) { + continue; + } + + $records[] = [ + 'name' => (string) ($artifact['label'] ?? ''), + 'kind' => (string) ($artifact['kind'] ?? 'file'), + 'meta' => '', + 'body' => (string) ($artifact['value'] ?? ''), + ]; + } + + return $records; + } + /** The workflow's name and the steps it has run so far (in order) — a quick map of the run. 
*/ public function describe(string $runId): string { @@ -111,9 +195,11 @@ public function describe(string $runId): string $steps = $this->pdo->prepare("SELECT data FROM trace WHERE run_id = :r AND type = 'step' AND phase = 'enter' ORDER BY seq"); $steps->execute(['r' => $runId]); $names = []; + foreach ($steps->fetchAll(\PDO::FETCH_ASSOC) as $stepRow) { $decoded = json_decode(TraceFormat::str($stepRow, 'data'), true); $stepName = \is_array($decoded) ? TraceFormat::str($decoded, 'name') : ''; + if ($stepName !== '' && !\in_array($stepName, $names, true)) { $names[] = $stepName; } @@ -136,6 +222,7 @@ private function stepBounds(string $runId, string $name): ?array ); $enter->execute(['r' => $runId, 'n' => $name]); $row = $enter->fetch(\PDO::FETCH_ASSOC); + if ($row === false) { return null; } @@ -159,6 +246,7 @@ private function stepBounds(string $runId, string $name): ?array private function renderRows(array $rows, bool $color = false): string { $lines = []; + foreach ($rows as $row) { if (!\is_array($row)) { continue; diff --git a/src/Trace/TraceStore.php b/src/Trace/TraceStore.php index 1e81a30..1f61ebb 100644 --- a/src/Trace/TraceStore.php +++ b/src/Trace/TraceStore.php @@ -44,6 +44,7 @@ public static function ensureTable(\PDO $pdo): void private static function ensureColumn(\PDO $pdo, string $column, string $decl): void { $info = $pdo->query('PRAGMA table_info(trace)'); + if ($info === false) { return; } @@ -75,4 +76,10 @@ public function write(TraceRecordInterface $record): void 'at' => $record->at(), ]); } + + /** The autoincrement `seq` of the row the last {@see write()} inserted on this connection — the dashboard's cursor. 
*/ + public function lastSeq(): int + { + return (int) $this->pdo->lastInsertId(); + } } diff --git a/src/Trace/Tracer.php b/src/Trace/Tracer.php index be2acc5..1a3fe0c 100644 --- a/src/Trace/Tracer.php +++ b/src/Trace/Tracer.php @@ -133,6 +133,31 @@ public function handoff(string $text): void $this->event(new TraceEvent('handoff', Level::Notice, ['text' => $text])); } + /** + * A run paused for a human: record the escalation/question and return its span id. The dashboard + * surfaces the latest `question` with no matching {@see answer()} as the issue's open gate, and the + * id lets the answer point back at exactly this question (one run can gate more than once). + */ + public function question(string $prompt): int + { + $id = ++$this->seq; + $this->emit('event', $id, new TraceEvent('question', Level::Notice, ['prompt' => $prompt])); + + return $id; + } + + /** The human's reply to a {@see question()}, recorded against its id — closes that gate in the journal. */ + public function answer(int $questionId, string $text): void + { + $this->event(new TraceEvent('answer', Level::Notice, ['ref' => $questionId, 'text' => $text])); + } + + /** The run jumped BACK to an earlier step (e.g. a review sending the work back) — recorded with the reason. */ + public function back(string $from, string $to, string $reason): void + { + $this->event(new TraceEvent('back', Level::Notice, ['from' => $from, 'to' => $to, 'reason' => $reason])); + } + private function open(TraceEvent $event): int { $id = ++$this->seq; @@ -155,7 +180,7 @@ private function emit(string $phase, int $id, TraceEvent $event): void foreach ($this->sinks as $sink) { try { $sink->write($record); - } catch (\Throwable) { + } catch (\Exception) { // Tracing must never bring the run down. 
} } diff --git a/src/Workflow/Environment.php b/src/Workflow/Environment.php index 27edf10..0cd3e73 100644 --- a/src/Workflow/Environment.php +++ b/src/Workflow/Environment.php @@ -68,6 +68,7 @@ public function child(): self public function findWorker(): AgentInterface { $worker = $this->find(EnvKey::Worker); + if (!$worker instanceof AgentInterface) { throw new WorkflowException('environment has no worker agent'); } @@ -98,6 +99,7 @@ public function executor(): ExecutorInterface public function findRegistry(): Registry { $registry = $this->find(EnvKey::Registry); + if (!$registry instanceof Registry) { throw new WorkflowException('environment has no tool registry'); } @@ -108,6 +110,7 @@ public function findRegistry(): Registry public function findStore(): WorkflowStateStoreInterface { $store = $this->find(EnvKey::Store); + if (!$store instanceof WorkflowStateStoreInterface) { throw new WorkflowException('environment has no state store'); } diff --git a/src/Workflow/GenerateIssueWorkflow.php b/src/Workflow/GenerateIssueWorkflow.php index 29f2f20..9dc9e8e 100644 --- a/src/Workflow/GenerateIssueWorkflow.php +++ b/src/Workflow/GenerateIssueWorkflow.php @@ -106,39 +106,19 @@ protected function assess(): void $this->workerTier = $this->difficulty === 'simple' ? 'worker' : 'worker-smart'; } - #[Step] - protected function draft(): void - { - $this->code = $this->extractCode($this->ai($this->draftPrompt(), [], 'worker-smart')); // [] = return code, don't act - } - /** - * The supervisor reviews the finished solver before it is saved: not "is it valid PHP" (the - * validator does that) but "will it actually work" — does every step do real work via tools/ai - * rather than return a placeholder, is each critic name backed by criticRules(), is the task - * truly solved. On a rejection, one strong-tier revision pass folds the findings back in. 
+ * Write the solver, then have it reviewed by the `solverReview` critic — "will it actually solve the + * task", not "is it valid PHP" (the validator covers that). The critic gates the step, so a rejected + * draft RE-RUNS here (continuing this conversation, see {@see WorkflowAbstract::ai()}) and is re-judged + * — the worker's fix can't slip through unreviewed, which is how a bad draft used to escape. */ - #[Step] - protected function review(): void + #[Step(critic: 'solverReview')] + protected function draft(): string { - $verdict = trim($this->ai( - 'You are a senior engineer reviewing a GENERATED solver workflow before it is allowed to ' - . 'run. Judge whether it will actually solve the task — not its syntax. Reject it if any ' - . 'step just returns a placeholder string instead of doing real work via $this->tool()/' - . "\$this->ai(), if a `#[Step(critic: '<name>')]` has no matching entry in criticRules(), " - . "or if the recipe is not genuinely carried out.\n\n" - . "If it is genuinely ready to run, reply with exactly: OK\n" - . "Otherwise reply with the concrete problems that must be fixed.\n\n" - . "The task:\n{$this->taskSummary()}\n\nThe workflow code:\n{$this->code}", - [], - 'supervisor-smart', - )); - - if (strtoupper(trim($verdict)) === 'OK') { - return; - } + // [] = the model returns the class CODE, it does not act with tools + $this->code = $this->extractCode($this->ai($this->draftPrompt(), [], 'worker-smart')); - $this->code = $this->reviseCode("A senior reviewer rejected the workflow you wrote. 
Problems to fix:\n{$verdict}"); + return $this->code; // the critic judges this; a rejection re-runs draft with the findings } #[Step] @@ -147,10 +127,30 @@ protected function save(): void $this->code = $this->saveGeneratedWorkflow( (string) $this->param('solverName'), $this->code, - fn (string $rejection): string => $this->reviseCode("The workflow class you wrote was rejected: {$rejection}"), + fn (string $rejection): string => $this->reviseCode("The class you wrote was rejected: {$rejection}"), ); } + /** + * The rubric the `solverReview` critic judges the {@see draft()} against: will the generated solver + * ACTUALLY solve the task. Spelled out in full because the reviewer is judged only against this text. + * + * @return array<string, string> + */ + protected function criticRules(): array + { + return [ + 'solverReview' => 'Judge the workflow code in the step result: will it ACTUALLY solve the task ' + . 'below — not whether it is valid PHP (the validator covers that). REJECT it if any step ' + . 'just returns a placeholder string instead of doing real work via $this->tool()/$this->ai(); ' + . "if a `#[Step(critic: '<name>')]` has no matching entry in criticRules(); if any step calls " + . "`done`/\$this->tool('done') before the deliverable is actually built (e.g. `done` hardcoded " + . 'in validate/design ends the run having done NOTHING — `done` means the whole task is solved, ' + . 'not that a step finished); or if the recipe is not genuinely carried out.' + . "\n\nThe task:\n{$this->taskSummary()}", + ]; + } + /** The issue's title and description as a compact task brief — shared by the planning steps. */ private function taskSummary(): string { @@ -163,6 +163,15 @@ private function taskSummary(): string private function draftPrompt(): string { + // A re-run after the critic rejected the draft: the model still holds its previous attempt in the + // continued conversation, so don't re-state the whole brief — just hand it the findings to fix. 
+ $critique = $this->critique(); + + if ($critique !== null) { + return "A reviewer REJECTED the workflow you just wrote:\n\n{$critique}\n\n" + . 'Rewrite the FULL class fixing exactly those problems, keeping the rest. Reply with only the PHP code.'; + } + $namespace = (string) $this->param('solverNamespace'); $class = (string) $this->param('solverName'); $toolDocs = $this->availableTools(); @@ -222,7 +231,7 @@ private function draftPrompt(): string - file paths are relative to the project root, EXACTLY as list_files shows them (e.g. 'src/Calculator.php', NOT 'Calculator.php'); when unsure of a path, call list_files at run time inside a step rather than hardcoding a guess - `\$this->tool(...)` returns the tool's raw output as a STRING and `\$this->ai(...)` returns the model's text as a STRING — never index them like arrays (no `\$result['content']`); parse the string if you need to - a tool error does NOT throw — `\$this->tool(...)` returns the failure as a string starting `tool '<name>' failed: ...`; check for that and recover (e.g. a wrong path: call list_files and retry) or fold the message into the next `\$this->ai(...)` so the model fixes it, rather than blindly using a failed result - - a step does not have to be exhausted: if the task is genuinely solved (and verified) before the planned steps run out, tell the step's model it may call the `done` tool with a short summary to finish the workflow immediately and skip the rest — a small task should not be dragged through every phase + - `done` ENDS THE WHOLE RUN, it does NOT mark a step complete. A step completes by RETURNING; calling `done` stops the entire workflow on the spot and skips every remaining step. So `done` means "the task's actual deliverable now exists and has been verified" — NEVER "this phase is finished". NEVER put `\$this->tool('done', ...)` in an early step (validate/design/assess) or in a PHP branch: that aborts the run before the implement step ever creates anything (e.g. 
a validate that calls `done` when "no conflict" finishes having done NOTHING). The only legitimate `done` is the model deciding, INSIDE an `ai()` exchange AFTER it has produced and verified the change, that the task is fully solved and the rest is redundant — so offer it by listing `done` among that step's tools, do not hardcode it. When unsure, just `return` and let the next step run. - each step's `ai()` starts fresh — it does NOT see earlier steps, and the engine carries NOTHING between steps automatically. YOU decide, per step, what prior context it needs and have it pulled in. The door is the `recall` tool the step's model can call: `recall(what='task')` re-reads the issue brief, `what='workflow'` lists the steps so far, `what='step', name='design'` returns a sibling step's history, `what='artifacts', name='design'` its artifacts, `what='tool', name='bash'` a tool's calls. So when a step builds on an earlier one, say so in its prompt (e.g. "first call recall(what='artifacts', name='implement') to see what was changed, then ...") — do not assume the earlier work is visible, and do not re-derive what a prior step already produced - THE BATON IS AUTOMATIC: after each step the engine EXPLICITLY asks the model to form a handoff (a summary of what the step did + the findings the next step must watch for), and feeds it into the next step's context as "the previous step handed this to you". You do NOT write handoff code — just make each step do its work well and `return` what it produced (so the engine has good material to form the handoff from). The next step can rely on that incoming context being present - NEVER call PHP builtins such as file_get_contents, fopen, exec, shell_exec, system, eval, include/require, or a dynamic `\$var(...)` call — they are forbidden and the code will be rejected @@ -241,11 +250,13 @@ private function availableTools(): string $names = \is_array($tools) ? 
array_map(strval(...), $tools) : ['read_file', 'write_file', 'list_files', 'bash']; $registry = $this->find(EnvKey::Registry); + if (!$registry instanceof Registry) { return implode(', ', $names); } $docs = []; + foreach ($names as $name) { if (!$registry->has($name)) { continue; diff --git a/src/Workflow/MethodTool.php b/src/Workflow/MethodTool.php index ecaf5cd..80b382d 100644 --- a/src/Workflow/MethodTool.php +++ b/src/Workflow/MethodTool.php @@ -38,8 +38,10 @@ public function inputSchema(): array { $properties = []; $required = []; + foreach ($this->method->getParameters() as $param) { $properties[$param->getName()] = ['type' => self::jsonType($param->getType())]; + if (!$param->isOptional()) { $required[] = $param->getName(); } @@ -57,8 +59,10 @@ public function risk(): Risk public function handle(array $input): string { $args = []; + foreach ($this->method->getParameters() as $param) { $name = $param->getName(); + if (\array_key_exists($name, $input)) { $args[] = $input[$name]; } elseif ($param->isOptional()) { diff --git a/src/Workflow/SqliteStateStore.php b/src/Workflow/SqliteStateStore.php index ee460e7..2cefa06 100644 --- a/src/Workflow/SqliteStateStore.php +++ b/src/Workflow/SqliteStateStore.php @@ -61,6 +61,7 @@ public function load(string $runId): array $stmt = $this->pdo->prepare('SELECT state, done FROM workflow_state WHERE run_id = :run'); $stmt->execute(['run' => $runId]); $row = $stmt->fetch(\PDO::FETCH_ASSOC); + if (!\is_array($row)) { return ['state' => [], 'done' => []]; } @@ -87,6 +88,7 @@ public function loadHandoff(string $runId): array $stmt = $this->pdo->prepare('SELECT from_step, handoff FROM workflow_handoff WHERE run_id = :run'); $stmt->execute(['run' => $runId]); $row = $stmt->fetch(\PDO::FETCH_ASSOC); + if (!\is_array($row)) { return ['from' => '', 'handoff' => '']; } @@ -114,6 +116,7 @@ private function decodeState(mixed $json): array { $decoded = json_decode(\is_string($json) ? 
$json : '', true); $state = []; + if (\is_array($decoded)) { foreach ($decoded as $key => $value) { $state[(string) $key] = $value; @@ -128,6 +131,7 @@ private function decodeDone(mixed $json): array { $decoded = json_decode(\is_string($json) ? $json : '', true); $done = []; + if (\is_array($decoded)) { foreach ($decoded as $value) { if (\is_string($value)) { diff --git a/src/Workflow/WorkflowAbstract.php b/src/Workflow/WorkflowAbstract.php index 8e0bdd5..b61ed5c 100644 --- a/src/Workflow/WorkflowAbstract.php +++ b/src/Workflow/WorkflowAbstract.php @@ -84,6 +84,34 @@ abstract class WorkflowAbstract implements WorkflowInterface */ private array $lastHistory = []; + /** + * The prior attempt's conversation, carried into a critic re-run (or a {@see back()} jump) so the + * step's next ai() CONTINUES that history instead of cold-restarting: the model keeps everything it + * already did and reacts to the critique, rather than re-deriving the whole step from scratch. The + * attempt's FIRST ai() consumes it (then it clears); empty otherwise. Transient. + * + * @var list<Message> + */ + private array $resumeHistory = []; + + /** + * Each step's last work conversation, kept so a {@see back()} into an earlier step can CONTINUE it + * (the model re-enters with full context, not cold). Transient — a resume rebuilds it as steps re-run. + * + * @var array<string, list<Message>> + */ + private array $stepHistory = []; + + /** A {@see back()} request made during the running step: the earlier step to re-enter, and why. */ + private ?string $backTo = null; + + private string $backReason = ''; + + /** The step the driver is re-entering via back(), and the reason to hand it — its first-attempt guidance. */ + private ?string $reentryStep = null; + + private string $reentryReason = ''; + /** * Artifacts produced this run, kept per step so PRIOR steps' outputs are not lost — only the * current step's slot is reset on a critic re-run (it regenerates them). 
Transient: not part of @@ -124,6 +152,7 @@ public function __construct( // simply gets none, as it would have had the crash struck a moment earlier. $saved = $store->loadHandoff($runId); $lastDone = $this->done === [] ? null : $this->done[array_key_last($this->done)]; + if ($saved['from'] !== '' && $saved['from'] === $lastDone) { $this->incomingHandoff = $saved['handoff']; } @@ -133,15 +162,20 @@ abstract public function name(): string; /** * The run's entry point. Default: drive every {@see Step} method in declaration order, each - * skipped if already done. Override to orchestrate by hand — it is plain PHP (ordering, - * if/while, sub-workflows); call $this->step('methodName') to run a step with the same - * skip-and-snapshot guarantee. + * skipped if already done. A step may {@see back()} to an earlier step — the driver then re-runs + * that step onward (so a review can send the work back to where it was produced). Override to + * orchestrate by hand — it is plain PHP (ordering, if/while, sub-workflows); call + * $this->step('methodName') to run a step with the same skip-and-snapshot guarantee. */ public function run(): void { try { - foreach ($this->stepMethods() as $name) { - $this->step($name); + $names = $this->stepMethods(); + $index = 0; + + while ($index < \count($names)) { + $this->step($names[$index]); + $index = $this->backTo === null ? $index + 1 : $this->rewindTo($names, $index); } } catch (WorkflowFinished $finished) { // the model called the `done` tool: the task is solved, skip any remaining steps. @@ -149,6 +183,49 @@ public function run(): void } } + /** + * Send the run BACK to an earlier step from inside the current one (e.g. a review that wants the + * work redone where it was produced). The default {@see run()} re-runs the target onward; the target + * re-enters CONTINUING its own conversation (so the model keeps its context) and reads $reason as its + * first-attempt guidance via {@see critique()}. 
Recorded in the journal so the jump and its reason are + * visible. Within a hand-written run(), honor it yourself (e.g. loop back to the step). + */ + protected function back(string $toStep, string $reason): void + { + if (!\in_array($toStep, $this->stepMethods(), true)) { + throw new \LogicException("back('{$toStep}'): no such step"); + } + $this->backTo = $toStep; + $this->backReason = $reason; + $this->tracer()?->back($this->currentStep, $toStep, $reason); + } + + /** + * Carry out a back() requested during the step at $from: clear the done-marks of target..$from so they + * re-run, arm the target's re-entry (continue its history + read the reason), and return the target's + * index for the driver to jump to. + * + * @param list<string> $names + */ + private function rewindTo(array $names, int $from): int + { + $target = (string) $this->backTo; + $this->backTo = null; + $to = array_search($target, $names, true); + + if ($to === false || $to > $from) { + throw new \LogicException("back('{$target}') must name an EARLIER step"); + } + + for ($k = $to; $k <= $from; ++$k) { + $this->done = array_values(array_filter($this->done, static fn (string $d): bool => $d !== $names[$k])); + } + $this->reentryStep = $target; + $this->reentryReason = $this->backReason; + + return $to; + } + /** * Configure the run before it executes — a hook the workflow overrides to set up its own * values via {@see set()}, reading the project's defaults via {@see find()}. Default is a @@ -190,27 +267,41 @@ protected function step(string $name): void $maxRounds = $this->maxRounds($step); $result = ''; $workHistory = []; + $resume = []; // the prior attempt's conversation; a re-run continues it (empty on the first attempt) + + if ($this->reentryStep === $name) { + $resume = $this->stepHistory[$name] ?? 
[]; // a back() into this step continues its prior conversation + $this->critique = $this->reentryReason; // the back() reason is its first-attempt guidance + $this->reentryStep = null; + $this->reentryReason = ''; + } + while (true) { $this->artifacts[$name] = []; // a fresh attempt of THIS step regenerates its artifacts; prior steps keep theirs $this->lastHistory = []; // so a step that makes no ai() call leaves no (stale) history + $this->resumeHistory = $resume; // a re-run's first ai() CONTINUES the prior attempt, not a cold restart $raw = $this->{$name}(); $result = \is_string($raw) ? $raw : ''; $workHistory = $this->lastHistory; // the work exchange — its handoff continues THIS context + if ($rubric === null) { break; } $findings = $this->critic($name, $result, $rubric, $this->artifacts[$name]); + if ($findings === null) { break; // the critic is satisfied } $guidance = $this->superviseStep($name, $result, $findings, ++$round, $maxRounds); + if ($guidance === null) { break; // the supervisor accepted the work as-is } $this->critique = $guidance; // the re-run reads this via critique() + $resume = $workHistory; // the next attempt continues THIS attempt's conversation $this->enforceBudget(); // the round spent tokens; stop here if the budget is gone } } finally { @@ -224,6 +315,7 @@ protected function step(string $name): void // e.g. the last step, or a step that finishes through a tool) — by CONTINUING this history, // and is persisted as it is formed. See {@see formPendingHandoff()}. 
$this->pendingHandoff = ['name' => $name, 'history' => $workHistory]; + $this->stepHistory[$name] = $workHistory; // kept so a later back() into this step continues its context $this->done[] = $name; $this->env->findStore()->save($this->runId, $this->captureState(), $this->done); @@ -308,7 +400,10 @@ protected function ai(string $prompt, ?array $tools = null, ?string $agent = nul $this->enforceBudget(); // refuse to start a model call once the run's total budget is spent $this->formPendingHandoff(); // a downstream step is reading: form (and persist) the previous step's handoff - return $this->runTurns($prompt, $tools, $agent, []); + $prior = $this->resumeHistory; // a re-run/back continues the prior attempt's conversation, not a cold restart + $this->resumeHistory = []; // only the attempt's first ai() continues; later calls start fresh + + return $this->runTurns($prompt, $tools, $agent, $prior); } /** @@ -367,6 +462,7 @@ private function paletteScope(?array $tools, ?string $agent): Environment $scope = $this->env->child()->set(EnvKey::Registry, $palette); $model = $agent !== null ? 
$this->agentModel($agent) : null; + if ($model !== null) { $scope->set(EnvKey::ModelId, $model); // route this call to the role's model } @@ -406,6 +502,7 @@ private function makeTurnLoop(Environment $scope, string $system, ?SpeakerInterf private function formPendingHandoff(): void { $pending = $this->pendingHandoff; + if ($pending === null) { return; } @@ -444,6 +541,7 @@ private function handoffContext(): string private function toolBriefing(Registry $palette): string { $tools = $palette->all(); + if ($tools === []) { return ''; } @@ -469,14 +567,17 @@ private function tracer(): ?Tracer private function withLocalTools(Registry $registry): Registry { $local = $this->localTools(); + if ($local === []) { return $registry; } $combined = new Registry(); + foreach ($registry->all() as $tool) { $combined->add($tool); } + foreach ($local as $tool) { $combined->add($tool); } @@ -497,8 +598,10 @@ private function localTools(): array } $tools = []; + foreach (new \ReflectionClass($this)->getMethods() as $method) { $attributes = $method->getAttributes(Tool::class); + if ($attributes !== []) { $tools[] = new MethodTool($this, $method, $attributes[0]->newInstance()); } @@ -511,6 +614,7 @@ private function localTools(): array private function agentModel(string $agent): ?string { $agents = $this->env->find(EnvKey::Agents); + if (\is_array($agents) && isset($agents[$agent]) && \is_string($agents[$agent]) && $agents[$agent] !== '') { return $agents[$agent]; } @@ -536,6 +640,7 @@ private function turnBudget(): ?Budget $seconds = $this->numEnv(EnvKey::TurnTimeLimit); $workflow = $this->budget(); + if ($workflow !== null) { return $workflow->child($tokens, $seconds); } @@ -554,16 +659,19 @@ private function turnBudget(): ?Budget private function enforceBudget(): void { $budget = $this->budget(); + if ($budget === null || !$budget->isExhausted()) { return; } if ($this->budgetPolicy() === BudgetPolicy::Ask) { $channel = $this->env->find(EnvKey::Ask); + if ($channel instanceof 
SpeakerInterface) { $extra = $this->parseExtraTokens($channel->reply( "Budget spent: {$budget->reason()}. Enter extra tokens to continue, or nothing to stop.", )); + if ($extra > 0) { $budget->raise($extra); $this->tracer()?->log('budget', "raised by {$extra} tokens", [], Level::Notice); @@ -616,11 +724,13 @@ private function stepAttribute(string $name): ?Step private function criticRubric(?Step $step, string $name): ?string { $critic = $step?->critic; + if ($critic === null || $critic === '') { return null; } $rules = $this->criticRules()[$critic] ?? null; + if ($rules === null || trim($rules) === '') { throw new \LogicException("Step '{$name}' names critic '{$critic}', but criticRules() has no rules for it."); } @@ -728,6 +838,7 @@ private function superviseStep(string $name, string $result, string $findings, i . "Reply with guidance to fix it, or 'accept' to keep it as is, or 'stop' to abort."; $reply = $channel->reply($prompt); + if ($reply === null) { if ($stuck) { throw new WorkflowException("step '{$name}' still failed review after {$round} rounds"); @@ -738,9 +849,11 @@ private function superviseStep(string $name, string $result, string $findings, i $answer = trim($reply); $lower = strtolower($answer); + if ($answer === '' || str_starts_with($lower, 'accept')) { return null; // accept the work as-is } + if (str_starts_with($lower, 'stop')) { throw new WorkflowException("run stopped at step '{$name}' by the supervisor"); } @@ -765,6 +878,7 @@ protected function tool(string $name, array $params): string $result = $this->env->executor()->call(new ToolCall($this->env->findStore()->nextId(), $name, $params)); $tracer?->toolResult($name, $result->content, $result->isError); + if ($result->isError) { return "tool '{$name}' failed: " . 
$result->content; } @@ -779,6 +893,7 @@ protected function tool(string $name, array $params): string protected function extractCode(string $text): string { $text = trim($text); + if (preg_match('/```(?:php)?\s*(.+?)\s*```/s', $text, $m) === 1) { return trim($m[1]); } @@ -806,12 +921,14 @@ protected function extractCode(string $text): string protected function saveGeneratedWorkflow(string $name, string $code, callable $revise): string { $result = $this->tool('define_workflow', ['name' => $name, 'code' => $code, 'shared' => true]); + if (str_contains($result, self::WORKFLOW_SAVED_MARKER)) { return $code; } $code = $revise($result); $result = $this->tool('define_workflow', ['name' => $name, 'code' => $code, 'shared' => true]); + if (!str_contains($result, self::WORKFLOW_SAVED_MARKER)) { throw new WorkflowException($result); // a second failure surfaces to the run-path } @@ -830,6 +947,7 @@ protected function saveGeneratedWorkflow(string $name, string $code, callable $r protected function ask(string $question): string { $channel = $this->env->find(EnvKey::Ask); + if (!$channel instanceof SpeakerInterface) { throw new WorkflowException('the workflow asked for input but no ask channel is configured'); } @@ -862,6 +980,7 @@ protected function log(string $action, string $message = '', array $context = [] private function stepMethods(): array { $names = []; + foreach (new \ReflectionClass($this)->getMethods() as $method) { if ($method->getAttributes(Step::class) !== []) { $names[] = $method->getName(); @@ -880,12 +999,14 @@ private function stepMethods(): array private function captureState(): array { $state = []; + foreach ($this->stateProperties() as $property) { if (!$property->isInitialized($this)) { continue; } $value = $property->getValue($this); + // The snapshot is JSON-persisted; a closure or resource is not durable state and would // corrupt the store or fail opaquely later. Fail loud here, naming the offending field. 
if ($value instanceof \Closure || \is_resource($value)) { @@ -929,6 +1050,7 @@ private function stateProperties(): array { $properties = []; $class = new \ReflectionClass($this); + while ($class !== false && $class->getName() !== self::class) { foreach ($class->getProperties() as $property) { if (!$property->isStatic()) { diff --git a/src/Workflow/WorkflowStore.php b/src/Workflow/WorkflowStore.php index 071aeb6..068ae88 100644 --- a/src/Workflow/WorkflowStore.php +++ b/src/Workflow/WorkflowStore.php @@ -48,11 +48,13 @@ public function write(string $name, string $code, bool $shared = false): string $this->assertValidName($name); $dir = $shared ? $this->commonDir() : $this->sessionDir(); + if (!is_dir($dir) && !mkdir($dir, 0o775, true) && !is_dir($dir)) { throw new WorkflowException("cannot create workflow directory: {$dir}"); } $path = $this->path($name, $shared); + if (file_put_contents($path, $code) === false) { throw new WorkflowException("cannot write workflow: {$path}"); } @@ -121,6 +123,7 @@ private function registerAutoloader(): void $relative = substr($class, \strlen(self::NS_ROOT) + 1); $path = $base . '/' . str_replace('\\', '/', $relative) . 
'.php'; + if (is_file($path)) { require $path; } diff --git a/tests/Agent/AbstractAgentTest.php b/tests/Agent/AbstractAgentTest.php index 89d5b04..5bb6134 100644 --- a/tests/Agent/AbstractAgentTest.php +++ b/tests/Agent/AbstractAgentTest.php @@ -84,6 +84,7 @@ private function agent(AgentResponse|\Throwable ...$outcomes): ScriptedRetryAgen private function assertThrows(ScriptedRetryAgent $agent): void { $threw = false; + try { $agent->send($this->request()); } catch (\Claw\Exceptions\AgentException $e) { diff --git a/tests/Agent/AgentErrorsTest.php b/tests/Agent/AgentErrorsTest.php index 4d0fef1..88efad3 100644 --- a/tests/Agent/AgentErrorsTest.php +++ b/tests/Agent/AgentErrorsTest.php @@ -28,6 +28,7 @@ public function classifiesByProviderErrorType(): void $rateLimited = AgentErrors::classify(429, 'rate_limit_error', 'x', 2000); Assert::same($rateLimited::class, RateLimitException::class); + if ($rateLimited instanceof RateLimitException) { Assert::same($rateLimited->retryAfterMs, 2000); } diff --git a/tests/Agent/ClaudeAgentTest.php b/tests/Agent/ClaudeAgentTest.php index c34f0c8..4e85b84 100644 --- a/tests/Agent/ClaudeAgentTest.php +++ b/tests/Agent/ClaudeAgentTest.php @@ -175,6 +175,7 @@ public function sendThrowsAgentExceptionOnApiError(): void $agent = new ClaudeAgent($http, 'sk-test'); $threw = false; + try { $agent->send(new AgentRequest('m', [Message::userText('x')])); } catch (AgentException $e) { @@ -199,6 +200,7 @@ public function malformed2xxBodySurfacesAsAgentExceptionNotHttpException(): void ); $caught = null; + try { $agent->send(new AgentRequest('m', [Message::userText('x')])); } catch (\Throwable $e) { @@ -221,6 +223,7 @@ public function wrapsTransportErrorAsAgentException(): void ); $threw = false; + try { $agent->send(new AgentRequest('m', [Message::userText('x')])); } catch (AgentException $e) { diff --git a/tests/Agent/ConsoleSpeakerTest.php b/tests/Agent/ConsoleSpeakerTest.php index 2b8e553..7dea955 100644 --- 
a/tests/Agent/ConsoleSpeakerTest.php +++ b/tests/Agent/ConsoleSpeakerTest.php @@ -16,6 +16,7 @@ public function printsTheQuestionWithARequestPromptAndReturnsTheTypedLine(): voi { $in = fopen('php://memory', 'r+'); $out = fopen('php://memory', 'r+'); + if ($in === false || $out === false) { throw new \RuntimeException('cannot open memory stream'); } @@ -37,12 +38,14 @@ public function printsTheQuestionWithARequestPromptAndReturnsTheTypedLine(): voi public function returnsNullOnEofButEmptyStringOnABlankLine(): void { $out = fopen('php://memory', 'r+'); + if ($out === false) { throw new \RuntimeException('cannot open memory stream'); } // A deliberately blank line is a real (empty) answer. $blank = fopen('php://memory', 'r+'); + if ($blank === false) { throw new \RuntimeException('cannot open memory stream'); } @@ -53,6 +56,7 @@ public function returnsNullOnEofButEmptyStringOnABlankLine(): void // Exhausted input (EOF) means no one is there to answer -> null, so the loop stops // instead of treating absence as an empty answer and churning. 
$empty = fopen('php://memory', 'r+'); + if ($empty === false) { throw new \RuntimeException('cannot open memory stream'); } diff --git a/tests/Agent/DefaultTurnLoopTest.php b/tests/Agent/DefaultTurnLoopTest.php index d1496c8..17de210 100644 --- a/tests/Agent/DefaultTurnLoopTest.php +++ b/tests/Agent/DefaultTurnLoopTest.php @@ -351,6 +351,7 @@ public function throwsWhenHistoryReachesTheSoftCap(): void $loop = new DefaultTurnLoop($agent, new RecordingExecutor(), 'm', 's', maxHistory: 4); $threw = false; + try { $loop->run([Message::userText('go')]); } catch (ContextLengthException) { diff --git a/tests/Chat/ConsoleChatTest.php b/tests/Chat/ConsoleChatTest.php index 772518f..f9ab558 100644 --- a/tests/Chat/ConsoleChatTest.php +++ b/tests/Chat/ConsoleChatTest.php @@ -41,6 +41,7 @@ public function sendWritesLineToOutput(): void private function memoryStream() { $stream = fopen('php://memory', 'r+'); + if ($stream === false) { throw new \RuntimeException('cannot open memory stream'); } diff --git a/tests/Chat/TelegramClientTest.php b/tests/Chat/TelegramClientTest.php index b42583d..36c99c9 100644 --- a/tests/Chat/TelegramClientTest.php +++ b/tests/Chat/TelegramClientTest.php @@ -34,6 +34,7 @@ public function getUpdatesThrowsOnApiError(): void ); $threw = false; + try { $client->getUpdates(0); } catch (HttpException $e) { diff --git a/tests/Project/ProjectStoreTest.php b/tests/Project/ProjectStoreTest.php index 299d548..cf85345 100644 --- a/tests/Project/ProjectStoreTest.php +++ b/tests/Project/ProjectStoreTest.php @@ -42,6 +42,7 @@ public function initRejectsADoubleInit(): void ProjectStore::init($projectsDir, $folder); $threw = false; + try { ProjectStore::init($projectsDir, $folder); } catch (ClawException $e) { @@ -62,6 +63,7 @@ public function initRejectsAMissingFolder(): void try { $threw = false; + try { ProjectStore::init($projectsDir, $projectsDir . 
'/does-not-exist'); } catch (ClawException $e) { @@ -87,6 +89,7 @@ public function discoverWalksUpToTheNearestProjectRoot(): void mkdir($sub, 0o775, true); $store = ProjectStore::discover($projectsDir, $sub); // started deep inside the tree + if (!$store instanceof ProjectStore) { throw new \RuntimeException('discover() returned null for a subdirectory of a project'); } @@ -132,6 +135,7 @@ public function addIssueOpensAnIssueInTheProjectDb(): void $pdo = new \PDO('sqlite:' . $projectsDir . '/' . $store->project()->id . '.db'); $pdo->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); $stmt = $pdo->query('SELECT title, description, status FROM issues WHERE id = 1'); + if ($stmt === false) { throw new \RuntimeException('issues query failed'); } @@ -155,6 +159,7 @@ public function addIssueRejectsAnEmptyTitle(): void $store = self::openProject($projectsDir, $folder); $threw = false; + try { $store->addIssue(' '); } catch (ClawException $e) { @@ -219,6 +224,7 @@ private static function openProject(string $projectsDir, string $folder): Projec { ProjectStore::init($projectsDir, $folder); $store = ProjectStore::discover($projectsDir, $folder); + if (!$store instanceof ProjectStore) { throw new \RuntimeException('discover() returned null for a just-initialized project'); } diff --git a/tests/Support/ScriptedAgent.php b/tests/Support/ScriptedAgent.php index bca874e..e8bf8d7 100644 --- a/tests/Support/ScriptedAgent.php +++ b/tests/Support/ScriptedAgent.php @@ -30,6 +30,7 @@ public function send(AgentRequest $request): AgentResponse $this->requests[] = $request; $next = array_shift($this->outcomes); + if ($next === null) { throw new \RuntimeException('ScriptedAgent: no more outcomes'); } diff --git a/tests/Support/ScriptedHttpClient.php b/tests/Support/ScriptedHttpClient.php index 4514285..ca25f6d 100644 --- a/tests/Support/ScriptedHttpClient.php +++ b/tests/Support/ScriptedHttpClient.php @@ -39,6 +39,7 @@ private function next(): HttpResponse $this->calls++; $outcome = 
array_shift($this->outcomes); + if ($outcome === null) { throw new \RuntimeException('ScriptedHttpClient: no more outcomes'); } diff --git a/tests/Support/ScriptedRetryAgent.php b/tests/Support/ScriptedRetryAgent.php index e5e0738..9d4d696 100644 --- a/tests/Support/ScriptedRetryAgent.php +++ b/tests/Support/ScriptedRetryAgent.php @@ -39,6 +39,7 @@ protected function attempt(AgentRequest $request): AgentResponse $this->attempts++; $next = array_shift($this->outcomes); + if ($next === null) { throw new \RuntimeException('ScriptedRetryAgent: no more outcomes'); } diff --git a/tests/Tool/BashToolTest.php b/tests/Tool/BashToolTest.php index 62b845a..78ce751 100644 --- a/tests/Tool/BashToolTest.php +++ b/tests/Tool/BashToolTest.php @@ -38,6 +38,7 @@ public function runsInCwdWithScrubbedEnv(): void $bash = new BashTool($dir); $pwd = $bash->handle(['command' => 'pwd']); + if (DIRECTORY_SEPARATOR === '\\') { // MSYS/Git `sh` reports a POSIX path (/tmp/…) that can't equal PHP's // realpath (C:\…\Temp\…); assert it ran in the right dir by name. 
@@ -55,6 +56,7 @@ public function runsInCwdWithScrubbedEnv(): void public function requiresCommand(): void { $threw = false; + try { new BashTool(sys_get_temp_dir())->handle([]); } catch (ToolException $e) { diff --git a/tests/Tool/DateToolTest.php b/tests/Tool/DateToolTest.php index 304b7c1..ead0ed2 100644 --- a/tests/Tool/DateToolTest.php +++ b/tests/Tool/DateToolTest.php @@ -26,6 +26,7 @@ public function returnsCurrentDateInRequestedFormat(): void public function rejectsInvalidTimezone(): void { $threw = false; + try { new DateTool()->handle(['timezone' => 'Nowhere/Bad']); } catch (ToolException $e) { diff --git a/tests/Tool/FileToolsTest.php b/tests/Tool/FileToolsTest.php index f510ee5..f7db692 100644 --- a/tests/Tool/FileToolsTest.php +++ b/tests/Tool/FileToolsTest.php @@ -69,6 +69,7 @@ private function workspace(): Workspace private function assertToolError(callable $fn): void { $threw = false; + try { $fn(); } catch (ToolException $e) { diff --git a/tests/Tool/ListFilesToolTest.php b/tests/Tool/ListFilesToolTest.php index 7c57c19..7c8968d 100644 --- a/tests/Tool/ListFilesToolTest.php +++ b/tests/Tool/ListFilesToolTest.php @@ -69,6 +69,7 @@ private function workspace(): Workspace private function assertToolError(callable $fn): void { $threw = false; + try { $fn(); } catch (ToolException $e) { diff --git a/tests/Tool/PhpEvalToolTest.php b/tests/Tool/PhpEvalToolTest.php index 5c2e9b8..326addc 100644 --- a/tests/Tool/PhpEvalToolTest.php +++ b/tests/Tool/PhpEvalToolTest.php @@ -35,6 +35,7 @@ public function reportsErrorsAndMissingCode(): void private function assertToolError(callable $fn): void { $threw = false; + try { $fn(); } catch (ToolException $e) { diff --git a/tests/Tool/RecallToolTest.php b/tests/Tool/RecallToolTest.php index b2691c3..141e04a 100644 --- a/tests/Tool/RecallToolTest.php +++ b/tests/Tool/RecallToolTest.php @@ -49,6 +49,7 @@ public function missingNameForAStepRecallIsAnError(): void $tool = new RecallTool(new TraceReader($pdo), '1'); $threw 
= false; + try { $tool->handle(['what' => 'step']); // no name } catch (\Claw\Exceptions\ToolException) { diff --git a/tests/Tool/RegistryTest.php b/tests/Tool/RegistryTest.php index 34e4a30..32a1147 100644 --- a/tests/Tool/RegistryTest.php +++ b/tests/Tool/RegistryTest.php @@ -32,6 +32,7 @@ public function addGetHasAll(): void public function unknownToolThrows(): void { $threw = false; + try { new Registry()->get('nope'); } catch (ToolException $e) { @@ -96,6 +97,7 @@ public function onlyThrowsOnAnUnknownName(): void $registry->add(new StubTool('read')); $threw = false; + try { $registry->only(['read', 'ghost']); } catch (ToolException $e) { diff --git a/tests/Tool/ScheduleToolTest.php b/tests/Tool/ScheduleToolTest.php index 1c81b8d..09df8d6 100644 --- a/tests/Tool/ScheduleToolTest.php +++ b/tests/Tool/ScheduleToolTest.php @@ -38,6 +38,7 @@ public function deliversTheMessageAfterTheDelay(): void public function rejectsNonPositiveDelay(): void { $threw = false; + try { new ScheduleTool(static function (string $m): void { })->handle(['after_seconds' => 0, 'message' => 'x']); @@ -52,6 +53,7 @@ public function rejectsNonPositiveDelay(): void public function rejectsEmptyMessage(): void { $threw = false; + try { new ScheduleTool(static function (string $m): void { })->handle(['after_seconds' => 1, 'message' => ' ']); diff --git a/tests/Tool/WorkspaceTest.php b/tests/Tool/WorkspaceTest.php index 351c19c..fa4dfe7 100644 --- a/tests/Tool/WorkspaceTest.php +++ b/tests/Tool/WorkspaceTest.php @@ -56,6 +56,7 @@ private function workspace(): Workspace private function assertRejected(callable $fn): void { $threw = false; + try { $fn(); } catch (ToolException $e) { diff --git a/tests/Trace/ConsoleTraceSinkTest.php b/tests/Trace/ConsoleTraceSinkTest.php index b4a618c..8a18cf2 100644 --- a/tests/Trace/ConsoleTraceSinkTest.php +++ b/tests/Trace/ConsoleTraceSinkTest.php @@ -50,6 +50,7 @@ public function quietThresholdShowsOnlyMilestonesAndErrors(): void private function capture(Level 
$threshold, callable $run): string { $stream = fopen('php://memory', 'r+'); + if ($stream === false) { throw new \RuntimeException('cannot open memory stream'); } diff --git a/tests/Trace/TracerTest.php b/tests/Trace/TracerTest.php index fe74668..3a2f5e1 100644 --- a/tests/Trace/TracerTest.php +++ b/tests/Trace/TracerTest.php @@ -86,6 +86,7 @@ public function closingAParentEmitsMatchingExitsForStillOpenChildren(): void // Every opened span has exactly one matching exit — no unbalanced enter. $opened = []; $closed = []; + foreach ($sink->records as $rec) { if ($rec->phase() === 'enter') { $opened[] = $rec->id(); @@ -99,6 +100,7 @@ public function closingAParentEmitsMatchingExitsForStillOpenChildren(): void // The orphaned child was closed before its parent, inheriting its own level. $aiExit = null; + foreach ($sink->records as $rec) { if ($rec->phase() === 'exit' && $rec->id() === $ai) { $aiExit = $rec; @@ -124,6 +126,7 @@ public function traceStorePersistsEveryRecord(): void $tracer->exit($wf); $stmt = $pdo->query('SELECT run_id, phase, type, data FROM trace ORDER BY seq'); + if ($stmt === false) { throw new \RuntimeException('trace query failed'); } diff --git a/tests/Workflow/GenerateIssueWorkflowTest.php b/tests/Workflow/GenerateIssueWorkflowTest.php index 4e4f21f..8272c89 100644 --- a/tests/Workflow/GenerateIssueWorkflowTest.php +++ b/tests/Workflow/GenerateIssueWorkflowTest.php @@ -41,16 +41,15 @@ public function generatesAndSavesASolverWorkflowForTheIssue(): void $registry->add(new DefineWorkflowTool($store, new WorkflowValidator())); // Each step calls ai(); between steps the engine forms a handoff (continuing the step's - // conversation), so those replies are interleaved: plan, [handoff], difficulty, [handoff], - // code, [handoff], OK. + // conversation). draft is critic-gated, so after the code comes the solverReview critic's + // verdict: plan, [handoff], difficulty, [handoff], code, OK(critic). save makes no model call. 
$agent = new ScriptedAgent( self::answer('Plan: read the file, then summarize it.'), self::answer('handoff after understand'), self::answer('simple — a localized, mechanical change.'), self::answer('handoff after assess'), self::answer(self::solverCode('Issue7Solver')), - self::answer('handoff after draft'), - self::answer('OK'), + self::answer('OK'), // the solverReview critic passes the drafted solver ); $env = new Environment() diff --git a/tests/Workflow/WorkflowAbstractTest.php b/tests/Workflow/WorkflowAbstractTest.php index 4be8195..0d34dbc 100644 --- a/tests/Workflow/WorkflowAbstractTest.php +++ b/tests/Workflow/WorkflowAbstractTest.php @@ -22,6 +22,7 @@ use Claw\Tool\ToolInterface; use Claw\Trace\ArrayTraceSink; use Claw\Trace\Tracer; +use Claw\Trace\TraceRecordInterface; use Claw\Workflow\BudgetPolicy; use Claw\Workflow\Environment; use Claw\Workflow\EnvKey; @@ -241,6 +242,59 @@ public function run(): void Assert::same($this->stepNames($sink), ['b']); } + #[Test] + public function backReRunsTheTargetStepOnwardAndIsJournaled(): void + { + $sink = new ArrayTraceSink(); + $wf = new class ($this->config(tracer: new Tracer('r1', $sink)), 'r1') extends WorkflowAbstract { + public string $trail = ''; + + private int $backs = 0; + + public function name(): string + { + return 'backer'; + } + + #[Step] + public function a(): void + { + $this->trail .= 'a'; + } + + #[Step] + public function b(): void + { + $this->trail .= 'b'; + } + + #[Step] + public function c(): void + { + $this->trail .= 'c'; + + if ($this->backs === 0) { + ++$this->backs; // once, so the run terminates + $this->back('a', 'redo from a'); // send the run back to step a + } + } + }; + + $wf->run(); + + // a,b,c ran, then back('a') re-ran a,b,c — the driver honored the backward jump. + Assert::same($wf->trail, 'abcabc'); + Assert::same($this->stepNames($sink), ['a', 'b', 'c', 'a', 'b', 'c']); + + // and the jump is in the journal, with where it came from / went to / why. 
+ $backs = array_values(array_filter( + $sink->records, + static fn (TraceRecordInterface $record): bool => $record->event()->type === 'back', + )); + Assert::count($backs, 1); + Assert::same($backs[0]->event()->data, ['from' => 'c', 'to' => 'a', 'reason' => 'redo from a']); + } + #[Test] public function logTracesANote(): void { @@ -250,6 +304,7 @@ public function logTracesANote(): void $wf->callLog('did-thing', 'the details'); $notes = []; + foreach ($sink->records as $record) { if ($record->event()->type === 'note') { $notes[] = $record->event()->data; @@ -299,6 +354,7 @@ public function askThrowsWhenNoChannelIsConfigured(): void $wf = new ProbeWorkflow($this->config(), 'r1'); // no EnvKey::Ask set $threw = false; + try { $wf->callAsk('anyone?'); } catch (WorkflowException) { @@ -316,6 +372,7 @@ public function aiThrowsWhenTheRunBudgetIsSpent(): void $wf = new ProbeWorkflow($this->config()->set(EnvKey::Budget, $budget), 'r1'); $threw = false; + try { $wf->callAi('hi'); } catch (WorkflowException) { @@ -333,6 +390,7 @@ public function stepThrowsWhenTheRunBudgetIsSpent(): void $wf = new ProbeWorkflow($this->config()->set(EnvKey::Budget, $budget), 'r1'); $threw = false; + try { $wf->callStep('alpha'); } catch (WorkflowException) { @@ -392,6 +450,7 @@ public function reply(string $incoming): string $wf = new ProbeWorkflow($env, 'r1'); $threw = false; + try { $wf->callAi('hi'); } catch (WorkflowException) { @@ -420,6 +479,7 @@ public function make(): string }; $threw = false; + try { $wf->run(); } catch (\LogicException) { @@ -515,6 +575,7 @@ public function emit(): void $wf->run(); $artifacts = []; + foreach ($sink->records as $record) { if ($record->event()->type === 'artifact') { $artifacts[] = $record->event()->data['label'] . ':' . 
$record->event()->data['kind']; @@ -550,6 +611,7 @@ public function make(): string }; $threw = false; + try { $wf->run(); } catch (WorkflowException) { @@ -852,8 +914,10 @@ private function config( private function stepNames(ArrayTraceSink $sink): array { $names = []; + foreach ($sink->records as $record) { $event = $record->event(); + if ($event->type === 'step') { $names[] = (string) ($event->data['name'] ?? ''); }