Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@ jobs:
- name: Typecheck
run: pnpm -r typecheck

- name: Test
run: pnpm -r test
# Runs every package's tests under v8 coverage and fails if any package
# drops below its per-package thresholds (see each vitest.config.ts).
- name: Test + coverage
run: pnpm coverage

- name: Lint
run: pnpm lint
Expand Down
15 changes: 10 additions & 5 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ A **pnpm workspace** of OpenCode plugins under the `@vymalo` npm scope. There ar
| `packages/opencode-oauth2` → `@vymalo/opencode-oauth2` | OAuth2 / OIDC auth + dynamic model discovery for OpenAI-compatible providers. The mature plugin; five auth flows (`authorization_code`, `device_code`, `client_credentials`, `jwt_bearer`, `token_exchange`), persistent token cache, periodic sync scheduler. PKCE is on by default for the two interactive flows (`pkce: false` opts out per server). |
| `packages/opencode-models-info` → `@vymalo/opencode-models-info` | **Auth-agnostic** metadata enrichment plugin: fetches OpenRouter-shaped `/models` JSON and merges `limit` / `cost` / `modalities` / capability flags onto existing provider model entries. Runs as a `Hooks.config` hook *after* other plugins. |
| `packages/opencode-ratelimit` → `@vymalo/opencode-ratelimit` | **Auth-agnostic** rate-limit awareness plugin: in its `Hooks.config` hook it injects a custom `fetch` onto opted-in providers (`options.meta.rateLimit`) that reads Envoy Gateway / IETF draft-03 rate-limit headers (`x-ratelimit-limit/remaining/reset`), proactively throttles when `remaining` hits 0, and backs off + retries on `429`. Supports `tiers` (reset-magnitude policy bands with `wait`/`error` actions, so a 60s burst waits but a multi-day budget reset errors fast) and `scope: "model"\|"provider"` (per-model cooldown buckets for per-model gateway limits). The only response-observing plugin — OpenCode has no post-response hook, so wrapping `options.fetch` is the sole interception point. In-memory state only (no `cache.ts`). See [`docs/ratelimit.md`](docs/ratelimit.md). |
| `packages/opencode-browser` → `@vymalo/opencode-browser` | **Auth-agnostic** browser-automation plugin: registers `browser_*` **tools** (`Hooks.tool`) the model calls (open, click, type, scroll, screenshot, snapshot, …) and hosts a localhost WebSocket **bridge** (via the Node `ws` package, so it runs under both Bun and Node) that the companion extension dials. **33 tools** in three groups (`page`/`control`/`debug`, gated by the `groups` option); tabs are organized into **named groups**. The single source of truth for the tool surface is `catalog.ts` (shared with the MCP server). The bridge is an **auto-elect broker** (`broker.ts`) routing between **agents** (plugin/MCP/sessions) and **executors** (extensions) by named-group ownership — so multiple browsers and multiple agents can share one bridge. The only tool-registering plugin. Screenshots are written to disk (tool output is text-only). See [`docs/browser.md`](docs/browser.md) and [`plans/multi-client-routing.md`](plans/multi-client-routing.md). |
| `packages/opencode-browser` → `@vymalo/opencode-browser` | **Auth-agnostic** browser-automation plugin: registers `browser_*` **tools** (`Hooks.tool`) the model calls (open, click, type, scroll, screenshot, snapshot, …) and hosts a localhost WebSocket **bridge** (via the Node `ws` package, so it runs under both Bun and Node) that the companion extension dials. **34 tools** in four groups (`page`/`control`/`debug`/`interactive`, gated by the `groups` option; `debug` and `interactive` are opt-in); tabs are organized into **named groups**. The single source of truth for the tool surface is `catalog.ts` (shared with the MCP server). The bridge is an **auto-elect broker** (`broker.ts`) routing between **agents** (plugin/MCP/sessions) and **executors** (extensions) by named-group ownership — so multiple browsers and multiple agents can share one bridge. The only tool-registering plugin. Screenshots are written to disk (tool output is text-only). The `interactive` group adds **human-in-the-loop** feedback (`browser_request_feedback`): a blocking, branded in-page overlay (point/confirm/choose) that the broker can tear down via a `cancel` frame on abort/timeout — see [`plans/ui-feedback.md`](plans/ui-feedback.md). See [`docs/browser.md`](docs/browser.md) and [`plans/multi-client-routing.md`](plans/multi-client-routing.md). |
| `packages/opencode-browser-mcp` → `@vymalo/opencode-browser-mcp` | **MCP stdio server** (a `bin`) that hosts the same bridge (Node `ws` transport) and exposes the same group-filtered `browser_*` catalog over the Model Context Protocol — so non-OpenCode agents (Claude Code, Cursor, Cline, …) can drive the extension. Reuses `@vymalo/opencode-browser`'s catalog + JSON-Schema via `./lib`; returns screenshots as inline MCP image content. |
| `apps/browser-extension` → `@vymalo/opencode-browser-extension` (private) | Companion Chromium MV3 + Firefox extension for the browser plugin/MCP server. WXT + React + Tailwind + shadcn-style UI + TanStack Query + Dexie/IndexedDB. Its background worker connects out to the bridge and drives tabs via CDP (`chrome.debugger`) or a content-script fallback. **Not** published to npm. |
| `packages/plugin-bundle` → `@vymalo/opencode-oauth2-bundle` (private) | Rolldown build that ships a single-file distribution of the oauth2 plugin. |
Expand All @@ -24,13 +24,16 @@ The plugins are deliberately decoupled: `opencode-models-info`, `opencode-rateli
pnpm install # bootstrap workspace
pnpm -r build # compile all packages (tsc → dist/)
pnpm -r typecheck # tsc --noEmit across packages
pnpm -r test # vitest run in each package that has tests
pnpm -r test # vitest run in each package that has tests (fast, no coverage)
pnpm coverage # vitest run --coverage per package; FAILS below per-package thresholds
pnpm lint # biome lint (full repo)
pnpm format # biome format --write
pnpm format:check # biome format (no write) — part of the pre-push gate
```

Pre-push gate (run all five before opening a PR): `pnpm -r build && pnpm -r typecheck && pnpm -r test && pnpm lint && pnpm format:check`.
Pre-push gate (run all five before opening a PR): `pnpm -r build && pnpm -r typecheck && pnpm coverage && pnpm lint && pnpm format:check`. (`pnpm coverage` runs the tests **and** enforces coverage; CI runs the same. Use the faster `pnpm -r test` for local iteration.)

Coverage thresholds are per-package, declared in each `vitest.config.ts` (`test.coverage.thresholds`) and set a few points below the current coverage, so a real regression fails CI without forcing a threshold update on every small change. `@vymalo/opencode-browser` sets the bar (~88%+); `opencode-browser-mcp` excludes its stdio `bin` (`mcp.ts`) from the metric (it's e2e-only); the **browser extension floor is intentionally low** (chrome/DOM/React glue is verified manually — raise it once a fake-browser harness lands).

Per-package iteration (much faster):

Expand Down Expand Up @@ -95,7 +98,7 @@ packages/<plugin>/

`opencode-ratelimit` follows the same shape **minus `cache.ts`** (its rate-limit state is in-memory only — a reset window is seconds, so persisting it would only serve stale data) and **plus `headers.ts`** (a pure parser for the `x-ratelimit-*` triple). It is also the only plugin that injects a custom `fetch` into `provider.options.fetch` (the sole way to observe response status/headers, since OpenCode has no post-response hook) rather than only reading/merging config.

`opencode-browser` follows the same shape **minus `cache.ts`** (broker state is in-memory) and **plus**: `protocol.ts` (the dependency-free wire-frame contract, mirrored into the extension), `transport.ts` (the `BridgeTransport` seam + `isAddrInUse`) and `node-transport.ts` (the `ws`-backed host transport + guest socket, runs under Bun *and* Node — shared with the MCP server via `./lib`; async `listen` for bind-based election), `broker.ts` (role-aware broker: executors + agents + group-ownership routing, DI-tested), `agent-client.ts` (guest-agent WS client), `endpoint.ts` (try-bind → host-or-guest auto-election with failover), `token-file.ts` (shared `bridge.json`), `catalog.ts` + `schema.ts` (the neutral tool surface), and `tools.ts` (the OpenCode `Hooks.tool` adapter over the catalog). It is the only plugin that **registers tools** and **hosts a server**. The companion extension under `apps/browser-extension` is a WXT project (not the per-package `src/` layout) — its engine lives in `src/background/` (bridge client, command router, group registry, CDP + content executors, `page-actions` injected via `chrome.scripting.executeScript`) and its UI in `src/entrypoints/{popup,options}` over Dexie.
`opencode-browser` follows the same shape **minus `cache.ts`** (broker state is in-memory) and **plus**: `protocol.ts` (the dependency-free wire-frame contract, mirrored into the extension), `transport.ts` (the `BridgeTransport` seam + `isAddrInUse`) and `node-transport.ts` (the `ws`-backed host transport + guest socket, runs under Bun *and* Node — shared with the MCP server via `./lib`; async `listen` for bind-based election), `broker.ts` (role-aware broker: executors + agents + group-ownership routing, DI-tested), `agent-client.ts` (guest-agent WS client), `endpoint.ts` (try-bind → host-or-guest auto-election with failover), `token-file.ts` (shared `bridge.json`), `catalog.ts` + `schema.ts` (the neutral tool surface), and `tools.ts` (the OpenCode `Hooks.tool` adapter over the catalog). It is the only plugin that **registers tools** and **hosts a server**. The companion extension under `apps/browser-extension` is a WXT project (not the per-package `src/` layout) — its engine lives in `src/background/` (bridge client, command router, group registry, CDP + content executors, `page-actions` injected via `chrome.scripting.executeScript`, `feedback`/`feedback-overlay`/`feedback-side-panel` for the `interactive` HITL flow) and its UI in `src/entrypoints/{popup,options,sidepanel}` over Dexie (the `sidepanel` is the docked annotation fallback for overlay-blocked pages).

**Important — two entry points per published package:**

Expand All @@ -119,7 +122,7 @@ When changing the mapping in [`packages/opencode-models-info/src/mapping.ts`](pa

- **Biome, not ESLint/Prettier.** Config in [`biome.json`](biome.json) — double quotes, 100-col, no trailing commas, semicolons always. `noNonNullAssertion` is a warning the existing code stays clean of; mirror that in new code (`@vymalo/opencode-oauth2` has 0 warnings, treat that as the bar).
- **Strict TS.** Base config is in [`tsconfig.base.json`](tsconfig.base.json) — `ES2022` + `NodeNext` + `strict: true`. Per-package tsconfig only sets `rootDir`/`outDir`. `lib.ts` re-exports are the public surface.
- **Vitest** is the test runner; each package owns a `vitest.config.ts`. Tests live in `test/`, not co-located.
- **Vitest** is the test runner; each package owns a `vitest.config.ts` (with a `coverage` block + per-package thresholds enforced by `pnpm coverage`). Tests live in `test/`, not co-located. Coverage uses the v8 provider (`@vitest/coverage-v8`).
- **Node ≥ 22** for the runtime packages (set in each package.json `engines`). Use `node:` prefixed imports for built-ins (`node:fs/promises`, `node:crypto`).
- **Logging pattern**: every plugin emits structured events through both a JSON console fallback and `client.app.log` (so the host log stream picks them up). Event names use `snake_case` (`models_info_cache_hit`, `oauth2_token_refreshed`). Add new events to that pattern, not ad-hoc `console.log`.
- **Cache layout** mirrors per-OS conventions — `~/Library/Caches/<ns>/` on macOS, `XDG_CACHE_HOME` on Linux, `LOCALAPPDATA` on Windows. Each plugin uses its own namespace (`opencode-oauth2`, `opencode-models-info`). Disk writes are atomic-rename + `0o600`.
Expand All @@ -138,5 +141,7 @@ Versions are bumped **manually** — there are no changesets and no release scri

- [`plans/prd.md`](plans/prd.md) — original oauth2 PRD with the phased roadmap.
- [`plans/models-info-plan.md`](plans/models-info-plan.md) — design doc for the metadata plugin, including the OpenRouter→OpenCode field mapping table.
- [`plans/multi-client-routing.md`](plans/multi-client-routing.md) — design (final) for the browser bridge's auto-elect broker: multi-executor + multi-agent routing by group ownership, host-or-guest election, failover.
- [`plans/ui-feedback.md`](plans/ui-feedback.md) — design (draft) for human-in-the-loop browser feedback: a `browser_request_feedback` tool that paints an annotation overlay and blocks on the user; needs a `CancelFrame` + per-command timeout first.
- [`docs/`](docs/) — the architecture doc is canonical for hook behavior. Also: [`well-known.md`](docs/well-known.md) (`.well-known/opencode` distribution), [`models-info.md`](docs/models-info.md) (enrichment composition + caching), [`ratelimit.md`](docs/ratelimit.md) (rate-limit policy/tiers), [`browser.md`](docs/browser.md) (browser-automation dual plugin — topology, wire protocol, tool reference, executors, security), [`troubleshooting.md`](docs/troubleshooting.md) (symptom-keyed fixes), plus GitHub Actions / Kubernetes cookbooks and local-dev setup.
- [`docs/adr/`](docs/adr/) — Architecture Decision Records: load-bearing, non-obvious decisions and *why* (e.g. [ADR-0001](docs/adr/0001-bridge-transport-ws-not-bun-serve-or-socketio.md) — the browser bridge uses `ws`, not `Bun.serve` or socket.io). Add one when a choice closes off alternatives someone would reasonably reach for.
1 change: 1 addition & 0 deletions apps/browser-extension/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"submit:firefox": "wxt submit --firefox-zip .output/*-firefox.zip --firefox-sources-zip .output/*-sources.zip",
"typecheck": "wxt prepare && tsc --noEmit",
"test": "vitest run",
"coverage": "vitest run --coverage",
"lint": "biome lint .",
"format": "biome format --write .",
"format:check": "biome format ."
Expand Down
8 changes: 8 additions & 0 deletions apps/browser-extension/src/background/bridge-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ export interface BridgeClientDeps {
getConfig: () => Promise<BridgeClientConfig>;
/** Execute one command and resolve with its result data. */
onCommand: (frame: CommandFrame) => Promise<unknown>;
/**
* The broker abandoned an in-flight command (agent abort / timeout / agent
* gone) — tear down any UI/work for that command id. No result is sent.
*/
onCancel?: (id: string) => void;
/** Executor kind to publish in the status row, for the UI. */
executorKind: () => ExecutorKind;
/**
Expand Down Expand Up @@ -169,6 +174,9 @@ export class BridgeClient {
case "release":
await this.deps.onRelease?.();
return;
case "cancel":
this.deps.onCancel?.(frame.id);
return;
case "ping":
ws.send(encodeFrame({ v: PROTOCOL_VERSION, type: "pong" }));
return;
Expand Down
49 changes: 49 additions & 0 deletions apps/browser-extension/src/background/command-router.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { recordAction, recordScreenshot } from "../shared/db";
import type { CommandFrame } from "../shared/protocol";
import type { Executor, Viewport } from "./executor";
import { startFeedback } from "./feedback";
import type { FeedbackMode, FeedbackRequest } from "./feedback-overlay";
import type { GroupRegistry } from "./group-registry";
import { runPageAction, type Target } from "./page-actions";

Expand All @@ -19,11 +21,34 @@ function target(params: Record<string, unknown>): Target {
* (and screenshot) to IndexedDB for the dashboard — including failures.
*/
export class CommandRouter {
/**
* Teardown callbacks for in-flight cancellable commands, keyed by command id.
* Long-running interactive commands (e.g. a feedback overlay) register here so
* a broker `cancel` can abort them; ordinary commands never register.
*/
private readonly cancellers = new Map<string, () => void>();

constructor(
private readonly registry: GroupRegistry,
private readonly executor: Executor
) {}

/**
 * Store `teardown` in the `cancellers` map under command `id`, replacing any
 * entry already stored for that id.
 *
 * @param id - Command id the teardown belongs to.
 * @param teardown - Callback to run if the command is cancelled.
 * @returns A disposer that removes whatever entry is stored under `id`.
 */
registerCanceller(id: string, teardown: () => void): () => void {
  this.cancellers.set(id, teardown);
  // The disposer is keyed by id only; it does not check which teardown is stored.
  const dispose = (): boolean => this.cancellers.delete(id);
  return dispose;
}

/**
 * Broker abandoned command `id`: if a teardown is registered for it, remove
 * the entry and invoke the teardown. Does nothing when no entry exists.
 *
 * @param id - Command id whose teardown should run.
 */
cancel(id: string): void {
  const registered = this.cancellers.get(id);
  if (registered) {
    // Remove the entry before invoking so the teardown runs with its id already dropped.
    this.cancellers.delete(id);
    registered();
  }
}

async handle(frame: CommandFrame): Promise<unknown> {
const start = Date.now();
try {
Expand All @@ -48,6 +73,9 @@ export class CommandRouter {
durationMs: Date.now() - start
});
throw err;
} finally {
// The command settled on its own; drop any teardown it registered.
this.cancellers.delete(frame.id);
}
}

Expand Down Expand Up @@ -291,6 +319,27 @@ export class CommandRouter {
await this.executor.releaseAll();
return { data: { ok: true }, summary: "released control" };
}
case "request_feedback": {
const tabId = this.tab(group, params);
const req: FeedbackRequest = {
mode: (params.mode as FeedbackMode) ?? "confirm",
prompt: params.prompt as string | undefined,
options: Array.isArray(params.options) ? (params.options as string[]) : undefined,
timeoutMs: typeof params.timeoutMs === "number" ? params.timeoutMs : 120_000
};
const handle = startFeedback(tabId, frame.id, req);
// Register before awaiting so a broker `cancel` mid-wait can tear it down.
this.registerCanceller(frame.id, handle.cancel);
const result = await handle.result;
const summary = result.error
? `feedback unavailable: ${result.error}`
: result.timedOut
? "feedback timed out"
: result.responded
? `feedback: ${result.annotations.map((a) => a.kind).join(",") || "none"}`
: "feedback dismissed";
return { data: result, summary };
}
default:
throw new Error(`unknown action: ${action}`);
}
Expand Down
Loading