diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 6d39c37..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.agents/plugins/marketplace.json b/.agents/plugins/marketplace.json new file mode 100644 index 0000000..01fd1da --- /dev/null +++ b/.agents/plugins/marketplace.json @@ -0,0 +1,32 @@ +{ + "name": "braintrust-codex-plugins", + "interface": { + "displayName": "Braintrust Codex Plugins" + }, + "plugins": [ + { + "name": "braintrust", + "source": { + "source": "local", + "path": "./plugins/braintrust-codex-plugin" + }, + "policy": { + "installation": "AVAILABLE", + "authentication": "ON_INSTALL" + }, + "category": "Developer tools" + }, + { + "name": "trace-codex", + "source": { + "source": "local", + "path": "./plugins/trace-codex" + }, + "policy": { + "installation": "AVAILABLE", + "authentication": "ON_INSTALL" + }, + "category": "Observability" + } + ] +} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..57dc956 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,24 @@ +name: CI + +on: + pull_request: + push: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + + - name: Install Bun + uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2 + with: + bun-version: latest + + - name: Install dependencies + run: bun install --frozen-lockfile + working-directory: plugins/trace-codex + + - name: Run tests + run: make test diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..9d7eefe --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,97 @@ +name: Release trace-codex + +on: + workflow_dispatch: + inputs: + version: + description: "Version to release, e.g. 
0.2.0 (no leading v)" + required: true + type: string + +permissions: + contents: write + +env: + PLUGIN_DIR: plugins/trace-codex + MANIFEST: plugins/trace-codex/.codex-plugin/plugin.json + # Expose the input as an env var so run scripts never splice it into shell source (script injection). + VERSION: ${{ inputs.version }} + +jobs: + release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + + - name: Require main branch + run: | + if [ "$GITHUB_REF" != "refs/heads/main" ]; then + echo "::error::Releases must run on main (got '$GITHUB_REF')." + exit 1 + fi + + - name: Validate version format + run: | + if ! printf '%s' "$VERSION" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+$'; then + echo "::error::Version '$VERSION' is not valid semver (expected MAJOR.MINOR.PATCH, no leading v)." + exit 1 + fi + + - name: Set tag + id: vars + run: echo "tag=trace-codex-v$VERSION" >> "$GITHUB_OUTPUT" + + - name: Ensure tag does not already exist + run: | + # The default shallow checkout fetches no tags, so ask the remote instead of the local repo. + if git ls-remote --exit-code --tags origin "refs/tags/${{ steps.vars.outputs.tag }}" >/dev/null 2>&1; then + echo "::error::Tag ${{ steps.vars.outputs.tag }} already exists." + exit 1 + fi + + - name: Install Bun + uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2 + with: + bun-version: latest + + - name: Update plugin manifest version + run: | + tmp="$(mktemp)" + sed -E 's/("version"[[:space:]]*:[[:space:]]*")[^"]*(")/\1'"$VERSION"'\2/' "$MANIFEST" > "$tmp" + mv "$tmp" "$MANIFEST" + echo "Updated $MANIFEST:" + grep '"version"' "$MANIFEST" + # Fail if the bump did not take (e.g. unexpected manifest format). 
+ grep -Fq "\"version\": \"$VERSION\"" "$MANIFEST" + + # Build before pushing anything so a failed build cannot leave a pushed tag with no release assets. + - name: Build release binaries + working-directory: ${{ env.PLUGIN_DIR }} + run: | + bun install --frozen-lockfile + # Cross-compiles every target (darwin/linux x arm64/x64) into bin/. + bun run build + ls -la bin/ + + - name: Commit, tag, and push + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add "$MANIFEST" + git commit -m "chore(trace-codex): release v$VERSION" + git tag "${{ steps.vars.outputs.tag }}" + git push origin HEAD + git push origin "${{ steps.vars.outputs.tag }}" + + - name: Create GitHub release with autogenerated notes + env: + GH_TOKEN: ${{ github.token }} + run: | + gh release create "${{ steps.vars.outputs.tag }}" \ + --title "${{ steps.vars.outputs.tag }}" \ + --verify-tag \ + --generate-notes \ + "$PLUGIN_DIR"/bin/codex-hook-darwin-arm64 \ + "$PLUGIN_DIR"/bin/codex-hook-darwin-x64 \ + "$PLUGIN_DIR"/bin/codex-hook-linux-x64 \ + "$PLUGIN_DIR"/bin/codex-hook-linux-arm64 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..496ee2c --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.DS_Store \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index dca9f4e..d2016b0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,20 +4,34 @@ Guidelines for AI agents working in this repo. ## Repo purpose -This repo packages the [Braintrust MCP server](https://www.braintrust.dev/docs/integrations/developer-tools/mcp) as a [Codex marketplace plugin](https://developers.openai.com/codex/plugins). 
The key files are: +This repo is a monorepo of two independent [Codex marketplace plugins](https://developers.openai.com/codex/plugins): -- `.codex-plugin/plugin.json` — plugin manifest (version, UI metadata, default prompts) -- `.mcp.json` — MCP server definition -- `skills/braintrust/` — agent skills exposed through the plugin +- `plugins/braintrust-codex-plugin/` — packages the [Braintrust MCP server](https://www.braintrust.dev/docs/integrations/developer-tools/mcp) plus a routing skill. +- `plugins/trace-codex/` — an opt-in plugin that traces Codex sessions to Braintrust (session, turn, and tool spans) via Codex lifecycle hooks. Do **not** merge tracing behavior into the MCP/skills plugin; they are separate, independently installable plugins. + +Both plugins are listed as separate entries in `.agents/plugins/marketplace.json` (the repo marketplace). + +Key files for the MCP/skills plugin: + +- `plugins/braintrust-codex-plugin/.codex-plugin/plugin.json` — plugin manifest (version, UI metadata, default prompts) +- `plugins/braintrust-codex-plugin/.mcp.json` — MCP server definition +- `plugins/braintrust-codex-plugin/skills/braintrust/` — agent skills exposed through the plugin + +Key files for the tracing plugin: + +- `plugins/trace-codex/.codex-plugin/plugin.json` — plugin manifest +- `plugins/trace-codex/hooks/hooks.json` — lifecycle hook config +- `plugins/trace-codex/src/` — the hook client + event server (compiled to `bin/codex-hook`) ## Making changes - **Skills**: There is only one simple skill in this repo which handles routing and tool definitions, it should not be modified significantly. -- **MCP config**: edit `.mcp.json` to change the MCP server command or environment variables. -- **Plugin metadata**: edit `.codex-plugin/plugin.json` for display name, description, brand color, default prompts, etc. +- **MCP config**: edit `plugins/braintrust-codex-plugin/.mcp.json` to change the MCP server command or environment variables. 
+- **Plugin metadata**: edit the relevant `.codex-plugin/plugin.json` for display name, description, brand color, default prompts, etc. +- **Marketplace**: edit `.agents/plugins/marketplace.json` to change plugin entries, categories, or install policies. ## Releasing a new version -1. Bump `"version"` in `.codex-plugin/plugin.json`. +1. Bump `"version"` in the relevant plugin's `.codex-plugin/plugin.json`. 2. Commit, tag, and create a GitHub release (see README for exact commands). 3. Do **not** skip the git tag — releases are tracked via tags so users can see a changelog. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f8a1f4a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Braintrust Data, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..917cb89 --- /dev/null +++ b/Makefile @@ -0,0 +1,30 @@ +# Root Makefile. 
+# +# Delegates to each plugin's own Makefile so CI only needs to run `make test` +# here. When a new plugin gains a Makefile, add it to PLUGIN_DIRS below. + +SHELL := /bin/bash + +PLUGIN_DIRS := plugins/trace-codex + +.PHONY: test build clean $(PLUGIN_DIRS) + +# Default: run every plugin's `test` target. +test: $(PLUGIN_DIRS) + +# Run `make test` in each plugin directory. +$(PLUGIN_DIRS): + @echo "==> $@" + $(MAKE) -C $@ test + +build: + @for dir in $(PLUGIN_DIRS); do \ + echo "==> build $$dir"; \ + $(MAKE) -C $$dir build; \ + done + +clean: + @for dir in $(PLUGIN_DIRS); do \ + echo "==> clean $$dir"; \ + $(MAKE) -C $$dir clean; \ + done diff --git a/README.md b/README.md index c1229d6..e002b09 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,18 @@ -# Braintrust Codex Plugin +# Braintrust Codex Plugins -A [Codex plugin](https://developers.openai.com/codex/plugins) that connects the Braintrust MCP server to the Codex marketplace, enabling agents to query Braintrust evals, logs, experiments, datasets, prompts, and traces. +This repo is a monorepo of Braintrust [Codex plugins](https://developers.openai.com/codex/plugins) -## Local setup +## Quickstart -Follow the [Codex plugin local install guide](https://developers.openai.com/codex/plugins/build#install-a-local-plugin-manually) to sideload this plugin. +Add this repo as a Codex plugin marketplace: -A reference PR showing a working local setup: https://github.com/braintrustdata/braintrust/pull/13536 +```bash +codex plugin marketplace add braintrustdata/braintrust-codex-plugin +``` -Once installed it should look like this: +Then install the plugins you want: -image - -## Contributing - -When adding or modifying a skill, update the skill files under `skills/braintrust/` and test locally before opening a PR. - -## Releasing - -1. Bump the version in `.codex-plugin/plugin.json`: - ```json - { "version": "0.2.0" } - ``` - -2. 
Commit the version bump: - ```bash - git add .codex-plugin/plugin.json - git commit -m "chore: bump version to 0.2.0" - ``` - -3. Create a git tag and GitHub release: - ```bash - git tag v0.2.0 - git push origin main --tags - gh release create v0.2.0 --title "v0.2.0" --notes "Describe what changed" - ``` - -Users can see the changelog via [GitHub Releases](https://github.com/braintrustdata/braintrust-codex-plugin/releases). - -To make the repo public, ping **#wg-infra** or **#eng** on Slack. +- mcp and skills: `codex plugin add braintrust@braintrust-codex-plugins` +- trace codex sessions to braintrust: `codex plugin add trace-codex@braintrust-codex-plugins` + - run: `TRACE_TO_BRAINTRUST=true BRAINTRUST_PROJECT=my-coding-agent codex` + - see plugin's [README](/plugins/trace-codex/README.md) for details and config options diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..26c6bbb --- /dev/null +++ b/install.sh @@ -0,0 +1,138 @@ +#!/bin/bash +### +# Local dev install for the Braintrust Codex plugins. +# +# Codex has no `--plugin-dir` flag. Plugins are loaded from a marketplace and +# COPIED into a versioned cache under ~/.codex/plugins/cache/. Codex validates +# that the cached plugin is a real directory under the cache root, so a symlink +# back to the repo does NOT work (Codex reports it as "not installed"). +# +# This script therefore does a normal install (a copy) and re-syncs that copy +# on every run. Re-run it whenever you edit plugin files so Codex picks up the +# changes. +# +# What it does: +# 1. registers this repo as a local marketplace, +# 2. installs the plugin(s) so Codex copies them into the cache and writes the +# correct config entries. +# +# Usage: +# ./install.sh # install all plugins in this repo +# ./install.sh trace-codex # install just one plugin (by folder) +# +# Use ./uninstall.sh to remove. 
+### + +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +MARKETPLACE="braintrust-codex-plugins" + +# Default port the trace-codex event server listens on (see plugins/trace-codex +# config.ts DEFAULT_PORT). Overridable via the same env var the plugin reads. +EVENT_SERVER_PORT="${BRAINTRUST_EVENT_SERVER_PORT:-52734}" + +# Ask any running trace-codex event server to shut down. After a re-install the +# cache holds a new build, but a server spawned from the OLD build may still be +# running; it would refuse the new version (version mismatch) and block tracing +# until its idle timeout. Shutting it down lets the next session boot a fresh +# one. No server running is the normal case, so failures are ignored. +shutdown_event_server() { + if ! command -v curl >/dev/null 2>&1; then + return 0 + fi + if curl -fsS --max-time 2 -X POST \ + "http://127.0.0.1:$EVENT_SERVER_PORT/shutdown" >/dev/null 2>&1; then + echo " shut down running event server on port $EVENT_SERVER_PORT." + fi +} + +# Plugin folders to install. Default: every folder under plugins/. +if [ "$#" -gt 0 ]; then + PLUGINS=("$@") +else + PLUGINS=() + for d in "$REPO_ROOT"/plugins/*/; do + PLUGINS+=("$(basename "$d")") + done +fi + +if ! command -v codex >/dev/null 2>&1; then + echo "Error: 'codex' CLI not found. Install Codex CLI first." >&2 + exit 1 +fi + +echo "Installing Braintrust Codex plugins from: $REPO_ROOT" +echo "" + +# 1. (Re)register the local marketplace so the snapshot reflects the current +# marketplace.json. Remove first so a fresh snapshot is taken. +echo "Registering local marketplace '$MARKETPLACE'..." +codex plugin marketplace remove "$MARKETPLACE" >/dev/null 2>&1 || true +codex plugin marketplace add "$REPO_ROOT" >/dev/null +echo " done." 
+echo "" + +read_json_field() { + # read_json_field -> first string value for that field + grep -o "\"$2\"[[:space:]]*:[[:space:]]*\"[^\"]*\"" "$1" | head -1 | sed 's/.*"\([^"]*\)"$/\1/' +} + +# Build a plugin's compiled assets if it has a Bun build (build-on-install). +build_plugin() { + plugin_src="$1" + if [ ! -f "$plugin_src/package.json" ]; then + return 0 + fi + if ! grep -q '"build"' "$plugin_src/package.json"; then + return 0 + fi + if ! command -v bun >/dev/null 2>&1; then + echo "Error: '$folder' needs Bun to build, but 'bun' was not found." >&2 + echo " Install Bun from https://bun.sh and re-run ./install.sh" >&2 + exit 1 + fi + echo " building (bun)..." + ( cd "$plugin_src" && bun install --silent && BUILD_HOST_ONLY=1 bun run build ) +} + +for folder in "${PLUGINS[@]}"; do + plugin_src="$REPO_ROOT/plugins/$folder" + manifest="$plugin_src/.codex-plugin/plugin.json" + + if [ ! -f "$manifest" ]; then + echo "Skipping '$folder': no manifest at $manifest" >&2 + continue + fi + + # The installable plugin name is the manifest `name` (Codex requires the + # marketplace entry name to match it), which may differ from the folder name. + name="$(read_json_field "$manifest" name)" + version="$(read_json_field "$manifest" version)" + if [ -z "$name" ] || [ -z "$version" ]; then + echo "Skipping '$folder': could not read name/version from manifest" >&2 + continue + fi + + echo "Installing '$name' (v$version) from plugins/$folder..." + # Build compiled assets first so the marketplace copy includes them. + build_plugin "$plugin_src" + # Remove any prior install so the copy is re-synced from the current files. + codex plugin remove "$name@$MARKETPLACE" >/dev/null 2>&1 || true + codex plugin add "$name" --marketplace "$MARKETPLACE" >/dev/null + echo " installed." + + # trace-codex runs a long-lived background event server. Stop any stale one + # left over from a previous build so it doesn't linger with the old version. 
+ if [ "$folder" = "trace-codex" ]; then + shutdown_event_server + fi +done + +echo "" +echo "Done. Next steps:" +echo " 1. Restart Codex (or start a new session) so it loads the plugins." +echo " 2. Plugin hooks are non-managed: run /hooks in the Codex CLI and trust them" +echo " before they will fire." +echo "" +echo "After editing plugin files, re-run ./install.sh to re-sync the cached copy." diff --git a/.codex-plugin/plugin.json b/plugins/braintrust-codex-plugin/.codex-plugin/plugin.json similarity index 100% rename from .codex-plugin/plugin.json rename to plugins/braintrust-codex-plugin/.codex-plugin/plugin.json diff --git a/.mcp.json b/plugins/braintrust-codex-plugin/.mcp.json similarity index 100% rename from .mcp.json rename to plugins/braintrust-codex-plugin/.mcp.json diff --git a/assets/favicon.svg b/plugins/braintrust-codex-plugin/assets/favicon.svg similarity index 100% rename from assets/favicon.svg rename to plugins/braintrust-codex-plugin/assets/favicon.svg diff --git a/assets/logo.png b/plugins/braintrust-codex-plugin/assets/logo.png similarity index 100% rename from assets/logo.png rename to plugins/braintrust-codex-plugin/assets/logo.png diff --git a/skills/braintrust/SKILL.md b/plugins/braintrust-codex-plugin/skills/braintrust/SKILL.md similarity index 100% rename from skills/braintrust/SKILL.md rename to plugins/braintrust-codex-plugin/skills/braintrust/SKILL.md diff --git a/plugins/trace-codex/.codex-plugin/plugin.json b/plugins/trace-codex/.codex-plugin/plugin.json new file mode 100644 index 0000000..04d1ce1 --- /dev/null +++ b/plugins/trace-codex/.codex-plugin/plugin.json @@ -0,0 +1,33 @@ +{ + "name": "trace-codex", + "version": "0.1.0", + "description": "Trace Codex sessions to Braintrust (session, turn, and tool spans).", + "author": { + "name": "Braintrust", + "url": "https://www.braintrust.dev/" + }, + "homepage": "https://www.braintrust.dev/docs", + "license": "MIT", + "keywords": [ + "braintrust", + "tracing", + "observability", + 
"codex", + "hooks" + ], + "hooks": "./hooks/hooks.json", + "interface": { + "displayName": "Trace Codex", + "shortDescription": "Trace Codex sessions to Braintrust", + "longDescription": "Opt-in plugin that traces Codex sessions to Braintrust as session, turn, and tool spans via Codex lifecycle hooks.", + "developerName": "Braintrust", + "category": "Observability", + "capabilities": [ + "Read" + ], + "websiteURL": "https://www.braintrust.dev/", + "privacyPolicyURL": "https://www.braintrust.dev/legal/privacy-policy", + "termsOfServiceURL": "https://www.braintrust.dev/legal/terms-of-service", + "brandColor": "#2C1FEB" + } +} diff --git a/plugins/trace-codex/.gitignore b/plugins/trace-codex/.gitignore new file mode 100644 index 0000000..2c8d7b0 --- /dev/null +++ b/plugins/trace-codex/.gitignore @@ -0,0 +1,8 @@ +node_modules/ +# Built binaries are downloaded at runtime / built locally, never committed. +# The launcher scripts (bin/codex-hook.sh, bin/codex-hook.cmd) are tracked. +bin/codex-hook +bin/codex-hook-* +*.log +# Local user settings; the tracked template is config.json.example. +config.json diff --git a/plugins/trace-codex/Makefile b/plugins/trace-codex/Makefile new file mode 100644 index 0000000..0f20746 --- /dev/null +++ b/plugins/trace-codex/Makefile @@ -0,0 +1,86 @@ +# Makefile for the Braintrust Codex tracing plugin. +# +# Targets: +# make test Lint + typecheck + unit tests + a full build + live +# integration check. +# make lint Run the Biome linter/formatter check (no writes). +# make typecheck Run the TypeScript type checker. +# make unit Run only the bun unit tests. +# make build Compile the host binary (and per-platform binaries). +# make integration Build, boot the server via the hook binary, health-check, +# assert the version, then shut it down. +# make clean Remove build artifacts. + +SHELL := /bin/bash + +# Port used only by the integration test. 
Chosen from the IANA dynamic/private +# range (49152-65535) and deliberately different from the default (52734) so +# the test never collides with a real running server. +TEST_PORT ?= 54219 + +# Expected /health version, read from the plugin manifest (single source of truth). +EXPECTED_VERSION := $(shell sed -n 's/.*"version"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' .codex-plugin/plugin.json | head -1) + +BIN := bin/codex-hook +BASE_URL := http://127.0.0.1:$(TEST_PORT) + +.PHONY: test lint typecheck unit build integration clean + +test: lint typecheck unit integration + +lint: + @echo "==> Linting (biome)" + bun run check + +typecheck: + @echo "==> Typechecking (tsc)" + bun run typecheck + +unit: + @echo "==> Running unit tests" + bun test + +build: + @echo "==> Building binaries" + BUILD_HOST_ONLY=1 bun run build + +integration: build + @echo "==> Integration test (port $(TEST_PORT), expecting version $(EXPECTED_VERSION))" + @set -euo pipefail; \ + export BRAINTRUST_EVENT_SERVER_PORT=$(TEST_PORT); \ + export BRAINTRUST_EVENT_SERVER_LOG_DIR="$$(mktemp -d)"; \ + cleanup() { curl -s -o /dev/null -X POST $(BASE_URL)/shutdown || true; }; \ + trap cleanup EXIT; \ + echo " -> invoking hook dispatcher (boots server in background)"; \ + echo '{"hook_event_name":"SessionStart","session_id":"make-test"}' | "$(BIN)" hook; \ + echo " -> waiting for /health on $(BASE_URL)"; \ + version=""; \ + for i in $$(seq 1 50); do \ + body="$$(curl -s --max-time 1 $(BASE_URL)/health || true)"; \ + if [ -n "$$body" ]; then version="$$body"; break; fi; \ + sleep 0.1; \ + done; \ + if [ -z "$$version" ]; then echo "FAIL: server never became healthy on $(BASE_URL)"; exit 1; fi; \ + echo " -> /health returned: $$version"; \ + expected='{"version":"$(EXPECTED_VERSION)"}'; \ + if [ "$$version" != "$$expected" ]; then \ + echo "FAIL: version mismatch"; echo " expected: $$expected"; echo " actual: $$version"; exit 1; \ + fi; \ + echo " -> version matches ($(EXPECTED_VERSION))"; \ + echo " -> 
calling /shutdown"; \ + code="$$(curl -s -o /dev/null -w '%{http_code}' -X POST $(BASE_URL)/shutdown)"; \ + if [ "$$code" != "200" ]; then echo "FAIL: /shutdown returned $$code (expected 200)"; exit 1; fi; \ + echo " -> /shutdown returned 200"; \ + echo " -> verifying server stopped"; \ + stopped=0; \ + for i in $$(seq 1 30); do \ + if ! curl -s -o /dev/null --max-time 1 $(BASE_URL)/health; then stopped=1; break; fi; \ + sleep 0.1; \ + done; \ + if [ "$$stopped" != "1" ]; then echo "FAIL: server still responding after shutdown"; exit 1; fi; \ + echo " -> server stopped cleanly"; \ + echo "PASS: integration test" + +clean: + @echo "==> Cleaning" + rm -rf bin diff --git a/plugins/trace-codex/README.md b/plugins/trace-codex/README.md new file mode 100644 index 0000000..761a012 --- /dev/null +++ b/plugins/trace-codex/README.md @@ -0,0 +1,112 @@ +# Braintrust Codex Tracing Plugin + +A separate, opt-in [Codex plugin](https://developers.openai.com/codex/plugins) that wires Codex lifecycle hooks as a foundation for sending Codex sessions to Braintrust as traces. + +It is **opt-in** (you must install and enable it) and **requires no Braintrust credentials yet** — it makes no network calls beyond a local loopback connection. + +> This is a different plugin from the Braintrust MCP/skills plugin (`braintrust`). The two are independent and can be installed separately. See the [repo README](../../README.md). + +## Architecture + +Codex invokes a hook as a **fresh, short-lived process per event**, but a trace spans the whole session. To bridge that, the plugin has two parts in one compiled binary (`bin/codex-hook`): + +- **Hook client** (`codex-hook hook`, the default): what Codex runs on every lifecycle event. It reads the hook event JSON from stdin, ensures the background server is running (booting it if needed), POSTs the event to the server, and exits. It never fails the Codex turn. 
+- **Background server** (`codex-hook serve`): a long-lived local HTTP server, bound to loopback, that receives events and (in a later phase) turns them into Braintrust spans. It shuts itself down after a configurable idle period. + +Every Codex hook event (`SessionStart`, `UserPromptSubmit`, `PreToolUse`, `PermissionRequest`, `PostToolUse`, `PreCompact`, `PostCompact`, `SubagentStart`, `SubagentStop`, `Stop`) is wired to the hook client. + +### Multi-agent layout + +The code is split into a generic, agent-agnostic core and per-agent modules so other coding agents (e.g. Claude Code) can be added without touching the core: + +- **Generic core**: `src/server/` (HTTP event server, queue, recorder), `src/processor/` (the `EventProcessor`/`EventProcessorFactory` interface + LRU registry), `src/client/` (server lifecycle + the run loop), `src/braintrust/`, `src/config.ts`, `src/replay/`. The registry is keyed by `eventSource` and never names a specific agent. +- **Per-agent modules**: `src/agents/<agent>/`. The Codex module (`src/agents/codex/`) contains its `event-processor.ts` (events → spans), `event-builder.ts` (raw stdin → generic `EnqueueEvent`), `settings.ts` (reads its `config.json`), and `register.ts` exporting a `codexAgent` object. + +To add an agent: create `src/agents/<agent>/` implementing the same surface (`eventSource`, `createProcessor`, `buildEvent`, `loadSettings`) and add it to the `AGENTS` list in `src/index.ts`. No changes to the core are required. + +### Server endpoints (Phase 1) + +- `GET /health` → `{ "version": "<version>" }`. Returns `503` while shutting down. +- `POST /enqueue` → body `{ eventSource, eventSourceVersion, eventName, eventData }`. Currently logs and no-ops; returns `{ "ok": true }`. Returns `503` while shutting down. +- `POST /shutdown` → marks the server as shutting down (subsequent `/health` and `/enqueue` return `503`) and stops gracefully. Returns `200` with an empty body. 
+ +The server tracks a heartbeat (bumped on every request) and shuts down after an idle timeout (default 5 minutes). + +## Configuration + +There are two ways to configure the plugin. **Environment variables always win over the config file**, so you can override any file setting at runtime or in CI. + +### `config.json` (recommended) + +Codex does not pass custom settings into plugin hooks, so the plugin reads its own `config.json` from the plugin's writable data directory (`PLUGIN_DATA`): + +``` +~/.codex/plugins/data/trace-codex-/config.json +``` + +Copy [`config.json.example`](./config.json.example) into that directory as `config.json` and edit it by hand. All keys are optional: + +| Key | Maps to env var | Meaning | +| --- | --- | --- | +| `traceToBraintrust` | `TRACE_TO_BRAINTRUST` | Master switch. **When `false` or unset, no traces are reported** (events are dropped). Set `true` to enable tracing. | +| `apiKey` | `BRAINTRUST_API_KEY` | Braintrust API key. | +| `apiUrl` | `BRAINTRUST_API_URL` | Braintrust API URL (for self-hosted / staging). | +| `appUrl` | `BRAINTRUST_APP_URL` | Braintrust app URL. | +| `project` | `BRAINTRUST_PROJECT` | Project to log traces into. | +| `additionalMetadata` | `BRAINTRUST_ADDITIONAL_METADATA` | JSON object of extra metadata merged into the root span. Standard keys (`session_id`, `model`, `project`, etc.) take precedence on conflict. | +| `recordFile` | `BRAINTRUST_EVENT_SERVER_RECORD_FILE` | If set, record every event to this NDJSON file (for `replay`). | +| `port` | `BRAINTRUST_EVENT_SERVER_PORT` | Loopback port for the server. | +| `idleTimeoutMs` | `BRAINTRUST_EVENT_SERVER_IDLE_TIMEOUT_MS` | Idle shutdown window. | +| `idleCheckIntervalMs` | `BRAINTRUST_EVENT_SERVER_IDLE_CHECK_INTERVAL_MS` | Idle watchdog cadence. | + +> By default tracing is **off**. Set `traceToBraintrust: true` (or `TRACE_TO_BRAINTRUST=true`) to start reporting. 
+ +The config file is read by the hook client only at the moment it boots the background server (the running server keeps the config it started with). To pick up config changes, stop the server (or wait for it to idle out) so the next event re-boots it. + +### Environment variables + +Every setting above can also be set directly as an environment variable (and an env var overrides the file). Defaults: + +| Variable | Default | Meaning | +| --- | --- | --- | +| `BRAINTRUST_EVENT_SERVER_PORT` | `52734` | Loopback port for the server. | +| `BRAINTRUST_EVENT_SERVER_IDLE_TIMEOUT_MS` | `300000` | Idle shutdown window. | +| `BRAINTRUST_EVENT_SERVER_IDLE_CHECK_INTERVAL_MS` | `30000` | Idle watchdog cadence. | +| `BRAINTRUST_EVENT_SERVER_LOG_DIR` | `$PLUGIN_DATA` | Directory for logs, pidfile, and `config.json`. | +| `BRAINTRUST_EVENT_SERVER_RECORD_FILE` | _(unset)_ | Record events to this NDJSON file. | + +## Binary distribution (launcher) + +The compiled binary is ~56 MB per platform, far too large to commit. Codex installs a plugin by cloning/copying only the committed repo files (no build step), so the binary is fetched at runtime instead: + +- `hooks/hooks.json` invokes a small committed launcher script, `bin/codex-hook.sh` (and `bin/codex-hook.cmd` on Windows via `commandWindows`). +- On each hook, the launcher looks for `${PLUGIN_ROOT}/bin/codex-hook`. If present, it `exec`s it (fast path: a file check + exec, no version parsing). If missing, it detects the platform (`uname`), reads the plugin version from `${PLUGIN_ROOT}/.codex-plugin/plugin.json` (single source of truth), downloads the matching `codex-hook-<os>-<arch>` asset from the GitHub release `trace-codex-v<version>`, caches it at `${PLUGIN_ROOT}/bin/codex-hook`, and execs it. +- **Upgrades self-heal**: Codex wipes the versioned plugin cache (`${PLUGIN_ROOT}`) on upgrade, so the cached binary is automatically invalidated; the next hook re-downloads the matching version. No manual step. 
+- The launcher never fails the Codex turn: any download/exec error logs to stderr and exits 0 (tracing simply does nothing that session). + +Supported platforms: macOS (arm64/x64) and Linux (x64/arm64). Windows currently prints a "coming soon" message and no-ops. + +> **Publishing prerequisite:** the release assets must be **anonymously downloadable**, i.e. the repo must be public. For a private repo the launcher's download will 404 and tracing will no-op. Local dev is unaffected (the locally built `bin/codex-hook` is used directly). + +## Build & install (dev) + +Requires [Bun](https://bun.sh). From the repo root: + +```bash +./install.sh trace-codex # builds the host binary, then installs +``` + +After editing source, re-run `./install.sh trace-codex` to rebuild and re-sync the Codex plugin cache. + +## Trust + +Plugin-bundled hooks are non-managed. Installing/enabling the plugin does **not** auto-trust the hooks; Codex skips them until you review and trust them. Run `/hooks` in the Codex CLI and trust the `trace-codex` hooks. This is **one-time per hook definition** — you are only re-prompted if the command string in `hooks/hooks.json` changes. (Rebuilding the binary does not change the command string, so it does not re-trigger trust.) + +## Local testing + +```bash +bun test # unit tests +bun run typecheck +echo '{"hook_event_name":"SessionStart"}' | bun run src/index.ts hook # run the client +bun run src/index.ts serve # run the server in the foreground +``` diff --git a/plugins/trace-codex/bin/codex-hook.cmd b/plugins/trace-codex/bin/codex-hook.cmd new file mode 100644 index 0000000..9c1525c --- /dev/null +++ b/plugins/trace-codex/bin/codex-hook.cmd @@ -0,0 +1,9 @@ +@echo off +REM Windows launcher for the trace-codex hook binary. +REM +REM Windows is not supported yet. This stub exits 0 so it never fails a Codex +REM turn; tracing simply does nothing on Windows for now. 
When Windows support +REM lands, this will mirror codex-hook.sh: detect arch, download the matching +REM codex-hook.exe from the GitHub release into %PLUGIN_ROOT%\bin, and exec it. +echo trace-codex: Windows support coming soon; tracing disabled this session.>&2 +exit /b 0 diff --git a/plugins/trace-codex/bin/codex-hook.sh b/plugins/trace-codex/bin/codex-hook.sh new file mode 100755 index 0000000..e69b528 --- /dev/null +++ b/plugins/trace-codex/bin/codex-hook.sh @@ -0,0 +1,96 @@ +#!/bin/sh +# Launcher for the trace-codex hook binary. +# +# hooks.json invokes this script (a fixed, platform-agnostic command). The real +# binary is platform-specific and far too large to commit, so it is downloaded +# on demand from the plugin's GitHub release and cached next to this script at +# $PLUGIN_ROOT/bin/codex-hook. +# +# Because Codex wipes the versioned plugin cache ($PLUGIN_ROOT) on every +# install/upgrade, a cached binary there is automatically invalidated on +# upgrade: the next hook finds it missing and re-downloads the matching version. +# This means the hot path is just "is the binary here? exec it" with no version +# parsing, and upgrades self-heal with no manual step. +# +# Hard rule: never fail the Codex turn. Any error here logs to stderr and exits +# 0 (Codex treats a 0 exit with no stdout as success). + +set -u + +REPO="braintrustdata/braintrust-codex-plugin" + +# PLUGIN_ROOT is set by Codex to the installed plugin directory. Fall back to +# this script's own directory's parent so the launcher is runnable standalone. +SCRIPT_DIR=$(CDPATH= cd "$(dirname "$0")" && pwd) +ROOT="${PLUGIN_ROOT:-$(dirname "$SCRIPT_DIR")}" +BIN="$ROOT/bin/codex-hook" + +# Fast path: the binary is already cached for this plugin version. Run it. +if [ -x "$BIN" ]; then + exec "$BIN" "$@" +fi + +# --- Slow path: download the matching binary, then exec it. --- + +log() { printf 'trace-codex launcher: %s\n' "$1" >&2; } + +# Map uname output to our release asset suffix (-). 
+os=$(uname -s 2>/dev/null || echo unknown)
+arch=$(uname -m 2>/dev/null || echo unknown)
+case "$os" in
+  Darwin) os_name=darwin ;;
+  Linux) os_name=linux ;;
+  *) log "unsupported OS '$os'; tracing disabled this session"; exit 0 ;;
+esac
+case "$arch" in
+  arm64 | aarch64) arch_name=arm64 ;;
+  x86_64 | amd64) arch_name=x64 ;;
+  *) log "unsupported arch '$arch'; tracing disabled this session"; exit 0 ;;
+esac
+suffix="$os_name-$arch_name"
+
+# Read the plugin version (single source of truth) from the manifest. Only on
+# this slow path, so no dependency (jq) and no parsing on the hot path.
+manifest="$ROOT/.codex-plugin/plugin.json"
+version=$(sed -n 's/.*"version"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' "$manifest" 2>/dev/null | head -n 1)
+if [ -z "$version" ]; then
+  log "could not read plugin version from $manifest; tracing disabled this session"
+  exit 0
+fi
+
+url="https://github.com/$REPO/releases/download/trace-codex-v$version/codex-hook-$suffix"
+tmp="$BIN.download.$$"  # PID-suffixed so concurrent hooks never share a temp file
+
+mkdir -p "$ROOT/bin" 2>/dev/null || {
+  log "could not create $ROOT/bin; tracing disabled this session"
+  exit 0
+}
+
+# Download with curl, falling back to wget. -f makes curl fail on HTTP errors.
+if command -v curl >/dev/null 2>&1; then
+  curl -fsSL "$url" -o "$tmp" 2>/dev/null
+  ok=$?
+elif command -v wget >/dev/null 2>&1; then
+  wget -q "$url" -O "$tmp" 2>/dev/null
+  ok=$?
+else
+  log "neither curl nor wget found; cannot download binary; tracing disabled this session"
+  exit 0
+fi
+
+if [ "$ok" -ne 0 ] || [ ! -s "$tmp" ]; then
+  rm -f "$tmp" 2>/dev/null
+  log "failed to download $url; tracing disabled this session"
+  exit 0
+fi
+# Never install a non-executable file: the exec after the rename would fail and break the never-fail rule.
+chmod +x "$tmp" 2>/dev/null || { rm -f "$tmp" 2>/dev/null; log "could not make $tmp executable; tracing disabled this session"; exit 0; }
+# Atomic rename into place so a concurrent hook never sees a half-written file.
+mv -f "$tmp" "$BIN" 2>/dev/null || { + rm -f "$tmp" 2>/dev/null + log "could not install binary at $BIN; tracing disabled this session" + exit 0 +} + +log "downloaded codex-hook $version ($suffix)" +exec "$BIN" "$@" diff --git a/plugins/trace-codex/biome.json b/plugins/trace-codex/biome.json new file mode 100644 index 0000000..4391881 --- /dev/null +++ b/plugins/trace-codex/biome.json @@ -0,0 +1,29 @@ +{ + "$schema": "https://biomejs.dev/schemas/2.3.11/schema.json", + "vcs": { + "enabled": true, + "clientKind": "git", + "useIgnoreFile": true + }, + "files": { + "ignoreUnknown": true + }, + "formatter": { + "enabled": true, + "indentStyle": "space", + "indentWidth": 2, + "lineWidth": 100 + }, + "linter": { + "enabled": true, + "rules": { + "recommended": true + } + }, + "javascript": { + "formatter": { + "quoteStyle": "double", + "semicolons": "always" + } + } +} diff --git a/plugins/trace-codex/bun.lock b/plugins/trace-codex/bun.lock new file mode 100644 index 0000000..5705db6 --- /dev/null +++ b/plugins/trace-codex/bun.lock @@ -0,0 +1,358 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "trace-codex", + "dependencies": { + "braintrust": "^3.17.0", + }, + "devDependencies": { + "@biomejs/biome": "^2.3.11", + "@types/bun": "^1.1.14", + "typescript": "^5.7.2", + }, + }, + }, + "packages": { + "@apm-js-collab/code-transformer": ["@apm-js-collab/code-transformer@0.12.0", "", { "dependencies": { "@types/estree": "^1.0.8", "astring": "^1.9.0", "esquery": "^1.7.0", "meriyah": "^6.1.4", "semifies": "^1.0.0", "source-map": "^0.6.0" } }, "sha512-5F2ob4cMYezbaUGAk+YltbDvb9BFIghN92ubct9Ho/0MFx4FkChCxYV99NkU6Kx+RAgaqBV6yxKuWreQ6K8SOw=="], + + "@biomejs/biome": ["@biomejs/biome@2.5.0", "", { "optionalDependencies": { "@biomejs/cli-darwin-arm64": "2.5.0", "@biomejs/cli-darwin-x64": "2.5.0", "@biomejs/cli-linux-arm64": "2.5.0", "@biomejs/cli-linux-arm64-musl": "2.5.0", "@biomejs/cli-linux-x64": "2.5.0", "@biomejs/cli-linux-x64-musl": 
"2.5.0", "@biomejs/cli-win32-arm64": "2.5.0", "@biomejs/cli-win32-x64": "2.5.0" }, "bin": { "biome": "bin/biome" } }, "sha512-4kURkd9hAPrdDM3C9n82ycYgx8hvQcW6MjKTEejruj8rK0N8P3OPpdy8BvI8kt3KWY4ycF5XtDOrktetEfhfuw=="], + + "@biomejs/cli-darwin-arm64": ["@biomejs/cli-darwin-arm64@2.5.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Mn3Fwi3SA5fgmfCPqmzpWF2DLZnms3BVAhM088nTnGrTZmHS3wwIjcoZPqpXeNgd3DrrLH6xp8vTLIBuJoZiXw=="], + + "@biomejs/cli-darwin-x64": ["@biomejs/cli-darwin-x64@2.5.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-rg3VPL5P8mYro6pqlXYXuJWph21slVp3SZtAqWSrkZs40d2gTzYmHF8E/X1iTID25btmNKltNDJ926sqVBp7DQ=="], + + "@biomejs/cli-linux-arm64": ["@biomejs/cli-linux-arm64@2.5.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-tl+LW8fdD96/xdeWtWwc82LIOc5CoY7N2AsogLTp5R4ECErYt+8Jl/N68ezN9vzSiqPTxw6vjcihoLPYKZHrlw=="], + + "@biomejs/cli-linux-arm64-musl": ["@biomejs/cli-linux-arm64-musl@2.5.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-vQdM4oSGaf7ZNeGO9w5+Y8SBtyser9M6znxYbm7Ec8wInxJu1WiKxFYZW5Auj2d80bcVvefuGGRxoFOE0eee8g=="], + + "@biomejs/cli-linux-x64": ["@biomejs/cli-linux-x64@2.5.0", "", { "os": "linux", "cpu": "x64" }, "sha512-zpEGf4RQbFEh8Vt7OmavLyyOzRbtcE9osCqrS1kfvt8jDvxwhKXLSf7n0ebr/ov0RJ9ssP+lhs6C8a9WwFvrQA=="], + + "@biomejs/cli-linux-x64-musl": ["@biomejs/cli-linux-x64-musl@2.5.0", "", { "os": "linux", "cpu": "x64" }, "sha512-+9hIcMngJ+yGUahXqZuZ8CoWKJE9SAZsFsM3QDvXpNsLbXZ9lqVzgBhOk/jTSYkOA0GLP9eu3teukqpLUojHMg=="], + + "@biomejs/cli-win32-arm64": ["@biomejs/cli-win32-arm64@2.5.0", "", { "os": "win32", "cpu": "arm64" }, "sha512-jB0wAvTLI4itx5VidqVUejPQFhRUxiZ9l9FvZ26D5fl6t3qme+ZB4PD3bTSeL1vZ8NI2Rx/zj6H9zcESuGHKGw=="], + + "@biomejs/cli-win32-x64": ["@biomejs/cli-win32-x64@2.5.0", "", { "os": "win32", "cpu": "x64" }, "sha512-VT/lF+GId+67j8aDfLkxdxNoVApsPSTbyAtB3jJq0IWTrY77WXfbPfpngxq0bA6JCEv/7k8C9qWjDRKRznDlyw=="], + + "@braintrust/bt-darwin-arm64": ["@braintrust/bt-darwin-arm64@0.11.1", "", { "os": "darwin", "cpu": "arm64" }, 
"sha512-jhL/X24ss4e4qMMdlXtxO8rA957Z77wJA59XXFHqpuaaVCd4pXE5JPUQBkms9dHcs4efJhY3Lx9zNQqoaeCqWg=="], + + "@braintrust/bt-darwin-x64": ["@braintrust/bt-darwin-x64@0.11.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-f4l25gVUpCJ99mFS9y+zmnYOcevtI7KLLvlLF/xOil2YCIx/KCM6AqS96jj6CJW74B7hYhd74dLnFKMFFL429w=="], + + "@braintrust/bt-linux-arm64": ["@braintrust/bt-linux-arm64@0.11.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-01GWsP/p17I3yy0kxkp+Yt8w5L28j3nFL+7iLCkQWtDdfCGkuRsnrIbrY8dbnqGo/wvgz7uPXBkCXoIuNsfoxw=="], + + "@braintrust/bt-linux-x64": ["@braintrust/bt-linux-x64@0.11.1", "", { "os": "linux", "cpu": "x64" }, "sha512-E/XwRuhPrZxD+IZgSbPCuj4gywBrim/g+tU0MjWxFohpMMkJJW2CxMCIO+6RVk8gTxlVh/ajCIjv2/Ph1Yugeg=="], + + "@braintrust/bt-linux-x64-musl": ["@braintrust/bt-linux-x64-musl@0.11.1", "", { "os": "linux", "cpu": "x64" }, "sha512-QLqlFsF6HKON5Vc0c8JfpQ4vrl4KjmInGF1Vsqy+ecVkgXVk8pwCVibb8Ea/udVpBQqctAaNMn7ZsmvWR2vO8Q=="], + + "@braintrust/bt-win32-arm64": ["@braintrust/bt-win32-arm64@0.11.1", "", { "os": "win32", "cpu": "arm64" }, "sha512-0LSVXZ/tE79VVcpRjaTE+iTMQR4EKNC6tteAS01uKT34eID7OqJ7xJijZOthe11kGqMcX8nnNbdDrWHqLczDow=="], + + "@braintrust/bt-win32-x64": ["@braintrust/bt-win32-x64@0.11.1", "", { "os": "win32", "cpu": "x64" }, "sha512-JPo3xffJvW0OKowqpbh+XtlafpoZq8VXzhOcW2yQmHcN56Me2u9plPiXi4gzrpgz2cnFx6/EiJ1bTa3F25F0RA=="], + + "@colors/colors": ["@colors/colors@1.5.0", "", {}, "sha512-ooWCrlZP11i8GImSjTHYHLkvFDP48nS4+204nGb1RiX/WXYHmJA2III9/e2DWVabCESdW7hBAEzHRqUn9OUVvQ=="], + + "@esbuild/aix-ppc64": ["@esbuild/aix-ppc64@0.28.0", "", { "os": "aix", "cpu": "ppc64" }, "sha512-lhRUCeuOyJQURhTxl4WkpFTjIsbDayJHih5kZC1giwE+MhIzAb7mEsQMqMf18rHLsrb5qI1tafG20mLxEWcWlA=="], + + "@esbuild/android-arm": ["@esbuild/android-arm@0.28.0", "", { "os": "android", "cpu": "arm" }, "sha512-wqh0ByljabXLKHeWXYLqoJ5jKC4XBaw6Hk08OfMrCRd2nP2ZQ5eleDZC41XHyCNgktBGYMbqnrJKq/K/lzPMSQ=="], + + "@esbuild/android-arm64": ["@esbuild/android-arm64@0.28.0", "", { "os": "android", "cpu": 
"arm64" }, "sha512-+WzIXQOSaGs33tLEgYPYe/yQHf0WTU0X42Jca3y8NWMbUVhp7rUnw+vAsRC/QiDrdD31IszMrZy+qwPOPjd+rw=="], + + "@esbuild/android-x64": ["@esbuild/android-x64@0.28.0", "", { "os": "android", "cpu": "x64" }, "sha512-+VJggoaKhk2VNNqVL7f6S189UzShHC/mR9EE8rDdSkdpN0KflSwWY/gWjDrNxxisg8Fp1ZCD9jLMo4m0OUfeUA=="], + + "@esbuild/darwin-arm64": ["@esbuild/darwin-arm64@0.28.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-0T+A9WZm+bZ84nZBtk1ckYsOvyA3x7e2Acj1KdVfV4/2tdG4fzUp91YHx+GArWLtwqp77pBXVCPn2We7Letr0Q=="], + + "@esbuild/darwin-x64": ["@esbuild/darwin-x64@0.28.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-fyzLm/DLDl/84OCfp2f/XQ4flmORsjU7VKt8HLjvIXChJoFFOIL6pLJPH4Yhd1n1gGFF9mPwtlN5Wf82DZs+LQ=="], + + "@esbuild/freebsd-arm64": ["@esbuild/freebsd-arm64@0.28.0", "", { "os": "freebsd", "cpu": "arm64" }, "sha512-l9GeW5UZBT9k9brBYI+0WDffcRxgHQD8ShN2Ur4xWq/NFzUKm3k5lsH4PdaRgb2w7mI9u61nr2gI2mLI27Nh3Q=="], + + "@esbuild/freebsd-x64": ["@esbuild/freebsd-x64@0.28.0", "", { "os": "freebsd", "cpu": "x64" }, "sha512-BXoQai/A0wPO6Es3yFJ7APCiKGc1tdAEOgeTNy3SsB491S3aHn4S4r3e976eUnPdU+NbdtmBuLncYir2tMU9Nw=="], + + "@esbuild/linux-arm": ["@esbuild/linux-arm@0.28.0", "", { "os": "linux", "cpu": "arm" }, "sha512-CjaaREJagqJp7iTaNQjjidaNbCKYcd4IDkzbwwxtSvjI7NZm79qiHc8HqciMddQ6CKvJT6aBd8lO9kN/ZudLlw=="], + + "@esbuild/linux-arm64": ["@esbuild/linux-arm64@0.28.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-RVyzfb3FWsGA55n6WY0MEIEPURL1FcbhFE6BffZEMEekfCzCIMtB5yyDcFnVbTnwk+CLAgTujmV/Lgvih56W+A=="], + + "@esbuild/linux-ia32": ["@esbuild/linux-ia32@0.28.0", "", { "os": "linux", "cpu": "ia32" }, "sha512-KBnSTt1kxl9x70q+ydterVdl+Cn0H18ngRMRCEQfrbqdUuntQQ0LoMZv47uB97NljZFzY6HcfqEZ2SAyIUTQBQ=="], + + "@esbuild/linux-loong64": ["@esbuild/linux-loong64@0.28.0", "", { "os": "linux", "cpu": "none" }, "sha512-zpSlUce1mnxzgBADvxKXX5sl8aYQHo2ezvMNI8I0lbblJtp8V4odlm3Yzlj7gPyt3T8ReksE6bK+pT3WD+aJRg=="], + + "@esbuild/linux-mips64el": ["@esbuild/linux-mips64el@0.28.0", "", { "os": "linux", "cpu": 
"none" }, "sha512-2jIfP6mmjkdmeTlsX/9vmdmhBmKADrWqN7zcdtHIeNSCH1SqIoNI63cYsjQR8J+wGa4Y5izRcSHSm8K3QWmk3w=="], + + "@esbuild/linux-ppc64": ["@esbuild/linux-ppc64@0.28.0", "", { "os": "linux", "cpu": "ppc64" }, "sha512-bc0FE9wWeC0WBm49IQMPSPILRocGTQt3j5KPCA8os6VprfuJ7KD+5PzESSrJ6GmPIPJK965ZJHTUlSA6GNYEhg=="], + + "@esbuild/linux-riscv64": ["@esbuild/linux-riscv64@0.28.0", "", { "os": "linux", "cpu": "none" }, "sha512-SQPZOwoTTT/HXFXQJG/vBX8sOFagGqvZyXcgLA3NhIqcBv1BJU1d46c0rGcrij2B56Z2rNiSLaZOYW5cUk7yLQ=="], + + "@esbuild/linux-s390x": ["@esbuild/linux-s390x@0.28.0", "", { "os": "linux", "cpu": "s390x" }, "sha512-SCfR0HN8CEEjnYnySJTd2cw0k9OHB/YFzt5zgJEwa+wL/T/raGWYMBqwDNAC6dqFKmJYZoQBRfHjgwLHGSrn3Q=="], + + "@esbuild/linux-x64": ["@esbuild/linux-x64@0.28.0", "", { "os": "linux", "cpu": "x64" }, "sha512-us0dSb9iFxIi8srnpl931Nvs65it/Jd2a2K3qs7fz2WfGPHqzfzZTfec7oxZJRNPXPnNYZtanmRc4AL/JwVzHQ=="], + + "@esbuild/netbsd-arm64": ["@esbuild/netbsd-arm64@0.28.0", "", { "os": "none", "cpu": "arm64" }, "sha512-CR/RYotgtCKwtftMwJlUU7xCVNg3lMYZ0RzTmAHSfLCXw3NtZtNpswLEj/Kkf6kEL3Gw+BpOekRX0BYCtklhUw=="], + + "@esbuild/netbsd-x64": ["@esbuild/netbsd-x64@0.28.0", "", { "os": "none", "cpu": "x64" }, "sha512-nU1yhmYutL+fQ71Kxnhg8uEOdC0pwEW9entHykTgEbna2pw2dkbFSMeqjjyHZoCmt8SBkOSvV+yNmm94aUrrqw=="], + + "@esbuild/openbsd-arm64": ["@esbuild/openbsd-arm64@0.28.0", "", { "os": "openbsd", "cpu": "arm64" }, "sha512-cXb5vApOsRsxsEl4mcZ1XY3D4DzcoMxR/nnc4IyqYs0rTI8ZKmW6kyyg+11Z8yvgMfAEldKzP7AdP64HnSC/6g=="], + + "@esbuild/openbsd-x64": ["@esbuild/openbsd-x64@0.28.0", "", { "os": "openbsd", "cpu": "x64" }, "sha512-8wZM2qqtv9UP3mzy7HiGYNH/zjTA355mpeuA+859TyR+e+Tc08IHYpLJuMsfpDJwoLo1ikIJI8jC3GFjnRClzA=="], + + "@esbuild/openharmony-arm64": ["@esbuild/openharmony-arm64@0.28.0", "", { "os": "none", "cpu": "arm64" }, "sha512-FLGfyizszcef5C3YtoyQDACyg95+dndv79i2EekILBofh5wpCa1KuBqOWKrEHZg3zrL3t5ouE5jgr94vA+Wb2w=="], + + "@esbuild/sunos-x64": ["@esbuild/sunos-x64@0.28.0", "", { "os": "sunos", "cpu": 
"x64" }, "sha512-1ZgjUoEdHZZl/YlV76TSCz9Hqj9h9YmMGAgAPYd+q4SicWNX3G5GCyx9uhQWSLcbvPW8Ni7lj4gDa1T40akdlw=="], + + "@esbuild/win32-arm64": ["@esbuild/win32-arm64@0.28.0", "", { "os": "win32", "cpu": "arm64" }, "sha512-Q9StnDmQ/enxnpxCCLSg0oo4+34B9TdXpuyPeTedN/6+iXBJ4J+zwfQI28u/Jl40nOYAxGoNi7mFP40RUtkmUA=="], + + "@esbuild/win32-ia32": ["@esbuild/win32-ia32@0.28.0", "", { "os": "win32", "cpu": "ia32" }, "sha512-zF3ag/gfiCe6U2iczcRzSYJKH1DCI+ByzSENHlM2FcDbEeo5Zd2C86Aq0tKUYAJJ1obRP84ymxIAksZUcdztHA=="], + + "@esbuild/win32-x64": ["@esbuild/win32-x64@0.28.0", "", { "os": "win32", "cpu": "x64" }, "sha512-pEl1bO9mfAmIC+tW5btTmrKaujg3zGtUmWNdCw/xs70FBjwAL3o9OEKNHvNmnyylD6ubxUERiEhdsL0xBQ9efw=="], + + "@jridgewell/gen-mapping": ["@jridgewell/gen-mapping@0.3.13", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.0", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA=="], + + "@jridgewell/remapping": ["@jridgewell/remapping@2.3.5", "", { "dependencies": { "@jridgewell/gen-mapping": "^0.3.5", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ=="], + + "@jridgewell/resolve-uri": ["@jridgewell/resolve-uri@3.1.2", "", {}, "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw=="], + + "@jridgewell/sourcemap-codec": ["@jridgewell/sourcemap-codec@1.5.5", "", {}, "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og=="], + + "@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.31", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw=="], + + "@kwsites/file-exists": ["@kwsites/file-exists@1.1.1", "", { "dependencies": { "debug": "^4.1.1" } }, 
"sha512-m9/5YGR18lIwxSFDwfE3oA7bWuq9kdau6ugN4H2rJeyhFQZcG9AgSHkQtSD15a8WvTgfz9aikZMrKPHvbpqFiw=="], + + "@kwsites/promise-deferred": ["@kwsites/promise-deferred@1.1.1", "", {}, "sha512-GaHYm+c0O9MjZRu0ongGBRbinu8gVAMd2UZjji6jVmqKtZluZnptXGWhz1E8j8D2HJ3f/yMxKAUC0b+57wncIw=="], + + "@next/env": ["@next/env@14.2.35", "", {}, "sha512-DuhvCtj4t9Gwrx80dmz2F4t/zKQ4ktN8WrMwOuVzkJfBilwAwGr6v16M5eI8yCuZ63H9TTuEU09Iu2HqkzFPVQ=="], + + "@simple-git/args-pathspec": ["@simple-git/args-pathspec@1.0.3", "", {}, "sha512-ngJMaHlsWDTfjyq9F3VIQ8b7NXbBLq5j9i5bJ6XLYtD6qlDXT7fdKY2KscWWUF8t18xx052Y/PUO1K1TRc9yKA=="], + + "@simple-git/argv-parser": ["@simple-git/argv-parser@1.1.1", "", { "dependencies": { "@simple-git/args-pathspec": "^1.0.3" } }, "sha512-Q9lBcfQ+VQCpQqGJFHe5yooOS5hGdLFFbJ5R+R5aDsnkPCahtn1hSkMcORX65J2Z5lxSkD0lQorMsncuBQxYUw=="], + + "@types/bun": ["@types/bun@1.3.14", "", { "dependencies": { "bun-types": "1.3.14" } }, "sha512-h1hFqFVcvAvD9j9K7ZW7vd82aSA+rTdznZa+5bwvCwqSB1jmmfLcbIWhOLx1/+boy/xmjgCs/OMUL8hRJSmnPw=="], + + "@types/estree": ["@types/estree@1.0.9", "", {}, "sha512-GhdPgy1el4/ImP05X05Uw4cw2/M93BCUmnEvWZNStlCzEKME4Fkk+YpoA5OiHNQmoS7Cafb8Xa3Pya8m1Qrzeg=="], + + "@types/node": ["@types/node@25.9.3", "", { "dependencies": { "undici-types": ">=7.24.0 <7.24.7" } }, "sha512-603BddQMv3pUcr4U2dhujk83N2tTDVr/34wII2B6bJy6g+8WD6yUb11jszNs0gdi4PesVWl7ABt8nYMVpnLUcg=="], + + "@vercel/functions": ["@vercel/functions@1.6.0", "", { "peerDependencies": { "@aws-sdk/credential-provider-web-identity": "*" }, "optionalPeers": ["@aws-sdk/credential-provider-web-identity"] }, "sha512-R6FKQrYT5MZs5IE1SqeCJWxMuBdHawFcCZboKKw8p7s+6/mcd55Gx6tWmyKnQTyrSEA04NH73Tc9CbqpEle8RA=="], + + "accepts": ["accepts@2.0.0", "", { "dependencies": { "mime-types": "^3.0.0", "negotiator": "^1.0.0" } }, "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng=="], + + "acorn": ["acorn@8.17.0", "", { "bin": { "acorn": "bin/acorn" } }, 
"sha512-xRQbDb9BnwDafYNn6Vwl839DYVjqXYb1XVGtWAZ1kcDc6iwAL4hg3B1dZlRiuENFeO2H53gFG3in621AdERVAg=="], + + "ajv": ["ajv@8.20.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA=="], + + "ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="], + + "argparse": ["argparse@2.0.1", "", {}, "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="], + + "astring": ["astring@1.9.0", "", { "bin": { "astring": "bin/astring" } }, "sha512-LElXdjswlqjWrPpJFg1Fx4wpkOCxj1TDHlSV4PlaRxHGWko024xICaa97ZkMfs6DRKlCguiAI+rbXv5GWwXIkg=="], + + "balanced-match": ["balanced-match@4.0.4", "", {}, "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA=="], + + "body-parser": ["body-parser@2.3.0", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^2.0.0", "debug": "^4.4.3", "http-errors": "^2.0.1", "iconv-lite": "^0.7.2", "on-finished": "^2.4.1", "qs": "^6.15.2", "raw-body": "^3.0.2", "type-is": "^2.1.0" } }, "sha512-2cGmJupaNgg+QUwVLAucDuWuoMZ6EX9iHDRswZ5lsNYEmwPaRknMPCLZz07yTzVq/83p4o/wzbDZbBrTvGGTIw=="], + + "brace-expansion": ["brace-expansion@5.0.6", "", { "dependencies": { "balanced-match": "^4.0.2" } }, "sha512-kLpxurY4Z4r9sgMsyG0Z9uzsBlgiU/EFKhj/h91/8yHu0edo7XuixOIH3VcJ8kkxs6/jPzoI6U9Vj3WqbMQ94g=="], + + "braintrust": ["braintrust@3.17.0", "", { "dependencies": { "@apm-js-collab/code-transformer": "^0.12.0", "@next/env": "^14.2.3", "@vercel/functions": "^1.0.2", "ajv": "^8.20.0", "argparse": "^2.0.1", "cli-progress": "^3.12.0", "cli-table3": "^0.6.5", "cors": "^2.8.5", "dc-browser": "^1.0.4", "dotenv": "^16.4.5", "esbuild": "0.28.0", "eventsource-parser": "^1.1.2", "express": "^5.2.1", "http-errors": "^2.0.0", "minimatch": 
"^10.2.5", "module-details-from-path": "^1.0.4", "mustache": "^4.2.0", "pluralize": "^8.0.0", "simple-git": "^3.36.0", "source-map": "^0.7.4", "termi-link": "^1.0.1", "unplugin": "^2.3.5", "uuid": "^11.1.1", "zod-to-json-schema": "^3.25.0" }, "optionalDependencies": { "@braintrust/bt-darwin-arm64": "0.11.1", "@braintrust/bt-darwin-x64": "0.11.1", "@braintrust/bt-linux-arm64": "0.11.1", "@braintrust/bt-linux-x64": "0.11.1", "@braintrust/bt-linux-x64-musl": "0.11.1", "@braintrust/bt-win32-arm64": "0.11.1", "@braintrust/bt-win32-x64": "0.11.1" }, "peerDependencies": { "zod": "^3.25.34 || ^4.0" }, "bin": { "braintrust": "dist/cli.js", "bt": "bin/bt" } }, "sha512-nyV+j/FJJJsWnkiSn9tAoNSTsMtDfbH4v8EQpBTYGj1120eXFPcPPs66kkkKcYuN0tEo/Ai7VO8Ujcy5j3SrUQ=="], + + "bun-types": ["bun-types@1.3.14", "", { "dependencies": { "@types/node": "*" } }, "sha512-4N0ig0fEomHt5R0KCFWjovxow98rIoRwKolrYdCcknNwMekCXRnWEUvgu5soYV8QXtVsrUD8B95MBOZGPvr6KQ=="], + + "bytes": ["bytes@3.1.2", "", {}, "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg=="], + + "call-bind-apply-helpers": ["call-bind-apply-helpers@1.0.2", "", { "dependencies": { "es-errors": "^1.3.0", "function-bind": "^1.1.2" } }, "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ=="], + + "call-bound": ["call-bound@1.0.4", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "get-intrinsic": "^1.3.0" } }, "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg=="], + + "cli-progress": ["cli-progress@3.12.0", "", { "dependencies": { "string-width": "^4.2.3" } }, "sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A=="], + + "cli-table3": ["cli-table3@0.6.5", "", { "dependencies": { "string-width": "^4.2.0" }, "optionalDependencies": { "@colors/colors": "1.5.0" } }, 
"sha512-+W/5efTR7y5HRD7gACw9yQjqMVvEMLBHmboM/kPWam+H+Hmyrgjh6YncVKK122YZkXrLudzTuAukUw9FnMf7IQ=="], + + "content-disposition": ["content-disposition@1.1.0", "", {}, "sha512-5jRCH9Z/+DRP7rkvY83B+yGIGX96OYdJmzngqnw2SBSxqCFPd0w2km3s5iawpGX8krnwSGmF0FW5Nhr0Hfai3g=="], + + "content-type": ["content-type@1.0.5", "", {}, "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA=="], + + "cookie": ["cookie@0.7.2", "", {}, "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w=="], + + "cookie-signature": ["cookie-signature@1.2.2", "", {}, "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg=="], + + "cors": ["cors@2.8.6", "", { "dependencies": { "object-assign": "^4", "vary": "^1" } }, "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw=="], + + "dc-browser": ["dc-browser@1.0.4", "", {}, "sha512-7oEtnzNlcE+hr4OvO3GR6Gndgw8BhW+wKOEwMqSleyY7N29jbAxzyW5BaJl7qBCw+6OIxfMWtY0T+6dxq8RWLw=="], + + "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="], + + "depd": ["depd@2.0.0", "", {}, "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw=="], + + "dotenv": ["dotenv@16.6.1", "", {}, "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow=="], + + "dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="], + + "ee-first": ["ee-first@1.1.1", "", {}, "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="], + + "emoji-regex": ["emoji-regex@8.0.0", "", {}, 
"sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], + + "encodeurl": ["encodeurl@2.0.0", "", {}, "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg=="], + + "es-define-property": ["es-define-property@1.0.1", "", {}, "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g=="], + + "es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="], + + "es-object-atoms": ["es-object-atoms@1.1.2", "", { "dependencies": { "es-errors": "^1.3.0" } }, "sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw=="], + + "esbuild": ["esbuild@0.28.0", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.28.0", "@esbuild/android-arm": "0.28.0", "@esbuild/android-arm64": "0.28.0", "@esbuild/android-x64": "0.28.0", "@esbuild/darwin-arm64": "0.28.0", "@esbuild/darwin-x64": "0.28.0", "@esbuild/freebsd-arm64": "0.28.0", "@esbuild/freebsd-x64": "0.28.0", "@esbuild/linux-arm": "0.28.0", "@esbuild/linux-arm64": "0.28.0", "@esbuild/linux-ia32": "0.28.0", "@esbuild/linux-loong64": "0.28.0", "@esbuild/linux-mips64el": "0.28.0", "@esbuild/linux-ppc64": "0.28.0", "@esbuild/linux-riscv64": "0.28.0", "@esbuild/linux-s390x": "0.28.0", "@esbuild/linux-x64": "0.28.0", "@esbuild/netbsd-arm64": "0.28.0", "@esbuild/netbsd-x64": "0.28.0", "@esbuild/openbsd-arm64": "0.28.0", "@esbuild/openbsd-x64": "0.28.0", "@esbuild/openharmony-arm64": "0.28.0", "@esbuild/sunos-x64": "0.28.0", "@esbuild/win32-arm64": "0.28.0", "@esbuild/win32-ia32": "0.28.0", "@esbuild/win32-x64": "0.28.0" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-sNR9MHpXSUV/XB4zmsFKN+QgVG82Cc7+/aaxJ8Adi8hyOac+EXptIp45QBPaVyX3N70664wRbTcLTOemCAnyqw=="], + + "escape-html": ["escape-html@1.0.3", "", {}, "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow=="], 
+ + "esquery": ["esquery@1.7.0", "", { "dependencies": { "estraverse": "^5.1.0" } }, "sha512-Ap6G0WQwcU/LHsvLwON1fAQX9Zp0A2Y6Y/cJBl9r/JbW90Zyg4/zbG6zzKa2OTALELarYHmKu0GhpM5EO+7T0g=="], + + "estraverse": ["estraverse@5.3.0", "", {}, "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA=="], + + "etag": ["etag@1.8.1", "", {}, "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg=="], + + "eventsource-parser": ["eventsource-parser@1.1.2", "", {}, "sha512-v0eOBUbiaFojBu2s2NPBfYUoRR9GjcDNvCXVaqEf5vVfpIAh9f8RCo4vXTP8c63QRKCFwoLpMpTdPwwhEKVgzA=="], + + "express": ["express@5.2.1", "", { "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", "content-disposition": "^1.0.0", "content-type": "^1.0.5", "cookie": "^0.7.1", "cookie-signature": "^1.2.1", "debug": "^4.4.0", "depd": "^2.0.0", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "finalhandler": "^2.1.0", "fresh": "^2.0.0", "http-errors": "^2.0.0", "merge-descriptors": "^2.0.0", "mime-types": "^3.0.0", "on-finished": "^2.4.1", "once": "^1.4.0", "parseurl": "^1.3.3", "proxy-addr": "^2.0.7", "qs": "^6.14.0", "range-parser": "^1.2.1", "router": "^2.2.0", "send": "^1.1.0", "serve-static": "^2.2.0", "statuses": "^2.0.1", "type-is": "^2.0.1", "vary": "^1.1.2" } }, "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw=="], + + "fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="], + + "fast-uri": ["fast-uri@3.1.2", "", {}, "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ=="], + + "finalhandler": ["finalhandler@2.1.1", "", { "dependencies": { "debug": "^4.4.0", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "on-finished": "^2.4.1", "parseurl": "^1.3.3", "statuses": "^2.0.1" } }, 
"sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA=="], + + "forwarded": ["forwarded@0.2.0", "", {}, "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow=="], + + "fresh": ["fresh@2.0.0", "", {}, "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A=="], + + "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], + + "get-intrinsic": ["get-intrinsic@1.3.0", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "es-define-property": "^1.0.1", "es-errors": "^1.3.0", "es-object-atoms": "^1.1.1", "function-bind": "^1.1.2", "get-proto": "^1.0.1", "gopd": "^1.2.0", "has-symbols": "^1.1.0", "hasown": "^2.0.2", "math-intrinsics": "^1.1.0" } }, "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ=="], + + "get-proto": ["get-proto@1.0.1", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" } }, "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g=="], + + "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="], + + "has-symbols": ["has-symbols@1.1.0", "", {}, "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ=="], + + "hasown": ["hasown@2.0.4", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-T2UbfbBEF32wiepXIsMlTW9+dDYC6wMh/t/vYA4tuOMKqWz/n3vr1NFSxQiyP+zk2mXsoMA/i/7qV6LKut1t1A=="], + + "http-errors": ["http-errors@2.0.1", "", { "dependencies": { "depd": "~2.0.0", "inherits": "~2.0.4", "setprototypeof": "~1.2.0", "statuses": "~2.0.2", "toidentifier": "~1.0.1" } }, "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ=="], + + "iconv-lite": ["iconv-lite@0.7.2", "", { 
"dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw=="], + + "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="], + + "ipaddr.js": ["ipaddr.js@1.9.1", "", {}, "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="], + + "is-fullwidth-code-point": ["is-fullwidth-code-point@3.0.0", "", {}, "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg=="], + + "is-promise": ["is-promise@4.0.0", "", {}, "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ=="], + + "json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], + + "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], + + "media-typer": ["media-typer@1.1.0", "", {}, "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw=="], + + "merge-descriptors": ["merge-descriptors@2.0.0", "", {}, "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g=="], + + "meriyah": ["meriyah@6.1.4", "", {}, "sha512-Sz8FzjzI0kN13GK/6MVEsVzMZEPvOhnmmI1lU5+/1cGOiK3QUahntrNNtdVeihrO7t9JpoH75iMNXg6R6uWflQ=="], + + "mime-db": ["mime-db@1.54.0", "", {}, "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ=="], + + "mime-types": ["mime-types@3.0.2", "", { "dependencies": { "mime-db": "^1.54.0" } }, "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A=="], + + "minimatch": ["minimatch@10.2.5", "", { "dependencies": { "brace-expansion": "^5.0.5" } }, 
"sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg=="], + + "module-details-from-path": ["module-details-from-path@1.0.4", "", {}, "sha512-EGWKgxALGMgzvxYF1UyGTy0HXX/2vHLkw6+NvDKW2jypWbHpjQuj4UMcqQWXHERJhVGKikolT06G3bcKe4fi7w=="], + + "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], + + "mustache": ["mustache@4.2.0", "", { "bin": { "mustache": "bin/mustache" } }, "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ=="], + + "negotiator": ["negotiator@1.0.0", "", {}, "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg=="], + + "object-assign": ["object-assign@4.1.1", "", {}, "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg=="], + + "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="], + + "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="], + + "once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="], + + "parseurl": ["parseurl@1.3.3", "", {}, "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="], + + "path-to-regexp": ["path-to-regexp@8.4.2", "", {}, "sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA=="], + + "picomatch": ["picomatch@4.0.4", "", {}, "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A=="], + + "pluralize": ["pluralize@8.0.0", "", {}, "sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA=="], + + 
"proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="], + + "qs": ["qs@6.15.2", "", { "dependencies": { "side-channel": "^1.1.0" } }, "sha512-Rzq0KEyX/w/tEybncDgdkZrJgVUsUMk3xjh3t5bv3S1HTAtg+uOYt72+ZfwiQwKdysThkTBdL/rTi6HDmX9Ddw=="], + + "range-parser": ["range-parser@1.2.1", "", {}, "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg=="], + + "raw-body": ["raw-body@3.0.2", "", { "dependencies": { "bytes": "~3.1.2", "http-errors": "~2.0.1", "iconv-lite": "~0.7.0", "unpipe": "~1.0.0" } }, "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA=="], + + "require-from-string": ["require-from-string@2.0.2", "", {}, "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw=="], + + "router": ["router@2.2.0", "", { "dependencies": { "debug": "^4.4.0", "depd": "^2.0.0", "is-promise": "^4.0.0", "parseurl": "^1.3.3", "path-to-regexp": "^8.0.0" } }, "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ=="], + + "safer-buffer": ["safer-buffer@2.1.2", "", {}, "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="], + + "semifies": ["semifies@1.0.0", "", {}, "sha512-xXR3KGeoxTNWPD4aBvL5NUpMTT7WMANr3EWnaS190QVkY52lqqcVRD7Q05UVbBhiWDGWMlJEUam9m7uFFGVScw=="], + + "send": ["send@1.2.1", "", { "dependencies": { "debug": "^4.4.3", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "etag": "^1.8.1", "fresh": "^2.0.0", "http-errors": "^2.0.1", "mime-types": "^3.0.2", "ms": "^2.1.3", "on-finished": "^2.4.1", "range-parser": "^1.2.1", "statuses": "^2.0.2" } }, "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ=="], + + "serve-static": ["serve-static@2.2.1", "", { "dependencies": { "encodeurl": 
"^2.0.0", "escape-html": "^1.0.3", "parseurl": "^1.3.3", "send": "^1.2.0" } }, "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw=="], + + "setprototypeof": ["setprototypeof@1.2.0", "", {}, "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="], + + "side-channel": ["side-channel@1.1.1", "", { "dependencies": { "es-errors": "^1.3.0", "object-inspect": "^1.13.4", "side-channel-list": "^1.0.1", "side-channel-map": "^1.0.1", "side-channel-weakmap": "^1.0.2" } }, "sha512-6x6dK6zJdpTzF4sQeNYxwtvBzf6Eg4GtlesS94HOvTudUeyK2WXAaIfmDgsyslYrRBeFIlsi54AYsFGUuhmvrQ=="], + + "side-channel-list": ["side-channel-list@1.0.1", "", { "dependencies": { "es-errors": "^1.3.0", "object-inspect": "^1.13.4" } }, "sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w=="], + + "side-channel-map": ["side-channel-map@1.0.1", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.5", "object-inspect": "^1.13.3" } }, "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA=="], + + "side-channel-weakmap": ["side-channel-weakmap@1.0.2", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.5", "object-inspect": "^1.13.3", "side-channel-map": "^1.0.1" } }, "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A=="], + + "simple-git": ["simple-git@3.36.0", "", { "dependencies": { "@kwsites/file-exists": "^1.1.1", "@kwsites/promise-deferred": "^1.1.1", "@simple-git/args-pathspec": "^1.0.3", "@simple-git/argv-parser": "^1.1.0", "debug": "^4.4.0" } }, "sha512-cGQjLjK8bxJw4QuYT7gxHw3/IouVESbhahSsHrX97MzCL1gu2u7oy38W6L2ZIGECEfIBG4BabsWDPjBxJENv9Q=="], + + "source-map": ["source-map@0.7.6", "", {}, "sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ=="], + + "statuses": 
["statuses@2.0.2", "", {}, "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw=="], + + "string-width": ["string-width@4.2.3", "", { "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" } }, "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g=="], + + "strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="], + + "termi-link": ["termi-link@1.1.0", "", {}, "sha512-2qSN6TnomHgVLtk+htSWbaYs4Rd2MH/RU7VpHTy6MBstyNyWbM4yKd1DCYpE3fDg8dmGWojXCngNi/MHCzGuAA=="], + + "toidentifier": ["toidentifier@1.0.1", "", {}, "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA=="], + + "type-is": ["type-is@2.1.0", "", { "dependencies": { "content-type": "^2.0.0", "media-typer": "^1.1.0", "mime-types": "^3.0.0" } }, "sha512-faYHw0anBbc/kWF3zFTEnxSFOAGUX9GFbOBthvDdLsIlEoWOFOtS0zgCiQYwIskL9iGXZL3kAXD8OoZ4GmMATA=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici-types": ["undici-types@7.24.6", "", {}, "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg=="], + + "unpipe": ["unpipe@1.0.0", "", {}, "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ=="], + + "unplugin": ["unplugin@2.3.11", "", { "dependencies": { "@jridgewell/remapping": "^2.3.5", "acorn": "^8.15.0", "picomatch": "^4.0.3", "webpack-virtual-modules": "^0.6.2" } }, "sha512-5uKD0nqiYVzlmCRs01Fhs2BdkEgBS3SAVP6ndrBsuK42iC2+JHyxM05Rm9G8+5mkmRtzMZGY8Ct5+mliZxU/Ww=="], + + "uuid": ["uuid@11.1.1", "", { "bin": { "uuid": "dist/esm/bin/uuid" } }, 
"sha512-vIYxrBCC/N/K+Js3qSN88go7kIfNPssr/hHCesKCQNAjmgvYS2oqr69kIufEG+O4+PfezOH4EbIeHCfFov8ZgQ=="], + + "vary": ["vary@1.1.2", "", {}, "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg=="], + + "webpack-virtual-modules": ["webpack-virtual-modules@0.6.2", "", {}, "sha512-66/V2i5hQanC51vBQKPH4aI8NMAcBW59FVBs+rC7eGHupMyfn34q7rZIE+ETlJ+XTevqfUhVVBgSUNSW2flEUQ=="], + + "wrappy": ["wrappy@1.0.2", "", {}, "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="], + + "zod": ["zod@4.4.3", "", {}, "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ=="], + + "zod-to-json-schema": ["zod-to-json-schema@3.25.2", "", { "peerDependencies": { "zod": "^3.25.28 || ^4" } }, "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA=="], + + "@apm-js-collab/code-transformer/source-map": ["source-map@0.6.1", "", {}, "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g=="], + + "body-parser/content-type": ["content-type@2.0.0", "", {}, "sha512-j/O/d7GcZCyNl7/hwZAb606rzqkyvaDctLmckbxLzHvFBzTJHuGEdodATcP3yIRoDrLHkIATJuvzbFlp/ki2cQ=="], + + "type-is/content-type": ["content-type@2.0.0", "", {}, "sha512-j/O/d7GcZCyNl7/hwZAb606rzqkyvaDctLmckbxLzHvFBzTJHuGEdodATcP3yIRoDrLHkIATJuvzbFlp/ki2cQ=="], + } +} diff --git a/plugins/trace-codex/config.json.example b/plugins/trace-codex/config.json.example new file mode 100644 index 0000000..8f05530 --- /dev/null +++ b/plugins/trace-codex/config.json.example @@ -0,0 +1,13 @@ +{ + "_comment": "Copy this file to your plugin data dir as config.json (e.g. ~/.codex/plugins/data/trace-codex-/config.json) and fill in your values. All keys are optional. Environment variables override these values. 
See README.md for details.", + "traceToBraintrust": true, + "apiKey": "sk-...", + "apiUrl": "https://api.braintrust.dev", + "appUrl": "https://www.braintrust.dev", + "project": "my-codex-project", + "additionalMetadata": { "team": "platform", "env": "dev" }, + "recordFile": "", + "port": 52734, + "idleTimeoutMs": 300000, + "idleCheckIntervalMs": 30000 +} diff --git a/plugins/trace-codex/hooks/hooks.json b/plugins/trace-codex/hooks/hooks.json new file mode 100644 index 0000000..938afdf --- /dev/null +++ b/plugins/trace-codex/hooks/hooks.json @@ -0,0 +1,124 @@ +{ + "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "sh \"${PLUGIN_ROOT}/bin/codex-hook.sh\" hook", + "commandWindows": "cmd /c \"${PLUGIN_ROOT}\\bin\\codex-hook.cmd\" hook", + "statusMessage": "Braintrust tracing" + } + ] + } + ], + "UserPromptSubmit": [ + { + "hooks": [ + { + "type": "command", + "command": "sh \"${PLUGIN_ROOT}/bin/codex-hook.sh\" hook", + "commandWindows": "cmd /c \"${PLUGIN_ROOT}\\bin\\codex-hook.cmd\" hook", + "statusMessage": "Braintrust tracing" + } + ] + } + ], + "PreToolUse": [ + { + "hooks": [ + { + "type": "command", + "command": "sh \"${PLUGIN_ROOT}/bin/codex-hook.sh\" hook", + "commandWindows": "cmd /c \"${PLUGIN_ROOT}\\bin\\codex-hook.cmd\" hook", + "statusMessage": "Braintrust tracing" + } + ] + } + ], + "PermissionRequest": [ + { + "hooks": [ + { + "type": "command", + "command": "sh \"${PLUGIN_ROOT}/bin/codex-hook.sh\" hook", + "commandWindows": "cmd /c \"${PLUGIN_ROOT}\\bin\\codex-hook.cmd\" hook", + "statusMessage": "Braintrust tracing" + } + ] + } + ], + "PostToolUse": [ + { + "hooks": [ + { + "type": "command", + "command": "sh \"${PLUGIN_ROOT}/bin/codex-hook.sh\" hook", + "commandWindows": "cmd /c \"${PLUGIN_ROOT}\\bin\\codex-hook.cmd\" hook", + "statusMessage": "Braintrust tracing" + } + ] + } + ], + "PreCompact": [ + { + "hooks": [ + { + "type": "command", + "command": "sh \"${PLUGIN_ROOT}/bin/codex-hook.sh\" hook", + 
"commandWindows": "cmd /c \"${PLUGIN_ROOT}\\bin\\codex-hook.cmd\" hook", + "statusMessage": "Braintrust tracing" + } + ] + } + ], + "PostCompact": [ + { + "hooks": [ + { + "type": "command", + "command": "sh \"${PLUGIN_ROOT}/bin/codex-hook.sh\" hook", + "commandWindows": "cmd /c \"${PLUGIN_ROOT}\\bin\\codex-hook.cmd\" hook", + "statusMessage": "Braintrust tracing" + } + ] + } + ], + "SubagentStart": [ + { + "hooks": [ + { + "type": "command", + "command": "sh \"${PLUGIN_ROOT}/bin/codex-hook.sh\" hook", + "commandWindows": "cmd /c \"${PLUGIN_ROOT}\\bin\\codex-hook.cmd\" hook", + "statusMessage": "Braintrust tracing" + } + ] + } + ], + "SubagentStop": [ + { + "hooks": [ + { + "type": "command", + "command": "sh \"${PLUGIN_ROOT}/bin/codex-hook.sh\" hook", + "commandWindows": "cmd /c \"${PLUGIN_ROOT}\\bin\\codex-hook.cmd\" hook", + "statusMessage": "Braintrust tracing" + } + ] + } + ], + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "sh \"${PLUGIN_ROOT}/bin/codex-hook.sh\" hook", + "commandWindows": "cmd /c \"${PLUGIN_ROOT}\\bin\\codex-hook.cmd\" hook", + "statusMessage": "Braintrust tracing" + } + ] + } + ] + } +} diff --git a/plugins/trace-codex/package.json b/plugins/trace-codex/package.json new file mode 100644 index 0000000..656a1bd --- /dev/null +++ b/plugins/trace-codex/package.json @@ -0,0 +1,34 @@ +{ + "name": "trace-codex", + "version": "0.1.0", + "private": true, + "description": "Background event server and hook client for tracing Codex sessions to Braintrust.", + "type": "module", + "scripts": { + "build": "bun run scripts/build.ts", + "dev": "bun run --watch src/index.ts serve", + "test": "bun test", + "typecheck": "tsc --noEmit", + "lint": "biome lint src scripts", + "lint:fix": "biome lint --write src scripts", + "format": "biome format --write src scripts", + "check": "biome check src scripts" + }, + "keywords": [ + "braintrust", + "tracing", + "observability", + "codex", + "hooks" + ], + "author": "Braintrust", + "license": "MIT", + 
"devDependencies": { + "@biomejs/biome": "^2.3.11", + "@types/bun": "^1.1.14", + "typescript": "^5.7.2" + }, + "dependencies": { + "braintrust": "^3.17.0" + } +} diff --git a/plugins/trace-codex/scripts/build.ts b/plugins/trace-codex/scripts/build.ts new file mode 100644 index 0000000..8bcac5e --- /dev/null +++ b/plugins/trace-codex/scripts/build.ts @@ -0,0 +1,79 @@ +// Compile the `codex-hook` binary. +// +// The Codex hook command invokes a single, fixed-name binary: +// ${PLUGIN_ROOT}/bin/codex-hook +// (no shell, no uname, no per-platform command string). So the build always +// produces that fixed-name binary for the HOST platform. +// +// For distribution we additionally emit per-platform named binaries +// (codex-hook--). A future prebuilt-distribution flow can ship all of +// them and select the right one at install time; for build-on-install we only +// need the host binary. +// +// Supported targets: macOS (arm64, x64) and Linux (x64, arm64). Windows is +// not yet built but slots in cleanly (bun-windows-x64 -> codex-hook.exe, +// referenced via a `commandWindows` entry in hooks.json). + +import { copyFileSync, mkdirSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { $ } from "bun"; + +const ROOT = join(dirname(fileURLToPath(import.meta.url)), ".."); +const ENTRY = join(ROOT, "src/index.ts"); +const OUT_DIR = join(ROOT, "bin"); + +/** Fixed name the Codex hook command invokes. */ +const HOST_BINARY_NAME = "codex-hook"; + +interface Target { + /** Bun --target value. */ + bunTarget: string; + /** Output suffix: -. 
/**
 * Cross-compilation matrix. Each entry is compiled to
 * `bin/codex-hook-<suffix>`, where `<suffix>` is `<os>-<arch>`.
 */
const TARGETS: Target[] = [
  { bunTarget: "bun-darwin-arm64", suffix: "darwin-arm64" },
  { bunTarget: "bun-darwin-x64", suffix: "darwin-x64" },
  { bunTarget: "bun-linux-x64", suffix: "linux-x64" },
  { bunTarget: "bun-linux-arm64", suffix: "linux-arm64" },
  // Future: { bunTarget: "bun-windows-x64", suffix: "windows-x64.exe" },
];

/**
 * The `<os>-<arch>` suffix of the machine running this build.
 *
 * Only darwin/linux on arm64/x64 map onto an entry of TARGETS. Any other host
 * is reported with its real `process.platform`/`process.arch` values so the
 * caller can detect that no matching target exists, instead of being silently
 * mislabelled as linux/x64.
 */
function hostTargetSuffix(): string {
  const os =
    process.platform === "darwin" || process.platform === "linux"
      ? process.platform
      : undefined;
  const arch = process.arch === "arm64" || process.arch === "x64" ? process.arch : undefined;
  if (os === undefined || arch === undefined) {
    return `${process.platform}-${process.arch}`;
  }
  return `${os}-${arch}`;
}

/**
 * Compile the entry point into a standalone executable for one Bun target.
 *
 * @param bunTarget value passed to `bun build --target`
 * @param outfile   path of the executable to write
 * Rejects if the `bun build` command fails.
 */
async function compile(bunTarget: string, outfile: string): Promise<void> {
  process.stderr.write(`building ${bunTarget} -> ${outfile}\n`);
  await $`bun build ${ENTRY} --compile --target=${bunTarget} --outfile ${outfile}`.quiet();
}

/**
 * Build every target (or only the host's when BUILD_HOST_ONLY=1), then copy the
 * host's binary to the fixed name `bin/codex-hook`.
 *
 * On a host with no entry in TARGETS there is no correct binary to copy:
 * host-only mode fails with a clear error, and a full build skips the copy with
 * a warning rather than installing a binary for a different platform.
 */
async function main(): Promise<void> {
  mkdirSync(OUT_DIR, { recursive: true });

  const hostSuffix = hostTargetSuffix();
  const hostSupported = TARGETS.some((t) => t.suffix === hostSuffix);
  const hostOnly = process.env.BUILD_HOST_ONLY === "1";

  if (hostOnly && !hostSupported) {
    throw new Error(`unsupported host platform for a host-only build: ${hostSuffix}`);
  }

  const targets = hostOnly ? TARGETS.filter((t) => t.suffix === hostSuffix) : TARGETS;

  // Built one at a time so the progress lines on stderr stay in order.
  for (const target of targets) {
    const outfile = join(OUT_DIR, `codex-hook-${target.suffix}`);
    await compile(target.bunTarget, outfile);
  }

  if (hostSupported) {
    // Always provide the fixed-name host binary the hook command points at.
    const hostNamed = join(OUT_DIR, `codex-hook-${hostSuffix}`);
    const hostFixed = join(OUT_DIR, HOST_BINARY_NAME);
    copyFileSync(hostNamed, hostFixed);
    process.stderr.write(`host binary: ${hostFixed}\n`);
  } else {
    process.stderr.write(`warning: no target matches host ${hostSuffix}; skipping host binary\n`);
  }

  process.stderr.write(`done: built ${targets.length} target(s)\n`);
}

main().catch((err) => {
  process.stderr.write(`build failed: ${err}\n`);
  process.exit(1);
});
buildEnqueueEvents('{"hook_event_name":"Stop"}', {}); + expect(events.length).toBe(1); + expect(events[0].queueId).toBeNull(); + expect(events[0].eventName).toBe("Stop"); + }); + + test("null queueId when session_id is empty", () => { + const [e] = buildEnqueueEvents('{"hook_event_name":"Stop","session_id":""}', {}); + expect(e.queueId).toBeNull(); + }); + + test("forwards raw text and null queueId on non-JSON stdin", () => { + const [e] = buildEnqueueEvents("not json at all", {}); + expect(e.queueId).toBeNull(); + expect(e.eventName).toBe("unknown"); + expect(e.eventData).toEqual({ raw: "not json at all" }); + }); + + test("uses CODEX_VERSION env when present", () => { + const [e] = buildEnqueueEvents('{"hook_event_name":"Stop","session_id":"s"}', { + CODEX_VERSION: "1.2.3", + }); + expect(e.eventSourceVersion).toBe("1.2.3"); + }); + + test("null version when env missing", () => { + const [e] = buildEnqueueEvents('{"hook_event_name":"Stop","session_id":"s"}', {}); + expect(e.eventSourceVersion).toBeNull(); + }); + + test("defaults event name to unknown when hook_event_name missing", () => { + const [e] = buildEnqueueEvents('{"session_id":"abc"}', {}); + expect(e.eventName).toBe("unknown"); + }); +}); + +describe("resolveReportingConfig", () => { + test("reads project/key/urls from env", () => { + expect( + resolveReportingConfig({ + BRAINTRUST_PROJECT: "p", + BRAINTRUST_API_KEY: "sk-1", + BRAINTRUST_API_URL: "https://api", + BRAINTRUST_APP_URL: "https://app", + }), + ).toEqual({ + project: "p", + apiKey: "sk-1", + apiUrl: "https://api", + appUrl: "https://app", + traceToBraintrust: false, + }); + }); + + test("falls back to BRAINTRUST_DEFAULT_PROJECT for project", () => { + expect(resolveReportingConfig({ BRAINTRUST_DEFAULT_PROJECT: "dp" })).toEqual({ + project: "dp", + traceToBraintrust: false, + }); + }); + + test("empty env yields traceToBraintrust:false and nothing else", () => { + expect(resolveReportingConfig({})).toEqual({ traceToBraintrust: false }); + }); 
+ + test("TRACE_TO_BRAINTRUST true/1 enables tracing; other values disable", () => { + expect(resolveReportingConfig({ TRACE_TO_BRAINTRUST: "true" }).traceToBraintrust).toBe(true); + expect(resolveReportingConfig({ TRACE_TO_BRAINTRUST: "1" }).traceToBraintrust).toBe(true); + expect(resolveReportingConfig({ TRACE_TO_BRAINTRUST: "TRUE" }).traceToBraintrust).toBe(true); + expect(resolveReportingConfig({ TRACE_TO_BRAINTRUST: "false" }).traceToBraintrust).toBe(false); + expect(resolveReportingConfig({ TRACE_TO_BRAINTRUST: "no" }).traceToBraintrust).toBe(false); + }); + + test("parses BRAINTRUST_ADDITIONAL_METADATA JSON object", () => { + expect( + resolveReportingConfig({ BRAINTRUST_ADDITIONAL_METADATA: '{"team":"platform","n":1}' }) + .additionalMetadata, + ).toEqual({ team: "platform", n: 1 }); + }); + + test("ignores malformed or non-object additional metadata", () => { + expect( + resolveReportingConfig({ BRAINTRUST_ADDITIONAL_METADATA: "not json" }).additionalMetadata, + ).toBeUndefined(); + expect( + resolveReportingConfig({ BRAINTRUST_ADDITIONAL_METADATA: "[1,2]" }).additionalMetadata, + ).toBeUndefined(); + }); +}); + +describe("buildConfigEvent", () => { + test("carries the reporting config (including apiKey) on the wire", () => { + const e = buildConfigEvent("s1", { + BRAINTRUST_PROJECT: "p", + BRAINTRUST_API_KEY: "sk-secret", + TRACE_TO_BRAINTRUST: "true", + }); + expect(e.eventName).toBe(CODEX_CONFIG_EVENT); + expect(e.queueId).toBe("s1"); + expect(e.eventData).toEqual({ project: "p", apiKey: "sk-secret", traceToBraintrust: true }); + }); +}); diff --git a/plugins/trace-codex/src/agents/codex/event-builder.ts b/plugins/trace-codex/src/agents/codex/event-builder.ts new file mode 100644 index 0000000..5b6042a --- /dev/null +++ b/plugins/trace-codex/src/agents/codex/event-builder.ts @@ -0,0 +1,113 @@ +// Translates a raw Codex hook payload (the JSON Codex writes to the hook's +// stdin) into one or more generic EnqueueEvents. 
import type { ReportingConfig } from "../../braintrust/logger.ts";
import type { EnqueueEvent } from "../../server/routes.ts";

/** Identifies events originating from the Codex hook. */
export const CODEX_EVENT_SOURCE = "codex-hook";

/**
 * Internal, synthetic event emitted (with the session's queueId) ahead of a
 * SessionStart. It carries the resolved Braintrust reporting config so the
 * per-session processor can build its own logger before opening any spans.
 * Prefixed to avoid clashing with real Codex hook_event_name values.
 */
export const CODEX_CONFIG_EVENT = "__braintrust_config";

/** Type guard: true for a decoded JSON object (non-null, not an array). */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parse a boolean env var.
 *
 * @returns true only for "true" or "1" (surrounding whitespace and case are
 *          ignored); false for anything else, including unset or empty.
 */
function parseBoolEnv(value: string | undefined): boolean {
  const normalized = value?.trim().toLowerCase();
  return normalized === "true" || normalized === "1";
}

/**
 * Resolve the reporting config from the (already settings-applied) environment.
 *
 * Optional keys are set only when the matching variable is non-empty.
 * `traceToBraintrust` is always set.
 *
 * @param env environment to read; defaults to `process.env`
 */
export function resolveReportingConfig(env: NodeJS.ProcessEnv = process.env): ReportingConfig {
  const config: ReportingConfig = {};

  // BRAINTRUST_PROJECT wins; BRAINTRUST_DEFAULT_PROJECT is the fallback.
  const project = env.BRAINTRUST_PROJECT || env.BRAINTRUST_DEFAULT_PROJECT;
  if (project) config.project = project;
  if (env.BRAINTRUST_API_KEY) config.apiKey = env.BRAINTRUST_API_KEY;
  if (env.BRAINTRUST_API_URL) config.apiUrl = env.BRAINTRUST_API_URL;
  if (env.BRAINTRUST_APP_URL) config.appUrl = env.BRAINTRUST_APP_URL;

  // Master switch: off unless explicitly enabled.
  config.traceToBraintrust = parseBoolEnv(env.TRACE_TO_BRAINTRUST);

  // Additional metadata: a JSON object. Ignore anything that isn't one.
  const rawMeta = env.BRAINTRUST_ADDITIONAL_METADATA;
  if (rawMeta) {
    try {
      const parsed: unknown = JSON.parse(rawMeta);
      if (isJsonObject(parsed)) config.additionalMetadata = parsed;
    } catch {
      // Malformed: ignore rather than break the session.
    }
  }
  return config;
}

/**
 * Build the synthetic config event for a session.
 *
 * @param queueId session id the event is queued under, or null when unknown
 * @param env     environment to read; defaults to `process.env`
 */
export function buildConfigEvent(
  queueId: string | null,
  env: NodeJS.ProcessEnv = process.env,
): EnqueueEvent {
  return {
    queueId,
    eventSource: CODEX_EVENT_SOURCE,
    eventSourceVersion: env.CODEX_VERSION ?? null,
    eventName: CODEX_CONFIG_EVENT,
    eventData: resolveReportingConfig(env),
  };
}

/**
 * Build the enqueue payload(s) from the raw Codex hook event JSON.
 *
 * Returns an array: normally a single event, but on SessionStart a leading
 * config event (same queueId) is prepended so the processor configures its
 * reporting before opening the root span.
 *
 * The queue is keyed by the Codex session id. The event is always forwarded;
 * when there is no non-empty string `session_id`, `queueId` is left null.
 *
 * @param rawStdin text Codex wrote to the hook's stdin
 * @param env      environment to read; defaults to `process.env`
 */
export function buildEnqueueEvents(
  rawStdin: string,
  env: NodeJS.ProcessEnv = process.env,
): EnqueueEvent[] {
  // Fallback payload: still forward the raw text for debugging.
  let eventData: unknown = { raw: rawStdin };
  let eventName = "unknown";
  let queueId: string | null = null;

  let parsed: unknown = null;
  try {
    parsed = JSON.parse(rawStdin);
  } catch {
    // Not JSON: keep the raw-text fallback.
  }

  // A literal JSON `null` carries no payload and is treated like unparseable
  // input, so it also keeps the raw-text fallback.
  if (parsed !== null) {
    eventData = parsed;
    if (isJsonObject(parsed)) {
      if (typeof parsed.hook_event_name === "string") {
        eventName = parsed.hook_event_name;
      }
      if (typeof parsed.session_id === "string" && parsed.session_id.length > 0) {
        queueId = parsed.session_id;
      }
    }
  }

  const event: EnqueueEvent = {
    queueId,
    eventSource: CODEX_EVENT_SOURCE,
    eventSourceVersion: env.CODEX_VERSION ?? null,
    eventName,
    eventData,
  };

  // On session start, prepend the config event so the processor can configure
  // its per-session reporting before the root span is created.
  if (eventName === "SessionStart") {
    return [buildConfigEvent(queueId, env), event];
  }
  return [event];
}
test("root span falls back to 'codex session' when cwd is missing", async () => { + await assertProducesTrace([sessionStart({})], { + span_attributes: { name: "codex session", type: "task" }, + children: [], + }); + }); + + test("a session with no Stop stays active (no end time)", async () => { + await assertProducesTrace( + [sessionStart({ model: "gpt-5.5", source: "startup" }), userPromptSubmit({ prompt: "hi" })], + { + span_attributes: { name: "codex session", type: "task" }, + ended: false, + children: [], + }, + ); + }); + + test("Stop ends the root span", async () => { + await assertProducesTrace( + [ + sessionStart({ model: "gpt-5.5", source: "startup" }), + userPromptSubmit({ prompt: "hi" }), + stop({}), + ], + { + span_attributes: { name: "codex session", type: "task" }, + ended: true, + children: [], + }, + ); + }); + + test("ends on the first Stop; later events do not reopen it", async () => { + await assertProducesTrace( + [ + sessionStart({ model: "gpt-5.5", source: "startup" }), + userPromptSubmit({ prompt: "hi", turn_id: "t1" }), + stop({ turn_id: "t1", last_assistant_message: "hello" }), + userPromptSubmit({ prompt: "another thing", turn_id: "t2" }), + stop({ turn_id: "t2", last_assistant_message: "ok" }), + ], + { + span_attributes: { name: "codex session", type: "task" }, + ended: true, + children: [ + { span_attributes: { name: "turn: t1", type: "task" } }, + { span_attributes: { name: "turn: t2", type: "task" } }, + ], + }, + ); + }); + + test("a turn becomes a child span with prompt input and assistant output", async () => { + await assertProducesTrace( + [ + sessionStart({ model: "gpt-5.5", source: "startup" }), + userPromptSubmit({ prompt: "what's your name?", turn_id: "t1" }), + stop({ turn_id: "t1", last_assistant_message: "I'm Codex." 
}), + ], + { + span_attributes: { name: "codex session", type: "task" }, + ended: true, + children: [ + { + span_attributes: { name: "turn: t1", type: "task" }, + input: "what's your name?", + output: "I'm Codex.", + metadata: { turn_id: "t1" }, + ended: true, + }, + ], + }, + ); + }); + + test("an open turn with no matching Stop stays active", async () => { + await assertProducesTrace( + [ + sessionStart({ model: "gpt-5.5", source: "startup" }), + userPromptSubmit({ prompt: "hi", turn_id: "t1" }), + ], + { + span_attributes: { name: "codex session", type: "task" }, + ended: false, + children: [ + { + span_attributes: { name: "turn: t1", type: "task" }, + input: "hi", + ended: false, + }, + ], + }, + ); + }); + + test("a Stop with no matching turn does not create a turn span", async () => { + await assertProducesTrace( + [ + sessionStart({ model: "gpt-5.5", source: "startup" }), + stop({ turn_id: "nope", last_assistant_message: "orphan" }), + ], + { + span_attributes: { name: "codex session", type: "task" }, + ended: true, + children: [], + }, + ); + }); + + test("duplicate SessionStart still yields a single root span", async () => { + await assertProducesTrace( + [ + sessionStart({ model: "gpt-5.5", source: "startup" }), + sessionStart({ model: "gpt-5.5", source: "startup" }), + ], + { + span_attributes: { name: "codex session", type: "task" }, + children: [], + }, + ); + }); + + test("a config event builds the per-session factory with its config", () => { + // Spy provider records the config it was asked to build a factory for. 
+ let seen: ReportingConfig | undefined; + const fake: SpanFactory = { + startSpan: () => + ({ id: "s", end: () => 0, log: () => {} }) as unknown as ReturnType< + SpanFactory["startSpan"] + >, + flush: async () => {}, + }; + const provider = (config?: ReportingConfig) => { + seen = config; + return fake; + }; + + const processor = new CodexEventProcessor("sess-1", createTestLogger(), provider); + const cfg: EnqueueEvent = { + queueId: "sess-1", + eventSource: "codex-hook", + eventSourceVersion: null, + eventName: CODEX_CONFIG_EVENT, + eventData: { + project: "team-project", + apiKey: "sk-1", + apiUrl: "https://api", + traceToBraintrust: true, + }, + }; + processor.process(cfg); + // Factory is built lazily on first span (the SessionStart). + processor.process(sessionStart({ session_id: "sess-1", model: "gpt-5.5" })); + + expect(seen).toEqual({ + project: "team-project", + apiKey: "sk-1", + apiUrl: "https://api", + appUrl: undefined, + traceToBraintrust: true, + additionalMetadata: undefined, + }); + }); + + test("root span metadata carries the configured project", async () => { + await assertProducesTrace( + [configEvent({ project: "team-project" }), sessionStart({ model: "gpt-5.5" })], + { + span_attributes: { name: "codex session", type: "task" }, + metadata: { project: "team-project" }, + children: [], + }, + ); + }); + + test("when tracing is disabled, no spans are produced", async () => { + const trace = withCapturedTrace(); + try { + const processor = new CodexEventProcessor("s", createTestLogger(), () => trace.spanFactory); + // Config event WITHOUT traceToBraintrust (defaults off). 
+ processor.process({ + queueId: "s", + eventSource: "codex-hook", + eventSourceVersion: null, + eventName: CODEX_CONFIG_EVENT, + eventData: { project: "p" }, + }); + processor.process(sessionStart({ session_id: "s" })); + processor.process(userPromptSubmit({ session_id: "s", turn_id: "t1", prompt: "hi" })); + processor.process(stop({ session_id: "s", turn_id: "t1", last_assistant_message: "yo" })); + await processor.flush(); + + const spans = await trace.drain(); + expect(spans.length).toBe(0); + } finally { + trace.cleanup(); + } + }); + + test("additionalMetadata is merged into root metadata; standard keys win", async () => { + await assertProducesTrace( + [ + configEvent({ + project: "team-project", + additionalMetadata: { team: "platform", model: "SHOULD_BE_OVERRIDDEN" }, + }), + sessionStart({ model: "gpt-5.5" }), + ], + { + span_attributes: { name: "codex session", type: "task" }, + // team comes from additionalMetadata; model is the standard key (wins). + metadata: { team: "platform", model: "gpt-5.5", project: "team-project" }, + children: [], + }, + ); + }); +}); diff --git a/plugins/trace-codex/src/agents/codex/event-processor.ts b/plugins/trace-codex/src/agents/codex/event-processor.ts new file mode 100644 index 0000000..dd95c50 --- /dev/null +++ b/plugins/trace-codex/src/agents/codex/event-processor.ts @@ -0,0 +1,314 @@ +// Processor for Codex events. A leading config event (same queueId) configures +// how this session reports to Braintrust (project/credentials), so the processor +// builds its own per-session SpanFactory. On SessionStart it opens a Braintrust +// root span. Each turn is a child span: UserPromptSubmit opens it (input = the +// user prompt), and the matching Stop closes it (output = the final assistant +// message). The first Stop also ends the root span. Buffered spans are delivered +// via flush(). 
/**
 * Last non-empty path segment of `cwd`, used to name the root span
 * (e.g. "/whatever/myapp" -> "myapp").
 *
 * Both "/" and "\" are treated as separators, and trailing separators are
 * skipped.
 *
 * @returns the segment, or undefined when `cwd` is missing, empty, or made up
 *          of separators only (e.g. "/").
 */
function projectDirName(cwd: string | undefined): string | undefined {
  if (!cwd) return undefined;
  const pieces = cwd.split(/[/\\]/);
  // Walk backwards so trailing separators (empty pieces) are skipped.
  for (let i = pieces.length - 1; i >= 0; i -= 1) {
    const piece = pieces[i];
    if (piece.length > 0) return piece;
  }
  return undefined;
}
*/ + private get spanFactory(): SpanFactory { + if (this.spanFactoryInstance === null) { + this.spanFactoryInstance = this.spanFactoryProvider(this.reportingConfig, this.logger); + } + return this.spanFactoryInstance; + } + + process(event: EnqueueEvent): void { + if (event.eventName === CODEX_CONFIG_EVENT) { + this.configure(event); + return; + } + // Master switch: when tracing is disabled, drop everything (no SDK calls). + if (!this.tracingEnabled) { + this.logger.debug("codex processor: tracing disabled; dropping event", { + queueId: this.queueId, + eventName: event.eventName, + }); + return; + } + if (event.eventName === SESSION_START) { + this.startRootSpan(event); + return; + } + if (event.eventName === USER_PROMPT_SUBMIT) { + this.startTurnSpan(event); + return; + } + if (event.eventName === STOP) { + this.endTurnSpan(event); + this.endRootSpan(); + return; + } + // Other events will attach child spans to the root span in later phases. + this.logger.debug("codex processor: no-op", { + queueId: this.queueId, + eventName: event.eventName, + }); + } + + // Record the session's reporting config from the config event. The + // SpanFactory is built lazily (on first span) from this config. Arriving + // before the root span is created, this lets the session report to its own + // project/account. + private configure(event: EnqueueEvent): void { + if (this.spanFactoryInstance !== null) { + this.logger.warn("codex processor: config event after factory built; ignoring", { + queueId: this.queueId, + }); + return; + } + const data = (event.eventData ?? {}) as Record; + this.reportingConfig = { + project: typeof data.project === "string" ? data.project : undefined, + apiKey: typeof data.apiKey === "string" ? data.apiKey : undefined, + apiUrl: typeof data.apiUrl === "string" ? data.apiUrl : undefined, + appUrl: typeof data.appUrl === "string" ? 
data.appUrl : undefined, + traceToBraintrust: data.traceToBraintrust === true, + additionalMetadata: + typeof data.additionalMetadata === "object" && + data.additionalMetadata !== null && + !Array.isArray(data.additionalMetadata) + ? (data.additionalMetadata as Record) + : undefined, + }; + this.logger.info("codex processor: configured reporting", { + queueId: this.queueId, + project: this.reportingConfig.project, + apiUrl: this.reportingConfig.apiUrl, + hasApiKey: Boolean(this.reportingConfig.apiKey), + traceToBraintrust: this.reportingConfig.traceToBraintrust, + }); + } + + /** Whether this session reports to Braintrust at all (master switch). */ + private get tracingEnabled(): boolean { + return this.reportingConfig?.traceToBraintrust === true; + } + + async flush(): Promise { + if (this.rootSpan === null) return; + try { + await this.rootSpan.flush(); + this.logger.debug("codex processor: flush ok", { queueId: this.queueId }); + } catch (err) { + this.logger.error("codex processor: flush failed", { + queueId: this.queueId, + error: String(err), + }); + } + } + + // Open a child span for a turn on UserPromptSubmit. Keyed by turn_id so the + // matching Stop can close it. The user prompt is the span's input. + private startTurnSpan(event: EnqueueEvent): void { + if (this.rootSpan === null) { + this.logger.warn("codex processor: turn without a root span; ignoring", { + queueId: this.queueId, + }); + return; + } + + const data = (event.eventData ?? {}) as Record; + const turnId = typeof data.turn_id === "string" ? data.turn_id : undefined; + const prompt = typeof data.prompt === "string" ? data.prompt : undefined; + const model = typeof data.model === "string" ? 
data.model : undefined; + + if (turnId === undefined) { + this.logger.warn("codex processor: UserPromptSubmit without turn_id; skipping turn span", { + queueId: this.queueId, + }); + return; + } + + if (this.openTurns.has(turnId)) { + this.logger.warn("codex processor: duplicate turn_id; keeping existing turn span", { + queueId: this.queueId, + turnId, + }); + return; + } + + try { + const turnSpan = this.rootSpan.startSpan({ + name: `turn: ${turnId}`, + type: "task", + event: { + input: prompt, + metadata: { turn_id: turnId, model }, + }, + }); + this.openTurns.set(turnId, turnSpan); + this.logger.info("codex processor: opened turn span", { + queueId: this.queueId, + turnId, + spanId: turnSpan.id, + }); + } catch (err) { + this.logger.error("codex processor: failed to open turn span", { + queueId: this.queueId, + turnId, + error: String(err), + }); + } + } + + // Close the turn span matching the Stop's turn_id, recording the final + // assistant message as the span's output. + private endTurnSpan(event: EnqueueEvent): void { + const data = (event.eventData ?? {}) as Record; + const turnId = typeof data.turn_id === "string" ? data.turn_id : undefined; + const output = + typeof data.last_assistant_message === "string" ? 
data.last_assistant_message : undefined; + + if (turnId === undefined) { + this.logger.debug("codex processor: Stop without turn_id; no turn span to close", { + queueId: this.queueId, + }); + return; + } + + const turnSpan = this.openTurns.get(turnId); + if (turnSpan === undefined) { + this.logger.debug("codex processor: Stop with no open turn span", { + queueId: this.queueId, + turnId, + }); + return; + } + + try { + turnSpan.log({ output }); + turnSpan.end(); + this.logger.info("codex processor: ended turn span", { queueId: this.queueId, turnId }); + } catch (err) { + this.logger.error("codex processor: failed to end turn span", { + queueId: this.queueId, + turnId, + error: String(err), + }); + } finally { + this.openTurns.delete(turnId); + } + } + + // End the root span on the first Stop event. Subsequent Stops are ignored + // (the SDK keeps the first end time anyway). + private endRootSpan(): void { + if (this.rootSpan === null || this.rootEnded) return; + this.rootEnded = true; + try { + this.rootSpan.end(); + this.logger.info("codex processor: ended root span", { queueId: this.queueId }); + } catch (err) { + this.logger.error("codex processor: failed to end root span", { + queueId: this.queueId, + error: String(err), + }); + } + } + + private startRootSpan(event: EnqueueEvent): void { + if (this.rootSpan !== null) { + this.logger.warn("codex processor: duplicate SessionStart; keeping existing root span", { + queueId: this.queueId, + }); + return; + } + + const data = (event.eventData ?? {}) as Record; + const model = typeof data.model === "string" ? data.model : undefined; + const cwd = typeof data.cwd === "string" ? data.cwd : undefined; + const source = typeof data.source === "string" ? data.source : undefined; + const permissionMode = + typeof data.permission_mode === "string" ? data.permission_mode : undefined; + + // Name the root span after the directory Codex was launched from, e.g. + // "codex: myapp" for /whatever/myapp. 
Falls back to "codex session" when cwd + // is unknown. + const projectDir = projectDirName(cwd); + const spanName = projectDir ? `codex: ${projectDir}` : "codex session"; + + try { + this.rootSpan = this.spanFactory.startSpan({ + name: spanName, + type: "task", + event: { + input: { model, cwd, source }, + metadata: { + // User-provided extras first, so the standard keys below win on + // conflict. + ...this.reportingConfig?.additionalMetadata, + session_id: this.queueId, + model, + cwd, + source, + permission_mode: permissionMode, + event_source_version: event.eventSourceVersion, + project: this.reportingConfig?.project, + }, + }, + }); + this.logger.info("codex processor: opened root span", { + queueId: this.queueId, + spanId: this.rootSpan.id, + }); + } catch (err) { + this.logger.error("codex processor: failed to open root span", { + queueId: this.queueId, + error: String(err), + }); + } + } +} diff --git a/plugins/trace-codex/src/agents/codex/register.ts b/plugins/trace-codex/src/agents/codex/register.ts new file mode 100644 index 0000000..a905601 --- /dev/null +++ b/plugins/trace-codex/src/agents/codex/register.ts @@ -0,0 +1,41 @@ +// Public surface of the Codex agent module. index.ts imports `codexAgent` and +// wires it into the generic core (processor registry, hook client). Adding +// another agent means adding a sibling module that exports the same shape. + +import type { EventBuilder } from "../../client/client.ts"; +import type { EventProcessorFactory } from "../../processor/event-processor.ts"; +import { buildEnqueueEvents, CODEX_EVENT_SOURCE } from "./event-builder.ts"; +import { CodexEventProcessor } from "./event-processor.ts"; +import { applySettingsToEnv, loadSettingsFile, settingsFilePath } from "./settings.ts"; + +export interface Agent { + /** Event source string this agent's events carry (matches the registry key). */ + eventSource: string; + /** Creates a per-session processor for this agent's events. 
*/
  createProcessor: EventProcessorFactory;
  /** Translates the agent's raw stdin payload into one or more EnqueueEvents. */
  buildEvents: EventBuilder;
  /**
   * Event names that terminate a turn/session. After enqueuing one of these,
   * the hook asks the server to flush synchronously so the final spans are
   * delivered before the process tree is torn down (e.g. in CI). Codex only
   * exposes a per-turn "Stop" hook today; there is no session-end event.
   */
  terminalEvents: string[];
  /**
   * Read this agent's user settings and apply them to the environment
   * (environment wins). Returns the names of applied settings, for diagnostics
   * (never values). Run before booting the server so the client and server
   * agree on configuration.
   */
  loadSettings: () => string[];
}

/** The Codex agent: its event source, processor factory, event builder, and settings loader. */
export const codexAgent: Agent = {
  eventSource: CODEX_EVENT_SOURCE,
  createProcessor: (queueId, logger, spanFactoryProvider) =>
    new CodexEventProcessor(queueId, logger, spanFactoryProvider),
  buildEvents: buildEnqueueEvents,
  terminalEvents: ["Stop"],
  loadSettings: () => applySettingsToEnv(loadSettingsFile(settingsFilePath())),
};
diff --git a/plugins/trace-codex/src/agents/codex/settings.test.ts b/plugins/trace-codex/src/agents/codex/settings.test.ts
new file mode 100644
index 0000000..1ace67b
--- /dev/null
+++ b/plugins/trace-codex/src/agents/codex/settings.test.ts
@@ -0,0 +1,146 @@
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import {
  applySettingsToEnv,
  loadSettingsFile,
  type Settings,
  settingsFilePath,
} from "./settings.ts";

describe("loadSettingsFile", () => {
  let dir: string;

  // Each test gets its own scratch directory, removed afterwards.
  beforeEach(() => {
    dir = mkdtempSync(join(tmpdir(), "settings-test-"));
  });
  afterEach(() => {
    rmSync(dir, { recursive: true, force: true });
  });

  // Write `content` to config.json in the scratch directory and return its path.
  function write(content: string): string {
    const path = join(dir, "config.json");
    writeFileSync(path, content);
    return path;
  }

  test("parses recognized keys", () => {
    const path = write(
      JSON.stringify({
        apiKey: "sk-123",
        project: "proj",
        port: 40000,
        idleTimeoutMs: 1000,
      }),
    );
    expect(loadSettingsFile(path)).toEqual({
      apiKey: "sk-123",
      project: "proj",
      port: 40000,
      idleTimeoutMs: 1000,
    });
  });

  test("ignores unknown keys and a _comment", () => {
    const path = write(JSON.stringify({ _comment: "hi", nope: "x", project: "p" }));
    expect(loadSettingsFile(path)).toEqual({ project: "p" });
  });

  test("ignores empty strings and wrong types", () => {
    const path = write(JSON.stringify({ apiKey: "", project: 5, port: "nope" }));
    expect(loadSettingsFile(path)).toEqual({});
  });

  test("parses traceToBraintrust boolean and additionalMetadata object", () => {
    const path = write(
      JSON.stringify({
        traceToBraintrust: true,
        additionalMetadata: { team: "platform", n: 1 },
      }),
    );
    expect(loadSettingsFile(path)).toEqual({
      traceToBraintrust: true,
      additionalMetadata: { team: "platform", n: 1 },
    });
  });

  test("ignores non-boolean traceToBraintrust and non-object/array metadata", () => {
    const path = write(JSON.stringify({ traceToBraintrust: "yes", additionalMetadata: [1, 2] }));
    expect(loadSettingsFile(path)).toEqual({});
  });

  test("missing file returns {}", () => {
    expect(loadSettingsFile(join(dir, "absent.json"))).toEqual({});
  });

  test("malformed JSON returns {} (never throws)", () => {
    const path = write("{ not json");
    expect(loadSettingsFile(path)).toEqual({});
  });

  test("non-object JSON returns {}", () => {
    const path = write("[1,2,3]");
    expect(loadSettingsFile(path)).toEqual({});
  });
});

describe("applySettingsToEnv", () => {
  test("applies settings to their env vars when unset", () => {
    const env: NodeJS.ProcessEnv = {};
    const settings: Settings = { apiKey: "sk-123", project: "proj", port: 40000 };
    const applied = applySettingsToEnv(settings, env);

    expect(env.BRAINTRUST_API_KEY).toBe("sk-123");
    expect(env.BRAINTRUST_PROJECT).toBe("proj");
    expect(env.BRAINTRUST_EVENT_SERVER_PORT).toBe("40000");
    expect(applied.sort()).toEqual(["apiKey", "port", "project"]);
  });

  test("environment wins: does not overwrite an existing env var", () => {
    const env: NodeJS.ProcessEnv = { BRAINTRUST_PROJECT: "from-env" };
    const applied = applySettingsToEnv({ project: "from-file", apiKey: "sk" }, env);

    expect(env.BRAINTRUST_PROJECT).toBe("from-env");
    expect(env.BRAINTRUST_API_KEY).toBe("sk");
    expect(applied).toEqual(["apiKey"]);
  });

  test("returns key names only (no values, so secrets are not exposed)", () => {
    const env: NodeJS.ProcessEnv = {};
    const applied = applySettingsToEnv({ apiKey: "super-secret" }, env);
    expect(applied).toEqual(["apiKey"]);
    expect(JSON.stringify(applied)).not.toContain("super-secret");
  });

  test("empty settings apply nothing", () => {
    const env: NodeJS.ProcessEnv = { EXISTING: "1" };
    expect(applySettingsToEnv({}, env)).toEqual([]);
    expect(env).toEqual({ EXISTING: "1" });
  });

  test("serializes booleans and objects to env strings", () => {
    const env: NodeJS.ProcessEnv = {};
    applySettingsToEnv({ traceToBraintrust: true, additionalMetadata: { team: "platform" } }, env);
    expect(env.TRACE_TO_BRAINTRUST).toBe("true");
    expect(env.BRAINTRUST_ADDITIONAL_METADATA).toBe('{"team":"platform"}');
  });
});

describe("settingsFilePath", () => {
  test("resolves config.json under PLUGIN_DATA", () => {
    expect(settingsFilePath({ PLUGIN_DATA: "/data" })).toBe("/data/config.json");
  });

  test("prefers the explicit log dir override", () => {
    expect(settingsFilePath({ BRAINTRUST_EVENT_SERVER_LOG_DIR: "/a", PLUGIN_DATA: "/b" })).toBe(
      "/a/config.json",
    );
  });

  test("falls back to a temp dir when neither is set", () => {
    expect(settingsFilePath({ TMPDIR: "/tmp/x/" })).toBe(
      "/tmp/x/braintrust-event-server/config.json",
    );
  });
});
diff --git a/plugins/trace-codex/src/agents/codex/settings.ts b/plugins/trace-codex/src/agents/codex/settings.ts
new file mode 100644
index 0000000..f1cc7f0
--- /dev/null
+++ b/plugins/trace-codex/src/agents/codex/settings.ts
@@ -0,0 +1,142 @@
// User settings layer.
//
// Codex has no native mechanism to pass custom settings into plugin hooks (it
// only provides PLUGIN_ROOT / PLUGIN_DATA). So we read our own config.json from
// the plugin's writable data directory (PLUGIN_DATA) and map its friendly keys
// onto the BRAINTRUST_* / BRAINTRUST_EVENT_SERVER_* environment variables that
// the rest of the code (and the Braintrust SDK) already understand.
//
// Precedence: environment variables always win over the file, so power users
// and CI can override the file without editing it. The file is optional;
// missing or malformed files are ignored (never throw).

import { readFileSync } from "node:fs";
import { join } from "node:path";

/** Friendly, camelCase settings a user can put in config.json. All optional. */
export interface Settings {
  /** Braintrust API key. */
  apiKey?: string;
  /** Braintrust API URL (override for self-hosted/staging). */
  apiUrl?: string;
  /** Braintrust app URL. */
  appUrl?: string;
  /** Project to log traces into. */
  project?: string;
  /** Master switch: when false or unset, no traces are reported to Braintrust. */
  traceToBraintrust?: boolean;
  /** Extra metadata merged into the root span (standard keys win on conflict). */
  additionalMetadata?: Record<string, unknown>;
  /** If set, record every event to this NDJSON file (for replay). */
  recordFile?: string;
  /** Local event server port. */
  port?: number;
  /** Idle timeout (ms) before the background server shuts down. */
  idleTimeoutMs?: number;
  /** How often (ms) the idle watchdog checks for inactivity.
*/
  idleCheckIntervalMs?: number;
}

/** Maps each setting to the environment variable it populates. */
export const SETTINGS_TO_ENV: Record<keyof Settings, string> = {
  apiKey: "BRAINTRUST_API_KEY",
  apiUrl: "BRAINTRUST_API_URL",
  appUrl: "BRAINTRUST_APP_URL",
  project: "BRAINTRUST_PROJECT",
  traceToBraintrust: "TRACE_TO_BRAINTRUST",
  additionalMetadata: "BRAINTRUST_ADDITIONAL_METADATA",
  recordFile: "BRAINTRUST_EVENT_SERVER_RECORD_FILE",
  port: "BRAINTRUST_EVENT_SERVER_PORT",
  idleTimeoutMs: "BRAINTRUST_EVENT_SERVER_IDLE_TIMEOUT_MS",
  idleCheckIntervalMs: "BRAINTRUST_EVENT_SERVER_IDLE_CHECK_INTERVAL_MS",
};

// Every recognized setting name, in SETTINGS_TO_ENV order.
const SETTING_KEYS = Object.keys(SETTINGS_TO_ENV) as Array<keyof Settings>;

// Settings that must be finite numbers in config.json.
const NUMBER_KEYS = new Set<keyof Settings>(["port", "idleTimeoutMs", "idleCheckIntervalMs"]);

/**
 * The plugin's writable data directory, where config.json lives. Resolved
 * independently of the server Config layer so settings detection stays its own
 * layer. Precedence matches the server's data dir: explicit log dir override,
 * then Codex's PLUGIN_DATA, then a temp fallback.
 */
function dataDir(env: NodeJS.ProcessEnv): string {
  return (
    env.BRAINTRUST_EVENT_SERVER_LOG_DIR ||
    env.PLUGIN_DATA ||
    `${env.TMPDIR || "/tmp"}/braintrust-event-server`
  );
}

/** Absolute path to the user's config.json (in the plugin data dir). */
export function settingsFilePath(env: NodeJS.ProcessEnv = process.env): string {
  return join(dataDir(env), "config.json");
}

/**
 * Read and parse config.json. Returns the recognized settings, or {} if the
 * file is missing, unreadable, or malformed. Never throws.
 *
 * Unknown keys are dropped. A recognized key is kept only when its value has
 * the expected type: a finite number for the numeric settings, a boolean for
 * `traceToBraintrust`, a plain (non-array) object for `additionalMetadata`,
 * and a non-empty string for everything else.
 */
export function loadSettingsFile(path: string): Settings {
  let raw: string;
  try {
    raw = readFileSync(path, "utf8");
  } catch {
    return {}; // missing/unreadable is fine — the file is optional
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return {}; // malformed JSON: ignore rather than break the hook
  }

  // Only a JSON object can carry settings; scalars, null and arrays yield {}.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return {};
  const obj = parsed as Record<string, unknown>;

  const settings: Settings = {};
  for (const key of SETTING_KEYS) {
    const value = obj[key];
    if (value === undefined || value === null) continue;
    if (NUMBER_KEYS.has(key)) {
      if (typeof value === "number" && Number.isFinite(value)) {
        (settings as Record<string, unknown>)[key] = value;
      }
    } else if (key === "traceToBraintrust") {
      if (typeof value === "boolean") settings.traceToBraintrust = value;
    } else if (key === "additionalMetadata") {
      if (typeof value === "object" && !Array.isArray(value)) {
        settings.additionalMetadata = value as Record<string, unknown>;
      }
    } else if (typeof value === "string" && value.length > 0) {
      (settings as Record<string, unknown>)[key] = value;
    }
  }
  return settings;
}

/**
 * Apply settings to the environment: for each setting, set its env var only if
 * that var is not already set (environment wins). Mutates `env`. Returns the
 * list of setting keys that were applied, for diagnostics (never includes
 * values, so secrets are not logged).
 */
export function applySettingsToEnv(
  settings: Settings,
  env: NodeJS.ProcessEnv = process.env,
): Array<keyof Settings> {
  const applied: Array<keyof Settings> = [];
  for (const key of SETTING_KEYS) {
    const value = settings[key];
    if (value === undefined) continue;
    const envVar = SETTINGS_TO_ENV[key];
    // Truthiness check: an env var set to the empty string counts as unset.
    if (env[envVar]) continue; // environment wins
    // Objects (additionalMetadata) are serialized as JSON; everything else
    // stringifies directly (booleans -> "true"/"false", numbers, strings).
    env[envVar] = typeof value === "object" ? JSON.stringify(value) : String(value);
    applied.push(key);
  }
  return applied;
}
diff --git a/plugins/trace-codex/src/agents/codex/test-helpers.ts b/plugins/trace-codex/src/agents/codex/test-helpers.ts
new file mode 100644
index 0000000..eaa0e57
--- /dev/null
+++ b/plugins/trace-codex/src/agents/codex/test-helpers.ts
@@ -0,0 +1,86 @@
// Codex-specific test helpers: event builders that mimic Codex hook payloads,
// and assertProducesTrace, which runs events through the CodexEventProcessor and
// checks the resulting Braintrust trace. Generic trace/span helpers are imported
// from the shared test-helpers.

import type { EnqueueEvent } from "../../server/routes.ts";
import {
  createTestLogger,
  diffSpan,
  type ExpectedSpan,
  spansToTree,
  withCapturedTrace,
} from "../../test-helpers.ts";
import { CODEX_CONFIG_EVENT } from "./event-builder.ts";
import { CodexEventProcessor } from "./event-processor.ts";

// ============================================================================
// Event builders
// ============================================================================

// Build a Codex hook event. The queue id is the payload's session_id when it is
// a string, otherwise "session-1".
function codexEvent(eventName: string, eventData: Record<string, unknown>): EnqueueEvent {
  const queueId = typeof eventData.session_id === "string" ? eventData.session_id : "session-1";
  return {
    queueId,
    eventSource: "codex-hook",
    eventSourceVersion: null,
    eventName,
    eventData: { hook_event_name: eventName, ...eventData },
  };
}

export function sessionStart(data: Record<string, unknown> = {}): EnqueueEvent {
  return codexEvent("SessionStart", { session_id: "session-1", ...data });
}

export function userPromptSubmit(data: Record<string, unknown> = {}): EnqueueEvent {
  return codexEvent("UserPromptSubmit", { session_id: "session-1", ...data });
}

export function stop(data: Record<string, unknown> = {}): EnqueueEvent {
  return codexEvent("Stop", { session_id: "session-1", ...data });
}

/** A config event enabling tracing; pass extra reporting config via `data`.
*/
export function configEvent(data: Record<string, unknown> = {}): EnqueueEvent {
  return {
    queueId: typeof data.session_id === "string" ? data.session_id : "session-1",
    eventSource: "codex-hook",
    eventSourceVersion: null,
    eventName: CODEX_CONFIG_EVENT,
    // Tracing is on by default here; a caller can override it through `data`.
    eventData: { traceToBraintrust: true, ...data },
  };
}

// ============================================================================
// assertProducesTrace: run events through a CodexEventProcessor and assert the
// resulting trace. One session => one trace.
// ============================================================================

/**
 * Runs `events` through a fresh CodexEventProcessor backed by a captured trace
 * and throws when the resulting span tree differs from `expected`.
 *
 * @param events Events to process, in order.
 * @param expected The expected root span (with children).
 * @param opts.queueId Session id for the processor; defaults to the first
 *   event's queueId, then "session-1".
 */
export async function assertProducesTrace(
  events: EnqueueEvent[],
  expected: ExpectedSpan,
  opts: { queueId?: string } = {},
): Promise<void> {
  const queueId = opts.queueId ?? events[0]?.queueId ?? "session-1";
  // Tracing is off by default; prepend a tracing-enabled config event unless the
  // caller already provided one, so trace assertions exercise the span path.
  const hasConfig = events.some((e) => e.eventName === CODEX_CONFIG_EVENT);
  const toProcess = hasConfig ? events : [configEvent({ session_id: queueId }), ...events];
  const trace = withCapturedTrace();
  try {
    const processor = new CodexEventProcessor(queueId, createTestLogger(), () => trace.spanFactory);
    for (const event of toProcess) {
      // process() is synchronous; only flush() needs awaiting.
      processor.process(event);
    }
    await processor.flush();

    const tree = spansToTree(await trace.drain());
    const diffs = diffSpan(tree, expected, "root");
    if (diffs.length > 0) {
      throw new Error(`trace does not match expected:\n${diffs.join("\n")}`);
    }
  } finally {
    trace.cleanup();
  }
}
diff --git a/plugins/trace-codex/src/braintrust/logger.test.ts b/plugins/trace-codex/src/braintrust/logger.test.ts
new file mode 100644
index 0000000..c7ae98a
--- /dev/null
+++ b/plugins/trace-codex/src/braintrust/logger.test.ts
@@ -0,0 +1,29 @@
import { describe, expect, test } from "bun:test";
import { resolveProjectName } from "./logger.ts";

describe("resolveProjectName", () => {
  test("prefers explicit config project over env", () => {
    expect(resolveProjectName({ project: "from-config" }, { BRAINTRUST_PROJECT: "from-env" })).toBe(
      "from-config",
    );
  });

  test("prefers BRAINTRUST_PROJECT when no config project", () => {
    expect(
      resolveProjectName(undefined, {
        BRAINTRUST_PROJECT: "explicit",
        BRAINTRUST_DEFAULT_PROJECT: "default",
      }),
    ).toBe("explicit");
  });

  test("falls back to BRAINTRUST_DEFAULT_PROJECT", () => {
    expect(resolveProjectName(undefined, { BRAINTRUST_DEFAULT_PROJECT: "default" })).toBe(
      "default",
    );
  });

  test("defaults to codex when nothing is set", () => {
    expect(resolveProjectName(undefined, {})).toBe("codex");
  });
});
diff --git a/plugins/trace-codex/src/braintrust/logger.ts b/plugins/trace-codex/src/braintrust/logger.ts
new file mode 100644
index 0000000..a31710f
--- /dev/null
+++ b/plugins/trace-codex/src/braintrust/logger.ts
@@ -0,0 +1,96 @@
// Thin wrapper around the Braintrust SDK.
//
// The SDK auto-configures from standard env vars (BRAINTRUST_API_KEY,
// BRAINTRUST_API_URL, BRAINTRUST_APP_URL), so no custom config is needed. When
// credentials are absent the SDK still creates spans locally and simply fails
// to flush — it does not throw — so callers can use it unconditionally.
//
// We expose a narrow SpanFactory interface (rather than the full SDK surface)
// so processors are easy to unit test with a fake.

import { initLogger, type Span, type StartSpanArgs } from "braintrust";
import type { Logger } from "../log.ts";

export type { Span, StartSpanArgs };

/** Creates root spans and can flush buffered events. */
export interface SpanFactory {
  startSpan(args: StartSpanArgs): Span;
  flush(): Promise<void>;
}

/**
 * How to report a session's traces to Braintrust. Resolved per session (e.g.
 * from a config event) so different sessions can log to different projects /
 * accounts. All fields optional; missing fields fall back to env / SDK defaults.
 */
export interface ReportingConfig {
  project?: string;
  apiKey?: string;
  apiUrl?: string;
  appUrl?: string;
  /** Master switch: when false, the session reports nothing to Braintrust. */
  traceToBraintrust?: boolean;
  /** Extra metadata merged into the root span (standard keys win on conflict). */
  additionalMetadata?: Record<string, unknown>;
}

/**
 * Builds a SpanFactory for a given reporting config. Injected into processors so
 * each session can create its own logger, and so tests can stay offline by
 * supplying a provider that returns a fake/captured factory.
 */
export type SpanFactoryProvider = (config?: ReportingConfig, diagLogger?: Logger) => SpanFactory;

/**
 * Project to log into. Precedence:
 *   explicit config -> BRAINTRUST_PROJECT -> BRAINTRUST_DEFAULT_PROJECT -> "codex"
 *
 * Empty strings are skipped at every step (the chain uses `||`).
 */
export function resolveProjectName(
  config?: ReportingConfig,
  env: NodeJS.ProcessEnv = process.env,
): string {
  return config?.project || env.BRAINTRUST_PROJECT || env.BRAINTRUST_DEFAULT_PROJECT || "codex";
}

/**
 * Creates a fresh SpanFactory backed by a new Braintrust SDK logger
 * (asyncFlush: true, so writes are batched and flushed in the background).
 *
 * When `config` is provided, its project/credentials are passed explicitly to
 * the SDK so this logger reports independently of process env (enabling
 * per-session routing). When omitted, the SDK auto-configures from env. Either
 * way the SDK no-ops offline (missing creds) rather than throwing.
 *
 * Not memoized — each call creates an isolated logger.
 *
 * @param config Per-session reporting config; only `project`, `apiKey` and
 *   `appUrl` are forwarded to the SDK here.
 * @param diagLogger Optional diagnostic logger for init/flush messages.
 * @returns A factory whose `flush` logs and then rethrows SDK flush errors.
 */
export function createSpanFactory(config?: ReportingConfig, diagLogger?: Logger): SpanFactory {
  const projectName = resolveProjectName(config);
  // NOTE(review): `config.apiUrl` is reported in the diagnostic below but is
  // not passed to initLogger; a per-session API URL presumably only takes
  // effect through BRAINTRUST_API_URL — confirm against the SDK.
  const logger = initLogger({
    projectName,
    asyncFlush: true,
    ...(config?.apiKey ? { apiKey: config.apiKey } : {}),
    ...(config?.appUrl ? { appUrl: config.appUrl } : {}),
  });
  // Report only whether a key is present, never the key itself.
  diagLogger?.info("braintrust logger initialized", {
    projectName,
    apiUrl: config?.apiUrl ?? process.env.BRAINTRUST_API_URL ?? "https://api.braintrust.dev",
    hasApiKey: Boolean(config?.apiKey ?? process.env.BRAINTRUST_API_KEY),
  });
  return {
    startSpan: (args) => logger.startSpan(args),
    flush: async () => {
      try {
        await logger.flush();
        diagLogger?.debug("braintrust flush ok");
      } catch (err) {
        diagLogger?.error("braintrust flush failed", { error: String(err) });
        throw err;
      }
    },
  };
}

/** The default production provider: a fresh per-session logger from config.
*/
export const defaultSpanFactoryProvider: SpanFactoryProvider = (config, diagLogger) =>
  createSpanFactory(config, diagLogger);
diff --git a/plugins/trace-codex/src/client/client.ts b/plugins/trace-codex/src/client/client.ts
new file mode 100644
index 0000000..f4b1735
--- /dev/null
+++ b/plugins/trace-codex/src/client/client.ts
@@ -0,0 +1,90 @@
// Hook client run loop ("hook" mode, the default): read the agent's event from
// stdin, ensure the server is up, POST it to /enqueue, and exit.
//
// This is agent-agnostic: the caller supplies a `buildEvent` function that knows
// how to translate the agent's raw stdin payload into a generic EnqueueEvent.
//
// Hard rule: this must NEVER throw out to the process in a way that fails the
// agent's turn. All errors are logged and swallowed; we always exit 0.

import type { Config } from "../config.ts";
import type { Logger } from "../log.ts";
import { postEnqueue, postFlush } from "../server/enqueue-client.ts";
import type { EnqueueEvent } from "../server/routes.ts";
import { checkHealth, ensureServer, sleep } from "./ensure-server.ts";
import { spawnServer } from "./spawn-server.ts";

/**
 * Translates an agent's raw stdin payload into one or more generic
 * EnqueueEvents. Most events map to a single event; some (e.g. a session start)
 * also emit a leading config event. The events are POSTed in array order, so
 * ordering-sensitive events (config before session start) come first.
 */
export type EventBuilder = (rawStdin: string, env?: NodeJS.ProcessEnv) => EnqueueEvent[];

/** Read all of stdin as a string. Returns "" when stdin cannot be read. */
async function readStdin(): Promise<string> {
  try {
    return await Bun.stdin.text();
  } catch {
    return "";
  }
}

export interface HookClientOptions {
  /**
   * Event names that terminate a turn/session. After enqueuing one of these,
   * the client asks the server to flush synchronously (POST /flush) so the
   * final spans are delivered before this process — and any background server —
   * is torn down. Defaults to none (fire-and-forget).
   */
  terminalEvents?: readonly string[];
}

/**
 * Run one hook invocation: read stdin, build the events, make sure the event
 * server is reachable, POST the events in order, and request a flush when a
 * terminal event was enqueued.
 *
 * @param config Server connection settings.
 * @param logger Diagnostic logger.
 * @param buildEvents Translates the raw stdin payload into EnqueueEvents.
 * @param options Optional terminal-event names that trigger a flush.
 */
export async function runHookClient(
  config: Config,
  logger: Logger,
  buildEvents: EventBuilder,
  options: HookClientOptions = {},
): Promise<void> {
  const rawStdin = await readStdin();

  // A builder that throws (e.g. on an unexpected payload) must not fail the
  // agent's turn: log and drop instead of propagating.
  let events: EnqueueEvent[];
  try {
    events = buildEvents(rawStdin);
  } catch (err) {
    logger.error("could not build events from stdin; dropping", { error: String(err) });
    return;
  }
  if (events.length === 0) return;

  const terminalEvents = new Set<string>(options.terminalEvents ?? []);

  const healthy = await ensureServer({
    config,
    logger,
    checkHealth,
    spawn: spawnServer,
    sleep,
  });

  if (!healthy) {
    logger.error("could not reach or start event server; dropping events", {
      count: events.length,
    });
    return;
  }

  // POST in order so a leading config event is enqueued before the event it
  // configures (the FIFO consumer then sees config first).
  let sawTerminal = false;
  for (const event of events) {
    const ok = await postEnqueue(config, event, logger);
    if (ok) {
      logger.debug("event enqueued", { eventName: event.eventName });
      // Only a terminal event that was actually enqueued triggers the flush.
      if (terminalEvents.has(event.eventName)) sawTerminal = true;
    }
  }

  // On a terminal event, block until the server confirms the queue has drained
  // and buffered spans are flushed. Without this, a CI job (or any short-lived
  // host) can end right after the agent's last turn — before the background
  // server's idle timeout fires — and lose the final spans.
  if (sawTerminal) {
    const flushed = await postFlush(config, logger);
    logger.debug("flush requested on terminal event", { flushed });
  }
}
diff --git a/plugins/trace-codex/src/client/ensure-server.test.ts b/plugins/trace-codex/src/client/ensure-server.test.ts
new file mode 100644
index 0000000..a1e7d10
--- /dev/null
+++ b/plugins/trace-codex/src/client/ensure-server.test.ts
@@ -0,0 +1,121 @@
import { describe, expect, mock, test } from "bun:test";
import type { Config } from "../config.ts";
import { createTestLogger } from "../test-helpers.ts";
import { type EnsureServerDeps, ensureServer, type HealthStatus } from "./ensure-server.ts";

const config: Config = {
  host: "127.0.0.1",
  port: 52799,
  idleTimeoutMs: 1000,
  idleCheckIntervalMs: 100,
  dataDir: "/tmp/test",
};

// Zero-length intervals keep the polling loops instantaneous in tests.
const fastTimings = {
  bootHealthAttempts: 5,
  bootHealthIntervalMs: 0,
  shutdownWaitAttempts: 5,
  shutdownWaitIntervalMs: 0,
};

/** Build deps where checkHealth returns a scripted sequence of statuses.
*/
+// Once the script is exhausted the last status repeats, so a single-entry
+// script models a server that never changes state.
+function makeDeps(statuses: HealthStatus[]): {
+  deps: EnsureServerDeps;
+  spawn: ReturnType<typeof mock>;
+} {
+  let i = 0;
+  const checkHealth = mock(async (): Promise<HealthStatus> => {
+    const s = statuses[Math.min(i, statuses.length - 1)];
+    i++;
+    return s;
+  });
+  const spawn = mock(() => {});
+  return {
+    spawn,
+    deps: {
+      config,
+      logger: createTestLogger(),
+      checkHealth,
+      spawn,
+      sleep: async () => {},
+      timings: fastTimings,
+    },
+  };
+}
+
+describe("ensureServer", () => {
+  test("reuses an already-healthy server without spawning", async () => {
+    const { deps, spawn } = makeDeps(["healthy"]);
+    const ok = await ensureServer(deps);
+    expect(ok).toBe(true);
+    expect(spawn).not.toHaveBeenCalled();
+  });
+
+  test("boots a server when none is reachable", async () => {
+    // unreachable first, then healthy after boot
+    const { deps, spawn } = makeDeps(["unreachable", "healthy"]);
+    const ok = await ensureServer(deps);
+    expect(ok).toBe(true);
+    expect(spawn).toHaveBeenCalledTimes(1);
+  });
+
+  test("waits for a shutting-down server, then boots", async () => {
+    // shutting_down -> unreachable (it stopped) -> healthy (ours booted)
+    const { deps, spawn } = makeDeps(["shutting_down", "unreachable", "healthy"]);
+    const ok = await ensureServer(deps);
+    expect(ok).toBe(true);
+    expect(spawn).toHaveBeenCalledTimes(1);
+  });
+
+  test("reuses the winner if a healthy server appears during shutdown wait", async () => {
+    // shutting_down -> healthy (someone else booted): we end up healthy.
+    const { deps } = makeDeps(["shutting_down", "healthy"]);
+    const ok = await ensureServer(deps);
+    expect(ok).toBe(true);
+  });
+
+  test("returns false when the server never becomes healthy", async () => {
+    const { deps } = makeDeps(["unreachable"]);
+    const ok = await ensureServer(deps);
+    expect(ok).toBe(false);
+  });
+
+  test("bails immediately on a version mismatch without spawning", async () => {
+    const { deps, spawn } = makeDeps(["version_mismatch"]);
+    const ok = await ensureServer(deps);
+    expect(ok).toBe(false);
+    expect(spawn).not.toHaveBeenCalled();
+  });
+
+  test("bails if a mismatched server wins the port race after boot", async () => {
+    // unreachable -> (boot) -> version_mismatch: bail rather than poll forever.
+    const { deps, spawn } = makeDeps(["unreachable", "version_mismatch"]);
+    const ok = await ensureServer(deps);
+    expect(ok).toBe(false);
+    expect(spawn).toHaveBeenCalledTimes(1);
+  });
+});
+
+describe("checkHealth version gating", () => {
+  test("treats a matching version as healthy and a mismatch as version_mismatch", async () => {
+    const { PLUGIN_VERSION } = await import("../version.ts");
+    const { checkHealth } = await import("./ensure-server.ts");
+
+    // Swap the global fetch for a stub that reports a chosen version; the
+    // original is restored in `finally` so other tests are unaffected.
+    const originalFetch = globalThis.fetch;
+    const fakeFetch = (version: string) =>
+      (async () =>
+        new Response(JSON.stringify({ version }), {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        })) as unknown as typeof fetch;
+    try {
+      globalThis.fetch = fakeFetch(PLUGIN_VERSION);
+      expect(await checkHealth(config)).toBe("healthy");
+
+      globalThis.fetch = fakeFetch("9.9.9-different");
+      expect(await checkHealth(config)).toBe("version_mismatch");
+    } finally {
+      globalThis.fetch = originalFetch;
+    }
+  });
+});
diff --git a/plugins/trace-codex/src/client/ensure-server.ts b/plugins/trace-codex/src/client/ensure-server.ts
new file mode 100644
index 0000000..56385c8
--- /dev/null
+++ b/plugins/trace-codex/src/client/ensure-server.ts
@@ -0,0 +1,141 @@
+// Ensure a healthy
event server is running, booting one if needed.
+//
+// State machine:
+//   - /health refused          -> boot our own, wait until healthy
+//   - /health 200 (healthy)    -> reuse it
+//   - /health 503 (shutting)   -> wait until it stops, then boot our own
+//   - /health version mismatch -> bail (a different server version owns the
+//                                 port; do not enqueue)
+//   - boot loses a port race   -> re-check /health; reuse the winner
+//   - still unhealthy at end   -> return false (caller logs + exits 0)
+
+import type { Config } from "../config.ts";
+import { serverBaseUrl } from "../config.ts";
+import type { Logger } from "../log.ts";
+import { PLUGIN_VERSION } from "../version.ts";
+
+/** Result of one /health probe, as classified by `checkHealth`. */
+export type HealthStatus = "healthy" | "shutting_down" | "version_mismatch" | "unreachable";
+
+/** Everything `ensureServer` needs; the side-effecting pieces are injectable. */
+export interface EnsureServerDeps {
+  config: Config;
+  logger: Logger;
+  /** Probe /health. */
+  checkHealth: (config: Config) => Promise<HealthStatus>;
+  /** Boot a detached server. */
+  spawn: (config: Config, logger: Logger) => void;
+  /** Sleep helper (injectable for tests). */
+  sleep: (ms: number) => Promise<void>;
+  /** Tunables (injectable for tests). */
+  timings?: Partial<EnsureTimings>;
+}
+
+export interface EnsureTimings {
+  /** Max attempts to reach health after booting. */
+  bootHealthAttempts: number;
+  /** Delay between post-boot health probes. */
+  bootHealthIntervalMs: number;
+  /** Max attempts to wait for a shutting-down server to disappear. */
+  shutdownWaitAttempts: number;
+  /** Delay between shutdown-wait probes. */
+  shutdownWaitIntervalMs: number;
+}
+
+// 50 x 100ms = up to ~5s waiting for a boot; 100 x 100ms = up to ~10s waiting
+// for a shutdown (plus the time each probe itself takes).
+const DEFAULT_TIMINGS: EnsureTimings = {
+  bootHealthAttempts: 50,
+  bootHealthIntervalMs: 100,
+  shutdownWaitAttempts: 100,
+  shutdownWaitIntervalMs: 100,
+};
+
+/** Default health probe using fetch with a short timeout.
*/
+// Classification: 503 -> "shutting_down"; any other non-2xx, a network error
+// or a timeout -> "unreachable"; 2xx with a body whose `version` differs from
+// PLUGIN_VERSION (or that is not valid JSON) -> "version_mismatch".
+export async function checkHealth(config: Config): Promise<HealthStatus> {
+  try {
+    const res = await fetch(`${serverBaseUrl(config)}/health`, {
+      method: "GET",
+      signal: AbortSignal.timeout(1000),
+    });
+    if (res.status === 503) return "shutting_down";
+    if (!res.ok) return "unreachable";
+
+    // A healthy server must match our version. If a different version owns the
+    // port, treat it as a mismatch so the caller bails.
+    try {
+      const body = (await res.json()) as { version?: unknown };
+      if (body.version !== PLUGIN_VERSION) return "version_mismatch";
+    } catch {
+      return "version_mismatch";
+    }
+    return "healthy";
+  } catch {
+    return "unreachable";
+  }
+}
+
+/** Resolve after `ms` milliseconds. */
+export function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+/**
+ * Returns true if a healthy server is available to receive events.
+ *
+ * Reuses a healthy server, waits out a shutting-down one, and spawns our own
+ * only when the port is actually free. Returns false on a version mismatch or
+ * when no server becomes healthy within the configured attempts.
+ */
+export async function ensureServer(deps: EnsureServerDeps): Promise<boolean> {
+  const { config, logger, checkHealth, spawn } = deps;
+  const timings = { ...DEFAULT_TIMINGS, ...deps.timings };
+
+  const status = await checkHealth(config);
+
+  if (status === "healthy") {
+    return true;
+  }
+
+  if (status === "version_mismatch") {
+    // A different server version owns the port. Bail without enqueuing rather
+    // than fighting over the port or sending events to an incompatible server.
+    logger.warn("event server version mismatch; skipping", {
+      clientVersion: PLUGIN_VERSION,
+    });
+    return false;
+  }
+
+  if (status === "shutting_down") {
+    logger.info("server is shutting down; waiting for it to stop");
+    const outcome = await waitForStop(deps, timings);
+    if (outcome === "healthy") {
+      // Someone else booted a compatible server while we waited: reuse it
+      // instead of spawning a second one that can only lose the port race.
+      logger.info("healthy server appeared during shutdown wait; reusing it");
+      return true;
+    }
+    if (outcome === "version_mismatch") {
+      logger.warn("event server version mismatch; skipping", {
+        clientVersion: PLUGIN_VERSION,
+      });
+      return false;
+    }
+    if (outcome !== "unreachable") {
+      logger.error("timed out waiting for shutting-down server");
+      return false;
+    }
+    // The old server is gone: fall through to boot our own.
+  }
+
+  // status === "unreachable" (or just-stopped): boot our own.
+  spawn(config, logger);
+  return waitForHealthy(deps, timings);
+}
+
+/**
+ * Poll /health until the shutting-down server reaches a state the caller can
+ * act on. Returns "unreachable" (it stopped), "healthy" (a new server took
+ * over) or "version_mismatch" (an incompatible server took over) as soon as
+ * one is observed, and "shutting_down" if the attempts run out first.
+ */
+async function waitForStop(
+  deps: EnsureServerDeps,
+  timings: EnsureTimings,
+): Promise<HealthStatus> {
+  for (let i = 0; i < timings.shutdownWaitAttempts; i++) {
+    const status = await deps.checkHealth(deps.config);
+    if (status !== "shutting_down") return status;
+    await deps.sleep(timings.shutdownWaitIntervalMs);
+  }
+  return "shutting_down";
+}
+
+/** Poll /health after a spawn until a server is healthy, or give up. */
+async function waitForHealthy(deps: EnsureServerDeps, timings: EnsureTimings): Promise<boolean> {
+  for (let i = 0; i < timings.bootHealthAttempts; i++) {
+    const status = await deps.checkHealth(deps.config);
+    if (status === "healthy") return true;
+    // A different version won the port race: bail instead of polling forever.
+    if (status === "version_mismatch") {
+      deps.logger.warn("event server version mismatch after boot; skipping", {
+        clientVersion: PLUGIN_VERSION,
+      });
+      return false;
+    }
+    // A "shutting_down" here means someone else's server is going away; keep
+    // polling — either it dies (then ours never bound, re-handled by caller) or
+    // a fresh one comes up healthy.
+    await deps.sleep(timings.bootHealthIntervalMs);
+  }
+  return false;
+}
diff --git a/plugins/trace-codex/src/client/spawn-server.ts b/plugins/trace-codex/src/client/spawn-server.ts
new file mode 100644
index 0000000..4ee693c
--- /dev/null
+++ b/plugins/trace-codex/src/client/spawn-server.ts
@@ -0,0 +1,36 @@
+// Spawn the background server by re-executing this binary with "serve".
+//
+// In a Bun-compiled standalone binary, process.execPath is the binary itself,
+// so re-exec'ing it with "serve" runs the server entrypoint. We detach and
+// unref the child so it outlives this short-lived hook process.
+
+import { openSync } from "node:fs";
+import { join } from "node:path";
+import type { Config } from "../config.ts";
+import type { Logger } from "../log.ts";
+
+/**
+ * Start the background server as a child process running `<execPath> serve`.
+ *
+ * Best-effort: a failure to open the log file falls back to discarding the
+ * child's output, and a failure to spawn is logged rather than thrown.
+ */
+export function spawnServer(config: Config, logger: Logger): void {
+  // Append server stdout/stderr to a logfile so a detached server isn't silent.
+  let logFd: number | "ignore" = "ignore";
+  try {
+    logFd = openSync(join(config.dataDir, "server.out.log"), "a");
+  } catch {
+    logFd = "ignore";
+  }
+
+  try {
+    const child = Bun.spawn([process.execPath, "serve"], {
+      // Inherit env so BRAINTRUST_EVENT_SERVER_* / PLUGIN_DATA carry over.
+      env: process.env,
+      stdin: "ignore",
+      stdout: logFd,
+      stderr: logFd,
+      // Detach so the server is not tied to this hook process's lifetime.
+      // Bun keeps children unref'd by default; unref() is explicit and safe.
+      // NOTE(review): no explicit detach option is passed here — confirm that
+      // unref() alone lets the server outlive the hook on all target platforms.
+    });
+    child.unref();
+    logger.info("spawned server", { pid: child.pid, port: config.port });
+  } catch (err) {
+    logger.error("failed to spawn server", { error: String(err) });
+  }
+}
diff --git a/plugins/trace-codex/src/config.test.ts b/plugins/trace-codex/src/config.test.ts
new file mode 100644
index 0000000..b1a7b34
--- /dev/null
+++ b/plugins/trace-codex/src/config.test.ts
@@ -0,0 +1,37 @@
+import { describe, expect, test } from "bun:test";
+import { DEFAULT_IDLE_TIMEOUT_MS, DEFAULT_PORT, loadConfig, serverBaseUrl } from "./config.ts";
+
+// Each test passes an explicit env object, so the real process.env is never read.
+describe("loadConfig", () => {
+  test("uses defaults when env is empty", () => {
+    const c = loadConfig({});
+    expect(c.host).toBe("127.0.0.1");
+    expect(c.port).toBe(DEFAULT_PORT);
+    expect(c.idleTimeoutMs).toBe(DEFAULT_IDLE_TIMEOUT_MS);
+  });
+
+  test("reads port override", () => {
+    const c = loadConfig({ BRAINTRUST_EVENT_SERVER_PORT: "40000" });
+    expect(c.port).toBe(40000);
+  });
+
+  test("ignores invalid port and falls back", () => {
+    const c = loadConfig({ BRAINTRUST_EVENT_SERVER_PORT: "nope" });
+    expect(c.port).toBe(DEFAULT_PORT);
+  });
+
+  test("reads idle timeout override", () => {
+    const c = loadConfig({ BRAINTRUST_EVENT_SERVER_IDLE_TIMEOUT_MS: "1000" });
+    expect(c.idleTimeoutMs).toBe(1000);
+  });
+
+  test("prefers explicit log dir, then PLUGIN_DATA", () => {
+    expect(loadConfig({ BRAINTRUST_EVENT_SERVER_LOG_DIR: "/a" }).dataDir).toBe("/a");
+    expect(loadConfig({ PLUGIN_DATA: "/b" }).dataDir).toBe("/b");
+  });
+});
+
+describe("serverBaseUrl", () => {
+  test("builds a loopback URL", () => {
+    expect(serverBaseUrl({ host: "127.0.0.1", port: 52734 })).toBe("http://127.0.0.1:52734");
+  });
+});
diff --git a/plugins/trace-codex/src/config.ts b/plugins/trace-codex/src/config.ts
new file mode 100644
index 0000000..92b0ed8
--- /dev/null
+++ b/plugins/trace-codex/src/config.ts
@@ -0,0 +1,68 @@
+// Runtime configuration, resolved from environment variables.
+//
+// All settings are codex-agnostic (this is a generic "event server"), so the
+// env prefix is BRAINTRUST_EVENT_SERVER_*.
+
+export interface Config {
+  /** Host to bind the server to. Always loopback for now. */
+  host: string;
+  /** TCP port for the local event server. */
+  port: number;
+  /** Idle timeout: shut the server down after this long with no activity. */
+  idleTimeoutMs: number;
+  /** How often the idle watchdog checks for inactivity. */
+  idleCheckIntervalMs: number;
+  /** Directory for logs and pidfile. Defaults to PLUGIN_DATA, then a temp dir. */
+  dataDir: string;
+  /**
+   * If set, every dequeued event is recorded as newline-delimited JSON to this
+   * file (truncated on server start). Used to capture a session for later
+   * `replay`. Unset (the default) means no recording.
+   */
+  recordFile?: string;
+}
+
+/** Default port chosen from the IANA dynamic/private range (49152-65535). */
+export const DEFAULT_PORT = 52734;
+/** Default idle timeout: 5 minutes. */
+export const DEFAULT_IDLE_TIMEOUT_MS = 5 * 60 * 1000;
+/** Default watchdog cadence: every 30 seconds.
*/
+export const DEFAULT_IDLE_CHECK_INTERVAL_MS = 30 * 1000;
+
+/** Highest valid TCP port number. */
+const MAX_PORT = 65535;
+
+/**
+ * Parse a positive integer setting from an environment value.
+ *
+ * Returns `fallback` when the value is unset, empty, not made up solely of
+ * decimal digits, not positive, or greater than `max`. The digits-only check
+ * matters because Number.parseInt stops at the first non-digit, so "5m" would
+ * otherwise be read as 5 and "1e3" as 1.
+ */
+function parseIntEnv(
+  value: string | undefined,
+  fallback: number,
+  max: number = Number.MAX_SAFE_INTEGER,
+): number {
+  if (!value) return fallback;
+  const trimmed = value.trim();
+  if (!/^\d+$/.test(trimmed)) return fallback;
+  const parsed = Number.parseInt(trimmed, 10);
+  return Number.isSafeInteger(parsed) && parsed > 0 && parsed <= max ? parsed : fallback;
+}
+
+/** Pick the data directory: explicit log dir, then PLUGIN_DATA, then a temp dir. */
+function defaultDataDir(env: NodeJS.ProcessEnv): string {
+  // Codex sets PLUGIN_DATA to a writable per-plugin directory. Fall back to a
+  // temp dir so the binary is still runnable standalone (tests, manual runs).
+  return (
+    env.BRAINTRUST_EVENT_SERVER_LOG_DIR ||
+    env.PLUGIN_DATA ||
+    `${env.TMPDIR || "/tmp"}/braintrust-event-server`
+  );
+}
+
+/**
+ * Resolve the runtime configuration from `env` (defaults to process.env).
+ * Invalid or out-of-range numeric overrides fall back to their defaults.
+ */
+export function loadConfig(env: NodeJS.ProcessEnv = process.env): Config {
+  return {
+    host: "127.0.0.1",
+    // A port above 65535 cannot be bound, so it is rejected like any other
+    // invalid value.
+    port: parseIntEnv(env.BRAINTRUST_EVENT_SERVER_PORT, DEFAULT_PORT, MAX_PORT),
+    idleTimeoutMs: parseIntEnv(
+      env.BRAINTRUST_EVENT_SERVER_IDLE_TIMEOUT_MS,
+      DEFAULT_IDLE_TIMEOUT_MS,
+    ),
+    idleCheckIntervalMs: parseIntEnv(
+      env.BRAINTRUST_EVENT_SERVER_IDLE_CHECK_INTERVAL_MS,
+      DEFAULT_IDLE_CHECK_INTERVAL_MS,
+    ),
+    dataDir: defaultDataDir(env),
+    recordFile: env.BRAINTRUST_EVENT_SERVER_RECORD_FILE || undefined,
+  };
+}
+
+/** Base URL for talking to the local server. */
+export function serverBaseUrl(config: Pick<Config, "host" | "port">): string {
+  return `http://${config.host}:${config.port}`;
+}
diff --git a/plugins/trace-codex/src/index.ts b/plugins/trace-codex/src/index.ts
new file mode 100644
index 0000000..d5be092
--- /dev/null
+++ b/plugins/trace-codex/src/index.ts
@@ -0,0 +1,75 @@
+// Entry point. One binary, three modes:
+//   codex-hook serve          -> run the long-lived background event server
+//   codex-hook replay <file>  -> re-POST a recorded NDJSON session to the server
+//   codex-hook hook           -> (default) read a hook event from stdin, enqueue it
+//
+// The hook mode never fails the Codex turn: it always exits 0.
+
+import { codexAgent } from "./agents/codex/register.ts";
+import { runHookClient } from "./client/client.ts";
+import { loadConfig } from "./config.ts";
+import { createLogger } from "./log.ts";
+import type { EventProcessorFactory } from "./processor/event-processor.ts";
+import { runReplay } from "./replay/replay.ts";
+import { startServer } from "./server/server.ts";
+
+// The set of agents this plugin supports. Adding another agent (e.g. Claude
+// Code) is a one-line change here plus a new src/agents/<name>/ module.
+const AGENTS = [codexAgent];
+
+/** Processor factories keyed by event source, for the server's registry. */
+function agentFactories(): Map<string, EventProcessorFactory> {
+  return new Map(AGENTS.map((a) => [a.eventSource, a.createProcessor]));
+}
+
+/** Dispatch on argv[2]: "serve", "replay", or (default) the hook client. */
+async function main(): Promise<void> {
+  const mode = process.argv[2] ?? "hook";
+
+  if (mode === "serve") {
+    const config = loadConfig();
+    const logger = createLogger({ dataDir: config.dataDir, component: "server" });
+    const server = startServer(config, agentFactories(), logger);
+    // Stay alive until the server stops (idle, /shutdown, or signal).
+    process.on("SIGTERM", () => void server.stop());
+    process.on("SIGINT", () => void server.stop());
+    await server.done;
+    return;
+  }
+
+  if (mode === "replay") {
+    const config = loadConfig();
+    const logger = createLogger({ dataDir: config.dataDir, component: "replay" });
+    const filePath = process.argv[3];
+    if (!filePath) {
+      logger.error("replay: missing file argument; usage: codex-hook replay <file>");
+      return;
+    }
+    await runReplay(config, logger, filePath);
+    return;
+  }
+
+  // Default: hook client. The hook is the client-side entry point, so this is
+  // where the agent reads its user settings and maps them onto the environment
+  // (environment wins). We do this BEFORE loadConfig so the client and the
+  // server it may spawn agree on port/etc. The spawned server inherits this
+  // resolved env. Swallow everything; never break the agent's turn.
+  const agent = codexAgent;
+  // NOTE(review): dataDir is resolved before settings are applied and is only
+  // used for the logger below; loadSettings() is not passed it — confirm that
+  // is intended.
+  const dataDir = loadConfig().dataDir; // resolve data dir to locate the settings file
+  const applied = agent.loadSettings();
+  const config = loadConfig(); // re-resolve now that settings are in the env
+  const logger = createLogger({ dataDir, component: "hook" });
+  if (applied.length > 0) {
+    logger.info("applied settings from config", { settings: applied });
+  }
+  try {
+    await runHookClient(config, logger, agent.buildEvents, {
+      terminalEvents: agent.terminalEvents,
+    });
+  } catch (err) {
+    logger.error("unexpected hook client error", { error: String(err) });
+  }
+}
+
+// Exit 0 on both success and failure so no mode can fail the caller.
+main()
+  .then(() => process.exit(0))
+  .catch(() => process.exit(0));
diff --git a/plugins/trace-codex/src/log.ts b/plugins/trace-codex/src/log.ts
new file mode 100644
index 0000000..03b6e57
--- /dev/null
+++ b/plugins/trace-codex/src/log.ts
@@ -0,0 +1,68 @@
+// Minimal append-only logger. Writes newline-delimited JSON to a file under the
+// data dir, and mirrors to stderr. Never throws: logging must not be able to
+// break the hook or crash the server.
+
+import { appendFileSync, mkdirSync } from "node:fs";
+import { join } from "node:path";
+
+export type LogLevel = "debug" | "info" | "warn" | "error";
+
+export interface Logger {
+  debug(message: string, fields?: Record<string, unknown>): void;
+  info(message: string, fields?: Record<string, unknown>): void;
+  warn(message: string, fields?: Record<string, unknown>): void;
+  error(message: string, fields?: Record<string, unknown>): void;
+}
+
+export interface LoggerOptions {
+  /** Directory to write the log file into. */
+  dataDir: string;
+  /** Log file name. */
+  fileName?: string;
+  /** Component tag included on every line (e.g. "server", "hook"). */
+  component: string;
+}
+
+/** Create `dir` (and parents) if missing; never throws. */
+function safeMkdir(dir: string): void {
+  try {
+    mkdirSync(dir, { recursive: true });
+  } catch {
+    // ignore: we fall back to stderr-only
+  }
+}
+
+/**
+ * Build a Logger that appends one JSON object per line to
+ * `<dataDir>/<fileName>` (default "event-server.log") and mirrors each line to
+ * stderr. No method of the returned logger throws.
+ */
+export function createLogger(options: LoggerOptions): Logger {
+  const fileName = options.fileName ?? "event-server.log";
+  safeMkdir(options.dataDir);
+  const filePath = join(options.dataDir, fileName);
+
+  const write = (level: LogLevel, message: string, fields?: Record<string, unknown>) => {
+    const base = {
+      ts: new Date().toISOString(),
+      level,
+      component: options.component,
+      pid: process.pid,
+      message,
+    };
+    let line: string;
+    try {
+      line = JSON.stringify({ ...base, ...fields });
+    } catch (err) {
+      // `fields` held something JSON cannot encode (a cycle, a BigInt, a
+      // throwing toJSON). Keep the message and record why the fields are
+      // missing instead of letting the exception escape the logger.
+      line = JSON.stringify({ ...base, fieldsError: String(err) });
+    }
+    try {
+      appendFileSync(filePath, `${line}\n`);
+    } catch {
+      // ignore file errors
+    }
+    // Mirror to stderr so it surfaces in foreground/dev runs without polluting
+    // stdout (which the hook protocol may read).
+    try {
+      process.stderr.write(`${line}\n`);
+    } catch {
+      // ignore
+    }
+  };
+
+  return {
+    debug: (m, f) => write("debug", m, f),
+    info: (m, f) => write("info", m, f),
+    warn: (m, f) => write("warn", m, f),
+    error: (m, f) => write("error", m, f),
+  };
+}
diff --git a/plugins/trace-codex/src/processor/event-processor.ts b/plugins/trace-codex/src/processor/event-processor.ts
new file mode 100644
index 0000000..895bba4
--- /dev/null
+++ b/plugins/trace-codex/src/processor/event-processor.ts
@@ -0,0 +1,35 @@
+// EventProcessor is the interface implemented per event source (Codex today;
+// Claude Code, opencode, etc. later). One processor instance is created per
+// queueId (session) and receives that session's events in order.
+//
+// Each agent module provides an EventProcessorFactory and registers it (keyed by
+// its eventSource) with the ProcessorRegistry. The registry itself is generic
+// and never names a specific agent.
+
+import type { SpanFactoryProvider } from "../braintrust/logger.ts";
+import type { Logger } from "../log.ts";
+import type { EnqueueEvent } from "../server/routes.ts";
+
+export interface EventProcessor {
+  /** Handle a single event for this session. */
+  process(event: EnqueueEvent): Promise<void> | void;
+
+  /**
+   * Flush any buffered state to its backend. Called when the queue goes idle,
+   * on eviction, and on server stop. Must not throw and must not mutate span
+   * data (e.g.
it must not end spans) — flushing only delivers what already
+   * exists.
+   */
+  flush(): Promise<void> | void;
+}
+
+/**
+ * Creates a per-session EventProcessor for one event source. Receives a
+ * SpanFactoryProvider (not a concrete factory) so the processor can build its
+ * own logger from per-session config.
+ */
+export type EventProcessorFactory = (
+  queueId: string | null,
+  logger: Logger,
+  spanFactoryProvider: SpanFactoryProvider,
+) => EventProcessor;
diff --git a/plugins/trace-codex/src/processor/lru-map.test.ts b/plugins/trace-codex/src/processor/lru-map.test.ts
new file mode 100644
index 0000000..9e53dd5
--- /dev/null
+++ b/plugins/trace-codex/src/processor/lru-map.test.ts
@@ -0,0 +1,51 @@
+import { describe, expect, test } from "bun:test";
+import { LruMap } from "./lru-map.ts";
+
+describe("LruMap", () => {
+  test("stores and retrieves values", () => {
+    const m = new LruMap<number>({ capacity: 3 });
+    m.set("a", 1);
+    expect(m.get("a")).toBe(1);
+    expect(m.has("a")).toBe(true);
+    expect(m.size).toBe(1);
+  });
+
+  test("evicts the least-recently-used entry over capacity", () => {
+    const evicted: string[] = [];
+    const m = new LruMap<number>({ capacity: 2, onEvict: (k) => evicted.push(k) });
+    m.set("a", 1);
+    m.set("b", 2);
+    m.set("c", 3); // evicts "a"
+    expect(m.has("a")).toBe(false);
+    expect(m.has("b")).toBe(true);
+    expect(m.has("c")).toBe(true);
+    expect(evicted).toEqual(["a"]);
+  });
+
+  test("get() marks an entry most-recently-used", () => {
+    const evicted: string[] = [];
+    const m = new LruMap<number>({ capacity: 2, onEvict: (k) => evicted.push(k) });
+    m.set("a", 1);
+    m.set("b", 2);
+    m.get("a"); // now "b" is the LRU
+    m.set("c", 3); // evicts "b"
+    expect(m.has("a")).toBe(true);
+    expect(m.has("b")).toBe(false);
+    expect(evicted).toEqual(["b"]);
+  });
+
+  test("set() on an existing key refreshes recency without growing", () => {
+    const m = new LruMap<number>({ capacity: 2 });
+    m.set("a", 1);
+    m.set("b", 2);
+    m.set("a", 10); // refresh "a"; "b" becomes LRU
+    m.set("c", 3); // evicts "b"
+    expect(m.get("a")).toBe(10);
+    expect(m.has("b")).toBe(false);
+    expect(m.has("c")).toBe(true);
+  });
+
+  test("rejects invalid capacity", () => {
+    expect(() => new LruMap<number>({ capacity: 0 })).toThrow();
+  });
+});
diff --git a/plugins/trace-codex/src/processor/lru-map.ts b/plugins/trace-codex/src/processor/lru-map.ts
new file mode 100644
index 0000000..5a6c461
--- /dev/null
+++ b/plugins/trace-codex/src/processor/lru-map.ts
@@ -0,0 +1,62 @@
+// A tiny LRU map. Insertion/access order is tracked by the underlying Map
+// (JS Maps preserve insertion order); on access we re-insert to mark the key as
+// most-recently-used. When over capacity, the oldest entry is evicted.
+
+export interface LruMapOptions<V> {
+  capacity: number;
+  /** Optional callback invoked when an entry is evicted (not on delete). */
+  onEvict?: (key: string, value: V) => void;
+}
+
+export class LruMap<V> {
+  private readonly map = new Map<string, V>();
+  private readonly capacity: number;
+  private readonly onEvict?: (key: string, value: V) => void;
+
+  /** Throws if `capacity` is NaN or less than 1. */
+  constructor(options: LruMapOptions<V>) {
+    // NaN must be rejected explicitly: `NaN < 1` is false, and a NaN capacity
+    // would make the eviction comparison in set() always false.
+    if (Number.isNaN(options.capacity) || options.capacity < 1) {
+      throw new Error("LruMap capacity must be >= 1");
+    }
+    this.capacity = options.capacity;
+    this.onEvict = options.onEvict;
+  }
+
+  get size(): number {
+    return this.map.size;
+  }
+
+  /** Membership test; does not change recency. */
+  has(key: string): boolean {
+    return this.map.has(key);
+  }
+
+  /** Get a value and mark it most-recently-used. */
+  get(key: string): V | undefined {
+    // Test membership rather than the value, so an entry whose stored value is
+    // `undefined` still has its recency refreshed.
+    if (!this.map.has(key)) return undefined;
+    const value = this.map.get(key) as V;
+    // Re-insert to move to the end (most-recently-used).
+    this.map.delete(key);
+    this.map.set(key, value);
+    return value;
+  }
+
+  /** Insert/update a value, marking it most-recently-used, evicting if needed. */
+  set(key: string, value: V): void {
+    if (this.map.has(key)) this.map.delete(key);
+    this.map.set(key, value);
+    while (this.map.size > this.capacity) {
+      // The first key in iteration order is the least-recently-used.
+      const oldestKey = this.map.keys().next().value as string | undefined;
+      if (oldestKey === undefined) break;
+      const oldestValue = this.map.get(oldestKey) as V;
+      this.map.delete(oldestKey);
+      this.onEvict?.(oldestKey, oldestValue);
+    }
+  }
+
+  /** Iterate values (oldest-first); used for cleanup on shutdown. */
+  values(): IterableIterator<V> {
+    return this.map.values();
+  }
+
+  /** Remove every entry without invoking onEvict. */
+  clear(): void {
+    this.map.clear();
+  }
+}
diff --git a/plugins/trace-codex/src/processor/processor-registry.test.ts b/plugins/trace-codex/src/processor/processor-registry.test.ts
new file mode 100644
index 0000000..d76a33b
--- /dev/null
+++ b/plugins/trace-codex/src/processor/processor-registry.test.ts
@@ -0,0 +1,115 @@
+import { describe, expect, test } from "bun:test";
+import type { SpanFactory, SpanFactoryProvider } from "../braintrust/logger.ts";
+import type { Logger } from "../log.ts";
+import type { EnqueueEvent } from "../server/routes.ts";
+import { createFakeSpanFactory, createTestLogger } from "../test-helpers.ts";
+import type { EventProcessor, EventProcessorFactory } from "./event-processor.ts";
+import { ProcessorRegistry } from "./processor-registry.ts";
+
+const TEST_SOURCE = "test-agent";
+
+function event(overrides: Partial<EnqueueEvent> = {}): EnqueueEvent {
+  return {
+    queueId: "session-1",
+    eventSource: TEST_SOURCE,
+    eventSourceVersion: null,
+    eventName: "SessionStart",
+    eventData: {},
+    ...overrides,
+  };
+}
+
+// A minimal processor that opens one span per session and flushes it, so the
+// fake span factory's flush/end counters reflect registry behavior.
+class TestProcessor implements EventProcessor {
+  private readonly span;
+  constructor(_queueId: string | null, _logger: Logger, provider: SpanFactoryProvider) {
+    this.span = provider().startSpan({ name: "test", type: "task" });
+  }
+  process(): void {}
+  async flush(): Promise<void> {
+    await this.span?.flush();
+  }
+}
+
+const testFactory: EventProcessorFactory = (queueId, logger, provider) =>
+  new TestProcessor(queueId, logger, provider);
+
+/** A registry wired with a single test agent factory and a fixed fake factory. */
+function makeRegistry(options: { capacity?: number; spanFactory?: SpanFactory } = {}) {
+  const { spanFactory, ...rest } = options;
+  // Leave the provider undefined when no fake factory is supplied, so the
+  // registry applies its own default.
+  const provider: SpanFactoryProvider | undefined = spanFactory ? () => spanFactory : undefined;
+  return new ProcessorRegistry(createTestLogger(), new Map([[TEST_SOURCE, testFactory]]), {
+    ...rest,
+    spanFactoryProvider: provider,
+  });
+}
+
+describe("ProcessorRegistry", () => {
+  test("creates a processor for a recognized source and routes the event", async () => {
+    const registry = makeRegistry({ spanFactory: createFakeSpanFactory() });
+    await registry.handle(event());
+    expect(registry.size).toBe(1);
+  });
+
+  test("reuses the same processor for the same queueId", async () => {
+    const registry = makeRegistry({ spanFactory: createFakeSpanFactory() });
+    await registry.handle(event({ queueId: "s1", eventName: "SessionStart" }));
+    await registry.handle(event({ queueId: "s1", eventName: "Stop" }));
+    expect(registry.size).toBe(1);
+  });
+
+  test("creates separate processors for different queueIds", async () => {
+    const registry = makeRegistry({ spanFactory: createFakeSpanFactory() });
+    await registry.handle(event({ queueId: "s1" }));
+    await registry.handle(event({ queueId: "s2" }));
+    expect(registry.size).toBe(2);
+  });
+
+  test("no-ops on an unrecognized source without caching", async () => {
+    const registry = makeRegistry({ spanFactory: createFakeSpanFactory() });
+    await registry.handle(event({ eventSource: "mystery-source" }));
+    expect(registry.size).toBe(0);
+  });
+
+  test("processes an event with no queueId", async () => {
+    const registry = makeRegistry({ spanFactory: createFakeSpanFactory() });
+    await registry.handle(event({ queueId: null }));
+    expect(registry.size).toBe(1);
+  });
+
+  test("evicts and flushes the least-recently-used processor at capacity", async () => {
+    const factory = createFakeSpanFactory();
+    const registry = makeRegistry({ capacity: 2, spanFactory: factory });
+    await registry.handle(event({ queueId: "s1" }));
+    await registry.handle(event({ queueId: "s2" }));
+    await registry.handle(event({ queueId: "s3" })); // evicts s1
+    expect(registry.size).toBe(2);
+    // The evicted processor's span (s1, the first created) was flushed, not ended.
+    // Allow the fire-and-forget eviction flush to settle.
+    await new Promise((r) => setTimeout(r, 10));
+    expect(factory.spans[0].flushCount).toBeGreaterThanOrEqual(1);
+    expect(factory.spans[0].endCount).toBe(0);
+  });
+
+  test("flushAll flushes every active processor", async () => {
+    const factory = createFakeSpanFactory();
+    const registry = makeRegistry({ spanFactory: factory });
+    await registry.handle(event({ queueId: "s1" }));
+    await registry.handle(event({ queueId: "s2" }));
+    await registry.flushAll();
+    expect(factory.spans.length).toBe(2);
+    expect(factory.spans.every((s) => s.flushCount >= 1)).toBe(true);
+  });
+
+  test("closeAll flushes every processor (without ending spans) and clears", async () => {
+    const factory = createFakeSpanFactory();
+    const registry = makeRegistry({ spanFactory: factory });
+    await registry.handle(event({ queueId: "s1" }));
+    await registry.handle(event({ queueId: "s2" }));
+    await registry.closeAll();
+    expect(factory.spans.every((s) => s.flushCount >= 1)).toBe(true);
+    expect(factory.spans.every((s) => s.endCount === 0)).toBe(true);
+    expect(registry.size).toBe(0);
+  });
+});
diff --git a/plugins/trace-codex/src/processor/processor-registry.ts
b/plugins/trace-codex/src/processor/processor-registry.ts
new file mode 100644
index 0000000..3435af6
--- /dev/null
+++ b/plugins/trace-codex/src/processor/processor-registry.ts
@@ -0,0 +1,113 @@
+// Routes each dequeued event to the EventProcessor for its queueId (session),
+// creating one on first use based on the event source. The set of supported
+// sources is supplied as a factory map (one per agent), so this registry is
+// generic and never names a specific agent. Processors are kept in an LRU map
+// capped at MAX_PROCESSORS; the least-recently-used session is evicted when the
+// cap is exceeded.
+
+import { defaultSpanFactoryProvider, type SpanFactoryProvider } from "../braintrust/logger.ts";
+import type { Logger } from "../log.ts";
+import type { EnqueueEvent } from "../server/routes.ts";
+import type { EventProcessor, EventProcessorFactory } from "./event-processor.ts";
+import { LruMap } from "./lru-map.ts";
+
+/** Max number of concurrent per-session processors retained. */
+export const MAX_PROCESSORS = 1024;
+
+/** Key used for events that arrive without a queueId. */
+const NO_QUEUE_KEY = "\u0000no-queue-id";
+
+export interface ProcessorRegistryOptions {
+  capacity?: number;
+  /**
+   * Builds the SpanFactory each processor uses. Defaults to a real per-session
+   * provider; tests override it to stay offline.
+   */
+  spanFactoryProvider?: SpanFactoryProvider;
+}
+
+export class ProcessorRegistry {
+  private readonly processors: LruMap<EventProcessor>;
+  private readonly logger: Logger;
+  private readonly capacity: number;
+  private readonly spanFactoryProvider: SpanFactoryProvider;
+  /** Processor factories keyed by event source (one per supported agent). */
+  private readonly factories: Map<string, EventProcessorFactory>;
+  /**
+   * Eviction flushes that have started but not settled. flushAll() awaits
+   * them so an evicted session's spans are not lost when the server stops
+   * right after an eviction.
+   */
+  private readonly pendingEvictionFlushes = new Set<Promise<void>>();
+
+  constructor(
+    logger: Logger,
+    factories: Map<string, EventProcessorFactory>,
+    options: ProcessorRegistryOptions = {},
+  ) {
+    this.logger = logger;
+    this.factories = factories;
+    this.capacity = options.capacity ?? MAX_PROCESSORS;
+    this.spanFactoryProvider = options.spanFactoryProvider ?? defaultSpanFactoryProvider;
+    this.processors = new LruMap<EventProcessor>({
+      capacity: this.capacity,
+      onEvict: (key, processor) => {
+        this.logger.debug("evicted processor", { queueId: key });
+        // Flush the victim so its buffered spans are delivered. onEvict is
+        // sync, so the flush is started here and tracked until it settles;
+        // errors are logged by safeFlush, never thrown.
+        const pending = this.safeFlush(processor, key);
+        this.pendingEvictionFlushes.add(pending);
+        const forget = () => {
+          this.pendingEvictionFlushes.delete(pending);
+        };
+        void pending.then(forget, forget);
+      },
+    });
+  }
+
+  /** Number of processors currently retained. */
+  get size(): number {
+    return this.processors.size;
+  }
+
+  /**
+   * Flush every active processor, and wait for any eviction flush still in
+   * flight. Called when the queue goes idle.
+   */
+  async flushAll(): Promise<void> {
+    const tasks: Array<Promise<void>> = [...this.pendingEvictionFlushes];
+    for (const processor of this.processors.values()) {
+      tasks.push(this.safeFlush(processor));
+    }
+    await Promise.all(tasks);
+  }
+
+  /** Flush every active processor and clear the map. Called on server stop. */
+  async closeAll(): Promise<void> {
+    await this.flushAll();
+    this.processors.clear();
+  }
+
+  /** Flush one processor, logging (not propagating) any failure. */
+  private async safeFlush(processor: EventProcessor, queueId?: string): Promise<void> {
+    try {
+      await processor.flush();
+    } catch (err) {
+      this.logger.error("processor flush failed", { queueId, error: String(err) });
+    }
+  }
+
+  /** Route an event to its session's processor, creating one if needed. */
+  async handle(event: EnqueueEvent): Promise<void> {
+    if (event.queueId === null) {
+      this.logger.warn("event has no queueId (session id)", {
+        eventSource: event.eventSource,
+        eventName: event.eventName,
+      });
+    }
+
+    // All events without a queueId share one processor under a reserved key.
+    const key = event.queueId ?? NO_QUEUE_KEY;
+
+    let processor = this.processors.get(key);
+    if (processor === undefined) {
+      const factory = this.factories.get(event.eventSource);
+      if (factory === undefined) {
+        // Unregistered source: warn and no-op. Don't cache anything.
+        this.logger.warn("unrecognized event source; skipping", {
+          eventSource: event.eventSource,
+          eventName: event.eventName,
+          queueId: event.queueId,
+        });
+        return;
+      }
+      processor = factory(event.queueId, this.logger, this.spanFactoryProvider);
+      this.processors.set(key, processor);
+    }
+
+    await processor.process(event);
+  }
+}
diff --git a/plugins/trace-codex/src/replay/replay.test.ts b/plugins/trace-codex/src/replay/replay.test.ts
new file mode 100644
index 0000000..86010ff
--- /dev/null
+++ b/plugins/trace-codex/src/replay/replay.test.ts
@@ -0,0 +1,71 @@
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+import { mkdtempSync, readFileSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { EventRecorder } from "../server/recorder.ts";
+import type { EnqueueEvent } from "../server/routes.ts";
+import { createTestLogger } from "../test-helpers.ts";
+import { parseRecording } from "./replay.ts";
+
+// A generic sample event; replay/recorder are agent-agnostic, so these tests
+// don't depend on any particular agent's event shape.
/**
 * Build a generic EnqueueEvent for tests.
 *
 * @param overrides Fields that replace the defaults.
 */
function sampleEvent(overrides: Partial<EnqueueEvent> = {}): EnqueueEvent {
  return {
    queueId: "session-1",
    eventSource: "test-agent",
    eventSourceVersion: null,
    eventName: "Something",
    eventData: { foo: "bar" },
    ...overrides,
  };
}

describe("parseRecording", () => {
  test("parses newline-delimited events, skipping blank lines", () => {
    const events = [
      sampleEvent(),
      sampleEvent({ eventName: "Other", eventData: { n: 1 } }),
      sampleEvent({ queueId: "session-2" }),
    ];
    // Trailing "\n\n" leaves blank lines that the parser must ignore.
    const text = `${events.map((e) => JSON.stringify(e)).join("\n")}\n\n`;
    expect(parseRecording(text)).toEqual(events);
  });

  test("skips malformed lines instead of throwing", () => {
    const good = sampleEvent();
    const text = `${JSON.stringify(good)}\nnot json\n`;
    expect(parseRecording(text, createTestLogger())).toEqual([good]);
  });

  test("returns [] for empty input", () => {
    expect(parseRecording("")).toEqual([]);
  });
});

describe("recorder -> parser round-trip", () => {
  let dir: string;

  // Each test gets its own scratch directory, removed afterwards.
  beforeEach(() => {
    dir = mkdtempSync(join(tmpdir(), "roundtrip-"));
  });
  afterEach(() => {
    rmSync(dir, { recursive: true, force: true });
  });

  test("events written by the recorder parse back equal to the originals", () => {
    const events = [
      sampleEvent({ eventName: "Start", eventData: { model: "x" } }),
      sampleEvent({ eventName: "Prompt", eventData: { prompt: "hi" } }),
      sampleEvent({ eventName: "Stop" }),
      sampleEvent({ eventName: "Prompt", eventData: { prompt: "and another" } }),
      sampleEvent({ eventName: "Stop" }),
    ];

    const path = join(dir, "session.ndjson");
    const recorder = new EventRecorder(path, createTestLogger());
    for (const e of events) recorder.record(e);

    const parsed = parseRecording(readFileSync(path, "utf8"));
    expect(parsed).toEqual(events);
  });
});
/**
 * Parse NDJSON text into events, skipping blank/malformed lines.
 *
 * A line is skipped (with a warning when a logger is given) if it is not valid
 * JSON, or if it is valid JSON that is not an object: `null`, a number, a
 * string, a boolean or an array. The fields of each object are not validated
 * here.
 *
 * @param text Recording contents, one JSON document per line.
 * @param logger Optional; receives a warning with the 1-based line number for
 *   every skipped non-blank line.
 * @returns The parsed events, in file order.
 */
export function parseRecording(text: string, logger?: Logger): EnqueueEvent[] {
  const events: EnqueueEvent[] = [];
  for (const [index, rawLine] of text.split("\n").entries()) {
    // trim() also drops the "\r" left behind by CRLF line endings.
    const line = rawLine.trim();
    if (line.length === 0) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch (err) {
      logger?.warn("replay: skipping malformed line", {
        lineNumber: index + 1,
        error: String(err),
      });
      continue;
    }

    // JSON.parse accepts scalars, null and arrays; none of them is an event.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      logger?.warn("replay: skipping malformed line", {
        lineNumber: index + 1,
        error: "line is not a JSON object",
      });
      continue;
    }

    events.push(parsed as EnqueueEvent);
  }
  return events;
}

/** Summary of one replay run. */
export interface ReplayResult {
  /** Number of events parsed from the recording. */
  total: number;
  /** Number of events the server accepted. */
  sent: number;
  /** Number of events that could not be delivered. */
  failed: number;
}
/**
 * Read the recording at `filePath`, ensure the server is up, and POST each
 * event in order. Returns a summary. Never throws.
 *
 * @param config Passed to ensureServer and postEnqueue.
 * @param logger Receives progress and failure diagnostics.
 * @param filePath Path of the NDJSON recording to replay.
 * @returns Counts of parsed, delivered and undelivered events. All zero when
 *   the file cannot be read or contains no events.
 */
export async function runReplay(
  config: Config,
  logger: Logger,
  filePath: string,
): Promise<ReplayResult> {
  let text: string;
  try {
    text = await Bun.file(filePath).text();
  } catch (err) {
    logger.error("replay: could not read recording file", {
      filePath,
      error: String(err),
    });
    return { total: 0, sent: 0, failed: 0 };
  }

  const events = parseRecording(text, logger);
  if (events.length === 0) {
    logger.warn("replay: no events to replay", { filePath });
    return { total: 0, sent: 0, failed: 0 };
  }
  const total = events.length;

  // ensureServer's error behavior is not visible from this file. Guard the
  // call so the "never throws" contract holds even if it rejects.
  try {
    const healthy = await ensureServer({
      config,
      logger,
      checkHealth,
      spawn: spawnServer,
      sleep,
    });
    if (!healthy) {
      logger.error("replay: could not reach or start event server", { filePath });
      return { total, sent: 0, failed: total };
    }
  } catch (err) {
    logger.error("replay: could not reach or start event server", {
      filePath,
      error: String(err),
    });
    return { total, sent: 0, failed: total };
  }

  let sent = 0;
  let failed = 0;
  // Sequential POSTs preserve per-session ordering on the queue.
  for (const event of events) {
    const ok = await postEnqueue(config, event, logger);
    if (ok) sent++;
    else failed++;
  }

  logger.info("replay complete", { filePath, total, sent, failed });
  return { total, sent, failed };
}
/**
 * POST one event to /enqueue. Returns true on a 2xx response. Never throws.
 *
 * @param config Whatever serverBaseUrl needs to build the server URL; a full
 *   Config or any narrower object that serverBaseUrl accepts.
 * @param event The event to send, serialized as the JSON request body.
 * @param logger Receives a warning on a non-2xx response and an error when the
 *   request itself fails (including the 2 s timeout).
 */
export async function postEnqueue(
  config: Parameters<typeof serverBaseUrl>[0],
  event: EnqueueEvent,
  logger: Logger,
): Promise<boolean> {
  try {
    const res = await fetch(`${serverBaseUrl(config)}/enqueue`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(event),
      signal: AbortSignal.timeout(2000),
    });
    if (!res.ok) {
      logger.warn("enqueue rejected", { status: res.status });
      return false;
    }
    return true;
  } catch (err) {
    logger.error("enqueue request failed", { error: String(err) });
    return false;
  }
}

/**
 * Ask the server to process everything enqueued so far and flush buffered spans
 * to the backend, then wait for it to confirm. Returns true on a 2xx response.
 * Never throws.
 *
 * The hook calls this after a terminal event so the final spans are delivered
 * before the process tree is torn down (e.g. when a CI job ends right after the
 * agent's last turn, before the background server's idle timeout fires).
 *
 * @param config Whatever serverBaseUrl needs to build the server URL.
 * @param logger Receives a warning on a non-2xx response and an error when the
 *   request itself fails (including the 12 s timeout).
 */
export async function postFlush(
  config: Parameters<typeof serverBaseUrl>[0],
  logger: Logger,
): Promise<boolean> {
  try {
    const res = await fetch(`${serverBaseUrl(config)}/flush`, {
      method: "POST",
      // The server bounds its own wait (FLUSH_TIMEOUT_MS); allow a little more
      // here so we receive its response rather than aborting the request.
      signal: AbortSignal.timeout(12_000),
    });
    if (!res.ok) {
      logger.warn("flush rejected", { status: res.status });
      return false;
    }
    return true;
  } catch (err) {
    logger.error("flush request failed", { error: String(err) });
    return false;
  }
}
expect(order).toEqual(["a:start", "a:end", "b:start", "b:end", "c:start", "c:end"]); + }); + + test("enqueue is non-blocking; size reflects pending items", () => { + const queue = new EventQueue({ logger: createTestLogger() }); + // Not started: items just accumulate. + queue.enqueue(makeEvent()); + queue.enqueue(makeEvent()); + expect(queue.size).toBe(2); + }); + + test("a failing processor does not stop the consumer", async () => { + const processed: string[] = []; + let first = true; + const queue = new EventQueue({ + logger: createTestLogger(), + handler: (e) => { + if (first) { + first = false; + throw new Error("boom"); + } + processed.push(e.eventName); + }, + }); + queue.start(); + + queue.enqueue(makeEvent({ eventName: "bad" })); + queue.enqueue(makeEvent({ eventName: "good" })); + + await queue.stop(); + + // The good event is still processed after the bad one throws. + expect(processed).toEqual(["good"]); + }); + + test("drains remaining events on stop", async () => { + const processed: string[] = []; + const queue = new EventQueue({ + logger: createTestLogger(), + handler: async (e) => { + await sleep(2); + processed.push(e.eventName); + }, + }); + queue.start(); + + queue.enqueue(makeEvent({ eventName: "x" })); + queue.enqueue(makeEvent({ eventName: "y" })); + // stop() should wait for both to finish. 
+ await queue.stop(); + + expect(processed).toEqual(["x", "y"]); + }); + + test("drops events enqueued after stop", async () => { + const processed: string[] = []; + const queue = new EventQueue({ + logger: createTestLogger(), + handler: (e) => { + processed.push(e.eventName); + }, + }); + queue.start(); + await queue.stop(); + + queue.enqueue(makeEvent({ eventName: "late" })); + expect(queue.size).toBe(0); + expect(processed).toEqual([]); + }); + + test("fires onIdle when the queue drains to empty", async () => { + let idleCount = 0; + const queue = new EventQueue({ + logger: createTestLogger(), + handler: () => {}, + onIdle: () => { + idleCount += 1; + }, + }); + queue.start(); + queue.enqueue(makeEvent()); + queue.enqueue(makeEvent()); + + // Wait for the queue to process both and go idle. + await sleep(20); + expect(idleCount).toBeGreaterThanOrEqual(1); + await queue.stop(); + }); + + test("drained() resolves after pending events are processed", async () => { + const processed: string[] = []; + const queue = new EventQueue({ + logger: createTestLogger(), + handler: async (e) => { + await sleep(10); + processed.push(e.eventName); + }, + }); + queue.start(); + queue.enqueue(makeEvent({ eventName: "a" })); + queue.enqueue(makeEvent({ eventName: "b" })); + + await queue.drained(); + // Everything enqueued before drained() must be processed by the time it + // resolves. + expect(processed).toEqual(["a", "b"]); + + await queue.stop(); + }); + + test("drained() resolves promptly when the queue is already empty", async () => { + const queue = new EventQueue({ logger: createTestLogger(), handler: () => {} }); + queue.start(); + // Nothing enqueued: the next idle pass should resolve the waiter. 
+ await queue.drained(); + await queue.stop(); + }); + + test("drained() resolves (does not hang) if the queue is stopping", async () => { + const queue = new EventQueue({ logger: createTestLogger(), handler: () => {} }); + queue.start(); + const drained = queue.drained(); + await queue.stop(); + // Must not hang even though stop() set draining before the waiter resolved. + await drained; + }); + + test("processes events that arrive during the onIdle handler", async () => { + const processed: string[] = []; + let firstIdle = true; + const queue = new EventQueue({ + logger: createTestLogger(), + handler: (e) => { + processed.push(e.eventName); + }, + onIdle: async () => { + // On the first idle, enqueue one more; it must still get processed. + if (firstIdle) { + firstIdle = false; + queue.enqueue(makeEvent({ eventName: "late-from-idle" })); + } + }, + }); + queue.start(); + queue.enqueue(makeEvent({ eventName: "first" })); + + await sleep(20); + expect(processed).toContain("first"); + expect(processed).toContain("late-from-idle"); + await queue.stop(); + }); +}); diff --git a/plugins/trace-codex/src/server/event-queue.ts b/plugins/trace-codex/src/server/event-queue.ts new file mode 100644 index 0000000..c460673 --- /dev/null +++ b/plugins/trace-codex/src/server/event-queue.ts @@ -0,0 +1,179 @@ +// An in-memory FIFO queue of events with a single background consumer. +// +// /enqueue pushes onto the queue and returns immediately. A consumer loop pops +// events one at a time and processes them (currently: logs them). Processing is +// serialized: one event is handled to completion before the next is popped, so +// a processor can safely do async work without interleaving. + +import type { Logger } from "../log.ts"; +import type { EnqueueEvent } from "./routes.ts"; + +/** Handles a single dequeued event. */ +export type EventHandler = (event: EnqueueEvent) => Promise | void; + +/** Called once each time the queue drains to empty (before parking). 
/** Called once each time the queue drains to empty (before parking). */
export type IdleHandler = () => Promise<void> | void;

export interface EventQueueOptions {
  logger: Logger;
  /** Handles each dequeued event. Defaults to a logging no-op. */
  handler?: EventHandler;
  /** Invoked when the queue drains to empty (e.g. to flush buffered state). */
  onIdle?: IdleHandler;
}

/**
 * In-memory FIFO of events with a single background consumer. Events are
 * handled one at a time, each to completion, in the order they were enqueued.
 */
export class EventQueue {
  private readonly items: EnqueueEvent[] = [];
  private readonly logger: Logger;
  private readonly handler: EventHandler;
  private readonly onIdle?: IdleHandler;

  // Resolver for a consumer that is parked waiting for the next item.
  private waiter: (() => void) | null = null;
  private draining = false;
  private consuming = false;
  // True once the consumer loop has returned. From then on the queue is
  // permanently empty, because enqueue() drops events while draining.
  private exited = false;
  // Resolves once the consumer loop has fully exited after stop().
  private stopped: Promise<void> | null = null;
  // Resolvers waiting for the queue to next reach empty (see drained()).
  private drainWaiters: Array<() => void> = [];

  constructor(options: EventQueueOptions) {
    this.logger = options.logger;
    this.handler = options.handler ?? ((event) => this.logEvent(event));
    this.onIdle = options.onIdle;
  }

  /** Number of events currently waiting to be processed. */
  get size(): number {
    return this.items.length;
  }

  /**
   * Resolves the next time the queue drains to empty (i.e. all items enqueued
   * up to now have been processed). If the queue is already empty and nothing
   * is being processed, resolves on the next consumer pass. Used by /flush to
   * wait for in-flight events before flushing processors.
   *
   * If the consumer loop has already exited after stop(), resolves at once:
   * nothing is left to process and nothing would ever wake the waiter. If the
   * consumer was never started, the promise stays pending until start() runs
   * and the queue empties.
   */
  drained(): Promise<void> {
    if (this.exited) return Promise.resolve();
    return new Promise<void>((resolve) => {
      this.drainWaiters.push(resolve);
      // Nudge the consumer in case it's parked, so an empty queue still makes a
      // pass and resolves the waiter promptly.
      this.wakeConsumer();
    });
  }

  /** Resolve and clear every pending drained() promise. */
  private notifyDrained(): void {
    if (this.drainWaiters.length === 0) return;
    const waiters = this.drainWaiters;
    this.drainWaiters = [];
    for (const resolve of waiters) resolve();
  }

  /** Wake the consumer if it is parked. No-op otherwise. */
  private wakeConsumer(): void {
    if (!this.waiter) return;
    const wake = this.waiter;
    // Clear before calling so one park is resumed at most once.
    this.waiter = null;
    wake();
  }

  /** Push an event onto the queue. Non-blocking. */
  enqueue(event: EnqueueEvent): void {
    if (this.draining) {
      // Dropping is fine: once draining, the server is shutting down.
      this.logger.warn("enqueue after drain; dropping event", {
        queueId: event.queueId,
        eventName: event.eventName,
      });
      return;
    }
    this.items.push(event);
    this.wakeConsumer();
  }

  /** Start the background consumer loop. Idempotent. */
  start(): void {
    if (this.consuming) return;
    this.consuming = true;
    this.stopped = this.consumeLoop();
  }

  /**
   * Stop accepting new events, let the consumer finish what's already queued,
   * and wait for the loop to exit.
   */
  async stop(): Promise<void> {
    this.draining = true;
    // Wake the consumer so it can observe the drain state and exit when empty.
    this.wakeConsumer();
    await this.stopped;
  }

  /** Mark the loop as exited and release any drain waiters so they don't hang. */
  private finish(): void {
    this.exited = true;
    this.notifyDrained();
  }

  /** Consumer loop: handle events in order, then run the idle handler and park. */
  private async consumeLoop(): Promise<void> {
    while (true) {
      const event = this.items.shift();
      if (event === undefined) {
        if (this.draining) {
          // Drained and stopping: exit without running the idle handler.
          this.finish();
          return;
        }
        // The queue just went empty: run the idle handler (e.g. flush) once,
        // then re-check, since items may have arrived while it ran.
        await this.runIdle();
        if (this.items.length > 0) continue;
        // The queue is empty and the idle handler has run: any /flush waiters
        // can now observe their events as fully delivered.
        this.notifyDrained();
        if (this.draining) {
          this.finish();
          return;
        }
        // Park until something is enqueued (or we're woken to drain).
        await new Promise<void>((resolve) => {
          this.waiter = resolve;
        });
        continue;
      }
      try {
        await this.handler(event);
      } catch (err) {
        // A failing handler must not stop the consumer; log and move on.
        this.logger.error("event processing failed", {
          queueId: event.queueId,
          eventName: event.eventName,
          error: String(err),
        });
      }
    }
  }

  /** Run the idle handler, if any, logging (not propagating) failures. */
  private async runIdle(): Promise<void> {
    if (!this.onIdle) return;
    try {
      await this.onIdle();
    } catch (err) {
      this.logger.error("idle handler failed", { error: String(err) });
    }
  }

  /** Default handler: log the dequeued event. */
  private logEvent(event: EnqueueEvent): void {
    if (event.queueId === null) {
      this.logger.warn("event has no queueId (session id)", {
        eventSource: event.eventSource,
        eventName: event.eventName,
      });
    }
    // Placeholder: just log the dequeued event.
    this.logger.info("process event", {
      queueId: event.queueId,
      eventSource: event.eventSource,
      eventSourceVersion: event.eventSourceVersion,
      eventName: event.eventName,
      eventData: event.eventData,
    });
  }
}
/**
 * A minimal async mutex. Callers are chained onto a shared promise tail, so
 * each critical section runs to completion before the next one begins, in the
 * order runExclusive was called.
 */
export class Mutex {
  // The tail of the queue. Each acquirer awaits the previous tail, then becomes
  // the new tail. Starts resolved so the first caller runs immediately.
  private tail: Promise<void> = Promise.resolve();

  /**
   * Run `fn` exclusively. Resolves/rejects with `fn`'s result. The lock is
   * released even if `fn` throws.
   *
   * @param fn The critical section; may be sync or async.
   * @returns A promise for whatever `fn` returns.
   */
  runExclusive<T>(fn: () => Promise<T> | T): Promise<T> {
    // Capture the current tail, then advance it to a promise that only resolves
    // once this caller finishes.
    const previous = this.tail;
    let release!: () => void;
    this.tail = new Promise<void>((resolve) => {
      release = resolve;
    });

    // The tail itself never rejects, so a failing section cannot poison the
    // chain for later callers.
    return previous.then(async () => {
      try {
        return await fn();
      } finally {
        release();
      }
    });
  }
}
+function sampleEvent(overrides: Partial = {}): EnqueueEvent { + return { + queueId: "session-1", + eventSource: "test-agent", + eventSourceVersion: null, + eventName: "Something", + eventData: { foo: "bar" }, + ...overrides, + }; +} + +describe("EventRecorder", () => { + let dir: string; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "recorder-test-")); + }); + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + test("records events as newline-delimited JSON that round-trips", () => { + const file = join(dir, "session.ndjson"); + const recorder = new EventRecorder(file, createTestLogger()); + + const events = [ + sampleEvent({ eventName: "Start", eventData: { model: "x" } }), + sampleEvent({ eventName: "Prompt", eventData: { prompt: "hi" } }), + sampleEvent({ eventName: "Stop" }), + ]; + for (const e of events) recorder.record(e); + + const lines = readFileSync(file, "utf8") + .split("\n") + .filter((l) => l.length > 0); + expect(lines.length).toBe(3); + const parsed = lines.map((l) => JSON.parse(l)); + expect(parsed).toEqual(events); + }); + + test("truncates an existing file on open", () => { + const file = join(dir, "session.ndjson"); + writeFileSync(file, "stale\nlines\n"); + + const recorder = new EventRecorder(file, createTestLogger()); + recorder.record(sampleEvent()); + + const lines = readFileSync(file, "utf8") + .split("\n") + .filter((l) => l.length > 0); + expect(lines.length).toBe(1); + expect(JSON.parse(lines[0]).eventName).toBe("Something"); + }); + + test("creates the parent directory if missing", () => { + const file = join(dir, "nested", "deep", "session.ndjson"); + const recorder = new EventRecorder(file, createTestLogger()); + recorder.record(sampleEvent()); + const lines = readFileSync(file, "utf8") + .split("\n") + .filter((l) => l.length > 0); + expect(lines.length).toBe(1); + }); + + test("never throws when the path is unwritable", () => { + // A path whose parent is a file, not a directory: open will 
fail. + const blocker = join(dir, "blocker"); + writeFileSync(blocker, "x"); + const file = join(blocker, "session.ndjson"); + + const recorder = new EventRecorder(file, createTestLogger()); + // record() must be a safe no-op rather than throwing. + expect(() => recorder.record(sampleEvent())).not.toThrow(); + }); + + test("redacts the apiKey field from recorded config events", () => { + const file = join(dir, "session.ndjson"); + const recorder = new EventRecorder(file, createTestLogger()); + recorder.record( + sampleEvent({ + eventName: "__braintrust_config", + eventData: { project: "p", apiKey: "sk-super-secret", apiUrl: "https://api" }, + }), + ); + + const raw = readFileSync(file, "utf8"); + expect(raw).not.toContain("sk-super-secret"); + const parsed = JSON.parse(raw.trim()); + expect(parsed.eventData).toEqual({ + project: "p", + apiKey: "__redacted__", + apiUrl: "https://api", + }); + }); +}); + +describe("redactForRecording", () => { + test("redacts apiKey but leaves other events untouched", () => { + const withKey = sampleEvent({ eventData: { apiKey: "sk-1", project: "p" } }); + expect((redactForRecording(withKey).eventData as { apiKey: string }).apiKey).toBe( + "__redacted__", + ); + + const noKey = sampleEvent({ eventData: { foo: "bar" } }); + // No secret fields: returns the same event unchanged. 
/** eventData keys that hold secrets and must never be written to a recording. */
const REDACTED_KEYS = ["apiKey"];
/** Marker written in place of a redacted value. */
const REDACTED = "__redacted__";

/**
 * Return a copy of the event safe to persist: any secret fields in eventData
 * (e.g. a config event's apiKey) are replaced with a redaction marker so
 * recordings never contain credentials.
 *
 * Only top-level keys of eventData are inspected. The input is never mutated.
 * When there is nothing to redact (including when eventData is not an object)
 * the same event object is returned.
 *
 * @param event The event about to be recorded.
 * @returns The original event, or a shallow copy with secrets replaced.
 */
export function redactForRecording(event: EnqueueEvent): EnqueueEvent {
  const data = event.eventData;
  if (typeof data !== "object" || data === null) return event;
  const obj = data as Record<string, unknown>;
  // Copy lazily, so events without secrets are returned as-is.
  let redacted: Record<string, unknown> | undefined;
  for (const key of REDACTED_KEYS) {
    if (key in obj && obj[key] !== undefined) {
      redacted ??= { ...obj };
      redacted[key] = REDACTED;
    }
  }
  return redacted ? { ...event, eventData: redacted } : event;
}

/**
 * Appends events to a file as newline-delimited JSON. Never throws: failures
 * are logged so recording cannot break the event pipeline.
 */
export class EventRecorder {
  private readonly filePath: string;
  private readonly logger: Logger;
  /** False once the file could not be opened; record() is then a no-op. */
  private enabled = true;

  /**
   * Create (or truncate) the record file, creating parent directories.
   *
   * @param filePath Destination of the NDJSON recording.
   * @param logger Receives the "recording events" notice and any errors.
   */
  constructor(filePath: string, logger: Logger) {
    this.filePath = filePath;
    this.logger = logger;
    // Truncate on open: each server run starts a fresh capture.
    try {
      mkdirSync(dirname(filePath), { recursive: true });
      writeFileSync(filePath, "");
      this.logger.info("recording events", { recordFile: filePath });
    } catch (err) {
      // Disable so we don't spam per-event errors for a path we can't write.
      this.enabled = false;
      this.logger.error("recorder: could not open record file; recording off", {
        recordFile: filePath,
        error: String(err),
      });
    }
  }

  /** Append one event as a JSON line, with secrets redacted. Never throws. */
  record(event: EnqueueEvent): void {
    if (!this.enabled) return;
    try {
      appendFileSync(this.filePath, `${JSON.stringify(redactForRecording(event))}\n`);
    } catch (err) {
      this.logger.error("recorder: write failed", {
        recordFile: this.filePath,
        error: String(err),
      });
    }
  }
}
undefined : JSON.stringify(body), + }); +} + +describe("GET /health", () => { + test("echoes the server's version", async () => { + const res = await handleRequest(get("/health"), makeDeps()); + expect(res.status).toBe(200); + expect(await res.json()).toEqual({ version: TEST_VERSION }); + }); + + test("returns 503 when shutting down", async () => { + const state = new ServerState(TEST_VERSION); + state.beginShutdown(); + const res = await handleRequest(get("/health"), makeDeps({ state })); + expect(res.status).toBe(503); + }); +}); + +describe("POST /enqueue", () => { + const validEvent = { + queueId: "session-abc", + eventSource: "codex-hook", + eventSourceVersion: null, + eventName: "UserPromptSubmit", + eventData: { hook_event_name: "UserPromptSubmit" }, + }; + + test("accepts a valid event and pushes it onto the queue", async () => { + const deps = makeDeps(); + const res = await handleRequest(post("/enqueue", validEvent), deps); + expect(res.status).toBe(200); + expect(await res.json()).toEqual({ ok: true }); + expect(deps.queue.size).toBe(1); + }); + + test("accepts an event with a null queueId", async () => { + const deps = makeDeps(); + const res = await handleRequest(post("/enqueue", { ...validEvent, queueId: null }), deps); + expect(res.status).toBe(200); + expect(deps.queue.size).toBe(1); + }); + + test("rejects invalid JSON", async () => { + const req = new Request("http://127.0.0.1/enqueue", { + method: "POST", + headers: { "content-type": "application/json" }, + body: "{not json", + }); + const res = await handleRequest(req, makeDeps()); + expect(res.status).toBe(400); + }); + + test("rejects wrong shape", async () => { + const res = await handleRequest(post("/enqueue", { foo: "bar" }), makeDeps()); + expect(res.status).toBe(400); + }); + + test("returns 503 when shutting down", async () => { + const state = new ServerState(TEST_VERSION); + state.beginShutdown(); + const res = await handleRequest(post("/enqueue", validEvent), makeDeps({ state })); + 
expect(res.status).toBe(503); + }); +}); + +describe("POST /shutdown", () => { + test("returns 200 empty body and triggers shutdown callback", async () => { + let shutdownCalled = false; + const state = new ServerState(TEST_VERSION); + const res = await handleRequest( + post("/shutdown"), + makeDeps({ state, onShutdownRequested: () => (shutdownCalled = true) }), + ); + expect(res.status).toBe(200); + expect(await res.text()).toBe(""); + expect(state.isShuttingDown()).toBe(true); + // callback is deferred via setTimeout; wait a tick. + await new Promise((r) => setTimeout(r, 80)); + expect(shutdownCalled).toBe(true); + }); +}); + +describe("POST /flush", () => { + const validEvent = { + queueId: "session-abc", + eventSource: "codex-hook", + eventSourceVersion: null, + eventName: "Stop", + eventData: { hook_event_name: "Stop" }, + }; + + test("waits for the queue to drain, then returns ok", async () => { + const processed: string[] = []; + const logger = createTestLogger(); + const queue = new EventQueue({ + logger, + handler: async (e) => { + await new Promise((r) => setTimeout(r, 10)); + processed.push(e.eventName); + }, + }); + queue.start(); + const deps = makeDeps({ logger, queue }); + + // Enqueue a Stop, then flush. The flush response must come only after the + // event has been processed. 
// HTTP route handlers. Pure-ish: they take state + deps and return a Response,
// plus optionally signal that a shutdown was requested.

import type { Logger } from "../log.ts";
import type { EventQueue } from "./event-queue.ts";
import type { ServerState } from "./state.ts";

/** Shape of the body POSTed to /enqueue. */
export interface EnqueueEvent {
  /**
   * Correlation key for this event stream (the Codex session id), or null if
   * the source could not determine one. Events without a queueId are still
   * accepted; the consumer logs a warning.
   */
  queueId: string | null;
  /** Where the event came from, e.g. "codex-hook". */
  eventSource: string;
  /** Version of that source, or null if unknown. */
  eventSourceVersion: string | null;
  /** The lifecycle event name, e.g. "UserPromptSubmit". */
  eventName: string;
  /** Raw event payload from the source. */
  eventData: unknown;
}

export interface RouteDeps {
  state: ServerState;
  logger: Logger;
  /** Queue that /enqueue pushes events onto. */
  queue: EventQueue;
  /** Invoked when /shutdown is hit, after the response is constructed. */
  onShutdownRequested: () => void;
}

/**
 * Build a JSON response. An `undefined` body produces an empty (null) body;
 * anything else is serialized with JSON.stringify.
 */
function json(body: unknown, status = 200): Response {
  return new Response(body === undefined ? null : JSON.stringify(body), {
    status,
    headers: { "content-type": "application/json" },
  });
}

/** 503 response returned for every route once shutdown has begun. */
const SERVICE_UNAVAILABLE = () => json({ error: "shutting_down" }, 503);

/** Max time /flush waits for the queue to drain before giving up. */
const FLUSH_TIMEOUT_MS = 10_000;

/**
 * Dispatch one HTTP request to its route.
 *
 * Routes: GET /health, POST /enqueue, POST /flush, POST /shutdown. Anything
 * else is a 404. Once `state.isShuttingDown()` is true every route answers 503
 * and the heartbeat is no longer bumped.
 *
 * @param req  The incoming request; only its URL path, method and JSON body are read.
 * @param deps Server state, logger, event queue and the shutdown callback.
 * @returns The response for the matched route. Rejects only if
 *          `queue.drained()` rejects during /flush.
 */
export async function handleRequest(req: Request, deps: RouteDeps): Promise<Response> {
  const { state, logger, queue, onShutdownRequested } = deps;
  const path = new URL(req.url).pathname;

  // Single shutdown gate for all routes. Nothing below awaits before routing,
  // so no per-route re-check is needed.
  if (state.isShuttingDown()) return SERVICE_UNAVAILABLE();

  // Any request counts as activity.
  state.bump();

  // GET /health
  if (path === "/health" && req.method === "GET") {
    return json({ version: state.version });
  }

  // POST /enqueue
  if (path === "/enqueue" && req.method === "POST") {
    // Keep the payload as `unknown` until the type guard has vouched for it.
    let payload: unknown;
    try {
      payload = await req.json();
    } catch {
      logger.warn("enqueue: invalid JSON body");
      return json({ error: "invalid_json" }, 400);
    }

    if (!isValidEnqueueEvent(payload)) {
      logger.warn("enqueue: invalid event shape", { received: payload });
      return json({ error: "invalid_event" }, 400);
    }

    // Push onto the background queue and return immediately; the consumer
    // handles processing.
    queue.enqueue(payload);
    return json({ ok: true });
  }

  // POST /flush
  // Wait until everything enqueued so far has been processed and buffered spans
  // have been flushed to the backend, then respond. Used by the hook client on
  // terminal events (e.g. Codex "Stop") so the final spans are delivered before
  // the process tree is torn down — important in short-lived environments like
  // CI where the background server won't survive to flush on idle.
  if (path === "/flush" && req.method === "POST") {
    // Bound the wait so a hung backend flush can't hold the request lock (and
    // thus block the idle watchdog and other requests) indefinitely.
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timedOut = new Promise<boolean>((resolve) => {
      timer = setTimeout(() => resolve(false), FLUSH_TIMEOUT_MS);
    });

    let flushed: boolean;
    try {
      flushed = await Promise.race([queue.drained().then(() => true), timedOut]);
    } finally {
      // Always release the timer: otherwise each successful flush leaves a
      // pending 10s timeout behind that keeps the event loop busy.
      clearTimeout(timer);
    }

    if (!flushed) {
      logger.warn("flush timed out waiting for queue to drain");
      return json({ ok: false, timedOut: true }, 200);
    }
    return json({ ok: true });
  }

  // POST /shutdown
  if (path === "/shutdown" && req.method === "POST") {
    state.beginShutdown();
    logger.info("shutdown requested");
    // Defer the actual stop briefly so this 200 response is fully flushed to
    // the client before the server closes its listener.
    setTimeout(onShutdownRequested, 50);
    return new Response(null, { status: 200 });
  }

  return json({ error: "not_found" }, 404);
}

/**
 * Type guard for the /enqueue body: checks the primitive fields' types and
 * that an `eventData` key is present (its value is not inspected).
 */
function isValidEnqueueEvent(value: unknown): value is EnqueueEvent {
  if (typeof value !== "object" || value === null) return false;
  const v = value as Record<string, unknown>;
  return (
    (typeof v.queueId === "string" || v.queueId === null) &&
    typeof v.eventSource === "string" &&
    (typeof v.eventSourceVersion === "string" || v.eventSourceVersion === null) &&
    typeof v.eventName === "string" &&
    "eventData" in v
  );
}
from "../test-helpers.ts"; +import { PLUGIN_VERSION } from "../version.ts"; +import { startServer } from "./server.ts"; + +const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + +function testConfig(overrides: Partial = {}): Config { + return { + host: "127.0.0.1", + // Port 0 lets the OS pick a free port, avoiding collisions between tests. + port: 0, + idleTimeoutMs: 60_000, + idleCheckIntervalMs: 60_000, + dataDir: "/tmp/braintrust-event-server-test", + ...overrides, + }; +} + +describe("startServer", () => { + test("serves /health and stops via stop()", async () => { + const server = startServer(testConfig(), new Map(), createTestLogger()); + try { + const res = await fetch(`http://127.0.0.1:${server.port}/health`); + expect(res.status).toBe(200); + expect(await res.json()).toEqual({ version: PLUGIN_VERSION }); + } finally { + await server.stop(); + } + await server.done; // resolves once fully stopped + }); + + test("idle watchdog shuts the server down after inactivity", async () => { + const server = startServer( + testConfig({ idleTimeoutMs: 10, idleCheckIntervalMs: 10 }), + new Map(), + createTestLogger(), + ); + + // Wait for the watchdog to fire and stop the server. + const stoppedInTime = await Promise.race([ + server.done.then(() => true), + sleep(2000).then(() => false), + ]); + + expect(stoppedInTime).toBe(true); + + // The port should no longer accept connections. + let refused = false; + try { + await fetch(`http://127.0.0.1:${server.port}/health`, { + signal: AbortSignal.timeout(500), + }); + } catch { + refused = true; + } + expect(refused).toBe(true); + }); + + test("activity resets the idle timer", async () => { + const server = startServer( + testConfig({ idleTimeoutMs: 120, idleCheckIntervalMs: 20 }), + new Map(), + createTestLogger(), + ); + + try { + // Keep it alive past the idle window by pinging /health repeatedly. 
+ for (let i = 0; i < 6; i++) { + await sleep(40); + const res = await fetch(`http://127.0.0.1:${server.port}/health`); + expect(res.status).toBe(200); + } + // ~240ms elapsed (> idleTimeoutMs) but activity kept it up. + expect(server.state.isShuttingDown()).toBe(false); + } finally { + await server.stop(); + } + }); + + test("draining the queue counts as activity and keeps the server alive", async () => { + let processed = 0; + // Each event takes a little time; the whole backlog outlasts the idle + // window, but no single event does. Pulling each event off the queue bumps + // the heartbeat, so the watchdog never sees the server as idle mid-drain. + const factory: EventProcessorFactory = () => ({ + process: async () => { + await sleep(40); + processed++; + }, + flush: () => {}, + }); + + const server = startServer( + testConfig({ idleTimeoutMs: 60, idleCheckIntervalMs: 20 }), + new Map([["test", factory]]), + createTestLogger(), + ); + + try { + // Enqueue a backlog whose total processing time (~240ms) far exceeds the + // 60ms idle window. + for (let i = 0; i < 6; i++) { + const res = await fetch(`http://127.0.0.1:${server.port}/enqueue`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + queueId: "s1", + eventSource: "test", + eventSourceVersion: null, + eventName: "UserPromptSubmit", + eventData: {}, + }), + }); + expect(res.status).toBe(200); + } + + // Wait for the whole backlog to drain. + while (processed < 6) await sleep(20); + + // The server stayed up the entire time despite each idle check seeing no + // HTTP traffic, because draining bumped the heartbeat. 
// Long-lived background HTTP server ("serve" mode).

import { defaultSpanFactoryProvider } from "../braintrust/logger.ts";
import type { Config } from "../config.ts";
import { createLogger, type Logger } from "../log.ts";
import type { EventProcessorFactory } from "../processor/event-processor.ts";
import { ProcessorRegistry } from "../processor/processor-registry.ts";
import { PLUGIN_VERSION } from "../version.ts";
import { EventQueue } from "./event-queue.ts";
import { Mutex } from "./mutex.ts";
import { EventRecorder } from "./recorder.ts";
import { handleRequest } from "./routes.ts";
import { ServerState } from "./state.ts";

export interface RunningServer {
  port: number;
  state: ServerState;
  /** Stops the server; repeated calls share the same in-flight shutdown. */
  stop(): Promise<void>;
  /** Resolves when the server has fully stopped (idle, /shutdown, or stop()). */
  done: Promise<void>;
}

/**
 * Starts the event server. Throws if the port is already bound.
 *
 * `factories` maps each supported event source to its processor factory (one per
 * agent). The server stays agent-agnostic and just forwards them to the
 * ProcessorRegistry.
 *
 * @param config    Host/port, idle-watchdog timings, data dir and optional record file.
 * @param factories Processor factories keyed by event source.
 * @param logger    Optional logger; defaults to a "server" component logger under `config.dataDir`.
 * @returns A handle exposing the bound port, the shared state, `stop()` and `done`.
 */
export function startServer(
  config: Config,
  factories: Map<string, EventProcessorFactory>,
  logger?: Logger,
): RunningServer {
  const log = logger ?? createLogger({ dataDir: config.dataDir, component: "server" });
  const state = new ServerState(PLUGIN_VERSION);

  let resolveDone!: () => void;
  const done = new Promise<void>((resolve) => {
    resolveDone = resolve;
  });

  let idleTimer: ReturnType<typeof setInterval> | undefined;
  // The single in-flight (or finished) shutdown; set on the first stop() call.
  let stopping: Promise<void> | undefined;
  // Replaced with the real bound port once Bun.serve has returned (the
  // configured port may be 0, meaning "let the OS pick").
  let boundPort = config.port;

  // Serialize request handling so only one handleRequest runs at a time, end to
  // end, even across `await` points. This makes read-modify-write of shared
  // server state safe without per-field guards.
  const requestLock = new Mutex();

  // Background queue + consumer. Each event is routed to a per-session
  // EventProcessor by the registry. When the queue drains to empty, flush all
  // processors so buffered spans reach Braintrust promptly.
  const registry = new ProcessorRegistry(log, factories, {
    spanFactoryProvider: defaultSpanFactoryProvider,
  });
  // Optional recorder: if configured, capture every dequeued event (all
  // sources, all sessions) before routing it, for later `replay`.
  const recorder = config.recordFile ? new EventRecorder(config.recordFile, log) : undefined;
  const queue = new EventQueue({
    logger: log,
    handler: (event) => {
      // Pulling an event off the queue counts as activity, so a slow consumer
      // (e.g. a slow Braintrust flush) doesn't let the idle watchdog tear the
      // server down while events are still being drained.
      state.bump();
      recorder?.record(event);
      return registry.handle(event);
    },
    onIdle: () => registry.flushAll(),
  });
  queue.start();

  const shutDown = async (): Promise<void> => {
    if (idleTimer) clearInterval(idleTimer);
    state.beginShutdown();
    try {
      // Graceful stop: stop accepting new connections and let in-flight requests
      // (e.g. the /shutdown response itself) finish before closing.
      await server.stop();
      // Drain the queue: process whatever was already enqueued, then exit.
      await queue.stop();
      // End all sessions (finalize + flush their root spans) before we exit.
      await registry.closeAll();
      log.info("server stopped", { port: boundPort });
    } finally {
      // Resolve `done` even when a step above rejects, so anything awaiting it
      // cannot hang forever on a failed shutdown.
      resolveDone();
    }
  };

  // Memoize the shutdown so every caller awaits the same completion instead of
  // a second call returning while the first is still in progress.
  const stop = (): Promise<void> => (stopping ??= shutDown());

  // Fire-and-forget variant for callers that must not await the shutdown (the
  // request handler and the watchdog both run under requestLock). Failures are
  // logged rather than surfacing as unhandled rejections.
  const stopInBackground = (): void => {
    stop().catch((err) => {
      log.error("shutdown failed", { error: String(err) });
    });
  };

  const server = Bun.serve({
    hostname: config.host,
    port: config.port,
    // Surface listen errors as a thrown exception from Bun.serve.
    // Every request is serialized through requestLock so handlers never
    // interleave with each other.
    fetch: (req) =>
      requestLock.runExclusive(() =>
        handleRequest(req, {
          state,
          logger: log,
          queue,
          onShutdownRequested: stopInBackground,
        }),
      ),
    error: (err) => {
      log.error("request error", { error: String(err) });
      return new Response(JSON.stringify({ error: "internal" }), {
        status: 500,
        headers: { "content-type": "application/json" },
      });
    },
  });
  boundPort = server.port ?? config.port;

  // Idle watchdog: shut down after inactivity. The check-and-shutdown runs
  // through the same lock as request handlers, so it cannot observe staleness
  // or tear the server down while a request is mid-flight. If a request is
  // queued/running, the watchdog waits its turn; by then the request has bumped
  // the heartbeat, so this pass sees the server as active and does nothing.
  idleTimer = setInterval(() => {
    void requestLock.runExclusive(() => {
      if (state.isShuttingDown()) return;
      if (state.isIdleExpired(config.idleTimeoutMs)) {
        log.info("idle timeout reached, shutting down", {
          idleTimeoutMs: config.idleTimeoutMs,
        });
        stopInBackground();
      }
    });
  }, config.idleCheckIntervalMs);
  // Don't let the watchdog keep the process alive on its own.
  idleTimer.unref?.();

  log.info("server started", {
    version: PLUGIN_VERSION,
    host: config.host,
    port: server.port,
  });

  return { port: boundPort, state, stop, done };
}
// Mutable server state, shared across route handlers and the idle watchdog.

export class ServerState {
  /** Version string this state was constructed with. */
  readonly version: string;
  /** Set by beginShutdown() and never cleared afterwards. */
  private shuttingDown = false;
  /** Timestamp (ms) of the most recent recorded activity. */
  private lastHeartbeat: number;

  /**
   * @param version Version string to expose via `version`.
   * @param now     Initial heartbeat timestamp in ms; defaults to the current time.
   */
  constructor(version: string, now: number = Date.now()) {
    this.version = version;
    this.lastHeartbeat = now;
  }

  /** Whether beginShutdown() has been called. */
  isShuttingDown(): boolean {
    return this.shuttingDown;
  }

  /** Flag the server as shutting down; calling it again changes nothing. */
  beginShutdown(): void {
    this.shuttingDown = true;
  }

  /** Timestamp (ms) passed to the constructor or to the latest bump(). */
  getLastHeartbeat(): number {
    return this.lastHeartbeat;
  }

  /** Record activity by moving the heartbeat to `now` (defaults to the current time). */
  bump(now: number = Date.now()): void {
    this.lastHeartbeat = now;
  }

  /**
   * True once at least `idleTimeoutMs` have passed since the last heartbeat,
   * measured against `now` (defaults to the current time). The boundary is
   * inclusive.
   */
  isIdleExpired(idleTimeoutMs: number, now: number = Date.now()): boolean {
    const idleForMs = now - this.lastHeartbeat;
    return idleForMs >= idleTimeoutMs;
  }
}
+ return { + id: fake.id, + flush: async () => { + fake.flushCount += 1; + }, + end: () => { + fake.endCount += 1; + return 0; + }, + } as unknown as Span; + }, + flush: async () => { + factory.factoryFlushCount += 1; + }, + }; + return factory; +} + +// ============================================================================ +// Captured-trace harness +// +// Uses the Braintrust SDK's own test facility to capture the spans the SDK +// would have flushed, so tests assert on real span output (span_id, +// root_span_id, span_parents, span_attributes, input, metadata, metrics) +// rather than a hand-rolled struct. +// ============================================================================ + +// A non-real but well-formed UUID. Passing both projectName and projectId makes +// the SDK skip network project resolution (see computeLoggerMetadata). +const TEST_PROJECT_ID = "00000000-0000-0000-0000-000000000000"; + +// biome-ignore lint/suspicious/noExplicitAny: _exportsForTestingOnly is untyped. +const testOnly = _exportsForTestingOnly as any; + +/** A single span event as captured by the SDK test logger. */ +export interface CapturedSpan { + span_id: string; + root_span_id: string; + span_parents?: string[]; + span_attributes?: { name?: string; type?: string }; + input?: unknown; + output?: unknown; + metadata?: Record; + metrics?: { start?: number; end?: number }; +} + +export interface CapturedTrace { + /** A real SDK-backed SpanFactory writing into the test logger. */ + spanFactory: SpanFactory; + /** Drain the captured span events. */ + drain(): Promise; + /** Tear down the test logger. */ + cleanup(): void; +} + +/** Install the SDK test logger and return a SpanFactory + drain/cleanup. 
/** Install the SDK test logger and return a SpanFactory + drain/cleanup. */
export function withCapturedTrace(): CapturedTrace {
  testOnly.simulateLoginForTests();
  const bg = testOnly.useTestBackgroundLogger();
  const logger = initLogger({
    projectName: "codex-test",
    projectId: TEST_PROJECT_ID,
    asyncFlush: true,
  });
  return {
    spanFactory: {
      startSpan: (args) => logger.startSpan(args),
      flush: () => logger.flush(),
    },
    drain: async () => {
      await logger.flush();
      return (await bg.drain()) as CapturedSpan[];
    },
    cleanup: () => testOnly.clearTestBackgroundLogger(),
  };
}

// ============================================================================
// Span tree
// ============================================================================

export interface SpanTree {
  span_id: string;
  root_span_id: string;
  name?: string;
  type?: string;
  input?: unknown;
  output?: unknown;
  metadata?: Record<string, unknown>;
  metrics?: { start?: number; end?: number };
  children: SpanTree[];
}

/**
 * Build a single-rooted tree from flat captured spans (via span_parents).
 *
 * The root is the first span with no parents (or that lists itself as its
 * first parent). Every other span is attached under its first parent id.
 * Siblings are ordered by `metrics.start` (missing start counts as 0); ties
 * keep their order of appearance in `spans`.
 *
 * @param spans Flat list of captured span events.
 * @returns The tree rooted at the first root span, or null when `spans` is
 *          empty or contains no root. Spans not reachable from that root are
 *          dropped.
 */
export function spansToTree(spans: CapturedSpan[]): SpanTree | null {
  if (spans.length === 0) return null;

  const root = spans.find((s) => {
    const parents = s.span_parents;
    return !parents || parents.length === 0 || parents[0] === s.span_id;
  });
  if (!root) return null;

  // Group children under their first parent, preserving input order.
  const childrenByParent = new Map<string, CapturedSpan[]>();
  for (const span of spans) {
    const parentId = span.span_parents?.[0];
    if (!parentId || parentId === span.span_id) continue;
    const siblings = childrenByParent.get(parentId);
    if (siblings) siblings.push(span);
    else childrenByParent.set(parentId, [span]);
  }

  const startOf = (span: CapturedSpan): number => span.metrics?.start ?? 0;

  const build = (span: CapturedSpan): SpanTree => {
    // Each sibling list is already in input order and Array.prototype.sort is
    // stable, so sorting on start time alone breaks ties by input position
    // without an indexOf() scan per child.
    const children = [...(childrenByParent.get(span.span_id) ?? [])]
      .sort((a, b) => startOf(a) - startOf(b))
      .map((child) => build(child));
    return {
      span_id: span.span_id,
      root_span_id: span.root_span_id,
      name: span.span_attributes?.name,
      type: span.span_attributes?.type,
      input: span.input,
      output: span.output,
      metadata: span.metadata,
      metrics: span.metrics,
      children,
    };
  };

  return build(root);
}

// ============================================================================
// Expected-trace matcher
// ============================================================================

export interface ExpectedSpan {
  span_attributes?: { name?: string | RegExp; type?: string };
  input?: unknown;
  output?: unknown;
  metadata?: Record<string, unknown>;
  /** If set, assert whether the span has an end time (true) or not (false). */
  ended?: boolean;
  /** Exact list of children (length and order are checked). */
  children?: ExpectedSpan[];
}
/** Match a span name against an exact string or a RegExp (a missing name is tested as ""). */
function nameMatches(actual: string | undefined, expected: string | RegExp): boolean {
  if (expected instanceof RegExp) {
    return expected.test(actual ?? "");
  }
  return actual === expected;
}

/**
 * Compare an actual span tree against an expectation and describe every
 * mismatch. Only the fields present on `expected` are checked; input, output
 * and metadata values are compared by their JSON.stringify form.
 *
 * @param actual   The captured span subtree, or null when no span exists.
 * @param expected The expectation for this node (and optionally its children).
 * @param path     Label prefixed to each message, e.g. "root.children[0]".
 * @returns One message per difference; an empty array means the tree matches.
 */
export function diffSpan(actual: SpanTree | null, expected: ExpectedSpan, path: string): string[] {
  if (actual === null) {
    return [`${path}: expected a span, got none`];
  }

  const diffs: string[] = [];

  // Shared JSON-equality check for input / output / metadata entries.
  const checkJson = (label: string, got: unknown, want: unknown): void => {
    const gotJson = JSON.stringify(got);
    const wantJson = JSON.stringify(want);
    if (gotJson !== wantJson) {
      diffs.push(`${path}.${label}: expected ${wantJson}, got ${gotJson}`);
    }
  };

  const wantName = expected.span_attributes?.name;
  if (wantName !== undefined && !nameMatches(actual.name, wantName)) {
    diffs.push(`${path}.name: expected ${String(wantName)}, got ${String(actual.name)}`);
  }

  const wantType = expected.span_attributes?.type;
  if (wantType !== undefined && actual.type !== wantType) {
    diffs.push(`${path}.type: expected ${wantType}, got ${String(actual.type)}`);
  }

  if (expected.input !== undefined) checkJson("input", actual.input, expected.input);
  if (expected.output !== undefined) checkJson("output", actual.output, expected.output);

  if (expected.metadata !== undefined) {
    // Only the keys named by the expectation are compared; extras are ignored.
    for (const key of Object.keys(expected.metadata)) {
      checkJson(`metadata.${key}`, actual.metadata?.[key], expected.metadata[key]);
    }
  }

  if (expected.ended !== undefined) {
    const hasEnd = actual.metrics?.end !== undefined;
    if (hasEnd !== expected.ended) {
      diffs.push(`${path}.ended: expected ${expected.ended}, got ${hasEnd}`);
    }
  }

  const wantChildren = expected.children;
  if (wantChildren !== undefined) {
    if (actual.children.length === wantChildren.length) {
      wantChildren.forEach((child, i) => {
        diffs.push(...diffSpan(actual.children[i], child, `${path}.children[${i}]`));
      });
    } else {
      // On a count mismatch report only the lengths; children are not compared.
      diffs.push(
        `${path}.children.length: expected ${wantChildren.length}, got ${actual.children.length}`,
      );
    }
  }

  return diffs;
}
>&2 + exit 1 +fi + +echo "Uninstalling Braintrust Codex plugins..." + +for d in "$REPO_ROOT"/plugins/*/; do + manifest="$d/.codex-plugin/plugin.json" + [ -f "$manifest" ] || continue + # Installable name is the manifest `name`, which may differ from the folder. + name="$(grep -o '"name"[[:space:]]*:[[:space:]]*"[^"]*"' "$manifest" | head -1 | sed 's/.*"\([^"]*\)"$/\1/')" + [ -n "$name" ] || continue + codex plugin remove "$name@$MARKETPLACE" >/dev/null 2>&1 \ + && echo " removed plugin '$name'" \ + || true +done + +codex plugin marketplace remove "$MARKETPLACE" >/dev/null 2>&1 \ + && echo " removed marketplace '$MARKETPLACE'" \ + || true + +# Clear any leftover cache (including dev symlinks). +rm -rf "$CACHE_DIR" 2>/dev/null || true + +echo "Done. Restart Codex to fully unload the plugins."