diff --git a/.changeset/config.json b/.changeset/config.json deleted file mode 100644 index e8eef44..0000000 --- a/.changeset/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "$schema": "https://unpkg.com/@changesets/config@3.1.1/schema.json", - "changelog": "@changesets/cli/changelog", - "commit": false, - "access": "public", - "baseBranch": "main" -} diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json new file mode 100644 index 0000000..6616b66 --- /dev/null +++ b/.claude-plugin/marketplace.json @@ -0,0 +1,17 @@ +{ + "name": "copilotkit-tools", + "owner": { + "name": "CopilotKit" + }, + "plugins": [ + { + "name": "llmock", + "source": { + "source": "npm", + "package": "@copilotkit/llmock", + "version": "^1.5.0" + }, + "description": "Fixture authoring skill for @copilotkit/llmock — match fields, response types, embeddings, structured output, sequential responses, streaming physics, agent loop patterns, gotchas, and debugging" + } + ] +} diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..cd8e5ae --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "llmock", + "version": "1.5.0", + "description": "Fixture authoring guidance for @copilotkit/llmock", + "author": { + "name": "CopilotKit" + }, + "homepage": "https://github.com/CopilotKit/llmock", + "repository": "https://github.com/CopilotKit/llmock", + "license": "MIT", + "skills": "./skills" +} diff --git a/.claude/commands/write-fixtures.md b/.claude/commands/write-fixtures.md new file mode 120000 index 0000000..3d887c6 --- /dev/null +++ b/.claude/commands/write-fixtures.md @@ -0,0 +1 @@ +../../skills/write-fixtures/SKILL.md \ No newline at end of file diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..725e4f6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +node_modules +.git +src/__tests__ +docs +.worktrees +.github +coverage +*.md +dist +.claude +.claude-plugin +skills +.husky 
+.vscode +.idea diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..d1c2923 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,9 @@ +*.gif filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.mp4 filter=lfs diff=lfs merge=lfs -text +*.webm filter=lfs diff=lfs merge=lfs -text +*.svg filter=lfs diff=lfs merge=lfs -text +docs/favicon.svg !filter !diff !merge diff --git a/.github/workflows/fix-drift.yml b/.github/workflows/fix-drift.yml new file mode 100644 index 0000000..1e44b97 --- /dev/null +++ b/.github/workflows/fix-drift.yml @@ -0,0 +1,128 @@ +name: Fix Drift +on: + workflow_dispatch: + workflow_run: + workflows: ["Drift Tests"] + types: [completed] + branches: [main] + +concurrency: + group: drift-fix + cancel-in-progress: false + +jobs: + fix: + if: >- + github.event_name == 'workflow_dispatch' || + github.event.workflow_run.conclusion == 'failure' + runs-on: ubuntu-latest + timeout-minutes: 30 + permissions: + contents: write + pull-requests: write + issues: write + steps: + - uses: actions/checkout@v4 + - uses: pnpm/action-setup@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + - run: pnpm install --frozen-lockfile + + # Step 0: Configure git identity and create fix branch + - name: Configure git + run: | + git config user.name "llmock-drift-bot" + git config user.email "drift-bot@copilotkit.ai" + git checkout -B fix/drift-$(date +%Y-%m-%d)-${{ github.run_id }} + + # Step 1: Detect drift and produce report + - name: Collect drift report + id: detect + run: | + set +e + npx tsx scripts/drift-report-collector.ts + EXIT_CODE=$? 
+ set -e + echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT + if [ "$EXIT_CODE" -eq 2 ]; then + : # critical drift found, continue + elif [ "$EXIT_CODE" -ne 0 ]; then + echo "::error::Collector script crashed with exit code $EXIT_CODE" + exit $EXIT_CODE + fi + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + + # Always upload the report as an artifact + - name: Upload drift report + if: always() + uses: actions/upload-artifact@v4 + with: + name: drift-report + path: drift-report.json + if-no-files-found: warn + retention-days: 30 + + # Step 2: Exit if no critical drift + - name: Check for critical diffs + id: check + env: + DETECT_EXIT_CODE: ${{ steps.detect.outputs.exit_code }} + run: | + if [ "$DETECT_EXIT_CODE" = "2" ]; then + echo "skip=false" >> $GITHUB_OUTPUT + echo "Critical drift detected" + else + echo "skip=true" >> $GITHUB_OUTPUT + echo "No critical drift detected (exit code: $DETECT_EXIT_CODE) — skipping fix" + fi + + # Step 3: Invoke Claude Code to fix + - name: Auto-fix drift + if: steps.check.outputs.skip != 'true' + run: npx tsx scripts/fix-drift.ts + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + + # Upload Claude Code output for debugging + - name: Upload Claude Code logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: claude-code-output + path: claude-code-output.log + if-no-files-found: warn + retention-days: 30 + + # Step 4: Verify fix independently + - name: Verify conformance + if: steps.check.outputs.skip != 'true' + run: pnpm test + + - name: Verify drift resolved + if: steps.check.outputs.skip != 'true' + run: pnpm test:drift + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + + # Step 5: Create PR on 
success + - name: Create PR + if: success() && steps.check.outputs.skip != 'true' + run: npx tsx scripts/fix-drift.ts --create-pr + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # Step 6: Open issue on failure + - name: Create issue on failure + if: failure() && steps.check.outputs.skip != 'true' + run: npx tsx scripts/fix-drift.ts --create-issue + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml new file mode 100644 index 0000000..3b40eab --- /dev/null +++ b/.github/workflows/publish-docker.yml @@ -0,0 +1,58 @@ +name: Publish Docker Image + +on: + push: + tags: + - "v*" + pull_request: + branches: + - main + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GHCR + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}} + type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }} + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . 
+ platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 4e88527..19f34f9 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -2,6 +2,7 @@ name: Release on: push: branches: [main] + workflow_dispatch: jobs: release: runs-on: ubuntu-latest @@ -14,11 +15,29 @@ jobs: cache: pnpm registry-url: "https://registry.npmjs.org" - run: pnpm install --frozen-lockfile - - uses: changesets/action@v1 - with: - publish: pnpm release - version: pnpm changeset version + + - name: Check if version is already published + id: check + run: | + PKG_NAME=$(node -p "require('./package.json').name") + PKG_VERSION=$(node -p "require('./package.json').version") + if npm view "${PKG_NAME}@${PKG_VERSION}" version 2>/dev/null; then + echo "published=true" >> "$GITHUB_OUTPUT" + else + echo "published=false" >> "$GITHUB_OUTPUT" + fi + + - name: Build and publish + if: steps.check.outputs.published == 'false' + run: pnpm release env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} NPM_TOKEN: ${{ secrets.NPM_TOKEN }} NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + + - name: Create GitHub Release + if: steps.check.outputs.published == 'false' + run: | + PKG_VERSION=$(node -p "require('./package.json').version") + gh release create "v${PKG_VERSION}" --generate-notes --title "v${PKG_VERSION}" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test-drift.yml b/.github/workflows/test-drift.yml new file mode 100644 index 0000000..b76d6d1 --- /dev/null +++ b/.github/workflows/test-drift.yml @@ -0,0 +1,49 @@ +name: Drift Tests +on: + schedule: + - cron: "0 6 * * *" # Daily 6am UTC + workflow_dispatch: # Manual trigger +jobs: + drift: + runs-on: ubuntu-latest + timeout-minutes: 15 + 
steps: + - uses: actions/checkout@v4 + - uses: pnpm/action-setup@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + - run: pnpm install --frozen-lockfile + + - name: Run drift tests + id: drift + run: | + set +e + npx tsx scripts/drift-report-collector.ts + EXIT_CODE=$? + set -e + echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT + if [ "$EXIT_CODE" -eq 2 ]; then + : # critical drift found, continue + elif [ "$EXIT_CODE" -ne 0 ]; then + echo "::error::Collector script crashed with exit code $EXIT_CODE" + exit $EXIT_CODE + fi + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + + - name: Upload drift report + if: always() + uses: actions/upload-artifact@v4 + with: + name: drift-report + path: drift-report.json + if-no-files-found: warn + retention-days: 30 + + - name: Fail if critical drift detected + if: steps.drift.outputs.exit_code == '2' + run: exit 1 diff --git a/.github/workflows/update-competitive-matrix.yml b/.github/workflows/update-competitive-matrix.yml new file mode 100644 index 0000000..b6e3355 --- /dev/null +++ b/.github/workflows/update-competitive-matrix.yml @@ -0,0 +1,56 @@ +name: Update Competitive Matrix + +on: + schedule: + - cron: "0 9 * * 1" # Weekly Monday 9am UTC + workflow_dispatch: + +concurrency: + group: competitive-matrix + cancel-in-progress: true + +jobs: + update-matrix: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - uses: actions/checkout@v4 + - uses: pnpm/action-setup@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + - run: pnpm install --frozen-lockfile + + - name: Update competitive matrix + run: npx tsx scripts/update-competitive-matrix.ts --summary /tmp/matrix-summary.md + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Check for changes + id: changes + run: | + if git diff --quiet docs/index.html; then + echo 
"changed=false" >> $GITHUB_OUTPUT + else + echo "changed=true" >> $GITHUB_OUTPUT + fi + + - name: Create PR + if: steps.changes.outputs.changed == 'true' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + BRANCH="auto/competitive-matrix-$(date +%Y%m%d)" + git checkout -b "$BRANCH" + git add docs/index.html + git commit -m "docs: update competitive matrix from latest competitor data" + git push -u origin "$BRANCH" + gh pr create \ + --title "Update competitive matrix" \ + --body-file /tmp/matrix-summary.md \ + --base main + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index f4e2c6d..cf9381d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ node_modules/ dist/ *.tsbuildinfo +.worktrees/ +.superpowers/ diff --git a/.husky/pre-commit b/.husky/pre-commit new file mode 100644 index 0000000..2312dc5 --- /dev/null +++ b/.husky/pre-commit @@ -0,0 +1 @@ +npx lint-staged diff --git a/.prettierignore b/.prettierignore index 29c69b2..52af816 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,3 +1,4 @@ dist/ node_modules/ pnpm-lock.yaml +charts/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3b69f67 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,132 @@ +# @copilotkit/llmock + +## 1.6.0 + +### Minor Changes + +- Provider-specific endpoints: dedicated routes for Bedrock (`/model/{modelId}/invoke`), Ollama (`/api/chat`, `/api/generate`), Cohere (`/v2/chat`), and Azure OpenAI deployment-based routing (`/openai/deployments/{id}/chat/completions`) +- Chaos injection: `ChaosConfig` type with `drop`, `malformed`, and `disconnect` actions; supports per-fixture chaos via `chaos` config on each fixture and server-wide chaos via `--chaos-drop`, `--chaos-malformed`, and `--chaos-disconnect` CLI flags +- Metrics: `GET /metrics` endpoint exposing Prometheus text format with request counters and latency histograms per provider and route +- 
Record-and-replay: `--record` flag and `proxyAndRecord` helper that proxies requests to real LLM APIs, collapses streaming responses, and writes fixture JSON to disk for future playback + +## 1.5.1 + +### Patch Changes + +- Fix documentation URLs to use correct domain (llmock.copilotkit.dev) + +## 1.5.0 + +### Minor Changes + +- Embeddings API: `POST /v1/embeddings` endpoint, `onEmbedding()` convenience method, `inputText` match field, `EmbeddingResponse` type, deterministic fallback embeddings from input hash, Azure embedding routing +- Structured output / JSON mode: `responseFormat` match field, `onJsonOutput()` convenience method +- Sequential responses: `sequenceIndex` match field for stateful multi-turn fixtures, per-fixture-group match counting, `resetMatchCounts()` method +- Streaming physics: `StreamingProfile` type with `ttft`, `tps`, `jitter` fields for realistic timing simulation +- AWS Bedrock: `POST /model/{modelId}/invoke` endpoint, Anthropic Messages format translation +- Azure OpenAI: provider routing for `/openai/deployments/{id}/chat/completions` and `/openai/deployments/{id}/embeddings` +- Health & models endpoints: `GET /health`, `GET /ready`, `GET /v1/models` (auto-populated from fixtures) +- Docker & Helm: Dockerfile, Helm chart for Kubernetes deployment +- Documentation website: full docs site at llmock.copilotkit.dev with feature pages and competitive comparison matrix +- Automated drift remediation: `scripts/drift-report-collector.ts` and `scripts/fix-drift.ts` for CI-driven drift fixes +- CI automation: competitive matrix update workflow, drift fix workflow +- `FixtureOpts` and `EmbeddingFixtureOpts` type aliases exported for external consumers + +### Patch Changes + +- Fix Gemini Live handler crash on malformed `clientContent.turns` and `toolResponse.functionResponses` +- Add `isClosed` guard before WebSocket finalization events (prevents writes to closed connections) +- Default to non-streaming for Claude Messages API and Responses API 
(matching real API defaults) +- Fix `streamingProfile` missing from convenience method opts types (`on`, `onMessage`, etc.) +- Fix skills/ symlink direction so npm pack includes the write-fixtures skill +- Fix `.claude` removed from package.json files (was dead weight — symlink doesn't ship) +- Add `.worktrees/` to eslint ignores +- Remove dead `@keyframes sseLine` CSS from docs site +- Fix watcher cleanup on error (clear debounce timer, null guard) +- Fix empty-reload guard (keep previous fixtures when reload produces 0) +- README rewritten as concise overview with links to docs site +- Write-fixtures skill updated for all v1.5.0 features +- Docs site: Get Started links to docs, comparison above reliability, npm version badge + +## 1.4.0 + +### Minor Changes + +- `--watch` (`-w`): File-watching with 500ms debounced reload. Keeps previous fixtures on validation failure. +- `--log-level`: Configurable log verbosity (`silent`, `info`, `debug`). Default `info` for CLI, `silent` for programmatic API. +- `--validate-on-load`: Fixture schema validation at startup — checks response types, tool call JSON, numeric ranges, shadowing, and catch-all positioning. +- `validateFixtures()` exported for programmatic use +- `Logger` class exported for programmatic use + +## 1.3.3 + +### Patch Changes + +- Fix Responses WS handler to accept flat `response.create` format matching the real OpenAI API (previously required a non-standard nested `response: { ... 
}` envelope) +- WebSocket drift detection tests: TLS client for real provider WS endpoints, 4 verified drift tests (Responses WS + Realtime), Gemini Live canary for text-capable model availability +- Realtime model canary: detects when `gpt-4o-mini-realtime-preview` is deprecated and suggests GA replacement +- Gemini Live documented as unverified (no text-capable `bidiGenerateContent` model exists yet) +- Fix README Gemini Live response shape example (`modelTurn.parts`, not `modelTurnComplete`) + +## 1.3.2 + +### Patch Changes + +- Fix missing `refusal` field on OpenAI Chat Completions responses — both the SDK and real API return `refusal: null` on non-refusal messages, but llmock was omitting it +- Live API drift detection test suite: three-layer triangulation between SDK types, real API responses, and llmock output across OpenAI (Chat + Responses), Anthropic Claude, and Google Gemini +- Weekly CI workflow for automated drift checks +- `DRIFT.md` documentation for the drift detection system + +## 1.3.1 + +### Patch Changes + +- Claude Code fixture authoring skill (`/write-fixtures`) — comprehensive guide for match fields, response types, agent loop patterns, gotchas, and debugging +- Claude Code plugin structure for downstream consumers (`--plugin-dir`, `--add-dir`, or manual copy) +- README and docs site updated with Claude Code integration instructions + +## 1.3.0 + +### Minor Changes + +- Mid-stream interruption: `truncateAfterChunks` and `disconnectAfterMs` fixture fields to simulate abrupt server disconnects +- AbortSignal-based cancellation primitives (`createInterruptionSignal`, signal-aware `delay()`) +- Backward-compatible `writeSSEStream` overload with `StreamOptions` returning completion status +- Interruption support across all HTTP SSE and WebSocket streaming paths +- `destroy()` method on `WebSocketConnection` for abrupt disconnect simulation +- Journal records `interrupted` and `interruptReason` on interrupted streams +- LLMock convenience API 
extended with interruption options (`truncateAfterChunks`, `disconnectAfterMs`) + +## 1.2.0 + +### Minor Changes + +- Zero-dependency RFC 6455 WebSocket framing layer +- OpenAI Responses API over WebSocket (`/v1/responses`) +- OpenAI Realtime API over WebSocket (`/v1/realtime`) — text + tool calls +- Gemini Live BidiGenerateContent over WebSocket — text + tool calls + +### Patch Changes + +- WebSocket close-frame lifecycle fixes +- Improved error visibility across WebSocket handlers +- Future Direction section in README + +## 1.1.1 + +### Patch Changes + +- Add function call IDs to Gemini tool call responses +- Remove changesets, simplify release workflow + +## 1.1.0 + +### Minor Changes + +- 9948a8b: Add `prependFixture()` and `getFixtures()` public API methods + +## 1.0.1 + +### Patch Changes + +- Add `getTextContent` for array-format message content handling diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..2ba92b4 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,49 @@ +# llmock + +## Before Every Commit + +Run these checks on all changed files before committing: + +```bash +pnpm run format:check # prettier +pnpm run lint # eslint +pnpm run test # vitest +``` + +If prettier or eslint fail, fix with: + +```bash +npx prettier --write +npx eslint --fix +``` + +A pre-commit hook (husky + lint-staged) runs prettier and eslint automatically +on staged files, but always verify manually before pushing — CI checks the +entire repo, not just staged files. 
+ +## Project Structure + +- `src/` — TypeScript source (server, router, helpers, responses, types) +- `src/__tests__/` — Vitest test suite +- `docs/` — GitHub Pages website (static HTML) +- `fixtures/` — Example fixture JSON files shipped with the package + +## Testing + +- Tests live in `src/__tests__/` and use Vitest +- When adding features or fixing bugs, add or update tests +- Run `pnpm test` before pushing + +## Drift Remediation + +Automated drift remediation lives in `scripts/`: + +- `scripts/drift-report-collector.ts` — runs drift tests, produces `drift-report.json` +- `scripts/fix-drift.ts` — reads drift report, invokes Claude Code to fix builders, creates PR or issue + +See `DRIFT.md` for full documentation and `.github/workflows/fix-drift.yml` for the CI workflow. + +## Commit Messages + +- This repo enforces conventional commit prefixes via commitlint: `fix:`, `feat:`, `docs:`, `test:`, `chore:`, `refactor:`, etc. +- No Co-Authored-By lines diff --git a/DRIFT.md b/DRIFT.md new file mode 100644 index 0000000..b8a0ffb --- /dev/null +++ b/DRIFT.md @@ -0,0 +1,166 @@ +# Live API Drift Detection + +llmock produces responses shaped like real LLM APIs. Providers change their APIs over time. **Drift** means the mock no longer matches reality — your tests pass against llmock but break against the real API. + +## Three-Layer Approach + +Drift detection compares three independent sources to triangulate the cause of any mismatch: + +| SDK types = Real API? | Real API = llmock? 
| Diagnosis | +| --------------------- | ------------------ | -------------------------------------------------------------------- | +| Yes | No | **llmock drift** — response builders need updating | +| No | No | **Provider changed before SDK update** — flag, wait for SDK catch-up | +| Yes | Yes | **No drift** — all clear | +| No | Yes | **SDK drift** — provider deprecated something SDK still references | + +Two-way comparison (mock vs real) can't distinguish between "we need to fix llmock" and "the SDK hasn't caught up yet." Three-way comparison can. + +## Running Drift Tests + +```bash +# All providers (requires all three API keys) +OPENAI_API_KEY=sk-... ANTHROPIC_API_KEY=sk-... GOOGLE_API_KEY=... pnpm test:drift + +# Single provider (others skip automatically) +OPENAI_API_KEY=sk-... pnpm test:drift + +# Strict mode — warnings also fail +STRICT_DRIFT=1 OPENAI_API_KEY=sk-... pnpm test:drift +``` + +Required environment variables: + +- `OPENAI_API_KEY` — OpenAI API key +- `ANTHROPIC_API_KEY` — Anthropic API key +- `GOOGLE_API_KEY` — Google AI API key + +Each provider's tests skip independently if its key is not set. You can run drift tests for just one provider. + +## Reading Results + +### Severity levels + +- **critical** — Test fails. llmock produces a different shape than the real API for a field that both the SDK and real API agree on. This means llmock needs an update. +- **warning** — Test passes (unless `STRICT_DRIFT=1`). The real API has a field that neither the SDK nor llmock knows about, or the SDK and real API disagree. Usually means a provider added something new. +- **info** — Always passes. Known intentional differences (usage fields are always zero, optional fields llmock omits, etc.). + +### Example report output + +``` +API DRIFT DETECTED: OpenAI Chat Completions (non-streaming text) + + 1. 
[critical] LLMOCK DRIFT — field in SDK + real API but missing from mock + Path: usage.completion_tokens_details + SDK: object { reasoning_tokens: number } + Real: object { reasoning_tokens: number, accepted_prediction_tokens: number } + Mock: + + 2. [warning] PROVIDER ADDED FIELD — in real API but not in SDK or mock + Path: system_fingerprint + SDK: + Real: string + Mock: + + 3. [info] MOCK EXTRA FIELD — in mock but not in real API + Path: choices[0].logprobs + SDK: null | object + Real: + Mock: null +``` + +## Fixing Detected Drift + +When a `critical` drift is detected: + +1. **Identify the response builder** — the report path tells you which provider and field: + - OpenAI Chat Completions → `src/helpers.ts` (`buildTextCompletion`, `buildToolCallCompletion`, `buildTextChunks`, `buildToolCallChunks`) + - OpenAI Responses API → `src/responses.ts` (`buildTextResponse`, `buildToolCallResponse`, `buildTextStreamEvents`, `buildToolCallStreamEvents`) + - Anthropic Claude → `src/messages.ts` (`buildClaudeTextResponse`, `buildClaudeToolCallResponse`, `buildClaudeTextStreamEvents`, `buildClaudeToolCallStreamEvents`) + - Google Gemini → `src/gemini.ts` (`buildGeminiTextResponse`, `buildGeminiToolCallResponse`, `buildGeminiTextStreamChunks`, `buildGeminiToolCallStreamChunks`) + +2. **Update the builder** — add or modify the field to match the real API shape. + +3. **Run conformance tests** — `pnpm test` to verify existing API conformance tests still pass. + +4. **Run drift tests** — `pnpm test:drift` to verify the drift is resolved. + +## Model Deprecation + +The `models.drift.ts` test scrapes model names referenced in llmock's test files, README, and fixtures, then checks each provider's model listing API to verify they still exist. + +When a model is deprecated: + +1. Update the model name in the affected test files and fixtures +2. Update `src/__tests__/drift/providers.ts` if the cheap test model changed +3. 
Run `pnpm test` and `pnpm test:drift` + +## Adding a New Provider + +1. Add the provider's SDK as a devDependency in `package.json` +2. Add shape extraction functions to `src/__tests__/drift/sdk-shapes.ts` +3. Add raw fetch client functions to `src/__tests__/drift/providers.ts` +4. Create `src/__tests__/drift/<provider>.drift.ts` with 4 test scenarios +5. Add model listing function to `providers.ts` and model check to `models.drift.ts` +6. If the provider uses WebSocket, add protocol functions to `ws-providers.ts` and create `ws-<provider>.drift.ts` +7. Update the allowlist in `schema.ts` if needed + +## WebSocket Drift Coverage + +In addition to the 19 existing drift tests (16 HTTP response-shape + 3 model deprecation), WebSocket drift tests cover llmock's WS protocols (4 verified + 2 canary = 6 WS tests): + +| Protocol | Text | Tool Call | Real Endpoint | Status | +| ------------------- | ---- | --------- | ------------------------------------------------------------------- | ---------- | +| OpenAI Responses WS | ✓ | ✓ | `wss://api.openai.com/v1/responses` | Verified | +| OpenAI Realtime | ✓ | ✓ | `wss://api.openai.com/v1/realtime` | Verified | +| Gemini Live | — | — | `wss://generativelanguage.googleapis.com/ws/...BidiGenerateContent` | Unverified | + +**Models**: `gpt-4o-mini` for Responses WS, `gpt-4o-mini-realtime-preview` for Realtime. + +**Auth**: Uses the same `OPENAI_API_KEY` and `GOOGLE_API_KEY` environment variables as HTTP tests. No new secrets needed. + +**How it works**: A TLS WebSocket client (`ws-providers.ts`) connects to real provider endpoints using `node:tls` with RFC 6455 framing. Each protocol function handles the setup sequence (e.g., Realtime session negotiation, Gemini Live setup/setupComplete) and collects messages until a terminal event. The mock side uses the existing `ws-test-client.ts` plaintext client against the local llmock server. 
+ +### Gemini Live: unverified + +llmock's Gemini Live handler implements the text-based `BidiGenerateContent` protocol as documented in Google's [Live API reference](https://ai.google.dev/api/live) — `setup`/`setupComplete` handshake, `clientContent` with turns, `serverContent` with `modelTurn.parts[].text`, and `toolCall` responses. The protocol format is correct per the docs. + +However, as of March 2026, the only models that support `bidiGenerateContent` are native-audio models (`gemini-2.5-flash-native-audio-*`), which reject text-only requests. No text-capable model exists for this endpoint yet, so we cannot triangulate llmock's output against a real API response. + +A canary test (`ws-gemini-live.drift.ts`) queries the Gemini model listing API on each drift run and checks for a non-audio model that supports `bidiGenerateContent`. When Google ships one, the canary will flag it and the full drift tests can be enabled. + +## CI Schedule + +Drift tests run on a schedule: + +- **Daily**: 6:00 AM UTC +- **Manual**: Trigger via GitHub Actions UI (`workflow_dispatch`) +- **NOT** on PR or push — these tests hit real APIs and cost money + +See `.github/workflows/test-drift.yml`. + +## Automated Drift Remediation + +When the daily drift test detects critical diffs on the `main` branch, the `fix-drift.yml` workflow runs automatically: + +1. **Collect** — `scripts/drift-report-collector.ts` runs drift tests and produces a structured `drift-report.json` +2. **Fix** — `scripts/fix-drift.ts` (default mode) constructs a prompt from the report and invokes Claude Code to fix the builders +3. **Verify** — Independent `pnpm test` and `pnpm test:drift` steps confirm the fix works +4. **PR** — `scripts/fix-drift.ts --create-pr` stages and commits the changes, bumps the version, and opens a pull request +5. 
**Issue** (on failure) — `scripts/fix-drift.ts --create-issue` opens a GitHub issue with the drift report and Claude Code output + +Steps 2 and 4/5 are separate invocations of `fix-drift.ts` with different modes. + +### Artifacts + +Both workflows upload artifacts: + +- `drift-report.json` — structured drift data (retained 30 days) +- `claude-code-output.log` — Claude Code's reasoning and tool calls (fix workflow only) + +### Manual trigger + +The fix workflow also supports `workflow_dispatch` for manual runs. + +## Cost + +~25 API calls per run (16 HTTP response-shape + 3 model listing + 6 WS including canaries) using the cheapest available models (`gpt-4o-mini`, `gpt-4o-mini-realtime-preview`, `claude-haiku-4-5-20251001`, `gemini-2.5-flash`) with 10-100 max tokens each. Under $0.15/week at daily cadence. When Gemini Live text-capable models become available, the 2 canary tests will become full drift tests, increasing real WS connections from 4 to 6. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..09b9811 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +# syntax=docker/dockerfile:1 + +# --- Build stage --- +FROM node:22-alpine AS build + +RUN corepack enable && corepack prepare pnpm@10.28.2 --activate + +WORKDIR /app + +COPY package.json pnpm-lock.yaml ./ +RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store \ + pnpm install --frozen-lockfile + +COPY tsconfig.json tsdown.config.ts ./ +COPY src/ src/ + +RUN pnpm run build + +# --- Production stage --- +FROM node:22-alpine + +WORKDIR /app + +# No runtime dependencies — all imports are node:* built-ins +COPY --from=build /app/dist/ dist/ +COPY fixtures/ fixtures/ + +EXPOSE 4010 + +ENTRYPOINT ["node", "dist/cli.js"] +CMD ["--fixtures", "./fixtures", "--host", "0.0.0.0"] diff --git a/README.md b/README.md index 2c39ec9..bd60779 100644 --- a/README.md +++ b/README.md @@ -1,38 +1,51 @@ -# @copilotkit/mock-openai +# @copilotkit/llmock [![Unit 
Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml) [![Drift Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml) [![npm version](https://img.shields.io/npm/v/@copilotkit/llmock)](https://www.npmjs.com/package/@copilotkit/llmock) -Deterministic mock OpenAI server for testing. Streams SSE responses in real OpenAI Chat Completions and Responses API format, driven entirely by fixtures. Zero runtime dependencies — built on Node.js builtins only. +Deterministic mock LLM server for testing. A real HTTP server on a real port — not an in-process interceptor — so every process in your stack (Playwright, Next.js, agent workers, microservices) can point at it via `OPENAI_BASE_URL` / `ANTHROPIC_BASE_URL` and get reproducible, instant responses. Streams SSE in real OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, and Cohere API formats, driven entirely by fixtures. Zero runtime dependencies. -Supports both streaming (SSE) and non-streaming JSON responses, text completions, tool calls, and error injection. Point any process at it via `OPENAI_BASE_URL` and get reproducible, instant responses. - -## Install +## Quick Start ```bash -npm install @copilotkit/mock-openai +npm install @copilotkit/llmock +``` + +```typescript +import { LLMock } from "@copilotkit/llmock"; + +const mock = new LLMock({ port: 5555 }); + +mock.onMessage("hello", { content: "Hi there!" }); + +const url = await mock.start(); +// Point your OpenAI client at `url` instead of https://api.openai.com + +// ... run your tests ... + +await mock.stop(); ``` ## When to Use This vs MSW [MSW (Mock Service Worker)](https://mswjs.io/) is a popular API mocking library, but it solves a different problem. -**The key difference is architecture.** mock-openai runs a real HTTP server on a port. 
MSW patches `http`/`https`/`fetch` modules inside a single Node.js process. MSW can only intercept requests from the process that calls `server.listen()` — child processes, separate services, and workers are unaffected. +**The key difference is architecture.** llmock runs a real HTTP server on a port. MSW patches `http`/`https`/`fetch` modules inside a single Node.js process. MSW can only intercept requests from the process that calls `server.listen()` — child processes, separate services, and workers are unaffected. -This matters for E2E tests where multiple processes make OpenAI calls: +This matters for E2E tests where multiple processes make LLM API calls: ``` Playwright test runner (Node) └─ controls browser → Next.js app (separate process) - └─ OPENAI_BASE_URL → mock-openai :5555 + └─ OPENAI_BASE_URL → llmock :5555 ├─ Mastra agent workers ├─ LangGraph workers └─ CopilotKit runtime ``` -MSW can't intercept any of those calls. mock-openai can — it's a real server on a real port. +MSW can't intercept any of those calls. llmock can — it's a real server on a real port. -**Use mock-openai when:** +**Use llmock when:** - Multiple processes need to hit the same mock (E2E tests, agent frameworks, microservices) -- You want OpenAI-specific SSE format out of the box (Chat Completions + Responses API) +- You want multi-provider SSE format out of the box (OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere) - You prefer defining fixtures as JSON files rather than code - You need a standalone CLI server @@ -42,11 +55,14 @@ MSW can't intercept any of those calls. 
mock-openai can — it's a real server o - You're mocking many different APIs, not just OpenAI - You want in-process interception without running a server -| Capability | mock-openai | MSW | +| Capability | llmock | MSW | | ---------------------------- | --------------------- | ------------------------------------------------------------------------- | | Cross-process interception | **Yes** (real server) | **No** (in-process only) | | OpenAI Chat Completions SSE | **Built-in** | Manual — build `data: {json}\n\n` + `[DONE]` yourself | | OpenAI Responses API SSE | **Built-in** | Manual — MSW's `sse()` sends `data:` events, not OpenAI's `event:` format | +| Claude Messages API SSE | **Built-in** | Manual — build `event:`/`data:` SSE yourself | +| Gemini streaming | **Built-in** | Manual — build `data:` SSE yourself | +| WebSocket APIs | **Built-in** | **No** | | Fixture file loading (JSON) | **Yes** | **No** — handlers are code-only | | Request journal / inspection | **Yes** | **No** — track requests manually | | Non-streaming responses | **Yes** | **Yes** | @@ -54,443 +70,76 @@ MSW can't intercept any of those calls. mock-openai can — it's a real server o | CLI for standalone use | **Yes** | **No** | | Zero dependencies | **Yes** | **No** (~300KB) | -## Quick Start - -```typescript -import { MockOpenAI } from "@copilotkit/mock-openai"; - -const mock = new MockOpenAI({ port: 5555 }); - -mock.onMessage("hello", { content: "Hi there!" }); - -const url = await mock.start(); -// Point your OpenAI client at `url` instead of https://api.openai.com - -// ... run your tests ... - -await mock.stop(); -``` - -## E2E Test Patterns - -Real-world patterns from using mock-openai in Playwright E2E tests with CopilotKit, Mastra, LangGraph, and Agno agent frameworks. - -### Global Setup/Teardown - -Start the mock server once for the entire test suite. All child processes (Next.js, agent workers) inherit the URL via environment variable. 
- -```typescript -// e2e/mock-openai-setup.ts -import { MockOpenAI } from "@copilotkit/mock-openai"; -import * as path from "node:path"; - -let mockServer: MockOpenAI | null = null; - -export async function setupMockOpenAI(): Promise { - mockServer = new MockOpenAI({ port: 5555 }); - - // Load JSON fixtures from a directory - mockServer.loadFixtureDir(path.join(__dirname, "fixtures", "openai")); - - const url = await mockServer.start(); - - // Child processes use this to find the mock - process.env.MOCK_OPENAI_URL = `${url}/v1`; -} - -export async function teardownMockOpenAI(): Promise { - if (mockServer) { - await mockServer.stop(); - mockServer = null; - } -} -``` - -The Next.js app (or any other service) just needs: - -```env -OPENAI_BASE_URL=http://localhost:5555/v1 -OPENAI_API_KEY=mock-key -``` - -### JSON Fixture Files - -Define fixtures as JSON — one file per feature, loaded with `loadFixtureFile` or `loadFixtureDir`. - -**Text responses** — match on a substring of the last user message: - -```json -{ - "fixtures": [ - { - "match": { "userMessage": "stock price of AAPL" }, - "response": { "content": "The current stock price of Apple Inc. (AAPL) is $150.25." } - }, - { - "match": { "userMessage": "capital of France" }, - "response": { "content": "The capital of France is Paris." 
} - } - ] -} -``` - -**Tool call responses** — the agent framework receives these as tool calls and executes them: - -```json -{ - "fixtures": [ - { - "match": { "userMessage": "one step with eggs" }, - "response": { - "toolCalls": [ - { - "name": "generate_task_steps", - "arguments": "{\"steps\":[{\"description\":\"Crack eggs into bowl\",\"status\":\"enabled\"},{\"description\":\"Preheat oven to 350F\",\"status\":\"enabled\"}]}" - } - ] - } - }, - { - "match": { "userMessage": "background color to blue" }, - "response": { - "toolCalls": [ - { - "name": "change_background", - "arguments": "{\"background\":\"blue\"}" - } - ] - } - } - ] -} -``` - -### Fixture Load Order Matters - -Fixtures are evaluated first-match-wins. When two fixtures could match the same message, load the more specific one first: - -```typescript -// Load HITL fixtures first — "one step with eggs" is more specific than -// "plan to make brownies" which also appears in the HITL user message -mockServer.loadFixtureFile(path.join(FIXTURES_DIR, "human-in-the-loop.json")); - -// Then load everything else — earlier matches take priority -mockServer.loadFixtureDir(FIXTURES_DIR); -``` - -### Predicate-Based Routing - -When substring matching isn't enough — for example, when the last user message is the same across multiple requests but the system prompt differs — use predicates: - -```typescript -// Supervisor agent: same user message every time, but system prompt -// contains state flags like "Flights found: false" -mockServer.addFixture({ - match: { - predicate: (req) => { - const sysMsg = req.messages.find((m) => m.role === "system"); - return sysMsg?.content?.includes("Flights found: false") ?? 
false; - }, - }, - response: { - toolCalls: [ - { - name: "supervisor_response", - arguments: '{"answer":"Let me find flights for you!","next_agent":"flights_agent"}', - }, - ], - }, -}); - -mockServer.addFixture({ - match: { - predicate: (req) => { - const sys = req.messages.find((m) => m.role === "system")?.content ?? ""; - return sys.includes("Flights found: true") && sys.includes("Hotels found: false"); - }, - }, - response: { - toolCalls: [ - { - name: "supervisor_response", - arguments: '{"answer":"Now let me find hotels.","next_agent":"hotels_agent"}', - }, - ], - }, -}); -``` - -### Tool Result Catch-All - -After a tool executes, the next request contains a `role: "tool"` message with the result. Add a catch-all for these so the conversation can continue: - -```typescript -const toolResultFixture = { - match: { - predicate: (req) => { - const last = req.messages[req.messages.length - 1]; - return last?.role === "tool"; - }, - }, - response: { content: "Done! I've completed that for you." }, -}; -mockServer.addFixture(toolResultFixture); - -// Move it to the front so it matches before substring-based fixtures -// (the last user message hasn't changed, so substring fixtures would -// match the same fixture again otherwise) -const fixtures = (mockServer as any).fixtures; -const idx = fixtures.indexOf(toolResultFixture); -if (idx > 0) { - fixtures.splice(idx, 1); - fixtures.unshift(toolResultFixture); -} -``` - -### Universal Catch-All - -Append a catch-all last to handle any request that doesn't match a specific fixture, preventing 404s from crashing the test: - -```typescript -mockServer.addFixture({ - match: { predicate: () => true }, - response: { content: "I understand. How can I help you with that?" }, -}); -``` - -## Programmatic API - -### `new MockOpenAI(options?)` - -Create a new mock server instance. 
- -| Option | Type | Default | Description | -| ----------- | -------- | ------------- | ----------------------------------- | -| `port` | `number` | `0` (random) | Port to listen on | -| `host` | `string` | `"127.0.0.1"` | Host to bind to | -| `latency` | `number` | `0` | Default ms delay between SSE chunks | -| `chunkSize` | `number` | `20` | Default characters per SSE chunk | - -### `MockOpenAI.create(options?)` - -Static factory — creates an instance and starts it in one call. Returns `Promise`. - -### Server Lifecycle - -| Method | Returns | Description | -| --------- | ----------------- | -------------------------------------- | -| `start()` | `Promise` | Start the server, returns the base URL | -| `stop()` | `Promise` | Stop the server | -| `url` | `string` | Base URL (throws if not started) | -| `baseUrl` | `string` | Alias for `url` | -| `port` | `number` | Listening port (throws if not started) | - -### Fixture Registration - -All registration methods return `this` for chaining. - -#### `on(match, response, opts?)` - -Register a fixture with full control over match criteria. - -```typescript -mock.on({ userMessage: /weather/i, model: "gpt-4" }, { content: "It's sunny!" }, { latency: 50 }); -``` - -#### `onMessage(pattern, response, opts?)` - -Shorthand — matches on the last user message. - -```typescript -mock.onMessage("hello", { content: "Hi!" }); -mock.onMessage(/greet/i, { content: "Hey there!" }); -``` - -#### `onToolCall(name, response, opts?)` - -Shorthand — matches when the request contains a tool with the given name. - -```typescript -mock.onToolCall("get_weather", { - toolCalls: [{ name: "get_weather", arguments: '{"location":"SF"}' }], -}); -``` - -#### `onToolResult(id, response, opts?)` - -Shorthand — matches when a tool result message has the given `tool_call_id`. 
- -```typescript -mock.onToolResult("call_abc123", { content: "Temperature is 72F" }); -``` - -#### `addFixture(fixture)` / `addFixtures(fixtures)` - -Add raw `Fixture` objects directly. - -#### `loadFixtureFile(path)` / `loadFixtureDir(path)` - -Load fixtures from JSON files on disk. See [Fixture Files](#json-fixture-files) above. - -#### `clearFixtures()` - -Remove all registered fixtures. - -### Error Injection - -#### `nextRequestError(status, errorBody?)` - -Queue a one-shot error for the very next request. The error fires once, then auto-removes itself. - -```typescript -mock.nextRequestError(429, { - message: "Rate limited", - type: "rate_limit_error", -}); - -// Next request → 429 error -// Subsequent requests → normal fixture matching -``` - -### Request Journal - -Every request to `/v1/chat/completions` and `/v1/responses` is recorded in a journal. - -#### Programmatic Access - -| Method | Returns | Description | -| ------------------ | ---------------------- | ------------------------------------- | -| `getRequests()` | `JournalEntry[]` | All recorded requests | -| `getLastRequest()` | `JournalEntry \| null` | Most recent request | -| `clearRequests()` | `void` | Clear the journal | -| `journal` | `Journal` | Direct access to the journal instance | - -```typescript -await fetch(mock.url + "/v1/chat/completions", { ... }); - -const last = mock.getLastRequest(); -expect(last?.body.messages).toContainEqual({ - role: "user", - content: "hello", -}); -``` - -#### HTTP Endpoints - -The server also exposes journal data over HTTP (useful in CLI mode): - -- `GET /v1/_requests` — returns all journal entries as JSON. Supports `?limit=N`. -- `DELETE /v1/_requests` — clears the journal. Returns 204. - -### Reset - -#### `reset()` - -Clear all fixtures **and** the journal in one call. Works before or after the server is started. 
- -```typescript -afterEach(() => { - mock.reset(); -}); -``` - -## Fixture Matching - -Fixtures are evaluated in registration order (first match wins). A fixture matches when **all** specified fields match the incoming request (AND logic). - -| Field | Type | Matches on | -| ------------- | ------------------ | --------------------------------------------- | -| `userMessage` | `string \| RegExp` | Content of the last `role: "user"` message | -| `toolName` | `string` | Name of a tool in the request's `tools` array | -| `toolCallId` | `string` | `tool_call_id` on a `role: "tool"` message | -| `model` | `string \| RegExp` | The `model` field in the request | -| `predicate` | `(req) => boolean` | Arbitrary matching function | - -## Fixture Responses - -### Text - -```typescript -{ - content: "Hello world"; -} -``` - -Streams as SSE chunks, splitting `content` by `chunkSize`. With `stream: false`, returns a standard `chat.completion` JSON object. - -### Tool Calls - -```typescript -{ - toolCalls: [{ name: "get_weather", arguments: '{"location":"SF"}' }]; -} -``` - -### Errors - -```typescript -{ - error: { message: "Rate limited", type: "rate_limit_error" }, - status: 429 -} -``` - -## API Endpoints - -The server handles: - -- **POST `/v1/chat/completions`** — OpenAI Chat Completions API (streaming and non-streaming) -- **POST `/v1/responses`** — OpenAI Responses API (streaming and non-streaming). Requests are translated to the Chat Completions fixture format internally, so the same fixtures work for both endpoints. 
- -## CLI - -The package includes a standalone server binary: +## Features + +- **[Multi-provider support](https://llmock.copilotkit.dev/compatible-providers.html)** — [OpenAI Chat Completions](https://llmock.copilotkit.dev/chat-completions.html), [OpenAI Responses](https://llmock.copilotkit.dev/responses-api.html), [Anthropic Claude](https://llmock.copilotkit.dev/claude-messages.html), [Google Gemini](https://llmock.copilotkit.dev/gemini.html), [AWS Bedrock](https://llmock.copilotkit.dev/aws-bedrock.html) (streaming + Converse), [Azure OpenAI](https://llmock.copilotkit.dev/azure-openai.html), [Vertex AI](https://llmock.copilotkit.dev/vertex-ai.html), [Ollama](https://llmock.copilotkit.dev/ollama.html), [Cohere](https://llmock.copilotkit.dev/cohere.html) +- **[Embeddings API](https://llmock.copilotkit.dev/embeddings.html)** — OpenAI-compatible embedding responses with configurable dimensions +- **[Structured output / JSON mode](https://llmock.copilotkit.dev/structured-output.html)** — `response_format`, `json_schema`, and function calling +- **[Sequential responses](https://llmock.copilotkit.dev/sequential-responses.html)** — Stateful multi-turn fixtures that return different responses on each call +- **[Streaming physics](https://llmock.copilotkit.dev/streaming-physics.html)** — Configurable `ttft`, `tps`, and `jitter` for realistic timing +- **[WebSocket APIs](https://llmock.copilotkit.dev/websocket.html)** — OpenAI Responses WS, Realtime API, and Gemini Live +- **[Error injection](https://llmock.copilotkit.dev/error-injection.html)** — One-shot errors, rate limiting, and provider-specific error formats +- **[Chaos testing](https://llmock.copilotkit.dev/chaos-testing.html)** — Probabilistic failure injection: 500 errors, malformed JSON, mid-stream disconnects +- **[Prometheus metrics](https://llmock.copilotkit.dev/metrics.html)** — Request counts, latencies, and fixture match rates at `/metrics` +- **[Request journal](https://llmock.copilotkit.dev/docs.html)** — 
Record, inspect, and assert on every request +- **[Fixture validation](https://llmock.copilotkit.dev/fixtures.html)** — Schema validation at load time with `--validate-on-load` +- **CLI with hot-reload** — Standalone server with `--watch` for live fixture editing +- **[Docker + Helm](https://llmock.copilotkit.dev/docker.html)** — Container image and Helm chart for CI/CD pipelines +- **Record-and-replay** — VCR-style proxy-on-miss records real API responses as fixtures for deterministic replay +- **[Drift detection](https://llmock.copilotkit.dev/drift-detection.html)** — Daily CI runs against real APIs to catch response format changes +- **Claude Code integration** — `/write-fixtures` skill teaches your AI assistant how to write fixtures correctly + +## CLI Quick Reference ```bash -mock-openai [options] -``` - -| Option | Short | Default | Description | -| -------------- | ----- | ------------ | ---------------------------------- | -| `--port` | `-p` | `4010` | Port to listen on | -| `--host` | `-h` | `127.0.0.1` | Host to bind to | -| `--fixtures` | `-f` | `./fixtures` | Path to fixtures directory or file | -| `--latency` | `-l` | `0` | Latency between SSE chunks (ms) | -| `--chunk-size` | `-c` | `20` | Characters per SSE chunk | -| `--help` | | | Show help | +llmock [options] +``` + +| Option | Short | Default | Description | +| -------------------- | ----- | ------------ | ------------------------------------------- | +| `--port` | `-p` | `4010` | Port to listen on | +| `--host` | `-h` | `127.0.0.1` | Host to bind to | +| `--fixtures` | `-f` | `./fixtures` | Path to fixtures directory or file | +| `--latency` | `-l` | `0` | Latency between SSE chunks (ms) | +| `--chunk-size` | `-c` | `20` | Characters per SSE chunk | +| `--watch` | `-w` | | Watch fixture path for changes and reload | +| `--log-level` | | `info` | Log verbosity: `silent`, `info`, `debug` | +| `--validate-on-load` | | | Validate fixture schemas at startup | +| `--chaos-drop` | | `0` | Chaos: 
probability of 500 errors (0-1) | +| `--chaos-malformed` | | `0` | Chaos: probability of malformed JSON (0-1) | +| `--chaos-disconnect` | | `0` | Chaos: probability of disconnect (0-1) | +| `--metrics` | | | Enable Prometheus metrics at /metrics | +| `--record` | | | Record mode: proxy unmatched to real APIs | +| `--strict` | | | Strict mode: fail on unmatched requests | +| `--provider-*` | | | Upstream URL per provider (with `--record`) | +| `--help` | | | Show help | ```bash # Start with bundled example fixtures -mock-openai +llmock # Custom fixtures on a specific port -mock-openai -p 8080 -f ./my-fixtures +llmock -p 8080 -f ./my-fixtures # Simulate slow responses -mock-openai --latency 100 --chunk-size 5 -``` - -## Advanced Usage +llmock --latency 100 --chunk-size 5 -### Low-level Server +# Record mode: proxy unmatched requests to real APIs and save as fixtures +llmock --record --provider-openai https://api.openai.com --provider-anthropic https://api.anthropic.com -If you need the raw HTTP server without the `MockOpenAI` wrapper: +# Strict mode in CI: fail if any request doesn't match a fixture +llmock --strict -f ./fixtures +``` -```typescript -import { createServer } from "@copilotkit/mock-openai"; +## Documentation -const fixtures = [{ match: { userMessage: "hi" }, response: { content: "Hello!" } }]; +Full API reference, fixture format, E2E patterns, and provider-specific guides: -const { server, journal, url } = await createServer(fixtures, { port: 0 }); -// ... use it ... -server.close(); -``` +**[https://llmock.copilotkit.dev/docs.html](https://llmock.copilotkit.dev/docs.html)** -### Per-Fixture Timing +## Real-World Usage -```typescript -mock.on({ userMessage: "slow" }, { content: "Finally..." }, { latency: 200, chunkSize: 5 }); -``` +[CopilotKit](https://github.com/CopilotKit/CopilotKit) uses llmock across its test suite to verify AI agent behavior across multiple LLM providers without hitting real APIs. 
## License diff --git a/charts/llmock/Chart.yaml b/charts/llmock/Chart.yaml new file mode 100644 index 0000000..5603860 --- /dev/null +++ b/charts/llmock/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: llmock +description: Deterministic mock LLM server for testing (OpenAI, Anthropic, Gemini) +type: application +version: 0.1.0 +appVersion: "1.6.0" diff --git a/charts/llmock/templates/_helpers.tpl b/charts/llmock/templates/_helpers.tpl new file mode 100644 index 0000000..896b8d6 --- /dev/null +++ b/charts/llmock/templates/_helpers.tpl @@ -0,0 +1,40 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "llmock.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "llmock.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "llmock.labels" -}} +helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{ include "llmock.selectorLabels" . }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "llmock.selectorLabels" -}} +app.kubernetes.io/name: {{ include "llmock.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/charts/llmock/templates/deployment.yaml b/charts/llmock/templates/deployment.yaml new file mode 100644 index 0000000..22534ca --- /dev/null +++ b/charts/llmock/templates/deployment.yaml @@ -0,0 +1,70 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "llmock.fullname" . }} + labels: + {{- include "llmock.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "llmock.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "llmock.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: llmock + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - "--fixtures" + - "{{ .Values.fixtures.mountPath }}" + - "--host" + - "0.0.0.0" + - "--port" + - "{{ .Values.service.port }}" + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: http + initialDelaySeconds: 2 + periodSeconds: 5 + {{- with .Values.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- if .Values.fixtures.existingClaim }} + volumeMounts: + - name: fixtures + mountPath: {{ .Values.fixtures.mountPath }} + {{- end }} + {{- if .Values.fixtures.existingClaim }} + volumes: + - name: fixtures + persistentVolumeClaim: + claimName: {{ .Values.fixtures.existingClaim }} + {{- end }} diff --git a/charts/llmock/templates/service.yaml b/charts/llmock/templates/service.yaml new file mode 100644 index 0000000..894b443 --- /dev/null +++ b/charts/llmock/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "llmock.fullname" . }} + labels: + {{- include "llmock.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "llmock.selectorLabels" . | nindent 4 }} diff --git a/charts/llmock/values.yaml b/charts/llmock/values.yaml new file mode 100644 index 0000000..c33a2ea --- /dev/null +++ b/charts/llmock/values.yaml @@ -0,0 +1,31 @@ +nameOverride: "" +fullnameOverride: "" + +replicaCount: 1 + +image: + repository: ghcr.io/copilotkit/llmock + tag: "" + pullPolicy: IfNotPresent + +service: + type: ClusterIP + port: 4010 + +fixtures: + # Mount path inside the container where fixture files are served from + mountPath: /app/fixtures + # If set, use an existing PVC for fixtures + existingClaim: "" + +resources: {} + # limits: + # cpu: 200m + # memory: 256Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} +tolerations: [] +affinity: {} diff --git a/docs/CNAME b/docs/CNAME index 3ce79fb..bd52770 100644 --- a/docs/CNAME +++ b/docs/CNAME @@ -1 +1 @@ -mock-openai.copilotkit.dev +llmock.copilotkit.dev diff --git a/docs/aws-bedrock.html b/docs/aws-bedrock.html new file mode 100644 index 0000000..09cf238 --- /dev/null +++ b/docs/aws-bedrock.html @@ -0,0 +1,389 @@ + + + + + + AWS Bedrock — llmock + + + + + + + + +
+ + +
+

AWS Bedrock

+

+ llmock supports the AWS Bedrock Claude invoke and Converse API endpoints — both + streaming and non-streaming. Point the AWS SDK at your llmock instance and fixtures match + against the Bedrock-format requests, returning responses in the authentic Bedrock format + including AWS Event Stream binary framing for streaming. +

+ +

How It Works

+

+ AWS Bedrock uses URL patterns like + /model/{modelId}/invoke and + /model/{modelId}/invoke-with-response-stream to call foundation models. The + request body uses the Anthropic Messages format with an additional + anthropic_version field, and does not include a + model field in the body (the model is in the URL). +

+

+ llmock detects the Bedrock URL pattern, extracts the model ID, translates the request to + the internal fixture-matching format, and returns the response in the Anthropic Messages + API format — which is identical to the Bedrock Claude response format. For + streaming, responses use the AWS Event Stream binary framing protocol. +

+

+ llmock also supports the Converse API (/model/{modelId}/converse + and /model/{modelId}/converse-stream), which uses a different + request/response format with camelCase field names. +

+ +

URL Patterns

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Bedrock URLDescription
POST /model/{modelId}/invokeNon-streaming Claude invoke
POST /model/{modelId}/invoke-with-response-streamStreaming Claude invoke (AWS Event Stream binary)
POST /model/{modelId}/converseConverse API (non-streaming)
POST /model/{modelId}/converse-streamConverse API (streaming, AWS Event Stream binary)
+ +

Request Format

+

+ Bedrock Claude requests use the Anthropic Messages format. The + anthropic_version field is accepted but not validated. The model is taken + from the URL path, not the request body. +

+ +
+
+ bedrock request body json +
+
{
+  "anthropic_version": "bedrock-2023-05-31",
+  "max_tokens": 512,
+  "messages": [
+    { "role": "user", "content": "Hello" }
+  ],
+  "system": "You are helpful"
+}
+
+ +

Response Format

+

+ Bedrock Claude responses are identical to the Anthropic Messages API non-streaming + responses: +

+ +
+
text response json
+
{
+  "id": "msg_...",
+  "type": "message",
+  "role": "assistant",
+  "content": [{ "type": "text", "text": "Hello!" }],
+  "stop_reason": "end_turn",
+  "stop_sequence": null,
+  "usage": { "input_tokens": 10, "output_tokens": 5 }
+}
+
+ +

Model Resolution

+

+ The model ID is extracted from the URL path. It is used both for fixture matching and + for inclusion in the response body. Bedrock model IDs typically look like:

+
    +
  • anthropic.claude-3-5-sonnet-20241022-v2:0
  • +
  • anthropic.claude-3-haiku-20240307-v1:0
  • +
  • anthropic.claude-3-opus-20240229-v1:0
  • +
+

Write fixtures that match by Bedrock model ID:

+ +
+
+ fixture matching by Bedrock model ID json +
+
{
+  "match": {
+    "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "userMessage": "hello"
+  },
+  "response": {
+    "content": "Hello from Bedrock!"
+  }
+}
+
+ +

SDK Configuration

+

To point the AWS SDK Bedrock Runtime client at llmock, configure the endpoint URL:

+ +
+
bedrock-sdk.ts ts
+
import { BedrockRuntimeClient, InvokeModelCommand } from "@aws-sdk/client-bedrock-runtime";
+
+const client = new BedrockRuntimeClient({
+  region: "us-east-1",
+  endpoint: "http://localhost:4010",  // llmock URL (default CLI port)
+  credentials: { accessKeyId: "mock", secretAccessKey: "mock" },
+});
+
+const response = await client.send(new InvokeModelCommand({
+  modelId: "anthropic.claude-3-5-sonnet-20241022-v2:0",
+  contentType: "application/json",
+  body: JSON.stringify({
+    anthropic_version: "bedrock-2023-05-31",
+    max_tokens: 512,
+    messages: [{ role: "user", content: "Hello" }],
+  }),
+}));
+
+ +

Fixture Examples

+ +
+
+ text response fixture json +
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "hello" },
+      "response": { "content": "Hi there!" }
+    },
+    {
+      "match": { "userMessage": "weather" },
+      "response": {
+        "toolCalls": [{
+          "name": "get_weather",
+          "arguments": "{\"city\":\"SF\"}"
+        }]
+      }
+    }
+  ]
+}
+
+ +
+

+ Fixtures are shared across all providers. The same fixture file works for OpenAI, Claude + Messages, Gemini, Azure, and Bedrock endpoints — llmock translates each provider's + request format to a common internal format before matching. +

+
+ +

Streaming (invoke-with-response-stream)

+

+ The invoke-with-response-stream endpoint returns responses using the + AWS Event Stream binary protocol. llmock implements this protocol + natively — each response chunk is encoded as a binary frame with CRC32 checksums, + headers, and a JSON payload, exactly as the real Bedrock service sends them. +

+

Streaming events follow the Bedrock Claude streaming sequence:

+
    +
  • + message_start — opens the message with role: "assistant"
  • +
  • content_block_start — begins a content block
  • +
  • + content_block_delta — delivers text chunks (text_delta) or + tool input (input_json_delta)
  • +
  • content_block_stop — closes the content block
  • +
  • + message_stop — closes the message with a stop_reason
  • +
+ +
+
streaming SDK usage ts
+
import { BedrockRuntimeClient, InvokeModelWithResponseStreamCommand } from "@aws-sdk/client-bedrock-runtime";
+
+const client = new BedrockRuntimeClient({
+  region: "us-east-1",
+  endpoint: "http://localhost:4005",
+  credentials: { accessKeyId: "mock", secretAccessKey: "mock" },
+});
+
+const response = await client.send(new InvokeModelWithResponseStreamCommand({
+  modelId: "anthropic.claude-3-5-sonnet-20241022-v2:0",
+  contentType: "application/json",
+  body: JSON.stringify({
+    anthropic_version: "bedrock-2023-05-31",
+    max_tokens: 512,
+    messages: [{ role: "user", content: "Hello" }],
+  }),
+}));
+
+ +

AWS Event Stream Binary Format

+

+ Unlike SSE-based streaming used by OpenAI and Claude, AWS Bedrock streaming uses a + binary event stream protocol. Each frame has the following layout: +

+
+
+ binary frame layout text +
+
[total_length: 4B uint32-BE]
+[headers_length: 4B uint32-BE]
+[prelude_crc32: 4B CRC32 of first 8 bytes]
+[headers: variable-length string key-value pairs]
+[payload: raw JSON bytes]
+[message_crc32: 4B CRC32 of entire frame minus last 4 bytes]
+
+

+ llmock encodes these frames with proper CRC32 checksums, so the AWS SDK can decode them + natively. The :event-type header in each frame carries the event name (e.g. + chunk), and the :content-type header is set to + application/json. +

+ +

Converse API

+

+ The Converse API is AWS Bedrock's provider-agnostic conversation interface. It uses + camelCase field names and a different request structure than the Claude-native invoke + endpoints. llmock supports both /model/{modelId}/converse (non-streaming) and + /model/{modelId}/converse-stream (streaming via Event Stream binary). +

+ +
+
+ converse request body json +
+
{
+  "messages": [
+    {
+      "role": "user",
+      "content": [{ "text": "Hello" }]
+    }
+  ],
+  "system": [{ "text": "You are helpful" }],
+  "inferenceConfig": { "maxTokens": 512 }
+}
+
+ +
+
converse response json
+
{
+  "output": {
+    "message": {
+      "role": "assistant",
+      "content": [{ "text": "Hello!" }]
+    }
+  },
+  "stopReason": "end_turn",
+  "usage": { "inputTokens": 0, "outputTokens": 0, "totalTokens": 0 }
+}
+
+ +

+ The Converse API also supports tool calls via toolUse and + toolResult content blocks, and tool definitions via the + toolConfig field. llmock translates all of these to the unified internal + format for fixture matching. +

+
+
+
+ +
+ + diff --git a/docs/azure-openai.html b/docs/azure-openai.html new file mode 100644 index 0000000..c17a494 --- /dev/null +++ b/docs/azure-openai.html @@ -0,0 +1,217 @@ + + + + + + Azure OpenAI — llmock + + + + + + + + +
+ + +
+

Azure OpenAI

+

+ llmock routes Azure OpenAI deployment-based URLs to the existing chat completions and + embeddings handlers. Point the Azure OpenAI SDK at your llmock instance and fixtures work + exactly as they do with the standard OpenAI endpoints. +

+ +

How It Works

+

+ Azure OpenAI uses a different URL pattern than standard OpenAI. Instead of + /v1/chat/completions, Azure uses + /openai/deployments/{deployment-id}/chat/completions with an + api-version query parameter. +

+

+ llmock detects these Azure-style URLs and rewrites them to the standard paths before + routing to the existing handlers. The deployment ID is extracted and used as a model + fallback when the request body omits the model field (which Azure requests + commonly do, since the model is implied by the deployment). +

+ +

URL Pattern Mapping

+ + + + + + + + + + + + + + + + + +
Azure URLMapped To
/openai/deployments/{id}/chat/completions/v1/chat/completions
/openai/deployments/{id}/embeddings/v1/embeddings
+ +

Model Resolution

+

+ When a request arrives via an Azure deployment URL, llmock resolves the model name using + these rules: +

+
    +
  1. + If the request body includes a model field, that value is used (body takes + precedence). +
  2. +
  3. + If the body omits model, the deployment ID from the URL is used as the + model name for fixture matching. +
  4. +
+

This means you can write fixtures that match by deployment name:

+ +
+
+ fixture matching by deployment ID json +
+
{
+  "match": {
+    "model": "my-gpt4-deployment",
+    "userMessage": "hello"
+  },
+  "response": {
+    "content": "Hello from Azure!"
+  }
+}
+
+ +

Authentication

+

+ llmock does not validate authentication tokens, but it accepts both Azure-style and + standard auth headers without rejecting the request: +

+
    +
  • api-key: your-azure-key (Azure-native header)
  • +
  • Authorization: Bearer your-token (standard OAuth/OpenAI header)
  • +
+ +

SDK Configuration

+

To point the Azure OpenAI Node.js SDK at llmock, set the endpoint to your llmock URL:

+ +
+
azure-openai-sdk.ts ts
+
import { AzureOpenAI } from "openai";
+
+const client = new AzureOpenAI({
+  endpoint: "http://localhost:4005",  // llmock URL
+  apiKey: "mock-key",
+  apiVersion: "2024-10-21",
+  deployment: "my-gpt4-deployment",
+});
+
+const response = await client.chat.completions.create({
+  model: "my-gpt4-deployment",
+  messages: [{ role: "user", content: "hello" }],
+});
+
+ +

Environment Variables

+

+ When using the Azure OpenAI SDK, you can configure the endpoint via environment variables: +

+ +
+
.env sh
+
# Point Azure SDK at llmock
+AZURE_OPENAI_ENDPOINT=http://localhost:4005
+AZURE_OPENAI_API_KEY=mock-key
+
+ +
+

+ The api-version query parameter is accepted but ignored — llmock + responds identically regardless of which API version is requested. This means you can + test against any API version without changing fixtures. +

+
+
+
+
+ +
+ + diff --git a/docs/chaos-testing.html b/docs/chaos-testing.html new file mode 100644 index 0000000..e0dfc67 --- /dev/null +++ b/docs/chaos-testing.html @@ -0,0 +1,306 @@ + + + + + + Chaos Testing — llmock + + + + + + + + +
+ + +
+

Chaos Testing

+

+ llmock provides probabilistic failure injection to test how your application handles + unreliable LLM APIs. Three failure modes can be configured at the server, fixture, or + per-request level. +

+ +

Failure Modes

+ + + + + + + + + + + + + + + + + + + + + + + + + +
ModeActionDescription
dropHTTP 500 + Returns a 500 error with + {"error":{"message":"Chaos: request dropped","code":"chaos_drop"}} +
malformedBroken JSON + Returns HTTP 200 with invalid JSON body: + {malformed json: <<<chaos>>> +
disconnectConnection destroyedDestroys the TCP connection immediately with no response
+ +

Precedence

+

+ Chaos configuration is resolved with a three-level precedence hierarchy. Higher levels + override lower ones: +

+
    +
  1. Per-request headers (highest) — override everything
  2. +
  3. Fixture-level config — overrides server defaults
  4. +
  5. Server-level defaults (lowest)
  6. +
+

+ Within a single level, modes are evaluated in order: drop, malformed, disconnect. The + first mode that triggers (based on its probability) wins. +

+ +

Quick Start

+ +
+
chaos-quick-start.ts ts
+
import { LLMock } from "@copilotkit/llmock";
+
+const mock = new LLMock();
+mock.onMessage("hello", { content: "Hi!" });
+
+// 50% of all requests will be dropped with a 500
+mock.setChaos({ dropRate: 0.5 });
+
+await mock.start();
+
+// Later, remove chaos
+mock.clearChaos();
+
+ +

Programmatic API

+ +
+
+ Programmatic chaos control ts +
+
// Set server-level chaos (returns `this` for chaining)
+mock.setChaos({
+  dropRate: 0.1,        // 10% drop rate
+  malformedRate: 0.05,  // 5% malformed rate
+  disconnectRate: 0.02, // 2% disconnect rate
+});
+
+// Remove all server-level chaos
+mock.clearChaos();
+
+ +

Fixture-Level Chaos

+

+ Attach a chaos config to individual fixtures so only specific responses + experience failures: +

+ +
+
chaos-fixture.json json
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "unstable" },
+      "response": { "content": "This might fail!" },
+      "chaos": {
+        "dropRate": 0.3,
+        "malformedRate": 0.2,
+        "disconnectRate": 0.1
+      }
+    },
+    {
+      "match": { "userMessage": "stable" },
+      "response": { "content": "This always works." }
+    }
+  ]
+}
+
+ +

Per-Request Headers

+

+ Override chaos rates on individual requests using HTTP headers. Values are floats between + 0 and 1: +

+ + + + + + + + + + + + + + + + + + + + + +
HeaderControls
x-llmock-chaos-dropDrop rate (0–1)
x-llmock-chaos-malformedMalformed rate (0–1)
x-llmock-chaos-disconnectDisconnect rate (0–1)
+ +
+
+ Per-request chaos via headers ts +
+
// Force 100% disconnect on this specific request
+await fetch(`${mock.url}/v1/chat/completions`, {
+  method: "POST",
+  headers: {
+    "Content-Type": "application/json",
+    "x-llmock-chaos-disconnect": "1.0",
+  },
+  body: JSON.stringify({ model: "gpt-4", messages: [...] }),
+});
+
+ +

CLI Flags

+

Set server-level chaos from the command line:

+ +
+
CLI chaos flags bash
+
npx llmock --fixtures ./fixtures \
+  --chaos-drop 0.1 \
+  --chaos-malformed 0.05 \
+  --chaos-disconnect 0.02
+
+ +

Journal Tracking

+

+ When chaos triggers, the journal entry includes a chaosAction field recording + which failure mode was applied: +

+ +
+
+ Journal entry with chaos json +
+
{
+  "method": "POST",
+  "path": "/v1/chat/completions",
+  "response": {
+    "status": 500,
+    "fixture": { "...": "..." },
+    "chaosAction": "drop"
+  }
+}
+
+

+ The chaosAction values are "drop", "malformed", or + "disconnect". The status codes are 500 for drop, 200 for malformed, and 0 for + disconnect (connection destroyed). +

+ +

Prometheus Metrics

+

+ When metrics are enabled (--metrics), each chaos trigger increments the + llmock_chaos_triggered_total counter with an action label: +

+ +
+
Metrics output text
+
# TYPE llmock_chaos_triggered_total counter
+llmock_chaos_triggered_total{action="drop"} 3
+llmock_chaos_triggered_total{action="malformed"} 1
+llmock_chaos_triggered_total{action="disconnect"} 2
+
+
+
+
+ +
+ + diff --git a/docs/chat-completions.html b/docs/chat-completions.html new file mode 100644 index 0000000..353d4f2 --- /dev/null +++ b/docs/chat-completions.html @@ -0,0 +1,276 @@ + + + + + + Chat Completions — llmock + + + + + + + + + +
+ + +
+

OpenAI Chat Completions

+

+ The POST /v1/chat/completions endpoint supports both streaming (SSE) and + non-streaming JSON responses, including text content and tool calls. This is the most + commonly used endpoint. +

+ +

Endpoint

+ + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1/chat/completionsSSE (stream: true) or JSON (stream: false)
+ +

Unit Test: Text Response

+

+ Using the programmatic API with vitest, register a fixture and assert on the response. +

+ +
+
+ text-response.test.ts ts +
+
import { LLMock } from "@copilotkit/llmock";
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+
+let mock: LLMock;
+
+beforeAll(async () => {
+  mock = new LLMock();
+  await mock.start();
+});
+
+afterAll(async () => {
+  await mock.stop();
+});
+
+it("non-streaming text response", async () => {
+  mock.on({ userMessage: "hello" }, { content: "Hello! How can I help?" });
+
+  const res = await fetch(`${mock.url}/v1/chat/completions`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "gpt-4",
+      messages: [{ role: "user", content: "hello" }],
+      stream: false,
+    }),
+  });
+
+  const body = await res.json();
+  expect(body.choices[0].message.content).toBe("Hello! How can I help?");
+  expect(body.object).toBe("chat.completion");
+  expect(body.id).toMatch(/^chatcmpl-/);
+});
+
+ +

Unit Test: Tool Calls

+ +
+
tool-calls.test.ts ts
+
it("returns tool call in streaming mode", async () => {
+  mock.on(
+    { userMessage: "weather" },
+    { toolCalls: [{ name: "get_weather", arguments: '{"city":"SF"}' }] }
+  );
+
+  const res = await fetch(`${mock.url}/v1/chat/completions`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "gpt-4",
+      messages: [{ role: "user", content: "what is the weather?" }],
+      stream: true,
+    }),
+  });
+
+  const text = await res.text();
+  expect(text).toContain("get_weather");
+  expect(text).toContain("data: [DONE]");
+});
+
+ +

Integration Test: Streaming SSE

+ +
+
+ streaming-integration.test.ts ts +
+
import { createServer, type ServerInstance } from "@copilotkit/llmock/server";
+
+const instance = await createServer(
+  [{ match: { userMessage: "hello" }, response: { content: "Hello! How can I help?" } }],
+  { port: 0, chunkSize: 10 }
+);
+
+const res = await httpPost(`${instance.url}/v1/chat/completions`, {
+  model: "gpt-4",
+  messages: [{ role: "user", content: "hello" }],
+  stream: true,
+});
+
+// Parse SSE chunks
+const chunks = res.body
+  .split("\n\n")
+  .filter(b => b.startsWith("data: ") && !b.includes("[DONE]"))
+  .map(b => JSON.parse(b.slice(6)));
+
+// First chunk has the role
+expect(chunks[0].choices[0].delta.role).toBe("assistant");
+
+// Reassemble content
+const content = chunks.map(c => c.choices[0].delta.content ?? "").join("");
+expect(content).toBe("Hello! How can I help?");
+
+// Last chunk has finish_reason
+expect(chunks.at(-1).choices[0].finish_reason).toBe("stop");
+
+ +

JSON Fixture

+ +
+
fixtures/chat.json json
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "hello" },
+      "response": { "content": "Hello! How can I help?" }
+    },
+    {
+      "match": { "userMessage": "weather" },
+      "response": {
+        "toolCalls": [{
+          "name": "get_weather",
+          "arguments": "{\"city\":\"SF\"}"
+        }]
+      }
+    }
+  ]
+}
+
+ +

Response Format

+ +

Non-streaming (stream: false)

+

Returns a single JSON object matching the OpenAI ChatCompletion type:

+
    +
  • id — starts with chatcmpl-
  • +
  • object"chat.completion"
  • +
  • created — Unix timestamp
  • +
  • model — echoes the requested model
  • +
  • choices[0].message.content — the response text
  • +
  • choices[0].message.refusal — always null
  • +
  • + choices[0].finish_reason"stop" or + "tool_calls" +
  • +
  • usage — token counts (zeroed in mock)
  • +
+ +

Streaming (stream: true)

+

+ Returns text/event-stream with data: {json}\n\n lines, ending + with data: [DONE]\n\n. Each chunk matches the OpenAI + ChatCompletionChunk type with delta instead of + message. +

+
+
+ +
+ +
+ + diff --git a/docs/claude-messages.html b/docs/claude-messages.html new file mode 100644 index 0000000..d034278 --- /dev/null +++ b/docs/claude-messages.html @@ -0,0 +1,193 @@ + + + + + + Claude Messages — llmock + + + + + + + + +
+ + +
+

Anthropic Claude Messages API

+

+ The POST /v1/messages endpoint implements the Anthropic Messages API with + streaming SSE using event: + data: format, including content + blocks for text and tool use. +

+ +

Endpoint

+ + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1/messagesSSE (event: + data:) or JSON
+ +

Unit Test: Text Streaming

+ +
+
claude-text.test.ts ts
+
const textFixture = {
+  match: { userMessage: "hello" },
+  response: { content: "Hi there!" },
+};
+
+const instance = await createServer([textFixture]);
+
+const res = await post(`${instance.url}/v1/messages`, {
+  model: "claude-sonnet-4-20250514",
+  max_tokens: 1024,
+  messages: [{ role: "user", content: "hello" }],
+  stream: true,
+});
+
+const events = parseClaudeSSEEvents(res.body);
+const types = events.map(e => e.type);
+
+expect(types).toContain("message_start");
+expect(types).toContain("content_block_start");
+expect(types).toContain("content_block_delta");
+expect(types).toContain("message_stop");
+
+ +

Unit Test: Tool Use

+ +
+
claude-tools.test.ts ts
+
const toolFixture = {
+  match: { userMessage: "weather" },
+  response: {
+    toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }]
+  },
+};
+
+const instance = await createServer([toolFixture]);
+
+const res = await post(`${instance.url}/v1/messages`, {
+  model: "claude-sonnet-4-20250514",
+  max_tokens: 1024,
+  messages: [{ role: "user", content: "what is the weather?" }],
+  stream: true,
+});
+
+const events = parseClaudeSSEEvents(res.body);
+const blockStart = events.find(
+  e => e.type === "content_block_start"
+    && e.content_block?.type === "tool_use"
+);
+expect(blockStart.content_block.name).toBe("get_weather");
+
+ +

SSE Event Sequence

+

Claude Messages streaming produces these events:

+
    +
  1. message_start — message metadata (id, model, role, usage)
  2. +
  3. content_block_start — text or tool_use block
  4. +
  5. content_block_delta — text_delta or input_json_delta
  6. +
  7. content_block_stop
  8. +
  9. message_delta — stop_reason, usage
  10. +
  11. message_stop
  12. +
+ +

Request Translation

+

+ llmock internally translates Anthropic requests to a unified format for fixture matching. + The claudeToCompletionRequest() function handles mapping Anthropic message + arrays (including content block arrays) to OpenAI-style messages so the same fixtures work + across all providers. +

+
+
+
+ +
+ + diff --git a/docs/cohere.html b/docs/cohere.html new file mode 100644 index 0000000..162f738 --- /dev/null +++ b/docs/cohere.html @@ -0,0 +1,279 @@ + + + + + + Cohere — llmock + + + + + + + + +
+ + +
+

Cohere v2 Chat API

+

+ The POST /v2/chat endpoint implements the Cohere v2 Chat API with typed SSE + streaming events and dual usage tracking (billed_units and + tokens). +

+ +

Endpoint

+ + + + + + + + + + + + + + + +
MethodPathDescription
POST/v2/chatCohere v2 Chat (SSE streaming or JSON)
+ +

Key Features

+
    +
  • + Model field required. Unlike OpenAI, Cohere requires the + model field — requests without it receive a 400 error. +
  • +
  • + Typed SSE events. Streaming uses event: + + data: pairs with event types like message-start, + content-delta, tool-call-start, etc. +
  • +
  • + Dual usage tracking. Responses include both + billed_units (input_tokens, output_tokens, search_units, classifications) + and tokens (input_tokens, output_tokens). llmock returns zeroed values. +
  • +
  • + Defaults to non-streaming. Set "stream": true explicitly + to enable SSE streaming. +
  • +
+ +

Quick Start

+ +
+
+ cohere-quick-start.ts ts +
+
import { LLMock } from "@copilotkit/llmock";
+
+const mock = new LLMock();
+mock.onMessage("hello", { content: "Hi from Cohere!" });
+await mock.start();
+
+// Point the Cohere SDK at llmock
+const res = await fetch(`${mock.url}/v2/chat`, {
+  method: "POST",
+  headers: { "Content-Type": "application/json" },
+  body: JSON.stringify({
+    model: "command-r-plus",
+    messages: [{ role: "user", content: "hello" }],
+  }),
+});
+
+ +

SSE Event Sequence (Text)

+

+ When stream: true, Cohere produces these typed events for text responses: +

+
    +
  1. + message-start — message metadata (role, empty content/tool arrays) +
  2. +
  3. content-start — content block type declaration
  4. +
  5. content-delta — text chunks
  6. +
  7. content-end
  8. +
  9. message-end — finish_reason (COMPLETE) and usage
  10. +
+ +

SSE Event Sequence (Tool Calls)

+

For tool call responses, the event sequence is:

+
    +
  1. message-start
  2. +
  3. tool-plan-delta — tool planning text
  4. +
  5. tool-call-start — tool call ID, function name
  6. +
  7. tool-call-delta — chunked arguments JSON
  8. +
  9. tool-call-end
  10. +
  11. message-end — finish_reason (TOOL_CALL) and usage
  12. +
+ +

Non-Streaming Response

+ +
+
+ /v2/chat non-streaming response json +
+
{
+  "id": "msg_abc123",
+  "finish_reason": "COMPLETE",
+  "message": {
+    "role": "assistant",
+    "content": [{ "type": "text", "text": "Hi from Cohere!" }],
+    "tool_calls": [],
+    "tool_plan": "",
+    "citations": []
+  },
+  "usage": {
+    "billed_units": {
+      "input_tokens": 0,
+      "output_tokens": 0,
+      "search_units": 0,
+      "classifications": 0
+    },
+    "tokens": { "input_tokens": 0, "output_tokens": 0 }
+  }
+}
+
+ +

Fixture Examples

+ +
+
+ cohere-fixtures.json json +
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "hello" },
+      "response": { "content": "Hi from Cohere!" }
+    },
+    {
+      "match": { "userMessage": "search" },
+      "response": {
+        "toolCalls": [
+          {
+            "name": "web_search",
+            "arguments": "{\"query\":\"latest news\"}"
+          }
+        ]
+      }
+    }
+  ]
+}
+
+ +

Streaming Event Wire Format

+

Each SSE event is a typed event: + data: pair:

+ +
+
+ Cohere SSE wire format text +
+
event: message-start
+data: {"id":"msg_abc123","type":"message-start","delta":{"message":{"role":"assistant","content":[],"tool_plan":"","tool_calls":[],"citations":[]}}}
+
+event: content-start
+data: {"type":"content-start","index":0,"delta":{"message":{"content":{"type":"text"}}}}
+
+event: content-delta
+data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"type":"text","text":"Hi "}}}}
+
+event: content-delta
+data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"type":"text","text":"from Cohere!"}}}}
+
+event: content-end
+data: {"type":"content-end","index":0}
+
+event: message-end
+data: {"type":"message-end","delta":{"finish_reason":"COMPLETE","usage":{"billed_units":{"input_tokens":0,"output_tokens":0,"search_units":0,"classifications":0},"tokens":{"input_tokens":0,"output_tokens":0}}}}
+
+ +

Request Translation

+

+ llmock internally translates Cohere requests to a unified + ChatCompletionRequest format for fixture matching. The + cohereToCompletionRequest() function maps Cohere message roles (including + tool with tool_call_id) and tool definitions to the common + format. +

+
+
+
+ +
+ + diff --git a/docs/compatible-providers.html b/docs/compatible-providers.html new file mode 100644 index 0000000..2bb2179 --- /dev/null +++ b/docs/compatible-providers.html @@ -0,0 +1,324 @@ + + + + + + Compatible Providers — llmock + + + + + + + + +
+ + +
+

Compatible Providers

+

+ Many LLM providers use OpenAI-compatible + /v1/chat/completions endpoints. llmock works with all of them out of the box + — just point the SDK's base URL at your llmock instance. +

+ +

Supported Providers

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ProviderBase URL PathNotes
Mistral/v1/chat/completionsStandard OpenAI-compatible endpoint
Groq/openai/v1/chat/completionsUses /openai/ prefix — llmock strips it automatically
Ollama/v1/chat/completionsStandard OpenAI-compatible endpoint
Together AI/v1/chat/completionsStandard OpenAI-compatible endpoint
vLLM/v1/chat/completionsStandard OpenAI-compatible endpoint
Cohere/v1/chat/completions + OpenAI-compatible endpoint; see Cohere page for native + endpoints +
Vertex AI/v1/projects/.../models/:model:*Uses Gemini handler; see Vertex AI page
+ +

How It Works

+
    +
  • + Most OpenAI-compatible providers send requests to + /v1/chat/completions with the same JSON format — llmock already + handles this natively +
  • +
  • + Groq uses a /openai/v1/ prefix for all endpoints. llmock automatically + strips the /openai prefix, so /openai/v1/chat/completions, + /openai/v1/embeddings, and /openai/v1/models all work + transparently +
  • +
  • + Model names are passed through as-is — use + mistral-large-latest, llama-3.3-70b-versatile, + llama3.2, or any other model name in your fixtures +
  • +
+ +

Mistral Configuration

+

+ Mistral's SDK uses the standard OpenAI-compatible endpoint. Point + MISTRAL_API_ENDPOINT at llmock: +

+ +
+
+ Environment variables bash +
+
export MISTRAL_API_ENDPOINT="http://localhost:5555/v1"
+export MISTRAL_API_KEY="mock-key"
+
+ +
+
Programmatic setup ts
+
import { Mistral } from "@mistralai/mistralai";
+
+const client = new Mistral({
+  apiKey: "mock-key",
+  serverURL: "http://localhost:5555/v1",
+});
+
+ +

Groq Configuration

+

+ Groq's SDK sends requests to /openai/v1/chat/completions (note the + /openai prefix). llmock handles this automatically. +

+ +
+
+ Environment variables bash +
+
export GROQ_BASE_URL="http://localhost:5555/openai/v1"
+export GROQ_API_KEY="mock-key"
+
+ +
+
Programmatic setup ts
+
import Groq from "groq-sdk";
+
+const client = new Groq({
+  apiKey: "mock-key",
+  baseURL: "http://localhost:5555/openai/v1",
+});
+
+ +

Ollama Configuration

+

+ Ollama exposes an OpenAI-compatible endpoint locally. Point the OpenAI SDK at llmock + instead: +

+ +
+
+ Environment variables bash +
+
export OPENAI_BASE_URL="http://localhost:5555/v1"
+export OPENAI_API_KEY="mock-key"
+
+ +
+
Programmatic setup ts
+
import OpenAI from "openai";
+
+// Same SDK you'd use with Ollama, just different base URL
+const client = new OpenAI({
+  apiKey: "mock-key",
+  baseURL: "http://localhost:5555/v1",
+});
+
+ +

Together AI Configuration

+ +
+
+ Environment variables bash +
+
export TOGETHER_BASE_URL="http://localhost:5555/v1"
+export TOGETHER_API_KEY="mock-key"
+
+ +

vLLM Configuration

+ +
+
+ Environment variables bash +
+
# vLLM uses the OpenAI SDK — just change the base URL
+export OPENAI_BASE_URL="http://localhost:5555/v1"
+export OPENAI_API_KEY="mock-key"
+
+ +

Example Fixture

+

+ The same fixture works for all compatible providers. Model names are passed through + — match on whatever model name your code sends: +

+ +
+
+ fixtures/compat.json json +
+
{
+  "fixtures": [
+    {
+      "match": {
+        "model": "mistral-large-latest",
+        "userMessage": "hello"
+      },
+      "response": {
+        "content": "Bonjour! How can I help?"
+      }
+    },
+    {
+      "match": {
+        "model": "llama-3.3-70b-versatile",
+        "userMessage": "hello"
+      },
+      "response": {
+        "content": "Hey there! What can I do for you?"
+      }
+    },
+    {
+      "match": { "userMessage": "hello" },
+      "response": {
+        "content": "Hi! I'm a catch-all response."
+      }
+    }
+  ]
+}
+
+ +
+

+ The /openai/v1/* prefix alias also works for + /openai/v1/embeddings and /openai/v1/models — any + /openai/-prefixed path is transparently routed to the corresponding + /v1/ endpoint. +

+
+ +
+

+ Ollama native endpoints: In addition to the OpenAI-compatible endpoint + listed above, Ollama has its own native /api/chat and + /api/generate endpoints. llmock supports these natively — see the + Ollama page for details on the native endpoint format. +

+
+
+
+
+ +
+ + diff --git a/docs/docker.html b/docs/docker.html new file mode 100644 index 0000000..4f71441 --- /dev/null +++ b/docs/docker.html @@ -0,0 +1,230 @@ + + + + + + Docker & Helm — llmock + + + + + + + + +
+ + +
+

Docker & Helm

+

+ Run llmock as a container in Docker or deploy it to Kubernetes with the included Helm + chart. The image is based on node:22-alpine with zero runtime dependencies. +

+ +

Docker

+ +

Build the image

+
+
Build shell
+
docker build -t llmock .
+
+ +

Run with local fixtures

+
+
Run shell
+
# Mount your fixture directory into the container
+docker run -p 4010:4010 -v $(pwd)/fixtures:/fixtures llmock
+
+# Custom port (passing args replaces the image CMD, so re-add --host 0.0.0.0)
+docker run -p 5555:5555 llmock --fixtures /fixtures --port 5555 --host 0.0.0.0
+
+# Pull from GitHub Container Registry
+docker pull ghcr.io/copilotkit/llmock:latest
+docker run -p 4010:4010 -v $(pwd)/fixtures:/fixtures ghcr.io/copilotkit/llmock
+
+ +

Dockerfile

+

+ The multi-stage Dockerfile builds the TypeScript source and copies only the compiled + output: +

+ +
+
Dockerfile docker
+
# --- Build stage ---
+FROM node:22-alpine AS build
+RUN corepack enable && corepack prepare pnpm@10.28.2 --activate
+WORKDIR /app
+COPY package.json pnpm-lock.yaml ./
+RUN pnpm install --frozen-lockfile
+COPY tsconfig.json tsdown.config.ts ./
+COPY src/ src/
+RUN pnpm run build
+
+# --- Production stage ---
+FROM node:22-alpine
+WORKDIR /app
+COPY --from=build /app/dist/ dist/
+COPY fixtures/ fixtures/
+EXPOSE 4010
+ENTRYPOINT ["node", "dist/cli.js"]
+CMD ["--fixtures", "/fixtures", "--host", "0.0.0.0"]
+
+ +

Helm Chart

+

Deploy to Kubernetes using the Helm chart in charts/llmock/.

+ +

Install

+
+
Helm install shell
+
helm install llmock ./charts/llmock
+
+# With custom values
+helm install llmock ./charts/llmock \
+  --set image.tag=1.4.0 \
+  --set service.port=5555 \
+  --set replicaCount=2
+
+ +

Configuration (values.yaml)

+ +
+
+ charts/llmock/values.yaml yaml +
+
replicaCount: 1
+
+image:
+  repository: ghcr.io/copilotkit/llmock
+  tag: ""            # defaults to Chart appVersion
+  pullPolicy: IfNotPresent
+
+service:
+  type: ClusterIP
+  port: 4010
+
+fixtures:
+  mountPath: /app/fixtures
+  existingClaim: ""  # Use a PVC for fixture files
+
+resources: {}
+  # limits:
+  #   cpu: 200m
+  #   memory: 256Mi
+
+ +

Fixture Loading

+

+ To load custom fixtures in Kubernetes, create a PersistentVolumeClaim with your fixture + JSON files and set fixtures.existingClaim in your values. The chart mounts + the PVC at fixtures.mountPath (default /app/fixtures). +

+ +

Health Checks

+

+ The deployment includes liveness and readiness probes using httpGet on + /health (liveness, starts after 5 seconds) and /ready + (readiness, starts after 2 seconds). +

+ +

v1.6.0 Features

+

The Docker image supports all v1.6.0 features out of the box:

+
    +
  • + Chaos testing — configure via --chaos-drop, + --chaos-malformed, and --chaos-disconnect flags +
  • +
  • + Prometheus metrics — exposed at /metrics when + enabled with --metrics +
  • +
  • + Record & replay — proxy to real APIs with + --record flag +
  • +
  • + Strict mode — return 503 for unmatched requests with + --strict +
  • +
  • Streaming physics — TTFT, TPS, and jitter simulation
  • +
  • AWS Bedrock streaming — Event Stream binary protocol
  • +
  • Converse API — Bedrock Converse and Converse-stream
  • +
+
+
+
+ +
+ + diff --git a/docs/docs.html b/docs/docs.html new file mode 100644 index 0000000..7df43c9 --- /dev/null +++ b/docs/docs.html @@ -0,0 +1,463 @@ + + + + + + Documentation — llmock + + + + + + + + + + + +
+ + + + +
+

llmock Documentation

+

+ llmock is a deterministic mock LLM server for testing. It runs a real HTTP server that any + process on the machine can reach, serving fixture-driven responses in the authentic SSE + format for OpenAI, Anthropic Claude, and Google Gemini APIs. +

+ +

Quick Start

+ +
+
+ Install + shell +
+
# npm
+npm install @copilotkit/llmock
+
+# pnpm
+pnpm add @copilotkit/llmock
+
+ +
+
+ Programmatic usage (vitest) + ts +
+
import { LLMock } from "@copilotkit/llmock";
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+
+let mock: LLMock;
+
+beforeAll(async () => {
+  mock = new LLMock();
+  await mock.start();
+});
+
+afterAll(async () => {
+  await mock.stop();
+});
+
+it("returns a text response", async () => {
+  mock.on({ userMessage: "hello" }, { content: "Hi there!" });
+
+  const res = await fetch(`${mock.url}/v1/chat/completions`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "gpt-4",
+      messages: [{ role: "user", content: "hello" }],
+      stream: false,
+    }),
+  });
+  const body = await res.json();
+  expect(body.choices[0].message.content).toBe("Hi there!");
+});
+
+ +
+
+ CLI usage + shell +
+
# Start the server with fixture files
+npx llmock --fixtures ./fixtures --port 5555
+
+# Point your app at it
+export OPENAI_BASE_URL=http://localhost:5555/v1
+export OPENAI_API_KEY=mock-key
+
+ +

Supported Endpoints

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
EndpointProviderTransport
POST /v1/chat/completionsOpenAIHTTP SSE / JSON
POST /v1/responsesOpenAIHTTP SSE
WS /v1/responsesOpenAIWebSocket
WS /v1/realtimeOpenAIWebSocket
POST /v1/messagesAnthropicHTTP SSE / JSON
POST /v1beta/models/:model:*Google GeminiHTTP SSE / JSON
WS /ws/google.ai.generativelanguage.*Google Gemini LiveWebSocket
POST /v1/embeddingsOpenAIJSON
POST /openai/v1/chat/completionsGroq / OpenAI-CompatibleHTTP SSE / JSON
POST /model/{modelId}/invokeAWS BedrockJSON
POST /model/{modelId}/invoke-with-response-streamAWS BedrockAWS Event Stream (binary)
POST /model/{modelId}/converseAWS BedrockJSON
POST /model/{modelId}/converse-streamAWS BedrockAWS Event Stream (binary)
POST /v1/projects/.../models/:model:*Vertex AIHTTP SSE / JSON
POST /api/chatOllamaNDJSON / JSON
POST /api/generateOllamaNDJSON / JSON
POST /v2/chatCohereHTTP SSE / JSON
+ +

Feature Pages

+ +
+ + OpenAI +

Chat Completions

+

Streaming and non-streaming text + tool call responses via SSE.

+
+ + OpenAI +

Responses API

+

HTTP SSE and WebSocket transports for the Responses API.

+
+ + Anthropic +

Claude Messages

+

Anthropic-format SSE streaming with content blocks.

+
+ + Google +

Gemini

+

GenerateContent and StreamGenerateContent endpoints.

+
+ + New +

Embeddings

+

OpenAI-compatible /v1/embeddings endpoint with fixture or auto-generated vectors.

+
+ + New +

Structured Output

+

JSON mode and response_format matching for structured responses.

+
+ + New +

Sequential Responses

+

Stateful fixtures that return different responses on each call.

+
+ + Core +

Fixtures

+

JSON fixture file format, matching rules, and validation.

+
+ + Core +

Error Injection

+

One-shot errors, stream truncation, and disconnect simulation.

+
+ + New +

Chaos Testing

+

+ Probabilistic failure injection — random errors, latency spikes, stream + corruption. +

+
+ + Core +

WebSocket APIs

+

Realtime, Responses, and Gemini Live over WebSocket.

+
+ + New +

Record & Replay

+

Proxy to real APIs, record responses as fixtures, then replay deterministically.

+
+ + New +

Prometheus Metrics

+

Expose request counts, latencies, and fixture match rates via /metrics endpoint.

+
+ + Provider +

Ollama

+

Native Ollama /api/chat and /api/generate endpoints.

+
+ + Provider +

Cohere

+

Cohere Chat API with native and OpenAI-compatible endpoints.

+
+ + Provider +

Vertex AI

+

Google Cloud Vertex AI endpoints using the Gemini handler.

+
+ + Ops +

Docker & Helm

+

Container image and Kubernetes Helm chart deployment.

+
+ + CI +

Drift Detection

+

Three-way conformance testing against real APIs.

+
+
+ +

API Reference

+ +

LLMock class

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodDescription
new LLMock(opts?) + Create instance. Options: port, host, + latency, chunkSize, logLevel, + chaos, record, strict, metrics, + streamingProfile +
start()Start the HTTP server. Returns the base URL.
stop()Stop the server.
on(match, response, opts?)Add a fixture with match criteria and response.
onMessage(pattern, response)Shorthand: match on userMessage.
onToolCall(name, response)Shorthand: match on toolName.
onEmbedding(pattern, response)Shorthand: match on inputText (embeddings).
onJsonOutput(pattern, json)Shorthand: match userMessage + responseFormat=json_object.
onToolResult(id, response)Shorthand: match on toolCallId.
nextRequestError(status, body?)Queue a one-shot error for the next request.
addFixture(fixture)Add a raw Fixture object.
loadFixtureFile(path)Load fixtures from a JSON file.
loadFixtureDir(path)Load all fixture JSON files from a directory.
reset()Clear all fixtures and journal entries.
getRequests()Get all journal entries.
getLastRequest()Get the most recent journal entry.
.url / .portAccess the server URL and port.
+
+
+ + + + + diff --git a/docs/drift-detection.html b/docs/drift-detection.html new file mode 100644 index 0000000..dc8f9d4 --- /dev/null +++ b/docs/drift-detection.html @@ -0,0 +1,471 @@ + + + + + + Drift Detection — llmock + + + + + + + + +
+ + +
+

Drift Detection

+

+ A mock that does not match reality is worse than no mock. llmock includes three-way drift + tests that compare SDK types, real API responses, and mock output to catch shape + mismatches before your users do. +

+ +

Three-Way Comparison

+

Each drift test compares three sources:

+ + + + +
+ + + + + + + + + + + + + + + SDK = Real? + + + + SDK = Mock? + + + + Real = Mock? + + +
+
{ }
+

SDK Types

+

What TypeScript types say the shape should be

+
+
+
+

Real API

+

What OpenAI, Claude, Gemini actually return

+
+
+
+

llmock

+

What the mock produces for the same request

+
+
+ + +
+
+
+
+

Mock doesn't match real

+
+

+ llmock needs updating — test fails immediately. The SDK comparison tells us why + it drifted. +

+
+
+
+
+

Provider changed, SDK is behind

+
+

+ Early warning — the real API has new fields that neither the SDK nor llmock know + about yet. +

+
+
+
+
+

All three agree

+
+

No drift — the mock matches reality and the SDK types are current.

+
+
+ +

Running Drift Tests

+ +
+
Run drift tests shell
+
# Set API keys for providers you want to test
+export OPENAI_API_KEY=sk-...
+export ANTHROPIC_API_KEY=sk-ant-...
+export GOOGLE_API_KEY=AI...
+
+# Run all drift tests
+pnpm test:drift
+
+# Run for a specific provider
+pnpm test:drift -- --grep "OpenAI Chat"
+
+ +

Test Files

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FileProviderWhat it tests
openai-chat.drift.tsOpenAIChat Completions (streaming + non-streaming, text + tool calls)
openai-responses.drift.tsOpenAIResponses API (HTTP SSE)
anthropic.drift.tsAnthropicClaude Messages API
gemini.drift.tsGoogleGemini generateContent + streamGenerateContent
ws-realtime.drift.tsOpenAIRealtime API over WebSocket
ws-responses.drift.tsOpenAIResponses API over WebSocket
ws-gemini-live.drift.tsGoogleGemini Live over WebSocket
models.drift.tsAllModel list endpoint conformance
+ +

How Drift Analysis Works

+ +
+
drift-test.ts ts
+
import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema";
+
+// 1. Get the SDK shape (what TypeScript says)
+const sdkShape = openaiChatCompletionShape();
+
+// 2. Call the real API and the mock in parallel
+const [realRes, mockRes] = await Promise.all([
+  openaiChatNonStreaming(config, [{ role: "user", content: "Say hello" }]),
+  httpPost(`${instance.url}/v1/chat/completions`, { /* ... */ }),
+]);
+
+// 3. Extract response shapes
+const realShape = extractShape(realRes.body);
+const mockShape = extractShape(JSON.parse(mockRes.body));
+
+// 4. Three-way comparison
+const diffs = triangulate(sdkShape, realShape, mockShape);
+const report = formatDriftReport("OpenAI Chat (non-streaming text)", diffs);
+
+// 5. Critical diffs fail the test
+if (shouldFail(diffs)) {
+  expect.soft([], report).toEqual(
+    diffs.filter(d => d.severity === "critical")
+  );
+}
+
+ +

Severity Levels

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
SeverityMeaningAction
criticalMock does not match real APITest fails. llmock needs updating.
warningProvider added new field, neither SDK nor mock have itLogged. Early warning for future breakage.
okAll three agreeNo action needed.
+ +

CI Integration

+

+ Drift tests run daily in CI with real API keys stored as GitHub secrets. Tests that + require API keys are automatically skipped when the corresponding key is not set, so + pnpm test:drift is safe to run locally without any keys configured. +

+ +
+

+ Drift tests require real API keys and make real API calls. They are not part of the + regular pnpm test suite and must be run explicitly with + pnpm test:drift. +

+
+
+
+
+ +
+ + diff --git a/docs/embeddings.html b/docs/embeddings.html new file mode 100644 index 0000000..00d06f0 --- /dev/null +++ b/docs/embeddings.html @@ -0,0 +1,237 @@ + + + + + + Embeddings — llmock + + + + + + + + +
+ + +
+

Embeddings

+

+ The POST /v1/embeddings endpoint returns OpenAI-compatible embedding vectors. + You can provide explicit vectors in fixtures or let llmock generate deterministic + embeddings automatically from the input text. +

+ +

Endpoint

+ + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1/embeddingsJSON
+ +

How It Works

+
    +
  • + If a fixture matches with an embedding response, that exact vector is + returned +
  • +
  • + If no fixture matches, a deterministic embedding is auto-generated from the input text + using a hash-based algorithm +
  • +
  • + Auto-generated embeddings are deterministic: same input always produces the same output +
  • +
  • + Default dimension is 1536 (matching text-embedding-3-small), configurable via the + dimensions request parameter +
  • +
+ +

Unit Test: Fixture-based Embedding

+ +
+
+ embedding-fixture.test.ts ts +
+
const mock = new LLMock();
+await mock.start();
+
+// Register a fixture with explicit embedding vector
+mock.onEmbedding("embed-this", { embedding: [0.1, -0.2, 0.3, 0.4, -0.5] });
+
+const res = await fetch(`${mock.url}/v1/embeddings`, {
+  method: "POST",
+  headers: { "Content-Type": "application/json" },
+  body: JSON.stringify({
+    model: "text-embedding-3-small",
+    input: "embed-this",
+  }),
+});
+
+const body = await res.json();
+expect(body.object).toBe("list");
+expect(body.data[0].embedding).toEqual([0.1, -0.2, 0.3, 0.4, -0.5]);
+expect(body.data[0].index).toBe(0);
+
+ +

Unit Test: Auto-generated Embedding

+ +
+
+ embedding-auto.test.ts ts +
+
import { generateDeterministicEmbedding } from "@copilotkit/llmock/helpers";
+
+// Deterministic: same input always produces the same output
+const a = generateDeterministicEmbedding("hello world");
+const b = generateDeterministicEmbedding("hello world");
+expect(a).toEqual(b);
+
+// Default dimension is 1536
+expect(a).toHaveLength(1536);
+
+// Custom dimension
+const c = generateDeterministicEmbedding("hello", 768);
+expect(c).toHaveLength(768);
+
+// All values are between -1 and 1
+for (const val of a) {
+  expect(val).toBeGreaterThanOrEqual(-1);
+  expect(val).toBeLessThanOrEqual(1);
+}
+
+ +

JSON Fixture

+ +
+
+ fixtures/embeddings.json json +
+
{
+  "fixtures": [
+    {
+      "match": { "inputText": "embed-this" },
+      "response": {
+        "embedding": [0.1, -0.2, 0.3, 0.4, -0.5]
+      }
+    }
+  ]
+}
+
+ +

Response Format

+

Matches the OpenAI /v1/embeddings response format:

+ +
+
Response shape json
+
{
+  "object": "list",
+  "model": "text-embedding-3-small",
+  "data": [
+    {
+      "object": "embedding",
+      "index": 0,
+      "embedding": [0.1, -0.2, 0.3, ...]
+    }
+  ],
+  "usage": { "prompt_tokens": 0, "total_tokens": 0 }
+}
+
+ +
+

+ Embedding fixtures use match.inputText instead of + match.userMessage. The inputText matcher checks the embedding + input string (or each string in an input array). +

+
+
+
+
+ +
+ + diff --git a/docs/error-injection.html b/docs/error-injection.html new file mode 100644 index 0000000..80ac5ee --- /dev/null +++ b/docs/error-injection.html @@ -0,0 +1,233 @@ + + + + + + Error Injection — llmock + + + + + + + + +
+ + +
+

Error Injection

+

+ Test your application's error handling with one-shot errors, stream truncation, and timed + disconnects. llmock provides three mechanisms for simulating failures. +

+ +

One-Shot Errors

+

+ Queue an error that fires on the next request and auto-removes itself. Useful for testing + retry logic. +

+ +
+
+ one-shot-error.test.ts ts +
+
const mock = new LLMock();
+await mock.start();
+mock.onMessage("hello", { content: "Hi!" });
+
+// Queue a 429 rate limit error for the next request
+mock.nextRequestError(429, {
+  message: "Rate limit exceeded",
+  type: "rate_limit_error",
+});
+
+// First request → 429 error
+const res1 = await fetch(`${mock.url}/v1/chat/completions`, {
+  method: "POST",
+  headers: { "Content-Type": "application/json" },
+  body: JSON.stringify({
+    model: "gpt-4",
+    messages: [{ role: "user", content: "hello" }],
+  }),
+});
+expect(res1.status).toBe(429);
+
+// Second request → normal response (error auto-removed)
+const res2 = await fetch(`${mock.url}/v1/chat/completions`, { /* same */ });
+expect(res2.status).toBe(200);
+
+ +

Stream Truncation

+

+ Abort a streaming response after a specific number of SSE chunks. Tests that your + application handles partial streams gracefully. +

+ +
+
truncation.test.ts ts
+
mock.on(
+  { userMessage: "long story" },
+  { content: "This is a very long response that will be cut short" },
+  { truncateAfterChunks: 3 }  // Abort after 3 SSE chunks
+);
+
+ +

Timed Disconnect

+

+ Disconnect after a specified number of milliseconds. Simulates network timeouts and + connection drops. +

+ +
+
disconnect.test.ts ts
+
mock.on(
+  { userMessage: "slow" },
+  { content: "This response will never complete" },
+  { disconnectAfterMs: 100 }  // Kill connection after 100ms
+);
+
+ +

Error Fixtures in JSON

+ +
+
+ fixtures/errors.json json +
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "error-test" },
+      "response": {
+        "error": {
+          "message": "Rate limited",
+          "type": "rate_limit_error"
+        },
+        "status": 429
+      }
+    },
+    {
+      "match": { "userMessage": "partial" },
+      "response": { "content": "This gets cut off" },
+      "truncateAfterChunks": 2
+    },
+    {
+      "match": { "userMessage": "timeout" },
+      "response": { "content": "Never finishes" },
+      "disconnectAfterMs": 50
+    }
+  ]
+}
+
+ +

Interruption Behavior

+
    +
  • + truncateAfterChunks — counts SSE data lines sent; aborts on the Nth + chunk +
  • +
  • + disconnectAfterMs — starts a timer when the response begins; kills + the connection when it fires +
  • +
  • If both are set, whichever fires first wins
  • +
  • + Interrupted requests are recorded in the journal with + response.interrupted: true and response.interruptReason +
  • +
+ +
+

+ nextRequestError() is one-shot: it fires once and auto-removes itself. For + persistent error fixtures, use addFixture() with an error response. +

+
+ +
+

+ See also: Chaos Testing — for + probabilistic failure injection. Chaos testing adds configurable error rates, random + latency spikes, and stream corruption that trigger based on probability rather than + deterministic fixture matching. Use error injection for specific, reproducible failure + scenarios; use chaos testing for resilience testing under unpredictable conditions. +

+
+
+
+
+ +
+ + diff --git a/docs/favicon.svg b/docs/favicon.svg new file mode 100644 index 0000000..63285ea --- /dev/null +++ b/docs/favicon.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a218f7047973946fe28120c9209e2873144118d5b5a7e2ea9e7aa4c407559fb +size 3265 diff --git a/docs/fixtures.html b/docs/fixtures.html new file mode 100644 index 0000000..0a13382 --- /dev/null +++ b/docs/fixtures.html @@ -0,0 +1,330 @@ + + + + + + Fixtures — llmock + + + + + + + + +
+ + +
+

Fixtures

+

+ Fixtures define what the mock server returns. Each fixture has a + match criteria and a response. Load them from JSON files, + register them programmatically, or mix both approaches. +

+ +

File Format

+ +
+
+ fixtures/example.json json +
+
{
+  "fixtures": [
+    {
+      "match": {
+        "userMessage": "hello",
+        "model": "gpt-4"
+      },
+      "response": {
+        "content": "Hello!"
+      },
+      "latency": 200,
+      "chunkSize": 10
+    }
+  ]
+}
+
+ +

Match Fields

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
userMessagestring | RegExpSubstring or regex match on the last user message
inputTextstring | RegExpMatch on embedding input text
toolCallIdstringMatch on tool_call_id in the last message
toolNamestringMatch on tool function name
modelstring | RegExpMatch on the requested model name
responseFormatstringMatch on response_format.type (e.g. "json_object")
sequenceIndexnumberMatch on the Nth occurrence of this pattern
predicatefunctionCustom function: (req) => boolean (programmatic only)
+ +

Response Types

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeFieldsDescription
Textcontent, role?, finishReason?Plain text response
Tool CalltoolCalls[], finishReason?Function call(s) with name + arguments
Errorerror.message, error.type?, status?Error response with HTTP status
Embeddingembedding[]Vector of numbers
+ +

Fixture Options

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
latencynumberMilliseconds delay before first chunk
chunkSizenumberCharacters per SSE chunk (streaming)
truncateAfterChunksnumberAbort stream after N chunks (error injection)
disconnectAfterMsnumberDisconnect after N ms (error injection)
streamingProfileobject + Streaming physics profile: { ttftMs, tps, jitter }. See + Streaming Physics +
chaosobject + Per-fixture chaos config: { errorRate, latencyMs, ... }. See + Chaos Testing +
+ +

Loading Fixtures

+ +

From a file

+
+
load-file.ts ts
+
const mock = new LLMock();
+mock.loadFixtureFile("./fixtures/chat.json");
+mock.loadFixtureFile("./fixtures/tools.json");
+
+ +

From a directory

+
+
load-dir.ts ts
+
// Loads all .json files in the directory (non-recursive)
+mock.loadFixtureDir("./fixtures");
+
+ +

Programmatically

+
+
programmatic.ts ts
+
// Shorthand methods
+mock.onMessage("hello", { content: "Hi!" });
+mock.onToolCall("get_weather", { content: "72F" });
+mock.onEmbedding("my text", { embedding: [0.1, 0.2] });
+mock.onJsonOutput("data", { key: "value" });
+mock.onToolResult("call_123", { content: "Done" });
+
+// Full fixture object
+mock.addFixture({
+  match: { userMessage: "hello", model: "gpt-4" },
+  response: { content: "Hi!" },
+  latency: 100,
+  chunkSize: 5,
+});
+
+// Predicate-based routing
+mock.on(
+  { predicate: (req) => req.messages.at(-1)?.role === "tool" },
+  { content: "Done!" }
+);
+
+ +

Routing Rules

+
    +
  • + First match wins — fixtures are checked in registration order +
  • +
  • + All match fields must pass — multiple match fields are AND-ed +
  • +
  • + Substring matchinguserMessage: "hello" matches + "say hello world" +
  • +
  • + Cross-provider — the same fixtures work for OpenAI, Claude, and + Gemini requests +
  • +
+ +
+

+ JSON files cannot use predicate (functions can't be serialized). Use + programmatic registration for predicate-based routing. +

+
+
+
+
+ +
+ + diff --git a/docs/gemini.html b/docs/gemini.html new file mode 100644 index 0000000..b3beeb1 --- /dev/null +++ b/docs/gemini.html @@ -0,0 +1,232 @@ + + + + + + Gemini — llmock + + + + + + + + +
+ + +
+

Google Gemini

+

+ llmock supports both generateContent (non-streaming) and + streamGenerateContent (SSE) endpoints, plus Gemini Live over WebSocket. The + same fixtures drive all three transports. +

+ +

Endpoints

+ + + + + + + + + + + + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1beta/models/:model:generateContentJSON
POST/v1beta/models/:model:streamGenerateContentSSE (data:)
WS/ws/google.ai.generativelanguage.*WebSocket JSON
+ +

Unit Test: Streaming Text

+ +
+
gemini-text.test.ts ts
+
const textFixture = {
+  match: { userMessage: "hello" },
+  response: { content: "Hi there!" },
+};
+
+const instance = await createServer([textFixture]);
+
+const res = await post(
+  `${instance.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent?alt=sse`,
+  {
+    contents: [{ role: "user", parts: [{ text: "hello" }] }],
+  }
+);
+
+// Parse Gemini SSE chunks
+const chunks = res.body.split("\n")
+  .filter(l => l.startsWith("data: "))
+  .map(l => JSON.parse(l.slice(6)));
+
+// Gemini response shape
+expect(chunks[0].candidates[0].content.parts[0].text).toBeDefined();
+
+// Reassemble text
+const text = chunks
+  .map(c => c.candidates[0].content.parts[0].text ?? "")
+  .join("");
+expect(text).toBe("Hi there!");
+
+ +

Unit Test: Tool Call

+ +
+
gemini-tools.test.ts ts
+
const toolFixture = {
+  match: { userMessage: "weather" },
+  response: {
+    toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }]
+  },
+};
+
+const instance = await createServer([toolFixture]);
+
+const res = await post(
+  `${instance.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent?alt=sse`,
+  {
+    contents: [{ role: "user", parts: [{ text: "what is the weather?" }] }],
+  }
+);
+
+const chunks = parseGeminiSSEChunks(res.body);
+const parts = chunks[0].candidates[0].content.parts;
+expect(parts[0].functionCall.name).toBe("get_weather");
+
+ +

Request Translation

+

+ Gemini uses a different request format (contents with parts) + than OpenAI. llmock translates Gemini requests to the unified format via + geminiToCompletionRequest() so the same fixture + match.userMessage works regardless of which provider endpoint the request + arrives on. +

+ +

Gemini Live (WebSocket)

+

+ Gemini Live uses WebSocket at /ws/google.ai.generativelanguage.* for + bidirectional streaming. See the WebSocket APIs page for + details. +

+ +
+

+ Gemini Live text support is unverified against a real model — no text-capable + Gemini Live model existed at the time of writing. The implementation follows the API + specification. +

+
+ +

Vertex AI

+

+ Google Cloud's Vertex AI provides access to Gemini models through a + different URL pattern than the AI Studio API. llmock supports Vertex AI requests using the + same Gemini handler — the URL pattern is different, but the request and response + formats are identical. +

+

Vertex AI URLs follow the pattern:

+
+
+ Vertex AI URL pattern text +
+
POST /v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent
+POST /v1/projects/{project}/locations/{location}/publishers/google/models/{model}:streamGenerateContent
+
+

+ The same fixtures work for both Gemini AI Studio and Vertex AI endpoints. See the + Vertex AI page for configuration details. +

+
+
+
+ +
+ + diff --git a/docs/index.html b/docs/index.html index 39fbb43..bc2e99e 100644 --- a/docs/index.html +++ b/docs/index.html @@ -3,12 +3,14 @@ - mock-openai — Deterministic OpenAI mock server for testing + llmock — Deterministic mock LLM server for testing + + @@ -431,7 +433,7 @@ /* ─── Sections ───────────────────────────────────────────────── */ section { - padding: 6rem 0; + padding: 3rem 0; } .section-label { @@ -468,6 +470,10 @@ margin-top: 3.5rem; } + .features-grid > .feature-card:last-child:nth-child(3n + 1) { + grid-column: 2; + } + .feature-card { padding: 2rem; background: var(--bg-card); @@ -632,18 +638,193 @@ color: var(--warning); } - /* ─── Comparison Table ───────────────────────────────────────── */ - .comparison { + /* ─── Reliability / Drift Detection ─────────────────────────── */ + .triangle-wrapper { + position: relative; + width: 100%; + max-width: 600px; + margin: 3.5rem auto 1rem; + aspect-ratio: 1.3 / 1; + } + .triangle-wrapper svg { + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 100%; + z-index: 0; + } + .tri-node { + position: absolute; + background: var(--bg-card); + border: 2px solid; + border-radius: 12px; + padding: 1rem 1.25rem; + text-align: center; + width: 170px; + z-index: 1; + } + .tri-node h3 { + font-size: 0.95rem; + font-weight: 600; + color: var(--text-primary); + margin-bottom: 0.3rem; + } + .tri-node p { + font-size: 0.75rem; + color: var(--text-secondary); + line-height: 1.4; + } + .tri-node .node-icon { + font-size: 1.5rem; + margin-bottom: 0.5rem; + } + .tri-node.sdk { + border-color: var(--blue); + top: 0; + left: 50%; + transform: translateX(-50%); + } + .tri-node.sdk .node-icon { + color: var(--blue); + } + .tri-node.real { + border-color: var(--accent); + bottom: 0; + left: 0; + } + .tri-node.real .node-icon { + color: var(--accent); + } + .tri-node.mock { + border-color: var(--purple); + bottom: 0; + right: 0; + } + .tri-node.mock .node-icon { + color: var(--purple); + } + .diagnosis-grid { + 
display: grid; + grid-template-columns: repeat(3, 1fr); + gap: 1rem; + margin-top: 2.5rem; + } + .diagnosis-card { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 8px; + padding: 1rem 1.25rem; + } + .diagnosis-card .diag-header { + display: flex; + align-items: center; + gap: 0.5rem; + margin-bottom: 0.4rem; + } + .diagnosis-card .diag-dot { + width: 10px; + height: 10px; + border-radius: 50%; + flex-shrink: 0; + } + .diagnosis-card h4 { + font-size: 0.85rem; + font-weight: 600; + color: var(--text-primary); + } + .diagnosis-card p { + font-size: 0.78rem; + color: var(--text-secondary); + line-height: 1.5; + } + .drift-report { + background: var(--bg-deep); + border: 1px solid var(--border); + border-radius: 8px; + padding: 1.25rem 1.5rem; + margin-top: 2.5rem; + font-family: var(--font-mono); + font-size: 0.75rem; + line-height: 1.8; + color: var(--text-secondary); + overflow-x: auto; + } + .drift-report .report-header { + color: var(--text-primary); + font-weight: 600; + margin-bottom: 0.75rem; + font-size: 0.8rem; + } + .drift-report .severity-critical { + color: var(--error); + } + .drift-report .severity-warning { + color: var(--warning); + } + .drift-report .severity-ok { + color: var(--accent); + } + .drift-report .field-path { + color: var(--blue); + } + .drift-report .drift-label { + color: var(--text-primary); + } + .drift-report .report-summary { + color: var(--text-dim); + } + .drift-report .field-label { + color: var(--text-dim); + } + .drift-report .divider { + border-top: 1px solid var(--border); + margin: 0.6rem 0; + } + .ci-footer { + display: flex; + align-items: center; + gap: 1.5rem; + margin-top: 2rem; + padding-top: 1.5rem; border-top: 1px solid var(--border); } + .ci-badge { + display: inline-flex; + align-items: center; + gap: 0.5rem; + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 6px; + padding: 0.4rem 0.75rem; + font-size: 0.8rem; + color: var(--text-secondary); + 
font-family: var(--font-mono); + flex-shrink: 0; + } + .ci-badge .dot { + width: 8px; + height: 8px; + border-radius: 50%; + background: var(--accent); + } + .ci-text { + font-size: 0.9rem; + color: var(--text-secondary); + line-height: 1.6; + } + /* ─── Comparison Table ───────────────────────────────────────── */ + .comparison-table-wrap { + margin-top: 3rem; + } .comparison-table { width: 100%; - margin-top: 3rem; - border-collapse: collapse; + border-collapse: separate; + border-spacing: 0; font-size: 0.9rem; } - .comparison-table th { + .comparison-table thead th { text-align: left; padding: 1rem 1.25rem; font-family: var(--font-mono); @@ -653,10 +834,20 @@ letter-spacing: 0.08em; border-bottom: 2px solid var(--border-bright); color: var(--text-secondary); + position: sticky; + top: 56px; + background: var(--bg-deep); + z-index: 10; } - .comparison-table th:nth-child(2) { + .comparison-table thead th:nth-child(2) { color: var(--accent); } + .comparison-table thead th a { + text-decoration: none; + } + .comparison-table thead th a:hover { + text-decoration: underline; + } .comparison-table td { padding: 0.85rem 1.25rem; border-bottom: 1px solid var(--border); @@ -754,16 +945,6 @@ transform: translateY(0); } } - @keyframes sseLine { - from { - opacity: 0; - transform: translateX(-8px); - } - to { - opacity: 1; - transform: translateX(0); - } - } @keyframes blink { 50% { opacity: 0; @@ -793,6 +974,11 @@ opacity: 1; transform: translateY(0); } + /* Remove transform from comparison section so sticky headers work + (transform creates a new containing block that breaks sticky) */ + .comparison.reveal.visible { + transform: none; + } /* ─── Responsive ─────────────────────────────────────────────── */ @media (max-width: 900px) { @@ -802,6 +988,9 @@ .code-section { grid-template-columns: 1fr; } + .diagnosis-grid { + grid-template-columns: 1fr; + } .comparison-table { font-size: 0.8rem; } @@ -823,6 +1012,10 @@ .nav-links a:not(.gh-link) { display: none; } + .ci-footer { 
+ flex-direction: column; + align-items: flex-start; + } footer .container { flex-direction: column; gap: 1.5rem; @@ -839,13 +1032,15 @@