diff --git a/.changeset/config.json b/.changeset/config.json deleted file mode 100644 index e8eef44..0000000 --- a/.changeset/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "$schema": "https://unpkg.com/@changesets/config@3.1.1/schema.json", - "changelog": "@changesets/cli/changelog", - "commit": false, - "access": "public", - "baseBranch": "main" -} diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json new file mode 100644 index 0000000..6616b66 --- /dev/null +++ b/.claude-plugin/marketplace.json @@ -0,0 +1,17 @@ +{ + "name": "copilotkit-tools", + "owner": { + "name": "CopilotKit" + }, + "plugins": [ + { + "name": "llmock", + "source": { + "source": "npm", + "package": "@copilotkit/llmock", + "version": "^1.5.0" + }, + "description": "Fixture authoring skill for @copilotkit/llmock — match fields, response types, embeddings, structured output, sequential responses, streaming physics, agent loop patterns, gotchas, and debugging" + } + ] +} diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..cd8e5ae --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "llmock", + "version": "1.5.0", + "description": "Fixture authoring guidance for @copilotkit/llmock", + "author": { + "name": "CopilotKit" + }, + "homepage": "https://github.com/CopilotKit/llmock", + "repository": "https://github.com/CopilotKit/llmock", + "license": "MIT", + "skills": "./skills" +} diff --git a/.claude/commands/write-fixtures.md b/.claude/commands/write-fixtures.md new file mode 120000 index 0000000..3d887c6 --- /dev/null +++ b/.claude/commands/write-fixtures.md @@ -0,0 +1 @@ +../../skills/write-fixtures/SKILL.md \ No newline at end of file diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..725e4f6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +node_modules +.git +src/__tests__ +docs +.worktrees +.github +coverage +*.md +dist +.claude +.claude-plugin +skills +.husky 
+.vscode +.idea diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..d1c2923 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,9 @@ +*.gif filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.mp4 filter=lfs diff=lfs merge=lfs -text +*.webm filter=lfs diff=lfs merge=lfs -text +*.svg filter=lfs diff=lfs merge=lfs -text +docs/favicon.svg !filter !diff !merge diff --git a/.github/workflows/fix-drift.yml b/.github/workflows/fix-drift.yml new file mode 100644 index 0000000..1e44b97 --- /dev/null +++ b/.github/workflows/fix-drift.yml @@ -0,0 +1,128 @@ +name: Fix Drift +on: + workflow_dispatch: + workflow_run: + workflows: ["Drift Tests"] + types: [completed] + branches: [main] + +concurrency: + group: drift-fix + cancel-in-progress: false + +jobs: + fix: + if: >- + github.event_name == 'workflow_dispatch' || + github.event.workflow_run.conclusion == 'failure' + runs-on: ubuntu-latest + timeout-minutes: 30 + permissions: + contents: write + pull-requests: write + issues: write + steps: + - uses: actions/checkout@v4 + - uses: pnpm/action-setup@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + - run: pnpm install --frozen-lockfile + + # Step 0: Configure git identity and create fix branch + - name: Configure git + run: | + git config user.name "llmock-drift-bot" + git config user.email "drift-bot@copilotkit.ai" + git checkout -B fix/drift-$(date +%Y-%m-%d)-${{ github.run_id }} + + # Step 1: Detect drift and produce report + - name: Collect drift report + id: detect + run: | + set +e + npx tsx scripts/drift-report-collector.ts + EXIT_CODE=$? 
+ set -e + echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT + if [ "$EXIT_CODE" -eq 2 ]; then + : # critical drift found, continue + elif [ "$EXIT_CODE" -ne 0 ]; then + echo "::error::Collector script crashed with exit code $EXIT_CODE" + exit $EXIT_CODE + fi + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + + # Always upload the report as an artifact + - name: Upload drift report + if: always() + uses: actions/upload-artifact@v4 + with: + name: drift-report + path: drift-report.json + if-no-files-found: warn + retention-days: 30 + + # Step 2: Exit if no critical drift + - name: Check for critical diffs + id: check + env: + DETECT_EXIT_CODE: ${{ steps.detect.outputs.exit_code }} + run: | + if [ "$DETECT_EXIT_CODE" = "2" ]; then + echo "skip=false" >> $GITHUB_OUTPUT + echo "Critical drift detected" + else + echo "skip=true" >> $GITHUB_OUTPUT + echo "No critical drift detected (exit code: $DETECT_EXIT_CODE) — skipping fix" + fi + + # Step 3: Invoke Claude Code to fix + - name: Auto-fix drift + if: steps.check.outputs.skip != 'true' + run: npx tsx scripts/fix-drift.ts + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + + # Upload Claude Code output for debugging + - name: Upload Claude Code logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: claude-code-output + path: claude-code-output.log + if-no-files-found: warn + retention-days: 30 + + # Step 4: Verify fix independently + - name: Verify conformance + if: steps.check.outputs.skip != 'true' + run: pnpm test + + - name: Verify drift resolved + if: steps.check.outputs.skip != 'true' + run: pnpm test:drift + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + + # Step 5: Create PR on 
success + - name: Create PR + if: success() && steps.check.outputs.skip != 'true' + run: npx tsx scripts/fix-drift.ts --create-pr + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # Step 6: Open issue on failure + - name: Create issue on failure + if: failure() && steps.check.outputs.skip != 'true' + run: npx tsx scripts/fix-drift.ts --create-issue + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml new file mode 100644 index 0000000..3b40eab --- /dev/null +++ b/.github/workflows/publish-docker.yml @@ -0,0 +1,58 @@ +name: Publish Docker Image + +on: + push: + tags: + - "v*" + pull_request: + branches: + - main + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GHCR + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}} + type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }} + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . 
+ platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 4e88527..19f34f9 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -2,6 +2,7 @@ name: Release on: push: branches: [main] + workflow_dispatch: jobs: release: runs-on: ubuntu-latest @@ -14,11 +15,29 @@ jobs: cache: pnpm registry-url: "https://registry.npmjs.org" - run: pnpm install --frozen-lockfile - - uses: changesets/action@v1 - with: - publish: pnpm release - version: pnpm changeset version + + - name: Check if version is already published + id: check + run: | + PKG_NAME=$(node -p "require('./package.json').name") + PKG_VERSION=$(node -p "require('./package.json').version") + if npm view "${PKG_NAME}@${PKG_VERSION}" version 2>/dev/null; then + echo "published=true" >> "$GITHUB_OUTPUT" + else + echo "published=false" >> "$GITHUB_OUTPUT" + fi + + - name: Build and publish + if: steps.check.outputs.published == 'false' + run: pnpm release env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} NPM_TOKEN: ${{ secrets.NPM_TOKEN }} NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + + - name: Create GitHub Release + if: steps.check.outputs.published == 'false' + run: | + PKG_VERSION=$(node -p "require('./package.json').version") + gh release create "v${PKG_VERSION}" --generate-notes --title "v${PKG_VERSION}" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test-drift.yml b/.github/workflows/test-drift.yml new file mode 100644 index 0000000..b76d6d1 --- /dev/null +++ b/.github/workflows/test-drift.yml @@ -0,0 +1,49 @@ +name: Drift Tests +on: + schedule: + - cron: "0 6 * * *" # Daily 6am UTC + workflow_dispatch: # Manual trigger +jobs: + drift: + runs-on: ubuntu-latest + timeout-minutes: 15 + 
steps: + - uses: actions/checkout@v4 + - uses: pnpm/action-setup@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + - run: pnpm install --frozen-lockfile + + - name: Run drift tests + id: drift + run: | + set +e + npx tsx scripts/drift-report-collector.ts + EXIT_CODE=$? + set -e + echo "exit_code=$EXIT_CODE" >> $GITHUB_OUTPUT + if [ "$EXIT_CODE" -eq 2 ]; then + : # critical drift found, continue + elif [ "$EXIT_CODE" -ne 0 ]; then + echo "::error::Collector script crashed with exit code $EXIT_CODE" + exit $EXIT_CODE + fi + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + + - name: Upload drift report + if: always() + uses: actions/upload-artifact@v4 + with: + name: drift-report + path: drift-report.json + if-no-files-found: warn + retention-days: 30 + + - name: Fail if critical drift detected + if: steps.drift.outputs.exit_code == '2' + run: exit 1 diff --git a/.github/workflows/update-competitive-matrix.yml b/.github/workflows/update-competitive-matrix.yml new file mode 100644 index 0000000..b6e3355 --- /dev/null +++ b/.github/workflows/update-competitive-matrix.yml @@ -0,0 +1,56 @@ +name: Update Competitive Matrix + +on: + schedule: + - cron: "0 9 * * 1" # Weekly Monday 9am UTC + workflow_dispatch: + +concurrency: + group: competitive-matrix + cancel-in-progress: true + +jobs: + update-matrix: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - uses: actions/checkout@v4 + - uses: pnpm/action-setup@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + - run: pnpm install --frozen-lockfile + + - name: Update competitive matrix + run: npx tsx scripts/update-competitive-matrix.ts --summary /tmp/matrix-summary.md + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Check for changes + id: changes + run: | + if git diff --quiet docs/index.html; then + echo 
"changed=false" >> $GITHUB_OUTPUT + else + echo "changed=true" >> $GITHUB_OUTPUT + fi + + - name: Create PR + if: steps.changes.outputs.changed == 'true' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + BRANCH="auto/competitive-matrix-$(date +%Y%m%d)" + git checkout -b "$BRANCH" + git add docs/index.html + git commit -m "docs: update competitive matrix from latest competitor data" + git push -u origin "$BRANCH" + gh pr create \ + --title "Update competitive matrix" \ + --body-file /tmp/matrix-summary.md \ + --base main + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index f4e2c6d..cf9381d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ node_modules/ dist/ *.tsbuildinfo +.worktrees/ +.superpowers/ diff --git a/.husky/pre-commit b/.husky/pre-commit new file mode 100644 index 0000000..2312dc5 --- /dev/null +++ b/.husky/pre-commit @@ -0,0 +1 @@ +npx lint-staged diff --git a/.prettierignore b/.prettierignore index 29c69b2..52af816 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,3 +1,4 @@ dist/ node_modules/ pnpm-lock.yaml +charts/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3b69f67 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,132 @@ +# @copilotkit/llmock + +## 1.6.0 + +### Minor Changes + +- Provider-specific endpoints: dedicated routes for Bedrock (`/model/{modelId}/invoke`), Ollama (`/api/chat`, `/api/generate`), Cohere (`/v2/chat`), and Azure OpenAI deployment-based routing (`/openai/deployments/{id}/chat/completions`) +- Chaos injection: `ChaosConfig` type with `drop`, `malformed`, and `disconnect` actions; supports per-fixture chaos via `chaos` config on each fixture and server-wide chaos via `--chaos-drop`, `--chaos-malformed`, and `--chaos-disconnect` CLI flags +- Metrics: `GET /metrics` endpoint exposing Prometheus text format with request counters and latency histograms per provider and route +- 
Record-and-replay: `--record` flag and `proxyAndRecord` helper that proxies requests to real LLM APIs, collapses streaming responses, and writes fixture JSON to disk for future playback + +## 1.5.1 + +### Patch Changes + +- Fix documentation URLs to use correct domain (llmock.copilotkit.dev) + +## 1.5.0 + +### Minor Changes + +- Embeddings API: `POST /v1/embeddings` endpoint, `onEmbedding()` convenience method, `inputText` match field, `EmbeddingResponse` type, deterministic fallback embeddings from input hash, Azure embedding routing +- Structured output / JSON mode: `responseFormat` match field, `onJsonOutput()` convenience method +- Sequential responses: `sequenceIndex` match field for stateful multi-turn fixtures, per-fixture-group match counting, `resetMatchCounts()` method +- Streaming physics: `StreamingProfile` type with `ttft`, `tps`, `jitter` fields for realistic timing simulation +- AWS Bedrock: `POST /model/{modelId}/invoke` endpoint, Anthropic Messages format translation +- Azure OpenAI: provider routing for `/openai/deployments/{id}/chat/completions` and `/openai/deployments/{id}/embeddings` +- Health & models endpoints: `GET /health`, `GET /ready`, `GET /v1/models` (auto-populated from fixtures) +- Docker & Helm: Dockerfile, Helm chart for Kubernetes deployment +- Documentation website: full docs site at llmock.copilotkit.dev with feature pages and competitive comparison matrix +- Automated drift remediation: `scripts/drift-report-collector.ts` and `scripts/fix-drift.ts` for CI-driven drift fixes +- CI automation: competitive matrix update workflow, drift fix workflow +- `FixtureOpts` and `EmbeddingFixtureOpts` type aliases exported for external consumers + +### Patch Changes + +- Fix Gemini Live handler crash on malformed `clientContent.turns` and `toolResponse.functionResponses` +- Add `isClosed` guard before WebSocket finalization events (prevents writes to closed connections) +- Default to non-streaming for Claude Messages API and Responses API 
(matching real API defaults) +- Fix `streamingProfile` missing from convenience method opts types (`on`, `onMessage`, etc.) +- Fix skills/ symlink direction so npm pack includes the write-fixtures skill +- Fix `.claude` removed from package.json files (was dead weight — symlink doesn't ship) +- Add `.worktrees/` to eslint ignores +- Remove dead `@keyframes sseLine` CSS from docs site +- Fix watcher cleanup on error (clear debounce timer, null guard) +- Fix empty-reload guard (keep previous fixtures when reload produces 0) +- README rewritten as concise overview with links to docs site +- Write-fixtures skill updated for all v1.5.0 features +- Docs site: Get Started links to docs, comparison above reliability, npm version badge + +## 1.4.0 + +### Minor Changes + +- `--watch` (`-w`): File-watching with 500ms debounced reload. Keeps previous fixtures on validation failure. +- `--log-level`: Configurable log verbosity (`silent`, `info`, `debug`). Default `info` for CLI, `silent` for programmatic API. +- `--validate-on-load`: Fixture schema validation at startup — checks response types, tool call JSON, numeric ranges, shadowing, and catch-all positioning. +- `validateFixtures()` exported for programmatic use +- `Logger` class exported for programmatic use + +## 1.3.3 + +### Patch Changes + +- Fix Responses WS handler to accept flat `response.create` format matching the real OpenAI API (previously required a non-standard nested `response: { ... 
}` envelope) +- WebSocket drift detection tests: TLS client for real provider WS endpoints, 4 verified drift tests (Responses WS + Realtime), Gemini Live canary for text-capable model availability +- Realtime model canary: detects when `gpt-4o-mini-realtime-preview` is deprecated and suggests GA replacement +- Gemini Live documented as unverified (no text-capable `bidiGenerateContent` model exists yet) +- Fix README Gemini Live response shape example (`modelTurn.parts`, not `modelTurnComplete`) + +## 1.3.2 + +### Patch Changes + +- Fix missing `refusal` field on OpenAI Chat Completions responses — both the SDK and real API return `refusal: null` on non-refusal messages, but llmock was omitting it +- Live API drift detection test suite: three-layer triangulation between SDK types, real API responses, and llmock output across OpenAI (Chat + Responses), Anthropic Claude, and Google Gemini +- Weekly CI workflow for automated drift checks +- `DRIFT.md` documentation for the drift detection system + +## 1.3.1 + +### Patch Changes + +- Claude Code fixture authoring skill (`/write-fixtures`) — comprehensive guide for match fields, response types, agent loop patterns, gotchas, and debugging +- Claude Code plugin structure for downstream consumers (`--plugin-dir`, `--add-dir`, or manual copy) +- README and docs site updated with Claude Code integration instructions + +## 1.3.0 + +### Minor Changes + +- Mid-stream interruption: `truncateAfterChunks` and `disconnectAfterMs` fixture fields to simulate abrupt server disconnects +- AbortSignal-based cancellation primitives (`createInterruptionSignal`, signal-aware `delay()`) +- Backward-compatible `writeSSEStream` overload with `StreamOptions` returning completion status +- Interruption support across all HTTP SSE and WebSocket streaming paths +- `destroy()` method on `WebSocketConnection` for abrupt disconnect simulation +- Journal records `interrupted` and `interruptReason` on interrupted streams +- LLMock convenience API 
extended with interruption options (`truncateAfterChunks`, `disconnectAfterMs`) + +## 1.2.0 + +### Minor Changes + +- Zero-dependency RFC 6455 WebSocket framing layer +- OpenAI Responses API over WebSocket (`/v1/responses`) +- OpenAI Realtime API over WebSocket (`/v1/realtime`) — text + tool calls +- Gemini Live BidiGenerateContent over WebSocket — text + tool calls + +### Patch Changes + +- WebSocket close-frame lifecycle fixes +- Improved error visibility across WebSocket handlers +- Future Direction section in README + +## 1.1.1 + +### Patch Changes + +- Add function call IDs to Gemini tool call responses +- Remove changesets, simplify release workflow + +## 1.1.0 + +### Minor Changes + +- 9948a8b: Add `prependFixture()` and `getFixtures()` public API methods + +## 1.0.1 + +### Patch Changes + +- Add `getTextContent` for array-format message content handling diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..2ba92b4 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,49 @@ +# llmock + +## Before Every Commit + +Run these checks on all changed files before committing: + +```bash +pnpm run format:check # prettier +pnpm run lint # eslint +pnpm run test # vitest +``` + +If prettier or eslint fail, fix with: + +```bash +npx prettier --write +npx eslint --fix +``` + +A pre-commit hook (husky + lint-staged) runs prettier and eslint automatically +on staged files, but always verify manually before pushing — CI checks the +entire repo, not just staged files. 
+ +## Project Structure + +- `src/` — TypeScript source (server, router, helpers, responses, types) +- `src/__tests__/` — Vitest test suite +- `docs/` — GitHub Pages website (static HTML) +- `fixtures/` — Example fixture JSON files shipped with the package + +## Testing + +- Tests live in `src/__tests__/` and use Vitest +- When adding features or fixing bugs, add or update tests +- Run `pnpm test` before pushing + +## Drift Remediation + +Automated drift remediation lives in `scripts/`: + +- `scripts/drift-report-collector.ts` — runs drift tests, produces `drift-report.json` +- `scripts/fix-drift.ts` — reads drift report, invokes Claude Code to fix builders, creates PR or issue + +See `DRIFT.md` for full documentation and `.github/workflows/fix-drift.yml` for the CI workflow. + +## Commit Messages + +- This repo enforces conventional commit prefixes via commitlint: `fix:`, `feat:`, `docs:`, `test:`, `chore:`, `refactor:`, etc. +- No Co-Authored-By lines diff --git a/DRIFT.md b/DRIFT.md new file mode 100644 index 0000000..b8a0ffb --- /dev/null +++ b/DRIFT.md @@ -0,0 +1,166 @@ +# Live API Drift Detection + +llmock produces responses shaped like real LLM APIs. Providers change their APIs over time. **Drift** means the mock no longer matches reality — your tests pass against llmock but break against the real API. + +## Three-Layer Approach + +Drift detection compares three independent sources to triangulate the cause of any mismatch: + +| SDK types = Real API? | Real API = llmock? 
| Diagnosis | +| --------------------- | ------------------ | -------------------------------------------------------------------- | +| Yes | No | **llmock drift** — response builders need updating | +| No | No | **Provider changed before SDK update** — flag, wait for SDK catch-up | +| Yes | Yes | **No drift** — all clear | +| No | Yes | **SDK drift** — provider deprecated something SDK still references | + +Two-way comparison (mock vs real) can't distinguish between "we need to fix llmock" and "the SDK hasn't caught up yet." Three-way comparison can. + +## Running Drift Tests + +```bash +# All providers (requires all three API keys) +OPENAI_API_KEY=sk-... ANTHROPIC_API_KEY=sk-... GOOGLE_API_KEY=... pnpm test:drift + +# Single provider (others skip automatically) +OPENAI_API_KEY=sk-... pnpm test:drift + +# Strict mode — warnings also fail +STRICT_DRIFT=1 OPENAI_API_KEY=sk-... pnpm test:drift +``` + +Required environment variables: + +- `OPENAI_API_KEY` — OpenAI API key +- `ANTHROPIC_API_KEY` — Anthropic API key +- `GOOGLE_API_KEY` — Google AI API key + +Each provider's tests skip independently if its key is not set. You can run drift tests for just one provider. + +## Reading Results + +### Severity levels + +- **critical** — Test fails. llmock produces a different shape than the real API for a field that both the SDK and real API agree on. This means llmock needs an update. +- **warning** — Test passes (unless `STRICT_DRIFT=1`). The real API has a field that neither the SDK nor llmock knows about, or the SDK and real API disagree. Usually means a provider added something new. +- **info** — Always passes. Known intentional differences (usage fields are always zero, optional fields llmock omits, etc.). + +### Example report output + +``` +API DRIFT DETECTED: OpenAI Chat Completions (non-streaming text) + + 1. 
[critical] LLMOCK DRIFT — field in SDK + real API but missing from mock + Path: usage.completion_tokens_details + SDK: object { reasoning_tokens: number } + Real: object { reasoning_tokens: number, accepted_prediction_tokens: number } + Mock: + + 2. [warning] PROVIDER ADDED FIELD — in real API but not in SDK or mock + Path: system_fingerprint + SDK: + Real: string + Mock: + + 3. [info] MOCK EXTRA FIELD — in mock but not in real API + Path: choices[0].logprobs + SDK: null | object + Real: + Mock: null +``` + +## Fixing Detected Drift + +When a `critical` drift is detected: + +1. **Identify the response builder** — the report path tells you which provider and field: + - OpenAI Chat Completions → `src/helpers.ts` (`buildTextCompletion`, `buildToolCallCompletion`, `buildTextChunks`, `buildToolCallChunks`) + - OpenAI Responses API → `src/responses.ts` (`buildTextResponse`, `buildToolCallResponse`, `buildTextStreamEvents`, `buildToolCallStreamEvents`) + - Anthropic Claude → `src/messages.ts` (`buildClaudeTextResponse`, `buildClaudeToolCallResponse`, `buildClaudeTextStreamEvents`, `buildClaudeToolCallStreamEvents`) + - Google Gemini → `src/gemini.ts` (`buildGeminiTextResponse`, `buildGeminiToolCallResponse`, `buildGeminiTextStreamChunks`, `buildGeminiToolCallStreamChunks`) + +2. **Update the builder** — add or modify the field to match the real API shape. + +3. **Run conformance tests** — `pnpm test` to verify existing API conformance tests still pass. + +4. **Run drift tests** — `pnpm test:drift` to verify the drift is resolved. + +## Model Deprecation + +The `models.drift.ts` test scrapes model names referenced in llmock's test files, README, and fixtures, then checks each provider's model listing API to verify they still exist. + +When a model is deprecated: + +1. Update the model name in the affected test files and fixtures +2. Update `src/__tests__/drift/providers.ts` if the cheap test model changed +3. 
Run `pnpm test` and `pnpm test:drift` + +## Adding a New Provider + +1. Add the provider's SDK as a devDependency in `package.json` +2. Add shape extraction functions to `src/__tests__/drift/sdk-shapes.ts` +3. Add raw fetch client functions to `src/__tests__/drift/providers.ts` +4. Create `src/__tests__/drift/<provider>.drift.ts` with 4 test scenarios +5. Add model listing function to `providers.ts` and model check to `models.drift.ts` +6. If the provider uses WebSocket, add protocol functions to `ws-providers.ts` and create `ws-<provider>.drift.ts` +7. Update the allowlist in `schema.ts` if needed + +## WebSocket Drift Coverage + +In addition to the 19 existing drift tests (16 HTTP response-shape + 3 model deprecation), WebSocket drift tests cover llmock's WS protocols (4 verified + 2 canary = 6 WS tests): + +| Protocol | Text | Tool Call | Real Endpoint | Status | +| ------------------- | ---- | --------- | ------------------------------------------------------------------- | ---------- | +| OpenAI Responses WS | ✓ | ✓ | `wss://api.openai.com/v1/responses` | Verified | +| OpenAI Realtime | ✓ | ✓ | `wss://api.openai.com/v1/realtime` | Verified | +| Gemini Live | — | — | `wss://generativelanguage.googleapis.com/ws/...BidiGenerateContent` | Unverified | + +**Models**: `gpt-4o-mini` for Responses WS, `gpt-4o-mini-realtime-preview` for Realtime. + +**Auth**: Uses the same `OPENAI_API_KEY` and `GOOGLE_API_KEY` environment variables as HTTP tests. No new secrets needed. + +**How it works**: A TLS WebSocket client (`ws-providers.ts`) connects to real provider endpoints using `node:tls` with RFC 6455 framing. Each protocol function handles the setup sequence (e.g., Realtime session negotiation, Gemini Live setup/setupComplete) and collects messages until a terminal event. The mock side uses the existing `ws-test-client.ts` plaintext client against the local llmock server. 
+ +### Gemini Live: unverified + +llmock's Gemini Live handler implements the text-based `BidiGenerateContent` protocol as documented in Google's [Live API reference](https://ai.google.dev/api/live) — `setup`/`setupComplete` handshake, `clientContent` with turns, `serverContent` with `modelTurn.parts[].text`, and `toolCall` responses. The protocol format is correct per the docs. + +However, as of March 2026, the only models that support `bidiGenerateContent` are native-audio models (`gemini-2.5-flash-native-audio-*`), which reject text-only requests. No text-capable model exists for this endpoint yet, so we cannot triangulate llmock's output against a real API response. + +A canary test (`ws-gemini-live.drift.ts`) queries the Gemini model listing API on each drift run and checks for a non-audio model that supports `bidiGenerateContent`. When Google ships one, the canary will flag it and the full drift tests can be enabled. + +## CI Schedule + +Drift tests run on a schedule: + +- **Daily**: 6:00 AM UTC +- **Manual**: Trigger via GitHub Actions UI (`workflow_dispatch`) +- **NOT** on PR or push — these tests hit real APIs and cost money + +See `.github/workflows/test-drift.yml`. + +## Automated Drift Remediation + +When the daily drift test detects critical diffs on the `main` branch, the `fix-drift.yml` workflow runs automatically: + +1. **Collect** — `scripts/drift-report-collector.ts` runs drift tests and produces a structured `drift-report.json` +2. **Fix** — `scripts/fix-drift.ts` (default mode) constructs a prompt from the report and invokes Claude Code to fix the builders +3. **Verify** — Independent `pnpm test` and `pnpm test:drift` steps confirm the fix works +4. **PR** — `scripts/fix-drift.ts --create-pr` stages and commits the changes, bumps the version, and opens a pull request +5. 
**Issue** (on failure) — `scripts/fix-drift.ts --create-issue` opens a GitHub issue with the drift report and Claude Code output + +Steps 2 and 4/5 are separate invocations of `fix-drift.ts` with different modes. + +### Artifacts + +Both workflows upload artifacts: + +- `drift-report.json` — structured drift data (retained 30 days) +- `claude-code-output.log` — Claude Code's reasoning and tool calls (fix workflow only) + +### Manual trigger + +The fix workflow also supports `workflow_dispatch` for manual runs. + +## Cost + +~25 API calls per run (16 HTTP response-shape + 3 model listing + 6 WS including canaries) using the cheapest available models (`gpt-4o-mini`, `gpt-4o-mini-realtime-preview`, `claude-haiku-4-5-20251001`, `gemini-2.5-flash`) with 10-100 max tokens each. Under $0.15/week at daily cadence. When Gemini Live text-capable models become available, the 2 canary tests will become full drift tests, increasing real WS connections from 4 to 6. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..09b9811 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +# syntax=docker/dockerfile:1 + +# --- Build stage --- +FROM node:22-alpine AS build + +RUN corepack enable && corepack prepare pnpm@10.28.2 --activate + +WORKDIR /app + +COPY package.json pnpm-lock.yaml ./ +RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store \ + pnpm install --frozen-lockfile + +COPY tsconfig.json tsdown.config.ts ./ +COPY src/ src/ + +RUN pnpm run build + +# --- Production stage --- +FROM node:22-alpine + +WORKDIR /app + +# No runtime dependencies — all imports are node:* built-ins +COPY --from=build /app/dist/ dist/ +COPY fixtures/ fixtures/ + +EXPOSE 4010 + +ENTRYPOINT ["node", "dist/cli.js"] +CMD ["--fixtures", "./fixtures", "--host", "0.0.0.0"] diff --git a/README.md b/README.md index 2c39ec9..bd60779 100644 --- a/README.md +++ b/README.md @@ -1,38 +1,51 @@ -# @copilotkit/mock-openai +# @copilotkit/llmock [![Unit 
Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml) [![Drift Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml) [![npm version](https://img.shields.io/npm/v/@copilotkit/llmock)](https://www.npmjs.com/package/@copilotkit/llmock) -Deterministic mock OpenAI server for testing. Streams SSE responses in real OpenAI Chat Completions and Responses API format, driven entirely by fixtures. Zero runtime dependencies — built on Node.js builtins only. +Deterministic mock LLM server for testing. A real HTTP server on a real port — not an in-process interceptor — so every process in your stack (Playwright, Next.js, agent workers, microservices) can point at it via `OPENAI_BASE_URL` / `ANTHROPIC_BASE_URL` and get reproducible, instant responses. Streams SSE in real OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, and Cohere API formats, driven entirely by fixtures. Zero runtime dependencies. -Supports both streaming (SSE) and non-streaming JSON responses, text completions, tool calls, and error injection. Point any process at it via `OPENAI_BASE_URL` and get reproducible, instant responses. - -## Install +## Quick Start ```bash -npm install @copilotkit/mock-openai +npm install @copilotkit/llmock +``` + +```typescript +import { LLMock } from "@copilotkit/llmock"; + +const mock = new LLMock({ port: 5555 }); + +mock.onMessage("hello", { content: "Hi there!" }); + +const url = await mock.start(); +// Point your OpenAI client at `url` instead of https://api.openai.com + +// ... run your tests ... + +await mock.stop(); ``` ## When to Use This vs MSW [MSW (Mock Service Worker)](https://mswjs.io/) is a popular API mocking library, but it solves a different problem. -**The key difference is architecture.** mock-openai runs a real HTTP server on a port. 
MSW patches `http`/`https`/`fetch` modules inside a single Node.js process. MSW can only intercept requests from the process that calls `server.listen()` — child processes, separate services, and workers are unaffected. +**The key difference is architecture.** llmock runs a real HTTP server on a port. MSW patches `http`/`https`/`fetch` modules inside a single Node.js process. MSW can only intercept requests from the process that calls `server.listen()` — child processes, separate services, and workers are unaffected. -This matters for E2E tests where multiple processes make OpenAI calls: +This matters for E2E tests where multiple processes make LLM API calls: ``` Playwright test runner (Node) └─ controls browser → Next.js app (separate process) - └─ OPENAI_BASE_URL → mock-openai :5555 + └─ OPENAI_BASE_URL → llmock :5555 ├─ Mastra agent workers ├─ LangGraph workers └─ CopilotKit runtime ``` -MSW can't intercept any of those calls. mock-openai can — it's a real server on a real port. +MSW can't intercept any of those calls. llmock can — it's a real server on a real port. -**Use mock-openai when:** +**Use llmock when:** - Multiple processes need to hit the same mock (E2E tests, agent frameworks, microservices) -- You want OpenAI-specific SSE format out of the box (Chat Completions + Responses API) +- You want multi-provider SSE format out of the box (OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere) - You prefer defining fixtures as JSON files rather than code - You need a standalone CLI server @@ -42,11 +55,14 @@ MSW can't intercept any of those calls. 
mock-openai can — it's a real server o - You're mocking many different APIs, not just OpenAI - You want in-process interception without running a server -| Capability | mock-openai | MSW | +| Capability | llmock | MSW | | ---------------------------- | --------------------- | ------------------------------------------------------------------------- | | Cross-process interception | **Yes** (real server) | **No** (in-process only) | | OpenAI Chat Completions SSE | **Built-in** | Manual — build `data: {json}\n\n` + `[DONE]` yourself | | OpenAI Responses API SSE | **Built-in** | Manual — MSW's `sse()` sends `data:` events, not OpenAI's `event:` format | +| Claude Messages API SSE | **Built-in** | Manual — build `event:`/`data:` SSE yourself | +| Gemini streaming | **Built-in** | Manual — build `data:` SSE yourself | +| WebSocket APIs | **Built-in** | **No** | | Fixture file loading (JSON) | **Yes** | **No** — handlers are code-only | | Request journal / inspection | **Yes** | **No** — track requests manually | | Non-streaming responses | **Yes** | **Yes** | @@ -54,443 +70,76 @@ MSW can't intercept any of those calls. mock-openai can — it's a real server o | CLI for standalone use | **Yes** | **No** | | Zero dependencies | **Yes** | **No** (~300KB) | -## Quick Start - -```typescript -import { MockOpenAI } from "@copilotkit/mock-openai"; - -const mock = new MockOpenAI({ port: 5555 }); - -mock.onMessage("hello", { content: "Hi there!" }); - -const url = await mock.start(); -// Point your OpenAI client at `url` instead of https://api.openai.com - -// ... run your tests ... - -await mock.stop(); -``` - -## E2E Test Patterns - -Real-world patterns from using mock-openai in Playwright E2E tests with CopilotKit, Mastra, LangGraph, and Agno agent frameworks. - -### Global Setup/Teardown - -Start the mock server once for the entire test suite. All child processes (Next.js, agent workers) inherit the URL via environment variable. 
- -```typescript -// e2e/mock-openai-setup.ts -import { MockOpenAI } from "@copilotkit/mock-openai"; -import * as path from "node:path"; - -let mockServer: MockOpenAI | null = null; - -export async function setupMockOpenAI(): Promise { - mockServer = new MockOpenAI({ port: 5555 }); - - // Load JSON fixtures from a directory - mockServer.loadFixtureDir(path.join(__dirname, "fixtures", "openai")); - - const url = await mockServer.start(); - - // Child processes use this to find the mock - process.env.MOCK_OPENAI_URL = `${url}/v1`; -} - -export async function teardownMockOpenAI(): Promise { - if (mockServer) { - await mockServer.stop(); - mockServer = null; - } -} -``` - -The Next.js app (or any other service) just needs: - -```env -OPENAI_BASE_URL=http://localhost:5555/v1 -OPENAI_API_KEY=mock-key -``` - -### JSON Fixture Files - -Define fixtures as JSON — one file per feature, loaded with `loadFixtureFile` or `loadFixtureDir`. - -**Text responses** — match on a substring of the last user message: - -```json -{ - "fixtures": [ - { - "match": { "userMessage": "stock price of AAPL" }, - "response": { "content": "The current stock price of Apple Inc. (AAPL) is $150.25." } - }, - { - "match": { "userMessage": "capital of France" }, - "response": { "content": "The capital of France is Paris." 
} - } - ] -} -``` - -**Tool call responses** — the agent framework receives these as tool calls and executes them: - -```json -{ - "fixtures": [ - { - "match": { "userMessage": "one step with eggs" }, - "response": { - "toolCalls": [ - { - "name": "generate_task_steps", - "arguments": "{\"steps\":[{\"description\":\"Crack eggs into bowl\",\"status\":\"enabled\"},{\"description\":\"Preheat oven to 350F\",\"status\":\"enabled\"}]}" - } - ] - } - }, - { - "match": { "userMessage": "background color to blue" }, - "response": { - "toolCalls": [ - { - "name": "change_background", - "arguments": "{\"background\":\"blue\"}" - } - ] - } - } - ] -} -``` - -### Fixture Load Order Matters - -Fixtures are evaluated first-match-wins. When two fixtures could match the same message, load the more specific one first: - -```typescript -// Load HITL fixtures first — "one step with eggs" is more specific than -// "plan to make brownies" which also appears in the HITL user message -mockServer.loadFixtureFile(path.join(FIXTURES_DIR, "human-in-the-loop.json")); - -// Then load everything else — earlier matches take priority -mockServer.loadFixtureDir(FIXTURES_DIR); -``` - -### Predicate-Based Routing - -When substring matching isn't enough — for example, when the last user message is the same across multiple requests but the system prompt differs — use predicates: - -```typescript -// Supervisor agent: same user message every time, but system prompt -// contains state flags like "Flights found: false" -mockServer.addFixture({ - match: { - predicate: (req) => { - const sysMsg = req.messages.find((m) => m.role === "system"); - return sysMsg?.content?.includes("Flights found: false") ?? 
false; - }, - }, - response: { - toolCalls: [ - { - name: "supervisor_response", - arguments: '{"answer":"Let me find flights for you!","next_agent":"flights_agent"}', - }, - ], - }, -}); - -mockServer.addFixture({ - match: { - predicate: (req) => { - const sys = req.messages.find((m) => m.role === "system")?.content ?? ""; - return sys.includes("Flights found: true") && sys.includes("Hotels found: false"); - }, - }, - response: { - toolCalls: [ - { - name: "supervisor_response", - arguments: '{"answer":"Now let me find hotels.","next_agent":"hotels_agent"}', - }, - ], - }, -}); -``` - -### Tool Result Catch-All - -After a tool executes, the next request contains a `role: "tool"` message with the result. Add a catch-all for these so the conversation can continue: - -```typescript -const toolResultFixture = { - match: { - predicate: (req) => { - const last = req.messages[req.messages.length - 1]; - return last?.role === "tool"; - }, - }, - response: { content: "Done! I've completed that for you." }, -}; -mockServer.addFixture(toolResultFixture); - -// Move it to the front so it matches before substring-based fixtures -// (the last user message hasn't changed, so substring fixtures would -// match the same fixture again otherwise) -const fixtures = (mockServer as any).fixtures; -const idx = fixtures.indexOf(toolResultFixture); -if (idx > 0) { - fixtures.splice(idx, 1); - fixtures.unshift(toolResultFixture); -} -``` - -### Universal Catch-All - -Append a catch-all last to handle any request that doesn't match a specific fixture, preventing 404s from crashing the test: - -```typescript -mockServer.addFixture({ - match: { predicate: () => true }, - response: { content: "I understand. How can I help you with that?" }, -}); -``` - -## Programmatic API - -### `new MockOpenAI(options?)` - -Create a new mock server instance. 
- -| Option | Type | Default | Description | -| ----------- | -------- | ------------- | ----------------------------------- | -| `port` | `number` | `0` (random) | Port to listen on | -| `host` | `string` | `"127.0.0.1"` | Host to bind to | -| `latency` | `number` | `0` | Default ms delay between SSE chunks | -| `chunkSize` | `number` | `20` | Default characters per SSE chunk | - -### `MockOpenAI.create(options?)` - -Static factory — creates an instance and starts it in one call. Returns `Promise`. - -### Server Lifecycle - -| Method | Returns | Description | -| --------- | ----------------- | -------------------------------------- | -| `start()` | `Promise` | Start the server, returns the base URL | -| `stop()` | `Promise` | Stop the server | -| `url` | `string` | Base URL (throws if not started) | -| `baseUrl` | `string` | Alias for `url` | -| `port` | `number` | Listening port (throws if not started) | - -### Fixture Registration - -All registration methods return `this` for chaining. - -#### `on(match, response, opts?)` - -Register a fixture with full control over match criteria. - -```typescript -mock.on({ userMessage: /weather/i, model: "gpt-4" }, { content: "It's sunny!" }, { latency: 50 }); -``` - -#### `onMessage(pattern, response, opts?)` - -Shorthand — matches on the last user message. - -```typescript -mock.onMessage("hello", { content: "Hi!" }); -mock.onMessage(/greet/i, { content: "Hey there!" }); -``` - -#### `onToolCall(name, response, opts?)` - -Shorthand — matches when the request contains a tool with the given name. - -```typescript -mock.onToolCall("get_weather", { - toolCalls: [{ name: "get_weather", arguments: '{"location":"SF"}' }], -}); -``` - -#### `onToolResult(id, response, opts?)` - -Shorthand — matches when a tool result message has the given `tool_call_id`. 
- -```typescript -mock.onToolResult("call_abc123", { content: "Temperature is 72F" }); -``` - -#### `addFixture(fixture)` / `addFixtures(fixtures)` - -Add raw `Fixture` objects directly. - -#### `loadFixtureFile(path)` / `loadFixtureDir(path)` - -Load fixtures from JSON files on disk. See [Fixture Files](#json-fixture-files) above. - -#### `clearFixtures()` - -Remove all registered fixtures. - -### Error Injection - -#### `nextRequestError(status, errorBody?)` - -Queue a one-shot error for the very next request. The error fires once, then auto-removes itself. - -```typescript -mock.nextRequestError(429, { - message: "Rate limited", - type: "rate_limit_error", -}); - -// Next request → 429 error -// Subsequent requests → normal fixture matching -``` - -### Request Journal - -Every request to `/v1/chat/completions` and `/v1/responses` is recorded in a journal. - -#### Programmatic Access - -| Method | Returns | Description | -| ------------------ | ---------------------- | ------------------------------------- | -| `getRequests()` | `JournalEntry[]` | All recorded requests | -| `getLastRequest()` | `JournalEntry \| null` | Most recent request | -| `clearRequests()` | `void` | Clear the journal | -| `journal` | `Journal` | Direct access to the journal instance | - -```typescript -await fetch(mock.url + "/v1/chat/completions", { ... }); - -const last = mock.getLastRequest(); -expect(last?.body.messages).toContainEqual({ - role: "user", - content: "hello", -}); -``` - -#### HTTP Endpoints - -The server also exposes journal data over HTTP (useful in CLI mode): - -- `GET /v1/_requests` — returns all journal entries as JSON. Supports `?limit=N`. -- `DELETE /v1/_requests` — clears the journal. Returns 204. - -### Reset - -#### `reset()` - -Clear all fixtures **and** the journal in one call. Works before or after the server is started. 
- -```typescript -afterEach(() => { - mock.reset(); -}); -``` - -## Fixture Matching - -Fixtures are evaluated in registration order (first match wins). A fixture matches when **all** specified fields match the incoming request (AND logic). - -| Field | Type | Matches on | -| ------------- | ------------------ | --------------------------------------------- | -| `userMessage` | `string \| RegExp` | Content of the last `role: "user"` message | -| `toolName` | `string` | Name of a tool in the request's `tools` array | -| `toolCallId` | `string` | `tool_call_id` on a `role: "tool"` message | -| `model` | `string \| RegExp` | The `model` field in the request | -| `predicate` | `(req) => boolean` | Arbitrary matching function | - -## Fixture Responses - -### Text - -```typescript -{ - content: "Hello world"; -} -``` - -Streams as SSE chunks, splitting `content` by `chunkSize`. With `stream: false`, returns a standard `chat.completion` JSON object. - -### Tool Calls - -```typescript -{ - toolCalls: [{ name: "get_weather", arguments: '{"location":"SF"}' }]; -} -``` - -### Errors - -```typescript -{ - error: { message: "Rate limited", type: "rate_limit_error" }, - status: 429 -} -``` - -## API Endpoints - -The server handles: - -- **POST `/v1/chat/completions`** — OpenAI Chat Completions API (streaming and non-streaming) -- **POST `/v1/responses`** — OpenAI Responses API (streaming and non-streaming). Requests are translated to the Chat Completions fixture format internally, so the same fixtures work for both endpoints. 
- -## CLI - -The package includes a standalone server binary: +## Features + +- **[Multi-provider support](https://llmock.copilotkit.dev/compatible-providers.html)** — [OpenAI Chat Completions](https://llmock.copilotkit.dev/chat-completions.html), [OpenAI Responses](https://llmock.copilotkit.dev/responses-api.html), [Anthropic Claude](https://llmock.copilotkit.dev/claude-messages.html), [Google Gemini](https://llmock.copilotkit.dev/gemini.html), [AWS Bedrock](https://llmock.copilotkit.dev/aws-bedrock.html) (streaming + Converse), [Azure OpenAI](https://llmock.copilotkit.dev/azure-openai.html), [Vertex AI](https://llmock.copilotkit.dev/vertex-ai.html), [Ollama](https://llmock.copilotkit.dev/ollama.html), [Cohere](https://llmock.copilotkit.dev/cohere.html) +- **[Embeddings API](https://llmock.copilotkit.dev/embeddings.html)** — OpenAI-compatible embedding responses with configurable dimensions +- **[Structured output / JSON mode](https://llmock.copilotkit.dev/structured-output.html)** — `response_format`, `json_schema`, and function calling +- **[Sequential responses](https://llmock.copilotkit.dev/sequential-responses.html)** — Stateful multi-turn fixtures that return different responses on each call +- **[Streaming physics](https://llmock.copilotkit.dev/streaming-physics.html)** — Configurable `ttft`, `tps`, and `jitter` for realistic timing +- **[WebSocket APIs](https://llmock.copilotkit.dev/websocket.html)** — OpenAI Responses WS, Realtime API, and Gemini Live +- **[Error injection](https://llmock.copilotkit.dev/error-injection.html)** — One-shot errors, rate limiting, and provider-specific error formats +- **[Chaos testing](https://llmock.copilotkit.dev/chaos-testing.html)** — Probabilistic failure injection: 500 errors, malformed JSON, mid-stream disconnects +- **[Prometheus metrics](https://llmock.copilotkit.dev/metrics.html)** — Request counts, latencies, and fixture match rates at `/metrics` +- **[Request journal](https://llmock.copilotkit.dev/docs.html)** — 
Record, inspect, and assert on every request +- **[Fixture validation](https://llmock.copilotkit.dev/fixtures.html)** — Schema validation at load time with `--validate-on-load` +- **CLI with hot-reload** — Standalone server with `--watch` for live fixture editing +- **[Docker + Helm](https://llmock.copilotkit.dev/docker.html)** — Container image and Helm chart for CI/CD pipelines +- **Record-and-replay** — VCR-style proxy-on-miss records real API responses as fixtures for deterministic replay +- **[Drift detection](https://llmock.copilotkit.dev/drift-detection.html)** — Daily CI runs against real APIs to catch response format changes +- **Claude Code integration** — `/write-fixtures` skill teaches your AI assistant how to write fixtures correctly + +## CLI Quick Reference ```bash -mock-openai [options] -``` - -| Option | Short | Default | Description | -| -------------- | ----- | ------------ | ---------------------------------- | -| `--port` | `-p` | `4010` | Port to listen on | -| `--host` | `-h` | `127.0.0.1` | Host to bind to | -| `--fixtures` | `-f` | `./fixtures` | Path to fixtures directory or file | -| `--latency` | `-l` | `0` | Latency between SSE chunks (ms) | -| `--chunk-size` | `-c` | `20` | Characters per SSE chunk | -| `--help` | | | Show help | +llmock [options] +``` + +| Option | Short | Default | Description | +| -------------------- | ----- | ------------ | ------------------------------------------- | +| `--port` | `-p` | `4010` | Port to listen on | +| `--host` | `-h` | `127.0.0.1` | Host to bind to | +| `--fixtures` | `-f` | `./fixtures` | Path to fixtures directory or file | +| `--latency` | `-l` | `0` | Latency between SSE chunks (ms) | +| `--chunk-size` | `-c` | `20` | Characters per SSE chunk | +| `--watch` | `-w` | | Watch fixture path for changes and reload | +| `--log-level` | | `info` | Log verbosity: `silent`, `info`, `debug` | +| `--validate-on-load` | | | Validate fixture schemas at startup | +| `--chaos-drop` | | `0` | Chaos: 
probability of 500 errors (0-1) | +| `--chaos-malformed` | | `0` | Chaos: probability of malformed JSON (0-1) | +| `--chaos-disconnect` | | `0` | Chaos: probability of disconnect (0-1) | +| `--metrics` | | | Enable Prometheus metrics at /metrics | +| `--record` | | | Record mode: proxy unmatched to real APIs | +| `--strict` | | | Strict mode: fail on unmatched requests | +| `--provider-*` | | | Upstream URL per provider (with `--record`) | +| `--help` | | | Show help | ```bash # Start with bundled example fixtures -mock-openai +llmock # Custom fixtures on a specific port -mock-openai -p 8080 -f ./my-fixtures +llmock -p 8080 -f ./my-fixtures # Simulate slow responses -mock-openai --latency 100 --chunk-size 5 -``` - -## Advanced Usage +llmock --latency 100 --chunk-size 5 -### Low-level Server +# Record mode: proxy unmatched requests to real APIs and save as fixtures +llmock --record --provider-openai https://api.openai.com --provider-anthropic https://api.anthropic.com -If you need the raw HTTP server without the `MockOpenAI` wrapper: +# Strict mode in CI: fail if any request doesn't match a fixture +llmock --strict -f ./fixtures +``` -```typescript -import { createServer } from "@copilotkit/mock-openai"; +## Documentation -const fixtures = [{ match: { userMessage: "hi" }, response: { content: "Hello!" } }]; +Full API reference, fixture format, E2E patterns, and provider-specific guides: -const { server, journal, url } = await createServer(fixtures, { port: 0 }); -// ... use it ... -server.close(); -``` +**[https://llmock.copilotkit.dev/docs.html](https://llmock.copilotkit.dev/docs.html)** -### Per-Fixture Timing +## Real-World Usage -```typescript -mock.on({ userMessage: "slow" }, { content: "Finally..." }, { latency: 200, chunkSize: 5 }); -``` +[CopilotKit](https://github.com/CopilotKit/CopilotKit) uses llmock across its test suite to verify AI agent behavior across multiple LLM providers without hitting real APIs. 
## License diff --git a/charts/llmock/Chart.yaml b/charts/llmock/Chart.yaml new file mode 100644 index 0000000..5603860 --- /dev/null +++ b/charts/llmock/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: llmock +description: Deterministic mock LLM server for testing (OpenAI, Anthropic, Gemini) +type: application +version: 0.1.0 +appVersion: "1.6.0" diff --git a/charts/llmock/templates/_helpers.tpl b/charts/llmock/templates/_helpers.tpl new file mode 100644 index 0000000..896b8d6 --- /dev/null +++ b/charts/llmock/templates/_helpers.tpl @@ -0,0 +1,40 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "llmock.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "llmock.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "llmock.labels" -}} +helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{ include "llmock.selectorLabels" . }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "llmock.selectorLabels" -}} +app.kubernetes.io/name: {{ include "llmock.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/charts/llmock/templates/deployment.yaml b/charts/llmock/templates/deployment.yaml new file mode 100644 index 0000000..22534ca --- /dev/null +++ b/charts/llmock/templates/deployment.yaml @@ -0,0 +1,70 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "llmock.fullname" . }} + labels: + {{- include "llmock.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "llmock.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "llmock.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: llmock + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - "--fixtures" + - "{{ .Values.fixtures.mountPath }}" + - "--host" + - "0.0.0.0" + - "--port" + - "{{ .Values.service.port }}" + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: http + initialDelaySeconds: 2 + periodSeconds: 5 + {{- with .Values.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- if .Values.fixtures.existingClaim }} + volumeMounts: + - name: fixtures + mountPath: {{ .Values.fixtures.mountPath }} + {{- end }} + {{- if .Values.fixtures.existingClaim }} + volumes: + - name: fixtures + persistentVolumeClaim: + claimName: {{ .Values.fixtures.existingClaim }} + {{- end }} diff --git a/charts/llmock/templates/service.yaml b/charts/llmock/templates/service.yaml new file mode 100644 index 0000000..894b443 --- /dev/null +++ b/charts/llmock/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "llmock.fullname" . }} + labels: + {{- include "llmock.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "llmock.selectorLabels" . | nindent 4 }} diff --git a/charts/llmock/values.yaml b/charts/llmock/values.yaml new file mode 100644 index 0000000..c33a2ea --- /dev/null +++ b/charts/llmock/values.yaml @@ -0,0 +1,31 @@ +nameOverride: "" +fullnameOverride: "" + +replicaCount: 1 + +image: + repository: ghcr.io/copilotkit/llmock + tag: "" + pullPolicy: IfNotPresent + +service: + type: ClusterIP + port: 4010 + +fixtures: + # Mount path inside the container where fixture files are served from + mountPath: /app/fixtures + # If set, use an existing PVC for fixtures + existingClaim: "" + +resources: {} + # limits: + # cpu: 200m + # memory: 256Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} +tolerations: [] +affinity: {} diff --git a/docs/CNAME b/docs/CNAME index 3ce79fb..bd52770 100644 --- a/docs/CNAME +++ b/docs/CNAME @@ -1 +1 @@ -mock-openai.copilotkit.dev +llmock.copilotkit.dev diff --git a/docs/aws-bedrock.html b/docs/aws-bedrock.html new file mode 100644 index 0000000..09cf238 --- /dev/null +++ b/docs/aws-bedrock.html @@ -0,0 +1,389 @@ + + + + + + AWS Bedrock — llmock + + + + + + + + +
+ + +
+

AWS Bedrock

+

+ llmock supports the AWS Bedrock Claude invoke and Converse API endpoints — both + streaming and non-streaming. Point the AWS SDK at your llmock instance and fixtures match + against the Bedrock-format requests, returning responses in the authentic Bedrock format + including AWS Event Stream binary framing for streaming. +

+ +

How It Works

+

+ AWS Bedrock uses URL patterns like + /model/{modelId}/invoke and + /model/{modelId}/invoke-with-response-stream to call foundation models. The + request body uses the Anthropic Messages format with an additional + anthropic_version field, and does not include a + model field in the body (the model is in the URL). +

+

+ llmock detects the Bedrock URL pattern, extracts the model ID, translates the request to + the internal fixture-matching format, and returns the response in the Anthropic Messages + API format — which is identical to the Bedrock Claude response format. For + streaming, responses use the AWS Event Stream binary framing protocol. +

+

+ llmock also supports the Converse API (/model/{modelId}/converse + and /model/{modelId}/converse-stream), which uses a different + request/response format with camelCase field names. +

+ +

URL Patterns

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Bedrock URLDescription
POST /model/{modelId}/invokeNon-streaming Claude invoke
POST /model/{modelId}/invoke-with-response-streamStreaming Claude invoke (AWS Event Stream binary)
POST /model/{modelId}/converseConverse API (non-streaming)
POST /model/{modelId}/converse-streamConverse API (streaming, AWS Event Stream binary)
+ +

Request Format

+

+ Bedrock Claude requests use the Anthropic Messages format. The + anthropic_version field is accepted but not validated. The model is taken + from the URL path, not the request body. +

+ +
+
+ bedrock request body json +
+
{
+  "anthropic_version": "bedrock-2023-05-31",
+  "max_tokens": 512,
+  "messages": [
+    { "role": "user", "content": "Hello" }
+  ],
+  "system": "You are helpful"
+}
+
+ +

Response Format

+

+ Bedrock Claude responses are identical to the Anthropic Messages API non-streaming + responses: +

+ +
+
text response json
+
{
+  "id": "msg_...",
+  "type": "message",
+  "role": "assistant",
+  "content": [{ "type": "text", "text": "Hello!" }],
+  "stop_reason": "end_turn",
+  "stop_sequence": null,
+  "usage": { "input_tokens": 10, "output_tokens": 5 }
+}
+
+ +

Model Resolution

+

+ The model ID is extracted from the URL path. It is used both for fixture matching and + for inclusion in the response body. Bedrock model IDs typically look like:

+
    +
  • anthropic.claude-3-5-sonnet-20241022-v2:0
  • +
  • anthropic.claude-3-haiku-20240307-v1:0
  • +
  • anthropic.claude-3-opus-20240229-v1:0
  • +
+

Write fixtures that match by Bedrock model ID:

+ +
+
+ fixture matching by Bedrock model ID json +
+
{
+  "match": {
+    "model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "userMessage": "hello"
+  },
+  "response": {
+    "content": "Hello from Bedrock!"
+  }
+}
+
+ +

SDK Configuration

+

To point the AWS SDK Bedrock Runtime client at llmock, configure the endpoint URL:

+ +
+
bedrock-sdk.ts ts
+
import { BedrockRuntimeClient, InvokeModelCommand } from "@aws-sdk/client-bedrock-runtime";
+
+const client = new BedrockRuntimeClient({
+  region: "us-east-1",
+  endpoint: "http://localhost:4010",  // llmock URL (default CLI port)
+  credentials: { accessKeyId: "mock", secretAccessKey: "mock" },
+});
+
+const response = await client.send(new InvokeModelCommand({
+  modelId: "anthropic.claude-3-5-sonnet-20241022-v2:0",
+  contentType: "application/json",
+  body: JSON.stringify({
+    anthropic_version: "bedrock-2023-05-31",
+    max_tokens: 512,
+    messages: [{ role: "user", content: "Hello" }],
+  }),
+}));
+
+ +

Fixture Examples

+ +
+
+ text response fixture json +
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "hello" },
+      "response": { "content": "Hi there!" }
+    },
+    {
+      "match": { "userMessage": "weather" },
+      "response": {
+        "toolCalls": [{
+          "name": "get_weather",
+          "arguments": "{\"city\":\"SF\"}"
+        }]
+      }
+    }
+  ]
+}
+
+ +
+

+ Fixtures are shared across all providers. The same fixture file works for OpenAI, Claude + Messages, Gemini, Azure, and Bedrock endpoints — llmock translates each provider's + request format to a common internal format before matching. +

+
+ +

Streaming (invoke-with-response-stream)

+

+ The invoke-with-response-stream endpoint returns responses using the + AWS Event Stream binary protocol. llmock implements this protocol + natively — each response chunk is encoded as a binary frame with CRC32 checksums, + headers, and a JSON payload, exactly as the real Bedrock service sends them. +

+

Streaming events follow the Bedrock Claude streaming sequence:

+
    +
  • + message_start — opens the message with role: "assistant"
  • +
  • content_block_start — begins a content block
  • +
  • + content_block_delta — delivers text chunks (text_delta) or + tool input (input_json_delta)
  • +
  • content_block_stop — closes the content block
  • +
  • + message_stop — closes the message with a stop_reason
  • +
+ +
+
streaming SDK usage ts
+
import { BedrockRuntimeClient, InvokeModelWithResponseStreamCommand } from "@aws-sdk/client-bedrock-runtime";
+
+const client = new BedrockRuntimeClient({
+  region: "us-east-1",
+  endpoint: "http://localhost:4005",
+  credentials: { accessKeyId: "mock", secretAccessKey: "mock" },
+});
+
+const response = await client.send(new InvokeModelWithResponseStreamCommand({
+  modelId: "anthropic.claude-3-5-sonnet-20241022-v2:0",
+  contentType: "application/json",
+  body: JSON.stringify({
+    anthropic_version: "bedrock-2023-05-31",
+    max_tokens: 512,
+    messages: [{ role: "user", content: "Hello" }],
+  }),
+}));
+
+ +

AWS Event Stream Binary Format

+

+ Unlike SSE-based streaming used by OpenAI and Claude, AWS Bedrock streaming uses a + binary event stream protocol. Each frame has the following layout: +

+
+
+ binary frame layout text +
+
[total_length: 4B uint32-BE]
+[headers_length: 4B uint32-BE]
+[prelude_crc32: 4B CRC32 of first 8 bytes]
+[headers: variable-length string key-value pairs]
+[payload: raw JSON bytes]
+[message_crc32: 4B CRC32 of entire frame minus last 4 bytes]
+
+

+ llmock encodes these frames with proper CRC32 checksums, so the AWS SDK can decode them + natively. The :event-type header in each frame carries the event name (e.g. + chunk), and the :content-type header is set to + application/json. +

+ +

Converse API

+

+ The Converse API is AWS Bedrock's provider-agnostic conversation interface. It uses + camelCase field names and a different request structure than the Claude-native invoke + endpoints. llmock supports both /model/{modelId}/converse (non-streaming) and + /model/{modelId}/converse-stream (streaming via Event Stream binary). +

+ +
+
+ converse request body json +
+
{
+  "messages": [
+    {
+      "role": "user",
+      "content": [{ "text": "Hello" }]
+    }
+  ],
+  "system": [{ "text": "You are helpful" }],
+  "inferenceConfig": { "maxTokens": 512 }
+}
+
+ +
+
converse response json
+
{
+  "output": {
+    "message": {
+      "role": "assistant",
+      "content": [{ "text": "Hello!" }]
+    }
+  },
+  "stopReason": "end_turn",
+  "usage": { "inputTokens": 0, "outputTokens": 0, "totalTokens": 0 }
+}
+
+ +

+ The Converse API also supports tool calls via toolUse and + toolResult content blocks, and tool definitions via the + toolConfig field. llmock translates all of these to the unified internal + format for fixture matching. +

+
+
+
+ +
+ + diff --git a/docs/azure-openai.html b/docs/azure-openai.html new file mode 100644 index 0000000..c17a494 --- /dev/null +++ b/docs/azure-openai.html @@ -0,0 +1,217 @@ + + + + + + Azure OpenAI — llmock + + + + + + + + +
+ + +
+

Azure OpenAI

+

+ llmock routes Azure OpenAI deployment-based URLs to the existing chat completions and + embeddings handlers. Point the Azure OpenAI SDK at your llmock instance and fixtures work + exactly as they do with the standard OpenAI endpoints. +

+ +

How It Works

+

+ Azure OpenAI uses a different URL pattern than standard OpenAI. Instead of + /v1/chat/completions, Azure uses + /openai/deployments/{deployment-id}/chat/completions with an + api-version query parameter. +

+

+ llmock detects these Azure-style URLs and rewrites them to the standard paths before + routing to the existing handlers. The deployment ID is extracted and used as a model + fallback when the request body omits the model field (which Azure requests + commonly do, since the model is implied by the deployment). +

+ +

URL Pattern Mapping

+ + + + + + + + + + + + + + + + + +
Azure URLMapped To
/openai/deployments/{id}/chat/completions/v1/chat/completions
/openai/deployments/{id}/embeddings/v1/embeddings
+ +

Model Resolution

+

+ When a request arrives via an Azure deployment URL, llmock resolves the model name using + these rules: +

+
    +
  1. + If the request body includes a model field, that value is used (body takes + precedence). +
  2. +
  3. + If the body omits model, the deployment ID from the URL is used as the + model name for fixture matching. +
  4. +
+

This means you can write fixtures that match by deployment name:

+ +
+
+ fixture matching by deployment ID json +
+
{
+  "match": {
+    "model": "my-gpt4-deployment",
+    "userMessage": "hello"
+  },
+  "response": {
+    "content": "Hello from Azure!"
+  }
+}
+
+ +

Authentication

+

+ llmock does not validate authentication tokens, but it accepts both Azure-style and + standard auth headers without rejecting the request: +

+
    +
  • api-key: your-azure-key (Azure-native header)
  • +
  • Authorization: Bearer your-token (standard OAuth/OpenAI header)
  • +
+ +

SDK Configuration

+

To point the Azure OpenAI Node.js SDK at llmock, set the endpoint to your llmock URL:

+ +
+
azure-openai-sdk.ts ts
+
import { AzureOpenAI } from "openai";
+
+const client = new AzureOpenAI({
+  endpoint: "http://localhost:4005",  // llmock URL
+  apiKey: "mock-key",
+  apiVersion: "2024-10-21",
+  deployment: "my-gpt4-deployment",
+});
+
+const response = await client.chat.completions.create({
+  model: "my-gpt4-deployment",
+  messages: [{ role: "user", content: "hello" }],
+});
+
+ +

Environment Variables

+

+ When using the Azure OpenAI SDK, you can configure the endpoint via environment variables: +

+ +
+
.env sh
+
# Point Azure SDK at llmock
+AZURE_OPENAI_ENDPOINT=http://localhost:4005
+AZURE_OPENAI_API_KEY=mock-key
+
+ +
+

+ The api-version query parameter is accepted but ignored — llmock + responds identically regardless of which API version is requested. This means you can + test against any API version without changing fixtures. +

+
+
+
+
+ +
+ + diff --git a/docs/chaos-testing.html b/docs/chaos-testing.html new file mode 100644 index 0000000..e0dfc67 --- /dev/null +++ b/docs/chaos-testing.html @@ -0,0 +1,306 @@ + + + + + + Chaos Testing — llmock + + + + + + + + +
+ + +
+

Chaos Testing

+

+ llmock provides probabilistic failure injection to test how your application handles + unreliable LLM APIs. Three failure modes can be configured at the server, fixture, or + per-request level. +

+ +

Failure Modes

+ + + + + + + + + + + + + + + + + + + + + + + + + +
ModeActionDescription
dropHTTP 500 + Returns a 500 error with + {"error":{"message":"Chaos: request dropped","code":"chaos_drop"}} +
malformedBroken JSON + Returns HTTP 200 with invalid JSON body: + {malformed json: <<<chaos>>> +
disconnectConnection destroyedDestroys the TCP connection immediately with no response
+ +

Precedence

+

+ Chaos configuration is resolved with a three-level precedence hierarchy. Higher levels + override lower ones: +

+
    +
  1. Per-request headers (highest) — override everything
  2. +
  3. Fixture-level config — overrides server defaults
  4. +
  5. Server-level defaults (lowest)
  6. +
+

+ Within a single level, modes are evaluated in order: drop, malformed, disconnect. The + first mode that triggers (based on its probability) wins. +

+ +

Quick Start

+ +
+
chaos-quick-start.ts ts
+
import { LLMock } from "@copilotkit/llmock";
+
+const mock = new LLMock();
+mock.onMessage("hello", { content: "Hi!" });
+
+// 50% of all requests will be dropped with a 500
+mock.setChaos({ dropRate: 0.5 });
+
+await mock.start();
+
+// Later, remove chaos
+mock.clearChaos();
+
+ +

Programmatic API

+ +
+
+ Programmatic chaos control ts +
+
// Set server-level chaos (returns `this` for chaining)
+mock.setChaos({
+  dropRate: 0.1,        // 10% drop rate
+  malformedRate: 0.05,  // 5% malformed rate
+  disconnectRate: 0.02, // 2% disconnect rate
+});
+
+// Remove all server-level chaos
+mock.clearChaos();
+
+ +

Fixture-Level Chaos

+

+ Attach a chaos config to individual fixtures so only specific responses + experience failures: +

+ +
+
chaos-fixture.json json
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "unstable" },
+      "response": { "content": "This might fail!" },
+      "chaos": {
+        "dropRate": 0.3,
+        "malformedRate": 0.2,
+        "disconnectRate": 0.1
+      }
+    },
+    {
+      "match": { "userMessage": "stable" },
+      "response": { "content": "This always works." }
+    }
+  ]
+}
+
+ +

Per-Request Headers

+

+ Override chaos rates on individual requests using HTTP headers. Values are floats between + 0 and 1: +

+ + + + + + + + + + + + + + + + + + + + + +
HeaderControls
x-llmock-chaos-dropDrop rate (0–1)
x-llmock-chaos-malformedMalformed rate (0–1)
x-llmock-chaos-disconnectDisconnect rate (0–1)
+ +
+
+ Per-request chaos via headers ts +
+
// Force 100% disconnect on this specific request
+await fetch(`${mock.url}/v1/chat/completions`, {
+  method: "POST",
+  headers: {
+    "Content-Type": "application/json",
+    "x-llmock-chaos-disconnect": "1.0",
+  },
+  body: JSON.stringify({ model: "gpt-4", messages: [...] }),
+});
+
+ +

CLI Flags

+

Set server-level chaos from the command line:

+ +
+
CLI chaos flags bash
+
npx llmock --fixtures ./fixtures \
+  --chaos-drop 0.1 \
+  --chaos-malformed 0.05 \
+  --chaos-disconnect 0.02
+
+ +

Journal Tracking

+

+ When chaos triggers, the journal entry includes a chaosAction field recording + which failure mode was applied: +

+ +
+
+ Journal entry with chaos json +
+
{
+  "method": "POST",
+  "path": "/v1/chat/completions",
+  "response": {
+    "status": 500,
+    "fixture": { "...": "..." },
+    "chaosAction": "drop"
+  }
+}
+
+

+ The chaosAction values are "drop", "malformed", or + "disconnect". The status codes are 500 for drop, 200 for malformed, and 0 for + disconnect (connection destroyed). +

+ +

Prometheus Metrics

+

+ When metrics are enabled (--metrics), each chaos trigger increments the + llmock_chaos_triggered_total counter with an action label: +

+ +
+
Metrics output text
+
# TYPE llmock_chaos_triggered_total counter
+llmock_chaos_triggered_total{action="drop"} 3
+llmock_chaos_triggered_total{action="malformed"} 1
+llmock_chaos_triggered_total{action="disconnect"} 2
+
+
+
+
+ +
+ + diff --git a/docs/chat-completions.html b/docs/chat-completions.html new file mode 100644 index 0000000..353d4f2 --- /dev/null +++ b/docs/chat-completions.html @@ -0,0 +1,276 @@ + + + + + + Chat Completions — llmock + + + + + + + + + +
+ + +
+

OpenAI Chat Completions

+

+ The POST /v1/chat/completions endpoint supports both streaming (SSE) and + non-streaming JSON responses, including text content and tool calls. This is the most + commonly used endpoint. +

+ +

Endpoint

+ + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1/chat/completionsSSE (stream: true) or JSON (stream: false)
+ +

Unit Test: Text Response

+

+ Using the programmatic API with vitest, register a fixture and assert on the response. +

+ +
+
+ text-response.test.ts ts +
+
import { LLMock } from "@copilotkit/llmock";
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+
+let mock: LLMock;
+
+beforeAll(async () => {
+  mock = new LLMock();
+  await mock.start();
+});
+
+afterAll(async () => {
+  await mock.stop();
+});
+
+it("non-streaming text response", async () => {
+  mock.on({ userMessage: "hello" }, { content: "Hello! How can I help?" });
+
+  const res = await fetch(`${mock.url}/v1/chat/completions`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "gpt-4",
+      messages: [{ role: "user", content: "hello" }],
+      stream: false,
+    }),
+  });
+
+  const body = await res.json();
+  expect(body.choices[0].message.content).toBe("Hello! How can I help?");
+  expect(body.object).toBe("chat.completion");
+  expect(body.id).toMatch(/^chatcmpl-/);
+});
+
+ +

Unit Test: Tool Calls

+ +
+
tool-calls.test.ts ts
+
it("returns tool call in streaming mode", async () => {
+  mock.on(
+    { userMessage: "weather" },
+    { toolCalls: [{ name: "get_weather", arguments: '{"city":"SF"}' }] }
+  );
+
+  const res = await fetch(`${mock.url}/v1/chat/completions`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "gpt-4",
+      messages: [{ role: "user", content: "what is the weather?" }],
+      stream: true,
+    }),
+  });
+
+  const text = await res.text();
+  expect(text).toContain("get_weather");
+  expect(text).toContain("data: [DONE]");
+});
+
+ +

Integration Test: Streaming SSE

+ +
+
+ streaming-integration.test.ts ts +
+
import { createServer, type ServerInstance } from "@copilotkit/llmock/server";
+
+const instance = await createServer(
+  [{ match: { userMessage: "hello" }, response: { content: "Hello! How can I help?" } }],
+  { port: 0, chunkSize: 10 }
+);
+
+const res = await httpPost(`${instance.url}/v1/chat/completions`, {
+  model: "gpt-4",
+  messages: [{ role: "user", content: "hello" }],
+  stream: true,
+});
+
+// Parse SSE chunks
+const chunks = res.body
+  .split("\n\n")
+  .filter(b => b.startsWith("data: ") && !b.includes("[DONE]"))
+  .map(b => JSON.parse(b.slice(6)));
+
+// First chunk has the role
+expect(chunks[0].choices[0].delta.role).toBe("assistant");
+
+// Reassemble content
+const content = chunks.map(c => c.choices[0].delta.content ?? "").join("");
+expect(content).toBe("Hello! How can I help?");
+
+// Last chunk has finish_reason
+expect(chunks.at(-1).choices[0].finish_reason).toBe("stop");
+
+ +

JSON Fixture

+ +
+
fixtures/chat.json json
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "hello" },
+      "response": { "content": "Hello! How can I help?" }
+    },
+    {
+      "match": { "userMessage": "weather" },
+      "response": {
+        "toolCalls": [{
+          "name": "get_weather",
+          "arguments": "{\"city\":\"SF\"}"
+        }]
+      }
+    }
+  ]
+}
+
+ +

Response Format

+ +

Non-streaming (stream: false)

+

Returns a single JSON object matching the OpenAI ChatCompletion type:

+
    +
  • id — starts with chatcmpl-
  • +
  • object"chat.completion"
  • +
  • created — Unix timestamp
  • +
  • model — echoes the requested model
  • +
  • choices[0].message.content — the response text
  • +
  • choices[0].message.refusal — always null
  • +
  • + choices[0].finish_reason"stop" or + "tool_calls" +
  • +
  • usage — token counts (zeroed in mock)
  • +
+ +

Streaming (stream: true)

+

+ Returns text/event-stream with data: {json}\n\n lines, ending + with data: [DONE]\n\n. Each chunk matches the OpenAI + ChatCompletionChunk type with delta instead of + message. +

+
+
+ +
+ +
+ + diff --git a/docs/claude-messages.html b/docs/claude-messages.html new file mode 100644 index 0000000..d034278 --- /dev/null +++ b/docs/claude-messages.html @@ -0,0 +1,193 @@ + + + + + + Claude Messages — llmock + + + + + + + + +
+ + +
+

Anthropic Claude Messages API

+

+ The POST /v1/messages endpoint implements the Anthropic Messages API with + streaming SSE using event: + data: format, including content + blocks for text and tool use. +

+ +

Endpoint

+ + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1/messagesSSE (event: + data:) or JSON
+ +

Unit Test: Text Streaming

+ +
+
claude-text.test.ts ts
+
const textFixture = {
+  match: { userMessage: "hello" },
+  response: { content: "Hi there!" },
+};
+
+const instance = await createServer([textFixture]);
+
+const res = await post(`${instance.url}/v1/messages`, {
+  model: "claude-sonnet-4-20250514",
+  max_tokens: 1024,
+  messages: [{ role: "user", content: "hello" }],
+  stream: true,
+});
+
+const events = parseClaudeSSEEvents(res.body);
+const types = events.map(e => e.type);
+
+expect(types).toContain("message_start");
+expect(types).toContain("content_block_start");
+expect(types).toContain("content_block_delta");
+expect(types).toContain("message_stop");
+
+ +

Unit Test: Tool Use

+ +
+
claude-tools.test.ts ts
+
const toolFixture = {
+  match: { userMessage: "weather" },
+  response: {
+    toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }]
+  },
+};
+
+const instance = await createServer([toolFixture]);
+
+const res = await post(`${instance.url}/v1/messages`, {
+  model: "claude-sonnet-4-20250514",
+  max_tokens: 1024,
+  messages: [{ role: "user", content: "what is the weather?" }],
+  stream: true,
+});
+
+const events = parseClaudeSSEEvents(res.body);
+const blockStart = events.find(
+  e => e.type === "content_block_start"
+    && e.content_block?.type === "tool_use"
+);
+expect(blockStart.content_block.name).toBe("get_weather");
+
+ +

SSE Event Sequence

+

Claude Messages streaming produces these events:

+
    +
  1. message_start — message metadata (id, model, role, usage)
  2. +
  3. content_block_start — text or tool_use block
  4. +
  5. content_block_delta — text_delta or input_json_delta
  6. +
  7. content_block_stop
  8. +
  9. message_delta — stop_reason, usage
  10. +
  11. message_stop
  12. +
+ +

Request Translation

+

+ llmock internally translates Anthropic requests to a unified format for fixture matching. + The claudeToCompletionRequest() function handles mapping Anthropic message + arrays (including content block arrays) to OpenAI-style messages so the same fixtures work + across all providers. +

+
+
+
+ +
+ + diff --git a/docs/cohere.html b/docs/cohere.html new file mode 100644 index 0000000..162f738 --- /dev/null +++ b/docs/cohere.html @@ -0,0 +1,279 @@ + + + + + + Cohere — llmock + + + + + + + + +
+ + +
+

Cohere v2 Chat API

+

+ The POST /v2/chat endpoint implements the Cohere v2 Chat API with typed SSE + streaming events and dual usage tracking (billed_units and + tokens). +

+ +

Endpoint

+ + + + + + + + + + + + + + + +
MethodPathDescription
POST/v2/chatCohere v2 Chat (SSE streaming or JSON)
+ +

Key Features

+
    +
  • + Model field required. Unlike OpenAI, Cohere requires the + model field — requests without it receive a 400 error. +
  • +
  • + Typed SSE events. Streaming uses event: + + data: pairs with event types like message-start, + content-delta, tool-call-start, etc. +
  • +
  • + Dual usage tracking. Responses include both + billed_units (input_tokens, output_tokens, search_units, classifications) + and tokens (input_tokens, output_tokens). llmock returns zeroed values. +
  • +
  • + Defaults to non-streaming. Set "stream": true explicitly + to enable SSE streaming. +
  • +
+ +

Quick Start

+ +
+
+ cohere-quick-start.ts ts +
+
import { LLMock } from "@copilotkit/llmock";
+
+const mock = new LLMock();
+mock.onMessage("hello", { content: "Hi from Cohere!" });
+await mock.start();
+
+// Point the Cohere SDK at llmock
+const res = await fetch(`${mock.url}/v2/chat`, {
+  method: "POST",
+  headers: { "Content-Type": "application/json" },
+  body: JSON.stringify({
+    model: "command-r-plus",
+    messages: [{ role: "user", content: "hello" }],
+  }),
+});
+
+ +

SSE Event Sequence (Text)

+

+ When stream: true, Cohere produces these typed events for text responses: +

+
    +
  1. + message-start — message metadata (role, empty content/tool arrays) +
  2. +
  3. content-start — content block type declaration
  4. +
  5. content-delta — text chunks
  6. +
  7. content-end
  8. +
  9. message-end — finish_reason (COMPLETE) and usage
  10. +
+ +

SSE Event Sequence (Tool Calls)

+

For tool call responses, the event sequence is:

+
    +
  1. message-start
  2. +
  3. tool-plan-delta — tool planning text
  4. +
  5. tool-call-start — tool call ID, function name
  6. +
  7. tool-call-delta — chunked arguments JSON
  8. +
  9. tool-call-end
  10. +
  11. message-end — finish_reason (TOOL_CALL) and usage
  12. +
+ +

Non-Streaming Response

+ +
+
+ /v2/chat non-streaming response json +
+
{
+  "id": "msg_abc123",
+  "finish_reason": "COMPLETE",
+  "message": {
+    "role": "assistant",
+    "content": [{ "type": "text", "text": "Hi from Cohere!" }],
+    "tool_calls": [],
+    "tool_plan": "",
+    "citations": []
+  },
+  "usage": {
+    "billed_units": {
+      "input_tokens": 0,
+      "output_tokens": 0,
+      "search_units": 0,
+      "classifications": 0
+    },
+    "tokens": { "input_tokens": 0, "output_tokens": 0 }
+  }
+}
+
+ +

Fixture Examples

+ +
+
+ cohere-fixtures.json json +
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "hello" },
+      "response": { "content": "Hi from Cohere!" }
+    },
+    {
+      "match": { "userMessage": "search" },
+      "response": {
+        "toolCalls": [
+          {
+            "name": "web_search",
+            "arguments": "{\"query\":\"latest news\"}"
+          }
+        ]
+      }
+    }
+  ]
+}
+
+ +

Streaming Event Wire Format

+

Each SSE event is a typed event: + data: pair:

+ +
+
+ Cohere SSE wire format text +
+
event: message-start
+data: {"id":"msg_abc123","type":"message-start","delta":{"message":{"role":"assistant","content":[],"tool_plan":"","tool_calls":[],"citations":[]}}}
+
+event: content-start
+data: {"type":"content-start","index":0,"delta":{"message":{"content":{"type":"text"}}}}
+
+event: content-delta
+data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"type":"text","text":"Hi "}}}}
+
+event: content-delta
+data: {"type":"content-delta","index":0,"delta":{"message":{"content":{"type":"text","text":"from Cohere!"}}}}
+
+event: content-end
+data: {"type":"content-end","index":0}
+
+event: message-end
+data: {"type":"message-end","delta":{"finish_reason":"COMPLETE","usage":{"billed_units":{"input_tokens":0,"output_tokens":0,"search_units":0,"classifications":0},"tokens":{"input_tokens":0,"output_tokens":0}}}}
+
+ +

Request Translation

+

+ llmock internally translates Cohere requests to a unified + ChatCompletionRequest format for fixture matching. The + cohereToCompletionRequest() function maps Cohere message roles (including + tool with tool_call_id) and tool definitions to the common + format. +

+
+
+
+ +
+ + diff --git a/docs/compatible-providers.html b/docs/compatible-providers.html new file mode 100644 index 0000000..2bb2179 --- /dev/null +++ b/docs/compatible-providers.html @@ -0,0 +1,324 @@ + + + + + + Compatible Providers — llmock + + + + + + + + +
+ + +
+

Compatible Providers

+

+ Many LLM providers use OpenAI-compatible + /v1/chat/completions endpoints. llmock works with all of them out of the box + — just point the SDK's base URL at your llmock instance. +

+ +

Supported Providers

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ProviderBase URL PathNotes
Mistral/v1/chat/completionsStandard OpenAI-compatible endpoint
Groq/openai/v1/chat/completionsUses /openai/ prefix — llmock strips it automatically
Ollama/v1/chat/completionsStandard OpenAI-compatible endpoint
Together AI/v1/chat/completionsStandard OpenAI-compatible endpoint
vLLM/v1/chat/completionsStandard OpenAI-compatible endpoint
Cohere/v1/chat/completions + OpenAI-compatible endpoint; see Cohere page for native + endpoints +
Vertex AI/v1/projects/.../models/:model:*Uses Gemini handler; see Vertex AI page
+ +

How It Works

+
    +
  • + Most OpenAI-compatible providers send requests to + /v1/chat/completions with the same JSON format — llmock already + handles this natively +
  • +
  • + Groq uses a /openai/v1/ prefix for all endpoints. llmock automatically + strips the /openai prefix, so /openai/v1/chat/completions, + /openai/v1/embeddings, and /openai/v1/models all work + transparently +
  • +
  • + Model names are passed through as-is — use + mistral-large-latest, llama-3.3-70b-versatile, + llama3.2, or any other model name in your fixtures +
  • +
+ +

Mistral Configuration

+

+ Mistral's SDK uses the standard OpenAI-compatible endpoint. Point + MISTRAL_API_ENDPOINT at llmock: +

+ +
+
+ Environment variables bash +
+
export MISTRAL_API_ENDPOINT="http://localhost:5555/v1"
+export MISTRAL_API_KEY="mock-key"
+
+ +
+
Programmatic setup ts
+
import { Mistral } from "@mistralai/mistralai";
+
+const client = new Mistral({
+  apiKey: "mock-key",
+  serverURL: "http://localhost:5555/v1",
+});
+
+ +

Groq Configuration

+

+ Groq's SDK sends requests to /openai/v1/chat/completions (note the + /openai prefix). llmock handles this automatically. +

+ +
+
+ Environment variables bash +
+
export GROQ_BASE_URL="http://localhost:5555/openai/v1"
+export GROQ_API_KEY="mock-key"
+
+ +
+
Programmatic setup ts
+
import Groq from "groq-sdk";
+
+const client = new Groq({
+  apiKey: "mock-key",
+  baseURL: "http://localhost:5555/openai/v1",
+});
+
+ +

Ollama Configuration

+

+ Ollama exposes an OpenAI-compatible endpoint locally. Point the OpenAI SDK at llmock + instead: +

+ +
+
+ Environment variables bash +
+
export OPENAI_BASE_URL="http://localhost:5555/v1"
+export OPENAI_API_KEY="mock-key"
+
+ +
+
Programmatic setup ts
+
import OpenAI from "openai";
+
+// Same SDK you'd use with Ollama, just different base URL
+const client = new OpenAI({
+  apiKey: "mock-key",
+  baseURL: "http://localhost:5555/v1",
+});
+
+ +

Together AI Configuration

+ +
+
+ Environment variables bash +
+
export TOGETHER_BASE_URL="http://localhost:5555/v1"
+export TOGETHER_API_KEY="mock-key"
+
+ +

vLLM Configuration

+ +
+
+ Environment variables bash +
+
# vLLM uses the OpenAI SDK — just change the base URL
+export OPENAI_BASE_URL="http://localhost:5555/v1"
+export OPENAI_API_KEY="mock-key"
+
+ +

Example Fixture

+

+ The same fixture works for all compatible providers. Model names are passed through + — match on whatever model name your code sends: +

+ +
+
+ fixtures/compat.json json +
+
{
+  "fixtures": [
+    {
+      "match": {
+        "model": "mistral-large-latest",
+        "userMessage": "hello"
+      },
+      "response": {
+        "content": "Bonjour! How can I help?"
+      }
+    },
+    {
+      "match": {
+        "model": "llama-3.3-70b-versatile",
+        "userMessage": "hello"
+      },
+      "response": {
+        "content": "Hey there! What can I do for you?"
+      }
+    },
+    {
+      "match": { "userMessage": "hello" },
+      "response": {
+        "content": "Hi! I'm a catch-all response."
+      }
+    }
+  ]
+}
+
+ +
+

+ The /openai/v1/* prefix alias also works for + /openai/v1/embeddings and /openai/v1/models — any + /openai/-prefixed path is transparently routed to the corresponding + /v1/ endpoint. +

+
+ +
+

+ Ollama native endpoints: In addition to the OpenAI-compatible endpoint + listed above, Ollama has its own native /api/chat and + /api/generate endpoints. llmock supports these natively — see the + Ollama page for details on the native endpoint format. +

+
+
+
+
+ +
+ + diff --git a/docs/docker.html b/docs/docker.html new file mode 100644 index 0000000..4f71441 --- /dev/null +++ b/docs/docker.html @@ -0,0 +1,230 @@ + + + + + + Docker & Helm — llmock + + + + + + + + +
+ + +
+

Docker & Helm

+

+ Run llmock as a container in Docker or deploy it to Kubernetes with the included Helm + chart. The image is based on node:22-alpine with zero runtime dependencies. +

+ +

Docker

+ +

Build the image

+
+
Build shell
+
docker build -t llmock .
+
+ +

Run with local fixtures

+
+
Run shell
+
# Mount your fixture directory into the container
+docker run -p 4010:4010 -v $(pwd)/fixtures:/fixtures llmock
+
+# Custom port (passing args replaces the image CMD, so re-add --host 0.0.0.0)
+docker run -p 5555:5555 llmock --fixtures /fixtures --port 5555 --host 0.0.0.0
+
+# Pull from GitHub Container Registry
+docker pull ghcr.io/copilotkit/llmock:latest
+docker run -p 4010:4010 -v $(pwd)/fixtures:/fixtures ghcr.io/copilotkit/llmock
+
+ +

Dockerfile

+

+ The multi-stage Dockerfile builds the TypeScript source and copies only the compiled + output: +

+ +
+
Dockerfile docker
+
# --- Build stage ---
+FROM node:22-alpine AS build
+RUN corepack enable && corepack prepare pnpm@10.28.2 --activate
+WORKDIR /app
+COPY package.json pnpm-lock.yaml ./
+RUN pnpm install --frozen-lockfile
+COPY tsconfig.json tsdown.config.ts ./
+COPY src/ src/
+RUN pnpm run build
+
+# --- Production stage ---
+FROM node:22-alpine
+WORKDIR /app
+COPY --from=build /app/dist/ dist/
+COPY fixtures/ fixtures/
+EXPOSE 4010
+ENTRYPOINT ["node", "dist/cli.js"]
+CMD ["--fixtures", "/fixtures", "--host", "0.0.0.0"]
+
+ +

Helm Chart

+

Deploy to Kubernetes using the Helm chart in charts/llmock/.

+ +

Install

+
+
Helm install shell
+
helm install llmock ./charts/llmock
+
+# With custom values
+helm install llmock ./charts/llmock \
+  --set image.tag=1.4.0 \
+  --set service.port=5555 \
+  --set replicaCount=2
+
+ +

Configuration (values.yaml)

+ +
+
+ charts/llmock/values.yaml yaml +
+
replicaCount: 1
+
+image:
+  repository: ghcr.io/copilotkit/llmock
+  tag: ""            # defaults to Chart appVersion
+  pullPolicy: IfNotPresent
+
+service:
+  type: ClusterIP
+  port: 4010
+
+fixtures:
+  mountPath: /app/fixtures
+  existingClaim: ""  # Use a PVC for fixture files
+
+resources: {}
+  # limits:
+  #   cpu: 200m
+  #   memory: 256Mi
+
+ +

Fixture Loading

+

+ To load custom fixtures in Kubernetes, create a PersistentVolumeClaim with your fixture + JSON files and set fixtures.existingClaim in your values. The chart mounts + the PVC at fixtures.mountPath (default /app/fixtures). +

+ +

Health Checks

+

+ The deployment includes liveness and readiness probes using httpGet on + /health (liveness, starts after 5 seconds) and /ready + (readiness, starts after 2 seconds). +

+ +

v1.6.0 Features

+

The Docker image supports all v1.6.0 features out of the box:

+
    +
  • + Chaos testing — configure via --chaos-drop, + --chaos-malformed, and --chaos-disconnect flags +
  • +
  • + Prometheus metrics — exposed at /metrics when + enabled with --metrics +
  • +
  • + Record & replay — proxy to real APIs with + --record flag +
  • +
  • + Strict mode — return 503 for unmatched requests with + --strict +
  • +
  • Streaming physics — TTFT, TPS, and jitter simulation
  • +
  • AWS Bedrock streaming — Event Stream binary protocol
  • +
  • Converse API — Bedrock Converse and Converse-stream
  • +
+
+
+
+ +
+ + diff --git a/docs/docs.html b/docs/docs.html new file mode 100644 index 0000000..7df43c9 --- /dev/null +++ b/docs/docs.html @@ -0,0 +1,463 @@ + + + + + + Documentation — llmock + + + + + + + + + + + +
+ + + + +
+

llmock Documentation

+

+ llmock is a deterministic mock LLM server for testing. It runs a real HTTP server that any + process on the machine can reach, serving fixture-driven responses in the authentic SSE + format for OpenAI, Anthropic Claude, and Google Gemini APIs. +

+ +

Quick Start

+ +
+
+ Install + shell +
+
# npm
+npm install @copilotkit/llmock
+
+# pnpm
+pnpm add @copilotkit/llmock
+
+ +
+
+ Programmatic usage (vitest) + ts +
+
import { LLMock } from "@copilotkit/llmock";
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+
+let mock: LLMock;
+
+beforeAll(async () => {
+  mock = new LLMock();
+  await mock.start();
+});
+
+afterAll(async () => {
+  await mock.stop();
+});
+
+it("returns a text response", async () => {
+  mock.on({ userMessage: "hello" }, { content: "Hi there!" });
+
+  const res = await fetch(`${mock.url}/v1/chat/completions`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "gpt-4",
+      messages: [{ role: "user", content: "hello" }],
+      stream: false,
+    }),
+  });
+  const body = await res.json();
+  expect(body.choices[0].message.content).toBe("Hi there!");
+});
+
+ +
+
+ CLI usage + shell +
+
# Start the server with fixture files
+npx llmock --fixtures ./fixtures --port 5555
+
+# Point your app at it
+export OPENAI_BASE_URL=http://localhost:5555/v1
+export OPENAI_API_KEY=mock-key
+
+ +

Supported Endpoints

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
EndpointProviderTransport
POST /v1/chat/completionsOpenAIHTTP SSE / JSON
POST /v1/responsesOpenAIHTTP SSE
WS /v1/responsesOpenAIWebSocket
WS /v1/realtimeOpenAIWebSocket
POST /v1/messagesAnthropicHTTP SSE / JSON
POST /v1beta/models/:model:*Google GeminiHTTP SSE / JSON
WS /ws/google.ai.generativelanguage.*Google Gemini LiveWebSocket
POST /v1/embeddingsOpenAIJSON
POST /openai/v1/chat/completionsGroq / OpenAI-CompatibleHTTP SSE / JSON
POST /model/{modelId}/invokeAWS BedrockJSON
POST /model/{modelId}/invoke-with-response-streamAWS BedrockAWS Event Stream (binary)
POST /model/{modelId}/converseAWS BedrockJSON
POST /model/{modelId}/converse-streamAWS BedrockAWS Event Stream (binary)
POST /v1/projects/.../models/:model:*Vertex AIHTTP SSE / JSON
POST /api/chatOllamaNDJSON / JSON
POST /api/generateOllamaNDJSON / JSON
POST /v2/chatCohereHTTP SSE / JSON
+ +

Feature Pages

+ +
+ + OpenAI +

Chat Completions

+

Streaming and non-streaming text + tool call responses via SSE.

+
+ + OpenAI +

Responses API

+

HTTP SSE and WebSocket transports for the Responses API.

+
+ + Anthropic +

Claude Messages

+

Anthropic-format SSE streaming with content blocks.

+
+ + Google +

Gemini

+

GenerateContent and StreamGenerateContent endpoints.

+
+ + New +

Embeddings

+

OpenAI-compatible /v1/embeddings endpoint with fixture or auto-generated vectors.

+
+ + New +

Structured Output

+

JSON mode and response_format matching for structured responses.

+
+ + New +

Sequential Responses

+

Stateful fixtures that return different responses on each call.

+
+ + Core +

Fixtures

+

JSON fixture file format, matching rules, and validation.

+
+ + Core +

Error Injection

+

One-shot errors, stream truncation, and disconnect simulation.

+
+ + New +

Chaos Testing

+

+ Probabilistic failure injection — random errors, latency spikes, stream + corruption. +

+
+ + Core +

WebSocket APIs

+

Realtime, Responses, and Gemini Live over WebSocket.

+
+ + New +

Record & Replay

+

Proxy to real APIs, record responses as fixtures, then replay deterministically.

+
+ + New +

Prometheus Metrics

+

Expose request counts, latencies, and fixture match rates via /metrics endpoint.

+
+ + Provider +

Ollama

+

Native Ollama /api/chat and /api/generate endpoints.

+
+ + Provider +

Cohere

+

Cohere Chat API with native and OpenAI-compatible endpoints.

+
+ + Provider +

Vertex AI

+

Google Cloud Vertex AI endpoints using the Gemini handler.

+
+ + Ops +

Docker & Helm

+

Container image and Kubernetes Helm chart deployment.

+
+ + CI +

Drift Detection

+

Three-way conformance testing against real APIs.

+
+
+ +

API Reference

+ +

LLMock class

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodDescription
new LLMock(opts?) + Create instance. Options: port, host, + latency, chunkSize, logLevel, + chaos, record, strict, metrics, + streamingProfile +
start()Start the HTTP server. Returns the base URL.
stop()Stop the server.
on(match, response, opts?)Add a fixture with match criteria and response.
onMessage(pattern, response)Shorthand: match on userMessage.
onToolCall(name, response)Shorthand: match on toolName.
onEmbedding(pattern, response)Shorthand: match on inputText (embeddings).
onJsonOutput(pattern, json)Shorthand: match userMessage + responseFormat=json_object.
onToolResult(id, response)Shorthand: match on toolCallId.
nextRequestError(status, body?)Queue a one-shot error for the next request.
addFixture(fixture)Add a raw Fixture object.
loadFixtureFile(path)Load fixtures from a JSON file.
loadFixtureDir(path)Load all fixture JSON files from a directory.
reset()Clear all fixtures and journal entries.
getRequests()Get all journal entries.
getLastRequest()Get the most recent journal entry.
.url / .portAccess the server URL and port.
+
+
+ + + + + diff --git a/docs/drift-detection.html b/docs/drift-detection.html new file mode 100644 index 0000000..dc8f9d4 --- /dev/null +++ b/docs/drift-detection.html @@ -0,0 +1,471 @@ + + + + + + Drift Detection — llmock + + + + + + + + +
+ + +
+

Drift Detection

+

+ A mock that does not match reality is worse than no mock. llmock includes three-way drift + tests that compare SDK types, real API responses, and mock output to catch shape + mismatches before your users do. +

+ +

Three-Way Comparison

+

Each drift test compares three sources:

+ + + + +
+ + + + + + + + + + + + + + + SDK = Real? + + + + SDK = Mock? + + + + Real = Mock? + + +
+
{ }
+

SDK Types

+

What TypeScript types say the shape should be

+
+
+
+

Real API

+

What OpenAI, Claude, Gemini actually return

+
+
+
+

llmock

+

What the mock produces for the same request

+
+
+ + +
+
+
+
+

Mock doesn't match real

+
+

+ llmock needs updating — test fails immediately. The SDK comparison tells us why + it drifted. +

+
+
+
+
+

Provider changed, SDK is behind

+
+

+ Early warning — the real API has new fields that neither the SDK nor llmock know + about yet. +

+
+
+
+
+

All three agree

+
+

No drift — the mock matches reality and the SDK types are current.

+
+
+ +

Running Drift Tests

+ +
+
Run drift tests shell
+
# Set API keys for providers you want to test
+export OPENAI_API_KEY=sk-...
+export ANTHROPIC_API_KEY=sk-ant-...
+export GOOGLE_API_KEY=AI...
+
+# Run all drift tests
+pnpm test:drift
+
+# Run for a specific provider
+pnpm test:drift -- --grep "OpenAI Chat"
+
+ +

Test Files

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FileProviderWhat it tests
openai-chat.drift.tsOpenAIChat Completions (streaming + non-streaming, text + tool calls)
openai-responses.drift.tsOpenAIResponses API (HTTP SSE)
anthropic.drift.tsAnthropicClaude Messages API
gemini.drift.tsGoogleGemini generateContent + streamGenerateContent
ws-realtime.drift.tsOpenAIRealtime API over WebSocket
ws-responses.drift.tsOpenAIResponses API over WebSocket
ws-gemini-live.drift.tsGoogleGemini Live over WebSocket
models.drift.tsAllModel list endpoint conformance
+ +

How Drift Analysis Works

+ +
+
drift-test.ts ts
+
import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema";
+
+// 1. Get the SDK shape (what TypeScript says)
+const sdkShape = openaiChatCompletionShape();
+
+// 2. Call the real API and the mock in parallel
+const [realRes, mockRes] = await Promise.all([
+  openaiChatNonStreaming(config, [{ role: "user", content: "Say hello" }]),
+  httpPost(`${instance.url}/v1/chat/completions`, { /* ... */ }),
+]);
+
+// 3. Extract response shapes
+const realShape = extractShape(realRes.body);
+const mockShape = extractShape(JSON.parse(mockRes.body));
+
+// 4. Three-way comparison
+const diffs = triangulate(sdkShape, realShape, mockShape);
+const report = formatDriftReport("OpenAI Chat (non-streaming text)", diffs);
+
+// 5. Critical diffs fail the test
+if (shouldFail(diffs)) {
+  expect.soft([], report).toEqual(
+    diffs.filter(d => d.severity === "critical")
+  );
+}
+
+ +

Severity Levels

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
SeverityMeaningAction
criticalMock does not match real APITest fails. llmock needs updating.
warningProvider added new field, neither SDK nor mock have itLogged. Early warning for future breakage.
okAll three agreeNo action needed.
+ +

CI Integration

+

+ Drift tests run daily in CI with real API keys stored as GitHub secrets. Tests that + require API keys are automatically skipped when the corresponding key is not set, so + pnpm test:drift is safe to run locally without any keys configured. +

+ +
+

+ Drift tests require real API keys and make real API calls. They are not part of the + regular pnpm test suite and must be run explicitly with + pnpm test:drift. +

+
+
+
+
+ +
+ + diff --git a/docs/embeddings.html b/docs/embeddings.html new file mode 100644 index 0000000..00d06f0 --- /dev/null +++ b/docs/embeddings.html @@ -0,0 +1,237 @@ + + + + + + Embeddings — llmock + + + + + + + + +
+ + +
+

Embeddings

+

+ The POST /v1/embeddings endpoint returns OpenAI-compatible embedding vectors. + You can provide explicit vectors in fixtures or let llmock generate deterministic + embeddings automatically from the input text. +

+ +

Endpoint

+ + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1/embeddingsJSON
+ +

How It Works

+
    +
  • + If a fixture matches with an embedding response, that exact vector is + returned +
  • +
  • + If no fixture matches, a deterministic embedding is auto-generated from the input text + using a hash-based algorithm +
  • +
  • + Auto-generated embeddings are deterministic: same input always produces the same output +
  • +
  • + Default dimension is 1536 (matching text-embedding-3-small), configurable via the + dimensions request parameter +
  • +
+ +

Unit Test: Fixture-based Embedding

+ +
+
+ embedding-fixture.test.ts ts +
+
const mock = new LLMock();
+await mock.start();
+
+// Register a fixture with explicit embedding vector
+mock.onEmbedding("embed-this", { embedding: [0.1, -0.2, 0.3, 0.4, -0.5] });
+
+const res = await fetch(`${mock.url}/v1/embeddings`, {
+  method: "POST",
+  headers: { "Content-Type": "application/json" },
+  body: JSON.stringify({
+    model: "text-embedding-3-small",
+    input: "embed-this",
+  }),
+});
+
+const body = await res.json();
+expect(body.object).toBe("list");
+expect(body.data[0].embedding).toEqual([0.1, -0.2, 0.3, 0.4, -0.5]);
+expect(body.data[0].index).toBe(0);
+
+ +

Unit Test: Auto-generated Embedding

+ +
+
+ embedding-auto.test.ts ts +
+
import { generateDeterministicEmbedding } from "@copilotkit/llmock/helpers";
+
+// Deterministic: same input always produces the same output
+const a = generateDeterministicEmbedding("hello world");
+const b = generateDeterministicEmbedding("hello world");
+expect(a).toEqual(b);
+
+// Default dimension is 1536
+expect(a).toHaveLength(1536);
+
+// Custom dimension
+const c = generateDeterministicEmbedding("hello", 768);
+expect(c).toHaveLength(768);
+
+// All values are between -1 and 1
+for (const val of a) {
+  expect(val).toBeGreaterThanOrEqual(-1);
+  expect(val).toBeLessThanOrEqual(1);
+}
+
+ +

JSON Fixture

+ +
+
+ fixtures/embeddings.json json +
+
{
+  "fixtures": [
+    {
+      "match": { "inputText": "embed-this" },
+      "response": {
+        "embedding": [0.1, -0.2, 0.3, 0.4, -0.5]
+      }
+    }
+  ]
+}
+
+ +

Response Format

+

Matches the OpenAI /v1/embeddings response format:

+ +
+
Response shape json
+
{
+  "object": "list",
+  "model": "text-embedding-3-small",
+  "data": [
+    {
+      "object": "embedding",
+      "index": 0,
+      "embedding": [0.1, -0.2, 0.3, ...]
+    }
+  ],
+  "usage": { "prompt_tokens": 0, "total_tokens": 0 }
+}
+
+ +
+

+ Embedding fixtures use match.inputText instead of + match.userMessage. The inputText matcher checks the embedding + input string (or each string in an input array). +

+
+
+
+
+ +
+ + diff --git a/docs/error-injection.html b/docs/error-injection.html new file mode 100644 index 0000000..80ac5ee --- /dev/null +++ b/docs/error-injection.html @@ -0,0 +1,233 @@ + + + + + + Error Injection — llmock + + + + + + + + +
+ + +
+

Error Injection

+

+ Test your application's error handling with one-shot errors, stream truncation, and timed + disconnects. llmock provides three mechanisms for simulating failures. +

+ +

One-Shot Errors

+

+ Queue an error that fires on the next request and auto-removes itself. Useful for testing + retry logic. +

+ +
+
+ one-shot-error.test.ts ts +
+
const mock = new LLMock();
+await mock.start();
+mock.onMessage("hello", { content: "Hi!" });
+
+// Queue a 429 rate limit error for the next request
+mock.nextRequestError(429, {
+  message: "Rate limit exceeded",
+  type: "rate_limit_error",
+});
+
+// First request → 429 error
+const res1 = await fetch(`${mock.url}/v1/chat/completions`, {
+  method: "POST",
+  headers: { "Content-Type": "application/json" },
+  body: JSON.stringify({
+    model: "gpt-4",
+    messages: [{ role: "user", content: "hello" }],
+  }),
+});
+expect(res1.status).toBe(429);
+
+// Second request → normal response (error auto-removed)
+const res2 = await fetch(`${mock.url}/v1/chat/completions`, { /* same */ });
+expect(res2.status).toBe(200);
+
+ +

Stream Truncation

+

+ Abort a streaming response after a specific number of SSE chunks. Tests that your + application handles partial streams gracefully. +

+ +
+
truncation.test.ts ts
+
mock.on(
+  { userMessage: "long story" },
+  { content: "This is a very long response that will be cut short" },
+  { truncateAfterChunks: 3 }  // Abort after 3 SSE chunks
+);
+
+ +

Timed Disconnect

+

+ Disconnect after a specified number of milliseconds. Simulates network timeouts and + connection drops. +

+ +
+
disconnect.test.ts ts
+
mock.on(
+  { userMessage: "slow" },
+  { content: "This response will never complete" },
+  { disconnectAfterMs: 100 }  // Kill connection after 100ms
+);
+
+ +

Error Fixtures in JSON

+ +
+
+ fixtures/errors.json json +
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "error-test" },
+      "response": {
+        "error": {
+          "message": "Rate limited",
+          "type": "rate_limit_error"
+        },
+        "status": 429
+      }
+    },
+    {
+      "match": { "userMessage": "partial" },
+      "response": { "content": "This gets cut off" },
+      "truncateAfterChunks": 2
+    },
+    {
+      "match": { "userMessage": "timeout" },
+      "response": { "content": "Never finishes" },
+      "disconnectAfterMs": 50
+    }
+  ]
+}
+
+ +

Interruption Behavior

+
    +
  • + truncateAfterChunks — counts SSE data lines sent; aborts on the Nth + chunk +
  • +
  • + disconnectAfterMs — starts a timer when the response begins; kills + the connection when it fires +
  • +
  • If both are set, whichever fires first wins
  • +
  • + Interrupted requests are recorded in the journal with + response.interrupted: true and response.interruptReason +
  • +
+ +
+

+ nextRequestError() is one-shot: it fires once and auto-removes itself. For + persistent error fixtures, use addFixture() with an error response. +

+
+ +
+

+ See also: Chaos Testing — for + probabilistic failure injection. Chaos testing adds configurable error rates, random + latency spikes, and stream corruption that trigger based on probability rather than + deterministic fixture matching. Use error injection for specific, reproducible failure + scenarios; use chaos testing for resilience testing under unpredictable conditions. +

+
+
+
+
+ +
+ + diff --git a/docs/favicon.svg b/docs/favicon.svg new file mode 100644 index 0000000..63285ea --- /dev/null +++ b/docs/favicon.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a218f7047973946fe28120c9209e2873144118d5b5a7e2ea9e7aa4c407559fb +size 3265 diff --git a/docs/fixtures.html b/docs/fixtures.html new file mode 100644 index 0000000..0a13382 --- /dev/null +++ b/docs/fixtures.html @@ -0,0 +1,330 @@ + + + + + + Fixtures — llmock + + + + + + + + +
+ + +
+

Fixtures

+

+ Fixtures define what the mock server returns. Each fixture has a + match criteria and a response. Load them from JSON files, + register them programmatically, or mix both approaches. +

+ +

File Format

+ +
+
+ fixtures/example.json json +
+
{
+  "fixtures": [
+    {
+      "match": {
+        "userMessage": "hello",
+        "model": "gpt-4"
+      },
+      "response": {
+        "content": "Hello!"
+      },
+      "latency": 200,
+      "chunkSize": 10
+    }
+  ]
+}
+
+ +

Match Fields

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
userMessagestring | RegExpSubstring or regex match on the last user message
inputTextstring | RegExpMatch on embedding input text
toolCallIdstringMatch on tool_call_id in the last message
toolNamestringMatch on tool function name
modelstring | RegExpMatch on the requested model name
responseFormatstringMatch on response_format.type (e.g. "json_object")
sequenceIndexnumberMatch on the Nth occurrence of this pattern
predicatefunctionCustom function: (req) => boolean (programmatic only)
+ +

Response Types

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeFieldsDescription
Textcontent, role?, finishReason?Plain text response
Tool CalltoolCalls[], finishReason?Function call(s) with name + arguments
Errorerror.message, error.type?, status?Error response with HTTP status
Embeddingembedding[]Vector of numbers
+ +

Fixture Options

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
latencynumberMilliseconds delay before first chunk
chunkSizenumberCharacters per SSE chunk (streaming)
truncateAfterChunksnumberAbort stream after N chunks (error injection)
disconnectAfterMsnumberDisconnect after N ms (error injection)
streamingProfileobject + Streaming physics profile: { ttftMs, tps, jitter }. See + Streaming Physics +
chaosobject + Per-fixture chaos config: { errorRate, latencyMs, ... }. See + Chaos Testing +
+ +

Loading Fixtures

+ +

From a file

+
+
load-file.ts ts
+
const mock = new LLMock();
+mock.loadFixtureFile("./fixtures/chat.json");
+mock.loadFixtureFile("./fixtures/tools.json");
+
+ +

From a directory

+
+
load-dir.ts ts
+
// Loads all .json files in the directory (non-recursive)
+mock.loadFixtureDir("./fixtures");
+
+ +

Programmatically

+
+
programmatic.ts ts
+
// Shorthand methods
+mock.onMessage("hello", { content: "Hi!" });
+mock.onToolCall("get_weather", { content: "72F" });
+mock.onEmbedding("my text", { embedding: [0.1, 0.2] });
+mock.onJsonOutput("data", { key: "value" });
+mock.onToolResult("call_123", { content: "Done" });
+
+// Full fixture object
+mock.addFixture({
+  match: { userMessage: "hello", model: "gpt-4" },
+  response: { content: "Hi!" },
+  latency: 100,
+  chunkSize: 5,
+});
+
+// Predicate-based routing
+mock.on(
+  { predicate: (req) => req.messages.at(-1)?.role === "tool" },
+  { content: "Done!" }
+);
+
+ +

Routing Rules

+
    +
  • + First match wins — fixtures are checked in registration order +
  • +
  • + All match fields must pass — multiple match fields are AND-ed +
  • +
  • + Substring matchinguserMessage: "hello" matches + "say hello world" +
  • +
  • + Cross-provider — the same fixtures work for OpenAI, Claude, and + Gemini requests +
  • +
+ +
+

+ JSON files cannot use predicate (functions can't be serialized). Use + programmatic registration for predicate-based routing. +

+
+
+
+
+ +
+ + diff --git a/docs/gemini.html b/docs/gemini.html new file mode 100644 index 0000000..b3beeb1 --- /dev/null +++ b/docs/gemini.html @@ -0,0 +1,232 @@ + + + + + + Gemini — llmock + + + + + + + + +
+ + +
+

Google Gemini

+

+ llmock supports both generateContent (non-streaming) and + streamGenerateContent (SSE) endpoints, plus Gemini Live over WebSocket. The + same fixtures drive all three transports. +

+ +

Endpoints

+ + + + + + + + + + + + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1beta/models/:model:generateContentJSON
POST/v1beta/models/:model:streamGenerateContentSSE (data:)
WS/ws/google.ai.generativelanguage.*WebSocket JSON
+ +

Unit Test: Streaming Text

+ +
+
gemini-text.test.ts ts
+
const textFixture = {
+  match: { userMessage: "hello" },
+  response: { content: "Hi there!" },
+};
+
+const instance = await createServer([textFixture]);
+
+const res = await post(
+  `${instance.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent?alt=sse`,
+  {
+    contents: [{ role: "user", parts: [{ text: "hello" }] }],
+  }
+);
+
+// Parse Gemini SSE chunks
+const chunks = res.body.split("\n")
+  .filter(l => l.startsWith("data: "))
+  .map(l => JSON.parse(l.slice(6)));
+
+// Gemini response shape
+expect(chunks[0].candidates[0].content.parts[0].text).toBeDefined();
+
+// Reassemble text
+const text = chunks
+  .map(c => c.candidates[0].content.parts[0].text ?? "")
+  .join("");
+expect(text).toBe("Hi there!");
+
+ +

Unit Test: Tool Call

+ +
+
gemini-tools.test.ts ts
+
const toolFixture = {
+  match: { userMessage: "weather" },
+  response: {
+    toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }]
+  },
+};
+
+const instance = await createServer([toolFixture]);
+
+const res = await post(
+  `${instance.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent?alt=sse`,
+  {
+    contents: [{ role: "user", parts: [{ text: "what is the weather?" }] }],
+  }
+);
+
+const chunks = parseGeminiSSEChunks(res.body);
+const parts = chunks[0].candidates[0].content.parts;
+expect(parts[0].functionCall.name).toBe("get_weather");
+
+ +

Request Translation

+

+ Gemini uses a different request format (contents with parts) + than OpenAI. llmock translates Gemini requests to the unified format via + geminiToCompletionRequest() so the same fixture + match.userMessage works regardless of which provider endpoint the request + arrives on. +

+ +

Gemini Live (WebSocket)

+

+ Gemini Live uses WebSocket at /ws/google.ai.generativelanguage.* for + bidirectional streaming. See the WebSocket APIs page for + details. +

+ +
+

+ Gemini Live text support is unverified against a real model — no text-capable + Gemini Live model existed at the time of writing. The implementation follows the API + specification. +

+
+ +

Vertex AI

+

+ Google Cloud's Vertex AI provides access to Gemini models through a + different URL pattern than the AI Studio API. llmock supports Vertex AI requests using the + same Gemini handler — the URL pattern is different, but the request and response + formats are identical. +

+

Vertex AI URLs follow the pattern:

+
+
+ Vertex AI URL pattern text +
+
POST /v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent
+POST /v1/projects/{project}/locations/{location}/publishers/google/models/{model}:streamGenerateContent
+
+

+ The same fixtures work for both Gemini AI Studio and Vertex AI endpoints. See the + Vertex AI page for configuration details. +

+
+
+
+ +
+ + diff --git a/docs/index.html b/docs/index.html index 39fbb43..bc2e99e 100644 --- a/docs/index.html +++ b/docs/index.html @@ -3,12 +3,14 @@ - mock-openai — Deterministic OpenAI mock server for testing + llmock — Deterministic mock LLM server for testing + + @@ -431,7 +433,7 @@ /* ─── Sections ───────────────────────────────────────────────── */ section { - padding: 6rem 0; + padding: 3rem 0; } .section-label { @@ -468,6 +470,10 @@ margin-top: 3.5rem; } + .features-grid > .feature-card:last-child:nth-child(3n + 1) { + grid-column: 2; + } + .feature-card { padding: 2rem; background: var(--bg-card); @@ -632,18 +638,193 @@ color: var(--warning); } - /* ─── Comparison Table ───────────────────────────────────────── */ - .comparison { + /* ─── Reliability / Drift Detection ─────────────────────────── */ + .triangle-wrapper { + position: relative; + width: 100%; + max-width: 600px; + margin: 3.5rem auto 1rem; + aspect-ratio: 1.3 / 1; + } + .triangle-wrapper svg { + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 100%; + z-index: 0; + } + .tri-node { + position: absolute; + background: var(--bg-card); + border: 2px solid; + border-radius: 12px; + padding: 1rem 1.25rem; + text-align: center; + width: 170px; + z-index: 1; + } + .tri-node h3 { + font-size: 0.95rem; + font-weight: 600; + color: var(--text-primary); + margin-bottom: 0.3rem; + } + .tri-node p { + font-size: 0.75rem; + color: var(--text-secondary); + line-height: 1.4; + } + .tri-node .node-icon { + font-size: 1.5rem; + margin-bottom: 0.5rem; + } + .tri-node.sdk { + border-color: var(--blue); + top: 0; + left: 50%; + transform: translateX(-50%); + } + .tri-node.sdk .node-icon { + color: var(--blue); + } + .tri-node.real { + border-color: var(--accent); + bottom: 0; + left: 0; + } + .tri-node.real .node-icon { + color: var(--accent); + } + .tri-node.mock { + border-color: var(--purple); + bottom: 0; + right: 0; + } + .tri-node.mock .node-icon { + color: var(--purple); + } + .diagnosis-grid { + 
display: grid; + grid-template-columns: repeat(3, 1fr); + gap: 1rem; + margin-top: 2.5rem; + } + .diagnosis-card { + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 8px; + padding: 1rem 1.25rem; + } + .diagnosis-card .diag-header { + display: flex; + align-items: center; + gap: 0.5rem; + margin-bottom: 0.4rem; + } + .diagnosis-card .diag-dot { + width: 10px; + height: 10px; + border-radius: 50%; + flex-shrink: 0; + } + .diagnosis-card h4 { + font-size: 0.85rem; + font-weight: 600; + color: var(--text-primary); + } + .diagnosis-card p { + font-size: 0.78rem; + color: var(--text-secondary); + line-height: 1.5; + } + .drift-report { + background: var(--bg-deep); + border: 1px solid var(--border); + border-radius: 8px; + padding: 1.25rem 1.5rem; + margin-top: 2.5rem; + font-family: var(--font-mono); + font-size: 0.75rem; + line-height: 1.8; + color: var(--text-secondary); + overflow-x: auto; + } + .drift-report .report-header { + color: var(--text-primary); + font-weight: 600; + margin-bottom: 0.75rem; + font-size: 0.8rem; + } + .drift-report .severity-critical { + color: var(--error); + } + .drift-report .severity-warning { + color: var(--warning); + } + .drift-report .severity-ok { + color: var(--accent); + } + .drift-report .field-path { + color: var(--blue); + } + .drift-report .drift-label { + color: var(--text-primary); + } + .drift-report .report-summary { + color: var(--text-dim); + } + .drift-report .field-label { + color: var(--text-dim); + } + .drift-report .divider { + border-top: 1px solid var(--border); + margin: 0.6rem 0; + } + .ci-footer { + display: flex; + align-items: center; + gap: 1.5rem; + margin-top: 2rem; + padding-top: 1.5rem; border-top: 1px solid var(--border); } + .ci-badge { + display: inline-flex; + align-items: center; + gap: 0.5rem; + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: 6px; + padding: 0.4rem 0.75rem; + font-size: 0.8rem; + color: var(--text-secondary); + 
font-family: var(--font-mono); + flex-shrink: 0; + } + .ci-badge .dot { + width: 8px; + height: 8px; + border-radius: 50%; + background: var(--accent); + } + .ci-text { + font-size: 0.9rem; + color: var(--text-secondary); + line-height: 1.6; + } + /* ─── Comparison Table ───────────────────────────────────────── */ + .comparison-table-wrap { + margin-top: 3rem; + } .comparison-table { width: 100%; - margin-top: 3rem; - border-collapse: collapse; + border-collapse: separate; + border-spacing: 0; font-size: 0.9rem; } - .comparison-table th { + .comparison-table thead th { text-align: left; padding: 1rem 1.25rem; font-family: var(--font-mono); @@ -653,10 +834,20 @@ letter-spacing: 0.08em; border-bottom: 2px solid var(--border-bright); color: var(--text-secondary); + position: sticky; + top: 56px; + background: var(--bg-deep); + z-index: 10; } - .comparison-table th:nth-child(2) { + .comparison-table thead th:nth-child(2) { color: var(--accent); } + .comparison-table thead th a { + text-decoration: none; + } + .comparison-table thead th a:hover { + text-decoration: underline; + } .comparison-table td { padding: 0.85rem 1.25rem; border-bottom: 1px solid var(--border); @@ -754,16 +945,6 @@ transform: translateY(0); } } - @keyframes sseLine { - from { - opacity: 0; - transform: translateX(-8px); - } - to { - opacity: 1; - transform: translateX(0); - } - } @keyframes blink { 50% { opacity: 0; @@ -793,6 +974,11 @@ opacity: 1; transform: translateY(0); } + /* Remove transform from comparison section so sticky headers work + (transform creates a new containing block that breaks sticky) */ + .comparison.reveal.visible { + transform: none; + } /* ─── Responsive ─────────────────────────────────────────────── */ @media (max-width: 900px) { @@ -802,6 +988,9 @@ .code-section { grid-template-columns: 1fr; } + .diagnosis-grid { + grid-template-columns: 1fr; + } .comparison-table { font-size: 0.8rem; } @@ -823,6 +1012,10 @@ .nav-links a:not(.gh-link) { display: none; } + .ci-footer { 
+ flex-direction: column; + align-items: flex-start; + } footer .container { flex-direction: column; gap: 1.5rem; @@ -839,13 +1032,15 @@