Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/components/Sidebar.astro
Original file line number Diff line number Diff line change
Expand Up @@ -94,18 +94,18 @@ const isApiTab = activeTab?.tab === 'API';

/**
 * Infers the HTTP method (and matching CSS badge class) for an API doc page
 * from its title, using keyword heuristics.
 *
 * Check order matters: DELETE and PATCH keywords are tested before GET/POST
 * so that titles like "Delete a scenario" or "Update test run components"
 * are not mis-classified by the broader GET/POST keyword lists.
 *
 * @param title - Human-readable page title (e.g. "Get test executions").
 * @returns The inferred method and CSS class, or null when no keyword matches.
 */
function inferApiMethod(title: string): { method: string; css: string } | null {
  const t = title.toLowerCase();
  if (/\b(delete|remove)\b/.test(t)) {
    return { method: 'DELETE', css: 'api-method-delete' };
  }
  if (/\b(update|edit|apply|restore)\b/.test(t)) {
    return { method: 'PATCH', css: 'api-method-patch' };
  }
  if (/\b(list|get|retrieve|health|find|export|progress|analytics|agreement|compare|stats|summary|voices|tts)\b/.test(t)) {
    return { method: 'GET', css: 'api-method-get' };
  }
  if (/\b(create|add|generate|execute|submit|assign|bulk|complete|skip|release|pause|unpause|check|upload|start|duplicate|fetch|run|rerun|cancel|clone|merge)\b/.test(t)) {
    return { method: 'POST', css: 'api-method-post' };
  }
  // No keyword matched — caller renders no method badge.
  return null;
}
---
Expand Down
15 changes: 0 additions & 15 deletions src/lib/api-navigation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,6 @@ export const apiNavigation: ApiNavGroup[] = [
"title": "Add columns to a scenario",
"href": "/docs/api/scenarios/addcolumns",
"method": "POST"
},
{
"title": "Add empty rows to a scenario",
"href": "/docs/api/scenarios/addemptyrowstodataset",
"method": "POST"
}
]
},
Expand Down Expand Up @@ -187,11 +182,6 @@ export const apiNavigation: ApiNavGroup[] = [
"href": "/docs/api/run-tests/executeruntest",
"method": "POST"
},
{
"title": "Update test run components",
"href": "/docs/api/run-tests/updatetestcomponents",
"method": "PATCH"
},
{
"title": "Get test executions",
"href": "/docs/api/run-tests/gettestexecutions",
Expand All @@ -202,11 +192,6 @@ export const apiNavigation: ApiNavGroup[] = [
"href": "/docs/api/run-tests/gettestscenarios",
"method": "GET"
},
{
"title": "Get call executions for a test run",
"href": "/docs/api/run-tests/getcallexecutions",
"method": "GET"
},
{
"title": "Get evaluation summary",
"href": "/docs/api/run-tests/getevalsummary",
Expand Down
3 changes: 0 additions & 3 deletions src/lib/navigation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1051,7 +1051,6 @@ export const tabNavigation: NavTab[] = [
{ title: 'Delete Scenario', href: '/docs/api/scenarios/deletescenario' },
{ title: 'Add Rows with AI', href: '/docs/api/scenarios/addscenariorowswithai' },
{ title: 'Add Columns', href: '/docs/api/scenarios/addcolumns' },
{ title: 'Add Empty Rows', href: '/docs/api/scenarios/addemptyrowstodataset' },
]
},
{
Expand Down Expand Up @@ -1092,10 +1091,8 @@ export const tabNavigation: NavTab[] = [
{ title: 'Get Test Run Details', href: '/docs/api/run-tests/getruntestdetails' },
{ title: 'Delete Test Run', href: '/docs/api/run-tests/deleteruntest' },
{ title: 'Execute Run Test', href: '/docs/api/run-tests/executeruntest' },
{ title: 'Update Components', href: '/docs/api/run-tests/updatetestcomponents' },
{ title: 'Get Test Executions', href: '/docs/api/run-tests/gettestexecutions' },
{ title: 'Get Test Scenarios', href: '/docs/api/run-tests/gettestscenarios' },
{ title: 'Get Call Executions', href: '/docs/api/run-tests/getcallexecutions' },
{ title: 'Get Eval Summary', href: '/docs/api/run-tests/getevalsummary' },
{ title: 'Compare Eval Summaries', href: '/docs/api/run-tests/compareevalsummaries' },
{ title: 'Add Eval Configs', href: '/docs/api/run-tests/addevalconfigs' },
Expand Down
1 change: 0 additions & 1 deletion src/lib/redirects.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ export const redirectMap: Record<string, string> = {
'/api-reference/prompt-workbench/get-prompt-version-by-name': '/docs/api',
'/api-reference/run-tests/create-a-new-test-run': '/docs/api/run-tests/createruntest',
'/api-reference/run-tests/execute-a-test-run': '/docs/api/run-tests/executeruntest',
'/api-reference/scenarios/add-empty-rows-to-a-scenario': '/docs/api/scenarios/addemptyrowstodataset',
'/api-reference/scenarios/add-rows-to-a-scenario-using-ai': '/docs/api/scenarios/addscenariorowswithai',
'/api-reference/scenarios/edit-a-scenario': '/docs/api/scenarios/editscenario',
'/api-reference/scenarios/generate-or-create-a-scenario': '/docs/api/scenarios/createscenario',
Expand Down
87 changes: 49 additions & 38 deletions src/pages/docs/api/run-tests/addevalconfigs.mdx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: "Add Eval Configs to Run Test — API"
description: "Add one or more evaluation configurations to an existing test run. Specify template, name, config mapping, model, and error localizer per config. Returns created eval config objects."
title: "Add evaluation configurations"
description: "Adds evaluation configurations to a test run."
---

<ApiPlayground
Expand All @@ -10,8 +10,25 @@ description: "Add one or more evaluation configurations to an existing test run.
parameters={[
{"name": "run_test_id", "in": "path", "required": true, "description": "UUID of the test run to add evaluation configurations to.", "type": "string"}
]}
requestBody={{"evaluations_config": [{"template_id": "your-template-id", "name": "My Eval Config", "config": {}, "mapping": {}, "error_localizer": false, "model": "turing_large"}]}}
responseExample={{"message": "Evaluation configs added successfully", "created_eval_configs": [{"id": "uuid", "name": "My Eval Config", "config": {}, "mapping": {}, "filters": {}, "error_localizer": false, "model": "turing_large", "status": "pending", "template_id": "your-template-id"}], "run_test_id": "uuid"}}
requestBody={{"evaluations_config": [{"template_id": "your-template-id", "name": "My Eval Config", "config": {}, "mapping": {}, "filters": {}, "error_localizer": false, "model": "turing_large"}]}}
responseExample={{
message: "Successfully added 1 evaluation config(s) to run test",
created_eval_configs: [
{
id: "ec1b2c3d-e5f6-7890-abcd-ef1234567890",
name: "My Eval Config",
config: {},
mapping: {},
filters: {},
error_localizer: false,
model: "turing_large",
status: null,
eval_group: null,
template_id: "your-template-uuid"
}
],
run_test_id: "f7a8b9c0-d1e2-3456-789a-bcdef0123456"
}}
responseStatus={201}
responseStatusText="Created"
/>
Expand All @@ -33,65 +50,59 @@ description: "Add one or more evaluation configurations to an existing test run.

<ApiSection title="Request body">
<ParamField body="evaluations_config" type="array of objects" required>
Array of evaluation configuration objects.
Array of evaluation configuration objects. Each object supports the following fields:

<ApiCollapsible title="Show 7 properties">
<ResponseField name="template_id" type="string" required>
UUID of the evaluation template to use.
</ResponseField>
- **`template_id`** (string, UUID, required) -- UUID of the evaluation template to use.

<ResponseField name="name" type="string" required>
Name for this evaluation configuration. Must be unique within the test run.
</ResponseField>
- **`name`** (string, optional) -- Name for this evaluation configuration. Defaults to `Eval-<template_id>` if omitted. Must be unique within the test run.

<ResponseField name="config" type="object">
Template-specific configuration parameters.
</ResponseField>
- **`config`** (object, optional) -- Template-specific configuration parameters.

<ResponseField name="mapping" type="object">
Maps test execution data fields to the evaluation template's expected inputs.
</ResponseField>
- **`mapping`** (object, optional) -- Maps test execution data fields to the evaluation template's expected inputs.

<ResponseField name="filters" type="object">
Filter criteria to restrict which test results are evaluated.
</ResponseField>
- **`filters`** (object, optional) -- Filter criteria to restrict which test results are evaluated.

<ResponseField name="error_localizer" type="boolean">
Enables granular error localization on evaluation failures. Defaults to `false`.
</ResponseField>
- **`error_localizer`** (boolean, optional) -- Enables granular error localization on evaluation failures. Defaults to `false`.

<ResponseField name="model" type="string">
Model to use for running this evaluation.
</ResponseField>
</ApiCollapsible>
- **`model`** (string, optional) -- Model to use for running this evaluation.
</ParamField>
</ApiSection>

<ApiSection title="Response" status={201} statusText="Created">
<ResponseField name="message" type="string">Confirmation of successful addition.</ResponseField>

<ResponseField name="message" type="string">Confirmation message indicating how many evaluation configs were added.</ResponseField>
<ResponseField name="created_eval_configs" type="array of objects">
Array of the newly created evaluation config objects. Each object contains: `id`, `name`, `config`, `mapping`, `filters`, `error_localizer`, `model`, `status`, `template_id`.
</ResponseField>

<ResponseField name="run_test_id" type="string">UUID of the test run the configs were added to.</ResponseField>

<ResponseField name="warnings" type="array of strings" optional>
Present only when some configs in the request failed to create while others succeeded. Lists per-item error messages.
Array of created evaluation configuration objects. Each object contains: `id`, `name`, `config`, `mapping`, `filters`, `error_localizer`, `model`, `status`, `eval_group`, and `template_id`.
</ResponseField>
<ResponseField name="run_test_id" type="string">UUID of the parent test run.</ResponseField>
<ResponseField name="warnings" type="array of strings">Non-fatal issues encountered while processing individual configs. Only present if partial failures occurred.</ResponseField>
</ApiSection>

<ApiSection title="Errors">
<ParamField name="400" type="Bad Request">
Invalid or missing fields such as a non-existent `template_id`, duplicate `name`, or malformed `config`/`mapping`.
Validation error. Common causes: empty `evaluations_config`, duplicate `name` within request, name already exists in test run, non-existent `template_id`.
```json
{
"evaluations_config": ["Duplicate eval name 'My Eval Config' found in the request. Each evaluation config must have a unique name."]
}
```
Or for existing name conflict:
```json
{"error": "An evaluation config with the name 'My Eval Config' already exists in this run test. Please use a different name."}
```
</ParamField>
<ParamField name="401" type="Unauthorized">
Missing or invalid `X-Api-Key` or `X-Secret-Key` headers.
</ParamField>
<ParamField name="404" type="Not Found">
No test run found with the specified `run_test_id`.
```json
{"detail": "No RunTest matches the given query."}
```
</ParamField>
<ParamField name="500" type="Internal Server Error">
Unexpected server error.
```json
{"error": "Failed to add evaluation configs: <message>"}
```
</ParamField>
</ApiSection>
35 changes: 30 additions & 5 deletions src/pages/docs/api/run-tests/compareevalsummaries.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ description: "Compare evaluation summaries side-by-side across multiple test exe
{"name": "run_test_id", "in": "path", "required": true, "description": "UUID of the test run containing the executions to compare.", "type": "string"},
{"name": "execution_ids", "in": "query", "required": false, "description": "JSON-encoded array of test execution UUIDs to compare.", "type": "string"}
]}
responseExample={{"execution-uuid-1": {"evaluations": [{"name": "Tone Check", "average_score": 0.85}]}, "execution-uuid-2": {"evaluations": [{"name": "Tone Check", "average_score": 0.92}]}}}
responseExample={{
"execution-uuid-1": [{"name": "Tone Check", "average_score": 0.85, "total_runs": 10, "passed": 8, "failed": 2}],
"execution-uuid-2": [{"name": "Tone Check", "average_score": 0.92, "total_runs": 10, "passed": 9, "failed": 1}]
}}
responseStatus={200}
responseStatusText="OK"
/>
Expand All @@ -33,25 +36,47 @@ description: "Compare evaluation summaries side-by-side across multiple test exe

<ApiSection title="Query parameters">
<ParamField query="execution_ids" type="string" required>
JSON-encoded array of test execution UUIDs to compare. Must be URL-encoded.
JSON-encoded array of test execution UUIDs to compare. Must be URL-encoded. Example: `["uuid1","uuid2"]`.
</ParamField>
</ApiSection>

<ApiSection title="Response" status={200} statusText="OK">
<ResponseField name="comparison" type="object">Dictionary keyed by execution ID, each mapping to its evaluation summary metrics.</ResponseField>
<ResponseField name="(execution_id)" type="object">
Dictionary keyed by execution UUID. Each value is an array of evaluation summary objects for that execution.
</ResponseField>
<ApiCollapsible title="Show evaluation summary object properties">
<ResponseField name="name" type="string">Name of the evaluation configuration.</ResponseField>
<ResponseField name="average_score" type="number">Average score across all evaluated calls.</ResponseField>
<ResponseField name="total_runs" type="integer">Total evaluation runs for this config.</ResponseField>
<ResponseField name="passed" type="integer">Number of passing evaluations.</ResponseField>
<ResponseField name="failed" type="integer">Number of failing evaluations.</ResponseField>
</ApiCollapsible>
</ApiSection>

<ApiSection title="Errors">
<ParamField name="400" type="Bad Request">
Missing, malformed, or invalid `execution_ids` parameter.
Missing, malformed, or empty `execution_ids` parameter.
```json
{"execution_ids": ["execution_ids must be valid JSON"]}
```
Or when empty:
```json
{"execution_ids": ["execution_ids list is required"]}
```
</ParamField>
<ParamField name="401" type="Unauthorized">
Missing or invalid `X-Api-Key` or `X-Secret-Key` headers.
</ParamField>
<ParamField name="404" type="Not Found">
No test run found with the specified `run_test_id`.
```json
{"error": "RunTest not found."}
```
</ParamField>
<ParamField name="500" type="Internal Server Error">
Unexpected server error. Retry later or contact support.
Unexpected server error.
```json
{"error": "Unable to fetch eval summary"}
```
</ParamField>
</ApiSection>
Loading
Loading