diff --git a/api/oss/src/core/evaluators/service.py b/api/oss/src/core/evaluators/service.py
index 3429c921d2..28fdef61a0 100644
--- a/api/oss/src/core/evaluators/service.py
+++ b/api/oss/src/core/evaluators/service.py
@@ -46,6 +46,7 @@
     EvaluatorRevisionCommit,
     EvaluatorRevisionQuery,
 )
+from oss.src.core.evaluators.utils import build_evaluator_data
 from oss.src.core.shared.dtos import Reference
 from oss.src.utils.logging import get_module_logger
 
@@ -774,6 +775,118 @@ def __init__(
     ):
         self.evaluators_service = evaluators_service
 
+    @staticmethod
+    def _extract_builtin_evaluator_key(
+        simple_evaluator_data: Optional[SimpleEvaluatorData],
+    ) -> Optional[str]:
+        """Return the evaluator key embedded in a builtin evaluator URI.
+
+        Only URIs with at least four colon-separated parts whose first two
+        parts are ``agenta`` and ``builtin`` are recognised; the third part is
+        the key. Returns ``None`` for missing data, a missing URI, any other
+        URI shape, or an empty key.
+        """
+        uri = simple_evaluator_data.uri if simple_evaluator_data else None
+
+        if not uri:
+            return None
+
+        parts = uri.split(":")
+
+        if len(parts) < 4:
+            return None
+
+        if parts[0] != "agenta" or parts[1] != "builtin":
+            return None
+
+        return parts[2] or None
+
+    @staticmethod
+    def _has_outputs_schema(
+        simple_evaluator_data: Optional[SimpleEvaluatorData],
+    ) -> bool:
+        """Return True when ``schemas`` is a dict with a non-empty ``outputs``."""
+        if not simple_evaluator_data or not isinstance(
+            simple_evaluator_data.schemas, dict
+        ):
+            return False
+
+        return bool(simple_evaluator_data.schemas.get("outputs"))
+
+    def _ensure_builtin_evaluator_data(
+        self,
+        simple_evaluator_data: Optional[SimpleEvaluatorData],
+    ) -> Optional[SimpleEvaluatorData]:
+        """Fill in missing data for builtin evaluators before committing.
+
+        Data without a builtin URI, or that already has an outputs schema, is
+        returned unchanged. Otherwise the evaluator key and the caller's
+        parameters (only when they are a dict) go to ``build_evaluator_data``
+        and the result is merged with the caller's data, caller-supplied
+        values taking precedence.
+        """
+        evaluator_key = self._extract_builtin_evaluator_key(simple_evaluator_data)
+
+        if not evaluator_key:
+            return simple_evaluator_data
+
+        if self._has_outputs_schema(simple_evaluator_data):
+            return simple_evaluator_data
+
+        settings_values = (
+            simple_evaluator_data.parameters
+            if simple_evaluator_data
+            and isinstance(simple_evaluator_data.parameters, dict)
+            else None
+        )
+
+        hydrated_data = build_evaluator_data(
+            evaluator_key=evaluator_key,
+            settings_values=settings_values,
+        )
+
+        hydrated_data_dict = hydrated_data.model_dump(
+            mode="json",
+            exclude_none=True,
+            exclude_unset=True,
+        )
+
+        existing_data_dict = (
+            simple_evaluator_data.model_dump(
+                mode="json",
+                exclude_none=True,
+                exclude_unset=True,
+            )
+            if simple_evaluator_data
+            else {}
+        )
+
+        merged_data_dict = {
+            **hydrated_data_dict,
+            **existing_data_dict,
+        }
+
+        # The top-level merge is shallow: a caller-supplied ``schemas`` without
+        # ``outputs`` (e.g. only ``inputs``) would replace the hydrated
+        # ``schemas`` wholesale and drop the outputs schema this method is
+        # meant to add, so merge that mapping one level deeper.
+        hydrated_schemas = hydrated_data_dict.get("schemas")
+        existing_schemas = existing_data_dict.get("schemas")
+
+        if isinstance(hydrated_schemas, dict):
+            merged_schemas = {
+                **hydrated_schemas,
+                **(existing_schemas if isinstance(existing_schemas, dict) else {}),
+            }
+
+            # An empty caller ``outputs`` must not mask the hydrated one.
+            if not merged_schemas.get("outputs") and hydrated_schemas.get("outputs"):
+                merged_schemas["outputs"] = hydrated_schemas["outputs"]
+
+            merged_data_dict["schemas"] = merged_schemas
+
+        return SimpleEvaluatorData(**merged_data_dict)
+
     # public -------------------------------------------------------------------
 
     async def create(
@@ -864,6 +977,10 @@ async def create(
 
         evaluator_revision_slug = uuid4().hex[-12:]
 
+        hydrated_simple_evaluator_data = self._ensure_builtin_evaluator_data(
+            simple_evaluator_create.data,
+        )
+
         evaluator_revision_commit = EvaluatorRevisionCommit(
             slug=evaluator_revision_slug,
             #
@@ -905,7 +1022,7 @@ async def create(
             tags=evaluator_create.tags,
             meta=evaluator_create.meta,
             #
-            data=simple_evaluator_create.data,
+            data=hydrated_simple_evaluator_data,
             #
             evaluator_id=evaluator.id,
             evaluator_variant_id=evaluator_variant.id,
@@ -1150,6 +1267,10 @@ async def edit(
 
         evaluator_revision_slug = uuid4().hex[-12:]
 
+        hydrated_simple_evaluator_data = self._ensure_builtin_evaluator_data(
+            simple_evaluator_edit.data,
+        )
+
         evaluator_revision_commit = EvaluatorRevisionCommit(
             slug=evaluator_revision_slug,
             #
@@ -1160,7 +1281,7 @@ async def edit(
             tags=evaluator_edit.tags,
             meta=evaluator_edit.meta,
             #
-            data=simple_evaluator_edit.data,
+            data=hydrated_simple_evaluator_data,
             #
             evaluator_id=evaluator.id,
             evaluator_variant_id=evaluator_variant.id,
diff --git a/api/oss/src/models/api/evaluation_model.py b/api/oss/src/models/api/evaluation_model.py
index 6415402794..f26f109bed 100644
--- a/api/oss/src/models/api/evaluation_model.py
+++ b/api/oss/src/models/api/evaluation_model.py
@@ -14,6 +14,7 @@ class LegacyEvaluator(BaseModel):
     direct_use: bool
     settings_presets: Optional[list[dict]] = None
     settings_template: dict
+    outputs_schema: Optional[Dict[str, Any]] = None
     description: Optional[str] = None
     oss: Optional[bool] = False
     requires_llm_api_keys: Optional[bool] = False
diff --git a/api/oss/src/resources/evaluators/evaluators.py 
b/api/oss/src/resources/evaluators/evaluators.py
index 392b23be45..b1762d2f1e 100644
--- a/api/oss/src/resources/evaluators/evaluators.py
+++ b/api/oss/src/resources/evaluators/evaluators.py
@@ -1,3 +1,6 @@
+from copy import deepcopy
+
+
 rag_evaluator_settings_template = {
     "question_key": {
         "label": "Question Key",
@@ -832,6 +835,83 @@
 ]
 
 
+_SUCCESS_ONLY_OUTPUT_SCHEMA = {
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "type": "object",
+    "properties": {
+        "success": {"type": "boolean"},
+    },
+    "required": ["success"],
+    "additionalProperties": False,
+}
+
+_SCORE_AND_SUCCESS_OUTPUT_SCHEMA = {
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "type": "object",
+    "properties": {
+        "score": {"type": "number"},
+        "success": {"type": "boolean"},
+    },
+    "required": [],
+    "additionalProperties": False,
+}
+
+_FIXED_OUTPUT_SCHEMA_BY_KEY = {
+    "auto_custom_code_run": _SCORE_AND_SUCCESS_OUTPUT_SCHEMA,
+    "field_match_test": _SUCCESS_ONLY_OUTPUT_SCHEMA,
+    "auto_json_diff": _SCORE_AND_SUCCESS_OUTPUT_SCHEMA,
+    "auto_semantic_similarity": _SCORE_AND_SUCCESS_OUTPUT_SCHEMA,
+    "auto_webhook_test": _SCORE_AND_SUCCESS_OUTPUT_SCHEMA,
+    "auto_exact_match": _SUCCESS_ONLY_OUTPUT_SCHEMA,
+    "auto_contains_json": _SUCCESS_ONLY_OUTPUT_SCHEMA,
+    "auto_similarity_match": _SCORE_AND_SUCCESS_OUTPUT_SCHEMA,
+    "auto_regex_test": _SUCCESS_ONLY_OUTPUT_SCHEMA,
+    "auto_starts_with": _SUCCESS_ONLY_OUTPUT_SCHEMA,
+    "auto_ends_with": _SUCCESS_ONLY_OUTPUT_SCHEMA,
+    "auto_contains": _SUCCESS_ONLY_OUTPUT_SCHEMA,
+    "auto_contains_any": _SUCCESS_ONLY_OUTPUT_SCHEMA,
+    "auto_contains_all": _SUCCESS_ONLY_OUTPUT_SCHEMA,
+    "auto_levenshtein_distance": _SCORE_AND_SUCCESS_OUTPUT_SCHEMA,
+}
+
+
+def _extract_auto_ai_critique_default_outputs_schema():
+    """Return a copy of the default ``auto_ai_critique`` outputs schema.
+
+    Reads ``settings_template["json_schema"]["default"]["schema"]`` from the
+    first ``auto_ai_critique`` entry in ``evaluators``; ``None`` if absent.
+    """
+    critique = next(
+        (item for item in evaluators if item.get("key") == "auto_ai_critique"),
+        None,
+    )
+    if critique is None:
+        return None
+
+    template = critique.get("settings_template") or {}
+    field = template.get("json_schema") or {}
+    default = field.get("default") or {}
+    candidate = default.get("schema")
+
+    if not isinstance(candidate, dict):
+        return None
+    return deepcopy(candidate)
+
+
+_auto_ai_critique_outputs_schema = _extract_auto_ai_critique_default_outputs_schema()
+if _auto_ai_critique_outputs_schema is not None:
+    _FIXED_OUTPUT_SCHEMA_BY_KEY["auto_ai_critique"] = _auto_ai_critique_outputs_schema
+
+# Give every evaluator with a known schema its own copy of that schema.
+for evaluator in evaluators:
+    evaluator_key = evaluator.get("key")
+    outputs_schema = None
+    if isinstance(evaluator_key, str):
+        outputs_schema = _FIXED_OUTPUT_SCHEMA_BY_KEY.get(evaluator_key)
+    if outputs_schema is not None:
+        evaluator["outputs_schema"] = deepcopy(outputs_schema)
+
+
 def get_all_evaluators():
     """
     Returns a list of evaluators
diff --git a/docs/design/migrate-evaluator-playground/current-system.md b/docs/design/migrate-evaluator-playground/current-system.md
index 7797d76ec4..cf152b70fb 100644
--- a/docs/design/migrate-evaluator-playground/current-system.md
+++ b/docs/design/migrate-evaluator-playground/current-system.md
@@ -90,27 +90,26 @@ evaluatorByKeyAtomFamily // Find evaluator by key
 #### Evaluators Service (`/web/oss/src/services/evaluators/index.ts`)
 
 ```typescript
-// Evaluator Templates (legacy)
+// Evaluator Templates
 fetchAllEvaluators() // GET /evaluators
 
-// Evaluator Configs (legacy)
-fetchAllEvaluatorConfigs() // GET /evaluators/configs
-createEvaluatorConfig() // POST /evaluators/configs
-updateEvaluatorConfig() // PUT /evaluators/configs/{id}
-deleteEvaluatorConfig() // DELETE /evaluators/configs/{id}
+// Evaluator Configs
+fetchAllEvaluatorConfigs() // POST /preview/simple/evaluators/query
+createEvaluatorConfig() // POST /preview/simple/evaluators/
+updateEvaluatorConfig() // PUT /preview/simple/evaluators/{id}
+deleteEvaluatorConfig() // POST /preview/simple/evaluators/{id}/archive
 
-// Custom/Human Evaluators (new)
+// Custom/Human Evaluators
 createEvaluator() // POST /preview/simple/evaluators/
 updateEvaluator() // PUT /preview/simple/evaluators/{id}
fetchEvaluatorById() // GET /preview/simple/evaluators/{id} deleteHumanEvaluator() // POST /preview/simple/evaluators/{id}/archive ``` -#### Evaluator Run Service (`/web/oss/src/services/evaluations/api_ee/index.ts`) +#### Evaluator Run Service (`/web/oss/src/services/workflows/invoke.ts`) ```typescript -createEvaluatorDataMapping() // POST /evaluators/map -createEvaluatorRunExecution() // POST /evaluators/{key}/run +invokeEvaluator() // POST /preview/workflows/invoke ``` ## Data Flow @@ -130,7 +129,7 @@ createEvaluatorRunExecution() // POST /evaluators/{key}/run │ /evaluators → EvaluatorsRegistry │ │ ├─ Uses useEvaluatorsRegistryData() hook │ │ │ ├─ Calls fetchAllEvaluators() → GET /evaluators │ -│ │ └─ Calls fetchAllEvaluatorConfigs() → GET /evaluators/configs │ +│ │ └─ Calls fetchAllEvaluatorConfigs() → POST /preview/simple/evaluators/query │ │ │ │ │ ├─ "Create new" → SelectEvaluatorModal → /evaluators/configure/new │ │ └─ Click row → /evaluators/configure/{id} │ @@ -153,64 +152,68 @@ createEvaluatorRunExecution() // POST /evaluators/{key}/run │ └─────────────────────────────┘ └─────────────────────────────┘ │ │ │ │ Commit Actions: │ -│ - Create: POST /evaluators/configs → createEvaluatorConfig() │ -│ - Update: PUT /evaluators/configs/{id} → updateEvaluatorConfig() │ +│ - Create: POST /preview/simple/evaluators → createEvaluatorConfig() │ +│ - Update: PUT /preview/simple/evaluators/{id} → updateEvaluatorConfig() │ │ │ │ Test Actions: │ │ - Run Variant: callVariant() → POST to variant URL │ -│ - Run Evaluator: createEvaluatorRunExecution() │ -│ → POST /evaluators/{key}/run │ +│ - Run Evaluator: invokeEvaluator() │ +│ → POST /preview/workflows/invoke │ └─────────────────────────────────────────────────────────────────────────────┘ ``` ## Current API Endpoints Used -### Legacy Endpoints (to be migrated) +### Evaluator Templates | Endpoint | Method | Frontend Function | Purpose | |----------|--------|-------------------|---------| | `/evaluators/` | GET | 
`fetchAllEvaluators()` | List evaluator templates | -| `/evaluators/configs/` | GET | `fetchAllEvaluatorConfigs()` | List evaluator configs | -| `/evaluators/configs/` | POST | `createEvaluatorConfig()` | Create new config | -| `/evaluators/configs/{id}/` | PUT | `updateEvaluatorConfig()` | Update existing config | -| `/evaluators/configs/{id}/` | DELETE | `deleteEvaluatorConfig()` | Delete config | -### Endpoints That Remain Unchanged +### Evaluator CRUD | Endpoint | Method | Frontend Function | Purpose | |----------|--------|-------------------|---------| -| `/evaluators/map/` | POST | `createEvaluatorDataMapping()` | Map trace data for RAG evaluators | -| `/evaluators/{key}/run/` | POST | `createEvaluatorRunExecution()` | Run evaluator (test) | +| `/preview/simple/evaluators/query` | POST | `fetchAllEvaluatorConfigs()` | List evaluator configs | +| `/preview/simple/evaluators/` | POST | `createEvaluatorConfig()` | Create evaluator config | +| `/preview/simple/evaluators/{id}` | PUT | `updateEvaluatorConfig()` | Update evaluator config | +| `/preview/simple/evaluators/{id}/archive` | POST | `deleteEvaluatorConfig()` | Archive evaluator config | -### Already Using New Endpoints (for custom evaluators) +### Evaluator Run (Playground) | Endpoint | Method | Frontend Function | Purpose | |----------|--------|-------------------|---------| -| `/preview/simple/evaluators/` | POST | `createEvaluator()` | Create custom evaluator | -| `/preview/simple/evaluators/{id}` | PUT | `updateEvaluator()` | Update custom evaluator | -| `/preview/simple/evaluators/{id}` | GET | `fetchEvaluatorById()` | Fetch evaluator by ID | -| `/preview/simple/evaluators/{id}/archive` | POST | `deleteHumanEvaluator()` | Archive human evaluator | +| `/preview/workflows/invoke` | POST | `invokeEvaluator()` | Run evaluator using workflow invocation | ## Data Types -### Current EvaluatorConfig (Legacy) +### Current Evaluator Config ```typescript -interface EvaluatorConfig { +interface SimpleEvaluator { 
id: string - evaluator_key: string - name: string - settings_values: Record + slug: string + name?: string + description?: string + tags?: string[] + flags?: { + is_custom?: boolean + is_evaluator?: boolean + is_human?: boolean + } + data?: { + uri?: string + parameters?: Record + schemas?: { + outputs?: Record + } + } created_at: string updated_at: string - color?: string - tags?: string[] - // Frontend additions - icon_url?: string | StaticImageData } ``` -### Current Evaluator Template (Legacy) +### Current Evaluator Template ```typescript interface Evaluator { @@ -218,6 +221,7 @@ interface Evaluator { key: string settings_presets?: SettingsPreset[] settings_template: Record + outputs_schema?: Record icon_url?: string | StaticImageData color?: string direct_use?: boolean diff --git a/docs/design/migrate-evaluator-playground/new-endpoints.md b/docs/design/migrate-evaluator-playground/new-endpoints.md index 05231c4813..95bb37622c 100644 --- a/docs/design/migrate-evaluator-playground/new-endpoints.md +++ b/docs/design/migrate-evaluator-playground/new-endpoints.md @@ -195,6 +195,20 @@ class SimpleEvaluatorData: configuration: Optional[dict] ``` +### Output schema behavior + +Frontend now sends `data.schemas.outputs` when the evaluator output shape is known at configure +time. + +Schema source by evaluator type: +- fixed evaluators: `outputs_schema` from `GET /evaluators` +- `auto_ai_critique`: `parameters.json_schema.schema` +- `json_multi_field_match`: derived from configured `fields` +- evaluators with no known template schema: omit `data.schemas.outputs` + +Backend builtin hydration remains as a fallback and can still fill missing schema fields for +builtin URIs. 
+ ### URI-based Handler Registry The SDK maintains registries that map URIs to implementations: @@ -259,6 +273,8 @@ Response: SimpleEvaluatorsResponse } ``` +**Note:** For the Evaluator Registry (automatic configs), pass `flags.is_human = false` and `include_archived = false` so archived or human evaluators don't show up. + ### Create Evaluator Config **Old:** @@ -284,7 +300,7 @@ Request: SimpleEvaluatorCreateRequest evaluator: { slug: string # Generated from name name: string - flags: { is_evaluator: true } + flags: { is_evaluator: true, is_human: false } data: { uri: "agenta:builtin:{evaluator_key}:v0" parameters: object # settings_values @@ -300,6 +316,8 @@ Response: SimpleEvaluatorResponse } ``` +**Note:** Workflow slugs are unique per project. We append a short random suffix when generating slugs to avoid collisions when names repeat. + ### Update Evaluator Config **Old:** @@ -333,6 +351,8 @@ Request: SimpleEvaluatorEditRequest Response: SimpleEvaluatorResponse ``` +**Note:** `SimpleEvaluatorEdit.data` is treated as the full revision payload. When updating, include the existing `data.uri` (and any schemas) along with `data.parameters` to avoid clearing the URI. 
+ ### Delete Evaluator Config **Old:** diff --git a/docs/design/migrate-evaluator-playground/plan.md b/docs/design/migrate-evaluator-playground/plan.md index a234ec2111..44fdb10404 100644 --- a/docs/design/migrate-evaluator-playground/plan.md +++ b/docs/design/migrate-evaluator-playground/plan.md @@ -113,6 +113,13 @@ export interface SimpleEvaluatorsResponse { **File:** `web/oss/src/services/evaluators/index.ts` +Output schema ownership for create and edit: + +- If evaluator template includes `outputs_schema`, send it as `data.schemas.outputs` +- If evaluator is `auto_ai_critique`, derive from `parameters.json_schema.schema` +- If evaluator is `json_multi_field_match`, derive from `parameters.fields` +- If evaluator has no known schema, omit `data.schemas.outputs` + Replace legacy functions with new implementations: ```typescript @@ -139,14 +146,17 @@ export function buildEvaluatorUri(evaluatorKey: string): string { } /** - * Generate slug from name + * Generate slug from name (append suffix to avoid collisions) */ export function generateSlug(name: string): string { - return name + const base = name .toLowerCase() .replace(/[^a-z0-9]+/g, "-") .replace(/^-|-$/g, "") - .substring(0, 50) // limit length + + const suffix = Math.random().toString(36).slice(2, 8) + const maxBaseLength = Math.max(1, 50 - suffix.length - 1) + return `${base.slice(0, maxBaseLength)}-${suffix}` } // ============ CRUD Functions ============ @@ -162,7 +172,10 @@ export const fetchAllEvaluatorConfigs = async ( const response = await axios.post( `${getAgentaApiUrl()}/preview/simple/evaluators/query?project_id=${projectId}`, - { evaluator: { flags: { is_evaluator: true } } } + { + evaluator: { flags: { is_evaluator: true, is_human: false } }, + include_archived: false, + } ) return response.data?.evaluators || [] @@ -178,7 +191,7 @@ export const createEvaluatorConfig = async ( const payload: SimpleEvaluatorCreate = { slug: generateSlug(name), name, - flags: { is_evaluator: true }, + flags: { 
is_evaluator: true, is_human: false }, data: { uri: buildEvaluatorUri(evaluatorKey), parameters: settingsValues, @@ -199,15 +212,21 @@ export const createEvaluatorConfig = async ( export const updateEvaluatorConfig = async ( evaluatorId: string, updates: { name?: string; settingsValues?: Record }, + existing?: SimpleEvaluator, ): Promise => { const {projectId} = getProjectValues() + // IMPORTANT: include existing data (uri/schemas) when editing const payload: SimpleEvaluatorEdit = { id: evaluatorId, - name: updates.name, - data: updates.settingsValues - ? { parameters: updates.settingsValues } - : undefined, + name: updates.name ?? existing?.name, + data: { + ...(existing?.data ?? {}), + ...(updates.settingsValues ? {parameters: updates.settingsValues} : {}), + }, + tags: existing?.tags, + meta: existing?.meta, + flags: existing?.flags, } const response = await axios.put( @@ -338,7 +357,7 @@ form.setFieldsValue({ settings_values: editEvalEditValues.settings_values, }) -// After +// After (use parameters field to match SimpleEvaluator) form.setFieldsValue({ name: simpleEvaluator.name, parameters: simpleEvaluator.data?.parameters, @@ -354,7 +373,7 @@ Update to work with `SimpleEvaluator[]`: const enrichedEvaluators = evaluators.map((e) => ({ ...e, evaluator_key: extractEvaluatorKeyFromUri(e.data?.uri), - settings_values: e.data?.parameters, // for backward compat in UI + parameters: e.data?.parameters, })) ``` @@ -506,18 +525,21 @@ export interface WorkflowServiceBatchResponse { ```typescript import axios from "@/oss/lib/api/assets/axiosConfig" -import { getAgentaApiUrl } from "@/oss/lib/helpers/utils" -import { getProjectValues } from "@/oss/contexts/project.context" -import { - WorkflowServiceRequest, - WorkflowServiceBatchResponse, - SimpleEvaluator, -} from "@/oss/lib/Types" +import type { SimpleEvaluator } from "@/oss/lib/Types" +import axios from "@/oss/lib/api/assets/axiosConfig" +import { getAgentaApiUrl } from "@/oss/lib/helpers/api" +import { 
buildEvaluatorUri, resolveEvaluatorKey } from "@/oss/lib/evaluators/utils" +import { getProjectValues } from "@/oss/state/project" + +export interface WorkflowServiceBatchResponse { + status?: { code?: number; message?: string } + data?: { outputs?: any } +} export interface InvokeEvaluatorParams { - evaluator: SimpleEvaluator - inputs: Record // testcase data + any extra inputs - outputs: any // prediction/output from variant + evaluator?: Partial | null + inputs?: Record // testcase data + any extra inputs + outputs?: any // prediction/output from variant parameters?: Record // override settings (optional) } @@ -530,16 +552,12 @@ export const invokeEvaluator = async ( const { projectId } = getProjectValues() const { evaluator, inputs, outputs, parameters } = params - const uri = evaluator.data?.uri - if (!uri) { - throw new Error("Evaluator has no URI configured") - } + const evaluatorKey = resolveEvaluatorKey(evaluator) + const uri = evaluator?.data?.uri || (evaluatorKey ? buildEvaluatorUri(evaluatorKey) : undefined) + if (!uri) throw new Error("Evaluator URI is missing") - const request: WorkflowServiceRequest = { - version: "2025.07.14", - interface: { - uri, - }, + const request = { + interface: { uri }, configuration: { parameters: parameters ?? evaluator.data?.parameters, }, @@ -588,7 +606,7 @@ const runResponse = await createEvaluatorRunExecution( selectedEvaluator.key, { inputs: outputs, - settings: formValues.settings_values, + settings: formValues.parameters, } ) @@ -596,11 +614,8 @@ const runResponse = await createEvaluatorRunExecution( import { invokeEvaluator, mapWorkflowResponseToEvaluatorOutput } from "@/oss/services/workflows/invoke" const workflowResponse = await invokeEvaluator({ - evaluator: simpleEvaluator, // from playground state - inputs: { - ...testcaseData, - prediction: variantOutput, - }, + evaluator: simpleEvaluator ?? 
{ data: { uri: buildEvaluatorUri(selectedEvaluator.key) } }, + inputs: evaluatorInputs, outputs: variantOutput, parameters: formValues.parameters, // current form settings }) @@ -713,9 +728,9 @@ If other parts of the app use `createEvaluatorRunExecution`, update them too: ## Open Questions -1. **Slug uniqueness:** Does backend enforce unique slugs? If collision, does it auto-suffix? +1. **Slug uniqueness:** Backend enforces unique slugs per project; generate a short suffix client-side to avoid collisions. -2. **Output schemas:** Should frontend pass `data.schemas.outputs` when creating? Or does backend derive from evaluator type? +2. **Output schemas:** Resolved. Frontend sends known output schemas as `data.schemas.outputs`; backend hydration from URI + parameters remains as a fallback for builtin evaluators during create/edit. 3. **Permission model:** Is `RUN_WORKFLOWS` the right permission for evaluator playground? Or should there be `RUN_EVALUATORS`? diff --git a/docs/design/migrate-evaluator-playground/status.md b/docs/design/migrate-evaluator-playground/status.md index b566579b5d..2f4f5a7d24 100644 --- a/docs/design/migrate-evaluator-playground/status.md +++ b/docs/design/migrate-evaluator-playground/status.md @@ -1,8 +1,8 @@ # Status: Evaluator Playground Migration -## Current Phase: Planning Complete +## Current Phase: PR 2 (Run) In Progress -**Last Updated:** 2026-01-27 +**Last Updated:** 2026-02-13 --- @@ -10,8 +10,8 @@ **Direct Migration (No Adapters)** - Split into two PRs: -1. **PR 1:** CRUD migration to `SimpleEvaluator` endpoints -2. **PR 2:** Run migration to native workflow invoke +1. **PR 1:** CRUD migration to `SimpleEvaluator` endpoints (draft PR) +2. **PR 2:** Run migration to native workflow invoke (in progress) See [plan.md](./plan.md) for detailed implementation steps. @@ -55,9 +55,8 @@ See [plan.md](./plan.md) for detailed implementation steps. 
### Next Steps -- [ ] Wait for PR #3527 to be merged -- [ ] Start PR 1: CRUD migration -- [ ] After PR 1 stable, start PR 2: Run migration +- [ ] Finalize PR 1: CRUD migration (stacked on PR #3527) +- [ ] Finish PR 2: Run migration --- @@ -102,13 +101,32 @@ The SDK maintains a `HANDLER_REGISTRY` that maps URIs to handler functions: | `settings_values` | `data.parameters` | | `EvaluatorConfig` | `SimpleEvaluator` | +### 5. Output schema ownership moved to frontend templates + +Legacy config creation (`/evaluators/configs`) called `build_evaluator_data`, which generated +`data.schemas.outputs` and `data.service.format` for builtin evaluators. + +The migrated frontend CRUD path uses `/preview/simple/evaluators` and initially sent only +`data.uri` plus `data.parameters`. That can create revisions without output schemas. + +Frontend now receives `outputs_schema` in the evaluator template payload (`GET /evaluators`) and +sends `data.schemas.outputs` during create and edit. + +Schema selection rules are now: +- fixed evaluators: use template `outputs_schema` +- `auto_ai_critique`: use `parameters.json_schema.schema` +- `json_multi_field_match`: derive schema from configured `fields` +- evaluators without template schema: send no output schema + +Backend hydration still exists as a fallback path for builtin evaluators. + --- ## Open Questions -1. **Slug uniqueness:** Does backend enforce unique slugs? If collision, does it auto-suffix? +1. **Slug uniqueness:** Backend enforces unique slugs per project; generate a short suffix client-side to avoid collisions. -2. **Output schemas:** Should frontend pass `data.schemas.outputs` when creating? Or does backend derive from evaluator type? +2. **Output schemas:** Resolved. Frontend now sends known output schemas from evaluator templates and dynamic settings. 3. **Permission model:** Is `RUN_WORKFLOWS` the right permission for evaluator playground? Or should there be `RUN_EVALUATORS`? 
diff --git a/web/oss/src/components/Evaluators/assets/types.ts b/web/oss/src/components/Evaluators/assets/types.ts index f928cdc801..ccfdfaaa06 100644 --- a/web/oss/src/components/Evaluators/assets/types.ts +++ b/web/oss/src/components/Evaluators/assets/types.ts @@ -1,5 +1,5 @@ import {EvaluatorPreviewDto} from "@/oss/lib/hooks/useEvaluators/types" -import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" +import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types" export type EvaluatorCategory = "automatic" | "human" @@ -15,7 +15,7 @@ export type EvaluatorPreview = EvaluatorPreviewDto & { metrics?: Record } -export type EvaluatorConfigRow = EvaluatorConfig & { +export type EvaluatorConfigRow = SimpleEvaluator & { evaluator?: Evaluator | null kind?: "config" } diff --git a/web/oss/src/components/Evaluators/assets/utils.ts b/web/oss/src/components/Evaluators/assets/utils.ts index 4b09fa2d46..a750ce248f 100644 --- a/web/oss/src/components/Evaluators/assets/utils.ts +++ b/web/oss/src/components/Evaluators/assets/utils.ts @@ -1,6 +1,7 @@ +import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" import {formatDay} from "@/oss/lib/helpers/dateTimeHelper" import {capitalize} from "@/oss/lib/helpers/utils" -import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" +import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types" import { EvaluatorCategory, @@ -54,7 +55,7 @@ const formatDate = (value?: string) => { return formatDay({date: value}) } -const collectConfigTags = (config: EvaluatorConfig, evaluator?: Evaluator | null) => { +const collectConfigTags = (config: SimpleEvaluator, evaluator?: Evaluator | null) => { const tags = new Set() if (Array.isArray(config.tags)) { @@ -132,11 +133,12 @@ export const transformEvaluatorsToRows = ( } const buildConfigTypeBadge = ( - config: EvaluatorConfig, + config: SimpleEvaluator, category: Extract, evaluator?: Evaluator | null, ): EvaluatorTypeBadge => { - const label = evaluator?.name || createTypeLabel(config.evaluator_key, 
config.name) + const evaluatorKey = resolveEvaluatorKey(config) + const label = evaluator?.name || createTypeLabel(evaluatorKey, config.name) const colorHex = config.color || evaluator?.color return { @@ -146,44 +148,54 @@ const buildConfigTypeBadge = ( } } -const extractConfigVersion = (config: EvaluatorConfig) => { - const serviceValues = (config.settings_values as any)?.service || {} +const extractConfigVersion = (config: SimpleEvaluator) => { + const parameters = (config.data as any)?.parameters || {} + const serviceValues = (config.data as any)?.service || {} + const serviceConfig = serviceValues?.configuration || {} const candidate = (config as any)?.version || serviceValues?.agenta || serviceValues?.version || - (config.settings_values as any)?.version || + serviceConfig?.version || + serviceConfig?.agenta || + parameters?.version || "" return sanitizeVersion(typeof candidate === "string" ? candidate : "") } -const extractConfigModifiedBy = (config: EvaluatorConfig) => { +const extractConfigModifiedBy = (config: SimpleEvaluator) => { const modifiedBy = (config as any)?.updated_by || (config as any)?.updatedBy || + (config as any)?.updated_by_id || + (config as any)?.updatedById || (config as any)?.created_by || (config as any)?.createdBy || + (config as any)?.created_by_id || + (config as any)?.createdById || "" return typeof modifiedBy === "string" ? modifiedBy : "" } export const transformEvaluatorConfigsToRows = ( - configs: EvaluatorConfig[], + configs: SimpleEvaluator[], category: Extract, evaluators: Evaluator[], ): EvaluatorRegistryRow[] => { const evaluatorsMap = new Map(evaluators.map((item) => [item.key, item])) return configs.map((config) => { - const evaluator = evaluatorsMap.get(config.evaluator_key) || null + const evaluatorKey = resolveEvaluatorKey(config) + const evaluator = evaluatorKey ? 
evaluatorsMap.get(evaluatorKey) || null : null const badge = buildConfigTypeBadge(config, category, evaluator) const versionLabel = extractConfigVersion(config) const tags = collectConfigTags(config, evaluator) const modifiedBy = extractConfigModifiedBy(config) const createdAt = config.created_at const updatedAt = config.updated_at || createdAt + const displayName = config.name || evaluator?.name || evaluatorKey || config.slug || "" const raw: EvaluatorConfigRow = { ...config, @@ -194,15 +206,15 @@ export const transformEvaluatorConfigsToRows = ( return { key: config.id, id: config.id, - name: config.name, - slug: config.evaluator_key, + name: displayName, + slug: evaluatorKey || config.slug, typeBadge: badge, versionLabel, tags, dateCreated: formatDate(createdAt), lastModified: formatDate(updatedAt), modifiedBy, - avatarName: modifiedBy || config.name, + avatarName: modifiedBy || displayName, raw, } }) diff --git a/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx b/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx index 3ec5eac9ce..3f8c0cd62e 100644 --- a/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx +++ b/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx @@ -25,6 +25,7 @@ import { resetPlaygroundAtom, } from "@/oss/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms" import useURL from "@/oss/hooks/useURL" +import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData" import {recordWidgetEventAtom} from "@/oss/lib/onboarding" import {Evaluator} from "@/oss/lib/Types" @@ -65,7 +66,7 @@ const ConfigureEvaluatorPage = ({evaluatorId}: {evaluatorId?: string | null}) => ) }, [evaluatorConfigs, evaluatorId, stagedConfig]) - const evaluatorKey = existingConfig?.evaluator_key ?? evaluatorId ?? null + const evaluatorKey = resolveEvaluatorKey(existingConfig) ?? 
evaluatorId ?? null const evaluatorQuery = useAtomValue(evaluatorByKeyAtomFamily(evaluatorKey)) const evaluatorFromRegular = evaluators.find((item) => item.key === evaluatorKey) diff --git a/web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts b/web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts index 3aa171dc76..97fbb7ffc4 100644 --- a/web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts +++ b/web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts @@ -2,7 +2,7 @@ import {useCallback, useMemo} from "react" import useEvaluators from "@/oss/lib/hooks/useEvaluators" import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData" -import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" +import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types" import {EvaluatorCategory, EvaluatorPreview, EvaluatorRegistryRow} from "../assets/types" import { @@ -33,7 +33,7 @@ const useEvaluatorsRegistryData = (category: EvaluatorCategory) => { const humanEvaluators = (humanEvaluatorsSwr.data || []) as EvaluatorPreview[] unsortedRows = transformEvaluatorsToRows(humanEvaluators, "human") } else { - const evaluatorConfigs = (evaluatorConfigsSwr.data || []) as EvaluatorConfig[] + const evaluatorConfigs = (evaluatorConfigsSwr.data || []) as SimpleEvaluator[] const baseEvaluators = (baseEvaluatorsSwr.data || []) as Evaluator[] unsortedRows = transformEvaluatorConfigsToRows( diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalInner.tsx b/web/oss/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalInner.tsx index 313c9ef605..be52c03140 100644 --- a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalInner.tsx +++ b/web/oss/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalInner.tsx @@ -8,6 +8,7 @@ import {useRouter} from "next/router" import {FIRST_EVALUATION_TOUR_ID} from 
"@/oss/components/Onboarding/tours/firstEvaluationTour" import useURL from "@/oss/hooks/useURL" import {useVaultSecret} from "@/oss/hooks/useVaultSecret" +import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" import {redirectIfNoLLMKeys} from "@/oss/lib/helpers/utils" import useAppVariantRevisions from "@/oss/lib/hooks/useAppVariantRevisions" import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData" @@ -336,7 +337,7 @@ const NewEvaluationModalInner = ({ !preview && selectedEvalConfigs.some( (id) => - evaluatorConfigs.find((config) => config.id === id)?.evaluator_key === + resolveEvaluatorKey(evaluatorConfigs.find((config) => config.id === id)) === "auto_ai_critique", ) && (await redirectIfNoLLMKeys({secrets})) diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx b/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx index 7aca9b75b4..ceed5ab507 100644 --- a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx +++ b/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx @@ -11,9 +11,10 @@ import router from "next/router" import {getMetricsFromEvaluator} from "@/oss/components/SharedDrawers/AnnotateDrawer/assets/transforms" import useURL from "@/oss/hooks/useURL" +import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" import {EvaluatorDto} from "@/oss/lib/hooks/useEvaluators/types" import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData" -import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" +import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types" import {openEvaluatorDrawerAtom} from "../../../autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms" import type {SelectEvaluatorSectionProps} from "../../types" @@ -88,12 +89,12 @@ const 
SelectEvaluatorSection = ({ const evaluatorConfigs = useMemo(() => { if (preview) { - return evaluators as EvaluatorConfig[] + return [] as SimpleEvaluator[] } return ( propsEvaluatorConfigs?.length ? propsEvaluatorConfigs : evaluatorConfigsSwr.data || [] - ) as EvaluatorConfig[] - }, [preview, propsEvaluatorConfigs, evaluatorConfigsSwr.data, evaluators]) + ) as SimpleEvaluator[] + }, [preview, propsEvaluatorConfigs, evaluatorConfigsSwr.data]) const isLoadingEvaluators = fetchLoadingEvaluators const isLoadingEvaluatorConfigs = fetchLoadingConfigs @@ -122,7 +123,7 @@ const SelectEvaluatorSection = ({ const availableIds = new Set( (preview ? (evaluators as EvaluatorDto<"response">[]) - : (evaluatorConfigs as EvaluatorConfig[]) + : (evaluatorConfigs as SimpleEvaluator[]) ).map((config) => config.id), ) @@ -141,10 +142,9 @@ const SelectEvaluatorSection = ({ // Handler to open the drawer in edit mode const handleEditConfig = useCallback( - (record: EvaluatorConfig) => { - const evaluator = (evaluators as Evaluator[]).find( - (e) => e.key === record.evaluator_key, - ) + (record: SimpleEvaluator) => { + const evaluatorKey = resolveEvaluatorKey(record) + const evaluator = (evaluators as Evaluator[]).find((e) => e.key === evaluatorKey) if (evaluator) { openEvaluatorDrawer({ evaluator, @@ -158,10 +158,9 @@ const SelectEvaluatorSection = ({ // Handler to open the drawer in clone mode const handleCloneConfig = useCallback( - (record: EvaluatorConfig) => { - const evaluator = (evaluators as Evaluator[]).find( - (e) => e.key === record.evaluator_key, - ) + (record: SimpleEvaluator) => { + const evaluatorKey = resolveEvaluatorKey(record) + const evaluator = (evaluators as Evaluator[]).find((e) => e.key === evaluatorKey) if (evaluator) { openEvaluatorDrawer({ evaluator, @@ -203,13 +202,13 @@ const SelectEvaluatorSection = ({ [], ) - const columnsConfig: ColumnsType = useMemo( + const columnsConfig: ColumnsType = useMemo( () => [ { title: "Name", dataIndex: "name", key: "name", - 
render: (_, record: EvaluatorConfig) => { + render: (_, record: SimpleEvaluator) => { return
{record.name}
}, }, @@ -217,10 +216,11 @@ const SelectEvaluatorSection = ({ title: "Type", dataIndex: "type", key: "type", - render: (x, record: EvaluatorConfig) => { + render: (x, record: SimpleEvaluator) => { // Find the evaluator by key to display its name + const evaluatorKey = resolveEvaluatorKey(record) const evaluator = (evaluators as Evaluator[]).find( - (item) => item.key === record.evaluator_key, + (item) => item.key === evaluatorKey, ) return {evaluator?.name} }, @@ -231,7 +231,7 @@ const SelectEvaluatorSection = ({ width: 56, fixed: "right", align: "center", - render: (_, record: EvaluatorConfig) => { + render: (_, record: SimpleEvaluator) => { return ( ({ // Conditionally type filteredEvalConfigs based on Preview const filteredEvalConfigs: Preview extends true ? EvaluatorDto<"response">[] - : EvaluatorConfig[] = useMemo(() => { + : SimpleEvaluator[] = useMemo(() => { if (preview) { // Explicitly narrow types for Preview = true (human evaluations) let data = evaluators as EvaluatorDto<"response">[] @@ -295,21 +295,21 @@ const SelectEvaluatorSection = ({ if (!searchTerm) return data as any return data.filter((item) => - item.name.toLowerCase().includes(searchTerm.toLowerCase()), + (item.name || "").toLowerCase().includes(searchTerm.toLowerCase()), ) as any } else { // Explicitly narrow types for Preview = false - const data = evaluatorConfigs as EvaluatorConfig[] + const data = evaluatorConfigs as SimpleEvaluator[] if (!searchTerm) return data return data.filter((item) => - item.name.toLowerCase().includes(searchTerm.toLowerCase()), + (item.name || "").toLowerCase().includes(searchTerm.toLowerCase()), ) as any } }, [searchTerm, evaluatorConfigs, preview, evaluators]) const onSelectEvalConfig = (selectedRowKeys: React.Key[]) => { const currentSelected = new Set(selectedEvalConfigs) - const configs = filteredEvalConfigs as EvaluatorDto<"response">[] + const configs = filteredEvalConfigs as {id: string}[] configs.forEach((item) => { if (selectedRowKeys.includes(item.id)) 
{ currentSelected.add(item.id) @@ -331,7 +331,7 @@ const SelectEvaluatorSection = ({ ).length > 0 ) } - return (evaluatorConfigs as EvaluatorConfig[]).length > 0 + return (evaluatorConfigs as SimpleEvaluator[]).length > 0 }, [preview, evaluators, evaluatorConfigs]) return ( @@ -419,7 +419,7 @@ const SelectEvaluatorSection = ({ pagination={false} /> ) : ( - + rowSelection={{ type: "checkbox", columnWidth: 48, @@ -444,7 +444,7 @@ const SelectEvaluatorSection = ({ className="ph-no-capture" columns={columnsConfig} rowKey={"id"} - dataSource={filteredEvalConfigs as EvaluatorConfig[]} + dataSource={filteredEvalConfigs as SimpleEvaluator[]} scroll={{x: true, y: 455}} bordered pagination={false} diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/types.ts b/web/oss/src/components/pages/evaluations/NewEvaluation/types.ts index 4bf88a9a3e..e292e2601a 100644 --- a/web/oss/src/components/pages/evaluations/NewEvaluation/types.ts +++ b/web/oss/src/components/pages/evaluations/NewEvaluation/types.ts @@ -4,7 +4,7 @@ import {ModalProps} from "antd" import {EvaluatorDto} from "@/oss/lib/hooks/useEvaluators/types" import {EnhancedVariant} from "@/oss/lib/shared/variant/transformer/types" -import {LLMRunRateLimit, Evaluator, EvaluatorConfig, testset} from "@/oss/lib/Types" +import {LLMRunRateLimit, Evaluator, SimpleEvaluator, testset} from "@/oss/lib/Types" export interface NewEvaluationAppOption { label: string @@ -54,7 +54,7 @@ export interface NewEvaluationModalContentProps extends HTMLProps[] - evaluatorConfigs: EvaluatorConfig[] + evaluatorConfigs: SimpleEvaluator[] advanceSettings: LLMRunRateLimitWithCorrectAnswer setAdvanceSettings: Dispatch> appOptions: NewEvaluationAppOption[] @@ -97,7 +97,7 @@ export interface SelectTestsetSectionProps extends HTMLProps { } export interface SelectEvaluatorSectionProps extends HTMLProps { - evaluatorConfigs: EvaluatorConfig[] + evaluatorConfigs: SimpleEvaluator[] evaluators: Evaluator[] selectedEvalConfigs: string[] 
setSelectedEvalConfigs: Dispatch> diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx index 6957d3438a..6a0aed5f8f 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx @@ -71,7 +71,7 @@ const AdvancedSettings: React.FC = ({settings, selectedTe return ( { const traceTree = useAtomValue(playgroundTraceTreeAtom) const setTraceTree = useSetAtom(playgroundTraceTreeAtom) const selectedEvaluator = useAtomValue(playgroundEvaluatorAtom) + const evaluatorConfig = useAtomValue(playgroundEditValuesAtom) const form = useAtomValue(playgroundFormRefAtom) const [lastAppId, setLastAppId] = useAtom(playgroundLastAppIdAtom) const [lastVariantId, setLastVariantId] = useAtom(playgroundLastVariantIdAtom) @@ -402,8 +405,8 @@ const DebugSection = () => { setEvalOutputStatus({success: false, error: false}) setIsLoadingResult(true) - const settingsValues = form.getFieldValue("settings_values") || {} - let normalizedSettings = {...settingsValues} + const parameters = form.getFieldValue("parameters") || {} + let normalizedSettings = {...parameters} if (typeof normalizedSettings.json_schema === "string") { try { @@ -429,68 +432,81 @@ const DebugSection = () => { return } - const {testcaseObj, evalMapObj} = mapTestcaseAndEvalValues( + const {testcaseObj} = mapTestcaseAndEvalValues( normalizedSettings, selectedTestcase.testcase, ) let outputs = {} - if (Object.keys(evalMapObj).length && selectedEvaluator.key.startsWith("rag_")) { - const mapResponse = await createEvaluatorDataMapping({ - inputs: baseResponseData, - mapping: transformTraceKeysInSettings(evalMapObj), - }) - outputs = {...outputs, ...mapResponse.outputs} - } - 
if (Object.keys(testcaseObj).length) { outputs = {...outputs, ...testcaseObj} } - if (!selectedEvaluator.key.startsWith("rag_")) { - const correctAnswerKey = settingsValues.correct_answer_key - const groundTruthKey = - typeof correctAnswerKey === "string" && correctAnswerKey.startsWith("testcase.") - ? correctAnswerKey.split(".")[1] - : correctAnswerKey + const correctAnswerKey = parameters.correct_answer_key + const groundTruthKey = + typeof correctAnswerKey === "string" && correctAnswerKey.startsWith("testcase.") + ? correctAnswerKey.split(".")[1] + : correctAnswerKey - const normalizeCompact = (val: any) => { - try { - if (val === undefined || val === null) return "" - const str = typeof val === "string" ? val : JSON.stringify(val) - const parsed = safeJson5Parse(str) - if (parsed && typeof parsed === "object") { - return JSON.stringify(parsed) - } - return str - } catch { - return typeof val === "string" ? val : JSON.stringify(val) + const normalizeCompact = (val: any) => { + try { + if (val === undefined || val === null) return "" + const str = typeof val === "string" ? val : JSON.stringify(val) + const parsed = safeJson5Parse(str) + if (parsed && typeof parsed === "object") { + return JSON.stringify(parsed) } + return str + } catch { + return typeof val === "string" ? val : JSON.stringify(val) } + } - const rawGT = selectedTestcase?.["testcase"]?.[groundTruthKey] - const ground_truth = normalizeCompact(rawGT) - const prediction = normalizeCompact(variantResult) - - outputs = { - ...outputs, - ...selectedTestcase.testcase, - ground_truth, - [groundTruthKey]: ground_truth, - prediction, - ...(selectedEvaluator.key === "auto_custom_code_run" ? 
{app_config: {}} : {}), - } + const rawGT = selectedTestcase?.["testcase"]?.[groundTruthKey] + const ground_truth = normalizeCompact(rawGT) + const prediction = normalizeCompact(variantResult) + + outputs = { + ...outputs, + ...selectedTestcase.testcase, + ground_truth, + [groundTruthKey]: ground_truth, + prediction, + ...(selectedEvaluator.key === "auto_custom_code_run" ? {app_config: {}} : {}), } - const runResponse = await createEvaluatorRunExecution( - selectedEvaluator.key, - { - inputs: outputs, - settings: transformTraceKeysInSettings(normalizedSettings), - }, - {signal: controller.signal}, - ) + const evaluatorKey = resolveEvaluatorKey(evaluatorConfig) || selectedEvaluator?.key + const evaluatorUri = + evaluatorConfig?.data?.uri || + (evaluatorKey ? buildEvaluatorUri(evaluatorKey) : undefined) + const evaluatorUrl = evaluatorConfig?.data?.url + + if (!evaluatorUri && !evaluatorUrl) { + setOutputResult( + "Evaluator interface is missing (uri/url). Save the evaluator and try again.", + ) + setEvalOutputStatus({success: false, error: true}) + return + } + + const evaluatorParameters = transformTraceKeysInSettings(normalizedSettings) + const parsedVariantOutput = safeParse(variantResult, variantResult) + const workflowOutputs = + variantResult !== "" + ? parsedVariantOutput + : (baseResponseData?.data ?? 
parsedVariantOutput) + + const workflowResponse = await invokeEvaluator({ + uri: evaluatorUri, + url: evaluatorUrl, + evaluator: evaluatorConfig, + inputs: outputs, + outputs: workflowOutputs, + parameters: evaluatorParameters, + options: {signal: controller.signal}, + }) + const runResponse = mapWorkflowResponseToEvaluatorOutput(workflowResponse) setEvalOutputStatus({success: true, error: false}) setOutputResult(getStringOrJson(runResponse.outputs)) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx index a8128c43e7..c7a3df73f6 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx @@ -105,7 +105,7 @@ export const DynamicFormField: React.FC = ({ form, }) => { const settingsValue = Form.useWatch(name, form) - const runtime = Form.useWatch(["settings_values", "runtime"], form) + const runtime = Form.useWatch(["parameters", "runtime"], form) const classes = useStyles() const {token} = theme.useToken() diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx index a96a07a37f..f5ddf000df 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx @@ -55,7 +55,7 @@ export const FieldsTagsEditor: React.FC = ({ // Watch the correct_answer_key from form to react to changes // Using Form.useWatch instead of form.getFieldValue for reactivity - 
const formCorrectAnswerKey = Form.useWatch(["settings_values", "correct_answer_key"], form) + const formCorrectAnswerKey = Form.useWatch(["parameters", "correct_answer_key"], form) const effectiveKey = formCorrectAnswerKey || correctAnswerKey // Check if we can detect fields from testcase diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx index 4a9759f47e..5dc143bb86 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx @@ -11,9 +11,10 @@ import {createUseStyles} from "react-jss" import {useAppId} from "@/oss/hooks/useAppId" import useURL from "@/oss/hooks/useURL" +import {deriveEvaluatorOutputsSchema} from "@/oss/lib/evaluators/utils" import {EvaluationSettingsTemplate, JSSTheme, SettingsPreset} from "@/oss/lib/Types" import { - CreateEvaluationConfigData, + CreateEvaluatorConfigData, createEvaluatorConfig, updateEvaluatorConfig, } from "@/oss/services/evaluations/api" @@ -69,6 +70,13 @@ interface ConfigureEvaluatorProps { onToggleTestPanel?: () => void } +interface ConfigureEvaluatorFormValues { + name: string + description?: string + tags?: string[] + parameters?: Record +} + const useStyles = createUseStyles((theme: JSSTheme) => ({ collapseContainer: { "& .ant-collapse-header": { @@ -199,12 +207,10 @@ const ConfigureEvaluator = ({ const allKeys = Array.from(new Set([...templateKeys, ...presetKeys])) // Clear subtree before applying new values to avoid stale keys - form.setFieldsValue({settings_values: {}}) + form.setFieldsValue({parameters: {}}) if (allKeys.length) { - const fieldNames = allKeys.map( - (key) => ["settings_values", key] as (string | number)[], - ) + const fieldNames = allKeys.map((key) => ["parameters", key] as (string 
| number)[]) form.resetFields(fieldNames) const nextFields = fieldNames @@ -248,7 +254,7 @@ const ConfigureEvaluator = ({ const evaluatorVersionNumber = useMemo(() => { const raw = - editEvalEditValues?.settings_values?.version ?? + editEvalEditValues?.data?.parameters?.version ?? selectedEvaluator?.settings_template?.version?.default ?? 3 @@ -256,7 +262,7 @@ const ConfigureEvaluator = ({ // extract leading number (e.g., "4", "4.1", "v4") const match = String(raw).match(/\d+(\.\d+)?/) return match ? parseFloat(match[0]) : 3 - }, [editEvalEditValues?.settings_values?.version, selectedEvaluator]) + }, [editEvalEditValues?.data?.parameters?.version, selectedEvaluator]) const evalFields = useMemo(() => { const templateEntries = Object.entries(selectedEvaluator?.settings_template || {}) @@ -283,28 +289,25 @@ const ConfigureEvaluator = ({ const advancedSettingsFields = evalFields.filter((field) => field.advanced) const basicSettingsFields = evalFields.filter((field) => !field.advanced) - const onSubmit = async (values: CreateEvaluationConfigData) => { + const onSubmit = async (values: ConfigureEvaluatorFormValues) => { try { setSubmitLoading(true) if (!selectedEvaluator?.key) throw new Error("No selected key") - const settingsValues = values.settings_values || {} + const parameters = values.parameters || {} - const jsonSchemaFieldPath: (string | number)[] = ["settings_values", "json_schema"] - const hasJsonSchema = Object.prototype.hasOwnProperty.call( - settingsValues, - "json_schema", - ) + const jsonSchemaFieldPath: (string | number)[] = ["parameters", "json_schema"] + const hasJsonSchema = Object.prototype.hasOwnProperty.call(parameters, "json_schema") if (hasJsonSchema) { form.setFields([{name: jsonSchemaFieldPath, errors: []}]) - if (typeof settingsValues.json_schema === "string") { + if (typeof parameters.json_schema === "string") { try { - const parsed = JSON.parse(settingsValues.json_schema) + const parsed = JSON.parse(parameters.json_schema) if (!parsed || 
typeof parsed !== "object" || Array.isArray(parsed)) { throw new Error() } - settingsValues.json_schema = parsed + parameters.json_schema = parsed } catch { form.setFields([ { @@ -315,9 +318,9 @@ const ConfigureEvaluator = ({ throw new Error("JSON schema must be a valid JSON object") } } else if ( - settingsValues.json_schema && - (typeof settingsValues.json_schema !== "object" || - Array.isArray(settingsValues.json_schema)) + parameters.json_schema && + (typeof parameters.json_schema !== "object" || + Array.isArray(parameters.json_schema)) ) { form.setFields([ { @@ -329,40 +332,81 @@ const ConfigureEvaluator = ({ } } - const data = { - ...values, - evaluator_key: selectedEvaluator!.key, - settings_values: settingsValues, + const existingParameters = editEvalEditValues?.data?.parameters || {} + const mergedParameters = {...existingParameters, ...parameters} + const createOutputsSchema = deriveEvaluatorOutputsSchema({ + evaluatorKey: selectedEvaluator.key, + evaluatorTemplate: selectedEvaluator, + parameters, + }) + const updateOutputsSchema = deriveEvaluatorOutputsSchema({ + evaluatorKey: selectedEvaluator.key, + evaluatorTemplate: selectedEvaluator, + parameters: mergedParameters, + }) + + const payload: CreateEvaluatorConfigData = { + name: values.name, + description: values.description, + tags: values.tags, + evaluator_key: selectedEvaluator.key, + parameters, + outputs_schema: createOutputsSchema, } if (editMode) { - await updateEvaluatorConfig(editEvalEditValues?.id!, data) - - // Update atom with merged values - const updatedConfig = editEvalEditValues - ? 
{ - ...editEvalEditValues, - ...data, - settings_values: settingsValues, - } - : null - if (updatedConfig) { - commitPlayground(updatedConfig) - } - } else { - const response = await createEvaluatorConfig(appId, data) - const createdConfig = response?.data - - if (createdConfig) { - // Use commitPlayground to update state and switch to edit mode - commitPlayground(createdConfig) - if (uiVariant === "page" && createdConfig.id) { - await router.replace( - `${projectURL}/evaluators/configure/${encodeURIComponent( - createdConfig.id, - )}`, - ) + const existingData = editEvalEditValues?.data ?? {} + const existingSchemas = + existingData.schemas && + typeof existingData.schemas === "object" && + !Array.isArray(existingData.schemas) + ? existingData.schemas + : undefined + + const nextSchemas = (() => { + if (updateOutputsSchema) { + return { + ...(existingSchemas ?? {}), + outputs: updateOutputsSchema, + } } + + if (!existingSchemas) return undefined + + const {outputs, ...remainingSchemas} = existingSchemas + void outputs + return Object.keys(remainingSchemas).length ? remainingSchemas : undefined + })() + + const {schemas: _unusedSchemas, ...dataWithoutSchemas} = existingData + void _unusedSchemas + + const updatedEvaluator = await updateEvaluatorConfig(editEvalEditValues?.id!, { + id: editEvalEditValues?.id!, + name: values.name, + description: editEvalEditValues?.description, + tags: editEvalEditValues?.tags, + meta: editEvalEditValues?.meta, + flags: editEvalEditValues?.flags, + data: { + ...dataWithoutSchemas, + parameters: mergedParameters, + ...(nextSchemas ? 
{schemas: nextSchemas} : {}), + }, + }) + + commitPlayground(updatedEvaluator) + } else { + const createdConfig = await createEvaluatorConfig(appId, payload) + + // Use commitPlayground to update state and switch to edit mode + commitPlayground(createdConfig) + if (uiVariant === "page" && createdConfig.id) { + await router.replace( + `${projectURL}/evaluators/configure/${encodeURIComponent( + createdConfig.id, + )}`, + ) } } @@ -381,15 +425,15 @@ const ConfigureEvaluator = ({ form.resetFields() if (editMode && editEvalEditValues) { - // Load all values including nested settings_values + // Load all values including nested parameters form.setFieldsValue({ ...editEvalEditValues, - settings_values: editEvalEditValues.settings_values || {}, + parameters: editEvalEditValues.data?.parameters || {}, }) } else if (cloneConfig && editEvalEditValues) { - // When cloning, copy only settings_values and clear the name so user provides a new name + // When cloning, copy only parameters and clear the name so user provides a new name form.setFieldsValue({ - settings_values: editEvalEditValues.settings_values || {}, + parameters: editEvalEditValues.data?.parameters || {}, name: "", }) } else if (selectedEvaluator?.settings_template) { @@ -404,7 +448,7 @@ const ConfigureEvaluator = ({ } if (Object.keys(defaultSettings).length > 0) { form.setFieldsValue({ - settings_values: defaultSettings, + parameters: defaultSettings, }) } } @@ -557,7 +601,7 @@ const ConfigureEvaluator = ({ key={field.key} traceTree={traceTree} form={form} - name={["settings_values", field.key]} + name={["parameters", field.key]} /> ))} @@ -676,7 +720,7 @@ const ConfigureEvaluator = ({ key={field.key} traceTree={traceTree} form={form} - name={["settings_values", field.key]} + name={["parameters", field.key]} /> ))} diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms.ts 
b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms.ts index 76b8c134c2..dcb15dcd42 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms.ts +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms.ts @@ -18,7 +18,7 @@ import type {FormInstance} from "antd" import {atom} from "jotai" import {atomWithReset, atomWithStorage, RESET} from "jotai/utils" -import type {Evaluator, EvaluatorConfig, Variant} from "@/oss/lib/Types" +import type {Evaluator, SimpleEvaluator, Variant} from "@/oss/lib/Types" import {stringStorage} from "@/oss/state/utils/stringStorage" // ================================================================ @@ -84,7 +84,7 @@ export const playgroundIsCloneModeAtom = atom((get) => get(playgroundSessionAtom * - In edit mode: loaded from existing config * - In clone mode: copied from source config (with cleared name) */ -export const playgroundEditValuesAtom = atomWithReset(null) +export const playgroundEditValuesAtom = atomWithReset(null) // ================================================================ // FORM STATE @@ -95,7 +95,7 @@ export const playgroundEditValuesAtom = atomWithReset(nu * Allows DebugSection to read form values for running the evaluator * * This is set by ConfigureEvaluator when the form mounts - * and read by DebugSection to get current settings_values + * and read by DebugSection to get current parameters */ export const playgroundFormRefAtom = atom(null) @@ -179,7 +179,7 @@ export const initPlaygroundAtom = atom( set, payload: { evaluator: Evaluator - existingConfig?: EvaluatorConfig | null + existingConfig?: SimpleEvaluator | null mode?: PlaygroundMode }, ) => { @@ -226,7 +226,7 @@ export const resetPlaygroundAtom = atom(null, (get, set) => { * * @param savedConfig - The config returned from the API */ -export const commitPlaygroundAtom = atom(null, (get, set, 
savedConfig: EvaluatorConfig) => { +export const commitPlaygroundAtom = atom(null, (get, set, savedConfig: SimpleEvaluator) => { // Update edit values with saved config set(playgroundEditValuesAtom, savedConfig) @@ -280,7 +280,7 @@ export const openEvaluatorDrawerAtom = atom( set, payload: { evaluator: Evaluator - existingConfig?: EvaluatorConfig | null + existingConfig?: SimpleEvaluator | null mode?: PlaygroundMode }, ) => { diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx index 0ac235b386..c30bb3c1f1 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx @@ -5,11 +5,11 @@ import {Modal, Space, theme, Typography} from "antd" import {createUseStyles} from "react-jss" import {checkIfResourceValidForDeletion} from "@/oss/lib/evaluations/legacy" -import {EvaluatorConfig, JSSTheme} from "@/oss/lib/Types" +import {JSSTheme, SimpleEvaluator} from "@/oss/lib/Types" import {deleteEvaluatorConfig} from "@/oss/services/evaluations/api" type DeleteModalProps = { - selectedEvalConfig: EvaluatorConfig + selectedEvalConfig: SimpleEvaluator onSuccess: () => void } & React.ComponentProps diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx index f3c9434a38..72aaf034fc 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx @@ -7,18 +7,19 @@ import {useAtom} from "jotai" import {createUseStyles} from "react-jss" import {evaluatorsAtom} 
from "@/oss/lib/atoms/evaluation" +import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" import {formatDay} from "@/oss/lib/helpers/dateTimeHelper" -import {Evaluator, EvaluatorConfig, JSSTheme} from "@/oss/lib/Types" +import {Evaluator, JSSTheme, SimpleEvaluator} from "@/oss/lib/Types" import DeleteModal from "./DeleteModal" interface EvaluatorCardProps { - evaluatorConfigs: EvaluatorConfig[] + evaluatorConfigs: SimpleEvaluator[] setEditMode: React.Dispatch> setCloneConfig: React.Dispatch> setCurrent: React.Dispatch> setSelectedEvaluator: React.Dispatch> - setEditEvalEditValues: React.Dispatch> + setEditEvalEditValues: React.Dispatch> onSuccess: () => void } @@ -88,22 +89,21 @@ const EvaluatorCard = ({ const classes = useStyles() const evaluators = useAtom(evaluatorsAtom)[0] const [openDeleteModal, setOpenDeleteModal] = useState(false) - const [selectedDelEval, setSelectedDelEval] = useState(null) + const [selectedDelEval, setSelectedDelEval] = useState(null) return (
{evaluatorConfigs.length ? ( evaluatorConfigs.map((item) => { - const evaluator = evaluators.find((e) => e.key === item.evaluator_key) + const evaluatorKey = resolveEvaluatorKey(item) + const evaluator = evaluators.find((e) => e.key === evaluatorKey) return ( { - const selectedEval = evaluators.find( - (e) => e.key === item.evaluator_key, - ) + const selectedEval = evaluators.find((e) => e.key === evaluatorKey) if (selectedEval) { setEditMode(true) setSelectedEvaluator(selectedEval) @@ -130,7 +130,7 @@ const EvaluatorCard = ({ onClick: (e: any) => { e.domEvent.stopPropagation() const selectedEval = evaluators.find( - (e) => e.key === item.evaluator_key, + (e) => e.key === evaluatorKey, ) if (selectedEval) { setEditMode(true) @@ -147,7 +147,7 @@ const EvaluatorCard = ({ onClick: (e: any) => { e.domEvent.stopPropagation() const selectedEval = evaluators.find( - (e) => e.key === item.evaluator_key, + (e) => e.key === evaluatorKey, ) if (selectedEval) { setCloneConfig(true) diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx index 2e38bfd1c2..33c03a9f89 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx @@ -7,17 +7,18 @@ import {ColumnsType} from "antd/es/table" import {useAtom} from "jotai" import {evaluatorsAtom} from "@/oss/lib/atoms/evaluation" -import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" +import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" +import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types" import DeleteModal from "./DeleteModal" interface EvaluatorListProps { - evaluatorConfigs: EvaluatorConfig[] + evaluatorConfigs: SimpleEvaluator[] setEditMode: React.Dispatch> setCloneConfig: React.Dispatch> 
setCurrent: React.Dispatch> setSelectedEvaluator: React.Dispatch> - setEditEvalEditValues: React.Dispatch> + setEditEvalEditValues: React.Dispatch> onSuccess: () => void } @@ -32,9 +33,9 @@ const EvaluatorList = ({ }: EvaluatorListProps) => { const evaluators = useAtom(evaluatorsAtom)[0] const [openDeleteModal, setOpenDeleteModal] = useState(false) - const [selectedDelEval, setSelectedDelEval] = useState(null) + const [selectedDelEval, setSelectedDelEval] = useState(null) - const columns: ColumnsType = [ + const columns: ColumnsType = [ // { // title: "Version", // dataIndex: "version", @@ -56,7 +57,8 @@ const EvaluatorList = ({ dataIndex: "type", key: "type", render: (_, record) => { - const evaluator = evaluators.find((item) => item.key === record.evaluator_key) + const evaluatorKey = resolveEvaluatorKey(record) + const evaluator = evaluators.find((item) => item.key === evaluatorKey) return {evaluator?.name} }, }, @@ -84,8 +86,9 @@ const EvaluatorList = ({ icon: , onClick: (e: any) => { e.domEvent.stopPropagation() + const evaluatorKey = resolveEvaluatorKey(record) const selectedEval = evaluators.find( - (e) => e.key === record.evaluator_key, + (e) => e.key === evaluatorKey, ) if (selectedEval) { setEditMode(true) @@ -101,8 +104,9 @@ const EvaluatorList = ({ icon: , onClick: (e: any) => { e.domEvent.stopPropagation() + const evaluatorKey = resolveEvaluatorKey(record) const selectedEval = evaluators.find( - (e) => e.key === record.evaluator_key, + (e) => e.key === evaluatorKey, ) if (selectedEval) { setCloneConfig(true) @@ -151,7 +155,8 @@ const EvaluatorList = ({ onRow={(record) => ({ style: {cursor: "pointer"}, onClick: () => { - const selectedEval = evaluators.find((e) => e.key === record.evaluator_key) + const evaluatorKey = resolveEvaluatorKey(record) + const selectedEval = evaluators.find((e) => e.key === evaluatorKey) if (selectedEval) { setEditMode(true) setSelectedEvaluator(selectedEval) diff --git 
a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx index 60569766c2..564bc38df9 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx @@ -8,21 +8,22 @@ import {createUseStyles} from "react-jss" import {evaluatorsAtom} from "@/oss/lib/atoms/evaluation" import {getEvaluatorTags} from "@/oss/lib/evaluations/legacy" -import {Evaluator, EvaluatorConfig, JSSTheme} from "@/oss/lib/Types" +import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" +import {Evaluator, JSSTheme, SimpleEvaluator} from "@/oss/lib/Types" import {nonArchivedEvaluatorsAtom} from "@/oss/state/evaluators" import EvaluatorCard from "./EvaluatorCard" import EvaluatorList from "./EvaluatorList" interface EvaluatorsProps { - evaluatorConfigs: EvaluatorConfig[] + evaluatorConfigs: SimpleEvaluator[] handleOnCancel: () => void setCurrent: React.Dispatch> setSelectedEvaluator: React.Dispatch> fetchingEvalConfigs: boolean setEditMode: React.Dispatch> setCloneConfig: React.Dispatch> - setEditEvalEditValues: React.Dispatch> + setEditEvalEditValues: React.Dispatch> onSuccess: () => void setEvaluatorsDisplay: any evaluatorsDisplay: string @@ -95,10 +96,13 @@ const Evaluators = ({ const updatedEvaluatorConfigs = useMemo(() => { return evaluatorConfigs.map((config) => { - const matchingEvaluator = evaluators.find( - (evaluator) => evaluator.key === config.evaluator_key, + const evaluatorKey = resolveEvaluatorKey(config) + const matchingEvaluator = evaluators.find((evaluator) => evaluator.key === evaluatorKey) + if (!matchingEvaluator) return config + const nextTags = Array.from( + new Set([...(config.tags || []), ...(matchingEvaluator.tags || [])]), ) - return matchingEvaluator ? 
{...config, tags: matchingEvaluator.tags} : config + return {...config, tags: nextTags} }) }, [evaluatorConfigs, evaluators]) @@ -111,7 +115,7 @@ const Evaluators = ({ if (searchTerm) { filtered = filtered.filter((item) => - item.name.toLowerCase().includes(searchTerm.toLowerCase()), + (item.name || "").toLowerCase().includes(searchTerm.toLowerCase()), ) } diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx index b89da2ee19..c06202394c 100644 --- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx +++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx @@ -9,7 +9,7 @@ import EnhancedModal from "@/oss/components/EnhancedUIs/Modal" import {useAppId} from "@/oss/hooks/useAppId" import {evaluatorConfigsAtom} from "@/oss/lib/atoms/evaluation" import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData" -import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" +import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types" import ConfigureEvaluator from "./ConfigureEvaluator" import {initPlaygroundAtom, resetPlaygroundAtom} from "./ConfigureEvaluator/state/atoms" @@ -39,7 +39,7 @@ const EvaluatorsModal = ({ useFetchEvaluatorsData({appId: appId ?? 
""}) const [editMode, setEditMode] = useState(false) const [cloneConfig, setCloneConfig] = useState(false) - const [editEvalEditValues, setEditEvalEditValues] = useState(null) + const [editEvalEditValues, setEditEvalEditValues] = useState(null) const [evaluatorsDisplay, setEvaluatorsDisplay] = useLocalStorage<"card" | "list">( "evaluator_view", "list", diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx b/web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx index be59ee56da..856638e53c 100644 --- a/web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx +++ b/web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx @@ -63,7 +63,7 @@ const OnlineEvaluationDrawer = ({open, onClose, onCreate}: OnlineEvaluationDrawe const filterColumns = useMemo(() => getFilterColumns(), []) const [filters, setFilters] = useAtom(onlineEvalFiltersAtom) const resetFilters = useSetAtom(resetOnlineEvalFiltersAtom) - // Load preview evaluators (with IDs) to map evaluator_config.evaluator_key -> evaluator.id + // Load preview evaluators (with IDs) to map config URI key -> evaluator.id const previewEvaluatorsSwr = useEvaluators({preview: true, queries: {is_human: false}}) const baseEvaluators = (baseEvaluatorsSwr.data as Evaluator[] | undefined) ?? [] const evaluators = useAtomValue(evaluatorConfigsAtom) diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts index a49787e814..0545163cae 100644 --- a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts +++ b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts @@ -47,18 +47,41 @@ const mergeEvaluatorWithConfig = ( ...configAny, } - const previewSettings = isPlainObject(evaluatorAny.settings_values) - ? 
(evaluatorAny.settings_values as Record) + const previewData = isPlainObject(evaluatorAny.data) + ? (evaluatorAny.data as Record) : undefined - const configSettings = isPlainObject(configAny.settings_values) - ? (configAny.settings_values as Record) + const configData = isPlainObject(configAny.data) + ? (configAny.data as Record) : undefined + if (previewData || configData) { + const mergedData: Record = { + ...(previewData ?? {}), + ...(configData ?? {}), + } - if (previewSettings || configSettings) { - merged.settings_values = { + const previewParameters = isPlainObject(previewData?.parameters) + ? (previewData?.parameters as Record) + : undefined + const configParameters = isPlainObject(configData?.parameters) + ? (configData?.parameters as Record) + : undefined + const previewSettings = isPlainObject(evaluatorAny.settings_values) + ? (evaluatorAny.settings_values as Record) + : undefined + const configSettings = isPlainObject(configAny.settings_values) + ? (configAny.settings_values as Record) + : undefined + const mergedParameters = { + ...(previewParameters ?? {}), ...(previewSettings ?? {}), + ...(configParameters ?? {}), ...(configSettings ?? 
{}), } + if (Object.keys(mergedParameters).length) { + mergedData.parameters = mergedParameters + } + + merged.data = mergedData } return merged as EvaluatorPreviewDto diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx index af624b9f83..d5e724dcef 100644 --- a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx +++ b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx @@ -2,8 +2,9 @@ import {useMemo} from "react" import {SelectProps} from "antd" +import {getEvaluatorParameters, resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" import type {EvaluatorPreviewDto} from "@/oss/lib/hooks/useEvaluators/types" -import type {Evaluator} from "@/oss/lib/Types" +import type {Evaluator, SimpleEvaluator} from "@/oss/lib/Types" import { ALLOWED_ONLINE_EVALUATOR_KEYS, @@ -13,7 +14,7 @@ import { import {capitalize, collectEvaluatorCandidates} from "../utils/evaluatorDetails" interface UseEvaluatorSelectionParams { - evaluators: any[] + evaluators: SimpleEvaluator[] selectedEvaluatorId: string | undefined previewEvaluators: EvaluatorPreviewDto[] baseEvaluators: Evaluator[] @@ -21,16 +22,17 @@ interface UseEvaluatorSelectionParams { interface EvaluatorSelectionResult { evaluatorOptions: SelectProps["options"] - selectedEvaluatorConfig?: any + selectedEvaluatorConfig?: SimpleEvaluator matchedPreviewEvaluator?: EvaluatorPreviewDto evaluatorTypeLookup: Map } -const buildEvaluatorOptions = (configs: any[]): SelectProps["options"] => +const buildEvaluatorOptions = (configs: SimpleEvaluator[]): SelectProps["options"] => (configs || []).map((cfg: any) => { const iconSrc = (cfg?.icon_url && (cfg.icon_url.src || cfg.icon_url)) || undefined const displayName = cfg?.name || "" - const searchable = [displayName, cfg?.evaluator_key, cfg?.id] + const evaluatorKey = 
resolveEvaluatorKey(cfg) + const searchable = [displayName, evaluatorKey, cfg?.id, cfg?.slug, cfg?.data?.uri] .map((item) => { if (item === undefined || item === null) return undefined const text = String(item).trim() @@ -61,6 +63,7 @@ const buildPreviewLookup = (previewEvaluators: EvaluatorPreviewDto[]) => { const map = new Map() previewEvaluators.forEach((evaluator) => { const rawKey = + resolveEvaluatorKey(evaluator as any) || (evaluator as any)?.evaluator_key || (evaluator as any)?.flags?.evaluator_key || (evaluator as any)?.meta?.evaluator_key || @@ -122,13 +125,14 @@ export const useEvaluatorSelection = ({ const allowedEvaluators = useMemo(() => { if (!evaluators?.length) return [] - return evaluators.filter((config: any) => { + return evaluators.filter((config: SimpleEvaluator) => { if (!config) return false + const evaluatorKey = resolveEvaluatorKey(config) const candidates = collectEvaluatorCandidates( - config?.evaluator_key, - (config as any)?.slug, + evaluatorKey, + config?.slug, config?.name, - config?.key, + (config as any)?.key, config?.meta?.evaluator_key, config?.meta?.key, ) @@ -141,13 +145,13 @@ export const useEvaluatorSelection = ({ if (!allowedEvaluators.length) return [] if (!ENABLE_CORRECT_ANSWER_KEY_FILTER) return allowedEvaluators const requiringKey = evaluatorsRequiringCorrectAnswerKey ?? 
new Set() - return allowedEvaluators.filter((config: any) => { + return allowedEvaluators.filter((config: SimpleEvaluator) => { if (!config) return false - const evaluatorKey = config?.evaluator_key + const evaluatorKey = resolveEvaluatorKey(config) if (evaluatorKey && requiringKey.has(evaluatorKey)) { return false } - const settingsValues = config?.settings_values || {} + const settingsValues = getEvaluatorParameters(config) const requiresCorrectAnswerKey = Object.entries(settingsValues).some(([key, value]) => { if (!key) return false const normalizedKey = key.toLowerCase() @@ -176,7 +180,7 @@ export const useEvaluatorSelection = ({ const previewLookup = useMemo(() => buildPreviewLookup(previewEvaluators), [previewEvaluators]) const matchedPreviewEvaluator = useMemo(() => { - const key = (selectedEvaluatorConfig as any)?.evaluator_key as string | undefined + const key = resolveEvaluatorKey(selectedEvaluatorConfig) if (!key) return undefined return previewLookup.get(key.toLowerCase()) }, [selectedEvaluatorConfig, previewLookup]) diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts index fb54e0978b..3cef385d5d 100644 --- a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts +++ b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts @@ -3,6 +3,7 @@ import {useMemo} from "react" import {useAtomValue} from "jotai" import {evaluatorConfigsAtom} from "@/oss/lib/atoms/evaluation" +import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" import useEvaluatorConfigs from "@/oss/lib/hooks/useEvaluatorConfigs" import {EVALUATOR_CATEGORY_LABEL_MAP} from "../constants" @@ -25,6 +26,7 @@ export const useEvaluatorTypeFromConfigs = ({ } const candidates = collectEvaluatorCandidates( + resolveEvaluatorKey(evaluator as any), (evaluator as 
any)?.slug, (evaluator as any)?.key, (evaluator as any)?.meta?.evaluator_key, @@ -32,7 +34,7 @@ export const useEvaluatorTypeFromConfigs = ({ ) const match = configs.find((cfg) => { - const key = (cfg?.evaluator_key || cfg?.name || cfg?.id || "").toString().trim() + const key = (resolveEvaluatorKey(cfg) || cfg?.name || cfg?.id || "").toString().trim() if (!key) return false const lower = key.toLowerCase() if (candidates.includes(lower)) return true @@ -63,7 +65,7 @@ export const useEvaluatorTypeFromConfigs = ({ // 2) Infer label by scanning evaluator_key/name tokens for known category slugs const categorySlugs = Object.keys(EVALUATOR_CATEGORY_LABEL_MAP || {}) const keyTokens = [ - (match as any)?.evaluator_key, + resolveEvaluatorKey(match), (match as any)?.name, (evaluator as any)?.key, (evaluator as any)?.name, diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts b/web/oss/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts index fddb7511af..fdbd26e16f 100644 --- a/web/oss/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts +++ b/web/oss/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts @@ -1,3 +1,4 @@ +import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" import type {EvaluatorPreviewDto} from "@/oss/lib/hooks/useEvaluators/types" import { @@ -126,6 +127,7 @@ export const extractEvaluatorType = ( } const candidates = collectEvaluatorCandidates( + resolveEvaluatorKey(evaluator as any), (evaluator as any)?.slug, (evaluator as any)?.key, (evaluator as any)?.name, @@ -290,8 +292,8 @@ export const extractParameterList = (evaluator?: EvaluatorPreviewDto): Parameter // Support both simple preview artifacts and workflow evaluators const parameterSources = [ - (evaluator as any)?.settings_values, (evaluator as any)?.data?.parameters, + (evaluator as any)?.settings_values, (evaluator as any)?.data?.service?.configuration?.parameters, (evaluator as 
any)?.data?.configuration?.parameters, ] @@ -359,8 +361,8 @@ export const extractModelName = (evaluator?: EvaluatorPreviewDto) => { } const sources = [ - (evaluator as any)?.settings_values, (evaluator as any)?.data?.parameters, + (evaluator as any)?.settings_values, (evaluator as any)?.data?.service?.configuration, (evaluator as any)?.data?.service?.configuration?.parameters, (evaluator as any)?.data?.configuration, @@ -660,7 +662,8 @@ const normalizeMessageContent = ( export const extractPromptSections = (evaluator?: EvaluatorPreviewDto): PromptPreviewSection[] => { if (!evaluator) return [] const data = (evaluator as any)?.data ?? {} - const settings = (evaluator as any)?.settings_values + const parameters = data?.parameters + const settings = parameters ?? (evaluator as any)?.settings_values const agConfig = data?.parameters?.ag_config ?? data?.parameters?.agConfig const messages = findFirstMessages(settings) ?? @@ -728,7 +731,6 @@ export const extractPromptSections = (evaluator?: EvaluatorPreviewDto): PromptPr const promptSources = [ settings, - data?.parameters, data?.service?.configuration?.parameters, data?.configuration?.parameters, ] diff --git a/web/oss/src/lib/Types.ts b/web/oss/src/lib/Types.ts index 83a5b00cad..2d093f02a1 100644 --- a/web/oss/src/lib/Types.ts +++ b/web/oss/src/lib/Types.ts @@ -650,6 +650,7 @@ export interface Evaluator { key: string settings_presets?: SettingsPreset[] settings_template: Record + outputs_schema?: Record icon_url?: string | StaticImageData color?: string direct_use?: boolean @@ -660,6 +661,76 @@ export interface Evaluator { archived?: boolean } +export interface SimpleEvaluatorData { + version?: string + uri?: string + url?: string + headers?: Record + schemas?: Record + script?: {content?: string; runtime?: string} + parameters?: Record + service?: Record + configuration?: Record +} + +export interface SimpleEvaluatorFlags { + is_custom?: boolean + is_evaluator?: boolean + is_human?: boolean + requires_llm_api_keys?: 
boolean + evaluator_key?: string + color?: string +} + +export interface SimpleEvaluator { + id: string + slug: string + name?: string + description?: string + tags?: string[] + meta?: Record + flags?: SimpleEvaluatorFlags + data?: SimpleEvaluatorData + created_at?: string + updated_at?: string + deleted_at?: string | null + created_by_id?: string + updated_by_id?: string + deleted_by_id?: string + color?: string + icon_url?: string | StaticImageData +} + +export interface SimpleEvaluatorCreate { + slug: string + name?: string + description?: string + tags?: string[] + meta?: Record + flags?: SimpleEvaluatorFlags + data?: SimpleEvaluatorData +} + +export interface SimpleEvaluatorEdit { + id: string + name?: string + description?: string + tags?: string[] + meta?: Record + flags?: SimpleEvaluatorFlags + data?: SimpleEvaluatorData +} + +export interface SimpleEvaluatorResponse { + count: number + evaluator: SimpleEvaluator | null +} + +export interface SimpleEvaluatorsResponse { + count: number + evaluators: SimpleEvaluator[] +} + export interface EvaluatorConfig { id: string evaluator_key: string diff --git a/web/oss/src/lib/atoms/evaluation.ts b/web/oss/src/lib/atoms/evaluation.ts index 50f34a8884..9abd5bac5f 100644 --- a/web/oss/src/lib/atoms/evaluation.ts +++ b/web/oss/src/lib/atoms/evaluation.ts @@ -1,7 +1,7 @@ import {atom} from "jotai" -import {Evaluator, EvaluatorConfig} from "../Types" +import {Evaluator, SimpleEvaluator} from "../Types" export const evaluatorsAtom = atom([]) -export const evaluatorConfigsAtom = atom([]) +export const evaluatorConfigsAtom = atom([]) diff --git a/web/oss/src/lib/evaluators/utils.ts b/web/oss/src/lib/evaluators/utils.ts new file mode 100644 index 0000000000..a2ecf7bac9 --- /dev/null +++ b/web/oss/src/lib/evaluators/utils.ts @@ -0,0 +1,135 @@ +import type {Evaluator, SimpleEvaluator, SimpleEvaluatorData} from "@/oss/lib/Types" + +const normalizeSlugBase = (value?: string | null) => + String(value ?? 
"") + .trim() + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-+|-+$/g, "") + +const trimVersionSuffix = (value: string) => value.replace(/-v\d+$/i, "") + +const OUTPUT_SCHEMA_DRAFT = "https://json-schema.org/draft/2020-12/schema" + +const isPlainObject = (value: unknown): value is Record => + typeof value === "object" && value !== null && !Array.isArray(value) + +const normalizeFieldNames = (value: unknown): string[] => { + if (!Array.isArray(value)) return [] + + return value.map((entry) => (typeof entry === "string" ? entry.trim() : "")).filter(Boolean) +} + +export const buildJsonMultiFieldMatchOutputsSchema = (fields: unknown): Record => { + const dynamicFields = normalizeFieldNames(fields) + const properties: Record = {aggregate_score: {type: "number"}} + + dynamicFields.forEach((field) => { + properties[field] = {type: "number"} + }) + + return { + $schema: OUTPUT_SCHEMA_DRAFT, + type: "object", + properties, + required: ["aggregate_score"], + additionalProperties: false, + } +} + +export const deriveEvaluatorOutputsSchema = ({ + evaluatorKey, + evaluatorTemplate, + parameters, +}: { + evaluatorKey?: string | null + evaluatorTemplate?: Partial | null + parameters?: Record | null +}): Record | undefined => { + const defaultOutputsSchema = evaluatorTemplate?.outputs_schema + + if (evaluatorKey === "auto_ai_critique") { + const jsonSchema = parameters?.json_schema + if (isPlainObject(jsonSchema) && isPlainObject(jsonSchema.schema)) { + return jsonSchema.schema + } + + return isPlainObject(defaultOutputsSchema) ? defaultOutputsSchema : undefined + } + + if (evaluatorKey === "json_multi_field_match") { + return buildJsonMultiFieldMatchOutputsSchema(parameters?.fields) + } + + return isPlainObject(defaultOutputsSchema) ? 
defaultOutputsSchema : undefined +} + +export const extractEvaluatorKeyFromUri = (uri?: string | null): string | undefined => { + if (!uri) return undefined + const trimmed = uri.trim() + if (!trimmed) return undefined + + const builtinMatch = trimmed.match(/^agenta:builtin:([^:]+)(:|$)/i) + if (builtinMatch?.[1]) { + return trimVersionSuffix(builtinMatch[1]) + } + + const parts = trimmed.split(":").filter(Boolean) + if (parts.length >= 3 && parts[2]) { + return trimVersionSuffix(parts[2]) + } + + const slashParts = trimmed.split("/").filter(Boolean) + const lastSegment = slashParts[slashParts.length - 1] + if (lastSegment) { + return trimVersionSuffix(lastSegment) + } + + return undefined +} + +export const resolveEvaluatorKey = ( + evaluator?: Partial | null, +): string | undefined => { + if (!evaluator) return undefined + + const candidate = + extractEvaluatorKeyFromUri(evaluator.data?.uri) || + (typeof (evaluator as any)?.evaluator_key === "string" + ? (evaluator as any).evaluator_key + : undefined) || + (typeof evaluator.meta?.evaluator_key === "string" + ? evaluator.meta.evaluator_key + : undefined) || + (typeof evaluator.flags?.evaluator_key === "string" + ? evaluator.flags.evaluator_key + : undefined) || + (typeof (evaluator as any)?.key === "string" ? (evaluator as any).key : undefined) + + return candidate ? 
String(candidate).trim() : undefined +} + +export const buildEvaluatorUri = (evaluatorKey: string, version = "v0") => + `agenta:builtin:${evaluatorKey}:${version}` + +export const buildEvaluatorSlug = (name?: string | null) => { + const base = normalizeSlugBase(name) || "evaluator" + const suffix = Math.random().toString(36).slice(2, 8) + const maxBaseLength = Math.max(1, 50 - suffix.length - 1) + const trimmedBase = base.slice(0, maxBaseLength) + return `${trimmedBase}-${suffix}` +} + +export const mergeEvaluatorData = ( + base?: SimpleEvaluatorData | null, + updates?: Partial | null, +): SimpleEvaluatorData | undefined => { + if (!base && !updates) return undefined + return { + ...(base ?? {}), + ...(updates ?? {}), + } +} + +export const getEvaluatorParameters = (evaluator?: Partial | null) => + (evaluator?.data?.parameters as Record) || {} diff --git a/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts b/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts index 3765eb6677..998f65459e 100644 --- a/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts +++ b/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts @@ -6,11 +6,11 @@ import {SWRConfiguration} from "swr" import {useAppId} from "@/oss/hooks/useAppId" import {evaluatorConfigsQueryAtomFamily} from "@/oss/state/evaluators" -import {EvaluatorConfig} from "../../Types" +import {SimpleEvaluator} from "../../Types" type EvaluatorConfigResult = Preview extends true ? 
undefined - : EvaluatorConfig[] + : SimpleEvaluator[] type EvaluatorConfigsOptions = { preview?: Preview diff --git a/web/oss/src/services/evaluations/api/index.ts b/web/oss/src/services/evaluations/api/index.ts index a60159eddd..46ce6c431a 100644 --- a/web/oss/src/services/evaluations/api/index.ts +++ b/web/oss/src/services/evaluations/api/index.ts @@ -17,7 +17,7 @@ export { createEvaluatorConfig, updateEvaluatorConfig, deleteEvaluatorConfig, - type CreateEvaluationConfigData, + type CreateEvaluatorConfigData, } from "@/oss/services/evaluators" //Prefix convention: diff --git a/web/oss/src/services/evaluations/api_ee/index.ts b/web/oss/src/services/evaluations/api_ee/index.ts index 43ed9fb334..4eeb964323 100644 --- a/web/oss/src/services/evaluations/api_ee/index.ts +++ b/web/oss/src/services/evaluations/api_ee/index.ts @@ -1,9 +1,4 @@ -import { - EvaluatorInputInterface, - EvaluatorMappingInput, - EvaluatorMappingOutput, - EvaluatorOutputInterface, -} from "@agenta/oss/src/lib/types_ee" +import {EvaluatorInputInterface, EvaluatorOutputInterface} from "@agenta/oss/src/lib/types_ee" import axios from "@/oss/lib/api/assets/axiosConfig" import {getAgentaApiUrl} from "@/oss/lib/helpers/api" @@ -16,18 +11,6 @@ import {getProjectValues} from "@/oss/state/project" // - update: PUT data to server // - delete: DELETE data from server -export const createEvaluatorDataMapping = async ( - config: EvaluatorMappingInput, -): Promise => { - const {projectId} = getProjectValues() - - const response = await axios.post( - `${getAgentaApiUrl()}/evaluators/map?project_id=${projectId}`, - {...config}, - ) - return response.data -} - export interface EvaluatorRunOptions { signal?: AbortSignal timeout?: number diff --git a/web/oss/src/services/evaluators/index.ts b/web/oss/src/services/evaluators/index.ts index 2a9bb15de7..c6b8666041 100644 --- a/web/oss/src/services/evaluators/index.ts +++ b/web/oss/src/services/evaluators/index.ts @@ -1,9 +1,22 @@ import axios from 
"@/oss/lib/api/assets/axiosConfig" +import { + buildEvaluatorSlug, + buildEvaluatorUri, + resolveEvaluatorKey, +} from "@/oss/lib/evaluators/utils" import {getAgentaApiUrl} from "@/oss/lib/helpers/api" import {getTagColors} from "@/oss/lib/helpers/colors" import {isDemo, stringToNumberInRange} from "@/oss/lib/helpers/utils" import {EvaluatorResponseDto} from "@/oss/lib/hooks/useEvaluators/types" -import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" +import { + Evaluator, + SimpleEvaluator, + SimpleEvaluatorCreate, + SimpleEvaluatorData, + SimpleEvaluatorEdit, + SimpleEvaluatorResponse, + SimpleEvaluatorsResponse, +} from "@/oss/lib/Types" import aiImg from "@/oss/media/artificial-intelligence.png" import bracketCurlyImg from "@/oss/media/bracket-curly.png" import codeImg from "@/oss/media/browser.png" @@ -48,7 +61,7 @@ export const updateEvaluator = async ( } } -export const fetchEvaluatorById = async (evaluatorId: string) => { +export const fetchEvaluatorById = async (evaluatorId: string): Promise => { const {projectId} = getProjectValues() if (!projectId) { return null @@ -59,7 +72,7 @@ export const fetchEvaluatorById = async (evaluatorId: string) => { ) const payload = (response?.data as any)?.evaluator ?? response?.data ?? 
null if (!payload) return null - return payload as EvaluatorResponseDto<"response">["evaluator"] + return decorateSimpleEvaluator(payload as SimpleEvaluator) } const evaluatorIconsMap = { @@ -103,58 +116,118 @@ export const fetchAllEvaluators = async (includeArchived = false) => { } // Evaluator Configs +function decorateSimpleEvaluator(evaluator: SimpleEvaluator) { + const tagColors = getTagColors() + const evaluatorKey = resolveEvaluatorKey(evaluator) + if (!evaluatorKey) return evaluator + + return { + ...evaluator, + icon_url: evaluatorIconsMap[evaluatorKey as keyof typeof evaluatorIconsMap], + color: tagColors[stringToNumberInRange(evaluatorKey, 0, tagColors.length - 1)], + } +} + export const fetchAllEvaluatorConfigs = async ( appId?: string | null, projectIdOverride?: string | null, -) => { - const tagColors = getTagColors() +): Promise => { const {projectId: projectIdFromStore} = getProjectValues() const projectId = projectIdOverride ?? projectIdFromStore + void appId if (!projectId) { - return [] as EvaluatorConfig[] + return [] as SimpleEvaluator[] } - const response = await axios.get("/evaluators/configs", { - params: { - project_id: projectId, - ...(appId ? {app_id: appId} : {}), + const response = await axios.post( + `${getAgentaApiUrl()}/preview/simple/evaluators/query?project_id=${projectId}`, + { + include_archived: false, }, - }) - const evaluatorConfigs = (response.data || []).map((item: EvaluatorConfig) => ({ - ...item, - icon_url: evaluatorIconsMap[item.evaluator_key as keyof typeof evaluatorIconsMap], - color: tagColors[stringToNumberInRange(item.evaluator_key, 0, tagColors.length - 1)], - })) as EvaluatorConfig[] - return evaluatorConfigs + ) + + const evaluators = response.data?.evaluators ?? 
[] + return evaluators + .filter((item) => !item.deleted_at) + .filter((item) => item.flags?.is_human !== true) + .map(decorateSimpleEvaluator) +} + +export interface CreateEvaluatorConfigData { + name: string + evaluator_key: string + parameters: Record + outputs_schema?: Record + tags?: string[] + description?: string } -export type CreateEvaluationConfigData = Omit export const createEvaluatorConfig = async ( _appId: string | null | undefined, - config: CreateEvaluationConfigData, -) => { + config: CreateEvaluatorConfigData, +): Promise => { const {projectId} = getProjectValues() void _appId - return axios.post(`/evaluators/configs?project_id=${projectId}`, { - ...config, - }) + const data: SimpleEvaluatorData = { + uri: buildEvaluatorUri(config.evaluator_key), + parameters: config.parameters, + } + + if (config.outputs_schema) { + data.schemas = { + outputs: config.outputs_schema, + } + } + + const payload: SimpleEvaluatorCreate = { + slug: buildEvaluatorSlug(config.name), + name: config.name, + description: config.description, + tags: config.tags, + flags: {is_evaluator: true, is_human: false}, + data, + } + + const response = await axios.post( + `${getAgentaApiUrl()}/preview/simple/evaluators/?project_id=${projectId}`, + {evaluator: payload}, + ) + + const evaluator = response.data?.evaluator + if (!evaluator) { + throw new Error("Failed to create evaluator") + } + + return decorateSimpleEvaluator(evaluator) } export const updateEvaluatorConfig = async ( configId: string, - config: Partial, -) => { + config: SimpleEvaluatorEdit, +): Promise => { const {projectId} = getProjectValues() - return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, config) + const response = await axios.put( + `${getAgentaApiUrl()}/preview/simple/evaluators/${configId}?project_id=${projectId}`, + {evaluator: {...config, id: configId}}, + ) + + const evaluator = response.data?.evaluator + if (!evaluator) { + throw new Error("Failed to update evaluator") + } + + 
return decorateSimpleEvaluator(evaluator) } export const deleteEvaluatorConfig = async (configId: string) => { const {projectId} = getProjectValues() - return axios.delete(`/evaluators/configs/${configId}?project_id=${projectId}`) + return axios.post( + `${getAgentaApiUrl()}/preview/simple/evaluators/${configId}/archive?project_id=${projectId}`, + ) } export const deleteHumanEvaluator = async (evaluatorId: string) => { diff --git a/web/oss/src/services/workflows/invoke.ts b/web/oss/src/services/workflows/invoke.ts new file mode 100644 index 0000000000..9d4091816f --- /dev/null +++ b/web/oss/src/services/workflows/invoke.ts @@ -0,0 +1,101 @@ +import axios from "@/oss/lib/api/assets/axiosConfig" +import {buildEvaluatorUri, resolveEvaluatorKey} from "@/oss/lib/evaluators/utils" +import {getAgentaApiUrl} from "@/oss/lib/helpers/api" +import type {SimpleEvaluator} from "@/oss/lib/Types" +import {getProjectValues} from "@/oss/state/project" + +export interface WorkflowServiceStatus { + code?: number + message?: string + type?: string + stacktrace?: string[] | string +} + +export interface WorkflowServiceBatchResponse { + version?: string + trace_id?: string + span_id?: string + status?: WorkflowServiceStatus + data?: { + outputs?: any + } +} + +export interface InvokeEvaluatorOptions { + signal?: AbortSignal + timeout?: number +} + +export interface InvokeEvaluatorParams { + uri?: string + url?: string + evaluator?: Partial | null + inputs?: Record + outputs?: any + parameters?: Record + options?: InvokeEvaluatorOptions +} + +const DEFAULT_EVALUATOR_TIMEOUT = 120_000 + +export const invokeEvaluator = async ({ + uri, + url, + evaluator, + inputs, + outputs, + parameters, + options, +}: InvokeEvaluatorParams): Promise => { + const {projectId} = getProjectValues() + const evaluatorKey = resolveEvaluatorKey(evaluator) + const explicitUri = typeof uri === "string" ? uri.trim() : "" + const explicitUrl = typeof url === "string" ? 
url.trim() : "" + const evaluatorUri = + explicitUri || + evaluator?.data?.uri || + (evaluatorKey ? buildEvaluatorUri(evaluatorKey) : undefined) + const evaluatorUrl = explicitUrl || evaluator?.data?.url + + if (!evaluatorUri && !evaluatorUrl) { + throw new Error("Evaluator interface is missing (uri/url)") + } + + const request: Record = { + interface: evaluatorUri ? {uri: evaluatorUri} : {url: evaluatorUrl}, + configuration: parameters ? {parameters} : undefined, + data: { + inputs, + outputs, + parameters, + }, + } + + const timeout = options?.timeout ?? DEFAULT_EVALUATOR_TIMEOUT + + const response = await axios.post( + `${getAgentaApiUrl()}/preview/workflows/invoke?project_id=${projectId}`, + request, + { + signal: options?.signal, + timeout, + }, + ) + + return response.data +} + +export const mapWorkflowResponseToEvaluatorOutput = ( + response: WorkflowServiceBatchResponse, +): {outputs: Record} => { + const statusType = response.status?.type?.toLowerCase() + const hasErrorType = + statusType === "error" || statusType === "failure" || statusType === "failed" + if ((response.status?.code && response.status.code >= 400) || hasErrorType) { + throw new Error(response.status?.message || "Evaluator execution failed") + } + + return { + outputs: response.data?.outputs ?? 
{}, + } +} diff --git a/web/oss/src/state/evaluators/atoms.ts b/web/oss/src/state/evaluators/atoms.ts index 24f390e884..5c6b34c9e2 100644 --- a/web/oss/src/state/evaluators/atoms.ts +++ b/web/oss/src/state/evaluators/atoms.ts @@ -5,6 +5,7 @@ import {atomWithQuery} from "jotai-tanstack-query" import {getMetricsFromEvaluator} from "@/oss/components/SharedDrawers/AnnotateDrawer/assets/transforms" import axios from "@/oss/lib/api/assets/axiosConfig" import {evaluatorsAtom} from "@/oss/lib/atoms/evaluation" +import {extractEvaluatorKeyFromUri} from "@/oss/lib/evaluators/utils" import {transformApiData} from "@/oss/lib/hooks/useAnnotations/assets/transformer" import { EvaluatorDto, @@ -13,7 +14,7 @@ import { EvaluatorRevisionsResponseDto, EvaluatorsResponseDto, } from "@/oss/lib/hooks/useEvaluators/types" -import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types" +import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types" import {fetchAllEvaluatorConfigs, fetchAllEvaluators} from "@/oss/services/evaluators" import {selectedAppIdAtom} from "@/oss/state/app" import {selectedOrgAtom} from "@/oss/state/org" @@ -26,16 +27,15 @@ import {EvaluatorConfigsParams, EvaluatorsParams} from "./types" const extractKeyFromUri = (uri: unknown): string | undefined => { if (typeof uri !== "string") return undefined - const match = uri.match(/[:/](auto_[a-z0-9_]+)/i) - if (match?.[1]) return match[1] - const parts = uri.split(":").filter(Boolean) - if (parts.length) { - const candidate = parts[parts.length - 1] - if (candidate) { - return candidate.replace(/-v\d+$/i, "") - } - } - return undefined + return ( + extractEvaluatorKeyFromUri(uri) || + uri.match(/[:/](auto_[a-z0-9_]+)/i)?.[1] || + uri + .split(":") + .filter(Boolean) + .slice(-1)[0] + ?.replace(/-v\d+$/i, "") + ) } const isPlainObject = (value: unknown): value is Record => { @@ -102,7 +102,7 @@ const extractRequiresLlmApiKeys = (source: unknown): boolean | undefined => { export const evaluatorConfigsQueryAtomFamily = atomFamily( 
({projectId: overrideProjectId, appId: overrideAppId, preview}: EvaluatorConfigsParams = {}) => - atomWithQuery((get) => { + atomWithQuery((get) => { const projectId = overrideProjectId || get(projectIdAtom) const appId = overrideAppId || get(selectedAppIdAtom) const user = get(userAtom) as {id?: string} | null