diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_api_version_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_api_version_test.go index ae4c59b7335..ce94430c557 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_api_version_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_api_version_test.go @@ -170,7 +170,7 @@ func TestUpdateDataset_APIVersion(t *testing.T) { configPath := filepath.Join(dir, "eval.yaml") cfg := &evalConfig{ Config: opt_eval.Config{ - DatasetReference: &evalDatasetRef{ + Dataset: &evalDatasetRef{ Name: "test-ds", Version: "v1", LocalURI: dataDir, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate.go index 10c7c14b3b4..509fe245f9d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate.go @@ -300,7 +300,7 @@ func submitEvalJobs( needEvalGen = flags.regenerateEvaluator if !needDatasetGen { evalCfg.DatasetFile = existingCfg.DatasetFile - evalCfg.Config.DatasetReference = existingCfg.Config.DatasetReference + evalCfg.Config.Dataset = existingCfg.Config.Dataset } if !needEvalGen { evalCfg.Evaluators = existingCfg.Evaluators @@ -313,7 +313,9 @@ func submitEvalJobs( if err != nil { return nil, err } - evalCfg.DatasetFile = datasetPath + evalCfg.Dataset = &opt_eval.DatasetRef{ + LocalURI: datasetPath, + } } } @@ -363,16 +365,16 @@ func writeAndPrintEvalResult( fmt.Println(color.GreenString("\nEval suite created")) } fmt.Printf(" Config: %s\n", configPath) - if evalCfg.DatasetFile != "" { - fmt.Printf(" Dataset: %s\n", evalCfg.DatasetFile) - } else if evalCfg.DatasetReference != nil && evalCfg.DatasetReference.Name != "" { - ds := evalCfg.DatasetReference.Name - if evalCfg.DatasetReference.Version != "" { - ds += " (" + evalCfg.DatasetReference.Version + ")" + if localPath := evalCfg.LocalDatasetPath(); localPath != "" { + fmt.Printf(" Dataset: %s\n", localPath) + } else if ref := evalCfg.RemoteDatasetReference(); ref != nil { + ds := ref.Name + if ref.Version != "" { + ds += " (" + ref.Version + ")" } fmt.Printf(" Dataset: %s\n", ds) if resolved.hasProject { - fmt.Printf(" %s\n", eval_api.DatasetArtifactPath(resolved.agentProject, evalCfg.DatasetReference)) + fmt.Printf(" %s\n", eval_api.DatasetArtifactPath(resolved.agentProject, ref)) } } for _, evaluator := range evalCfg.Evaluators { @@ -418,9 +420,9 @@ func printEvalPortalLinks(ctx context.Context, resolved *evalResolvedContext, ev return } hasLink := false - if evalCfg.DatasetReference != nil && evalCfg.DatasetReference.Name != "" { + if ref := evalCfg.RemoteDatasetReference(); ref != nil { fmt.Printf("\n "+color.HiBlackString("Portal:")+"\n Dataset: %s\n", - color.CyanString(prefix.DatasetURL(evalCfg.DatasetReference.Name, evalCfg.DatasetReference.Version))) + color.CyanString(prefix.DatasetURL(ref.Name, ref.Version))) hasLink = true } for _, evaluator := range evalCfg.Evaluators { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_jobs.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_jobs.go index 8fe8ca243f9..759375ed866 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_jobs.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_jobs.go @@ -279,7 +279,7 @@ func pollAndFinalizeJobs( if dsRef == nil { return } - evalCfg.DatasetReference = dsRef + evalCfg.Dataset = dsRef if resolved.hasProject { localURI, err := eval_api.DownloadDatasetArtifact( diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_prompts.go index 5f924891639..d70ed6432d3 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_prompts.go @@ -20,7 +20,7 @@ import ( // promptEvalGenerateOptions runs interactive prompts for eval generate options that // were not provided via flags: name, instruction, trace days, eval model, -// and max samples. +// and max samples. Max samples is skipped when an existing dataset is supplied. func promptEvalGenerateOptions(ctx context.Context, resolved *evalResolvedContext, flags *evalGenerateFlags, noPrompt bool) error { azdClient := resolved.azdClient if noPrompt { @@ -158,7 +158,9 @@ func promptEvalGenerateOptions(ctx context.Context, resolved *evalResolvedContex flags.evalModel = selected } - if !flags.maxSamplesSet { + // Max samples only applies when generating a dataset. If the user supplied + // an existing dataset (--dataset), there is nothing to generate, so skip it. + if !flags.maxSamplesSet && flags.dataset == "" { resp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{ Options: &azdext.PromptOptions{ Message: "Max samples (between 15 and 1000)", @@ -193,8 +195,8 @@ func promptRegenerateChoices( // Ask about dataset. datasetLabel := existingCfg.DatasetFile - if datasetLabel == "" && existingCfg.DatasetReference != nil { - datasetLabel = existingCfg.DatasetReference.Name + if datasetLabel == "" && existingCfg.Dataset != nil { + datasetLabel = existingCfg.Dataset.Name } if datasetLabel != "" { resp, err := prompt.Confirm(ctx, &azdext.ConfirmRequest{ diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_test.go index 46f13b087a1..d5ba73a5dc4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate_test.go @@ -408,8 +408,8 @@ func TestBuildOpenAIEvalRequest(t *testing.T) { Name: "agent-1", Version: "v1", }, - DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"}, - Evaluators: opt_eval.EvaluatorList{{Name: "builtin.quality"}}, + Dataset: &evalDatasetRef{Name: "ds", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.quality"}}, }, Options: &opt_eval.Options{EvalModel: "gpt-4o"}, } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go index 09ca5622fe1..b6def21f4cb 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_helpers.go @@ -467,7 +467,7 @@ func promptDatasetSelection( value := strings.TrimSpace(resp.Value) if value == "" { return "", nil, fmt.Errorf( - "a dataset is required: use --dataset , or provide dataset_file / dataset_reference " + + "a dataset is required: use --dataset , or provide a dataset " + "in your config, or run 'azd ai agent eval generate' to generate one") } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go index bbadd25cb66..ebfc761214a 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go @@ -133,21 +133,21 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error { ) // Set source from local dataset file or remote dataset reference. - if evalCfg.DatasetFile != "" { + if localPath := evalCfg.LocalDatasetPath(); localPath != "" { // Resolve relative paths against the agent project directory so - // eval.yaml files with project-relative dataset_file entries work + // eval.yaml files with project-relative local dataset entries work // regardless of the caller's working directory. - datasetPath := eval_api.ResolveRelPath(evalCfg.DatasetFile, resolved.agentProject) + datasetPath := eval_api.ResolveRelPath(localPath, resolved.agentProject) items, err := loadJSONLFile[map[string]any](datasetPath) if err != nil { return err } dataSource.SetFileContent(items) - } else if evalCfg.DatasetReference != nil { - fileID := buildDatasetFileID(resolved.projectEndpoint, evalCfg.DatasetReference) + } else if ref := evalCfg.RemoteDatasetReference(); ref != nil { + fileID := buildDatasetFileID(resolved.projectEndpoint, ref) dataSource.SetFileID(fileID) } else { - return fmt.Errorf("no dataset configured; run 'azd ai agent eval generate' or specify dataset_file / dataset_reference in the eval config") + return fmt.Errorf("no dataset configured; run 'azd ai agent eval generate' or specify a dataset in the eval config") } runReq.DataSource = dataSource diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go index 40e970368ab..6ec623453d5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go @@ -208,7 +208,7 @@ func TestWriteEvalReviewArtifacts(t *testing.T) { dir := t.TempDir() cfg := &evalConfig{} - cfg.DatasetReference = &evalDatasetRef{Name: "test-data", Version: "v1"} + cfg.Dataset = &evalDatasetRef{Name: "test-data", Version: "v1"} cfg.Evaluators = opt_eval.EvaluatorList{{Name: "quality"}} err := eval_api.WriteEvalReviewArtifacts(dir, cfg) @@ -420,8 +420,8 @@ func TestEvalConfigRoundTrip(t *testing.T) { Kind: agent_yaml.AgentKindHosted, Version: "v1", }, - DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"}, - Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, + Dataset: &evalDatasetRef{Name: "ds", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, }, Options: &opt_eval.Options{ EvalModel: "gpt-4o", @@ -441,8 +441,8 @@ func TestEvalConfigRoundTrip(t *testing.T) { assert.Equal(t, original.Agent.Version, loaded.Agent.Version) assert.Equal(t, "gpt-4o", loaded.Options.EvalModel) assert.Equal(t, original.MaxSamples, loaded.MaxSamples) - require.NotNil(t, loaded.DatasetReference) - assert.Equal(t, "ds", loaded.DatasetReference.Name) + require.NotNil(t, loaded.Dataset) + assert.Equal(t, "ds", loaded.Dataset.Name) require.Len(t, loaded.Evaluators, 1) assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0].Name) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go index c9596d79906..dd9f9062886 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go @@ -71,9 +71,9 @@ func runEvalUpdate(ctx context.Context, flags *evalUpdateFlags, noPrompt bool) e } // Detect what has local changes. - hasDataset := evalCfg.DatasetReference != nil && - evalCfg.DatasetReference.Name != "" && - evalCfg.DatasetReference.LocalURI != "" + hasDataset := evalCfg.Dataset != nil && + evalCfg.Dataset.Name != "" && + evalCfg.Dataset.LocalURI != "" hasEvaluators := len(evalCfg.Evaluators.FindByLocalURI()) > 0 // Determine what to update based on flags and interactive prompts. @@ -85,7 +85,7 @@ func runEvalUpdate(ctx context.Context, flags *evalUpdateFlags, noPrompt bool) e if hasDataset { updateDS = confirmUpdate(ctx, resolved, fmt.Sprintf( "Dataset %s has local changes. Upload new version?", - evalCfg.DatasetReference.Name, + evalCfg.Dataset.Name, )) } if hasEvaluators { @@ -145,7 +145,7 @@ func updateDataset( evalCfg *evalConfig, configPath string, ) (int, error) { - ref := evalCfg.DatasetReference + ref := evalCfg.Dataset if ref == nil || ref.Name == "" || ref.LocalURI == "" { return 0, nil } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go index befb47fb85f..2ad2d18680e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize.go @@ -34,6 +34,7 @@ type optimizeAgentContext struct { agentName string // deployed agent name agentVersion string // deployed agent version (empty = latest) agentProject string // agent project directory (empty if not in an azd project) + serviceName string // azd service name (env key prefix source); empty for standalone --agent } // resolveOptimizeAgent resolves the agent name and project directory. @@ -76,6 +77,7 @@ func resolveOptimizeAgent(ctx context.Context, flagValue, envName string, noProm agentName: v.Value, agentVersion: version, agentProject: agentProject, + serviceName: svc.Name, }, nil } } @@ -87,14 +89,15 @@ func resolveOptimizeAgent(ctx context.Context, flagValue, envName string, noProm // optimizeFlags holds CLI flags for the optimize (submit) command. type optimizeFlags struct { - configFile string // path to YAML config file - agent string // agent name override - dataset string // existing dataset file or registered dataset name - evalModel string // model for evaluation - optimizationModel string // model for optimization reasoning (gpt-5 family) - maxIterations int // max optimization iterations per strategy - noWait bool // return immediately after submission - pollInterval int // polling interval in seconds + configFile string // path to YAML config file + agent string // agent name override + dataset string // existing dataset file or registered dataset name + evalModel string // model for evaluation + optimizationModel string // model for optimization reasoning (gpt-5 family) + evaluators []string // built-in or custom evaluator names + maxCandidates int // max optimization candidates to generate + noWait bool // return immediately after submission + pollInterval int // polling interval in seconds optimizeConnectionFlags } @@ -147,8 +150,10 @@ Use --config for a custom YAML spec, or just provide the agent name to use sensi cmd.Flags().StringVarP(&flags.dataset, "dataset", "d", "", "Existing local file or registered dataset name") cmd.Flags().StringVarP(&flags.evalModel, "eval-model", "m", "", "Model for evaluation (required)") cmd.Flags().StringVar(&flags.optimizationModel, "optimize-model", "", - "Model for optimization reasoning (gpt-5 family recommended; falls back to eval model when not set)") - cmd.Flags().IntVar(&flags.maxIterations, "max-iterations", 0, "Maximum number of optimization iterations (must be >= 1; default: 5)") + "Model for optimization reasoning (gpt-5 family recommended; required)") + cmd.Flags().StringArrayVar(&flags.evaluators, "evaluator", nil, + "Built-in or custom evaluator name (repeatable; required when not set in config)") + cmd.Flags().IntVar(&flags.maxCandidates, "max-candidates", 0, "Maximum number of optimization candidates to generate (must be >= 1; default: 5)") cmd.Flags().BoolVar(&flags.noWait, "no-wait", false, "Submit job and return immediately without waiting for completion") cmd.Flags().IntVar(&flags.pollInterval, "poll-interval", 5, "Polling interval in seconds") flags.optimizeConnectionFlags.register(cmd) @@ -164,9 +169,10 @@ Use --config for a custom YAML spec, or just provide the agent name to use sensi // OptimizeAction implements the optimize (submit job) command. type OptimizeAction struct { - flags *optimizeFlags - envName string - noPrompt bool + flags *optimizeFlags + envName string + noPrompt bool + serviceName string // azd service name for per-agent env key derivation } // Run executes the optimize command: resolves the agent, loads/builds the config, applies overrides, submits the job, and optionally polls for results. @@ -232,6 +238,7 @@ func (a *OptimizeAction) resolveConfig( resolved, resolveErr := resolveOptimizeAgent(ctx, a.flags.agent, a.envName, a.noPrompt) if resolveErr == nil { agentProject = resolved.agentProject + a.serviceName = resolved.serviceName reconcileConfigAgent(os.Stderr, &cfg.Agent, resolved.agentName, resolved.agentVersion, a.flags.configFile) } @@ -243,6 +250,7 @@ func (a *OptimizeAction) resolveConfig( return nil, "", "", err } agentProject = resolved.agentProject + a.serviceName = resolved.serviceName // Check if eval.yaml exists in the agent project and offer to use it. // In --no-prompt mode, use it automatically. @@ -292,7 +300,7 @@ func (a *OptimizeAction) applyOverrides( // Apply --dataset flag before anything else. if a.flags.dataset != "" { if eval_api.IsDatasetName(a.flags.dataset) { - cfg.DatasetReference = &opt_eval.DatasetRef{Name: a.flags.dataset} + cfg.Dataset = &opt_eval.DatasetRef{Name: a.flags.dataset} cfg.DatasetFile = "" } else { resolved, err := resolveLocalDatasetFile(resolveCwdRelative(a.flags.dataset), agentProject) @@ -300,7 +308,7 @@ func (a *OptimizeAction) applyOverrides( return err } cfg.DatasetFile = resolved - cfg.DatasetReference = nil + cfg.Dataset = nil } } @@ -318,8 +326,13 @@ func (a *OptimizeAction) applyOverrides( if a.flags.optimizationModel != "" { cfg.Options.OptimizationModel = a.flags.optimizationModel } - if a.flags.maxIterations > 0 { - cfg.Options.MaxIterations = &a.flags.maxIterations + if len(a.flags.evaluators) > 0 { + // Append flag evaluators to any already in the config (deduped by name) + // so --evaluator adds to config evaluators instead of replacing them. + cfg.Evaluators = mergeEvaluators(cfg.Evaluators, evaluatorsFromFlags(a.flags.evaluators)) + } + if a.flags.maxCandidates > 0 { + cfg.Options.MaxCandidates = &a.flags.maxCandidates } // Resolve agent config: try existing config pointer, then default baseline. @@ -381,7 +394,7 @@ func (a *OptimizeAction) applyOverrides( } // Resolve dataset: prompt user if neither file nor reference is set. - if cfg.DatasetFile == "" && cfg.DatasetReference == nil { + if cfg.DatasetFile == "" && cfg.Dataset == nil { if err := resolveOptimizeDataset(ctx, azdClient, cfg, agentProject, a.noPrompt); err != nil { return err } @@ -501,7 +514,7 @@ func (a *OptimizeAction) submitJob( printOptimizePortalLink(ctx, out, cfg.Agent.Name, resp.OperationID, a.envName) fmt.Fprintln(out) - saveLastOptimizeJobID(ctx, resp.OperationID, a.envName) + saveLastOptimizeJobID(ctx, optimizeEnvKeyName(a.serviceName, cfg.Agent.Name), resp.OperationID, a.envName) return resp, client, nil } @@ -530,14 +543,9 @@ func pollOptimizeJob( progress := fmt.Sprintf("\r %s %s", spin, status.Status) if status.Progress != nil { p := status.Progress - if p.CurrentTargetAttribute != "" { - progress += fmt.Sprintf(" · strategy: %s", p.CurrentTargetAttribute) - } - if p.CurrentIteration > 0 { - progress += fmt.Sprintf(" · iteration %d", p.CurrentIteration) - } + progress += fmt.Sprintf(" · candidates completed: %d", p.CandidatesCompleted) if p.BestScore > 0 { - progress += fmt.Sprintf(" · score: %.2f", p.BestScore) + progress += fmt.Sprintf(" · best score: %.2f", p.BestScore) } } progress += fmt.Sprintf(" · %s", elapsed) @@ -560,7 +568,7 @@ func printOptimizeResults(ctx context.Context, out io.Writer, status *optimize_a fmt.Fprintf(out, "\n %s %s\n", color.RedString("Error:"), status.Error.Message) } - if len(status.Candidates) == 0 { + if len(status.Candidates()) == 0 { return } @@ -569,38 +577,59 @@ func printOptimizeResults(ctx context.Context, out io.Writer, status *optimize_a _, _ = bold.Fprintln(out, "\nResults:") // Resolve eval portal prefix once for building hyperlinks in the table. - evalURLs := buildCandidateEvalURLs(ctx, status.Candidates, envName) + candidates := status.Candidates() + evalURLs := buildCandidateEvalURLs(ctx, candidates, envName) hasEvalLinks := len(evalURLs) > 0 - header := fmt.Sprintf(" %-20s %7s %7s", "Candidate", "Score", "Pass") - sep := fmt.Sprintf(" %-20s %7s %7s", + best := status.BestCandidate() + bestName := "" + if best != nil { + bestName = best.Name + } + + // Show the Strategy column only when at least one candidate reports the + // mutated agent attributes. It is placed last so it can grow freely. + hasStrategy := false + for _, c := range candidates { + if len(c.Mutations) > 0 { + hasStrategy = true + break + } + } + + header := fmt.Sprintf(" %-20s %7s", "Candidate", "Score") + sep := fmt.Sprintf(" %-20s %7s", strings.Repeat("─", 20), - strings.Repeat("─", 7), strings.Repeat("─", 7)) + strings.Repeat("─", 7)) if hasEvalLinks { header += " Eval" - sep += " " + strings.Repeat("─", 6) + sep += " " + strings.Repeat("─", 4) + } + if hasStrategy { + header += " Strategy" + sep += " " + strings.Repeat("─", 8) } fmt.Fprintln(out, header) fmt.Fprintln(out, sep) - bestName := "" - if status.Best != nil { - bestName = status.Best.Name - } - - for _, c := range status.Candidates { + for _, c := range candidates { isBest := c.Name == bestName name := c.Name if isBest { name += " ★" } - line := fmt.Sprintf(" %-20s %7.2f %6.0f%%", name, c.AvgScore, c.PassRate*100) + line := fmt.Sprintf(" %-20s %7.2f", name, c.AvgScore) if hasEvalLinks { if url, ok := evalURLs[c.Name]; ok { line += " " + terminalHyperlink(url, "View") } } + if hasStrategy { + // MutationKeys returns the keys in stable (sorted) order. + strategy := strings.Join(c.MutationKeys(), ", ") + line += " " + strategy + } if isBest { _, _ = green.Fprintln(out, line) } else { @@ -610,7 +639,7 @@ func printOptimizeResults(ctx context.Context, out io.Writer, status *optimize_a // Print candidate IDs for deploy hasIDs := false - for _, c := range status.Candidates { + for _, c := range candidates { if c.CandidateID != "" { if !hasIDs { fmt.Fprintf(out, "\n Candidate IDs:\n") @@ -625,19 +654,16 @@ func printOptimizeResults(ctx context.Context, out io.Writer, status *optimize_a } // Print next-step commands for best candidate - if status.Best != nil && status.Best.CandidateID != "" { - agentName := "" - if status.Agent != nil { - agentName = status.Agent.AgentName - } + if best != nil && best.CandidateID != "" { + agentName := status.AgentName() if hasProject { fmt.Fprintf(out, "\n Apply the best candidate locally, then deploy:\n") - fmt.Fprintf(out, " azd ai agent optimize apply --candidate %s\n", status.Best.CandidateID) + fmt.Fprintf(out, " azd ai agent optimize apply --candidate %s\n", best.CandidateID) fmt.Fprintf(out, " azd deploy\n") } else { fmt.Fprintf(out, "\n Deploy the best candidate:\n") fmt.Fprintf(out, " azd ai agent optimize deploy --candidate %s --agent %s\n", - status.Best.CandidateID, agentName) + best.CandidateID, agentName) } } fmt.Fprintln(out) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go index 95009c9010b..312d5cdf7b6 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply.go @@ -128,9 +128,16 @@ func (a *OptimizeApplyAction) apply( } optClient := optimize_api.NewOptimizeClient(projectEndpoint, credential) + // Resolve the optimization job ID — candidate endpoints are nested under it. + jobID := loadOptimizeJobIDForAgent(ctx, svc.Name, a.envName) + if jobID == "" { + return fmt.Errorf( + "no optimization job found in the environment; run 'azd ai agent optimize' first") + } + // Step 1: Fetch candidate config from the optimization service. fmt.Fprintf(out, " Fetching candidate config...\n") - candidateConfig, err := optClient.GetCandidateConfig(ctx, a.flags.candidate) + candidateConfig, err := optClient.GetCandidateConfig(ctx, jobID, a.flags.candidate) if err != nil { return fmt.Errorf("failed to fetch candidate config: %w", err) } @@ -141,7 +148,7 @@ func (a *OptimizeApplyAction) apply( // Step 2: Download skill files into the candidate directory (before metadata.yaml // so the skills/ dir exists when writeAgentConfigFromCandidate checks for it). - if n, dlErr := downloadSkillFilesToDir(ctx, optClient, a.flags.candidate, candidateDir, out); dlErr != nil { + if n, dlErr := downloadSkillFilesToDir(ctx, optClient, jobID, a.flags.candidate, candidateDir, out); dlErr != nil { fmt.Fprintf(out, " warning: failed to download skill files: %s\n", dlErr) } else if n > 0 { fmt.Fprintf(out, " Downloaded %d skill file(s)\n", n) @@ -293,7 +300,13 @@ func writeAgentConfigFromCandidate(candidateDir string, rawConfig json.RawMessag meta.Name = s } } - if v, exists := m["agentName"]; exists { + // Candidate API uses snake_case (agent_name); accept the legacy + // camelCase form (agentName) for backward compatibility. + if v, exists := m["agent_name"]; exists { + if s, ok := v.(string); ok { + meta.Name = s + } + } else if v, exists := m["agentName"]; exists { if s, ok := v.(string); ok { meta.Name = s } @@ -425,11 +438,12 @@ func writeToolsFile(candidateDir string, config map[string]any) error { func downloadSkillFilesToDir( ctx context.Context, client *optimize_api.OptimizeClient, + jobID string, candidateID string, destDir string, out io.Writer, ) (int, error) { - manifest, err := client.GetCandidate(ctx, candidateID) + manifest, err := client.GetCandidate(ctx, jobID, candidateID) if err != nil { return 0, fmt.Errorf("fetching candidate manifest: %w", err) } @@ -450,7 +464,7 @@ func downloadSkillFilesToDir( continue } - content, err := client.GetCandidateFile(ctx, candidateID, f.Path) + content, err := client.GetCandidateFile(ctx, jobID, candidateID, f.Path) if err != nil { fmt.Fprintf(out, " warning: failed to download skill file %s: %s\n", f.Path, err) continue @@ -477,11 +491,18 @@ func downloadSkillFilesToDir( } // extractInstructions retrieves the system prompt string from a candidate config -// returned by the optimization service. +// returned by the optimization service. The candidate API uses snake_case +// (system_prompt); the legacy camelCase form (systemPrompt) is accepted for +// backward compatibility. func extractInstructions(m map[string]any) string { if m == nil { return "" } + if v, exists := m["system_prompt"]; exists { + if s, ok := v.(string); ok { + return s + } + } if v, exists := m["systemPrompt"]; exists { if s, ok := v.(string); ok { return s diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go index e4d24e1b3d9..f84f87e372e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_apply_test.go @@ -205,6 +205,19 @@ func TestExtractInstructions(t *testing.T) { map[string]any{"systemPrompt": "You are a helpful assistant."}, "You are a helpful assistant.", }, + { + "system_prompt field (snake_case)", + map[string]any{"system_prompt": "Snake-case prompt."}, + "Snake-case prompt.", + }, + { + "system_prompt takes precedence over camelCase", + map[string]any{ + "system_prompt": "From snake_case", + "systemPrompt": "From camelCase", + }, + "From snake_case", + }, { "instructions field", map[string]any{"instructions": "Follow the rules."}, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go index 2e49b47d34b..47398ab8547 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config.go @@ -27,14 +27,29 @@ type OptimizeConfig struct { opt_eval.Config `yaml:",inline"` // Optimize-specific YAML fields. - ValidationReference *opt_eval.DatasetRef `yaml:"validation_reference,omitempty"` - Options *opt_eval.Options `yaml:"options"` + ValidationDataset *opt_eval.DatasetRef `yaml:"validation_dataset,omitempty"` + // LegacyValidationReference reads the deprecated `validation_reference` + // YAML key. Use ValidationDataset instead; this is consulted only for + // backward compatibility and is merged into ValidationDataset by + // normalizeValidationDataset at load time. + LegacyValidationReference *opt_eval.DatasetRef `yaml:"validation_reference,omitempty"` + Options *opt_eval.Options `yaml:"options"` // Runtime-only: resolved skill directory and tools file (not serialized to YAML). SkillDir string `yaml:"-"` ToolsFile string `yaml:"-"` } +// normalizeValidationDataset merges the deprecated `validation_reference` key +// into ValidationDataset when it is unset, then clears the legacy field so it +// is not re-written. +func (c *OptimizeConfig) normalizeValidationDataset() { + if c.ValidationDataset == nil && c.LegacyValidationReference != nil { + c.ValidationDataset = c.LegacyValidationReference + } + c.LegacyValidationReference = nil +} + // LoadOptimizeConfig reads and parses a YAML optimization config file. func LoadOptimizeConfig(path string) (*OptimizeConfig, error) { data, err := os.ReadFile(path) //nolint:gosec // path is provided by user for local config @@ -46,11 +61,13 @@ func LoadOptimizeConfig(path string) (*OptimizeConfig, error) { if err := yaml.Unmarshal(data, &cfg); err != nil { return nil, fmt.Errorf("failed to parse config file %s: %w", path, err) } + cfg.NormalizeDataset() + cfg.normalizeValidationDataset() return &cfg, nil } -// Validate checks required fields and mutual exclusivity constraints. +// Validate checks required fields and dataset constraints. func (c *OptimizeConfig) Validate() error { if c.Agent.Name == "" { return fmt.Errorf("agent.name is required") @@ -60,16 +77,24 @@ func (c *OptimizeConfig) Validate() error { return fmt.Errorf("options.eval_model is required") } - hasFile := c.DatasetFile != "" - hasRef := c.DatasetReference != nil + if c.Options.OptimizationModel == "" { + return fmt.Errorf( + "options.optimization_model is required: pass --optimize-model , " + + "or add 'optimization_model' under 'options:' in your config") + } - if hasFile && hasRef { - return fmt.Errorf("dataset_file and dataset_reference are mutually exclusive; specify one, not both") + if len(c.Evaluators) == 0 { + return fmt.Errorf( + "at least one evaluator is required: pass --evaluator (repeatable), " + + "add an 'evaluators:' section to your config, or run 'azd ai agent eval generate' to generate one") } - if !hasFile && !hasRef { + hasLocal := c.LocalDatasetPath() != "" + hasRemote := c.RemoteDatasetReference() != nil + + if !hasLocal && !hasRemote { return fmt.Errorf( - "a dataset is required: provide dataset_file or dataset_reference in your config, " + + "a dataset is required: provide a local or registered dataset in your config, " + "or run 'azd ai agent eval generate' to generate one") } @@ -77,15 +102,14 @@ func (c *OptimizeConfig) Validate() error { } // defaultOptimizeConfig returns a minimal config skeleton with sensible defaults. -// Dataset, eval model, and other values are resolved interactively or via flags. +// Dataset, eval model, evaluators, and other values are resolved interactively or via flags. func defaultOptimizeConfig(agentName string) *OptimizeConfig { return &OptimizeConfig{ Config: opt_eval.Config{ - Agent: opt_eval.AgentRef{Name: agentName}, - Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, + Agent: opt_eval.AgentRef{Name: agentName}, }, Options: &opt_eval.Options{ - MaxIterations: new(5), + MaxCandidates: new(5), }, } } @@ -99,10 +123,10 @@ func (c *OptimizeConfig) ToRequest() (*optimize_api.OptimizeRequest, []string, e AgentName: c.Agent.Name, AgentVersion: c.Agent.Version, }, - Evaluators: c.Evaluators.Names(), + Evaluators: evaluatorRefs(c.Evaluators), Options: optimize_api.OptimizeOptions{ EvalModel: c.Options.EvalModel, - MaxIterations: c.Options.MaxIterations, + MaxCandidates: c.Options.MaxCandidates, OptimizationModel: c.Options.OptimizationModel, EvaluationLevel: c.Options.EvaluationLevel, }, @@ -113,40 +137,45 @@ func (c *OptimizeConfig) ToRequest() (*optimize_api.OptimizeRequest, []string, e req.Options.OptimizationConfig = c.Options.OptimizationConfig } - // Put baselineModel into optimizationConfig. + // Put the baseline model into optimization_config as "model". if c.Agent.Model != "" { if req.Options.OptimizationConfig == nil { req.Options.OptimizationConfig = make(map[string]json.RawMessage) } raw, _ := json.Marshal(c.Agent.Model) - req.Options.OptimizationConfig["baselineModel"] = raw + req.Options.OptimizationConfig["model"] = raw } var warnings []string - if c.DatasetReference != nil { - req.TrainDatasetReference = &optimize_api.DatasetReference{ - Name: c.DatasetReference.Name, - Version: c.DatasetReference.Version, + if ref := c.RemoteDatasetReference(); ref != nil { + req.TrainDataset = &optimize_api.Dataset{ + Type: optimize_api.DatasetTypeReference, + Name: ref.Name, + Version: ref.Version, } } - if c.ValidationReference != nil { - req.ValidationDatasetReference = &optimize_api.DatasetReference{ - Name: c.ValidationReference.Name, - Version: c.ValidationReference.Version, + if c.ValidationDataset != nil { + req.ValidationDataset = &optimize_api.Dataset{ + Type: optimize_api.DatasetTypeReference, + Name: c.ValidationDataset.Name, + Version: c.ValidationDataset.Version, } } - if c.DatasetFile != "" { - lines, err := loadJSONLRawFile(c.DatasetFile) + if localPath := c.LocalDatasetPath(); localPath != "" { + lines, err := loadJSONLRawFile(localPath) if err != nil { return nil, nil, err } - req.Dataset = lines + req.TrainDataset = &optimize_api.Dataset{ + Type: optimize_api.DatasetTypeInline, + Items: lines, + } } - // Populate optimization_config with systemPrompt, skills, tools. + // Populate optimization_config with system_prompt, skills, tools. ensureOptConfig := func() { if req.Options.OptimizationConfig == nil { req.Options.OptimizationConfig = make(map[string]json.RawMessage) @@ -156,7 +185,7 @@ func (c *OptimizeConfig) ToRequest() (*optimize_api.OptimizeRequest, []string, e if prompt := c.Agent.ResolvedSystemPrompt(); prompt != "" { ensureOptConfig() raw, _ := json.Marshal(prompt) - req.Options.OptimizationConfig["systemPrompt"] = raw + req.Options.OptimizationConfig["system_prompt"] = raw } // Load skills from skill_dir if specified. @@ -185,6 +214,39 @@ func (c *OptimizeConfig) ToRequest() (*optimize_api.OptimizeRequest, []string, e return req, warnings, nil } +// evaluatorRefs converts a YAML evaluator list into API evaluator references, +// preserving each evaluator's name and optional version. +func evaluatorRefs(list opt_eval.EvaluatorList) []optimize_api.EvaluatorRef { + if len(list) == 0 { + return nil + } + refs := make([]optimize_api.EvaluatorRef, 0, len(list)) + for _, e := range list { + refs = append(refs, optimize_api.EvaluatorRef{Name: e.Name, Version: e.Version}) + } + return refs +} + +// mergeEvaluators appends add to base, skipping entries whose name already +// exists in base (case-sensitive). Order is preserved: base first, then any +// new entries from add. Used to layer --evaluator flags on top of config +// evaluators without dropping the config entries. +func mergeEvaluators(base, add opt_eval.EvaluatorList) opt_eval.EvaluatorList { + seen := make(map[string]struct{}, len(base)) + for _, e := range base { + seen[e.Name] = struct{}{} + } + merged := base + for _, e := range add { + if _, ok := seen[e.Name]; ok { + continue + } + seen[e.Name] = struct{}{} + merged = append(merged, e) + } + return merged +} + // loadToolDefinitions reads an OpenAI-format tools JSON file, deserializes // into typed ToolDefinition structs, and warns about non-function tool types. func loadToolDefinitions(path string) ([]optimize_api.ToolDefinition, []string, error) { diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go index cec89fbc7d2..fd5b7e72545 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_config_test.go @@ -45,7 +45,8 @@ evaluators: options: eval_model: gpt-4o-mini budget: 100 - max_iterations: 5 + max_candidates: 5 + optimization_model: gpt-5 ` cfgPath := writeTestFile(t, dir, "optimize.yaml", yamlContent) @@ -58,30 +59,35 @@ options: assert.Equal(t, "my-agent", req.Agent.AgentName) assert.Equal(t, "1", req.Agent.AgentVersion) - assert.Len(t, req.Dataset, 2) - assert.Contains(t, string(req.Dataset[0]), `"What is 2+2?"`) - assert.Contains(t, string(req.Dataset[0]), `"groundTruth"`) - assert.Nil(t, req.TrainDatasetReference) + require.NotNil(t, req.TrainDataset) + assert.Equal(t, optimize_api.DatasetTypeInline, req.TrainDataset.Type) + assert.Len(t, req.TrainDataset.Items, 2) + assert.Contains(t, string(req.TrainDataset.Items[0]), `"What is 2+2?"`) + assert.Contains(t, string(req.TrainDataset.Items[0]), `"groundTruth"`) assert.Equal(t, "gpt-4o-mini", req.Options.EvalModel) - assert.Equal(t, []string{"coherence", "relevance"}, req.Evaluators) + assert.Equal(t, []optimize_api.EvaluatorRef{{Name: "coherence"}, {Name: "relevance"}}, req.Evaluators) } -func TestLoadOptimizeConfig_WithDatasetReference(t *testing.T) { +func TestLoadOptimizeConfig_WithDataset(t *testing.T) { t.Parallel() dir := t.TempDir() + // Uses the deprecated validation_reference key to verify backward compatibility. yamlContent := ` agent: name: ref-agent -dataset_reference: +dataset: name: my-dataset version: "2" validation_reference: name: val-dataset version: "1" +evaluators: + - builtin.task_adherence options: eval_model: gpt-4o-mini + optimization_model: gpt-5 ` cfgPath := writeTestFile(t, dir, "optimize.yaml", yamlContent) @@ -93,12 +99,50 @@ options: require.NoError(t, err) assert.Equal(t, "ref-agent", req.Agent.AgentName) - assert.Empty(t, req.Dataset) - require.NotNil(t, req.TrainDatasetReference) - assert.Equal(t, "my-dataset", req.TrainDatasetReference.Name) - assert.Equal(t, "2", req.TrainDatasetReference.Version) - require.NotNil(t, req.ValidationDatasetReference) - assert.Equal(t, "val-dataset", req.ValidationDatasetReference.Name) + require.NotNil(t, req.TrainDataset) + assert.Equal(t, optimize_api.DatasetTypeReference, req.TrainDataset.Type) + assert.Empty(t, req.TrainDataset.Items) + assert.Equal(t, "my-dataset", req.TrainDataset.Name) + assert.Equal(t, "2", req.TrainDataset.Version) + require.NotNil(t, req.ValidationDataset) + assert.Equal(t, optimize_api.DatasetTypeReference, req.ValidationDataset.Type) + assert.Equal(t, "val-dataset", req.ValidationDataset.Name) +} + +// TestLoadOptimizeConfig_ValidationDataset verifies the new validation_dataset +// key is honored. +func TestLoadOptimizeConfig_ValidationDataset(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + + yamlContent := ` +agent: + name: ref-agent +dataset: + name: my-dataset + version: "2" +validation_dataset: + name: val-dataset + version: "3" +evaluators: + - builtin.task_adherence +options: + eval_model: gpt-4o-mini + optimization_model: gpt-5 +` + cfgPath := writeTestFile(t, dir, "optimize.yaml", yamlContent) + + cfg, err := LoadOptimizeConfig(cfgPath) + require.NoError(t, err) + require.NoError(t, cfg.Validate()) + + req, _, err := cfg.ToRequest() + require.NoError(t, err) + + require.NotNil(t, req.ValidationDataset) + assert.Equal(t, "val-dataset", req.ValidationDataset.Name) + assert.Equal(t, "3", req.ValidationDataset.Version) } func TestValidate_MissingAgentName(t *testing.T) { @@ -106,7 +150,7 @@ func TestValidate_MissingAgentName(t *testing.T) { cfg := &OptimizeConfig{ Config: opt_eval.Config{ - DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "1"}, + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "1"}, }, Options: &opt_eval.Options{EvalModel: "gpt-4o-mini"}, } @@ -121,8 +165,8 @@ func TestValidate_MissingEvalModel(t *testing.T) { cfg := &OptimizeConfig{ Config: opt_eval.Config{ - Agent: opt_eval.AgentRef{Name: "agent"}, - DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "1"}, + Agent: opt_eval.AgentRef{Name: "agent"}, + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "1"}, }, } @@ -131,29 +175,66 @@ func TestValidate_MissingEvalModel(t *testing.T) { assert.Contains(t, err.Error(), "eval_model is required") } -func TestValidate_BothDatasetFileAndReference(t *testing.T) { +func TestValidate_MissingOptimizationModel(t *testing.T) { t.Parallel() cfg := &OptimizeConfig{ Config: opt_eval.Config{ - Agent: opt_eval.AgentRef{Name: "agent"}, - DatasetFile: "tasks.jsonl", - DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "1"}, + Agent: opt_eval.AgentRef{Name: "agent"}, + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "1"}, }, Options: &opt_eval.Options{EvalModel: "gpt-4o-mini"}, } err := cfg.Validate() require.Error(t, err) - assert.Contains(t, err.Error(), "mutually exclusive") + assert.Contains(t, err.Error(), "optimization_model is required") +} + +func TestValidate_MissingEvaluators(t *testing.T) { + t.Parallel() + + cfg := &OptimizeConfig{ + Config: opt_eval.Config{ + Agent: opt_eval.AgentRef{Name: "agent"}, + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "1"}, + }, + Options: &opt_eval.Options{EvalModel: "gpt-4o-mini", OptimizationModel: "gpt-5"}, + } + + err := cfg.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "at least one evaluator is required") +} + +func TestValidate_DatasetFileTakesPrecedence(t *testing.T) { + t.Parallel() + + // dataset_file is the deprecated local form; it remains valid for backward + // compatibility and takes precedence over a registered dataset_reference. + cfg := &OptimizeConfig{ + Config: opt_eval.Config{ + Agent: opt_eval.AgentRef{Name: "agent"}, + DatasetFile: "tasks.jsonl", + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, + }, + Options: &opt_eval.Options{EvalModel: "gpt-4o-mini", OptimizationModel: "gpt-5"}, + } + + require.NoError(t, cfg.Validate()) + assert.Equal(t, "tasks.jsonl", cfg.LocalDatasetPath()) } func TestValidate_NeitherDatasetFileNorReference(t *testing.T) { t.Parallel() cfg := &OptimizeConfig{ - Config: opt_eval.Config{Agent: opt_eval.AgentRef{Name: "agent"}}, - Options: &opt_eval.Options{EvalModel: "gpt-4o-mini"}, + Config: opt_eval.Config{ + Agent: opt_eval.AgentRef{Name: "agent"}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, + }, + Options: &opt_eval.Options{EvalModel: "gpt-4o-mini", OptimizationModel: "gpt-5"}, } err := cfg.Validate() @@ -213,8 +294,8 @@ options: assert.Equal(t, "gpt-4o", cfg.Options.EvalModel) assert.Len(t, cfg.Evaluators, 1) assert.Equal(t, "builtin.task_adherence", cfg.Evaluators[0].Name) - require.NotNil(t, cfg.DatasetReference) - assert.Equal(t, "eval-dataset", cfg.DatasetReference.Name) + require.NotNil(t, cfg.Dataset) + assert.Equal(t, "eval-dataset", cfg.Dataset.Name) } func TestLoadOptimizeConfig_ScalarEvaluatorsWithOptions(t *testing.T) { @@ -248,6 +329,7 @@ evaluators: options: eval_model: gpt-4o budget: 3 + optimization_model: gpt-5 ` cfgPath := writeTestFile(t, dir, "spec.yaml", yamlContent) @@ -259,7 +341,7 @@ options: // Dataset assert.Equal(t, datasetPath, cfg.DatasetFile) - assert.Nil(t, cfg.DatasetReference) + assert.Nil(t, cfg.Dataset) // Evaluator — scalar string without builtin. prefix resolves as custom. require.Len(t, cfg.Evaluators, 1) @@ -274,8 +356,10 @@ options: req, _, err := cfg.ToRequest() require.NoError(t, err) assert.Equal(t, "my-test-agent", req.Agent.AgentName) - assert.Len(t, req.Dataset, 1) - assert.Equal(t, []string{"builtin.task_adherence"}, req.Evaluators) + require.NotNil(t, req.TrainDataset) + assert.Equal(t, optimize_api.DatasetTypeInline, req.TrainDataset.Type) + assert.Len(t, req.TrainDataset.Items, 1) + assert.Equal(t, []optimize_api.EvaluatorRef{{Name: "builtin.task_adherence"}}, req.Evaluators) } // --------------------------------------------------------------------------- @@ -465,7 +549,7 @@ func TestToRequest_WithToolsFile(t *testing.T) { assert.Equal(t, "calculator", tools[0].Function.Name) } -// ---- ToRequest: BaselineModel in OptimizationConfig ---- +// ---- ToRequest: baseline model in OptimizationConfig ---- func TestToRequest_SetsBaselineModelInOptimizationConfig(t *testing.T) { t.Parallel() @@ -483,8 +567,8 @@ func TestToRequest_SetsBaselineModelInOptimizationConfig(t *testing.T) { require.NoError(t, err) require.NotNil(t, req.Options.OptimizationConfig) - raw, ok := req.Options.OptimizationConfig["baselineModel"] - require.True(t, ok, "baselineModel should be in optimizationConfig") + raw, ok := req.Options.OptimizationConfig["model"] + require.True(t, ok, "baseline model should be in optimization_config under the model key") assert.Equal(t, `"gpt-4o"`, string(raw)) } @@ -504,8 +588,8 @@ func TestToRequest_BaselineModelOmittedWhenEmpty(t *testing.T) { require.NoError(t, err) if req.Options.OptimizationConfig != nil { - _, hasKey := req.Options.OptimizationConfig["baselineModel"] - assert.False(t, hasKey, "baselineModel should not be set when model is empty") + _, hasKey := req.Options.OptimizationConfig["model"] + assert.False(t, hasKey, "model should not be set when baseline model is empty") } } @@ -524,10 +608,75 @@ func TestToRequest_BaselineModelInJSON(t *testing.T) { req, _, err := cfg.ToRequest() require.NoError(t, err) - // Verify the JSON output contains baselineModel inside optimizationConfig. + // Verify the JSON output contains the model key inside optimization_config. data, err := json.Marshal(req) require.NoError(t, err) - assert.Contains(t, string(data), `"baselineModel"`) + assert.Contains(t, string(data), `"model"`) +} + +// --------------------------------------------------------------------------- +// evaluatorRefs — preserves name + version +// --------------------------------------------------------------------------- + +func TestEvaluatorRefs(t *testing.T) { + t.Parallel() + + t.Run("nil list returns nil", func(t *testing.T) { + t.Parallel() + assert.Nil(t, evaluatorRefs(nil)) + }) + + t.Run("empty list returns nil", func(t *testing.T) { + t.Parallel() + assert.Nil(t, evaluatorRefs(opt_eval.EvaluatorList{})) + }) + + t.Run("preserves name and version", func(t *testing.T) { + t.Parallel() + list := opt_eval.EvaluatorList{ + {Name: "builtin.task_adherence"}, + {Name: "custom-quality", Version: "2", LocalURI: "evaluators/custom-quality_2.json"}, + } + got := evaluatorRefs(list) + require.Len(t, got, 2) + assert.Equal(t, optimize_api.EvaluatorRef{Name: "builtin.task_adherence"}, got[0]) + // local_uri is not part of the wire EvaluatorRef — only name + version. + assert.Equal(t, optimize_api.EvaluatorRef{Name: "custom-quality", Version: "2"}, got[1]) + }) +} + +// --------------------------------------------------------------------------- +// mergeEvaluators +// --------------------------------------------------------------------------- + +func TestMergeEvaluators(t *testing.T) { + t.Parallel() + + t.Run("appends new and dedups by name", func(t *testing.T) { + t.Parallel() + base := opt_eval.EvaluatorList{{Name: "a"}, {Name: "b"}} + add := opt_eval.EvaluatorList{{Name: "b"}, {Name: "c"}} + got := mergeEvaluators(base, add) + require.Len(t, got, 3) + assert.Equal(t, "a", got[0].Name) + assert.Equal(t, "b", got[1].Name) + assert.Equal(t, "c", got[2].Name) + }) + + t.Run("empty base returns add", func(t *testing.T) { + t.Parallel() + got := mergeEvaluators(nil, opt_eval.EvaluatorList{{Name: "x"}}) + require.Len(t, got, 1) + assert.Equal(t, "x", got[0].Name) + }) + + t.Run("empty add returns base", func(t *testing.T) { + t.Parallel() + base := opt_eval.EvaluatorList{{Name: "x"}} + got := mergeEvaluators(base, nil) + require.Len(t, got, 1) + assert.Equal(t, "x", got[0].Name) + }) } // --------------------------------------------------------------------------- diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go index 3dbd59cd75c..87ec01fd887 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_deploy.go @@ -116,7 +116,15 @@ func (a *OptimizeDeployAction) runDirect( return err } optClient := optimize_api.NewOptimizeClient(projectEndpoint, credential) - candidateConfig, err := optClient.GetCandidateConfig(ctx, a.flags.candidate) + + // Resolve the optimization job ID — candidate endpoints are nested under it. + jobID := loadOptimizeJobIDForAgent(ctx, optimizeEnvKeyName(resolved.serviceName, agentName), a.envName) + if jobID == "" { + return fmt.Errorf( + "no optimization job found in the environment; run 'azd ai agent optimize' first") + } + + candidateConfig, err := optClient.GetCandidateConfig(ctx, jobID, a.flags.candidate) if err != nil { return fmt.Errorf("failed to fetch candidate config: %w", err) } @@ -178,7 +186,7 @@ func (a *OptimizeDeployAction) runDirect( } // Step 5: Report the deployment to the optimization service (best-effort). - if err := optClient.ReportDeployment(ctx, &optimize_api.DeploymentReport{ + if err := optClient.ReportDeployment(ctx, jobID, &optimize_api.DeploymentReport{ CandidateID: a.flags.candidate, AgentName: agentName, AgentVersion: versionObj.Version, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go index fb783a8cd1b..58042f63d32 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers.go @@ -107,12 +107,34 @@ func endpointFromNamedEnv(ctx context.Context, envName string) string { return v.Value } -// optimizeLastJobIDKey is the azd environment key for the last optimization job ID. +// optimizeLastJobIDKey is the global azd environment key for the most recent +// optimization job ID. It is used by `optimize status` (which has no agent +// context) and as a fallback when a per-agent job ID is not set. const optimizeLastJobIDKey = "OPTIMIZE_LAST_OPERATION_ID" -// saveLastOptimizeJobID stores the operation ID in the azd environment. -// Best-effort — silently ignores errors (e.g., when running outside azd). -func saveLastOptimizeJobID(ctx context.Context, operationID, envName string) { +// optimizeEnvKeyName returns the identifier used to derive per-agent optimize +// env keys (job ID, candidate ID). It prefers the azd service name so the keys +// align with AGENT_{KEY}_NAME / AGENT_{KEY}_OPTIMIZATION_CANDIDATE_ID, and +// falls back to the agent name for standalone --agent usage without a project. +func optimizeEnvKeyName(serviceName, agentName string) string { + if serviceName != "" { + return serviceName + } + return agentName +} + +// optimizeJobIDKeyForAgent returns the per-agent azd environment key that stores +// the optimization job ID, mirroring AGENT_{KEY}_OPTIMIZATION_CANDIDATE_ID. The +// name should be the azd service name (see optimizeEnvKeyName). +func optimizeJobIDKeyForAgent(name string) string { + return fmt.Sprintf("AGENT_%s_OPTIMIZATION_JOB_ID", toServiceKey(name)) +} + +// saveLastOptimizeJobID stores the operation ID in the azd environment under +// both the per-agent key (AGENT_{KEY}_OPTIMIZATION_JOB_ID) and the global +// last-job key. Best-effort — silently ignores errors (e.g., when running +// outside azd). +func saveLastOptimizeJobID(ctx context.Context, agentName, operationID, envName string) { azdClient, err := azdext.NewAzdClient() if err != nil { return @@ -124,6 +146,16 @@ func saveLastOptimizeJobID(ctx context.Context, operationID, envName string) { return } + // Per-agent key — used by apply/deploy/postdeploy to promote the correct job. + if agentName != "" { + _, _ = azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ + EnvName: env.Name, + Key: optimizeJobIDKeyForAgent(agentName), + Value: operationID, + }) + } + + // Global key — convenience for `optimize status` and a fallback. _, _ = azdClient.Environment().SetValue(ctx, &azdext.SetEnvRequest{ EnvName: env.Name, Key: optimizeLastJobIDKey, @@ -131,7 +163,42 @@ func saveLastOptimizeJobID(ctx context.Context, operationID, envName string) { }) } -// loadLastOptimizeJobID retrieves the last operation ID from the azd environment. +// loadOptimizeJobIDForAgent retrieves the optimization job ID for a specific +// agent, preferring the per-agent key and falling back to the global last-job +// key. Returns empty string if neither is available. +func loadOptimizeJobIDForAgent(ctx context.Context, agentName, envName string) string { + azdClient, err := azdext.NewAzdClient() + if err != nil { + return "" + } + defer azdClient.Close() + + env := getExistingEnvironment(ctx, envName, azdClient) + if env == nil { + return "" + } + + if agentName != "" { + if resp, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: env.Name, + Key: optimizeJobIDKeyForAgent(agentName), + }); err == nil && resp != nil && resp.Value != "" { + return resp.Value + } + } + + resp, err := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: env.Name, + Key: optimizeLastJobIDKey, + }) + if err != nil || resp == nil { + return "" + } + return resp.Value +} + +// loadLastOptimizeJobID retrieves the global last optimization job ID from the +// azd environment. Used by `optimize status`, which has no agent context. // Returns empty string if not available. func loadLastOptimizeJobID(ctx context.Context, envName string) string { azdClient, err := azdext.NewAzdClient() @@ -291,8 +358,29 @@ func reportSvcOptimizationDeployment( log.Printf("postdeploy: promoting candidate %s for %s (version %s)", candidateResp.Value, svc.Name, versionResp.Value) + // Candidate promotion is nested under the optimization job; resolve the + // per-agent job ID (AGENT_{KEY}_OPTIMIZATION_JOB_ID), falling back to the + // global last-job key. + jobIDKey := optimizeJobIDKeyForAgent(svc.Name) + jobID := "" + if jobResp, jobErr := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envName, + Key: jobIDKey, + }); jobErr == nil && jobResp != nil && jobResp.Value != "" { + jobID = jobResp.Value + } else if jobResp, jobErr := azdClient.Environment().GetValue(ctx, &azdext.GetEnvRequest{ + EnvName: envName, + Key: optimizeLastJobIDKey, + }); jobErr == nil && jobResp != nil { + jobID = jobResp.Value + } + if jobID == "" { + log.Printf("postdeploy: no optimization job ID for %s, skipping promotion", svc.Name) + return + } + optClient := newClient(projectEndpoint) - if err := optClient.ReportDeployment(ctx, &optimize_api.DeploymentReport{ + if err := optClient.ReportDeployment(ctx, jobID, &optimize_api.DeploymentReport{ CandidateID: candidateResp.Value, AgentName: svc.Name, AgentVersion: versionResp.Value, diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go index 1b0d9a5c0ec..a96e26ecfca 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_helpers_test.go @@ -94,6 +94,28 @@ func newTestOptimizeClient(endpoint string) *optimize_api.OptimizeClient { return optimize_api.NewOptimizeClientFromPipeline(endpoint, pl) } +// optimizeJobIDKeyForAgent mirrors the AGENT_{KEY}_OPTIMIZATION_CANDIDATE_ID +// naming and applies the same service-key normalization (dashes -> underscores, +// uppercased). +func TestOptimizeJobIDKeyForAgent(t *testing.T) { + t.Parallel() + tests := []struct { + name string + agent string + want string + }{ + {"simple", "echo", "AGENT_ECHO_OPTIMIZATION_JOB_ID"}, + {"dashes", "my-cool-agent", "AGENT_MY_COOL_AGENT_OPTIMIZATION_JOB_ID"}, + {"already upper", "BOT", "AGENT_BOT_OPTIMIZATION_JOB_ID"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, optimizeJobIDKeyForAgent(tt.agent)) + }) + } +} + func TestReportOptimizationDeployments_NoAgents(t *testing.T) { t.Parallel() @@ -119,6 +141,7 @@ func TestReportOptimizationDeployments_Success_ClearsCandidate(t *testing.T) { "dev": { "AGENT_MY_AGENT_OPTIMIZATION_CANDIDATE_ID": "cand-123", "AGENT_MY_AGENT_VERSION": "v2", + "OPTIMIZE_LAST_OPERATION_ID": "opt-1", }, }, } @@ -141,7 +164,7 @@ func TestReportOptimizationDeployments_Success_ClearsCandidate(t *testing.T) { newTestOptimizeClient, ) - assert.Contains(t, gotURL, "/optimize/candidates/cand-123:promote") + assert.Contains(t, gotURL, "/agent_optimization_jobs/opt-1/candidates/cand-123:promote") assert.Equal(t, "my-agent", gotBody.AgentName) assert.Equal(t, "v2", gotBody.AgentVersion) // CandidateID is json:"-", so it should not appear in the body. @@ -217,6 +240,7 @@ func TestReportOptimizationDeployments_APIFailure_DoesNotClearCandidate(t *testi "dev": { "AGENT_SVC_OPTIMIZATION_CANDIDATE_ID": "cand-789", "AGENT_SVC_VERSION": "v3", + "OPTIMIZE_LAST_OPERATION_ID": "opt-1", }, }, } @@ -250,6 +274,7 @@ func TestReportOptimizationDeployments_MultipleAgents(t *testing.T) { // gamma has candidate but API will fail for it. "AGENT_GAMMA_OPTIMIZATION_CANDIDATE_ID": "c-g", "AGENT_GAMMA_VERSION": "v3", + "OPTIMIZE_LAST_OPERATION_ID": "opt-1", }, }, } @@ -257,7 +282,7 @@ func TestReportOptimizationDeployments_MultipleAgents(t *testing.T) { promoted := map[string]bool{} srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == "/optimize/candidates/c-g:promote" { + if r.URL.Path == "/agent_optimization_jobs/opt-1/candidates/c-g:promote" { w.WriteHeader(http.StatusInternalServerError) return } @@ -278,11 +303,11 @@ func TestReportOptimizationDeployments_MultipleAgents(t *testing.T) { ) // Alpha: promoted and cleared. - assert.True(t, promoted["/optimize/candidates/c-a:promote"]) + assert.True(t, promoted["/agent_optimization_jobs/opt-1/candidates/c-a:promote"]) assert.Equal(t, "", envServer.values["dev"]["AGENT_ALPHA_OPTIMIZATION_CANDIDATE_ID"]) // Beta: skipped (no candidate ID), no API call. - assert.False(t, promoted["/optimize/candidates/:promote"]) // shouldn't appear + assert.False(t, promoted["/agent_optimization_jobs/opt-1/candidates/:promote"]) // shouldn't appear // Gamma: API failed, so candidate key should remain. assert.Equal(t, "c-g", envServer.values["dev"]["AGENT_GAMMA_OPTIMIZATION_CANDIDATE_ID"]) @@ -296,6 +321,7 @@ func TestReportOptimizationDeployments_ServiceNameWithDashes(t *testing.T) { "dev": { "AGENT_MY_COOL_AGENT_OPTIMIZATION_CANDIDATE_ID": "cand-dash", "AGENT_MY_COOL_AGENT_VERSION": "v5", + "OPTIMIZE_LAST_OPERATION_ID": "opt-1", }, }, } @@ -314,7 +340,7 @@ func TestReportOptimizationDeployments_ServiceNameWithDashes(t *testing.T) { newTestOptimizeClient, ) - assert.Contains(t, gotURL, "/optimize/candidates/cand-dash:promote") + assert.Contains(t, gotURL, "/agent_optimization_jobs/opt-1/candidates/cand-dash:promote") assert.Equal(t, "", envServer.values["dev"]["AGENT_MY_COOL_AGENT_OPTIMIZATION_CANDIDATE_ID"]) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go index 0bd75ab603c..c646f729310 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_list.go @@ -11,6 +11,7 @@ import ( "io" "strings" + "azureaiagent/internal/pkg/agents/eval_api" "azureaiagent/internal/pkg/agents/optimize_api" "github.com/azure/azure-dev/cli/azd/pkg/azdext" @@ -100,32 +101,26 @@ func runOptimizeList(cmd *cobra.Command, flags *optimizeListFlags) error { func printOptimizeListTable(out io.Writer, jobs []optimize_api.OptimizeJobStatus) { bold := color.New(color.Bold) - _, _ = bold.Fprintf(out, " %-38s %-12s %-14s %7s %s\n", "ID", "Status", "Agent", "Score", "Created") - fmt.Fprintf(out, " %-38s %-12s %-14s %7s %s\n", + _, _ = bold.Fprintf(out, " %-38s %-12s %-14s %s\n", "ID", "Status", "Agent", "Created") + fmt.Fprintf(out, " %-38s %-12s %-14s %s\n", strings.Repeat("─", 38), strings.Repeat("─", 12), - strings.Repeat("─", 14), strings.Repeat("─", 7), strings.Repeat("─", 19)) + strings.Repeat("─", 14), strings.Repeat("─", 19)) for _, job := range jobs { - scoreStr := "—" - if job.Best != nil { - scoreStr = fmt.Sprintf("%.2f", job.Best.AvgScore) - } - agentName := "—" - if job.Agent != nil && job.Agent.AgentName != "" { - agentName = job.Agent.AgentName + if name := job.AgentName(); name != "" { + agentName = name } - created := job.CreatedAt - if created == "" { - created = "—" + created := "—" + if job.CreatedAt != 0 { + created = eval_api.FormatTimestamp(job.CreatedAt) } - fmt.Fprintf(out, " %-38s %-12s %-14s %7s %s\n", - job.OperationID, + fmt.Fprintf(out, " %-38s %-12s %-14s %s\n", + job.ID, formatOptimizeStatus(job.Status), truncateString(agentName, 14), - scoreStr, truncateString(created, 19), ) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go index 029dde949a1..a6728f5c0e5 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts.go @@ -341,7 +341,7 @@ func resolveOptimizeDataset( ) error { if noPrompt { return fmt.Errorf( - "a dataset is required: use --dataset , or provide dataset_file / dataset_reference " + + "a dataset is required: use --dataset , or provide a dataset " + "in your config, or run 'azd ai agent eval generate' to generate one") } @@ -354,16 +354,17 @@ func resolveOptimizeDataset( return err } cfg.DatasetFile = file - cfg.DatasetReference = ref + cfg.Dataset = ref return nil } -// hasModelConfig reports whether OptimizationConfig contains a "model" entry. +// hasModelConfig reports whether OptimizationConfig contains a +// "model_search_space" entry. func hasModelConfig(oc opt_eval.OptimizationConfig) bool { if oc == nil { return false } - _, ok := oc["model"] + _, ok := oc["model_search_space"] return ok } @@ -420,7 +421,7 @@ func resolveOptimizeTargetModels( if cfg.Options.OptimizationConfig == nil { cfg.Options.OptimizationConfig = make(opt_eval.OptimizationConfig) } - cfg.Options.OptimizationConfig["model"] = modelJSON + cfg.Options.OptimizationConfig["model_search_space"] = modelJSON } return nil diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts_test.go index 2e58da7c0f4..eec8ee95e50 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_prompts_test.go @@ -25,11 +25,11 @@ func TestHasModelConfig(t *testing.T) { }{ {"nil config", nil, false}, {"empty config", opt_eval.OptimizationConfig{}, false}, - {"has model key", opt_eval.OptimizationConfig{ - "model": json.RawMessage(`["gpt-4o"]`), + {"has model_search_space key", opt_eval.OptimizationConfig{ + "model_search_space": json.RawMessage(`["gpt-4o"]`), }, true}, {"has other keys only", opt_eval.OptimizationConfig{ - "systemPrompt": json.RawMessage(`"hello"`), + "system_prompt": json.RawMessage(`"hello"`), }, false}, } for _, tt := range tests { @@ -43,7 +43,7 @@ func TestHasModelConfig(t *testing.T) { // ---- model JSON serialization ---- // TestModelConfigIsPlainArray verifies that when models are stored in -// OptimizationConfig["model"], the value is a JSON array of strings +// OptimizationConfig["model_search_space"], the value is a JSON array of strings // (e.g. ["gpt-4o","gpt-5"]) — not a wrapped object like {"model":[...]}. func TestModelConfigIsPlainArray(t *testing.T) { t.Parallel() @@ -53,17 +53,17 @@ func TestModelConfigIsPlainArray(t *testing.T) { require.NoError(t, err) oc := make(opt_eval.OptimizationConfig) - oc["model"] = modelJSON + oc["model_search_space"] = modelJSON // Deserialize and verify it's a plain array. var parsed []string - require.NoError(t, json.Unmarshal(oc["model"], &parsed)) + require.NoError(t, json.Unmarshal(oc["model_search_space"], &parsed)) assert.Equal(t, models, parsed) // Verify it does NOT deserialize as an object with a "model" key. var asObject map[string]any - err = json.Unmarshal(oc["model"], &asObject) - assert.Error(t, err, "model value should not be a JSON object") + err = json.Unmarshal(oc["model_search_space"], &asObject) + assert.Error(t, err, "model_search_space value should not be a JSON object") } // ---- isRecommendedOptimizationModel ---- diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go index 02c64b43cf2..dac4f136ff4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_status.go @@ -10,6 +10,7 @@ import ( "fmt" "io" + "azureaiagent/internal/pkg/agents/eval_api" "azureaiagent/internal/pkg/agents/optimize_api" azdext "github.com/azure/azure-dev/cli/azd/pkg/azdext" @@ -97,7 +98,7 @@ func runOptimizeStatus(cmd *cobra.Command, flags *optimizeStatusFlags, operation return err } printOptimizeResults(cmd.Context(), out, finalStatus, hasProject, flags.envName) - } else if len(status.Candidates) > 0 { + } else if len(status.Candidates()) > 0 { printOptimizeResults(cmd.Context(), out, status, hasProject, flags.envName) } @@ -110,21 +111,22 @@ func runOptimizeStatus(cmd *cobra.Command, flags *optimizeStatusFlags, operation // printOptimizeJobSummary prints a brief summary of an optimization job's state. func printOptimizeJobSummary(out io.Writer, status *optimize_api.OptimizeJobStatus) { - fmt.Fprintf(out, " Job ID: %s\n", color.CyanString(status.OperationID)) + fmt.Fprintf(out, " Job ID: %s\n", color.CyanString(status.ID)) fmt.Fprintf(out, " Status: %s\n", formatOptimizeStatus(status.Status)) - if status.Agent != nil && status.Agent.AgentName != "" { - fmt.Fprintf(out, " Agent: %s\n", status.Agent.AgentName) + if agentName := status.AgentName(); agentName != "" { + fmt.Fprintf(out, " Agent: %s\n", agentName) } if status.AllTargetAttributesFailed { fmt.Fprintf(out, " Strategy: %s\n", color.YellowString("failed (baseline only — no candidates generated)")) - } else if status.Progress != nil && status.Progress.CurrentTargetAttribute != "" { - fmt.Fprintf(out, " Strategy: %s\n", status.Progress.CurrentTargetAttribute) } - if status.Best != nil { - fmt.Fprintf(out, " Best: %.2f\n", status.Best.AvgScore) + if status.Progress != nil { + fmt.Fprintf(out, " Candidates Completed: %d\n", status.Progress.CandidatesCompleted) } - if status.CreatedAt != "" { - fmt.Fprintf(out, " Created: %s\n", status.CreatedAt) + if best := status.BestCandidate(); best != nil { + fmt.Fprintf(out, " Best: %.2f\n", best.AvgScore) + } + if status.CreatedAt != 0 { + fmt.Fprintf(out, " Created: %s\n", eval_api.FormatTimestamp(status.CreatedAt)) } if status.Error != nil { fmt.Fprintf(out, " Error: %s\n", color.RedString(status.Error.Message)) diff --git a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go index 32ce411defb..f60afe7c9dd 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/cmd/optimize_test.go @@ -107,8 +107,8 @@ func TestDefaultOptimizeConfig(t *testing.T) { assert.Equal(t, "my-agent", cfg.Agent.Name) require.NotNil(t, cfg.Options) assert.Empty(t, cfg.Options.EvalModel) - require.Len(t, cfg.Evaluators, 1) - assert.Equal(t, "builtin.task_adherence", cfg.Evaluators[0].Name) + // No default evaluator is set; it must come from config or --evaluator. + assert.Empty(t, cfg.Evaluators) } // ---- LoadOptimizeConfig + reconcileConfigAgent (--config path) ---- @@ -191,9 +191,10 @@ func TestApplyOverrides_DatasetFlag_LocalFile(t *testing.T) { cfg := &OptimizeConfig{ Config: opt_eval.Config{ - Agent: opt_eval.AgentRef{Name: "a", Instruction: opt_eval.InstructionRef{Value: "test"}}, + Agent: opt_eval.AgentRef{Name: "a", Instruction: opt_eval.InstructionRef{Value: "test"}}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, }, - Options: &opt_eval.Options{EvalModel: "gpt-4o"}, + Options: &opt_eval.Options{EvalModel: "gpt-4o", OptimizationModel: "gpt-5"}, } action := &OptimizeAction{ @@ -204,7 +205,7 @@ func TestApplyOverrides_DatasetFlag_LocalFile(t *testing.T) { err := action.applyOverrides(t.Context(), cfg, dir) require.NoError(t, err) assert.Equal(t, dataFile, cfg.DatasetFile) - assert.Nil(t, cfg.DatasetReference) + assert.Nil(t, cfg.Dataset) } func TestApplyOverrides_DatasetFlag_RegisteredName(t *testing.T) { @@ -212,9 +213,10 @@ func TestApplyOverrides_DatasetFlag_RegisteredName(t *testing.T) { cfg := &OptimizeConfig{ Config: opt_eval.Config{ - Agent: opt_eval.AgentRef{Name: "a", Instruction: opt_eval.InstructionRef{Value: "test"}}, + Agent: opt_eval.AgentRef{Name: "a", Instruction: opt_eval.InstructionRef{Value: "test"}}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, }, - Options: &opt_eval.Options{EvalModel: "gpt-4o"}, + Options: &opt_eval.Options{EvalModel: "gpt-4o", OptimizationModel: "gpt-5"}, } action := &OptimizeAction{ @@ -225,8 +227,8 @@ func TestApplyOverrides_DatasetFlag_RegisteredName(t *testing.T) { err := action.applyOverrides(t.Context(), cfg, "") require.NoError(t, err) assert.Empty(t, cfg.DatasetFile) - require.NotNil(t, cfg.DatasetReference) - assert.Equal(t, "my-golden-dataset", cfg.DatasetReference.Name) + require.NotNil(t, cfg.Dataset) + assert.Equal(t, "my-golden-dataset", cfg.Dataset.Name) } func TestApplyOverrides_DatasetFlag_OverridesExisting(t *testing.T) { @@ -237,10 +239,11 @@ func TestApplyOverrides_DatasetFlag_OverridesExisting(t *testing.T) { cfg := &OptimizeConfig{ Config: opt_eval.Config{ - Agent: opt_eval.AgentRef{Name: "a", Instruction: opt_eval.InstructionRef{Value: "test"}}, - DatasetReference: &opt_eval.DatasetRef{Name: "old-ref"}, + Agent: opt_eval.AgentRef{Name: "a", Instruction: opt_eval.InstructionRef{Value: "test"}}, + Dataset: &opt_eval.DatasetRef{Name: "old-ref"}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, }, - Options: &opt_eval.Options{EvalModel: "gpt-4o"}, + Options: &opt_eval.Options{EvalModel: "gpt-4o", OptimizationModel: "gpt-5"}, } action := &OptimizeAction{ @@ -251,7 +254,7 @@ func TestApplyOverrides_DatasetFlag_OverridesExisting(t *testing.T) { err := action.applyOverrides(t.Context(), cfg, dir) require.NoError(t, err) assert.Equal(t, dataFile, cfg.DatasetFile, "file should replace ref") - assert.Nil(t, cfg.DatasetReference, "ref should be cleared") + assert.Nil(t, cfg.Dataset, "ref should be cleared") } // ---- eval.yaml auto-use in --no-prompt mode ---- @@ -279,7 +282,7 @@ options: require.NoError(t, err) assert.Equal(t, "travel-agent", cfg.Agent.Name) assert.Equal(t, dataFile, cfg.DatasetFile, "dataset_file from eval.yaml should be loaded") - assert.Nil(t, cfg.DatasetReference) + assert.Nil(t, cfg.Dataset) assert.Equal(t, "gpt-4o", cfg.Options.EvalModel) } @@ -304,9 +307,9 @@ options: require.NoError(t, err) assert.Equal(t, "travel-agent", cfg.Agent.Name) assert.Empty(t, cfg.DatasetFile) - require.NotNil(t, cfg.DatasetReference, "dataset_reference from eval.yaml should be loaded") - assert.Equal(t, "golden-dataset", cfg.DatasetReference.Name) - assert.Equal(t, "2", cfg.DatasetReference.Version) + require.NotNil(t, cfg.Dataset, "dataset_reference from eval.yaml should be loaded") + assert.Equal(t, "golden-dataset", cfg.Dataset.Name) + assert.Equal(t, "2", cfg.Dataset.Version) } func TestApplyOverrides_NoPrompt_EvalYAML_WithDataset_Succeeds(t *testing.T) { @@ -324,7 +327,7 @@ func TestApplyOverrides_NoPrompt_EvalYAML_WithDataset_Succeeds(t *testing.T) { DatasetFile: dataFile, Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}}, }, - Options: &opt_eval.Options{EvalModel: "gpt-4o"}, + Options: &opt_eval.Options{EvalModel: "gpt-4o", OptimizationModel: "gpt-5"}, } action := &OptimizeAction{ @@ -379,11 +382,13 @@ func TestPrintOptimizeResults_TableHasCandidateScorePass(t *testing.T) { t.Parallel() status := &optimize_api.OptimizeJobStatus{ - Candidates: []optimize_api.CandidateResult{ - {Name: "baseline", AvgScore: 0.91, PassRate: 1.0}, - {Name: "candidate_1", AvgScore: 0.95, PassRate: 1.0}, + Result: &optimize_api.OptimizeResult{ + Best: "candidate_1", + Candidates: []optimize_api.CandidateResult{ + {Name: "baseline", AvgScore: 0.91}, + {Name: "candidate_1", AvgScore: 0.95}, + }, }, - Best: &optimize_api.CandidateResult{Name: "candidate_1"}, } var buf strings.Builder @@ -393,30 +398,31 @@ func TestPrintOptimizeResults_TableHasCandidateScorePass(t *testing.T) { // Verify header columns. assert.Contains(t, out, "Candidate") assert.Contains(t, out, "Score") - assert.Contains(t, out, "Pass") // Verify no removed columns. assert.NotContains(t, out, "Strategies") assert.NotContains(t, out, "Tokens") assert.NotContains(t, out, "Optimal") + assert.NotContains(t, out, "Pass") // Verify candidate data. assert.Contains(t, out, "baseline") assert.Contains(t, out, "candidate_1") assert.Contains(t, out, "0.91") assert.Contains(t, out, "0.95") - assert.Contains(t, out, "100%") } func TestPrintOptimizeResults_BestMarkedWithStar(t *testing.T) { t.Parallel() status := &optimize_api.OptimizeJobStatus{ - Candidates: []optimize_api.CandidateResult{ - {Name: "baseline", AvgScore: 0.80, PassRate: 0.7}, - {Name: "candidate_1", AvgScore: 0.95, PassRate: 1.0}, + Result: &optimize_api.OptimizeResult{ + Best: "candidate_1", + Candidates: []optimize_api.CandidateResult{ + {Name: "baseline", AvgScore: 0.80}, + {Name: "candidate_1", AvgScore: 0.95}, + }, }, - Best: &optimize_api.CandidateResult{Name: "candidate_1"}, } var buf strings.Builder @@ -440,10 +446,12 @@ func TestPrintOptimizeResults_ShowsCandidateIDs(t *testing.T) { t.Parallel() status := &optimize_api.OptimizeJobStatus{ - Candidates: []optimize_api.CandidateResult{ - {Name: "candidate_1", AvgScore: 0.95, PassRate: 1.0, CandidateID: "abc-123"}, + Result: &optimize_api.OptimizeResult{ + Best: "abc-123", + Candidates: []optimize_api.CandidateResult{ + {Name: "candidate_1", AvgScore: 0.95, CandidateID: "abc-123"}, + }, }, - Best: &optimize_api.CandidateResult{Name: "candidate_1", CandidateID: "abc-123"}, } var buf strings.Builder @@ -454,3 +462,50 @@ func TestPrintOptimizeResults_ShowsCandidateIDs(t *testing.T) { assert.Contains(t, out, "abc-123") assert.Contains(t, out, "optimize apply") } + +func TestPrintOptimizeResults_ShowsStrategyColumn(t *testing.T) { + t.Parallel() + + status := &optimize_api.OptimizeJobStatus{ + Result: &optimize_api.OptimizeResult{ + Best: "candidate_1", + Candidates: []optimize_api.CandidateResult{ + {Name: "baseline", AvgScore: 0.90}, + { + Name: "candidate_1", + AvgScore: 0.95, + Mutations: map[string]string{ + "skill_policy-reviewer": "updated instructions", + "system_prompt": "new prompt", + }, + }, + }, + }, + } + + var buf strings.Builder + printOptimizeResults(t.Context(), &buf, status, false, "") + out := buf.String() + + // Strategy header and mutation keys (sorted) are shown. + assert.Contains(t, out, "Strategy") + assert.Contains(t, out, "skill_policy-reviewer") +} + +func TestPrintOptimizeResults_NoStrategyColumnWhenNoMutations(t *testing.T) { + t.Parallel() + + status := &optimize_api.OptimizeJobStatus{ + Result: &optimize_api.OptimizeResult{ + Best: "candidate_1", + Candidates: []optimize_api.CandidateResult{ + {Name: "candidate_1", AvgScore: 0.95}, + }, + }, + } + + var buf strings.Builder + printOptimizeResults(t.Context(), &buf, status, false, "") + + assert.NotContains(t, buf.String(), "Strategy") +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go index 365153b6ced..5a5704f4c7d 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config.go @@ -40,6 +40,7 @@ func LoadEvalConfig(path string) (*EvalConfig, error) { if err := yaml.Unmarshal(data, &cfg); err != nil { return nil, fmt.Errorf("failed to parse eval config %q: %w", path, err) } + cfg.NormalizeDataset() return &cfg, nil } @@ -85,15 +86,11 @@ func (c *EvalConfig) Validate() error { "add an 'evaluators:' section to your eval.yaml or use --evaluator") } - hasFile := c.DatasetFile != "" - hasRef := c.DatasetReference != nil + hasLocal := c.LocalDatasetPath() != "" + hasRemote := c.RemoteDatasetReference() != nil - if hasFile && hasRef { - return fmt.Errorf("dataset_file and dataset_reference are mutually exclusive; specify one, not both") - } - - if !hasFile && !hasRef { - return fmt.Errorf("one of dataset_file or dataset_reference is required") + if !hasLocal && !hasRemote { + return fmt.Errorf("a dataset is required: provide a local or registered dataset in your eval config") } return nil diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go index c0acd91bf35..1f877b5739e 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/eval_config_test.go @@ -24,9 +24,9 @@ func TestValidate_RequiresName(t *testing.T) { cfg := &EvalConfig{ Config: opt_eval.Config{ - Agent: opt_eval.AgentRef{Name: "agent-1"}, - DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, - Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, + Agent: opt_eval.AgentRef{Name: "agent-1"}, + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, }, } err := cfg.Validate() @@ -39,10 +39,10 @@ func TestValidate_RequiresAgentName(t *testing.T) { cfg := &EvalConfig{ Config: opt_eval.Config{ - Name: "my-eval", - Agent: opt_eval.AgentRef{}, - DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, - Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, + Name: "my-eval", + Agent: opt_eval.AgentRef{}, + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, }, } err := cfg.Validate() @@ -55,9 +55,9 @@ func TestValidate_RequiresEvaluators(t *testing.T) { cfg := &EvalConfig{ Config: opt_eval.Config{ - Name: "my-eval", - Agent: opt_eval.AgentRef{Name: "agent-1"}, - DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + Name: "my-eval", + Agent: opt_eval.AgentRef{Name: "agent-1"}, + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, }, } err := cfg.Validate() @@ -77,24 +77,25 @@ func TestValidate_RequiresDataset(t *testing.T) { } err := cfg.Validate() require.Error(t, err) - assert.Contains(t, err.Error(), "dataset_file or dataset_reference is required") + assert.Contains(t, err.Error(), "a dataset is required") } -func TestValidate_MutuallyExclusiveDataset(t *testing.T) { +func TestValidate_DatasetFileTakesPrecedence(t *testing.T) { t.Parallel() + // dataset_file (deprecated local form) and a registered dataset may coexist; + // the local dataset_file takes precedence and validation passes. cfg := &EvalConfig{ Config: opt_eval.Config{ - Name: "my-eval", - Agent: opt_eval.AgentRef{Name: "agent-1"}, - DatasetFile: "tasks.jsonl", - DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, - Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, + Name: "my-eval", + Agent: opt_eval.AgentRef{Name: "agent-1"}, + DatasetFile: "tasks.jsonl", + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, }, } - err := cfg.Validate() - require.Error(t, err) - assert.Contains(t, err.Error(), "mutually exclusive") + require.NoError(t, cfg.Validate()) + assert.Equal(t, "tasks.jsonl", cfg.LocalDatasetPath()) } func TestValidate_ValidWithDatasetFile(t *testing.T) { @@ -116,10 +117,10 @@ func TestValidate_ValidWithDatasetReference(t *testing.T) { cfg := &EvalConfig{ Config: opt_eval.Config{ - Name: "my-eval", - Agent: opt_eval.AgentRef{Name: "agent-1"}, - DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, - Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, + Name: "my-eval", + Agent: opt_eval.AgentRef{Name: "agent-1"}, + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + Evaluators: opt_eval.EvaluatorList{{Name: "coherence"}}, }, } assert.NoError(t, cfg.Validate()) @@ -143,8 +144,8 @@ func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { Version: "v3", Model: "gpt-4.1", }, - DatasetReference: &opt_eval.DatasetRef{Name: "golden-data", Version: "v2"}, - Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "custom-quality"}}, + Dataset: &opt_eval.DatasetRef{Name: "golden-data", Version: "v2"}, + Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}, {Name: "custom-quality"}}, }, Options: &opt_eval.Options{ EvalModel: "gpt-4o", @@ -161,9 +162,9 @@ func TestEvalConfig_RoundTrip_FullFields(t *testing.T) { assert.Equal(t, agent_yaml.AgentKind("hosted"), loaded.Agent.Kind) assert.Equal(t, "v3", loaded.Agent.Version) assert.Equal(t, "gpt-4.1", loaded.Agent.Model) - require.NotNil(t, loaded.DatasetReference) - assert.Equal(t, "golden-data", loaded.DatasetReference.Name) - assert.Equal(t, "v2", loaded.DatasetReference.Version) + require.NotNil(t, loaded.Dataset) + assert.Equal(t, "golden-data", loaded.Dataset.Name) + assert.Equal(t, "v2", loaded.Dataset.Version) require.Len(t, loaded.Evaluators, 2) assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0].Name) assert.Equal(t, "custom-quality", loaded.Evaluators[1].Name) @@ -189,7 +190,7 @@ func TestEvalConfig_RoundTrip_MinimalFields(t *testing.T) { assert.Equal(t, "simple-agent", loaded.Agent.Name) assert.Equal(t, "data.jsonl", loaded.DatasetFile) - assert.Nil(t, loaded.DatasetReference) + assert.Nil(t, loaded.Dataset) assert.Empty(t, loaded.Evaluators) assert.True(t, loaded.Agent.Instruction.IsEmpty()) assert.Zero(t, loaded.MaxSamples) @@ -263,9 +264,9 @@ func TestToAgentTargetAdaptableEvalGroupRequest_WithDatasetReference(t *testing. cfg := &EvalConfig{ Config: opt_eval.Config{ - Name: "ref-eval", - Agent: opt_eval.AgentRef{Name: "agent-1"}, - DatasetReference: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, + Name: "ref-eval", + Agent: opt_eval.AgentRef{Name: "agent-1"}, + Dataset: &opt_eval.DatasetRef{Name: "ds", Version: "v1"}, }, } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go index 47d9410083d..cb327a1be9f 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation.go @@ -89,11 +89,12 @@ func NewEvaluatorGenerationJobRequest( sources []GenerationSource, ) *EvaluatorGenerationJobRequest { return &EvaluatorGenerationJobRequest{ - Name: name, - EvaluatorName: name, - Category: "quality", - Model: evalModel, - Sources: sources, + Inputs: EvaluatorGenerationInputs{ + Name: name, + EvaluatorName: name, + Model: evalModel, + Sources: sources, + }, } } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go index f4d95245408..a7f0412c9fd 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/generation_test.go @@ -65,11 +65,11 @@ func TestNewEvaluatorGenerationJobRequest(t *testing.T) { t.Parallel() sources := []GenerationSource{{Type: "agent", AgentName: "a1"}} req := NewEvaluatorGenerationJobRequest("eval-suite", "gpt-4o", sources) - assert.Equal(t, "eval-suite", req.Name) - assert.Equal(t, "eval-suite", req.EvaluatorName) - assert.Equal(t, "quality", req.Category) - assert.Equal(t, "gpt-4o", req.Model) - require.Len(t, req.Sources, 1) + assert.Equal(t, "eval-suite", req.Inputs.Name) + assert.Equal(t, "eval-suite", req.Inputs.EvaluatorName) + assert.Empty(t, req.Inputs.Category) + assert.Equal(t, "gpt-4o", req.Inputs.Model) + require.Len(t, req.Inputs.Sources, 1) } // --------------------------------------------------------------------------- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go index a917e700140..cf51017746c 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/models.go @@ -127,9 +127,14 @@ func (j *GenerationJob) resultStringField(key string) string { // EvaluatorGenerationJobRequest is the request body for CreateEvaluatorGenerationJob. type EvaluatorGenerationJobRequest struct { + Inputs EvaluatorGenerationInputs `json:"inputs"` +} + +// EvaluatorGenerationInputs holds the inputs for an evaluator generation job. +type EvaluatorGenerationInputs struct { Name string `json:"name"` EvaluatorName string `json:"evaluator_name"` - Category string `json:"category"` + Category string `json:"category,omitempty"` Model string `json:"model"` Sources []GenerationSource `json:"sources"` } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go index 05b8aca73dd..c6e7eb03fde 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/eval_api/operations_test.go @@ -144,7 +144,7 @@ func TestCreateEvaluatorGenerationJob_Success(t *testing.T) { client, _ := newTestClient(t, handler) result, err := client.CreateEvaluatorGenerationJob( - t.Context(), &EvaluatorGenerationJobRequest{Name: "my-eval"}, "v1", + t.Context(), &EvaluatorGenerationJobRequest{Inputs: EvaluatorGenerationInputs{Name: "my-eval"}}, "v1", ) require.NoError(t, err) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go index 220d6d518b7..77c0fda1ad6 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml.go @@ -35,17 +35,57 @@ func SafePath(baseDir, untrusted string) (string, error) { // Config is the shared YAML configuration for eval and optimize commands. // // Contains fields common to both commands. Optimize-specific fields -// (Criteria, ValidationReference, etc) live in +// (Criteria, ValidationDataset, etc) live in // the OptimizeConfig wrapper in the cmd package. // // Runtime state (operation IDs, eval IDs, status) is stored in // the azd environment rather than in this config file. type Config struct { - Name string `yaml:"name,omitempty"` - Agent AgentRef `yaml:"agent"` - DatasetFile string `yaml:"dataset_file,omitempty"` - DatasetReference *DatasetRef `yaml:"dataset_reference,omitempty"` - Evaluators EvaluatorList `yaml:"evaluators,omitempty"` + Name string `yaml:"name,omitempty"` + Agent AgentRef `yaml:"agent"` + // DatasetFile is the deprecated local dataset path. New configs use a + // local dataset block instead (a `dataset:` with only `local_uri` set, no + // `name`). It is still read for backward compatibility and takes + // precedence when set. + DatasetFile string `yaml:"dataset_file,omitempty"` + Dataset *DatasetRef `yaml:"dataset,omitempty"` + // LegacyDatasetReference reads the deprecated `dataset_reference` YAML key. + // Use Dataset instead; this is consulted only for backward compatibility + // and is merged into Dataset by NormalizeDataset at load time. + LegacyDatasetReference *DatasetRef `yaml:"dataset_reference,omitempty"` + Evaluators EvaluatorList `yaml:"evaluators,omitempty"` +} + +// NormalizeDataset merges the deprecated `dataset_reference` key into Dataset +// when Dataset is unset, then clears the legacy field so it is not re-written. +// Loaders call this after unmarshaling for backward compatibility. +func (c *Config) NormalizeDataset() { + if c.Dataset == nil && c.LegacyDatasetReference != nil { + c.Dataset = c.LegacyDatasetReference + } + c.LegacyDatasetReference = nil +} + +// LocalDatasetPath returns the local dataset path, if any. It prefers the +// deprecated dataset_file (backward compatibility), then falls back to a +// local dataset block (a `dataset:` with `local_uri` set and no `name`). +func (c *Config) LocalDatasetPath() string { + if c.DatasetFile != "" { + return c.DatasetFile + } + if c.Dataset.IsLocal() { + return c.Dataset.LocalURI + } + return "" +} + +// RemoteDatasetReference returns the dataset when it points to a registered +// dataset, or nil for a local-only or unset dataset. +func (c *Config) RemoteDatasetReference() *DatasetRef { + if c.Dataset.IsRemote() { + return c.Dataset + } + return nil } // EvaluatorRef describes an evaluator. It can be a simple string name or a @@ -364,13 +404,34 @@ func (r InstructionRef) MarshalYAML() (any, error) { return r.Value, nil } -// DatasetRef references a named/versioned dataset. +// DatasetRef references a dataset. A remote dataset points to a registered +// dataset by name and optional version; a local dataset points to a local file +// via local_uri. The kind is inferred from the populated fields: a name implies +// a remote dataset, otherwise a populated local_uri implies a local one. type DatasetRef struct { - Name string `yaml:"name"` + Name string `yaml:"name,omitempty"` Version string `yaml:"version,omitempty"` LocalURI string `yaml:"local_uri,omitempty"` } +// IsLocal reports whether the reference points to a local dataset file. +// A populated local_uri without a registered name implies a local dataset. +func (d *DatasetRef) IsLocal() bool { + if d == nil { + return false + } + return d.Name == "" && d.LocalURI != "" +} + +// IsRemote reports whether the reference points to a registered dataset. +// A registered name implies a remote dataset reference. +func (d *DatasetRef) IsRemote() bool { + if d == nil { + return false + } + return d.Name != "" +} + // OptimizationConfig is a per-target-attribute map of configuration overrides. // Each key is a target attribute name (e.g. "model") and the value is the // JSON-encoded configuration for that attribute. @@ -379,8 +440,8 @@ type DatasetRef struct { // automatically converted to json.RawMessage, allowing users to write: // // optimization_config: -// model: ["gpt-4o", "gpt-5"] -// baselineModel: gpt-4o +// model_search_space: ["gpt-4o", "gpt-5"] +// model: gpt-4o type OptimizationConfig map[string]json.RawMessage // UnmarshalYAML decodes each value as a YAML native type and re-encodes it as @@ -416,22 +477,11 @@ func (oc *OptimizationConfig) UnmarshalYAML(value *yaml.Node) error { type Options struct { EvalModel string `yaml:"eval_model,omitempty"` OptimizationConfig OptimizationConfig `yaml:"optimization_config,omitempty"` - MaxIterations *int `yaml:"max_iterations,omitempty"` + MaxCandidates *int `yaml:"max_candidates,omitempty"` OptimizationModel string `yaml:"optimization_model,omitempty"` EvaluationLevel string `yaml:"evaluation_level,omitempty"` } -// UnmarshalYAML decodes Options from a YAML node. -func (o *Options) UnmarshalYAML(value *yaml.Node) error { - // Alias avoids infinite recursion. - type raw Options - if err := value.Decode((*raw)(o)); err != nil { - return err - } - - return nil -} - // Read reads a YAML config file (eval or optimize format). func Read(path string) (*Config, error) { data, err := os.ReadFile(path) //nolint:gosec // path is provided by user for local config diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go index 6f6ade26da7..7c7525400d4 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/opt_eval/yaml_test.go @@ -110,20 +110,154 @@ func TestConfig_RoundTrip_DatasetReference(t *testing.T) { path := filepath.Join(dir, "config.yaml") original := &Config{ - Agent: AgentRef{Name: "a1"}, - DatasetReference: &DatasetRef{Name: "golden", Version: "v2"}, + Agent: AgentRef{Name: "a1"}, + Dataset: &DatasetRef{Name: "golden", Version: "v2"}, } require.NoError(t, Write(path, original)) loaded, err := Read(path) require.NoError(t, err) - require.NotNil(t, loaded.DatasetReference) - assert.Equal(t, "golden", loaded.DatasetReference.Name) - assert.Equal(t, "v2", loaded.DatasetReference.Version) + require.NotNil(t, loaded.Dataset) + assert.Equal(t, "golden", loaded.Dataset.Name) + assert.Equal(t, "v2", loaded.Dataset.Version) assert.Empty(t, loaded.DatasetFile) } +// --------------------------------------------------------------------------- +// DatasetRef.IsLocal / IsRemote — inferred from populated fields +// --------------------------------------------------------------------------- + +func TestDatasetRef_IsLocal(t *testing.T) { + t.Parallel() + tests := []struct { + name string + ref *DatasetRef + want bool + }{ + {"nil", nil, false}, + {"local_uri only", &DatasetRef{LocalURI: "./data.jsonl"}, true}, + {"name present", &DatasetRef{Name: "ds"}, false}, + {"name and local_uri", &DatasetRef{Name: "ds", LocalURI: "./cache.jsonl"}, false}, + {"empty", &DatasetRef{}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, tt.ref.IsLocal()) + }) + } +} + +func TestDatasetRef_IsRemote(t *testing.T) { + t.Parallel() + tests := []struct { + name string + ref *DatasetRef + want bool + }{ + {"nil", nil, false}, + {"name present", &DatasetRef{Name: "ds"}, true}, + {"name and version", &DatasetRef{Name: "ds", Version: "2"}, true}, + {"local_uri only", &DatasetRef{LocalURI: "./data.jsonl"}, false}, + {"empty", &DatasetRef{}, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, tt.ref.IsRemote()) + }) + } +} + +// --------------------------------------------------------------------------- +// Config dataset helpers — NormalizeDataset / LocalDatasetPath / RemoteDatasetReference +// --------------------------------------------------------------------------- + +func TestConfig_NormalizeDataset(t *testing.T) { + t.Parallel() + + t.Run("merges legacy dataset_reference into Dataset", func(t *testing.T) { + t.Parallel() + c := &Config{LegacyDatasetReference: &DatasetRef{Name: "golden", Version: "2"}} + c.NormalizeDataset() + require.NotNil(t, c.Dataset) + assert.Equal(t, "golden", c.Dataset.Name) + assert.Nil(t, c.LegacyDatasetReference, "legacy field should be cleared") + }) + + t.Run("Dataset takes precedence over legacy", func(t *testing.T) { + t.Parallel() + c := &Config{ + Dataset: &DatasetRef{Name: "new"}, + LegacyDatasetReference: &DatasetRef{Name: "old"}, + } + c.NormalizeDataset() + assert.Equal(t, "new", c.Dataset.Name) + assert.Nil(t, c.LegacyDatasetReference) + }) + + t.Run("no-op when neither set", func(t *testing.T) { + t.Parallel() + c := &Config{} + c.NormalizeDataset() + assert.Nil(t, c.Dataset) + }) +} + +func TestConfig_LocalDatasetPath(t *testing.T) { + t.Parallel() + + t.Run("dataset_file takes precedence", func(t *testing.T) { + t.Parallel() + c := &Config{ + DatasetFile: "tasks.jsonl", + Dataset: &DatasetRef{LocalURI: "./other.jsonl"}, + } + assert.Equal(t, "tasks.jsonl", c.LocalDatasetPath()) + }) + + t.Run("falls back to local dataset", func(t *testing.T) { + t.Parallel() + c := &Config{Dataset: &DatasetRef{LocalURI: "./golden.jsonl"}} + assert.Equal(t, "./golden.jsonl", c.LocalDatasetPath()) + }) + + t.Run("empty for remote dataset", func(t *testing.T) { + t.Parallel() + c := &Config{Dataset: &DatasetRef{Name: "registered"}} + assert.Empty(t, c.LocalDatasetPath()) + }) + + t.Run("empty when nothing set", func(t *testing.T) { + t.Parallel() + assert.Empty(t, (&Config{}).LocalDatasetPath()) + }) +} + +func TestConfig_RemoteDatasetReference(t *testing.T) { + t.Parallel() + + t.Run("returns ref for remote dataset", func(t *testing.T) { + t.Parallel() + c := &Config{Dataset: &DatasetRef{Name: "ds", Version: "3"}} + ref := c.RemoteDatasetReference() + require.NotNil(t, ref) + assert.Equal(t, "ds", ref.Name) + }) + + t.Run("nil for local dataset", func(t *testing.T) { + t.Parallel() + c := &Config{Dataset: &DatasetRef{LocalURI: "./data.jsonl"}} + assert.Nil(t, c.RemoteDatasetReference()) + }) + + t.Run("nil when unset", func(t *testing.T) { + t.Parallel() + assert.Nil(t, (&Config{}).RemoteDatasetReference()) + }) +} + func TestRead_MissingFile(t *testing.T) { t.Parallel() _, err := Read("/nonexistent/config.yaml") @@ -189,18 +323,34 @@ func TestOptions_YAMLFields(t *testing.T) { input := ` eval_model: gpt-4.1 -max_iterations: 10 +max_candidates: 10 optimization_model: gpt-4o ` var opts Options require.NoError(t, yaml.Unmarshal([]byte(input), &opts)) assert.Equal(t, "gpt-4.1", opts.EvalModel) - require.NotNil(t, opts.MaxIterations) - assert.Equal(t, 10, *opts.MaxIterations) + require.NotNil(t, opts.MaxCandidates) + assert.Equal(t, 10, *opts.MaxCandidates) assert.Equal(t, "gpt-4o", opts.OptimizationModel) } +// TestOptions_MaxCandidates verifies the max_candidates key populates +// MaxCandidates. +func TestOptions_MaxCandidates(t *testing.T) { + t.Parallel() + + input := ` +eval_model: gpt-4.1 +max_candidates: 7 +` + var opts Options + require.NoError(t, yaml.Unmarshal([]byte(input), &opts)) + + require.NotNil(t, opts.MaxCandidates) + assert.Equal(t, 7, *opts.MaxCandidates) +} + func TestOptions_OptimizationConfig_NativeYAML(t *testing.T) { t.Parallel() @@ -208,11 +358,11 @@ func TestOptions_OptimizationConfig_NativeYAML(t *testing.T) { eval_model: gpt-4o optimization_model: gpt-5.1 optimization_config: - model: + model_search_space: - gpt-4o - gpt-5 - gpt-5.1 - baselineModel: gpt-4o + model: gpt-4o ` var opts Options require.NoError(t, yaml.Unmarshal([]byte(input), &opts)) @@ -222,11 +372,11 @@ optimization_config: require.NotNil(t, opts.OptimizationConfig) - // model should be a JSON array. - assert.JSONEq(t, `["gpt-4o","gpt-5","gpt-5.1"]`, string(opts.OptimizationConfig["model"])) + // model_search_space should be a JSON array. + assert.JSONEq(t, `["gpt-4o","gpt-5","gpt-5.1"]`, string(opts.OptimizationConfig["model_search_space"])) - // baselineModel should be a JSON string. - assert.JSONEq(t, `"gpt-4o"`, string(opts.OptimizationConfig["baselineModel"])) + // model should be a JSON string. + assert.JSONEq(t, `"gpt-4o"`, string(opts.OptimizationConfig["model"])) } func TestOptions_OptimizationConfig_QuotedJSON(t *testing.T) { @@ -236,17 +386,17 @@ func TestOptions_OptimizationConfig_QuotedJSON(t *testing.T) { // These should be stored as-is, not double-encoded. input := ` optimization_config: - model: '["gpt-4o","gpt-5"]' - baselineModel: '"gpt-4o"' + model_search_space: '["gpt-4o","gpt-5"]' + model: '"gpt-4o"' ` var opts Options require.NoError(t, yaml.Unmarshal([]byte(input), &opts)) require.NotNil(t, opts.OptimizationConfig) - // model should be the JSON array, not a JSON-encoded string. - assert.JSONEq(t, `["gpt-4o","gpt-5"]`, string(opts.OptimizationConfig["model"])) + // model_search_space should be the JSON array, not a JSON-encoded string. + assert.JSONEq(t, `["gpt-4o","gpt-5"]`, string(opts.OptimizationConfig["model_search_space"])) - // baselineModel should be the JSON string, not double-quoted. - assert.JSONEq(t, `"gpt-4o"`, string(opts.OptimizationConfig["baselineModel"])) + // model should be the JSON string, not double-quoted. + assert.JSONEq(t, `"gpt-4o"`, string(opts.OptimizationConfig["model"])) } diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go index fd121cb5c80..51878a410b0 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client.go @@ -30,6 +30,22 @@ type OptimizeClient struct { pipeline runtime.Pipeline } +// optimizeJobsPath is the base path segment for optimization job endpoints. +// (defined in models.go) + +// optimizeFeatureHeader is the Foundry-Features value required by the v2 +// optimization API. It is sent on every request via foundryFeaturesPolicy. +const optimizeFeatureHeader = "AgentsOptimization=V2Preview" + +// foundryFeaturesPolicy sets the Foundry-Features header on every request so +// the optimization service enables the required preview feature. +type foundryFeaturesPolicy struct{} + +func (foundryFeaturesPolicy) Do(req *policy.Request) (*http.Response, error) { + req.Raw().Header.Set("Foundry-Features", optimizeFeatureHeader) + return req.Next() +} + // NewOptimizeClient creates a new OptimizeClient with the given endpoint and credential. func NewOptimizeClient(endpoint string, cred azcore.TokenCredential) *OptimizeClient { userAgent := fmt.Sprintf("azd-ext-azure-ai-agents/%s", version.Version) @@ -43,6 +59,7 @@ func NewOptimizeClient(endpoint string, cred azcore.TokenCredential) *OptimizeCl runtime.NewBearerTokenPolicy(cred, []string{"https://ai.azure.com/.default"}, nil), azsdk.NewMsCorrelationPolicy(), azsdk.NewUserAgentPolicy(userAgent), + foundryFeaturesPolicy{}, }, } @@ -73,9 +90,13 @@ func (c *OptimizeClient) StartOptimize( ctx context.Context, optimizeReq *OptimizeRequest, ) (*OptimizeResponse, error) { - url := fmt.Sprintf("%s/optimize?api-version=%s", c.endpoint, APIVersion) + url := fmt.Sprintf("%s/%s?api-version=%s", c.endpoint, optimizeJobsPath, APIVersion) - payload, err := json.Marshal(optimizeReq) + // The service expects the request body wrapped in an "inputs" envelope, + // mirroring the shape echoed back in the job status response. + payload, err := json.Marshal(struct { + Inputs *OptimizeRequest `json:"inputs"` + }{Inputs: optimizeReq}) if err != nil { return nil, fmt.Errorf("failed to marshal request: %w", err) } @@ -117,7 +138,7 @@ func (c *OptimizeClient) GetOptimizeStatus( ctx context.Context, operationID string, ) (*OptimizeJobStatus, error) { - url := fmt.Sprintf("%s/optimize/%s?api-version=%s", c.endpoint, netURL.PathEscape(operationID), APIVersion) + url := fmt.Sprintf("%s/%s/%s?api-version=%s", c.endpoint, optimizeJobsPath, netURL.PathEscape(operationID), APIVersion) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { @@ -153,7 +174,7 @@ func (c *OptimizeClient) ListOptimizeJobs( limit int, status string, ) (*OptimizeListResponse, error) { - url := fmt.Sprintf("%s/optimize?api-version=%s&limit=%d", c.endpoint, APIVersion, limit) + url := fmt.Sprintf("%s/%s?api-version=%s&limit=%d", c.endpoint, optimizeJobsPath, APIVersion, limit) if status != "" { url += "&status=" + netURL.QueryEscape(status) } @@ -191,7 +212,7 @@ func (c *OptimizeClient) CancelOptimize( ctx context.Context, operationID string, ) (*OptimizeCancelResponse, error) { - url := fmt.Sprintf("%s/optimize/%s/cancel?api-version=%s", c.endpoint, netURL.PathEscape(operationID), APIVersion) + url := fmt.Sprintf("%s/%s/%s:cancel?api-version=%s", c.endpoint, optimizeJobsPath, netURL.PathEscape(operationID), APIVersion) req, err := runtime.NewRequest(ctx, http.MethodPost, url) if err != nil { @@ -225,11 +246,12 @@ func (c *OptimizeClient) CancelOptimize( // deployed. This allows the optimization service to track which candidates have been deployed. func (c *OptimizeClient) ReportDeployment( ctx context.Context, + jobID string, report *DeploymentReport, ) error { url := fmt.Sprintf( - "%s/optimize/candidates/%s:promote?api-version=%s", - c.endpoint, netURL.PathEscape(report.CandidateID), APIVersion, + "%s/%s/%s/candidates/%s:promote?api-version=%s", + c.endpoint, optimizeJobsPath, netURL.PathEscape(jobID), netURL.PathEscape(report.CandidateID), APIVersion, ) payload, err := json.Marshal(report) @@ -262,12 +284,14 @@ func (c *OptimizeClient) ReportDeployment( } // GetCandidateConfig fetches the candidate configuration from the optimization service. -// GET /optimize/candidates/{id}/config +// GET /agent_optimization_jobs/{jobId}/candidates/{id}/config func (c *OptimizeClient) GetCandidateConfig( ctx context.Context, + jobID string, candidateID string, ) (json.RawMessage, error) { - url := fmt.Sprintf("%s/optimize/candidates/%s/config?api-version=%s", c.endpoint, netURL.PathEscape(candidateID), APIVersion) + url := fmt.Sprintf("%s/%s/%s/candidates/%s/config?api-version=%s", + c.endpoint, optimizeJobsPath, netURL.PathEscape(jobID), netURL.PathEscape(candidateID), APIVersion) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { @@ -297,12 +321,14 @@ func (c *OptimizeClient) GetCandidateConfig( } // GetCandidate fetches the candidate manifest (metadata + file list) from the optimization service. -// GET /optimize/candidates/{id} +// GET /agent_optimization_jobs/{jobId}/candidates/{id} func (c *OptimizeClient) GetCandidate( ctx context.Context, + jobID string, candidateID string, ) (*CandidateManifest, error) { - url := fmt.Sprintf("%s/optimize/candidates/%s?api-version=%s", c.endpoint, netURL.PathEscape(candidateID), APIVersion) + url := fmt.Sprintf("%s/%s/%s/candidates/%s?api-version=%s", + c.endpoint, optimizeJobsPath, netURL.PathEscape(jobID), netURL.PathEscape(candidateID), APIVersion) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { @@ -332,14 +358,16 @@ func (c *OptimizeClient) GetCandidate( } // GetCandidateFile downloads a single file from a candidate. -// GET /optimize/candidates/{id}/files?path={path} +// GET /agent_optimization_jobs/{jobId}/candidates/{id}/files?path={path} func (c *OptimizeClient) GetCandidateFile( ctx context.Context, + jobID string, candidateID string, filePath string, ) (string, error) { - url := fmt.Sprintf("%s/optimize/candidates/%s/files?api-version=%s&path=%s", - c.endpoint, netURL.PathEscape(candidateID), APIVersion, netURL.QueryEscape(filePath)) + url := fmt.Sprintf("%s/%s/%s/candidates/%s/files?api-version=%s&path=%s", + c.endpoint, optimizeJobsPath, netURL.PathEscape(jobID), netURL.PathEscape(candidateID), + APIVersion, netURL.QueryEscape(filePath)) req, err := runtime.NewRequest(ctx, http.MethodGet, url) if err != nil { diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go index def48bc18a0..7e36d86d829 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/client_test.go @@ -24,7 +24,9 @@ func newTestClient(serverURL string) *OptimizeClient { pipeline := runtime.NewPipeline( "test", "v0.0.0", - runtime.PipelineOptions{}, + runtime.PipelineOptions{ + PerCall: []policy.Policy{foundryFeaturesPolicy{}}, + }, &policy.ClientOptions{}, ) return &OptimizeClient{ @@ -52,7 +54,7 @@ func TestStartOptimize(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { assert.Equal(t, http.MethodPost, r.Method) - assert.True(t, strings.HasSuffix(r.URL.Path, "/optimize")) + assert.True(t, strings.HasSuffix(r.URL.Path, "/agent_optimization_jobs")) assert.Contains(t, r.URL.RawQuery, "api-version="+APIVersion) w.WriteHeader(http.StatusAccepted) @@ -81,22 +83,22 @@ func TestGetOptimizeStatus(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { assert.Equal(t, http.MethodGet, r.Method) - assert.Contains(t, r.URL.Path, "/optimize/op-123") + assert.Contains(t, r.URL.Path, "/agent_optimization_jobs/op-123") assert.Contains(t, r.URL.RawQuery, "api-version="+APIVersion) + assert.Equal(t, "AgentsOptimization=V2Preview", r.Header.Get("Foundry-Features")) _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ - OperationID: "op-123", - Status: StatusCompleted, - CreatedAt: "2024-01-01T00:00:00Z", - UpdatedAt: "2024-01-01T01:00:00Z", - Best: &CandidateResult{ - Name: "candidate-1", - AvgScore: 0.92, - PassRate: 0.95, - }, - Baseline: &CandidateResult{ - Name: "baseline", - AvgScore: 0.6, + ID: "op-123", + Status: StatusCompleted, + CreatedAt: 1781036157, + UpdatedAt: 1781037526, + Result: &OptimizeResult{ + Best: "cand-1", + Baseline: "cand-0", + Candidates: []CandidateResult{ + {Name: "candidate-1", CandidateID: "cand-1", AvgScore: 0.92}, + {Name: "baseline", CandidateID: "cand-0", AvgScore: 0.6}, + }, }, }) })) @@ -106,12 +108,12 @@ func TestGetOptimizeStatus(t *testing.T) { status, err := client.GetOptimizeStatus(context.Background(), "op-123") require.NoError(t, err) - assert.Equal(t, "op-123", status.OperationID) + assert.Equal(t, "op-123", status.ID) assert.Equal(t, StatusCompleted, status.Status) - require.NotNil(t, status.Best) - assert.InDelta(t, 0.92, status.Best.AvgScore, 0.001) - require.NotNil(t, status.Baseline) - assert.InDelta(t, 0.6, status.Baseline.AvgScore, 0.001) + require.NotNil(t, status.BestCandidate()) + assert.InDelta(t, 0.92, status.BestCandidate().AvgScore, 0.001) + require.NotNil(t, status.BaselineCandidate()) + assert.InDelta(t, 0.6, status.BaselineCandidate().AvgScore, 0.001) } func TestListOptimizeJobs(t *testing.T) { @@ -125,8 +127,8 @@ func TestListOptimizeJobs(t *testing.T) { _ = json.NewEncoder(w).Encode(OptimizeListResponse{ Data: []OptimizeJobStatus{ - {OperationID: "op-1", Status: StatusRunning}, - {OperationID: "op-2", Status: StatusRunning}, + {ID: "op-1", Status: StatusRunning}, + {ID: "op-2", Status: StatusRunning}, }, FirstID: "op-1", LastID: "op-2", @@ -150,7 +152,7 @@ func TestCancelOptimize(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { assert.Equal(t, http.MethodPost, r.Method) - assert.Contains(t, r.URL.Path, "/optimize/op-xyz/cancel") + assert.Contains(t, r.URL.Path, "/agent_optimization_jobs/op-xyz:cancel") assert.Contains(t, r.URL.RawQuery, "api-version="+APIVersion) _ = json.NewEncoder(w).Encode(OptimizeCancelResponse{ @@ -226,7 +228,7 @@ func TestReportDeployment(t *testing.T) { var capturedBody map[string]string server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { assert.Equal(t, http.MethodPost, r.Method) - assert.Contains(t, r.URL.Path, "/optimize/candidates/cand-42:promote") + assert.Contains(t, r.URL.Path, "/agent_optimization_jobs/opt-1/candidates/cand-42:promote") assert.Contains(t, r.URL.RawQuery, "api-version="+APIVersion) err := json.NewDecoder(r.Body).Decode(&capturedBody) @@ -237,17 +239,17 @@ func TestReportDeployment(t *testing.T) { defer server.Close() client := newTestClient(server.URL) - err := client.ReportDeployment(t.Context(), &DeploymentReport{ + err := client.ReportDeployment(t.Context(), "opt-1", &DeploymentReport{ CandidateID: "cand-42", AgentName: "my-agent", AgentVersion: "3", }) require.NoError(t, err) - assert.Equal(t, "my-agent", capturedBody["agentName"]) - assert.Equal(t, "3", capturedBody["agentVersion"]) + assert.Equal(t, "my-agent", capturedBody["agent_name"]) + assert.Equal(t, "3", capturedBody["agent_version"]) // CandidateID should not appear in the body (json:"-") - assert.Empty(t, capturedBody["candidateId"]) + assert.Empty(t, capturedBody["candidate_id"]) } func TestReportDeployment_HTTPError(t *testing.T) { @@ -260,7 +262,7 @@ func TestReportDeployment_HTTPError(t *testing.T) { defer server.Close() client := newTestClient(server.URL) - err := client.ReportDeployment(t.Context(), &DeploymentReport{ + err := client.ReportDeployment(t.Context(), "opt-1", &DeploymentReport{ CandidateID: "bad-id", AgentName: "agent", AgentVersion: "1", @@ -269,3 +271,90 @@ func TestReportDeployment_HTTPError(t *testing.T) { require.Error(t, err) assert.Contains(t, err.Error(), "400") } + +// --------------------------------------------------------------------------- +// Candidate endpoints — nested under agent_optimization_jobs/{jobId} +// --------------------------------------------------------------------------- + +func TestGetCandidateConfig(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodGet, r.Method) + assert.Contains(t, r.URL.Path, "/agent_optimization_jobs/opt-1/candidates/cand-9/config") + assert.Contains(t, r.URL.RawQuery, "api-version="+APIVersion) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"system_prompt":"hello","model":"gpt-4o"}`)) + })) + defer server.Close() + + client := newTestClient(server.URL) + cfg, err := client.GetCandidateConfig(context.Background(), "opt-1", "cand-9") + + require.NoError(t, err) + assert.JSONEq(t, `{"system_prompt":"hello","model":"gpt-4o"}`, string(cfg)) +} + +func TestGetCandidateConfig_InvalidJSON(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`not-json`)) + })) + defer server.Close() + + client := newTestClient(server.URL) + _, err := client.GetCandidateConfig(context.Background(), "opt-1", "cand-9") + + require.Error(t, err) + assert.Contains(t, err.Error(), "not valid JSON") +} + +func TestGetCandidate(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodGet, r.Method) + assert.Contains(t, r.URL.Path, "/agent_optimization_jobs/opt-1/candidates/cand-9") + assert.Contains(t, r.URL.RawQuery, "api-version="+APIVersion) + + _ = json.NewEncoder(w).Encode(CandidateManifest{ + Files: []CandidateFile{ + {Path: "skills/foo/SKILL.md", Type: "skill"}, + {Path: "tools.json", Type: "tools"}, + }, + }) + })) + defer server.Close() + + client := newTestClient(server.URL) + manifest, err := client.GetCandidate(context.Background(), "opt-1", "cand-9") + + require.NoError(t, err) + require.Len(t, manifest.Files, 2) + assert.Equal(t, "skills/foo/SKILL.md", manifest.Files[0].Path) + assert.Equal(t, "skill", manifest.Files[0].Type) +} + +func TestGetCandidateFile(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodGet, r.Method) + assert.Contains(t, r.URL.Path, "/agent_optimization_jobs/opt-1/candidates/cand-9/files") + assert.Equal(t, "skills/foo/SKILL.md", r.URL.Query().Get("path")) + assert.Contains(t, r.URL.RawQuery, "api-version="+APIVersion) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("# Skill content")) + })) + defer server.Close() + + client := newTestClient(server.URL) + content, err := client.GetCandidateFile(context.Background(), "opt-1", "cand-9", "skills/foo/SKILL.md") + + require.NoError(t, err) + assert.Equal(t, "# Skill content", content) +} diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go index 2a2242588c5..f78db6fdbff 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models.go @@ -6,11 +6,18 @@ // dataset tasks, and skill/tool definitions. package optimize_api -import "encoding/json" +import ( + "encoding/json" + "maps" + "slices" +) // APIVersion is the API version used for all optimization service calls. const APIVersion = "v1" +// optimizeJobsPath is the base path segment for optimization job endpoints. +const optimizeJobsPath = "agent_optimization_jobs" + // Optimization job status constants. // The server may return either the old names (pending/running/completed) or // the new names (queued/in_progress/succeeded). Both sets are accepted. @@ -41,18 +48,39 @@ func IsTerminal(status string) bool { // OptimizeRequest is the top-level payload sent to POST /optimize. type OptimizeRequest struct { - Agent AgentIdentifier `json:"agent"` - Dataset []json.RawMessage `json:"dataset,omitempty"` - TrainDatasetReference *DatasetReference `json:"trainDatasetReference,omitempty"` - ValidationDatasetReference *DatasetReference `json:"validationDatasetReference,omitempty"` - Evaluators []string `json:"evaluators,omitempty"` - Options OptimizeOptions `json:"options"` + Agent AgentIdentifier `json:"agent"` + TrainDataset *Dataset `json:"train_dataset,omitempty"` + ValidationDataset *Dataset `json:"validation_dataset,omitempty"` + Evaluators []EvaluatorRef `json:"evaluators,omitempty"` + Options OptimizeOptions `json:"options"` } // AgentIdentifier references the agent to optimize by name and optional version. type AgentIdentifier struct { - AgentName string `json:"agentName"` - AgentVersion string `json:"agentVersion,omitempty"` + AgentName string `json:"agent_name"` + AgentVersion string `json:"agent_version,omitempty"` +} + +// Dataset type discriminator values for Dataset.Type. +const ( + DatasetTypeReference = "reference" + DatasetTypeInline = "inline" +) + +// Dataset is the optimization dataset payload. It is either a registered +// dataset reference (Type "reference", with Name/Version) or an inline set of +// items (Type "inline", with Items). +type Dataset struct { + Type string `json:"type"` + Name string `json:"name,omitempty"` + Version string `json:"version,omitempty"` + Items []json.RawMessage `json:"items,omitempty"` +} + +// EvaluatorRef references an evaluator by name and optional version. +type EvaluatorRef struct { + Name string `json:"name"` + Version string `json:"version,omitempty"` } // SkillDefinition describes a skill attached to an agent. @@ -78,68 +106,106 @@ type ToolFunction struct { Strict *bool `json:"strict,omitempty"` } -// DatasetTask is a single task in an inline dataset. -type DatasetTask struct { - Name string `json:"name,omitempty"` - Query string `json:"query,omitempty"` - Prompt string `json:"prompt"` - GroundTruth string `json:"groundTruth,omitempty"` - Criteria []Criterion `json:"criteria,omitempty"` -} - -// DatasetReference points to a registered dataset by name and version. -type DatasetReference struct { - Name string `json:"name"` - Version string `json:"version"` -} - -// Criterion is a named evaluation criterion. -type Criterion struct { - Name string `json:"name"` - Instruction string `json:"instruction"` -} - // OptimizeOptions controls the optimization run. type OptimizeOptions struct { - MaxIterations *int `json:"maxIterations,omitempty"` - EvalModel string `json:"evalModel,omitempty"` - OptimizationConfig map[string]json.RawMessage `json:"optimizationConfig,omitempty"` - OptimizationModel string `json:"optimizationModel,omitempty"` - EvaluationLevel string `json:"evaluationLevel,omitempty"` + MaxCandidates *int `json:"max_candidates,omitempty"` + EvalModel string `json:"eval_model,omitempty"` + OptimizationModel string `json:"optimization_model"` + OptimizationConfig map[string]json.RawMessage `json:"optimization_config,omitempty"` + EvaluationLevel string `json:"evaluation_level,omitempty"` } // --- Response models --- // OptimizeResponse is the immediate response from POST /optimize. type OptimizeResponse struct { - OperationID string `json:"operationId"` + OperationID string `json:"id"` Status string `json:"status"` } // OptimizeJobStatus is the full status of an optimization job. type OptimizeJobStatus struct { - OperationID string `json:"operationId"` - Status string `json:"status"` - CreatedAt string `json:"createdAt"` - UpdatedAt string `json:"updatedAt"` - Agent *AgentIdentifier `json:"agent,omitempty"` - Progress *JobProgress `json:"progress,omitempty"` - Error *JobError `json:"error,omitempty"` - Baseline *CandidateResult `json:"baseline,omitempty"` - Best *CandidateResult `json:"best,omitempty"` - Candidates []CandidateResult `json:"candidates,omitempty"` - AllTargetAttributesFailed bool `json:"allTargetAttributesFailed,omitempty"` - Warnings []string `json:"warnings,omitempty"` -} - -// JobProgress reports iteration-level progress. + ID string `json:"id"` + Status string `json:"status"` + Inputs *OptimizeRequest `json:"inputs,omitempty"` + // Agent is the top-level agent identifier returned by the list endpoint, + // where jobs are not wrapped in an "inputs" envelope. + Agent *AgentIdentifier `json:"agent,omitempty"` + Result *OptimizeResult `json:"result,omitempty"` + Progress *JobProgress `json:"progress,omitempty"` + Error *JobError `json:"error,omitempty"` + Warnings []string `json:"warnings,omitempty"` + AllTargetAttributesFailed bool `json:"all_target_attributes_failed,omitempty"` + CreatedAt int64 `json:"created_at,omitempty"` + UpdatedAt int64 `json:"updated_at,omitempty"` +} + +// OptimizeResult holds the optimization outcome. Baseline and Best are +// candidate IDs that reference entries in Candidates. +type OptimizeResult struct { + Baseline string `json:"baseline,omitempty"` + Best string `json:"best,omitempty"` + Candidates []CandidateResult `json:"candidates,omitempty"` +} + +// findCandidate returns the candidate whose CandidateID or Name matches ref, +// or nil when ref is empty or no candidate matches. +func (r *OptimizeResult) findCandidate(ref string) *CandidateResult { + if ref == "" { + return nil + } + for i := range r.Candidates { + if r.Candidates[i].CandidateID == ref || r.Candidates[i].Name == ref { + return &r.Candidates[i] + } + } + return nil +} + +// AgentName returns the agent name from the job inputs, falling back to the +// top-level agent field used by the list endpoint. Returns "" when neither is +// present. +func (s *OptimizeJobStatus) AgentName() string { + if s.Inputs != nil && s.Inputs.Agent.AgentName != "" { + return s.Inputs.Agent.AgentName + } + if s.Agent != nil { + return s.Agent.AgentName + } + return "" +} + +// Candidates returns the result candidates (nil-safe). +func (s *OptimizeJobStatus) Candidates() []CandidateResult { + if s.Result == nil { + return nil + } + return s.Result.Candidates +} + +// BestCandidate resolves the best candidate by matching Result.Best against +// the candidate list. Returns nil when there is no result or no match. +func (s *OptimizeJobStatus) BestCandidate() *CandidateResult { + if s.Result == nil { + return nil + } + return s.Result.findCandidate(s.Result.Best) +} + +// BaselineCandidate resolves the baseline candidate by matching +// Result.Baseline against the candidate list. +func (s *OptimizeJobStatus) BaselineCandidate() *CandidateResult { + if s.Result == nil { + return nil + } + return s.Result.findCandidate(s.Result.Baseline) +} + +// JobProgress reports candidate-level progress for a running optimization job. type JobProgress struct { - CurrentTargetAttribute string `json:"currentTargetAttribute"` - CurrentIteration int `json:"currentIteration"` - TasksCompleted int `json:"tasksCompleted"` - TasksTotal int `json:"tasksTotal"` - BestScore float64 `json:"bestScore"` - ElapsedSeconds float64 `json:"elapsedSeconds"` + CandidatesCompleted int `json:"candidates_completed"` + BestScore float64 `json:"best_score"` + ElapsedSeconds float64 `json:"elapsed_seconds"` } // JobError captures an error from a failed job. @@ -168,15 +234,22 @@ func (e *JobError) UnmarshalJSON(data []byte) error { // CandidateResult holds the evaluation result for a single candidate. type CandidateResult struct { - Name string `json:"name"` - AvgScore float64 `json:"avgScore"` - AvgTokens float64 `json:"avgTokens"` - PassRate float64 `json:"passRate"` - IsParetoOptimal bool `json:"isParetoOptimal,omitempty"` - Rationale string `json:"rationale,omitempty"` - CandidateID string `json:"candidateId,omitempty"` - EvalID string `json:"evalId,omitempty"` - EvalRunID string `json:"evalRunId,omitempty"` + Name string `json:"name"` + Mutations map[string]string `json:"mutations,omitempty"` + AvgScore float64 `json:"avg_score"` + AvgTokens float64 `json:"avg_tokens"` + CandidateID string `json:"candidate_id,omitempty"` + EvalID string `json:"eval_id,omitempty"` + EvalRunID string `json:"eval_run_id,omitempty"` +} + +// MutationKeys returns the candidate's mutation keys (the names of the agent +// attributes that were changed), sorted for stable display. +func (c *CandidateResult) MutationKeys() []string { + if len(c.Mutations) == 0 { + return nil + } + return slices.Sorted(maps.Keys(c.Mutations)) } // --- List response --- @@ -184,16 +257,16 @@ type CandidateResult struct { // OptimizeListResponse is the paginated list of optimization jobs. type OptimizeListResponse struct { Data []OptimizeJobStatus `json:"data"` - FirstID string `json:"firstId"` - LastID string `json:"lastId"` - HasMore bool `json:"hasMore"` + FirstID string `json:"first_id"` + LastID string `json:"last_id"` + HasMore bool `json:"has_more"` } // --- Cancel response --- // OptimizeCancelResponse is returned when cancelling an optimization job. type OptimizeCancelResponse struct { - OperationID string `json:"operationId"` + OperationID string `json:"operation_id"` Status string `json:"status"` } @@ -202,9 +275,9 @@ type OptimizeCancelResponse struct { // DeploymentReport is sent to the optimization service after a candidate is promoted, // creating the candidate→deployment mapping. type DeploymentReport struct { - CandidateID string `json:"-"` // used in URL path, not serialized - AgentName string `json:"agentName"` // deployed agent name - AgentVersion string `json:"agentVersion"` // deployed agent version + CandidateID string `json:"-"` // used in URL path, not serialized + AgentName string `json:"agent_name"` // deployed agent name + AgentVersion string `json:"agent_version"` // deployed agent version } // --- Candidate models --- diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go index 1e0ee5a84a9..1ba21abf18a 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/models_test.go @@ -20,16 +20,18 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { AgentName: "my-agent", AgentVersion: "1", }, - Dataset: []json.RawMessage{ - json.RawMessage(`{"name":"task1","prompt":"What is 2+2?","groundTruth":"4","criteria":[{"name":"accuracy","instruction":"answer must be correct"}]}`), + TrainDataset: &Dataset{ + Type: DatasetTypeInline, + Items: []json.RawMessage{ + json.RawMessage(`{"query":"What is 2+2?","ground_truth":"4"}`), + }, }, - TrainDatasetReference: &DatasetReference{ - Name: "train-ds", - Version: "1", + Evaluators: []EvaluatorRef{ + {Name: "coherence"}, + {Name: "relevance", Version: "1"}, }, - Evaluators: []string{"coherence", "relevance"}, Options: OptimizeOptions{ - MaxIterations: new(5), + MaxCandidates: new(5), EvalModel: "gpt-4o-mini", OptimizationModel: "gpt-4o", }, @@ -39,13 +41,13 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { require.NoError(t, err, "marshal should succeed") s := string(data) - // Verify camelCase JSON tags + // Verify snake_case JSON tags for _, field := range []string{ - `"agent"`, `"agentName"`, `"agentVersion"`, - `"dataset"`, `"trainDatasetReference"`, `"evaluators"`, - `"options"`, `"evalModel"`, `"maxIterations"`, - `"optimizationModel"`, - `"groundTruth"`, + `"agent"`, `"agent_name"`, `"agent_version"`, + `"train_dataset"`, `"type"`, `"items"`, `"evaluators"`, + `"options"`, `"eval_model"`, `"max_candidates"`, + `"optimization_model"`, + `"ground_truth"`, } { assert.True(t, strings.Contains(s, field), "JSON should contain %s", field) } @@ -54,11 +56,13 @@ func TestOptimizeRequest_RoundTrip(t *testing.T) { require.NoError(t, json.Unmarshal(data, &got), "unmarshal should succeed") assert.Equal(t, original.Agent.AgentName, got.Agent.AgentName) - assert.Len(t, got.Dataset, 1) - assert.Contains(t, string(got.Dataset[0]), `"task1"`) - assert.Contains(t, string(got.Dataset[0]), `"groundTruth"`) - assert.NotNil(t, got.TrainDatasetReference) - assert.Equal(t, "train-ds", got.TrainDatasetReference.Name) + require.NotNil(t, got.TrainDataset) + assert.Equal(t, DatasetTypeInline, got.TrainDataset.Type) + assert.Len(t, got.TrainDataset.Items, 1) + assert.Contains(t, string(got.TrainDataset.Items[0]), `"ground_truth"`) + require.Len(t, got.Evaluators, 2) + assert.Equal(t, "relevance", got.Evaluators[1].Name) + assert.Equal(t, "1", got.Evaluators[1].Version) assert.Equal(t, "gpt-4o-mini", got.Options.EvalModel) } @@ -66,37 +70,34 @@ func TestOptimizeJobStatus_RoundTrip(t *testing.T) { t.Parallel() original := OptimizeJobStatus{ - OperationID: "op-123", - Status: StatusRunning, - CreatedAt: "2024-01-01T00:00:00Z", - UpdatedAt: "2024-01-01T01:00:00Z", - Agent: &AgentIdentifier{ - AgentName: "agent-1", + ID: "op-123", + Status: StatusSucceeded, + CreatedAt: 1781036157, + UpdatedAt: 1781037526, + Inputs: &OptimizeRequest{ + Agent: AgentIdentifier{AgentName: "agent-1", AgentVersion: "1"}, + Options: OptimizeOptions{ + EvalModel: "gpt-4o", + MaxCandidates: new(5), + }, + TrainDataset: &Dataset{Type: DatasetTypeReference, Name: "ds", Version: "2.0"}, + Evaluators: []EvaluatorRef{{Name: "task_adherence"}}, }, - Progress: &JobProgress{ - CurrentTargetAttribute: "prompt_mutation", - CurrentIteration: 3, - TasksCompleted: 15, - TasksTotal: 20, - BestScore: 0.85, - ElapsedSeconds: 120.5, - }, - Baseline: &CandidateResult{ - Name: "baseline", - AvgScore: 0.6, - PassRate: 0.5, - }, - Best: &CandidateResult{ - Name: "candidate-2", - AvgScore: 0.9, - AvgTokens: 150.0, - PassRate: 0.95, - CandidateID: "cand-2", - Rationale: "Improved prompt clarity", - }, - Candidates: []CandidateResult{ - {Name: "candidate-1", AvgScore: 0.7}, + Result: &OptimizeResult{ + Baseline: "cand-1", + Best: "cand-1", + Candidates: []CandidateResult{ + { + Name: "baseline", + AvgScore: 0.87, + AvgTokens: 0.0, + CandidateID: "cand-1", + EvalID: "eval-1", + EvalRunID: "evalrun-1", + }, + }, }, + Warnings: []string{"baseline only"}, } data, err := json.Marshal(original) @@ -104,12 +105,11 @@ func TestOptimizeJobStatus_RoundTrip(t *testing.T) { s := string(data) for _, field := range []string{ - `"operationId"`, `"status"`, `"createdAt"`, `"updatedAt"`, - `"progress"`, `"currentTargetAttribute"`, `"currentIteration"`, - `"tasksCompleted"`, `"tasksTotal"`, `"bestScore"`, `"elapsedSeconds"`, - `"baseline"`, `"best"`, `"candidates"`, `"candidateId"`, - `"avgScore"`, `"avgTokens"`, `"passRate"`, - `"rationale"`, + `"id"`, `"status"`, `"created_at"`, `"updated_at"`, + `"inputs"`, `"agent"`, `"options"`, `"train_dataset"`, `"evaluators"`, + `"result"`, `"baseline"`, `"best"`, `"candidates"`, `"candidate_id"`, + `"avg_score"`, `"avg_tokens"`, + `"eval_id"`, `"eval_run_id"`, `"warnings"`, } { assert.True(t, strings.Contains(s, field), "JSON should contain %s", field) } @@ -117,26 +117,25 @@ func TestOptimizeJobStatus_RoundTrip(t *testing.T) { var got OptimizeJobStatus require.NoError(t, json.Unmarshal(data, &got), "unmarshal should succeed") - assert.Equal(t, "op-123", got.OperationID) - assert.Equal(t, StatusRunning, got.Status) - assert.NotNil(t, got.Agent) - assert.Equal(t, "agent-1", got.Agent.AgentName) - assert.NotNil(t, got.Progress) - assert.Equal(t, 3, got.Progress.CurrentIteration) - assert.InDelta(t, 0.85, got.Progress.BestScore, 0.001) - assert.NotNil(t, got.Baseline) - assert.InDelta(t, 0.6, got.Baseline.AvgScore, 0.001) - assert.NotNil(t, got.Best) - assert.Equal(t, "cand-2", got.Best.CandidateID) - assert.Len(t, got.Candidates, 1) + assert.Equal(t, "op-123", got.ID) + assert.Equal(t, StatusSucceeded, got.Status) + assert.Equal(t, int64(1781036157), got.CreatedAt) + assert.Equal(t, "agent-1", got.AgentName()) + require.NotNil(t, got.Result) + assert.Len(t, got.Candidates(), 1) + // Baseline and Best are candidate IDs resolved against the candidate list. + require.NotNil(t, got.BestCandidate()) + assert.InDelta(t, 0.87, got.BestCandidate().AvgScore, 0.001) + require.NotNil(t, got.BaselineCandidate()) + assert.Equal(t, "cand-1", got.BaselineCandidate().CandidateID) } func TestOptimizeJobStatus_ErrorField(t *testing.T) { t.Parallel() original := OptimizeJobStatus{ - OperationID: "op-err", - Status: StatusFailed, + ID: "op-err", + Status: StatusFailed, Error: &JobError{ Code: "InternalError", Message: "something went wrong", @@ -171,8 +170,8 @@ func TestOptimizeListResponse_RoundTrip(t *testing.T) { original := OptimizeListResponse{ Data: []OptimizeJobStatus{ - {OperationID: "op-1", Status: StatusCompleted}, - {OperationID: "op-2", Status: StatusRunning}, + {ID: "op-1", Status: StatusCompleted}, + {ID: "op-2", Status: StatusRunning}, }, FirstID: "op-1", LastID: "op-2", @@ -191,6 +190,30 @@ func TestOptimizeListResponse_RoundTrip(t *testing.T) { assert.True(t, got.HasMore) } +// TestAgentName_ListShape verifies the list endpoint's top-level "agent" field +// is parsed (jobs there are not wrapped in an "inputs" envelope). +func TestAgentName_ListShape(t *testing.T) { + t.Parallel() + + body := `{"id":"opt_1","status":"succeeded","agent":{"agent_name":"optimization-demo-v6","agent_version":"56"}}` + var got OptimizeJobStatus + require.NoError(t, json.Unmarshal([]byte(body), &got)) + + assert.Equal(t, "optimization-demo-v6", got.AgentName()) +} + +// TestAgentName_InputsTakesPrecedence verifies inputs.agent wins over the +// top-level agent when both are present. +func TestAgentName_InputsTakesPrecedence(t *testing.T) { + t.Parallel() + + s := OptimizeJobStatus{ + Inputs: &OptimizeRequest{Agent: AgentIdentifier{AgentName: "from-inputs"}}, + Agent: &AgentIdentifier{AgentName: "from-top-level"}, + } + assert.Equal(t, "from-inputs", s.AgentName()) +} + // ---- DeploymentReport serialization ---- func TestDeploymentReport_JSON_ExcludesCandidateID(t *testing.T) { @@ -206,18 +229,18 @@ func TestDeploymentReport_JSON_ExcludesCandidateID(t *testing.T) { require.NoError(t, err) // CandidateID has json:"-", so it must not appear in the body. - assert.NotContains(t, string(data), "candidateId") + assert.NotContains(t, string(data), "candidate_id") assert.NotContains(t, string(data), "cand_abc123") - // agentName and agentVersion must be present. - assert.Contains(t, string(data), `"agentName":"my-agent"`) - assert.Contains(t, string(data), `"agentVersion":"3"`) + // agent_name and agent_version must be present. + assert.Contains(t, string(data), `"agent_name":"my-agent"`) + assert.Contains(t, string(data), `"agent_version":"3"`) } func TestDeploymentReport_JSON_RoundTrip(t *testing.T) { t.Parallel() - body := `{"agentName":"test-agent","agentVersion":"5"}` + body := `{"agent_name":"test-agent","agent_version":"5"}` var report DeploymentReport require.NoError(t, json.Unmarshal([]byte(body), &report)) diff --git a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go index fe623db11d7..9e0ad4da5a8 100644 --- a/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go +++ b/cli/azd/extensions/azure.ai.agents/internal/pkg/agents/optimize_api/poller_test.go @@ -42,10 +42,10 @@ func TestPoller_PollsUntilCompleted(t *testing.T) { status = StatusCompleted } _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ - OperationID: "op-1", - Status: status, + ID: "op-1", + Status: status, Progress: &JobProgress{ - CurrentIteration: int(n), + CandidatesCompleted: int(n), }, }) })) @@ -79,8 +79,8 @@ func TestPoller_PollsUntilFailed(t *testing.T) { status = StatusFailed } _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ - OperationID: "op-fail", - Status: status, + ID: "op-fail", + Status: status, Error: &JobError{ Code: "InternalError", Message: "something broke", @@ -105,8 +105,8 @@ func TestPoller_ContextCancellation(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ - OperationID: "op-cancel", - Status: StatusRunning, + ID: "op-cancel", + Status: StatusRunning, }) })) defer server.Close() @@ -137,8 +137,8 @@ func TestPoller_OnProgressCalled(t *testing.T) { status = StatusCompleted } _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ - OperationID: "op-prog", - Status: status, + ID: "op-prog", + Status: status, }) })) defer server.Close() @@ -173,8 +173,8 @@ func TestPoller_TransientRetryThenSuccess(t *testing.T) { return } _ = json.NewEncoder(w).Encode(OptimizeJobStatus{ - OperationID: "op-retry", - Status: StatusCompleted, + ID: "op-retry", + Status: StatusCompleted, }) })) defer server.Close()