Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ func TestUpdateDataset_APIVersion(t *testing.T) {
configPath := filepath.Join(dir, "eval.yaml")
cfg := &evalConfig{
Config: opt_eval.Config{
DatasetReference: &evalDatasetRef{
Dataset: &evalDatasetRef{
Name: "test-ds",
Version: "v1",
LocalURI: dataDir,
Expand Down
24 changes: 13 additions & 11 deletions cli/azd/extensions/azure.ai.agents/internal/cmd/eval_generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ func submitEvalJobs(
needEvalGen = flags.regenerateEvaluator
if !needDatasetGen {
evalCfg.DatasetFile = existingCfg.DatasetFile
evalCfg.Config.DatasetReference = existingCfg.Config.DatasetReference
evalCfg.Config.Dataset = existingCfg.Config.Dataset
}
if !needEvalGen {
evalCfg.Evaluators = existingCfg.Evaluators
Expand All @@ -313,7 +313,9 @@ func submitEvalJobs(
if err != nil {
return nil, err
}
evalCfg.DatasetFile = datasetPath
evalCfg.Dataset = &opt_eval.DatasetRef{
LocalURI: datasetPath,
}
}
}

Expand Down Expand Up @@ -363,16 +365,16 @@ func writeAndPrintEvalResult(
fmt.Println(color.GreenString("\nEval suite created"))
}
fmt.Printf(" Config: %s\n", configPath)
if evalCfg.DatasetFile != "" {
fmt.Printf(" Dataset: %s\n", evalCfg.DatasetFile)
} else if evalCfg.DatasetReference != nil && evalCfg.DatasetReference.Name != "" {
ds := evalCfg.DatasetReference.Name
if evalCfg.DatasetReference.Version != "" {
ds += " (" + evalCfg.DatasetReference.Version + ")"
if localPath := evalCfg.LocalDatasetPath(); localPath != "" {
fmt.Printf(" Dataset: %s\n", localPath)
} else if ref := evalCfg.RemoteDatasetReference(); ref != nil {
ds := ref.Name
if ref.Version != "" {
ds += " (" + ref.Version + ")"
}
fmt.Printf(" Dataset: %s\n", ds)
if resolved.hasProject {
fmt.Printf(" %s\n", eval_api.DatasetArtifactPath(resolved.agentProject, evalCfg.DatasetReference))
fmt.Printf(" %s\n", eval_api.DatasetArtifactPath(resolved.agentProject, ref))
}
}
for _, evaluator := range evalCfg.Evaluators {
Expand Down Expand Up @@ -418,9 +420,9 @@ func printEvalPortalLinks(ctx context.Context, resolved *evalResolvedContext, ev
return
}
hasLink := false
if evalCfg.DatasetReference != nil && evalCfg.DatasetReference.Name != "" {
if ref := evalCfg.RemoteDatasetReference(); ref != nil {
fmt.Printf("\n "+color.HiBlackString("Portal:")+"\n Dataset: %s\n",
color.CyanString(prefix.DatasetURL(evalCfg.DatasetReference.Name, evalCfg.DatasetReference.Version)))
color.CyanString(prefix.DatasetURL(ref.Name, ref.Version)))
hasLink = true
}
for _, evaluator := range evalCfg.Evaluators {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ func pollAndFinalizeJobs(
if dsRef == nil {
return
}
evalCfg.DatasetReference = dsRef
evalCfg.Dataset = dsRef

if resolved.hasProject {
localURI, err := eval_api.DownloadDatasetArtifact(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (

// promptEvalGenerateOptions runs interactive prompts for eval generate options that
// were not provided via flags: name, instruction, trace days, eval model,
// and max samples.
// and max samples. Max samples is skipped when an existing dataset is supplied.
func promptEvalGenerateOptions(ctx context.Context, resolved *evalResolvedContext, flags *evalGenerateFlags, noPrompt bool) error {
azdClient := resolved.azdClient
if noPrompt {
Expand Down Expand Up @@ -158,7 +158,9 @@ func promptEvalGenerateOptions(ctx context.Context, resolved *evalResolvedContex
flags.evalModel = selected
}

if !flags.maxSamplesSet {
// Max samples only applies when generating a dataset. If the user supplied
// an existing dataset (--dataset), there is nothing to generate, so skip it.
if !flags.maxSamplesSet && flags.dataset == "" {
resp, err := azdClient.Prompt().Prompt(ctx, &azdext.PromptRequest{
Options: &azdext.PromptOptions{
Message: "Max samples (between 15 and 1000)",
Expand Down Expand Up @@ -193,8 +195,8 @@ func promptRegenerateChoices(

// Ask about dataset.
datasetLabel := existingCfg.DatasetFile
if datasetLabel == "" && existingCfg.DatasetReference != nil {
datasetLabel = existingCfg.DatasetReference.Name
if datasetLabel == "" && existingCfg.Dataset != nil {
datasetLabel = existingCfg.Dataset.Name
}
if datasetLabel != "" {
resp, err := prompt.Confirm(ctx, &azdext.ConfirmRequest{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -408,8 +408,8 @@ func TestBuildOpenAIEvalRequest(t *testing.T) {
Name: "agent-1",
Version: "v1",
},
DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"},
Evaluators: opt_eval.EvaluatorList{{Name: "builtin.quality"}},
Dataset: &evalDatasetRef{Name: "ds", Version: "v1"},
Evaluators: opt_eval.EvaluatorList{{Name: "builtin.quality"}},
},
Options: &opt_eval.Options{EvalModel: "gpt-4o"},
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ func promptDatasetSelection(
value := strings.TrimSpace(resp.Value)
if value == "" {
return "", nil, fmt.Errorf(
"a dataset is required: use --dataset <file-or-name>, or provide dataset_file / dataset_reference " +
"a dataset is required: use --dataset <file-or-name>, or provide a dataset " +
"in your config, or run 'azd ai agent eval generate' to generate one")
}

Expand Down
12 changes: 6 additions & 6 deletions cli/azd/extensions/azure.ai.agents/internal/cmd/eval_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,21 +133,21 @@ func runEvalRun(ctx context.Context, flags *evalRunFlags, noPrompt bool) error {
)

// Set source from local dataset file or remote dataset reference.
if evalCfg.DatasetFile != "" {
if localPath := evalCfg.LocalDatasetPath(); localPath != "" {
// Resolve relative paths against the agent project directory so
// eval.yaml files with project-relative dataset_file entries work
// eval.yaml files with project-relative local dataset entries work
// regardless of the caller's working directory.
datasetPath := eval_api.ResolveRelPath(evalCfg.DatasetFile, resolved.agentProject)
datasetPath := eval_api.ResolveRelPath(localPath, resolved.agentProject)
items, err := loadJSONLFile[map[string]any](datasetPath)
if err != nil {
return err
}
dataSource.SetFileContent(items)
} else if evalCfg.DatasetReference != nil {
fileID := buildDatasetFileID(resolved.projectEndpoint, evalCfg.DatasetReference)
} else if ref := evalCfg.RemoteDatasetReference(); ref != nil {
fileID := buildDatasetFileID(resolved.projectEndpoint, ref)
dataSource.SetFileID(fileID)
} else {
return fmt.Errorf("no dataset configured; run 'azd ai agent eval generate' or specify dataset_file / dataset_reference in the eval config")
return fmt.Errorf("no dataset configured; run 'azd ai agent eval generate' or specify a dataset in the eval config")
}

runReq.DataSource = dataSource
Expand Down
10 changes: 5 additions & 5 deletions cli/azd/extensions/azure.ai.agents/internal/cmd/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ func TestWriteEvalReviewArtifacts(t *testing.T) {
dir := t.TempDir()

cfg := &evalConfig{}
cfg.DatasetReference = &evalDatasetRef{Name: "test-data", Version: "v1"}
cfg.Dataset = &evalDatasetRef{Name: "test-data", Version: "v1"}
cfg.Evaluators = opt_eval.EvaluatorList{{Name: "quality"}}

err := eval_api.WriteEvalReviewArtifacts(dir, cfg)
Expand Down Expand Up @@ -420,8 +420,8 @@ func TestEvalConfigRoundTrip(t *testing.T) {
Kind: agent_yaml.AgentKindHosted,
Version: "v1",
},
DatasetReference: &evalDatasetRef{Name: "ds", Version: "v1"},
Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}},
Dataset: &evalDatasetRef{Name: "ds", Version: "v1"},
Evaluators: opt_eval.EvaluatorList{{Name: "builtin.task_adherence"}},
},
Options: &opt_eval.Options{
EvalModel: "gpt-4o",
Expand All @@ -441,8 +441,8 @@ func TestEvalConfigRoundTrip(t *testing.T) {
assert.Equal(t, original.Agent.Version, loaded.Agent.Version)
assert.Equal(t, "gpt-4o", loaded.Options.EvalModel)
assert.Equal(t, original.MaxSamples, loaded.MaxSamples)
require.NotNil(t, loaded.DatasetReference)
assert.Equal(t, "ds", loaded.DatasetReference.Name)
require.NotNil(t, loaded.Dataset)
assert.Equal(t, "ds", loaded.Dataset.Name)
require.Len(t, loaded.Evaluators, 1)
assert.Equal(t, "builtin.task_adherence", loaded.Evaluators[0].Name)
}
Expand Down
10 changes: 5 additions & 5 deletions cli/azd/extensions/azure.ai.agents/internal/cmd/eval_update.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ func runEvalUpdate(ctx context.Context, flags *evalUpdateFlags, noPrompt bool) e
}

// Detect what has local changes.
hasDataset := evalCfg.DatasetReference != nil &&
evalCfg.DatasetReference.Name != "" &&
evalCfg.DatasetReference.LocalURI != ""
hasDataset := evalCfg.Dataset != nil &&
evalCfg.Dataset.Name != "" &&
evalCfg.Dataset.LocalURI != ""
hasEvaluators := len(evalCfg.Evaluators.FindByLocalURI()) > 0

// Determine what to update based on flags and interactive prompts.
Expand All @@ -85,7 +85,7 @@ func runEvalUpdate(ctx context.Context, flags *evalUpdateFlags, noPrompt bool) e
if hasDataset {
updateDS = confirmUpdate(ctx, resolved, fmt.Sprintf(
"Dataset %s has local changes. Upload new version?",
evalCfg.DatasetReference.Name,
evalCfg.Dataset.Name,
))
}
if hasEvaluators {
Expand Down Expand Up @@ -145,7 +145,7 @@ func updateDataset(
evalCfg *evalConfig,
configPath string,
) (int, error) {
ref := evalCfg.DatasetReference
ref := evalCfg.Dataset
if ref == nil || ref.Name == "" || ref.LocalURI == "" {
return 0, nil
}
Expand Down
Loading
Loading