From 9d4b94f653470fda23257558639b70b565577c11 Mon Sep 17 00:00:00 2001 From: Paulo Lacerda Date: Wed, 17 Jun 2026 08:53:37 -0300 Subject: [PATCH] chore: prepare release 0.4.2 Patch release: make 'agentops eval init' resilient to the azure.ai.agents extension 0.1.40 rename of 'azd ai agent eval init' -> 'generate' by trying 'generate' first and falling back to legacy 'init' on older extensions. See CHANGELOG [0.4.2]. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 16 +++ src/agentops/services/azd_eval_init.py | 162 +++++++++++++++++----- tests/unit/test_azd_eval_init.py | 177 ++++++++++++++++++++++++- 3 files changed, 318 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39fc7226..e7180fa3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres ## [Unreleased] +## [0.4.2] - 2026-06-17 + +### Fixed +- **`agentops eval init` now works with both old and new `azure.ai.agents` azd + extensions.** Version 0.1.40 of the extension renamed the eval subcommand from + `azd ai agent eval init` to `azd ai agent eval generate`, which made + `agentops eval init` hard-fail with `Command "init" is deprecated, use 'azd ai + agent eval generate' instead`. AgentOps now invokes `generate` first and + transparently falls back to the legacy `init` subcommand when an older + extension does not recognise `generate`. The fallback only triggers on + subcommand-name/deprecation errors; genuine failures (authentication, project + endpoint, timeouts) are still surfaced immediately and unchanged. All + previously passed flags (`--project-endpoint`, `--agent`, + `--gen-instruction-file`, `--eval-model`, `--dataset`, `--evaluator`) and the + recipe discovery/persistence behaviour are preserved. 
+ ## [0.4.1] - 2026-06-15 ### Changed diff --git a/src/agentops/services/azd_eval_init.py b/src/agentops/services/azd_eval_init.py index 2162769b..0c78b755 100644 --- a/src/agentops/services/azd_eval_init.py +++ b/src/agentops/services/azd_eval_init.py @@ -1,4 +1,10 @@ -"""Wrapper helpers for ``azd ai agent eval init``.""" +"""Wrapper helpers for ``azd ai agent eval`` recipe generation. + +The ``azure.ai.agents`` azd extension renamed this subcommand in 0.1.40: +``azd ai agent eval init`` became ``azd ai agent eval generate``. These helpers +prefer the new ``generate`` name and fall back to the legacy ``init`` name so +AgentOps keeps working across extension versions. +""" from __future__ import annotations @@ -62,9 +68,11 @@ def run_azd_eval_init( force: bool = False, timeout_seconds: float = AZD_EVAL_TIMEOUT_SECONDS, ) -> AzdEvalInitResult: - """Run ``azd ai agent eval init`` and persist ``eval_recipe``. + """Run ``azd ai agent eval generate`` and persist ``eval_recipe``. - The azd command remains the source of truth for generating datasets, + Prefers the ``generate`` subcommand (azure.ai.agents >= 0.1.40) and falls + back to the legacy ``init`` subcommand on older extensions. The azd command + remains the source of truth for generating datasets, evaluators, and rubric assets. AgentOps only delegates the command, finds the generated recipe, and records the recipe path in ``agentops.yaml`` so future gates are deterministic. @@ -95,17 +103,18 @@ def run_azd_eval_init( f"{AZD_EXTENSION_NAME}`), then rerun `agentops eval init`." 
) - command = ["azd", "--no-prompt", "ai", "agent", "eval", "init"] + base_command = ["azd", "--no-prompt", "ai", "agent", "eval"] + arguments: list[str] = [] project_endpoint = _project_endpoint_from_config_or_env(resolved_config) if project_endpoint: - command.extend(["--project-endpoint", project_endpoint]) + arguments.extend(["--project-endpoint", project_endpoint]) agent_name = _agent_name_from_config(resolved_config) if agent_name: - command.extend(["--agent", agent_name]) + arguments.extend(["--agent", agent_name]) effective_dataset = dataset or _dataset_from_config(resolved_config) instruction_file = _prompt_file_from_config(resolved_config) if effective_dataset is None and instruction_file is not None: - command.extend( + arguments.extend( [ "--gen-instruction-file", _command_path(instruction_file, workspace=root), @@ -113,47 +122,29 @@ def run_azd_eval_init( ) eval_model = _eval_model_from_config(resolved_config) if eval_model: - command.extend(["--eval-model", eval_model]) + arguments.extend(["--eval-model", eval_model]) if effective_dataset is not None: effective_dataset = _azd_dataset_from_agentops_dataset( effective_dataset, workspace=root, ) - command.extend( + arguments.extend( ["--dataset", _command_path(effective_dataset, workspace=root)] ) for evaluator in _azd_evaluators_from_config(resolved_config): - command.extend(["--evaluator", evaluator]) - - try: - completed = subprocess.run( - command, - cwd=str(root), - text=True, - encoding="utf-8", - errors="replace", - capture_output=True, - timeout=timeout_seconds, - check=False, - ) - except FileNotFoundError as exc: - raise AzdBackendError( - "azd was not found on PATH. Install the Azure Developer CLI and the " - f"`{AZD_EXTENSION_NAME}` extension, then rerun `agentops eval init`." - ) from exc - except subprocess.TimeoutExpired as exc: - raise AzdBackendError( - f"{' '.join(command)} timed out after {timeout_seconds:g}s." 
- ) from exc + arguments.extend(["--evaluator", evaluator]) - if completed.returncode != 0: - detail = completed.stderr.strip() or completed.stdout.strip() or f"exit code {completed.returncode}" - raise AzdBackendError(f"azd ai agent eval init failed: {detail}") + completed = _run_eval_subcommand( + base_command, + arguments, + cwd=root, + timeout_seconds=timeout_seconds, + ) recipe = find_eval_yaml(root) if recipe is None: raise AzdBackendError( - "azd ai agent eval init completed, but AgentOps could not find the " + "azd ai agent eval completed, but AgentOps could not find the " "generated eval.yaml. Move it under the workspace root or src// " "and set `eval_recipe:` in agentops.yaml." ) @@ -175,6 +166,107 @@ def _find_recipe_if_unambiguous(workspace: Path) -> Optional[Path]: return None +# azd renamed this subcommand in the ``azure.ai.agents`` extension 0.1.40: +# ``init`` became ``generate``. Try the new name first and fall back to the +# legacy name so AgentOps works whether the consumer has an old or new +# extension installed. +_EVAL_SUBCOMMANDS: tuple[str, ...] = ("generate", "init") + + +def _eval_subcommand_unsupported(*outputs: str) -> bool: + """Return True when azd reports the eval subcommand name is unknown/deprecated. + + Matches the azd/cobra-style messages emitted when an installed + ``azure.ai.agents`` extension does not recognise a subcommand name (older + extensions lack ``generate``) or reports the legacy ``init`` name as + deprecated (newer extensions). Centralised here so the fallback decision is + unit-testable and robust to minor wording changes. 
+ """ + haystack = " ".join(text.lower() for text in outputs if text) + return any( + phrase in haystack + for phrase in ( + "unknown command", + "unrecognized", + "is not a valid", + "invalid command", + "is deprecated, use", + ) + ) + + +def _azd_failure_detail(completed: "subprocess.CompletedProcess[str]") -> str: + return ( + completed.stderr.strip() + or completed.stdout.strip() + or f"exit code {completed.returncode}" + ) + + +def _run_eval_subcommand( + base_command: list[str], + arguments: list[str], + *, + cwd: Path, + timeout_seconds: float, +) -> "subprocess.CompletedProcess[str]": + """Run ``azd ai agent eval <subcommand>`` resiliently across extensions. + + Prefers ``generate`` (azure.ai.agents >= 0.1.40) and falls back to the + legacy ``init`` subcommand when the installed extension does not recognise + ``generate``. A non-zero result that is not a subcommand-name problem (for + example an authentication or endpoint error) is surfaced immediately rather + than masked by the fallback, preserving the previous error behaviour. + """ + last_completed: Optional["subprocess.CompletedProcess[str]"] = None + for subcommand in _EVAL_SUBCOMMANDS: + command = [*base_command, subcommand, *arguments] + try: + completed = subprocess.run( + command, + cwd=str(cwd), + text=True, + encoding="utf-8", + errors="replace", + capture_output=True, + timeout=timeout_seconds, + check=False, + ) + except FileNotFoundError as exc: + raise AzdBackendError( + "azd was not found on PATH. Install the Azure Developer CLI and " + f"the `{AZD_EXTENSION_NAME}` extension, then rerun `agentops eval " + "init`." + ) from exc + except subprocess.TimeoutExpired as exc: + raise AzdBackendError( + f"{' '.join(command)} timed out after {timeout_seconds:g}s." + ) from exc + + if completed.returncode == 0: + return completed + + if _eval_subcommand_unsupported(completed.stderr, completed.stdout): + # This subcommand name is not supported (or is deprecated) by the + # installed extension. 
Remember it and try the next candidate. + last_completed = completed + continue + + # A real failure (not a subcommand-name issue): surface it now. + raise AzdBackendError( + f"azd ai agent eval {subcommand} failed: {_azd_failure_detail(completed)}" + ) + + if last_completed is not None: + detail = _azd_failure_detail(last_completed) + else: # pragma: no cover - _EVAL_SUBCOMMANDS is never empty + detail = ( + "no azd eval subcommand (generate/init) was accepted by the " + f"installed `{AZD_EXTENSION_NAME}` extension" + ) + raise AzdBackendError(f"azd ai agent eval failed: {detail}") + + def _dataset_from_config(config_path: Path) -> Optional[Path]: data = load_yaml(config_path) raw_dataset = data.get("dataset") diff --git a/tests/unit/test_azd_eval_init.py b/tests/unit/test_azd_eval_init.py index 29240b6a..d21123be 100644 --- a/tests/unit/test_azd_eval_init.py +++ b/tests/unit/test_azd_eval_init.py @@ -67,7 +67,7 @@ def fake_run(command, **kwargs): "ai", "agent", "eval", - "init", + "generate", "--project-endpoint", "https://contoso.services.ai.azure.com/api/projects/travel", "--agent", @@ -133,7 +133,7 @@ def fake_run(command, **kwargs): "ai", "agent", "eval", - "init", + "generate", "--project-endpoint", "https://contoso.services.ai.azure.com/api/projects/travel", "--agent", @@ -349,3 +349,176 @@ def fake_init(**kwargs): assert result.exit_code == 0 assert " * updated" in result.output assert "✓" not in result.output + + +def _setup_eval_workspace(tmp_path: Path) -> Path: + """Create a minimal workspace and return the config path.""" + config_path = tmp_path / "agentops.yaml" + _write_config(config_path) + dataset = tmp_path / ".agentops" / "data" / "smoke.jsonl" + dataset.parent.mkdir(parents=True) + dataset.write_text('{"input":"hello"}\n', encoding="utf-8") + prompt_file = tmp_path / ".agentops" / "prompts" / "travel.md" + prompt_file.parent.mkdir(parents=True) + prompt_file.write_text("You are a travel planner.", encoding="utf-8") + return config_path + + 
+def _eval_subcommand(command: list[str]) -> str: + # command is ["azd", "--no-prompt", "ai", "agent", "eval", <subcommand>, ...] + return command[5] + + +def test_run_azd_eval_init_prefers_generate_on_new_extension( + tmp_path: Path, + monkeypatch, +) -> None: + config_path = _setup_eval_workspace(tmp_path) + monkeypatch.setattr(azd_eval_init, "azd_available", lambda *, cwd=None: True) + + subcommands: list[str] = [] + + def fake_run(command, **kwargs): + if command[:3] == ["az", "resource", "list"]: + return subprocess.CompletedProcess(command, 0, stdout="[]", stderr="") + subcommands.append(_eval_subcommand(command)) + recipe = Path(kwargs["cwd"]) / "eval.yaml" + recipe.write_text("name: travel-agent-eval\n", encoding="utf-8") + return subprocess.CompletedProcess(command, 0, stdout="created", stderr="") + + monkeypatch.setattr(subprocess, "run", fake_run) + + result = azd_eval_init.run_azd_eval_init( + workspace=tmp_path, + config_path=config_path, + ) + + assert result.command_ran is True + # generate succeeds on the first try; the legacy init is never invoked. + assert subcommands == ["generate"] + + +def test_run_azd_eval_init_falls_back_to_init_on_old_extension( + tmp_path: Path, + monkeypatch, +) -> None: + config_path = _setup_eval_workspace(tmp_path) + monkeypatch.setattr(azd_eval_init, "azd_available", lambda *, cwd=None: True) + + seen: list[list[str]] = [] + + def fake_run(command, **kwargs): + if command[:3] == ["az", "resource", "list"]: + return subprocess.CompletedProcess(command, 0, stdout="[]", stderr="") + seen.append(command) + subcommand = _eval_subcommand(command) + if subcommand == "generate": + # Older azure.ai.agents extensions do not know `generate`. 
+ return subprocess.CompletedProcess( + command, + 1, + stdout="", + stderr='Error: unknown command "generate" for "azd ai agent eval"', + ) + recipe = Path(kwargs["cwd"]) / "eval.yaml" + recipe.write_text("name: travel-agent-eval\n", encoding="utf-8") + return subprocess.CompletedProcess(command, 0, stdout="created", stderr="") + + monkeypatch.setattr(subprocess, "run", fake_run) + + result = azd_eval_init.run_azd_eval_init( + workspace=tmp_path, + config_path=config_path, + ) + + assert result.command_ran is True + eval_calls = [ + cmd for cmd in seen if cmd[:5] == ["azd", "--no-prompt", "ai", "agent", "eval"] + ] + assert [_eval_subcommand(cmd) for cmd in eval_calls] == ["generate", "init"] + # The preserved flags are identical for both attempts (only the subcommand + # token differs). + assert eval_calls[0][6:] == eval_calls[1][6:] + updated = config_path.read_text(encoding="utf-8") + assert "eval_recipe:" in updated + + +def test_run_azd_eval_init_init_deprecation_does_not_hard_fail( + tmp_path: Path, + monkeypatch, +) -> None: + # On a new extension, init is deprecated but generate works. The wrapper + # must succeed via generate and never surface the deprecation as an error. 
+ config_path = _setup_eval_workspace(tmp_path) + monkeypatch.setattr(azd_eval_init, "azd_available", lambda *, cwd=None: True) + + def fake_run(command, **kwargs): + if command[:3] == ["az", "resource", "list"]: + return subprocess.CompletedProcess(command, 0, stdout="[]", stderr="") + if _eval_subcommand(command) == "init": + raise AssertionError("init must not be invoked when generate works") + recipe = Path(kwargs["cwd"]) / "eval.yaml" + recipe.write_text("name: travel-agent-eval\n", encoding="utf-8") + return subprocess.CompletedProcess(command, 0, stdout="created", stderr="") + + monkeypatch.setattr(subprocess, "run", fake_run) + + result = azd_eval_init.run_azd_eval_init( + workspace=tmp_path, + config_path=config_path, + ) + + assert result.command_ran is True + + +def test_run_azd_eval_init_surfaces_real_errors_without_fallback( + tmp_path: Path, + monkeypatch, +) -> None: + config_path = _setup_eval_workspace(tmp_path) + monkeypatch.setattr(azd_eval_init, "azd_available", lambda *, cwd=None: True) + + subcommands: list[str] = [] + + def fake_run(command, **kwargs): + if command[:3] == ["az", "resource", "list"]: + return subprocess.CompletedProcess(command, 0, stdout="[]", stderr="") + subcommands.append(_eval_subcommand(command)) + return subprocess.CompletedProcess( + command, + 1, + stdout="", + stderr="ERROR: failed to authenticate to the Azure AI project", + ) + + monkeypatch.setattr(subprocess, "run", fake_run) + + try: + azd_eval_init.run_azd_eval_init( + workspace=tmp_path, + config_path=config_path, + ) + except azd_eval_init.AzdBackendError as exc: + assert "failed to authenticate" in str(exc) + assert "azd ai agent eval generate failed" in str(exc) + else: # pragma: no cover - assertion helper + raise AssertionError("expected AzdBackendError") + + # A genuine error must not trigger the init fallback. 
+ assert subcommands == ["generate"] + + +def test_eval_subcommand_unsupported_matches_known_messages() -> None: + assert azd_eval_init._eval_subcommand_unsupported( + 'Error: unknown command "generate" for "azd ai agent eval"' + ) + assert azd_eval_init._eval_subcommand_unsupported( + "Command \"init\" is deprecated, use 'azd ai agent eval generate' instead" + ) + assert azd_eval_init._eval_subcommand_unsupported("Unrecognized flag") + assert azd_eval_init._eval_subcommand_unsupported('"generate" is not a valid command') + # Real runtime errors must not be treated as a subcommand-name problem. + assert not azd_eval_init._eval_subcommand_unsupported( + "ERROR: failed to authenticate to the Azure AI project" + ) + assert not azd_eval_init._eval_subcommand_unsupported("", "")