Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude-plugin/marketplace.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"name": "agentops-accelerator",
"source": "../../plugins/agentops",
"description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.",
"version": "0.3.17",
"version": "0.3.18",
"keywords": [
"agentops",
"evaluation",
Expand Down
2 changes: 1 addition & 1 deletion .github/plugin/marketplace.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"name": "agentops-accelerator",
"source": "../../plugins/agentops",
"description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.",
"version": "0.3.17",
"version": "0.3.18",
"keywords": [
"agentops",
"evaluation",
Expand Down
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres

## [Unreleased]

## [0.3.18] - 2026-06-10

### Fixed
- **`agentops eval run` surfaces real azd failures instead of swallowing them
behind a spinner line.** When `execution: azd` fails, the error now includes
the full command, exit code, and both stderr and stdout (truncated
defensively). Raw streams are also persisted to
`.agentops/results/<ts>/azd_eval_run_stdout.log` and `azd_eval_run_stderr.log` (and the
matching `azd_eval_show_*` files when `azd ai agent eval show` fails) so the
underlying cause is always recoverable from disk. Previously the CLI printed
only the active azd spinner step (e.g. "Resolving eval context..."), leaving
users with no actionable diagnostic.

## [0.3.17] - 2026-06-09

## [0.3.16] - 2026-06-09
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<h1 align="center">AgentOps Accelerator</h1>

<p align="center">
<b>Open-source framework and CLI for continuous evaluation, safety testing, and release readiness of Microsoft Foundry agents.</b>
<b>The open-source AgentOps jumpstart for continuous evaluation, safety testing, observability, and release readiness of Microsoft Foundry agents.</b>
<br/>
Can we ship it, and how do we know?
</p>
Expand All @@ -21,10 +21,10 @@ Can we ship it, and how do we know?

## Overview

AgentOps Accelerator is an open-source framework and CLI that standardizes
continuous evaluation, safety testing, and release readiness for enterprise
AI agents on Microsoft Foundry. It connects Foundry Evaluations, ASSERT,
the PyRIT-backed AI Red Teaming agent, Azure Monitor, and your CI/CD
AgentOps Accelerator is an open-source AgentOps jumpstart that standardizes
continuous evaluation, safety testing, observability, and release readiness
for enterprise AI agents on Microsoft Foundry. It connects Foundry Evaluations,
ASSERT, the PyRIT-backed AI Red Teaming agent, Azure Monitor, and your CI/CD
platform into one repeatable release loop, packaging every result into a
stable evidence pack that proves the release is ready for production.

Expand Down
299 changes: 135 additions & 164 deletions docs/tutorial-prompt-agent-quickstart.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion plugins/agentops/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "agentops-accelerator",
"displayName": "AgentOps Accelerator — Skills for GitHub Copilot",
"description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.",
"version": "0.3.17",
"version": "0.3.18",
"publisher": "AgentOpsAccelerator",
"icon": "icon.png",
"license": "MIT",
Expand Down
2 changes: 1 addition & 1 deletion plugins/agentops/plugin.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "agentops-accelerator",
"description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.",
"version": "0.3.17",
"version": "0.3.18",
"author": {
"name": "AgentOps Accelerator",
"url": "https://github.com/Azure/agentops"
Expand Down
125 changes: 105 additions & 20 deletions src/agentops/pipeline/azd_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def run_azd_eval(
workspace: Path,
progress: Optional[Callable[[str], None]] = None,
timeout_seconds: float = AZD_EVAL_TIMEOUT_SECONDS,
debug_dir: Optional[Path] = None,
) -> AzdEvalRun:
"""Run ``azd ai agent eval`` and return its normalized native payload."""

Expand Down Expand Up @@ -129,7 +130,17 @@ def run_azd_eval(
)
duration = time.perf_counter() - started
if completed.returncode != 0:
raise AzdBackendError(_format_command_failure("azd ai agent eval run", completed))
debug_paths = _persist_failure_logs(
debug_dir, step="azd_eval_run", completed=completed, command=command
)
raise AzdBackendError(
_format_command_failure(
"azd ai agent eval run",
completed,
command=command,
debug_paths=debug_paths,
)
)

run_payload = _parse_json_object(completed.stdout)
run_id = _extract_run_id(run_payload) or _extract_labeled_id(completed.stdout, "Run")
Expand All @@ -140,27 +151,38 @@ def run_azd_eval(
if run_id and eval_id:
with tempfile.TemporaryDirectory() as temp_dir:
details_path = Path(temp_dir) / "azd-eval-run.json"
show_command = [
"azd",
"--no-prompt",
"ai",
"agent",
"eval",
"show",
eval_id,
"--eval-run-id",
run_id,
"--out-file",
str(details_path),
]
show = _run_command(
[
"azd",
"--no-prompt",
"ai",
"agent",
"eval",
"show",
eval_id,
"--eval-run-id",
run_id,
"--out-file",
str(details_path),
],
show_command,
cwd=workspace,
timeout_seconds=timeout_seconds,
)
show_stdout = show.stdout
show_stderr = show.stderr
if show.returncode != 0:
raise AzdBackendError(_format_command_failure("azd ai agent eval show", show))
debug_paths = _persist_failure_logs(
debug_dir, step="azd_eval_show", completed=show, command=show_command
)
raise AzdBackendError(
_format_command_failure(
"azd ai agent eval show",
show,
command=show_command,
debug_paths=debug_paths,
)
)
if details_path.exists():
show_stdout = "\n".join(
part for part in (show_stdout, details_path.read_text(encoding="utf-8")) if part
Expand Down Expand Up @@ -574,8 +596,71 @@ def _recipe_dataset_path(recipe: EvalRecipe, recipe_path: Path) -> str:
return ""


def _format_command_failure(label: str, completed: subprocess.CompletedProcess[str]) -> str:
stderr = completed.stderr.strip()
stdout = completed.stdout.strip()
detail = stderr or stdout or f"exit code {completed.returncode}"
return f"{label} failed: {detail}"
# Per-stream character budget handed to ``_truncate_stream`` when stderr and
# stdout are embedded in a failure message.
_AZD_FAILURE_STREAM_LIMIT = 4000


def _format_command_failure(
label: str,
completed: subprocess.CompletedProcess[str],
*,
command: Optional[list[str]] = None,
debug_paths: Optional[Dict[str, Path]] = None,
) -> str:
stderr = (completed.stderr or "").strip()
stdout = (completed.stdout or "").strip()
lines: list[str] = [f"{label} failed (exit code {completed.returncode})."]
if command:
lines.append(f"Command: {' '.join(command)}")
if stderr:
lines.append("--- stderr ---")
lines.append(_truncate_stream(stderr, _AZD_FAILURE_STREAM_LIMIT))
if stdout:
lines.append("--- stdout ---")
lines.append(_truncate_stream(stdout, _AZD_FAILURE_STREAM_LIMIT))
if not stderr and not stdout:
lines.append("(no output captured on either stream)")
if debug_paths:
lines.append("--- debug logs ---")
for name, path in debug_paths.items():
lines.append(f"{name}: {path}")
return "\n".join(lines)


def _truncate_stream(value: str, limit: int) -> str:
if len(value) <= limit:
return value
head = value[: limit // 2]
tail = value[-limit // 2 :]
skipped = len(value) - len(head) - len(tail)
return f"{head}\n... [{skipped} chars truncated] ...\n{tail}"


def _persist_failure_logs(
debug_dir: Optional[Path],
*,
step: str,
completed: subprocess.CompletedProcess[str],
command: list[str],
) -> Dict[str, Path]:
if debug_dir is None:
return {}
try:
debug_dir.mkdir(parents=True, exist_ok=True)
except OSError:
return {}
written: Dict[str, Path] = {}
safe_step = re.sub(r"[^A-Za-z0-9._-]+", "_", step).strip("_") or "azd"
header = f"# command: {' '.join(command)}\n# exit_code: {completed.returncode}\n"
stdout_path = debug_dir / f"{safe_step}_stdout.log"
stderr_path = debug_dir / f"{safe_step}_stderr.log"
try:
stdout_path.write_text(header + (completed.stdout or ""), encoding="utf-8")
written["stdout"] = stdout_path
except OSError:
pass
try:
stderr_path.write_text(header + (completed.stderr or ""), encoding="utf-8")
written["stderr"] = stderr_path
except OSError:
pass
return written
1 change: 1 addition & 0 deletions src/agentops/pipeline/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,7 @@ def _run_evaluation_azd(
workspace=workspace,
progress=progress,
timeout_seconds=max(options.timeout_seconds, azd_runner.AZD_EVAL_TIMEOUT_SECONDS),
debug_dir=options.output_dir,
)
result = azd_runner.normalize_to_results(
azd_run,
Expand Down
Loading