From 5ee5abedefa07d6bc256390fd26f0714a2e136c5 Mon Sep 17 00:00:00 2001 From: Paulo Lacerda Date: Mon, 8 Jun 2026 23:09:09 -0300 Subject: [PATCH 1/2] fix: remove placeholder rubric quickstart gate Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 7 ++ docs/tutorial-end-to-end.md | 12 ++-- docs/tutorial-hosted-agent-quickstart.md | 13 ++-- docs/tutorial-prompt-agent-quickstart.md | 64 +++++++----------- src/agentops/agent/checks/catalog.py | 14 ---- src/agentops/agent/checks/observability.py | 33 +--------- src/agentops/agent/cockpit.py | 13 ++-- src/agentops/pipeline/azd_runner.py | 43 ------------ src/agentops/templates/agentops.yaml | 20 +++--- tests/unit/test_agent_checks_observability.py | 10 +-- tests/unit/test_azd_runner.py | 66 +++---------------- tests/unit/test_cockpit.py | 2 +- 12 files changed, 69 insertions(+), 228 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 137f6081..b3672e3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres ## [Unreleased] +### Fixed +- **Quickstart rubrics no longer block azd eval runs with placeholder evidence.** + The Travel Agent hardening flow now defaults to multi-turn dataset coverage and + treats rubric evaluators as advanced opt-in only after Foundry / azd emits real + metric names, while AgentOps preserves rubric metadata without failing a normal + azd result solely because matching rubric metrics were not emitted. + ## [0.3.12] - 2026-06-09 ### Added diff --git a/docs/tutorial-end-to-end.md b/docs/tutorial-end-to-end.md index 08ebdd23..45b03b9f 100644 --- a/docs/tutorial-end-to-end.md +++ b/docs/tutorial-end-to-end.md @@ -444,12 +444,12 @@ Foundry through `agentops eval run`, so AgentOps can enforce thresholds and writ repo-side evidence. AgentOps keeps the local path for hosted endpoints, models, unsupported evaluator mappings, and fallback cases. -When the quality gate uses a task-specific rubric, choose the azd runner instead -of local execution. Add `rubrics:` to `agentops.yaml`, set -`rubrics[].evaluator` to the Foundry / azd evaluator name, set -`execution: azd`, and run `agentops eval init --force`. AgentOps then passes the -rubric evaluator into the generated azd recipe and fails closed if someone tries -to run that rubric gate with the local backend. +When the quality gate uses a task-specific rubric, keep it as an advanced +Foundry / azd hardening step: first confirm the rubric evaluator exists in the +Foundry project and that an azd run emits stable metric names for its scores. +Then add `rubrics:` and matching thresholds to `agentops.yaml`, set +`execution: azd`, and run `agentops eval init --force`. Do not use placeholder +rubric names in the first tutorial pass. ## 5. Run the first eval diff --git a/docs/tutorial-hosted-agent-quickstart.md b/docs/tutorial-hosted-agent-quickstart.md index 1b38f626..d68b9063 100644 --- a/docs/tutorial-hosted-agent-quickstart.md +++ b/docs/tutorial-hosted-agent-quickstart.md @@ -648,13 +648,12 @@ This is the core AgentOps loop for hosted endpoints: keep a stable dataset, compare a changed runtime against the last known result, fix the agent, and rerun the same gate before a PR or release. -If this hosted endpoint is backed by a Foundry / azd eval recipe, you can use -the same rubric contract as the prompt-agent Travel Agent tutorial before you -generate CI: set `execution: azd`, add `dataset_kind: multi-turn`, declare -`rubrics[].evaluator` in `agentops.yaml`, run `agentops eval init --force`, and -then run `agentops eval run`. AgentOps will require the azd backend whenever -rubrics are configured, so a passing hosted-agent gate means the rubric evaluator -actually ran instead of being recorded as metadata only. +If this hosted endpoint is backed by a Foundry / azd eval recipe, you can use the +same conversation-aware contract as the prompt-agent Travel Agent tutorial +before you generate CI: set `execution: azd`, add `dataset_kind: multi-turn`, run +`agentops eval init --force`, and then run `agentops eval run`. Add a rubric only +after your Foundry project already has a real rubric evaluator and the azd run +emits metric names you can bind to thresholds. ## 10. Generate CI and Doctor evidence diff --git a/docs/tutorial-prompt-agent-quickstart.md b/docs/tutorial-prompt-agent-quickstart.md index 91ee0b6c..e69dc7c7 100644 --- a/docs/tutorial-prompt-agent-quickstart.md +++ b/docs/tutorial-prompt-agent-quickstart.md @@ -803,10 +803,11 @@ You should see `execution: azd` and `Threshold status: PASSED`. The raw azd run details are kept under `.agentops/results/latest/` alongside AgentOps' normalized `results.json` and `report.md`. -Before generating CI, turn the Travel Agent gate from a basic smoke test into -the proof you want reviewers to see later. Keep the recording you already made -through this step: the smoke run above proves the workspace works. The next -commands only harden the same gate. +Before generating CI, turn the Travel Agent gate from a basic smoke test into a +conversation-aware proof. Keep the recording you already made through this step: +the smoke run above proves the workspace works. The next commands only harden +the same gate with multi-turn rows that can later line up with trace replay and +trace-to-dataset evidence. Create a small conversation-shaped dataset. It still keeps `input` and `expected` so AgentOps and azd can route the row, but it also carries the @@ -819,49 +820,29 @@ conversation turns that multi-turn evaluators and trace-derived rows use: '@ | Set-Content -Encoding utf8 .agentops\data\travel-conversations.jsonl ``` -Then update the evaluation contract in `agentops.yaml`. The important part is -that `rubrics[].evaluator` names the rubric evaluator that Foundry / azd will -run. If your Foundry Observe flow generated a different rubric evaluator name, -use that exact name here. +Then update the dataset in `agentops.yaml`: ```yaml dataset: .agentops/data/travel-conversations.jsonl dataset_kind: multi-turn - -rubrics: - - name: travel-concierge-quality - evaluator: travel-concierge-quality - description: Scores the Travel Agent against the intended product behavior. - dimensions: - - name: task_success - description: Completes the user's travel-planning goal across the conversation. - weight: 0.5 - - name: constraint_following - description: Carries user constraints such as kids, budget, duration, and pace. - weight: 0.3 - - name: safe_booking_behavior - description: Avoids claiming live bookings, confirmations, or prices it cannot verify. - weight: 0.2 - -thresholds: - task_success: ">=4" - constraint_following: ">=4" - safe_booking_behavior: ">=4" ``` -Re-run init so the azd recipe includes the rubric evaluator in the actual -evaluation, not only in documentation: +Re-run init so the azd recipe points at the conversation dataset, then run the +gate again: ```powershell agentops eval init --force agentops eval run ``` -If the rubric evaluator name is wrong or missing in Foundry, the run should fail -closed. That is intentional: a green gate must mean the rubric really ran. When -it passes, `results.json` records `execution: azd`, the evaluator list, the -rubric metadata from `agentops.yaml`, and threshold results for the rubric -dimensions. +When it passes, `results.json` records `execution: azd`, the evaluator list, the +multi-turn dataset kind, and the threshold results emitted by azd. + +If your Foundry project already has a real rubric evaluator, add it later as an +advanced hardening step: declare `rubrics:` in `agentops.yaml`, bind thresholds +only to metric names that appear in the azd run output, and regenerate the recipe +with `agentops eval init --force`. Do not use placeholder rubric names in the +quickstart path. ## 11. Generate the PR + dev deploy workflows @@ -907,10 +888,10 @@ The PR workflow now has two jobs: staged candidate. 2. **`eval`** — runs `agentops eval run` against the candidate, then runs Doctor with `--severity-fail critical`. Because the previous step - moved the gate to `execution: azd` with `rubrics:`, the workflow is not - just checking a smoke response: it runs the Foundry / azd evaluation recipe, - applies the Travel Agent rubric dimensions as thresholds, and writes the - normalized rubric evidence to `.agentops/results/latest/results.json`. + moved the gate to a conversation dataset, the workflow is not just checking a + single smoke response: it runs the Foundry / azd evaluation recipe against the + multi-turn Travel Agent rows and writes normalized evidence to + `.agentops/results/latest/results.json`. > **Why does the PR workflow stage in dev, not sandbox?** The PR gate > must evaluate the same target the deploy workflow will use. Sandbox @@ -923,9 +904,8 @@ The PR workflow now has two jobs: The dev deploy workflow stages a candidate (same logic), evaluates it, summarizes the deployment via `prompt_deploy summarize`, and uploads `.agentops/deployments/foundry-agent.json` as a workflow artifact. -The deploy gate uses the same rubric-aware `agentops eval run`, so the candidate -that lands in dev has already passed the conversation/rubric gate reviewers saw -on the PR. +The deploy gate uses the same conversation-aware `agentops eval run`, so the +candidate that lands in dev has already passed the gate reviewers saw on the PR. The `--doctor-gate critical` flag controls the Doctor severity floor in the PR template. The table below summarizes the three values: diff --git a/src/agentops/agent/checks/catalog.py b/src/agentops/agent/checks/catalog.py index 7db2e2ab..332b68f5 100644 --- a/src/agentops/agent/checks/catalog.py +++ b/src/agentops/agent/checks/catalog.py @@ -144,9 +144,6 @@ "observability.multiturn_coverage_missing": ( "https://learn.microsoft.com/azure/foundry/concepts/observability" ), - "observability.rubric_missing": ( - "https://learn.microsoft.com/azure/foundry/concepts/observability" - ), "observability.trace_sampling_missing": ( "https://learn.microsoft.com/azure/foundry/concepts/observability" ), @@ -222,17 +219,6 @@ def is_llm_judged(self) -> bool: severities=(Severity.INFO,), requires=("workspace",), ), - CheckSpec( - id="observability.rubric_missing", - category=Category.QUALITY, - title="No context-specific rubric evaluator is declared", - summary=( - "The workspace does not declare a Foundry rubric evaluator or " - "rubric dimensions that can be bound to release thresholds." - ), - severities=(Severity.INFO,), - requires=("workspace",), - ), # ------------------------------------------------------------------ # Performance # ------------------------------------------------------------------ diff --git a/src/agentops/agent/checks/observability.py b/src/agentops/agent/checks/observability.py index 40ce615a..ec6cc849 100644 --- a/src/agentops/agent/checks/observability.py +++ b/src/agentops/agent/checks/observability.py @@ -16,9 +16,9 @@ def run_observability_check(workspace: Path) -> List[Finding]: """Validate repo-side intent for Foundry observability signals. These checks are deliberately read-only. Foundry owns the runtime surfaces - for traces, intelligent sampling, replay, multi-turn eval, and rubric - evaluators; AgentOps verifies whether the repo has enough metadata and - evidence to make those signals part of release readiness. + for traces, intelligent sampling, replay, multi-turn eval, and optional + rubric evaluators; AgentOps verifies whether the repo has enough metadata + and evidence to make those signals part of release readiness. """ config = _safe_config(workspace) @@ -27,7 +27,6 @@ def run_observability_check(workspace: Path) -> List[Finding]: findings: List[Finding] = [] findings.extend(_check_multiturn_coverage(config, workspace)) - findings.extend(_check_rubric_coverage(config)) findings.extend(_check_trace_sampling(config, workspace)) findings.extend(_check_trace_replay(config, workspace)) return findings @@ -62,32 +61,6 @@ def _check_multiturn_coverage(config: dict[str, Any], workspace: Path) -> List[F ] -def _check_rubric_coverage(config: dict[str, Any]) -> List[Finding]: - rubrics = config.get("rubrics") - if isinstance(rubrics, list) and rubrics: - return [] - return [ - Finding( - id="observability.rubric_missing", - severity=Severity.INFO, - category=Category.QUALITY, - title="No context-specific rubric evaluator is declared", - summary=( - "Foundry rubric evaluators let teams score the agent against " - "task-specific criteria such as task success, tone, safety, cost, " - "and latency. AgentOps did not find a `rubrics:` block in " - "agentops.yaml." - ), - recommendation=( - "Declare at least one rubric in agentops.yaml and bind its " - "dimension metrics to thresholds, or reference the rubric through " - "the azd eval recipe used by `execution: azd`." - ), - source=SOURCE_NAME, - ) - ] - - def _check_trace_sampling(config: dict[str, Any], workspace: Path) -> List[Finding]: observability = config.get("observability") trace_sampling = ( diff --git a/src/agentops/agent/cockpit.py b/src/agentops/agent/cockpit.py index b4cd71a4..260e3bb4 100644 --- a/src/agentops/agent/cockpit.py +++ b/src/agentops/agent/cockpit.py @@ -1979,16 +1979,17 @@ def _build_readiness_checklist( rubric_ready = isinstance(rubrics, list) and bool(rubrics) checks.append( { - "title": "Rubric evaluator gate", + "title": "Optional rubric evaluator gate", "status": "ok" if rubric_ready else "muted", "detail": ( "Detected rubrics: in agentops.yaml. " - "AgentOps requires execution: azd so the Foundry " - "rubric evaluator actually runs." + "Keep thresholds bound only to metric names emitted by your " + "Foundry / azd run." if rubric_ready - else "How to complete: declare a task-specific " - "rubrics: block and bind its dimensions to thresholds. " - "Use execution: azd so Foundry evaluates the rubric." + else "How to complete: optional - add " + "rubrics: only after a real Foundry rubric evaluator " + "exists and azd emits stable metric names you can bind to " + "thresholds." ), } ) diff --git a/src/agentops/pipeline/azd_runner.py b/src/agentops/pipeline/azd_runner.py index cf77a64d..03253473 100644 --- a/src/agentops/pipeline/azd_runner.py +++ b/src/agentops/pipeline/azd_runner.py @@ -202,8 +202,6 @@ def normalize_to_results( "azd eval run returned no numeric metrics, so AgentOps cannot apply " "thresholds or claim the gate passed." ) - _validate_rubric_evidence(config=config, recipe=recipe, metrics=aggregate_metrics) - metric_binding = bind_threshold_metrics(config.thresholds.keys(), aggregate_metrics.keys()) if metric_binding.unmatched: names = ", ".join(metric_binding.unmatched) @@ -293,47 +291,6 @@ def normalize_to_results( }, ) - -def _validate_rubric_evidence( - *, - config: AgentOpsConfig, - recipe: EvalRecipe, - metrics: Dict[str, float], -) -> None: - if not config.rubrics: - return - - recipe_evaluators = {evaluator.name for evaluator in recipe.evaluators} - threshold_names = set(config.thresholds) - metric_names = set(metrics) - missing: list[str] = [] - - for rubric in config.rubrics: - evaluator_name = (rubric.evaluator or rubric.name).strip() - if evaluator_name not in recipe_evaluators: - missing.append(f"rubric evaluator `{evaluator_name}` in eval.yaml") - dimension_names = [dimension.name for dimension in rubric.dimensions] - thresholded_dimensions = [ - name for name in dimension_names if name in threshold_names - ] - if not thresholded_dimensions: - missing.append( - f"threshold for at least one dimension of rubric `{rubric.name}`" - ) - continue - for dimension_name in thresholded_dimensions: - if dimension_name not in metric_names: - missing.append(f"azd metric for rubric dimension `{dimension_name}`") - - if missing: - raise AzdBackendError( - "rubric evidence is incomplete; " - + "; ".join(missing) - + ". Run `agentops eval init --force` after configuring rubrics and " - "bind rubric dimension thresholds in agentops.yaml." - ) - - def write_raw_artifacts(azd_run: AzdEvalRun, output_dir: Path) -> None: """Write native azd payload and command streams for debugging/evidence.""" diff --git a/src/agentops/templates/agentops.yaml b/src/agentops/templates/agentops.yaml index 30493277..fbcf0551 100644 --- a/src/agentops/templates/agentops.yaml +++ b/src/agentops/templates/agentops.yaml @@ -60,24 +60,20 @@ dataset: .agentops/data/smoke.jsonl # groundedness: ">=3" # avg_latency_seconds: "<=30" # -# Optional. Context-specific rubric evaluators. When this block is present, -# AgentOps requires execution: azd so the Foundry / azd evaluator actually runs; -# local execution will fail closed instead of pretending rubric scoring happened. +# Optional advanced hardening. Use only after the Foundry project already has a +# real rubric evaluator and azd emits stable metric names you can bind to +# thresholds. Placeholder rubric names will not create a Foundry evaluator. # # rubrics: -# - name: travel-concierge-quality -# evaluator: travel-concierge-rubric +# - name: my-production-rubric +# evaluator: existing-foundry-rubric-evaluator # dimensions: -# - name: task_success -# description: "Completes the requested task without losing context." +# - name: rubric_metric_from_azd_output +# description: "Metric emitted by the Foundry / azd rubric run." # weight: 0.5 -# - name: safety -# description: "Avoids unsafe or unsupported claims." -# weight: 0.3 # # thresholds: -# task_success: ">=4" -# safety: ">=4" +# rubric_metric_from_azd_output: ">=4" # Optional. Foundry prompt agents and Foundry publishing need a project # endpoint. If both this value and AZURE_AI_FOUNDRY_PROJECT_ENDPOINT are set, diff --git a/tests/unit/test_agent_checks_observability.py b/tests/unit/test_agent_checks_observability.py index aa581e8f..976739a3 100644 --- a/tests/unit/test_agent_checks_observability.py +++ b/tests/unit/test_agent_checks_observability.py @@ -18,7 +18,6 @@ def test_observability_check_flags_missing_build_2026_readiness(tmp_path: Path) ids = {finding.id for finding in findings} assert "observability.multiturn_coverage_missing" in ids - assert "observability.rubric_missing" in ids assert "observability.trace_sampling_missing" in ids assert "observability.trace_replay_missing" in ids @@ -29,11 +28,6 @@ def test_observability_check_accepts_declared_readiness(tmp_path: Path) -> None: "agent: travel-agent:2\n" "dataset: .agentops/data/conversations.jsonl\n" "dataset_kind: multi-turn\n" - "rubrics:\n" - " - name: travel-concierge-quality\n" - " dimensions:\n" - " - name: task_success\n" - " description: Completes the requested travel task.\n" "observability:\n" " trace_sampling:\n" " enabled: true\n" @@ -51,9 +45,7 @@ def test_observability_check_accepts_trace_manifest_lineage(tmp_path: Path) -> N (tmp_path / "agentops.yaml").write_text( "version: 1\n" "agent: travel-agent:2\n" - "dataset: .agentops/data/smoke.jsonl\n" - "rubrics:\n" - " - name: travel-concierge-quality\n", + "dataset: .agentops/data/smoke.jsonl\n", encoding="utf-8", ) manifest = tmp_path / ".agentops" / "data" / "trace-regression-manifest.json" diff --git a/tests/unit/test_azd_runner.py b/tests/unit/test_azd_runner.py index 73a8a612..6cfa5a0c 100644 --- a/tests/unit/test_azd_runner.py +++ b/tests/unit/test_azd_runner.py @@ -349,7 +349,7 @@ def test_normalize_to_results_binds_rubric_dimensions(tmp_path: Path) -> None: assert result.config["rubrics"][0]["evaluator"] == "travel_quality_rubric" -def test_rubric_config_requires_dimension_threshold_evidence(tmp_path: Path) -> None: +def test_rubric_metadata_does_not_block_azd_metrics(tmp_path: Path) -> None: recipe_path = tmp_path / "eval.yaml" recipe_path.write_text( """ @@ -393,65 +393,15 @@ def test_rubric_config_requires_dimension_threshold_evidence(tmp_path: Path) -> duration_seconds=3.0, ) - with pytest.raises(azd_runner.AzdBackendError, match="rubric evidence"): - azd_runner.normalize_to_results( - azd_run, - config=config, - recipe=recipe, - started_at=datetime.now(timezone.utc), - ) - - -def test_rubric_config_requires_recipe_evaluator(tmp_path: Path) -> None: - recipe_path = tmp_path / "eval.yaml" - recipe_path.write_text( - """ -name: rubric-eval -agent: - name: travel-agent - kind: prompt-agent -evaluators: - - builtin.coherence -""".lstrip(), - encoding="utf-8", - ) - recipe = load_eval_recipe(recipe_path) - config = AgentOpsConfig( - version=1, - agent="travel-agent:1", - dataset="ignored.jsonl", - execution="azd", - rubrics=[ - { - "name": "travel_quality", - "evaluator": "travel_quality_rubric", - "dimensions": [ - { - "name": "booking_accuracy", - "description": "Books or recommends options accurately.", - } - ], - } - ], - thresholds={"booking_accuracy": ">=0.8"}, - ) - azd_run = azd_runner.AzdEvalRun( - recipe_path=recipe_path, - payload={"metrics": {"booking_accuracy": 0.91}}, - run_id="run-1", - status="completed", - stdout="{}", - stderr="", - duration_seconds=3.0, + result = azd_runner.normalize_to_results( + azd_run, + config=config, + recipe=recipe, + started_at=datetime.now(timezone.utc), ) - with pytest.raises(azd_runner.AzdBackendError, match="rubric evaluator"): - azd_runner.normalize_to_results( - azd_run, - config=config, - recipe=recipe, - started_at=datetime.now(timezone.utc), - ) + assert result.summary.overall_passed is True + assert result.config["rubrics"][0]["evaluator"] == "travel_quality_rubric" def test_orchestrator_azd_dispatch_never_invokes_local_runtime(tmp_path: Path) -> None: diff --git a/tests/unit/test_cockpit.py b/tests/unit/test_cockpit.py index 46edb242..32c68037 100644 --- a/tests/unit/test_cockpit.py +++ b/tests/unit/test_cockpit.py @@ -459,7 +459,7 @@ def test_readiness_detects_multiturn_rubric_sampling_and_replay(tmp_path: Path): by_title = {check["title"]: check for check in readiness["checks"]} assert by_title["Multi-turn eval coverage"]["status"] == "ok" - assert by_title["Rubric evaluator gate"]["status"] == "ok" + assert by_title["Optional rubric evaluator gate"]["status"] == "ok" assert by_title["Trace sampling for live quality"]["status"] == "ok" assert by_title["Trace replay linked to evidence"]["status"] == "ok" From aa81124c12cae9e558fcf092813114c89c545b5d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 9 Jun 2026 02:16:10 +0000 Subject: [PATCH 2/2] chore: prepare release 0.3.13 --- .claude-plugin/marketplace.json | 2 +- .github/plugin/marketplace.json | 2 +- CHANGELOG.md | 2 ++ plugins/agentops/package.json | 2 +- plugins/agentops/plugin.json | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index c58e5fcd..23596e4d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -13,7 +13,7 @@ "name": "agentops-accelerator", "source": "../../plugins/agentops", "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.", - "version": "0.3.12", + "version": "0.3.13", "keywords": [ "agentops", "evaluation", diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json index c58e5fcd..23596e4d 100644 --- a/.github/plugin/marketplace.json +++ b/.github/plugin/marketplace.json @@ -13,7 +13,7 @@ "name": "agentops-accelerator", "source": "../../plugins/agentops", "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.", - "version": "0.3.12", + "version": "0.3.13", "keywords": [ "agentops", "evaluation", diff --git a/CHANGELOG.md b/CHANGELOG.md index b3672e3a..225c7db4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres ## [Unreleased] +## [0.3.13] - 2026-06-09 + ### Fixed - **Quickstart rubrics no longer block azd eval runs with placeholder evidence.** The Travel Agent hardening flow now defaults to multi-turn dataset coverage and diff --git a/plugins/agentops/package.json b/plugins/agentops/package.json index d9962578..094946f6 100644 --- a/plugins/agentops/package.json +++ b/plugins/agentops/package.json @@ -2,7 +2,7 @@ "name": "agentops-accelerator", "displayName": "AgentOps Accelerator — Skills for GitHub Copilot", "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.", - "version": "0.3.12", + "version": "0.3.13", "publisher": "AgentOpsAccelerator", "icon": "icon.png", "license": "MIT", diff --git a/plugins/agentops/plugin.json b/plugins/agentops/plugin.json index 95b07344..4cf2b4ae 100644 --- a/plugins/agentops/plugin.json +++ b/plugins/agentops/plugin.json @@ -1,7 +1,7 @@ { "name": "agentops-accelerator", "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.", - "version": "0.3.12", + "version": "0.3.13", "author": { "name": "AgentOps Accelerator", "url": "https://github.com/Azure/agentops"