From 5ee5abedefa07d6bc256390fd26f0714a2e136c5 Mon Sep 17 00:00:00 2001
From: Paulo Lacerda <pclacerda@gmail.com>
Date: Mon, 8 Jun 2026 23:09:09 -0300
Subject: [PATCH 1/2] fix: remove placeholder rubric quickstart gate

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 CHANGELOG.md                                  |  7 ++
 docs/tutorial-end-to-end.md                   | 12 ++--
 docs/tutorial-hosted-agent-quickstart.md      | 13 ++--
 docs/tutorial-prompt-agent-quickstart.md      | 64 +++++++-----------
 src/agentops/agent/checks/catalog.py          | 14 ----
 src/agentops/agent/checks/observability.py    | 33 +---------
 src/agentops/agent/cockpit.py                 | 13 ++--
 src/agentops/pipeline/azd_runner.py           | 43 ------------
 src/agentops/templates/agentops.yaml          | 20 +++---
 tests/unit/test_agent_checks_observability.py | 10 +--
 tests/unit/test_azd_runner.py                 | 66 +++----------------
 tests/unit/test_cockpit.py                    |  2 +-
 12 files changed, 69 insertions(+), 228 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 137f6081..b3672e3a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,13 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres
 
 ## [Unreleased]
 
+### Fixed
+- **Quickstart rubrics no longer block azd eval runs with placeholder evidence.**
+  The Travel Agent hardening flow now defaults to multi-turn dataset coverage and
+  treats rubric evaluators as an advanced opt-in, added only after Foundry / azd
+  emits real metric names. AgentOps still preserves rubric metadata but no longer
+  fails a normal azd result solely because matching rubric metrics were not emitted.
+
 ## [0.3.12] - 2026-06-09
 
 ### Added
diff --git a/docs/tutorial-end-to-end.md b/docs/tutorial-end-to-end.md
index 08ebdd23..45b03b9f 100644
--- a/docs/tutorial-end-to-end.md
+++ b/docs/tutorial-end-to-end.md
@@ -444,12 +444,12 @@ Foundry through `agentops eval run`, so AgentOps can enforce thresholds and writ
 repo-side evidence. AgentOps keeps the local path for hosted endpoints, models,
 unsupported evaluator mappings, and fallback cases.
 
-When the quality gate uses a task-specific rubric, choose the azd runner instead
-of local execution. Add `rubrics:` to `agentops.yaml`, set
-`rubrics[].evaluator` to the Foundry / azd evaluator name, set
-`execution: azd`, and run `agentops eval init --force`. AgentOps then passes the
-rubric evaluator into the generated azd recipe and fails closed if someone tries
-to run that rubric gate with the local backend.
+When the quality gate uses a task-specific rubric, keep it as an advanced
+Foundry / azd hardening step: first confirm the rubric evaluator exists in the
+Foundry project and that an azd run emits stable metric names for its scores.
+Then add `rubrics:` and matching thresholds to `agentops.yaml`, set
+`execution: azd`, and run `agentops eval init --force`. Do not use placeholder
+rubric names in the first tutorial pass.
 
 ## 5. Run the first eval
 
diff --git a/docs/tutorial-hosted-agent-quickstart.md b/docs/tutorial-hosted-agent-quickstart.md
index 1b38f626..d68b9063 100644
--- a/docs/tutorial-hosted-agent-quickstart.md
+++ b/docs/tutorial-hosted-agent-quickstart.md
@@ -648,13 +648,12 @@ This is the core AgentOps loop for hosted endpoints: keep a stable dataset,
 compare a changed runtime against the last known result, fix the agent, and
 rerun the same gate before a PR or release.
 
-If this hosted endpoint is backed by a Foundry / azd eval recipe, you can use
-the same rubric contract as the prompt-agent Travel Agent tutorial before you
-generate CI: set `execution: azd`, add `dataset_kind: multi-turn`, declare
-`rubrics[].evaluator` in `agentops.yaml`, run `agentops eval init --force`, and
-then run `agentops eval run`. AgentOps will require the azd backend whenever
-rubrics are configured, so a passing hosted-agent gate means the rubric evaluator
-actually ran instead of being recorded as metadata only.
+If this hosted endpoint is backed by a Foundry / azd eval recipe, you can use the
+same conversation-aware contract as the prompt-agent Travel Agent tutorial
+before you generate CI: set `execution: azd`, add `dataset_kind: multi-turn`, run
+`agentops eval init --force`, and then run `agentops eval run`. Add a rubric only
+after your Foundry project already has a real rubric evaluator and the azd run
+emits metric names you can bind to thresholds.
 
 ## 10. Generate CI and Doctor evidence
 
diff --git a/docs/tutorial-prompt-agent-quickstart.md b/docs/tutorial-prompt-agent-quickstart.md
index 91ee0b6c..e69dc7c7 100644
--- a/docs/tutorial-prompt-agent-quickstart.md
+++ b/docs/tutorial-prompt-agent-quickstart.md
@@ -803,10 +803,11 @@ You should see `execution: azd` and `Threshold status: PASSED`. The raw azd run
 details are kept under `.agentops/results/latest/` alongside AgentOps'
 normalized `results.json` and `report.md`.
 
-Before generating CI, turn the Travel Agent gate from a basic smoke test into
-the proof you want reviewers to see later. Keep the recording you already made
-through this step: the smoke run above proves the workspace works. The next
-commands only harden the same gate.
+Before generating CI, turn the Travel Agent gate from a basic smoke test into a
+conversation-aware proof. Keep the recording you already made through this step:
+the smoke run above proves the workspace works. The next commands only harden
+the same gate with multi-turn rows that can later line up with trace replay and
+trace-to-dataset evidence.
 
 Create a small conversation-shaped dataset. It still keeps `input` and
 `expected` so AgentOps and azd can route the row, but it also carries the
@@ -819,49 +820,29 @@ conversation turns that multi-turn evaluators and trace-derived rows use:
 '@ | Set-Content -Encoding utf8 .agentops\data\travel-conversations.jsonl
 ```
 
-Then update the evaluation contract in `agentops.yaml`. The important part is
-that `rubrics[].evaluator` names the rubric evaluator that Foundry / azd will
-run. If your Foundry Observe flow generated a different rubric evaluator name,
-use that exact name here.
+Then point `agentops.yaml` at the new dataset and declare it as multi-turn:
 
 ```yaml
 dataset: .agentops/data/travel-conversations.jsonl
 dataset_kind: multi-turn
-
-rubrics:
-  - name: travel-concierge-quality
-    evaluator: travel-concierge-quality
-    description: Scores the Travel Agent against the intended product behavior.
-    dimensions:
-      - name: task_success
-        description: Completes the user's travel-planning goal across the conversation.
-        weight: 0.5
-      - name: constraint_following
-        description: Carries user constraints such as kids, budget, duration, and pace.
-        weight: 0.3
-      - name: safe_booking_behavior
-        description: Avoids claiming live bookings, confirmations, or prices it cannot verify.
-        weight: 0.2
-
-thresholds:
-  task_success: ">=4"
-  constraint_following: ">=4"
-  safe_booking_behavior: ">=4"
 ```
 
-Re-run init so the azd recipe includes the rubric evaluator in the actual
-evaluation, not only in documentation:
+Re-run init so the azd recipe points at the conversation dataset, then run the
+gate again:
 
 ```powershell
 agentops eval init --force
 agentops eval run
 ```
 
-If the rubric evaluator name is wrong or missing in Foundry, the run should fail
-closed. That is intentional: a green gate must mean the rubric really ran. When
-it passes, `results.json` records `execution: azd`, the evaluator list, the
-rubric metadata from `agentops.yaml`, and threshold results for the rubric
-dimensions.
+When it passes, `results.json` records `execution: azd`, the evaluator list, the
+multi-turn dataset kind, and threshold results for the metrics azd emitted.
+
+If your Foundry project already has a real rubric evaluator, add it later as an
+advanced hardening step: declare `rubrics:` in `agentops.yaml`, bind thresholds
+only to metric names that appear in the azd run output, and regenerate the recipe
+with `agentops eval init --force`. Do not use placeholder rubric names in the
+quickstart path.
 
 ## 11. Generate the PR + dev deploy workflows
 
@@ -907,10 +888,10 @@ The PR workflow now has two jobs:
    staged candidate.
 2. **`eval`** — runs `agentops eval run` against the candidate, then
    runs Doctor with `--severity-fail critical`. Because the previous step
-   moved the gate to `execution: azd` with `rubrics:`, the workflow is not
-   just checking a smoke response: it runs the Foundry / azd evaluation recipe,
-   applies the Travel Agent rubric dimensions as thresholds, and writes the
-   normalized rubric evidence to `.agentops/results/latest/results.json`.
+   moved the gate to a conversation dataset, the workflow is not just checking a
+   single smoke response: it runs the Foundry / azd evaluation recipe against the
+   multi-turn Travel Agent rows and writes normalized evidence to
+   `.agentops/results/latest/results.json`.
 
 > **Why does the PR workflow stage in dev, not sandbox?** The PR gate
 > must evaluate the same target the deploy workflow will use. Sandbox
@@ -923,9 +904,8 @@ The PR workflow now has two jobs:
 The dev deploy workflow stages a candidate (same logic), evaluates it,
 summarizes the deployment via `prompt_deploy summarize`, and uploads
 `.agentops/deployments/foundry-agent.json` as a workflow artifact.
-The deploy gate uses the same rubric-aware `agentops eval run`, so the candidate
-that lands in dev has already passed the conversation/rubric gate reviewers saw
-on the PR.
+The deploy gate uses the same conversation-aware `agentops eval run`, so the
+candidate that lands in dev has already passed the gate reviewers saw on the PR.
 
 The `--doctor-gate critical` flag controls the Doctor severity floor in
 the PR template. The table below summarizes the three values:
diff --git a/src/agentops/agent/checks/catalog.py b/src/agentops/agent/checks/catalog.py
index 7db2e2ab..332b68f5 100644
--- a/src/agentops/agent/checks/catalog.py
+++ b/src/agentops/agent/checks/catalog.py
@@ -144,9 +144,6 @@
     "observability.multiturn_coverage_missing": (
         "https://learn.microsoft.com/azure/foundry/concepts/observability"
     ),
-    "observability.rubric_missing": (
-        "https://learn.microsoft.com/azure/foundry/concepts/observability"
-    ),
     "observability.trace_sampling_missing": (
         "https://learn.microsoft.com/azure/foundry/concepts/observability"
     ),
@@ -222,17 +219,6 @@ def is_llm_judged(self) -> bool:
         severities=(Severity.INFO,),
         requires=("workspace",),
     ),
-    CheckSpec(
-        id="observability.rubric_missing",
-        category=Category.QUALITY,
-        title="No context-specific rubric evaluator is declared",
-        summary=(
-            "The workspace does not declare a Foundry rubric evaluator or "
-            "rubric dimensions that can be bound to release thresholds."
-        ),
-        severities=(Severity.INFO,),
-        requires=("workspace",),
-    ),
     # ------------------------------------------------------------------
     # Performance
     # ------------------------------------------------------------------
diff --git a/src/agentops/agent/checks/observability.py b/src/agentops/agent/checks/observability.py
index 40ce615a..ec6cc849 100644
--- a/src/agentops/agent/checks/observability.py
+++ b/src/agentops/agent/checks/observability.py
@@ -16,9 +16,9 @@ def run_observability_check(workspace: Path) -> List[Finding]:
     """Validate repo-side intent for Foundry observability signals.
 
     These checks are deliberately read-only. Foundry owns the runtime surfaces
-    for traces, intelligent sampling, replay, multi-turn eval, and rubric
-    evaluators; AgentOps verifies whether the repo has enough metadata and
-    evidence to make those signals part of release readiness.
+    for traces, intelligent sampling, replay, multi-turn eval, and optional
+    rubric evaluators; AgentOps verifies whether the repo has enough metadata
+    and evidence to make those signals part of release readiness.
     """
 
     config = _safe_config(workspace)
@@ -27,7 +27,6 @@ def run_observability_check(workspace: Path) -> List[Finding]:
 
     findings: List[Finding] = []
     findings.extend(_check_multiturn_coverage(config, workspace))
-    findings.extend(_check_rubric_coverage(config))
     findings.extend(_check_trace_sampling(config, workspace))
     findings.extend(_check_trace_replay(config, workspace))
     return findings
@@ -62,32 +61,6 @@ def _check_multiturn_coverage(config: dict[str, Any], workspace: Path) -> List[F
     ]
 
 
-def _check_rubric_coverage(config: dict[str, Any]) -> List[Finding]:
-    rubrics = config.get("rubrics")
-    if isinstance(rubrics, list) and rubrics:
-        return []
-    return [
-        Finding(
-            id="observability.rubric_missing",
-            severity=Severity.INFO,
-            category=Category.QUALITY,
-            title="No context-specific rubric evaluator is declared",
-            summary=(
-                "Foundry rubric evaluators let teams score the agent against "
-                "task-specific criteria such as task success, tone, safety, cost, "
-                "and latency. AgentOps did not find a `rubrics:` block in "
-                "agentops.yaml."
-            ),
-            recommendation=(
-                "Declare at least one rubric in agentops.yaml and bind its "
-                "dimension metrics to thresholds, or reference the rubric through "
-                "the azd eval recipe used by `execution: azd`."
-            ),
-            source=SOURCE_NAME,
-        )
-    ]
-
-
 def _check_trace_sampling(config: dict[str, Any], workspace: Path) -> List[Finding]:
     observability = config.get("observability")
     trace_sampling = (
diff --git a/src/agentops/agent/cockpit.py b/src/agentops/agent/cockpit.py
index b4cd71a4..260e3bb4 100644
--- a/src/agentops/agent/cockpit.py
+++ b/src/agentops/agent/cockpit.py
@@ -1979,16 +1979,17 @@ def _build_readiness_checklist(
     rubric_ready = isinstance(rubrics, list) and bool(rubrics)
     checks.append(
         {
-            "title": "Rubric evaluator gate",
+            "title": "Optional rubric evaluator gate",
             "status": "ok" if rubric_ready else "muted",
             "detail": (
                 "Detected <code>rubrics:</code> in <code>agentops.yaml</code>. "
-                "AgentOps requires <code>execution: azd</code> so the Foundry "
-                "rubric evaluator actually runs."
+                "Keep thresholds bound only to metric names emitted by your "
+                "Foundry / azd run."
                 if rubric_ready
-                else "<strong>How to complete:</strong> declare a task-specific "
-                "<code>rubrics:</code> block and bind its dimensions to thresholds. "
-                "Use <code>execution: azd</code> so Foundry evaluates the rubric."
+                else "<strong>How to complete:</strong> optional - add "
+                "<code>rubrics:</code> only after a real Foundry rubric evaluator "
+                "exists and azd emits stable metric names you can bind to "
+                "thresholds."
             ),
         }
     )
diff --git a/src/agentops/pipeline/azd_runner.py b/src/agentops/pipeline/azd_runner.py
index cf77a64d..03253473 100644
--- a/src/agentops/pipeline/azd_runner.py
+++ b/src/agentops/pipeline/azd_runner.py
@@ -202,8 +202,6 @@ def normalize_to_results(
             "azd eval run returned no numeric metrics, so AgentOps cannot apply "
             "thresholds or claim the gate passed."
         )
-    _validate_rubric_evidence(config=config, recipe=recipe, metrics=aggregate_metrics)
-
     metric_binding = bind_threshold_metrics(config.thresholds.keys(), aggregate_metrics.keys())
     if metric_binding.unmatched:
         names = ", ".join(metric_binding.unmatched)
@@ -293,47 +291,6 @@ def normalize_to_results(
         },
     )
 
-
-def _validate_rubric_evidence(
-    *,
-    config: AgentOpsConfig,
-    recipe: EvalRecipe,
-    metrics: Dict[str, float],
-) -> None:
-    if not config.rubrics:
-        return
-
-    recipe_evaluators = {evaluator.name for evaluator in recipe.evaluators}
-    threshold_names = set(config.thresholds)
-    metric_names = set(metrics)
-    missing: list[str] = []
-
-    for rubric in config.rubrics:
-        evaluator_name = (rubric.evaluator or rubric.name).strip()
-        if evaluator_name not in recipe_evaluators:
-            missing.append(f"rubric evaluator `{evaluator_name}` in eval.yaml")
-        dimension_names = [dimension.name for dimension in rubric.dimensions]
-        thresholded_dimensions = [
-            name for name in dimension_names if name in threshold_names
-        ]
-        if not thresholded_dimensions:
-            missing.append(
-                f"threshold for at least one dimension of rubric `{rubric.name}`"
-            )
-            continue
-        for dimension_name in thresholded_dimensions:
-            if dimension_name not in metric_names:
-                missing.append(f"azd metric for rubric dimension `{dimension_name}`")
-
-    if missing:
-        raise AzdBackendError(
-            "rubric evidence is incomplete; "
-            + "; ".join(missing)
-            + ". Run `agentops eval init --force` after configuring rubrics and "
-            "bind rubric dimension thresholds in agentops.yaml."
-        )
-
-
 def write_raw_artifacts(azd_run: AzdEvalRun, output_dir: Path) -> None:
     """Write native azd payload and command streams for debugging/evidence."""
 
diff --git a/src/agentops/templates/agentops.yaml b/src/agentops/templates/agentops.yaml
index 30493277..fbcf0551 100644
--- a/src/agentops/templates/agentops.yaml
+++ b/src/agentops/templates/agentops.yaml
@@ -60,24 +60,20 @@ dataset: .agentops/data/smoke.jsonl
 #   groundedness: ">=3"
 #   avg_latency_seconds: "<=30"
 #
-# Optional. Context-specific rubric evaluators. When this block is present,
-# AgentOps requires execution: azd so the Foundry / azd evaluator actually runs;
-# local execution will fail closed instead of pretending rubric scoring happened.
+# Optional advanced hardening. Use only after the Foundry project already has a
+# real rubric evaluator and azd emits stable metric names you can bind to
+# thresholds. Placeholder rubric names will not create a Foundry evaluator.
 #
 # rubrics:
-#   - name: travel-concierge-quality
-#     evaluator: travel-concierge-rubric
+#   - name: my-production-rubric
+#     evaluator: existing-foundry-rubric-evaluator
 #     dimensions:
-#       - name: task_success
-#         description: "Completes the requested task without losing context."
+#       - name: rubric_metric_from_azd_output
+#         description: "Metric emitted by the Foundry / azd rubric run."
 #         weight: 0.5
-#       - name: safety
-#         description: "Avoids unsafe or unsupported claims."
-#         weight: 0.3
 #
 # thresholds:
-#   task_success: ">=4"
-#   safety: ">=4"
+#   rubric_metric_from_azd_output: ">=4"
 
 # Optional. Foundry prompt agents and Foundry publishing need a project
 # endpoint. If both this value and AZURE_AI_FOUNDRY_PROJECT_ENDPOINT are set,
diff --git a/tests/unit/test_agent_checks_observability.py b/tests/unit/test_agent_checks_observability.py
index aa581e8f..976739a3 100644
--- a/tests/unit/test_agent_checks_observability.py
+++ b/tests/unit/test_agent_checks_observability.py
@@ -18,7 +18,6 @@ def test_observability_check_flags_missing_build_2026_readiness(tmp_path: Path)
     ids = {finding.id for finding in findings}
 
     assert "observability.multiturn_coverage_missing" in ids
-    assert "observability.rubric_missing" in ids
     assert "observability.trace_sampling_missing" in ids
     assert "observability.trace_replay_missing" in ids
 
@@ -29,11 +28,6 @@ def test_observability_check_accepts_declared_readiness(tmp_path: Path) -> None:
         "agent: travel-agent:2\n"
         "dataset: .agentops/data/conversations.jsonl\n"
         "dataset_kind: multi-turn\n"
-        "rubrics:\n"
-        "  - name: travel-concierge-quality\n"
-        "    dimensions:\n"
-        "      - name: task_success\n"
-        "        description: Completes the requested travel task.\n"
         "observability:\n"
         "  trace_sampling:\n"
         "    enabled: true\n"
@@ -51,9 +45,7 @@ def test_observability_check_accepts_trace_manifest_lineage(tmp_path: Path) -> N
     (tmp_path / "agentops.yaml").write_text(
         "version: 1\n"
         "agent: travel-agent:2\n"
-        "dataset: .agentops/data/smoke.jsonl\n"
-        "rubrics:\n"
-        "  - name: travel-concierge-quality\n",
+        "dataset: .agentops/data/smoke.jsonl\n",
         encoding="utf-8",
     )
     manifest = tmp_path / ".agentops" / "data" / "trace-regression-manifest.json"
diff --git a/tests/unit/test_azd_runner.py b/tests/unit/test_azd_runner.py
index 73a8a612..6cfa5a0c 100644
--- a/tests/unit/test_azd_runner.py
+++ b/tests/unit/test_azd_runner.py
@@ -349,7 +349,7 @@ def test_normalize_to_results_binds_rubric_dimensions(tmp_path: Path) -> None:
     assert result.config["rubrics"][0]["evaluator"] == "travel_quality_rubric"
 
 
-def test_rubric_config_requires_dimension_threshold_evidence(tmp_path: Path) -> None:
+def test_rubric_metadata_does_not_block_azd_metrics(tmp_path: Path) -> None:
     recipe_path = tmp_path / "eval.yaml"
     recipe_path.write_text(
         """
@@ -393,65 +393,15 @@ def test_rubric_config_requires_dimension_threshold_evidence(tmp_path: Path) ->
         duration_seconds=3.0,
     )
 
-    with pytest.raises(azd_runner.AzdBackendError, match="rubric evidence"):
-        azd_runner.normalize_to_results(
-            azd_run,
-            config=config,
-            recipe=recipe,
-            started_at=datetime.now(timezone.utc),
-        )
-
-
-def test_rubric_config_requires_recipe_evaluator(tmp_path: Path) -> None:
-    recipe_path = tmp_path / "eval.yaml"
-    recipe_path.write_text(
-        """
-name: rubric-eval
-agent:
-  name: travel-agent
-  kind: prompt-agent
-evaluators:
-  - builtin.coherence
-""".lstrip(),
-        encoding="utf-8",
-    )
-    recipe = load_eval_recipe(recipe_path)
-    config = AgentOpsConfig(
-        version=1,
-        agent="travel-agent:1",
-        dataset="ignored.jsonl",
-        execution="azd",
-        rubrics=[
-            {
-                "name": "travel_quality",
-                "evaluator": "travel_quality_rubric",
-                "dimensions": [
-                    {
-                        "name": "booking_accuracy",
-                        "description": "Books or recommends options accurately.",
-                    }
-                ],
-            }
-        ],
-        thresholds={"booking_accuracy": ">=0.8"},
-    )
-    azd_run = azd_runner.AzdEvalRun(
-        recipe_path=recipe_path,
-        payload={"metrics": {"booking_accuracy": 0.91}},
-        run_id="run-1",
-        status="completed",
-        stdout="{}",
-        stderr="",
-        duration_seconds=3.0,
+    result = azd_runner.normalize_to_results(
+        azd_run,
+        config=config,
+        recipe=recipe,
+        started_at=datetime.now(timezone.utc),
     )
 
-    with pytest.raises(azd_runner.AzdBackendError, match="rubric evaluator"):
-        azd_runner.normalize_to_results(
-            azd_run,
-            config=config,
-            recipe=recipe,
-            started_at=datetime.now(timezone.utc),
-        )
+    assert result.summary.overall_passed is True
+    assert result.config["rubrics"][0]["evaluator"] == "travel_quality_rubric"
 
 
 def test_orchestrator_azd_dispatch_never_invokes_local_runtime(tmp_path: Path) -> None:
diff --git a/tests/unit/test_cockpit.py b/tests/unit/test_cockpit.py
index 46edb242..32c68037 100644
--- a/tests/unit/test_cockpit.py
+++ b/tests/unit/test_cockpit.py
@@ -459,7 +459,7 @@ def test_readiness_detects_multiturn_rubric_sampling_and_replay(tmp_path: Path):
     by_title = {check["title"]: check for check in readiness["checks"]}
 
     assert by_title["Multi-turn eval coverage"]["status"] == "ok"
-    assert by_title["Rubric evaluator gate"]["status"] == "ok"
+    assert by_title["Optional rubric evaluator gate"]["status"] == "ok"
     assert by_title["Trace sampling for live quality"]["status"] == "ok"
     assert by_title["Trace replay linked to evidence"]["status"] == "ok"
 

From aa81124c12cae9e558fcf092813114c89c545b5d Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 9 Jun 2026 02:16:10 +0000
Subject: [PATCH 2/2] chore: prepare release 0.3.13

---
 .claude-plugin/marketplace.json | 2 +-
 .github/plugin/marketplace.json | 2 +-
 CHANGELOG.md                    | 2 ++
 plugins/agentops/package.json   | 2 +-
 plugins/agentops/plugin.json    | 2 +-
 5 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index c58e5fcd..23596e4d 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -13,7 +13,7 @@
       "name": "agentops-accelerator",
       "source": "../../plugins/agentops",
       "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.",
-      "version": "0.3.12",
+      "version": "0.3.13",
       "keywords": [
         "agentops",
         "evaluation",
diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json
index c58e5fcd..23596e4d 100644
--- a/.github/plugin/marketplace.json
+++ b/.github/plugin/marketplace.json
@@ -13,7 +13,7 @@
       "name": "agentops-accelerator",
       "source": "../../plugins/agentops",
       "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.",
-      "version": "0.3.12",
+      "version": "0.3.13",
       "keywords": [
         "agentops",
         "evaluation",
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b3672e3a..225c7db4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres
 
 ## [Unreleased]
 
+## [0.3.13] - 2026-06-09
+
 ### Fixed
 - **Quickstart rubrics no longer block azd eval runs with placeholder evidence.**
   The Travel Agent hardening flow now defaults to multi-turn dataset coverage and
diff --git a/plugins/agentops/package.json b/plugins/agentops/package.json
index d9962578..094946f6 100644
--- a/plugins/agentops/package.json
+++ b/plugins/agentops/package.json
@@ -2,7 +2,7 @@
   "name": "agentops-accelerator",
   "displayName": "AgentOps Accelerator — Skills for GitHub Copilot",
   "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.",
-  "version": "0.3.12",
+  "version": "0.3.13",
   "publisher": "AgentOpsAccelerator",
   "icon": "icon.png",
   "license": "MIT",
diff --git a/plugins/agentops/plugin.json b/plugins/agentops/plugin.json
index 95b07344..4cf2b4ae 100644
--- a/plugins/agentops/plugin.json
+++ b/plugins/agentops/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "agentops-accelerator",
   "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.",
-  "version": "0.3.12",
+  "version": "0.3.13",
   "author": {
     "name": "AgentOps Accelerator",
     "url": "https://github.com/Azure/agentops"