From feedaaed7a90de6772673b7018225dfb3ac5a52c Mon Sep 17 00:00:00 2001 From: Paulo Lacerda Date: Mon, 15 Jun 2026 14:15:30 -0300 Subject: [PATCH] feat(prompt-deploy): tag PR candidates as agentops:candidate=true When `agentops.pipeline.prompt_deploy stage` runs in a PR-stage context (GitHub Actions `pull_request` event or Azure DevOps `BUILD_REASON=PullRequest`), the version it creates in the dev Foundry project now carries metadata `agentops:candidate=true`, `agentops:pr=<number>`, and `agentops:created_at=<iso8601>`. This solves the auditability and naive-consumer concerns raised in issue #214 at the source: portal viewers can filter the Versions tab on `agentops:candidate` to separate abandoned PR candidates from deployed-of-record versions, and downstream consumers that resolve "latest" can refuse to pick up candidates. Deployed-of-record runs (push to main/develop/release/** or workflow_dispatch) are not tagged, so absence of `agentops:candidate` is the deployed-of-record signal. The PR/deploy workflow templates (GitHub Actions and Azure DevOps) and the prompt-agent quickstart tutorial are updated to describe the new contract. A scheduled cleanup workflow can land later as a follow-up filtering on the same tag if numeric accumulation in Foundry becomes painful. 
Closes #214 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 16 ++ docs/tutorial-prompt-agent-quickstart.md | 17 +- src/agentops/pipeline/prompt_deploy.py | 50 +++++ .../azuredevops/agentops-pr-prompt-agent.yml | 7 +- .../workflows/agentops-pr-prompt-agent.yml | 7 +- tests/unit/test_prompt_deploy.py | 205 ++++++++++++++++++ 6 files changed, 291 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a604970..067f179e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres ## [Unreleased] +### Changed +- **PR-stage Foundry prompt-agent versions are now tagged at the source.** When + `agentops.pipeline.prompt_deploy stage` runs in a PR context (GitHub Actions + `pull_request` event or Azure DevOps `BUILD_REASON=PullRequest`), the version + it creates in the dev Foundry project carries metadata + `agentops:candidate=true`, `agentops:pr=<number>`, and + `agentops:created_at=<iso8601>`. Portal viewers can filter the + Versions tab on `agentops:candidate` to separate abandoned PR candidates + from deployed-of-record versions, and downstream consumers that resolve + "latest" can refuse to pick up candidates. Deployed-of-record versions + (push to `main`/`develop`/`release/**` or `workflow_dispatch`) are not + tagged, so absence of `agentops:candidate` is the deployed-of-record + signal. The PR/deploy workflow templates and the prompt-agent quickstart + tutorial are updated to describe the new contract. 
+ ([#214](https://github.com/Azure/agentops/issues/214)) + ## [0.4.0] - 2026-06-14 ### Added diff --git a/docs/tutorial-prompt-agent-quickstart.md b/docs/tutorial-prompt-agent-quickstart.md index 96ba6c95..8b503364 100644 --- a/docs/tutorial-prompt-agent-quickstart.md +++ b/docs/tutorial-prompt-agent-quickstart.md @@ -1370,11 +1370,18 @@ The PR workflow now has two jobs: > **Why does the PR workflow stage in dev, not sandbox?** The PR gate > must evaluate the same target the deploy workflow will use. Sandbox -> is the author's playground and never receives CI traffic. PR -> candidates accumulate in dev over time and may need periodic -> cleanup according to your team's Foundry retention policy; AgentOps -> uses prompt SHAs and git SHAs as the durable identity, not old -> candidate version numbers. +> is the author's playground and never receives CI traffic. +> +> Candidate versions created by PR runs are tagged in Foundry with +> `agentops:candidate=true` plus `agentops:pr=<number>` and +> `agentops:created_at=<iso8601>`. Portal viewers can filter the +> Versions tab on `agentops:candidate` to separate "abandoned PR +> candidates" from "deployed versions of record". Downstream consumers +> that resolve `<agent-name>` to "latest" should skip versions carrying +> `agentops:candidate=true`; the supported pinning mechanism remains +> `foundry-agent.json`, which always points at the deployed-of-record +> version. AgentOps uses prompt SHAs and git SHAs as the durable +> identity, not old candidate version numbers. 
The dev deploy workflow stages a candidate (same logic), evaluates it, summarizes the deployment via `prompt_deploy summarize`, and uploads diff --git a/src/agentops/pipeline/prompt_deploy.py b/src/agentops/pipeline/prompt_deploy.py index 331c21c4..b9794b2f 100644 --- a/src/agentops/pipeline/prompt_deploy.py +++ b/src/agentops/pipeline/prompt_deploy.py @@ -424,9 +424,59 @@ def _deployment_metadata(*, environment: str, prompt_hash: str) -> Dict[str, str workflow_url = _workflow_url() if workflow_url: metadata["agentops.workflow_url"] = workflow_url[:512] + + # When the staging step is invoked from a PR-stage workflow, mark the + # version as a candidate so portal viewers can filter it out and naive + # consumers that resolve "latest" can refuse to pick it up. See issue + # #214 for the full rationale. + pr_number = _detect_pr_stage() + if pr_number is not None: + metadata["agentops:candidate"] = "true" + if pr_number: + metadata["agentops:pr"] = pr_number[:512] + metadata["agentops:created_at"] = datetime.now(timezone.utc).isoformat() + return {key: value for key, value in metadata.items() if value} +def _detect_pr_stage() -> Optional[str]: + """Return the PR number string when running in a PR-stage context. + + Returns: + - A PR number (e.g. ``"42"``) when both the PR context and number are + identifiable. + - An empty string when the PR context is detected but the number cannot + be parsed (the version is still flagged as a candidate). + - ``None`` when no PR context is detected (deployed-of-record path). + + Detection covers the two CI platforms AgentOps generates workflows for: + GitHub Actions (``GITHUB_EVENT_NAME == 'pull_request'``) and Azure + DevOps (``BUILD_REASON == 'PullRequest'``). + """ + + if os.environ.get("GITHUB_EVENT_NAME") == "pull_request": + ref = os.environ.get("GITHUB_REF", "") + # ``refs/pull//merge`` or ``refs/pull//head``. 
+ if ref.startswith("refs/pull/"): + parts = ref.split("/") + if len(parts) >= 3 and parts[2].isdigit(): + return parts[2] + ref_name = os.environ.get("GITHUB_REF_NAME", "") + # GITHUB_REF_NAME for PRs is shaped like ``/merge``. + head = ref_name.split("/", 1)[0] if ref_name else "" + if head.isdigit(): + return head + return "" + + if os.environ.get("BUILD_REASON") == "PullRequest": + return ( + os.environ.get("SYSTEM_PULLREQUEST_PULLREQUESTNUMBER") + or "" + ) + + return None + + def _git_sha() -> str: return ( os.environ.get("GITHUB_SHA") diff --git a/src/agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml b/src/agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml index 02dfdf06..aeb3a986 100644 --- a/src/agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml +++ b/src/agentops/templates/pipelines/azuredevops/agentops-pr-prompt-agent.yml @@ -12,9 +12,10 @@ # Notes: # - Each PR run creates or reuses a candidate version in the dev # Foundry project. AgentOps deduplicates only when the prompt is -# byte-identical to the current seed version's instructions; PR -# candidates can therefore accumulate over time and may need to be -# cleaned up out-of-band. +# byte-identical to the current seed version's instructions. +# Candidate versions are tagged with `agentops:candidate=true` and +# `agentops:pr=` so portal viewers can filter them out and +# consumers that resolve "latest" can refuse to pick them up. # - Merge is what promotes the candidate via the deploy pipeline. # This PR pipeline does not record the candidate as deployed. 
# diff --git a/src/agentops/templates/workflows/agentops-pr-prompt-agent.yml b/src/agentops/templates/workflows/agentops-pr-prompt-agent.yml index 81e069ab..aeed0773 100644 --- a/src/agentops/templates/workflows/agentops-pr-prompt-agent.yml +++ b/src/agentops/templates/workflows/agentops-pr-prompt-agent.yml @@ -12,9 +12,10 @@ # Notes: # - Each PR run creates or reuses a candidate version in the dev # Foundry project. AgentOps deduplicates only when the prompt is -# byte-identical to the current seed version's instructions; PR -# candidates can therefore accumulate over time and may need to be -# cleaned up out-of-band. +# byte-identical to the current seed version's instructions. +# Candidate versions are tagged with `agentops:candidate=true` and +# `agentops:pr=` so portal viewers can filter them out and +# consumers that resolve "latest" can refuse to pick them up. # - Merge is what promotes the candidate via the deploy workflow. # This PR workflow does not record the candidate as deployed. # diff --git a/tests/unit/test_prompt_deploy.py b/tests/unit/test_prompt_deploy.py index 92f02561..701514c9 100644 --- a/tests/unit/test_prompt_deploy.py +++ b/tests/unit/test_prompt_deploy.py @@ -454,3 +454,208 @@ class _FakeClient: assert body["definition"]["kind"] == "prompt" assert body["metadata"] == {"agentops.env": "dev"} assert body["description"] == "desc" + + +# --------------------------------------------------------------------------- +# Candidate tagging (issue #214) +# +# When the staging step runs from a PR-stage workflow, the version it creates +# in Foundry must carry `agentops:candidate=true` so portal viewers can filter +# it out and downstream consumers can refuse to resolve "latest" to it. +# Deployed-of-record runs (push / workflow_dispatch / Azure DevOps non-PR) +# must NOT carry that tag. 
+# --------------------------------------------------------------------------- + + +def _clear_ci_env(monkeypatch) -> None: + """Drop CI env vars that ``_detect_pr_stage`` reads, so tests start clean.""" + + for name in ( + "GITHUB_EVENT_NAME", + "GITHUB_REF", + "GITHUB_REF_NAME", + "BUILD_REASON", + "SYSTEM_PULLREQUEST_PULLREQUESTNUMBER", + ): + monkeypatch.delenv(name, raising=False) + + +def test_detect_pr_stage_returns_pr_number_in_github_pull_request(monkeypatch) -> None: + _clear_ci_env(monkeypatch) + monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request") + monkeypatch.setenv("GITHUB_REF", "refs/pull/214/merge") + + assert prompt_deploy._detect_pr_stage() == "214" + + +def test_detect_pr_stage_falls_back_to_ref_name(monkeypatch) -> None: + _clear_ci_env(monkeypatch) + monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request") + monkeypatch.setenv("GITHUB_REF", "") + monkeypatch.setenv("GITHUB_REF_NAME", "42/merge") + + assert prompt_deploy._detect_pr_stage() == "42" + + +def test_detect_pr_stage_returns_empty_string_when_pr_number_unknown(monkeypatch) -> None: + _clear_ci_env(monkeypatch) + monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request") + monkeypatch.setenv("GITHUB_REF", "refs/heads/feature") + + # PR context but no parseable number: still flag as candidate. 
+ assert prompt_deploy._detect_pr_stage() == "" + + +def test_detect_pr_stage_returns_none_for_push_or_dispatch(monkeypatch) -> None: + _clear_ci_env(monkeypatch) + monkeypatch.setenv("GITHUB_EVENT_NAME", "push") + monkeypatch.setenv("GITHUB_REF", "refs/heads/main") + + assert prompt_deploy._detect_pr_stage() is None + + +def test_detect_pr_stage_returns_pr_number_in_azure_devops(monkeypatch) -> None: + _clear_ci_env(monkeypatch) + monkeypatch.setenv("BUILD_REASON", "PullRequest") + monkeypatch.setenv("SYSTEM_PULLREQUEST_PULLREQUESTNUMBER", "77") + + assert prompt_deploy._detect_pr_stage() == "77" + + +def test_detect_pr_stage_returns_none_when_no_ci_env(monkeypatch) -> None: + _clear_ci_env(monkeypatch) + + assert prompt_deploy._detect_pr_stage() is None + + +def test_stage_prompt_agent_candidate_tags_pr_versions( + tmp_path: Path, + monkeypatch, +) -> None: + """In a GitHub PR context, the version created in Foundry must carry + ``agentops:candidate=true`` and ``agentops:pr=`` so portal + viewers and downstream resolvers can recognize it as ephemeral. 
+ """ + + _clear_ci_env(monkeypatch) + monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request") + monkeypatch.setenv("GITHUB_REF", "refs/pull/214/merge") + + config = tmp_path / "agentops.yaml" + dataset = tmp_path / "data.jsonl" + prompt = tmp_path / "prompt.md" + dataset.write_text('{"input":"hi","expected":"hello"}\n', encoding="utf-8") + prompt.write_text("new instructions\n", encoding="utf-8") + config.write_text( + "\n".join( + [ + "version: 1", + "agent: travel-agent:3", + "dataset: data.jsonl", + "prompt_file: prompt.md", + "project_endpoint: https://example.services.ai.azure.com/api/projects/p", + ] + ), + encoding="utf-8", + ) + + current = SimpleNamespace( + id="agent-version-3", + version="3", + definition={"kind": "prompt", "model": "gpt-4o-mini", "instructions": "old"}, + metadata={}, + ) + captured: dict = {} + + monkeypatch.setattr( + prompt_deploy, + "_get_agent_version", + lambda endpoint, name, version: current, + ) + + def fake_create(endpoint, name, definition, *, metadata, description): + captured["metadata"] = metadata + return SimpleNamespace(id="agent-version-4", version="4") + + monkeypatch.setattr(prompt_deploy, "_create_agent_version", fake_create) + + prompt_deploy.stage_prompt_agent_candidate( + config_path=config, + environment="dev", + output_path=tmp_path / ".agentops/deployments/foundry-agent.json", + eval_config_path=tmp_path / ".agentops/deployments/agentops.candidate.yaml", + ) + + metadata = captured["metadata"] + assert metadata["agentops:candidate"] == "true" + assert metadata["agentops:pr"] == "214" + assert "agentops:created_at" in metadata + # Existing deployment metadata is preserved alongside candidate tags. 
+ assert metadata["agentops.env"] == "dev" + + +def test_stage_prompt_agent_candidate_does_not_tag_deployed_of_record( + tmp_path: Path, + monkeypatch, +) -> None: + """Push to main / workflow_dispatch / non-PR Azure DevOps runs must NOT + carry the ``agentops:candidate`` tag — absence of the tag is what marks + a version as deployed of record. + """ + + _clear_ci_env(monkeypatch) + monkeypatch.setenv("GITHUB_EVENT_NAME", "push") + monkeypatch.setenv("GITHUB_REF", "refs/heads/main") + + config = tmp_path / "agentops.yaml" + dataset = tmp_path / "data.jsonl" + prompt = tmp_path / "prompt.md" + dataset.write_text('{"input":"hi","expected":"hello"}\n', encoding="utf-8") + prompt.write_text("new instructions\n", encoding="utf-8") + config.write_text( + "\n".join( + [ + "version: 1", + "agent: travel-agent:3", + "dataset: data.jsonl", + "prompt_file: prompt.md", + "project_endpoint: https://example.services.ai.azure.com/api/projects/p", + ] + ), + encoding="utf-8", + ) + + current = SimpleNamespace( + id="agent-version-3", + version="3", + definition={"kind": "prompt", "model": "gpt-4o-mini", "instructions": "old"}, + metadata={}, + ) + captured: dict = {} + + monkeypatch.setattr( + prompt_deploy, + "_get_agent_version", + lambda endpoint, name, version: current, + ) + + def fake_create(endpoint, name, definition, *, metadata, description): + captured["metadata"] = metadata + return SimpleNamespace(id="agent-version-4", version="4") + + monkeypatch.setattr(prompt_deploy, "_create_agent_version", fake_create) + + prompt_deploy.stage_prompt_agent_candidate( + config_path=config, + environment="dev", + output_path=tmp_path / ".agentops/deployments/foundry-agent.json", + eval_config_path=tmp_path / ".agentops/deployments/agentops.candidate.yaml", + ) + + metadata = captured["metadata"] + assert "agentops:candidate" not in metadata + assert "agentops:pr" not in metadata + assert "agentops:created_at" not in metadata + # Deployment-identity metadata is still present. 
+ assert metadata["agentops.env"] == "dev" +