diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index bf525cc7..5fcb249b 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -13,7 +13,7 @@ "name": "agentops-accelerator", "source": "../../plugins/agentops", "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.", - "version": "0.3.23", + "version": "0.4.1", "keywords": [ "agentops", "evaluation", diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json index bf525cc7..5fcb249b 100644 --- a/.github/plugin/marketplace.json +++ b/.github/plugin/marketplace.json @@ -13,7 +13,7 @@ "name": "agentops-accelerator", "source": "../../plugins/agentops", "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Toolkit and Microsoft Foundry agents.", - "version": "0.3.23", + "version": "0.4.1", "keywords": [ "agentops", "evaluation", diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a604970..39fc7226 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres ## [Unreleased] +## [0.4.1] - 2026-06-15 + +### Changed +- **PR-stage Foundry prompt-agent versions are now tagged at the source.** When + `agentops.pipeline.prompt_deploy stage` runs in a PR context (GitHub Actions + `pull_request` event or Azure DevOps `BUILD_REASON=PullRequest`), the version + it creates in the dev Foundry project carries metadata + `agentops:candidate=true`, `agentops:pr=<number>`, and + `agentops:created_at=<ISO-8601 UTC timestamp>`. Portal viewers can filter the + Versions tab on `agentops:candidate` to separate abandoned PR candidates + from deployed-of-record versions, and downstream consumers that resolve + "latest" can refuse to pick up candidates. 
Deployed-of-record versions + (push to `main`/`develop`/`release/**` or `workflow_dispatch`) are not + tagged, so absence of `agentops:candidate` is the deployed-of-record + signal. The PR/deploy workflow templates and the prompt-agent quickstart + tutorial are updated to describe the new contract. + ([#214](https://github.com/Azure/agentops/issues/214)) + ## [0.4.0] - 2026-06-14 ### Added diff --git a/docs/tutorial-prompt-agent-quickstart.md b/docs/tutorial-prompt-agent-quickstart.md index 96ba6c95..8b503364 100644 --- a/docs/tutorial-prompt-agent-quickstart.md +++ b/docs/tutorial-prompt-agent-quickstart.md @@ -1370,11 +1370,18 @@ The PR workflow now has two jobs: > **Why does the PR workflow stage in dev, not sandbox?** The PR gate > must evaluate the same target the deploy workflow will use. Sandbox -> is the author's playground and never receives CI traffic. PR -> candidates accumulate in dev over time and may need periodic -> cleanup according to your team's Foundry retention policy; AgentOps -> uses prompt SHAs and git SHAs as the durable identity, not old -> candidate version numbers. +> is the author's playground and never receives CI traffic. +> +> Candidate versions created by PR runs are tagged in Foundry with +> `agentops:candidate=true` plus `agentops:pr=<number>` and +> `agentops:created_at=<ISO-8601 UTC timestamp>`. Portal viewers can filter the +> Versions tab on `agentops:candidate` to separate "abandoned PR +> candidates" from "deployed versions of record". Downstream consumers +> that resolve `<agent-name>` to "latest" should skip versions carrying +> `agentops:candidate=true`; the supported pinning mechanism remains +> `foundry-agent.json`, which always points at the deployed-of-record +> version. AgentOps uses prompt SHAs and git SHAs as the durable +> identity, not old candidate version numbers. 
The dev deploy workflow stages a candidate (same logic), evaluates it, summarizes the deployment via `prompt_deploy summarize`, and uploads diff --git a/plugins/agentops/package.json b/plugins/agentops/package.json index 79f710c6..2709db0b 100644 --- a/plugins/agentops/package.json +++ b/plugins/agentops/package.json @@ -2,7 +2,7 @@ "name": "agentops-accelerator", "displayName": "AgentOps Accelerator — Skills for GitHub Copilot", "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.", - "version": "0.3.23", + "version": "0.4.1", "publisher": "AgentOpsAccelerator", "icon": "icon.png", "license": "MIT", diff --git a/plugins/agentops/plugin.json b/plugins/agentops/plugin.json index 73bdffbb..83087442 100644 --- a/plugins/agentops/plugin.json +++ b/plugins/agentops/plugin.json @@ -1,7 +1,7 @@ { "name": "agentops-accelerator", "description": "Copilot agent skills for running standardized evaluation workflows with AgentOps Accelerator and Microsoft Foundry agents.", - "version": "0.3.23", + "version": "0.4.1", "author": { "name": "AgentOps Accelerator", "url": "https://github.com/Azure/agentops" diff --git a/pyproject.toml b/pyproject.toml index d53a67ef..7a74d519 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ foundry = [ "azure-ai-evaluation>=1.0,<2.0", "azure-identity>=1.17,<2.0", "azure-monitor-opentelemetry>=1.6,<2.0", - "pandas>=2.0,<3.0", + "pandas>=2.0,<4.0", ] agent = [ "fastapi>=0.110,<1.0", @@ -30,10 +30,10 @@ agent = [ "httpx>=0.27,<1.0", "cryptography>=42", "markdown>=3.6,<4.0", - "azure-monitor-query>=1.3,<2.0", + "azure-monitor-query>=1.3,<3.0", "azure-monitor-opentelemetry>=1.6,<2.0", "azure-identity>=1.17,<2.0", - "azure-mgmt-cognitiveservices>=13.5,<14.0", + "azure-mgmt-cognitiveservices>=13.5,<15.0", "azure-mgmt-monitor>=6.0,<7.0", "azure-mgmt-authorization>=4.0,<5.0", ] diff --git a/src/agentops/pipeline/prompt_deploy.py 
def _detect_pr_stage() -> Optional[str]:
    """Return the PR number string when running in a PR-stage context.

    Detection is driven purely by CI environment variables and covers the two
    platforms handled here: GitHub Actions
    (``GITHUB_EVENT_NAME == 'pull_request'``) and Azure DevOps
    (``BUILD_REASON == 'PullRequest'``).

    Returns:
        - A PR number (e.g. ``"42"``) when both the PR context and the number
          are identifiable.
        - An empty string when the PR context is detected but the number
          cannot be determined (the caller still flags the version as a
          candidate).
        - ``None`` when no PR context is detected (deployed-of-record path).
    """

    env = os.environ

    # NOTE(review): only the ``pull_request`` event is treated as PR context;
    # ``pull_request_target`` runs fall through to ``None`` -- confirm that is
    # the intended contract.
    if env.get("GITHUB_EVENT_NAME") == "pull_request":
        ref = env.get("GITHUB_REF", "")
        # ``refs/pull/<number>/merge`` or ``refs/pull/<number>/head``.
        if ref.startswith("refs/pull/"):
            parts = ref.split("/")
            if len(parts) >= 3 and parts[2].isdigit():
                return parts[2]
        # GITHUB_REF_NAME for PRs is shaped like ``<number>/merge``; an unset
        # or empty value yields an empty head and falls through.
        head = env.get("GITHUB_REF_NAME", "").split("/", 1)[0]
        if head.isdigit():
            return head
        return ""

    if env.get("BUILD_REASON") == "PullRequest":
        # Per the Azure Pipelines predefined-variable docs,
        # System.PullRequest.PullRequestNumber is populated for GitHub-hosted
        # PRs, while System.PullRequest.PullRequestId carries the PR
        # identifier otherwise. Prefer the number (it matches what reviewers
        # see on GitHub) and fall back to the ID so Azure Repos PRs are not
        # left without an ``agentops:pr`` value.
        for name in (
            "SYSTEM_PULLREQUEST_PULLREQUESTNUMBER",
            "SYSTEM_PULLREQUEST_PULLREQUESTID",
        ):
            value = env.get(name, "").strip()
            if value:
                return value
        return ""

    return None
+# byte-identical to the current seed version's instructions. +# Candidate versions are tagged with `agentops:candidate=true` and +# `agentops:pr=<number>` so portal viewers can filter them out and +# consumers that resolve "latest" can refuse to pick them up. # - Merge is what promotes the candidate via the deploy workflow. # This PR workflow does not record the candidate as deployed. # diff --git a/tests/unit/test_prompt_deploy.py b/tests/unit/test_prompt_deploy.py index 92f02561..701514c9 100644 --- a/tests/unit/test_prompt_deploy.py +++ b/tests/unit/test_prompt_deploy.py @@ -454,3 +454,208 @@ class _FakeClient: assert body["definition"]["kind"] == "prompt" assert body["metadata"] == {"agentops.env": "dev"} assert body["description"] == "desc" + + +# --------------------------------------------------------------------------- +# Candidate tagging (issue #214) +# +# When the staging step runs from a PR-stage workflow, the version it creates +# in Foundry must carry `agentops:candidate=true` so portal viewers can filter +# it out and downstream consumers can refuse to resolve "latest" to it. +# Deployed-of-record runs (push / workflow_dispatch / Azure DevOps non-PR) +# must NOT carry that tag. 
# ---------------------------------------------------------------------------


def _clear_ci_env(monkeypatch) -> None:
    """Drop CI env vars that ``_detect_pr_stage`` reads, so tests start clean."""

    for name in (
        "GITHUB_EVENT_NAME",
        "GITHUB_REF",
        "GITHUB_REF_NAME",
        "BUILD_REASON",
        "SYSTEM_PULLREQUEST_PULLREQUESTNUMBER",
    ):
        monkeypatch.delenv(name, raising=False)


def _stage_candidate_and_capture_metadata(tmp_path: Path, monkeypatch) -> dict:
    """Stage a prompt-agent candidate against fakes and return its metadata.

    Writes a minimal config, dataset and prompt under ``tmp_path``, replaces
    ``prompt_deploy._get_agent_version`` and
    ``prompt_deploy._create_agent_version`` with in-memory fakes, runs
    ``prompt_deploy.stage_prompt_agent_candidate`` for the ``dev``
    environment, and returns the ``metadata`` keyword argument the fake
    create call received.

    Callers are expected to set up the CI environment variables *before*
    calling this helper; it does not touch them.
    """

    config = tmp_path / "agentops.yaml"
    dataset = tmp_path / "data.jsonl"
    prompt = tmp_path / "prompt.md"
    dataset.write_text('{"input":"hi","expected":"hello"}\n', encoding="utf-8")
    # The prompt differs from the current version's instructions ("old").
    prompt.write_text("new instructions\n", encoding="utf-8")
    config.write_text(
        "\n".join(
            [
                "version: 1",
                "agent: travel-agent:3",
                "dataset: data.jsonl",
                "prompt_file: prompt.md",
                "project_endpoint: https://example.services.ai.azure.com/api/projects/p",
            ]
        ),
        encoding="utf-8",
    )

    current = SimpleNamespace(
        id="agent-version-3",
        version="3",
        definition={"kind": "prompt", "model": "gpt-4o-mini", "instructions": "old"},
        metadata={},
    )
    captured: dict = {}

    monkeypatch.setattr(
        prompt_deploy,
        "_get_agent_version",
        lambda endpoint, name, version: current,
    )

    def fake_create(endpoint, name, definition, *, metadata, description):
        captured["metadata"] = metadata
        return SimpleNamespace(id="agent-version-4", version="4")

    monkeypatch.setattr(prompt_deploy, "_create_agent_version", fake_create)

    prompt_deploy.stage_prompt_agent_candidate(
        config_path=config,
        environment="dev",
        output_path=tmp_path / ".agentops/deployments/foundry-agent.json",
        eval_config_path=tmp_path / ".agentops/deployments/agentops.candidate.yaml",
    )

    # A KeyError here means the fake create call was never reached.
    return captured["metadata"]


def test_detect_pr_stage_returns_pr_number_in_github_pull_request(monkeypatch) -> None:
    _clear_ci_env(monkeypatch)
    monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request")
    monkeypatch.setenv("GITHUB_REF", "refs/pull/214/merge")

    assert prompt_deploy._detect_pr_stage() == "214"


def test_detect_pr_stage_falls_back_to_ref_name(monkeypatch) -> None:
    _clear_ci_env(monkeypatch)
    monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request")
    monkeypatch.setenv("GITHUB_REF", "")
    monkeypatch.setenv("GITHUB_REF_NAME", "42/merge")

    assert prompt_deploy._detect_pr_stage() == "42"


def test_detect_pr_stage_returns_empty_string_when_pr_number_unknown(monkeypatch) -> None:
    _clear_ci_env(monkeypatch)
    monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request")
    monkeypatch.setenv("GITHUB_REF", "refs/heads/feature")

    # PR context but no parseable number: still flag as candidate.
    assert prompt_deploy._detect_pr_stage() == ""


def test_detect_pr_stage_returns_none_for_push_or_dispatch(monkeypatch) -> None:
    _clear_ci_env(monkeypatch)
    monkeypatch.setenv("GITHUB_EVENT_NAME", "push")
    monkeypatch.setenv("GITHUB_REF", "refs/heads/main")

    assert prompt_deploy._detect_pr_stage() is None


def test_detect_pr_stage_returns_pr_number_in_azure_devops(monkeypatch) -> None:
    _clear_ci_env(monkeypatch)
    monkeypatch.setenv("BUILD_REASON", "PullRequest")
    monkeypatch.setenv("SYSTEM_PULLREQUEST_PULLREQUESTNUMBER", "77")

    assert prompt_deploy._detect_pr_stage() == "77"


def test_detect_pr_stage_returns_none_when_no_ci_env(monkeypatch) -> None:
    _clear_ci_env(monkeypatch)

    assert prompt_deploy._detect_pr_stage() is None


def test_stage_prompt_agent_candidate_tags_pr_versions(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """In a GitHub PR context, the version created in Foundry must carry
    ``agentops:candidate=true`` and ``agentops:pr=<number>`` so portal
    viewers and downstream resolvers can recognize it as ephemeral.
    """

    from datetime import datetime

    _clear_ci_env(monkeypatch)
    monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request")
    monkeypatch.setenv("GITHUB_REF", "refs/pull/214/merge")

    metadata = _stage_candidate_and_capture_metadata(tmp_path, monkeypatch)

    assert metadata["agentops:candidate"] == "true"
    assert metadata["agentops:pr"] == "214"
    assert "agentops:created_at" in metadata
    # The timestamp must be a parseable, timezone-aware ISO-8601 value so
    # consumers can compare candidates created on different runners.
    created_at = datetime.fromisoformat(metadata["agentops:created_at"])
    assert created_at.tzinfo is not None
    # Existing deployment metadata is preserved alongside candidate tags.
    assert metadata["agentops.env"] == "dev"


def test_stage_prompt_agent_candidate_does_not_tag_deployed_of_record(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Push to main / workflow_dispatch / non-PR Azure DevOps runs must NOT
    carry the ``agentops:candidate`` tag — absence of the tag is what marks
    a version as deployed of record.
    """

    _clear_ci_env(monkeypatch)
    monkeypatch.setenv("GITHUB_EVENT_NAME", "push")
    monkeypatch.setenv("GITHUB_REF", "refs/heads/main")

    metadata = _stage_candidate_and_capture_metadata(tmp_path, monkeypatch)

    assert "agentops:candidate" not in metadata
    assert "agentops:pr" not in metadata
    assert "agentops:created_at" not in metadata
    # Deployment-identity metadata is still present.
    assert metadata["agentops.env"] == "dev"