Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,22 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres

## [Unreleased]

### Changed
- **PR-stage Foundry prompt-agent versions are now tagged at the source.** When
`agentops.pipeline.prompt_deploy stage` runs in a PR context (GitHub Actions
`pull_request` event or Azure DevOps `BUILD_REASON=PullRequest`), the version
it creates in the dev Foundry project carries metadata
`agentops:candidate=true`, `agentops:pr=<number>`, and
`agentops:created_at=<ISO timestamp>`. Portal viewers can filter the
Versions tab on `agentops:candidate` to separate abandoned PR candidates
from deployed-of-record versions, and downstream consumers that resolve
"latest" can refuse to pick up candidates. Deployed-of-record versions
(push to `main`/`develop`/`release/**` or `workflow_dispatch`) are not
tagged, so absence of `agentops:candidate` is the deployed-of-record
signal. The PR/deploy workflow templates and the prompt-agent quickstart
tutorial are updated to describe the new contract.
([#214](https://github.com/Azure/agentops/issues/214))

## [0.4.0] - 2026-06-14

### Added
Expand Down
17 changes: 12 additions & 5 deletions docs/tutorial-prompt-agent-quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -1370,11 +1370,18 @@ The PR workflow now has two jobs:

> **Why does the PR workflow stage in dev, not sandbox?** The PR gate
> must evaluate the same target the deploy workflow will use. Sandbox
> is the author's playground and never receives CI traffic. PR
> candidates accumulate in dev over time and may need periodic
> cleanup according to your team's Foundry retention policy; AgentOps
> uses prompt SHAs and git SHAs as the durable identity, not old
> candidate version numbers.
> is the author's playground and never receives CI traffic.
>
> Candidate versions created by PR runs are tagged in Foundry with
> `agentops:candidate=true` plus `agentops:pr=<number>` and
> `agentops:created_at=<ISO timestamp>`. Portal viewers can filter the
> Versions tab on `agentops:candidate` to separate "abandoned PR
> candidates" from "deployed versions of record". Downstream consumers
> that resolve `<agent>` to "latest" should skip versions carrying
> `agentops:candidate=true`; the supported pinning mechanism remains
> `foundry-agent.json`, which always points at the deployed-of-record
> version. AgentOps uses prompt SHAs and git SHAs as the durable
> identity, not old candidate version numbers.

The dev deploy workflow stages a candidate (same logic), evaluates it,
summarizes the deployment via `prompt_deploy summarize`, and uploads
Expand Down
50 changes: 50 additions & 0 deletions src/agentops/pipeline/prompt_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,9 +424,59 @@ def _deployment_metadata(*, environment: str, prompt_hash: str) -> Dict[str, str
workflow_url = _workflow_url()
if workflow_url:
metadata["agentops.workflow_url"] = workflow_url[:512]

# When the staging step is invoked from a PR-stage workflow, mark the
# version as a candidate so portal viewers can filter it out and naive
# consumers that resolve "latest" can refuse to pick it up. See issue
# #214 for the full rationale.
pr_number = _detect_pr_stage()
if pr_number is not None:
metadata["agentops:candidate"] = "true"
if pr_number:
metadata["agentops:pr"] = pr_number[:512]
metadata["agentops:created_at"] = datetime.now(timezone.utc).isoformat()

return {key: value for key, value in metadata.items() if value}


def _detect_pr_stage() -> Optional[str]:
    """Return the PR number string when running in a PR-stage context.

    Detection covers the two CI platforms AgentOps generates workflows for:
    GitHub Actions (``GITHUB_EVENT_NAME == 'pull_request'``) and Azure
    DevOps (``BUILD_REASON == 'PullRequest'``).

    Returns:
        - A PR identifier (e.g. ``"42"``) when both the PR context and the
          number are identifiable.
        - An empty string when the PR context is detected but no number is
          available (the version is still flagged as a candidate).
        - ``None`` when no PR context is detected (deployed-of-record path).
    """

    env = os.environ

    if env.get("GITHUB_EVENT_NAME") == "pull_request":
        ref = env.get("GITHUB_REF", "")
        # ``refs/pull/<N>/merge`` or ``refs/pull/<N>/head``.
        if ref.startswith("refs/pull/"):
            parts = ref.split("/")
            if len(parts) >= 3 and parts[2].isdigit():
                return parts[2]
        # GITHUB_REF_NAME for PRs is shaped like ``<N>/merge``; an unset or
        # empty value simply yields an empty, non-numeric head.
        head = env.get("GITHUB_REF_NAME", "").split("/", 1)[0]
        if head.isdigit():
            return head
        # PR event, but neither ref variable carried a usable number.
        return ""

    if env.get("BUILD_REASON") == "PullRequest":
        # Per the Azure DevOps predefined-variable docs, PULLREQUESTNUMBER is
        # populated for PRs that originate from GitHub, whereas Azure Repos
        # Git PRs expose only PULLREQUESTID. Prefer the number (existing
        # behaviour) and fall back to the ID so Azure Repos PRs are still
        # attributable to a specific pull request.
        return (
            env.get("SYSTEM_PULLREQUEST_PULLREQUESTNUMBER")
            or env.get("SYSTEM_PULLREQUEST_PULLREQUESTID")
            or ""
        )

    return None


def _git_sha() -> str:
return (
os.environ.get("GITHUB_SHA")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
# Notes:
# - Each PR run creates or reuses a candidate version in the dev
# Foundry project. AgentOps deduplicates only when the prompt is
# byte-identical to the current seed version's instructions; PR
# candidates can therefore accumulate over time and may need to be
# cleaned up out-of-band.
# byte-identical to the current seed version's instructions.
# Candidate versions are tagged with `agentops:candidate=true` and
# `agentops:pr=<number>` so portal viewers can filter them out and
# consumers that resolve "latest" can refuse to pick them up.
# - Merge is what promotes the candidate via the deploy pipeline.
# This PR pipeline does not record the candidate as deployed.
#
Expand Down
7 changes: 4 additions & 3 deletions src/agentops/templates/workflows/agentops-pr-prompt-agent.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
# Notes:
# - Each PR run creates or reuses a candidate version in the dev
# Foundry project. AgentOps deduplicates only when the prompt is
# byte-identical to the current seed version's instructions; PR
# candidates can therefore accumulate over time and may need to be
# cleaned up out-of-band.
# byte-identical to the current seed version's instructions.
# Candidate versions are tagged with `agentops:candidate=true` and
# `agentops:pr=<number>` so portal viewers can filter them out and
# consumers that resolve "latest" can refuse to pick them up.
# - Merge is what promotes the candidate via the deploy workflow.
# This PR workflow does not record the candidate as deployed.
#
Expand Down
205 changes: 205 additions & 0 deletions tests/unit/test_prompt_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,3 +454,208 @@ class _FakeClient:
assert body["definition"]["kind"] == "prompt"
assert body["metadata"] == {"agentops.env": "dev"}
assert body["description"] == "desc"


# ---------------------------------------------------------------------------
# Candidate tagging (issue #214)
#
# When the staging step runs from a PR-stage workflow, the version it creates
# in Foundry must carry `agentops:candidate=true` so portal viewers can filter
# it out and downstream consumers can refuse to resolve "latest" to it.
# Deployed-of-record runs (push / workflow_dispatch / Azure DevOps non-PR)
# must NOT carry that tag.
# ---------------------------------------------------------------------------


def _clear_ci_env(monkeypatch) -> None:
    """Remove the CI variables consulted by ``_detect_pr_stage``.

    Each variable is deleted through ``monkeypatch`` with ``raising=False``,
    so variables that are already absent are ignored and the test starts
    from an environment with no CI context.
    """

    ci_variables = [
        "GITHUB_EVENT_NAME",
        "GITHUB_REF",
        "GITHUB_REF_NAME",
        "BUILD_REASON",
        "SYSTEM_PULLREQUEST_PULLREQUESTNUMBER",
    ]
    for variable in ci_variables:
        monkeypatch.delenv(variable, raising=False)


def test_detect_pr_stage_returns_pr_number_in_github_pull_request(monkeypatch) -> None:
    """A GitHub ``pull_request`` event with a ``refs/pull/<N>/merge`` ref yields ``<N>``."""

    _clear_ci_env(monkeypatch)
    github_env = (
        ("GITHUB_EVENT_NAME", "pull_request"),
        ("GITHUB_REF", "refs/pull/214/merge"),
    )
    for name, value in github_env:
        monkeypatch.setenv(name, value)

    detected = prompt_deploy._detect_pr_stage()

    assert detected == "214"


def test_detect_pr_stage_falls_back_to_ref_name(monkeypatch) -> None:
    """With an empty ``GITHUB_REF``, the number comes from ``GITHUB_REF_NAME``."""

    _clear_ci_env(monkeypatch)
    github_env = (
        ("GITHUB_EVENT_NAME", "pull_request"),
        ("GITHUB_REF", ""),
        ("GITHUB_REF_NAME", "42/merge"),
    )
    for name, value in github_env:
        monkeypatch.setenv(name, value)

    detected = prompt_deploy._detect_pr_stage()

    assert detected == "42"


def test_detect_pr_stage_returns_empty_string_when_pr_number_unknown(monkeypatch) -> None:
    """A PR event whose ref carries no number still signals a candidate.

    The empty string (rather than ``None``) is what tells the caller that a
    PR context was detected even though no PR number could be extracted.
    """

    _clear_ci_env(monkeypatch)
    github_env = (
        ("GITHUB_EVENT_NAME", "pull_request"),
        ("GITHUB_REF", "refs/heads/feature"),
    )
    for name, value in github_env:
        monkeypatch.setenv(name, value)

    detected = prompt_deploy._detect_pr_stage()

    assert detected == ""


def test_detect_pr_stage_returns_none_for_push_or_dispatch(monkeypatch) -> None:
    """Non-PR GitHub events must take the deployed-of-record path.

    Both events named in the test title are exercised: ``push`` and
    ``workflow_dispatch``. For each, ``_detect_pr_stage`` must return
    ``None`` so the staged version is not tagged as a candidate.
    """

    for event_name in ("push", "workflow_dispatch"):
        # Reset before each event so one iteration cannot leak into the next.
        _clear_ci_env(monkeypatch)
        monkeypatch.setenv("GITHUB_EVENT_NAME", event_name)
        monkeypatch.setenv("GITHUB_REF", "refs/heads/main")

        # The event name is the assertion message so a failure identifies
        # which trigger regressed.
        assert prompt_deploy._detect_pr_stage() is None, event_name


def test_detect_pr_stage_returns_pr_number_in_azure_devops(monkeypatch) -> None:
    """An Azure DevOps ``PullRequest`` build reports its PR number variable."""

    _clear_ci_env(monkeypatch)
    azure_env = (
        ("BUILD_REASON", "PullRequest"),
        ("SYSTEM_PULLREQUEST_PULLREQUESTNUMBER", "77"),
    )
    for name, value in azure_env:
        monkeypatch.setenv(name, value)

    detected = prompt_deploy._detect_pr_stage()

    assert detected == "77"


def test_detect_pr_stage_returns_none_when_no_ci_env(monkeypatch) -> None:
    """With every CI variable removed, no PR context is reported."""

    _clear_ci_env(monkeypatch)

    detected = prompt_deploy._detect_pr_stage()

    assert detected is None


def test_stage_prompt_agent_candidate_tags_pr_versions(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Staging from a GitHub PR run tags the created version as a candidate.

    The metadata handed to ``_create_agent_version`` must include
    ``agentops:candidate=true``, ``agentops:pr=<number>`` and an
    ``agentops:created_at`` entry, next to the regular ``agentops.env``
    deployment metadata.
    """

    # Simulate a GitHub Actions pull_request run for PR #214.
    _clear_ci_env(monkeypatch)
    monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request")
    monkeypatch.setenv("GITHUB_REF", "refs/pull/214/merge")

    # Minimal on-disk workspace: dataset, prompt, and the config tying them
    # to version 3 of the agent.
    (tmp_path / "data.jsonl").write_text(
        '{"input":"hi","expected":"hello"}\n', encoding="utf-8"
    )
    (tmp_path / "prompt.md").write_text("new instructions\n", encoding="utf-8")
    config_lines = (
        "version: 1",
        "agent: travel-agent:3",
        "dataset: data.jsonl",
        "prompt_file: prompt.md",
        "project_endpoint: https://example.services.ai.azure.com/api/projects/p",
    )
    config_file = tmp_path / "agentops.yaml"
    config_file.write_text("\n".join(config_lines), encoding="utf-8")

    # The existing version has different instructions than prompt.md.
    existing_version = SimpleNamespace(
        id="agent-version-3",
        version="3",
        definition={"kind": "prompt", "model": "gpt-4o-mini", "instructions": "old"},
        metadata={},
    )

    def fake_get(endpoint, name, version):
        return existing_version

    recorded: dict = {}

    def fake_create(endpoint, name, definition, *, metadata, description):
        # Record what the staging step would have sent to Foundry.
        recorded["metadata"] = metadata
        return SimpleNamespace(id="agent-version-4", version="4")

    monkeypatch.setattr(prompt_deploy, "_get_agent_version", fake_get)
    monkeypatch.setattr(prompt_deploy, "_create_agent_version", fake_create)

    prompt_deploy.stage_prompt_agent_candidate(
        config_path=config_file,
        environment="dev",
        output_path=tmp_path / ".agentops/deployments/foundry-agent.json",
        eval_config_path=tmp_path / ".agentops/deployments/agentops.candidate.yaml",
    )

    sent = recorded["metadata"]
    assert sent["agentops:candidate"] == "true"
    assert sent["agentops:pr"] == "214"
    assert "agentops:created_at" in sent
    # Existing deployment metadata is preserved alongside candidate tags.
    assert sent["agentops.env"] == "dev"


def test_stage_prompt_agent_candidate_does_not_tag_deployed_of_record(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Staging from a non-PR run must leave the candidate tags off.

    Absence of ``agentops:candidate`` is what marks a version as deployed of
    record, so a ``push`` to ``main`` must produce metadata without any of
    the three candidate keys while still carrying ``agentops.env``.
    """

    # Simulate a GitHub Actions push to main.
    _clear_ci_env(monkeypatch)
    monkeypatch.setenv("GITHUB_EVENT_NAME", "push")
    monkeypatch.setenv("GITHUB_REF", "refs/heads/main")

    # Minimal on-disk workspace: dataset, prompt, and the config tying them
    # to version 3 of the agent.
    (tmp_path / "data.jsonl").write_text(
        '{"input":"hi","expected":"hello"}\n', encoding="utf-8"
    )
    (tmp_path / "prompt.md").write_text("new instructions\n", encoding="utf-8")
    config_lines = (
        "version: 1",
        "agent: travel-agent:3",
        "dataset: data.jsonl",
        "prompt_file: prompt.md",
        "project_endpoint: https://example.services.ai.azure.com/api/projects/p",
    )
    config_file = tmp_path / "agentops.yaml"
    config_file.write_text("\n".join(config_lines), encoding="utf-8")

    # The existing version has different instructions than prompt.md.
    existing_version = SimpleNamespace(
        id="agent-version-3",
        version="3",
        definition={"kind": "prompt", "model": "gpt-4o-mini", "instructions": "old"},
        metadata={},
    )

    def fake_get(endpoint, name, version):
        return existing_version

    recorded: dict = {}

    def fake_create(endpoint, name, definition, *, metadata, description):
        # Record what the staging step would have sent to Foundry.
        recorded["metadata"] = metadata
        return SimpleNamespace(id="agent-version-4", version="4")

    monkeypatch.setattr(prompt_deploy, "_get_agent_version", fake_get)
    monkeypatch.setattr(prompt_deploy, "_create_agent_version", fake_create)

    prompt_deploy.stage_prompt_agent_candidate(
        config_path=config_file,
        environment="dev",
        output_path=tmp_path / ".agentops/deployments/foundry-agent.json",
        eval_config_path=tmp_path / ".agentops/deployments/agentops.candidate.yaml",
    )

    sent = recorded["metadata"]
    for candidate_key in ("agentops:candidate", "agentops:pr", "agentops:created_at"):
        assert candidate_key not in sent
    # Deployment-identity metadata is still present.
    assert sent["agentops.env"] == "dev"

Loading