From ae37d3410eba57267f5c79a35bd830cd46039787 Mon Sep 17 00:00:00 2001 From: placerda Date: Sun, 14 Jun 2026 12:44:42 -0300 Subject: [PATCH] feat(workflow): auto-detect committed baseline in agentops-pr templates The agentops-pr GitHub Actions and Azure DevOps templates emitted by 'agentops workflow generate' now wrap 'agentops eval run' with a small bash guard that auto-detects a committed baseline file at .agentops/baseline/results.json. When the file is present (e.g. dropped into the repo per docs/tutorial-baseline-comparison.md Section 4), the eval step compares against it via --baseline. When absent, BASELINE_ARG stays empty and the bash invocation is byte-equivalent to the prior behaviour, so existing PR pipelines do not change. Deploy templates (dev/qa/prod) are intentionally untouched. Implementation: _eval_substitutions, _github_eval_substitutions, and _ado_eval_substitutions now accept a kind kwarg and only inject the baseline-detection bash when kind == 'pr'. Two helper functions (_github_baseline_autodetect_block, _ado_baseline_autodetect_block) keep the indentation correct for both the GitHub Actions run block and the ADO AzureCLI@2 inlineScript. Tests added in tests/unit/test_cicd.py guard the PR / deploy split for both GitHub Actions and Azure DevOps. Validation: 946 unit tests pass. 
Closes #155 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 12 ++++++ src/agentops/services/cicd.py | 67 +++++++++++++++++++++++++++--- tests/unit/test_cicd.py | 77 +++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1256f90e..4702848c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres ## [Unreleased] +### Changed +- **`agentops-pr` workflow templates now auto-detect a committed baseline.** + Both the GitHub Actions (`.github/workflows/agentops-pr.yml`) and Azure + DevOps (`.azuredevops/pipelines/agentops-pr.yml`) PR templates emitted by + `agentops workflow generate` wrap `agentops eval run` with a small bash + guard. When `.agentops/baseline/results.json` exists in the consumer + repo, the step automatically passes `--baseline .agentops/baseline/results.json`; + without the file the behaviour is unchanged. Aligns shipped templates + with the `tutorial-baseline-comparison.md` "drop a file in your repo" + promise. Deploy templates (`dev`/`qa`/`prod`) are untouched. + ([#155](https://github.com/Azure/agentops/issues/155)) + ### Fixed - **`agentops skills install --platform` help text now lists `cursor`.** The CLI option help was advertising only `copilot` and `claude` even though the diff --git a/src/agentops/services/cicd.py b/src/agentops/services/cicd.py index e03d2451..ee1e8d61 100644 --- a/src/agentops/services/cicd.py +++ b/src/agentops/services/cicd.py @@ -301,19 +301,74 @@ def _branch_block_ado(*branches: str) -> str: } +# Path of the optional committed baseline file consumed by the PR template's +# ``agentops eval run`` step. When present in the consumer repo, the PR +# eval step passes ``--baseline <path>`` so threshold comparisons run +# against the committed baseline instead of any prior artifact. 
+_PR_BASELINE_PATH: str = ".agentops/baseline/results.json" + + +def _baseline_arg_suffix(kind: str) -> str: + """Suffix appended to ``agentops eval run --config ...`` for PR templates. + + Returns ``" $BASELINE_ARG"`` for ``kind == 'pr'`` so the bash variable + set by the auto-detect block is honored, or an empty string otherwise. + """ + return " $BASELINE_ARG" if kind == "pr" else "" + + +def _github_baseline_autodetect_block(kind: str) -> str: + """Bash that auto-detects ``.agentops/baseline/results.json`` for PR jobs. + + Indented to match the ``run: |`` block in the GitHub Actions step + (``" "`` for the leading spaces). For non-PR kinds returns the + empty string so the deploy templates' bash is unchanged. + """ + if kind != "pr": + return "" + return ( + f" BASELINE_ARG=\"\"\n" + f" if [ -f {_PR_BASELINE_PATH} ]; then\n" + f" BASELINE_ARG=\"--baseline {_PR_BASELINE_PATH}\"\n" + f" fi\n" + ) + + +def _ado_baseline_autodetect_block(kind: str) -> str: + """Bash that auto-detects the committed baseline for ADO PR pipelines. + + Indented to sit inside ``inlineScript: |`` (``" "`` leading + spaces). The whole task block is later re-indented by ``_indent_block`` + using ``base_indent`` so the resulting YAML stays valid. 
+ """ + if kind != "pr": + return "" + return ( + f" BASELINE_ARG=\"\"\n" + f" if [ -f {_PR_BASELINE_PATH} ]; then\n" + f" BASELINE_ARG=\"--baseline {_PR_BASELINE_PATH}\"\n" + f" fi\n" + ) + + def _eval_substitutions( platform: str, eval_runner: str, config_path: str, *, + kind: str, ado_indent: int = 10, ) -> Mapping[str, str]: if platform == "azure-devops": - return _ado_eval_substitutions(eval_runner, config_path, base_indent=ado_indent) - return _github_eval_substitutions(eval_runner, config_path) + return _ado_eval_substitutions( + eval_runner, config_path, kind=kind, base_indent=ado_indent + ) + return _github_eval_substitutions(eval_runner, config_path, kind=kind) -def _github_eval_substitutions(eval_runner: str, config_path: str) -> Mapping[str, str]: +def _github_eval_substitutions( + eval_runner: str, config_path: str, *, kind: str +) -> Mapping[str, str]: if eval_runner == AZD_EVAL_RUNNER: extension_version = azd_ai_agents_extension_version() return { @@ -478,7 +533,7 @@ def _github_eval_substitutions(eval_runner: str, config_path: str) -> Mapping[st APPLICATIONINSIGHTS_CONNECTION_STRING: ${{{{ secrets.APPLICATIONINSIGHTS_CONNECTION_STRING || vars.APPLICATIONINSIGHTS_CONNECTION_STRING }}}} run: | set +e - agentops eval run --config \"{config_path}\" +{_github_baseline_autodetect_block(kind)} agentops eval run --config \"{config_path}\"{_baseline_arg_suffix(kind)} ec=$? echo \"exit_code=$ec\" >> \"$GITHUB_OUTPUT\" if [ $ec -eq 0 ]; then @@ -500,6 +555,7 @@ def _ado_eval_substitutions( eval_runner: str, config_path: str, *, + kind: str, base_indent: int, ) -> Mapping[str, str]: if eval_runner == AZD_EVAL_RUNNER: @@ -647,7 +703,7 @@ def _ado_eval_substitutions( scriptLocation: inlineScript inlineScript: | set +e - agentops eval run --config \"{config_path}\" +{_ado_baseline_autodetect_block(kind)} agentops eval run --config \"{config_path}\"{_baseline_arg_suffix(kind)} code=$? 
echo \"##vso[task.setvariable variable=AGENTOPS_EVAL_EXIT_CODE]$code\" exit $code @@ -789,6 +845,7 @@ def generate_cicd_workflows( platform, effective_eval_runner, eval_config, + kind=kind, ado_indent=ado_indent, ) ) diff --git a/tests/unit/test_cicd.py b/tests/unit/test_cicd.py index afd8d178..dce5e8b0 100644 --- a/tests/unit/test_cicd.py +++ b/tests/unit/test_cicd.py @@ -286,6 +286,83 @@ def test_generate_workflows_rejects_unknown_doctor_gate(tmp_path: Path) -> None: generate_cicd_workflows(directory=tmp_path, kinds=["pr"], doctor_gate="info") +def test_pr_template_autodetects_committed_baseline(tmp_path: Path) -> None: + """The PR gate must auto-detect a committed baseline file. + + If `.agentops/baseline/results.json` is present in the consumer repo, + the GitHub Actions PR template wraps `agentops eval run` so it passes + `--baseline .agentops/baseline/results.json`. Without the file the + behaviour is unchanged because `BASELINE_ARG` stays empty. + + Regression guard for #155. + """ + generate_cicd_workflows(directory=tmp_path, kinds=["pr"]) + content = (tmp_path / _PR_PATH).read_text(encoding="utf-8") + + assert 'BASELINE_ARG=""' in content + assert "if [ -f .agentops/baseline/results.json ]; then" in content + assert ( + 'BASELINE_ARG="--baseline .agentops/baseline/results.json"' + in content + ) + # The eval invocation honours the variable. 
+ assert "agentops eval run" in content + assert "$BASELINE_ARG" in content + + +def test_deploy_templates_do_not_inject_baseline_autodetect( + tmp_path: Path, +) -> None: + """Baseline auto-detect is PR-only; deploy templates stay unchanged.""" + generate_cicd_workflows(directory=tmp_path, kinds=["dev", "qa", "prod"]) + for rel in (_DEV_PATH, _QA_PATH, _PROD_PATH): + content = (tmp_path / rel).read_text(encoding="utf-8") + assert "BASELINE_ARG" not in content, rel + assert "$BASELINE_ARG" not in content, rel + + +def test_azure_devops_pr_template_autodetects_committed_baseline( + tmp_path: Path, +) -> None: + """ADO PR pipeline mirrors the GitHub Actions baseline auto-detect. + + Regression guard for #155 across both supported CI platforms. + """ + generate_cicd_workflows( + directory=tmp_path, kinds=["pr"], platform="azure-devops" + ) + content = ( + tmp_path / ".azuredevops/pipelines/agentops-pr.yml" + ).read_text(encoding="utf-8") + + assert 'BASELINE_ARG=""' in content + assert "if [ -f .agentops/baseline/results.json ]; then" in content + assert ( + 'BASELINE_ARG="--baseline .agentops/baseline/results.json"' + in content + ) + assert "agentops eval run" in content + assert "$BASELINE_ARG" in content + + +def test_azure_devops_deploy_templates_do_not_inject_baseline_autodetect( + tmp_path: Path, +) -> None: + """ADO deploy pipelines stay unchanged; baseline detect is PR-only.""" + generate_cicd_workflows( + directory=tmp_path, + kinds=["dev", "qa", "prod"], + platform="azure-devops", + ) + for rel in ( + ".azuredevops/pipelines/agentops-deploy-dev.yml", + ".azuredevops/pipelines/agentops-deploy-qa.yml", + ".azuredevops/pipelines/agentops-deploy-prod.yml", + ): + content = (tmp_path / rel).read_text(encoding="utf-8") + assert "BASELINE_ARG" not in content, rel + + def test_dev_template_triggers_and_environment(tmp_path: Path) -> None: generate_cicd_workflows(directory=tmp_path, kinds=["dev"]) content = (tmp_path / _DEV_PATH).read_text(encoding="utf-8")