diff --git a/CHANGELOG.md b/CHANGELOG.md index 1256f90..4702848 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres ## [Unreleased] +### Changed +- **`agentops-pr` workflow templates now auto-detect a committed baseline.** + Both the GitHub Actions (`.github/workflows/agentops-pr.yml`) and Azure + DevOps (`.azuredevops/pipelines/agentops-pr.yml`) PR templates emitted by + `agentops workflow generate` wrap `agentops eval run` with a small bash + guard. When `.agentops/baseline/results.json` exists in the consumer + repo, the step automatically passes `--baseline .agentops/baseline/results.json`; + without the file the behaviour is unchanged. Aligns shipped templates + with the `tutorial-baseline-comparison.md` "drop a file in your repo" + promise. Deploy templates (`dev`/`qa`/`prod`) are untouched. + ([#155](https://github.com/Azure/agentops/issues/155)) + ### Fixed - **`agentops skills install --platform` help text now lists `cursor`.** The CLI option help was advertising only `copilot` and `claude` even though the diff --git a/src/agentops/services/cicd.py b/src/agentops/services/cicd.py index e03d245..ee1e8d6 100644 --- a/src/agentops/services/cicd.py +++ b/src/agentops/services/cicd.py @@ -301,19 +301,74 @@ def _branch_block_ado(*branches: str) -> str: } +# Path of the optional committed baseline file consumed by the PR template's +# ``agentops eval run`` step. When present in the consumer repo, the PR eval +# step passes ``--baseline .agentops/baseline/results.json`` so threshold +# comparisons run against the committed baseline instead of any prior artifact. +_PR_BASELINE_PATH: str = ".agentops/baseline/results.json" + + +def _baseline_arg_suffix(kind: str) -> str: + """Suffix appended to ``agentops eval run --config ...`` for PR templates. + + Returns ``" $BASELINE_ARG"`` for ``kind == 'pr'`` so the bash variable + set by the auto-detect block is honored, or an empty string otherwise. 
+ """ + return " $BASELINE_ARG" if kind == "pr" else "" + + +def _github_baseline_autodetect_block(kind: str) -> str: + """Bash that auto-detects ``.agentops/baseline/results.json`` for PR jobs. + + Each emitted line is pre-indented to sit inside the ``run: |`` block + of the GitHub Actions step. For non-PR kinds returns the + empty string so the deploy templates' bash is unchanged. + """ + if kind != "pr": + return "" + return ( + f" BASELINE_ARG=\"\"\n" + f" if [ -f {_PR_BASELINE_PATH} ]; then\n" + f" BASELINE_ARG=\"--baseline {_PR_BASELINE_PATH}\"\n" + f" fi\n" + ) + + +def _ado_baseline_autodetect_block(kind: str) -> str: + """Bash that auto-detects the committed baseline for ADO PR pipelines. + + Each emitted line is pre-indented to sit inside ``inlineScript: |``. + The whole task block is later re-indented by ``_indent_block`` + using ``base_indent`` so the resulting YAML stays valid. + """ + if kind != "pr": + return "" + return ( + f" BASELINE_ARG=\"\"\n" + f" if [ -f {_PR_BASELINE_PATH} ]; then\n" + f" BASELINE_ARG=\"--baseline {_PR_BASELINE_PATH}\"\n" + f" fi\n" + ) + + def _eval_substitutions( platform: str, eval_runner: str, config_path: str, *, + kind: str, ado_indent: int = 10, ) -> Mapping[str, str]: if platform == "azure-devops": - return _ado_eval_substitutions(eval_runner, config_path, base_indent=ado_indent) - return _github_eval_substitutions(eval_runner, config_path) + return _ado_eval_substitutions( + eval_runner, config_path, kind=kind, base_indent=ado_indent + ) + return _github_eval_substitutions(eval_runner, config_path, kind=kind) -def _github_eval_substitutions(eval_runner: str, config_path: str) -> Mapping[str, str]: +def _github_eval_substitutions( + eval_runner: str, config_path: str, *, kind: str +) -> Mapping[str, str]: if eval_runner == AZD_EVAL_RUNNER: extension_version = azd_ai_agents_extension_version() return { @@ -478,7 +533,7 @@ def _github_eval_substitutions(eval_runner: str, config_path: str) -> Mapping[st 
APPLICATIONINSIGHTS_CONNECTION_STRING: ${{{{ secrets.APPLICATIONINSIGHTS_CONNECTION_STRING || vars.APPLICATIONINSIGHTS_CONNECTION_STRING }}}} run: | set +e - agentops eval run --config \"{config_path}\" +{_github_baseline_autodetect_block(kind)} agentops eval run --config \"{config_path}\"{_baseline_arg_suffix(kind)} ec=$? echo \"exit_code=$ec\" >> \"$GITHUB_OUTPUT\" if [ $ec -eq 0 ]; then @@ -500,6 +555,7 @@ def _ado_eval_substitutions( eval_runner: str, config_path: str, *, + kind: str, base_indent: int, ) -> Mapping[str, str]: if eval_runner == AZD_EVAL_RUNNER: @@ -647,7 +703,7 @@ def _ado_eval_substitutions( scriptLocation: inlineScript inlineScript: | set +e - agentops eval run --config \"{config_path}\" +{_ado_baseline_autodetect_block(kind)} agentops eval run --config \"{config_path}\"{_baseline_arg_suffix(kind)} code=$? echo \"##vso[task.setvariable variable=AGENTOPS_EVAL_EXIT_CODE]$code\" exit $code @@ -789,6 +845,7 @@ def generate_cicd_workflows( platform, effective_eval_runner, eval_config, + kind=kind, ado_indent=ado_indent, ) ) diff --git a/tests/unit/test_cicd.py b/tests/unit/test_cicd.py index afd8d17..dce5e8b 100644 --- a/tests/unit/test_cicd.py +++ b/tests/unit/test_cicd.py @@ -286,6 +286,83 @@ def test_generate_workflows_rejects_unknown_doctor_gate(tmp_path: Path) -> None: generate_cicd_workflows(directory=tmp_path, kinds=["pr"], doctor_gate="info") +def test_pr_template_autodetects_committed_baseline(tmp_path: Path) -> None: + """The PR gate must auto-detect a committed baseline file. + + If `.agentops/baseline/results.json` is present in the consumer repo, + the GitHub Actions PR template wraps `agentops eval run` so it passes + `--baseline .agentops/baseline/results.json`. Without the file the + behaviour is unchanged because `BASELINE_ARG` stays empty. + + Regression guard for #155. 
+ """ + generate_cicd_workflows(directory=tmp_path, kinds=["pr"]) + content = (tmp_path / _PR_PATH).read_text(encoding="utf-8") + + assert 'BASELINE_ARG=""' in content + assert "if [ -f .agentops/baseline/results.json ]; then" in content + assert ( + 'BASELINE_ARG="--baseline .agentops/baseline/results.json"' + in content + ) + # Loose check: both substrings occur in the file, not necessarily on one line. + assert "agentops eval run" in content + assert "$BASELINE_ARG" in content + + +def test_deploy_templates_do_not_inject_baseline_autodetect( + tmp_path: Path, +) -> None: + """Baseline auto-detect is PR-only; deploy templates stay unchanged.""" + generate_cicd_workflows(directory=tmp_path, kinds=["dev", "qa", "prod"]) + for rel in (_DEV_PATH, _QA_PATH, _PROD_PATH): + content = (tmp_path / rel).read_text(encoding="utf-8") + assert "BASELINE_ARG" not in content, rel + assert "$BASELINE_ARG" not in content, rel + + +def test_azure_devops_pr_template_autodetects_committed_baseline( + tmp_path: Path, +) -> None: + """ADO PR pipeline mirrors the GitHub Actions baseline auto-detect. + + Regression guard for #155 across both supported CI platforms. 
+ """ + generate_cicd_workflows( + directory=tmp_path, kinds=["pr"], platform="azure-devops" + ) + content = ( + tmp_path / ".azuredevops/pipelines/agentops-pr.yml" + ).read_text(encoding="utf-8") + + assert 'BASELINE_ARG=""' in content + assert "if [ -f .agentops/baseline/results.json ]; then" in content + assert ( + 'BASELINE_ARG="--baseline .agentops/baseline/results.json"' + in content + ) + assert "agentops eval run" in content + assert "$BASELINE_ARG" in content + + +def test_azure_devops_deploy_templates_do_not_inject_baseline_autodetect( + tmp_path: Path, +) -> None: + """ADO deploy pipelines stay unchanged; baseline detect is PR-only.""" + generate_cicd_workflows( + directory=tmp_path, + kinds=["dev", "qa", "prod"], + platform="azure-devops", + ) + for rel in ( + ".azuredevops/pipelines/agentops-deploy-dev.yml", + ".azuredevops/pipelines/agentops-deploy-qa.yml", + ".azuredevops/pipelines/agentops-deploy-prod.yml", + ): + content = (tmp_path / rel).read_text(encoding="utf-8") + assert "BASELINE_ARG" not in content, rel + + def test_dev_template_triggers_and_environment(tmp_path: Path) -> None: generate_cicd_workflows(directory=tmp_path, kinds=["dev"]) content = (tmp_path / _DEV_PATH).read_text(encoding="utf-8")