Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ This format follows [Keep a Changelog](https://keepachangelog.com/) and adheres

## [Unreleased]

### Added
- **`agentops doctor` now detects missing OpenAI data-plane RBAC on the Foundry
resource.** A new `security.missing_openai_data_plane_rbac` check resolves the
signed-in principal (via the `oid` claim of the access token used by
`DefaultAzureCredential`) and lists role assignments at the Foundry account
scope using `azure-mgmt-authorization`. When none of *Cognitive Services
OpenAI User*, *Cognitive Services OpenAI Contributor* or *Cognitive Services
Contributor* is present (directly or inherited), Doctor surfaces an
actionable WARNING that includes the exact `az role assignment create`
command for *Cognitive Services OpenAI User* scoped to the Foundry account.
The check is read-only and skips silently when the SDK, principal or scope
cannot be resolved. ([#228](https://github.com/Azure/agentops/issues/228))

### Changed
- **`agentops-pr` workflow templates now auto-detect a committed baseline.**
Both the GitHub Actions (`.github/workflows/agentops-pr.yml`) and Azure
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ agent = [
"azure-identity>=1.17,<2.0",
"azure-mgmt-cognitiveservices>=13.5,<14.0",
"azure-mgmt-monitor>=6.0,<7.0",
"azure-mgmt-authorization>=4.0,<5.0",
]

[project.scripts]
Expand Down
4 changes: 4 additions & 0 deletions src/agentops/agent/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from agentops.agent.checks.opex_workspace import run_opex_workspace_check
from agentops.agent.checks.opex import run_opex_check
from agentops.agent.checks.posture import run_posture_check
from agentops.agent.checks.rbac_openai_data_plane import (
run_rbac_openai_data_plane_check,
)
from agentops.agent.checks.regression import run_regression_check
from agentops.agent.checks.release_readiness import run_release_readiness_check
from agentops.agent.checks.safety import run_safety_check
Expand Down Expand Up @@ -145,6 +148,7 @@ def analyze(
findings.extend(run_errors_check(monitor, foundry, config.checks.errors))
findings.extend(run_safety_check(history, config.checks.safety, monitor, foundry))
findings.extend(run_posture_check(resources, posture_config))
findings.extend(run_rbac_openai_data_plane_check(resources))
findings.extend(run_opex_workspace_check(workspace))
findings.extend(run_governance_check(workspace))
findings.extend(run_observability_check(workspace))
Expand Down
118 changes: 118 additions & 0 deletions src/agentops/agent/checks/_rbac_authorization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Lazy Azure SDK glue for the ``rbac_openai_data_plane`` Doctor check.

Kept in a private module so the parent check can attempt the lazy
import in a single place and stay silent when ``azure-identity`` /
``azure-mgmt-authorization`` are not installed. All errors that should
make the check skip are normalised into :class:`AuthorizationCheckError`.
"""

from __future__ import annotations

import logging
from typing import List

log = logging.getLogger(__name__)


class AuthorizationCheckError(RuntimeError):
    """Signal that the RBAC check could not run in the current environment.

    The helpers in this module raise it for conditions such as a missing
    SDK package, a credential that cannot produce a token, or a token
    without an ``oid`` claim, so callers have a single exception type to
    handle when deciding to skip the check.
    """


def resolve_signed_in_principal_object_id() -> str:
    """Identify the signed-in principal from its ARM access token.

    A token for the Azure Resource Manager scope is requested from the
    shared credential and its ``oid`` claim is extracted.

    Returns:
        The object id found in the token's ``oid`` claim.

    Raises:
        AuthorizationCheckError: If the shared credential helpers cannot be
            imported, the credential fails to return a token, or the token
            carries no ``oid`` claim.
    """
    # Imported lazily so a missing optional dependency surfaces as a
    # skip-error instead of breaking module import.
    try:
        from agentops.agent.sources._credentials import (
            format_source_error,
            get_shared_credential,
        )
    except ImportError as exc:  # pragma: no cover - shipped together
        raise AuthorizationCheckError(
            f"shared credential factory unavailable: {exc}"
        ) from exc

    try:
        shared_credential = get_shared_credential(process_timeout=30)
        access_token = shared_credential.get_token(
            "https://management.azure.com/.default"
        )
    except Exception as exc:  # noqa: BLE001 - normalised to skip-error
        raise AuthorizationCheckError(format_source_error(exc)) from exc

    # Function-scope import, resolved only once a token has been obtained.
    from agentops.agent.checks.rbac_openai_data_plane import decode_oid_from_jwt

    # Fall back to an empty string when the token object has no usable
    # ``token`` attribute, so the decoder always receives a string.
    raw_jwt = getattr(access_token, "token", "") or ""
    object_id = decode_oid_from_jwt(raw_jwt)
    if object_id:
        return object_id

    raise AuthorizationCheckError(
        "access token did not include an 'oid' claim; cannot identify "
        "the signed-in principal"
    )


def list_principal_role_definition_ids(
    *,
    subscription_id: str,
    scope: str,
    principal_object_id: str,
) -> List[str]:
    """List role definition GUIDs assigned to the principal at/above scope.

    Uses ``RoleAssignmentsOperations.list_for_scope`` with the
    ``atScopeAndAbove() and assignedTo('<oid>')`` filter so management-plane
    inheritance (subscription, resource group, account) is honoured.

    Args:
        subscription_id: Subscription used to construct the
            ``AuthorizationManagementClient``.
        scope: ARM scope passed to ``list_for_scope``.
        principal_object_id: Object id interpolated into the
            ``assignedTo('...')`` part of the filter.

    Returns:
        The trailing path segment of each assignment's
        ``role_definition_id``, in the order the service returned them.
        Assignments without a role definition id are skipped; duplicates
        are not removed.

    Raises:
        AuthorizationCheckError: If ``azure-mgmt-authorization`` or the
            shared credential helpers cannot be imported, or if building
            the client or listing the assignments fails.
    """
    try:
        from azure.mgmt.authorization import AuthorizationManagementClient
    except ImportError as exc:
        raise AuthorizationCheckError(
            "azure-mgmt-authorization not installed; install "
            "`agentops-accelerator[agent]` (or add the package directly) to "
            "enable the OpenAI data-plane RBAC check"
        ) from exc

    try:
        from agentops.agent.sources._credentials import (
            format_source_error,
            get_shared_credential,
        )
    except ImportError as exc:  # pragma: no cover - shipped together
        raise AuthorizationCheckError(
            f"shared credential factory unavailable: {exc}"
        ) from exc

    try:
        credential = get_shared_credential(process_timeout=30)
        client = AuthorizationManagementClient(
            credential=credential,
            subscription_id=subscription_id,
        )
    except Exception as exc:  # noqa: BLE001
        raise AuthorizationCheckError(format_source_error(exc)) from exc

    try:
        # Materialise inside the ``try``: the pager performs its requests
        # lazily during iteration, so errors surface here.
        assignments = list(
            client.role_assignments.list_for_scope(
                scope=scope,
                filter=(
                    f"atScopeAndAbove() and assignedTo('{principal_object_id}')"
                ),
            )
        )
    except Exception as exc:  # noqa: BLE001
        raise AuthorizationCheckError(format_source_error(exc)) from exc
    finally:
        # Release the client's HTTP transport once the listing is done.
        # ``close`` is looked up defensively so client stand-ins without it
        # still work, and a cleanup failure must never replace the result
        # or the normalised skip-error.
        close = getattr(client, "close", None)
        if callable(close):
            try:
                close()
            except Exception:  # noqa: BLE001 - best-effort cleanup
                logging.getLogger(__name__).debug(
                    "failed to close AuthorizationManagementClient",
                    exc_info=True,
                )

    role_definition_ids: List[str] = []
    for assignment in assignments:
        # Accept both the flattened model attribute and a nested
        # ``properties`` object.
        rd_id = (
            getattr(assignment, "role_definition_id", None)
            or getattr(
                getattr(assignment, "properties", None),
                "role_definition_id",
                None,
            )
        )
        if not rd_id:
            continue
        # role_definition_id is a full ARM id ending in `/<guid>`.
        guid = rd_id.rstrip("/").rsplit("/", 1)[-1]
        if guid:
            role_definition_ids.append(guid)
    return role_definition_ids
Loading
Loading