From 3da9e12f1fdc3d981b58a09dd4503f8e4ee87062 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Tue, 12 May 2026 15:52:17 +0200 Subject: [PATCH 1/5] add 'entra' as a separate type --- src/Access/TokenProcessorsOpaque.cpp | 7 +++-- src/Access/TokenProcessorsParse.cpp | 46 ++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index d84b749b4dee..4b09167a1c79 100644 --- a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -313,11 +313,12 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, { const picojson::object openid_config = getObjectFromURI(Poco::URI(openid_config_endpoint_)); - if (!openid_config.contains("userinfo_endpoint") || !openid_config.contains("introspection_endpoint")) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: Cannot extract userinfo_endpoint or introspection_endpoint from OIDC configuration, consider manual configuration.", processor_name); + if (!openid_config.contains("userinfo_endpoint")) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: Cannot extract userinfo_endpoint from OIDC configuration, consider manual configuration.", processor_name); userinfo_endpoint = Poco::URI(getValueByKey(openid_config, "userinfo_endpoint").value()); - token_introspection_endpoint = Poco::URI(getValueByKey(openid_config, "introspection_endpoint").value()); + if (openid_config.contains("introspection_endpoint")) + token_introspection_endpoint = Poco::URI(getValueByKey(openid_config, "introspection_endpoint").value()); if (openid_config.contains("jwks_uri")) { diff --git a/src/Access/TokenProcessorsParse.cpp b/src/Access/TokenProcessorsParse.cpp index fa83c5fa6a34..2880b8de336f 100644 --- a/src/Access/TokenProcessorsParse.cpp +++ b/src/Access/TokenProcessorsParse.cpp @@ -43,7 +43,7 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( auto jwks_cache_lifetime = 
config.getUInt64(prefix + ".jwks_cache_lifetime", 3600); bool externally_configured = config.hasProperty(prefix + ".configuration_endpoint") && !config.hasProperty(prefix + ".jwks_uri"); - bool locally_configured = config.hasProperty(prefix + ".userinfo_endpoint") && config.hasProperty(prefix + ".token_introspection_endpoint"); + bool locally_configured = config.hasProperty(prefix + ".userinfo_endpoint"); if (externally_configured && ! locally_configured) { @@ -58,13 +58,53 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, allow_no_expiration, config.getString(prefix + ".userinfo_endpoint"), - config.getString(prefix + ".token_introspection_endpoint"), + config.getString(prefix + ".token_introspection_endpoint", ""), verifier_leeway, config.getString(prefix + ".jwks_uri", ""), jwks_cache_lifetime); } - throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Either 'configuration_endpoint' or both 'userinfo_endpoint' and 'token_introspection_endpoint' (and, optionally, 'jwks_uri') must be specified for 'openid' processor"); + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Either 'configuration_endpoint' or 'userinfo_endpoint' (and, optionally, 'jwks_uri' and 'token_introspection_endpoint') must be specified for 'openid' processor"); + } + else if (provider_type == "entra") + { + /// Preset for Microsoft Entra ID built on top of the OpenID Connect processor. + /// Derives the per-tenant OIDC discovery URL from `tenant_id` and lets `OpenIdTokenProcessor` + /// fetch `jwks_uri` (and, when published, `introspection_endpoint`) from it, so future + /// endpoint changes on the Entra side flow through without code changes here. 
+ if (!config.hasProperty(prefix + ".tenant_id")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'tenant_id' must be specified for 'entra' processor"); + + const String tenant_id = config.getString(prefix + ".tenant_id"); + + if (tenant_id.empty()) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'tenant_id' must not be empty for 'entra' processor"); + + for (char c : tenant_id) + { + if (!std::isalnum(static_cast(c)) && c != '-' && c != '.' && c != '_') + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "'tenant_id' {} contains invalid characters", tenant_id); + } + + /// Multi-tenant aliases require templated-issuer validation that the underlying JWKS JWT + /// validator does not implement (it does exact-match on `iss`). Reject explicitly rather + /// than silently failing issuer checks at token-validation time. + const String lower_tenant_id = Poco::toLower(tenant_id); + if (lower_tenant_id == "common" || lower_tenant_id == "organizations" || lower_tenant_id == "consumers") + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Multi-tenant 'tenant_id' '{}' is not supported for 'entra' processor type: " + "exact issuer validation requires a single tenant identifier (GUID or onmicrosoft.com domain).", + tenant_id); + + const String default_configuration_endpoint = "https://login.microsoftonline.com/" + tenant_id + "/v2.0/.well-known/openid-configuration"; + const String configuration_endpoint = config.getString(prefix + ".configuration_endpoint", default_configuration_endpoint); + + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + expected_issuer, expected_audience, allow_no_expiration, + configuration_endpoint, + config.getUInt64(prefix + ".verifier_leeway", 60), + config.getUInt64(prefix + ".jwks_cache_lifetime", 3600)); } else if (provider_type == "jwt_static_key") { From ba36b2665f533fbe01522a8358a6aa656a11967c Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Thu, 14 
May 2026 10:34:41 +0200 Subject: [PATCH 2/5] add static role mapping, add entra as a standalone type (wip) --- .../external-authenticators/tokens.md | 17 +++- src/Access/TokenAccessStorage.cpp | 83 ++++++++++++----- src/Access/TokenAccessStorage.h | 4 + .../test_token_roles_mapping/__init__.py | 0 .../configs/users.xml | 11 +++ .../configs/validators.xml | 29 ++++++ .../test_token_roles_mapping/test.py | 89 +++++++++++++++++++ 7 files changed, 209 insertions(+), 24 deletions(-) create mode 100644 tests/integration/test_token_roles_mapping/__init__.py create mode 100644 tests/integration/test_token_roles_mapping/configs/users.xml create mode 100644 tests/integration/test_token_roles_mapping/configs/validators.xml create mode 100644 tests/integration/test_token_roles_mapping/test.py diff --git a/docs/en/operations/external-authenticators/tokens.md b/docs/en/operations/external-authenticators/tokens.md index 74c02a56900b..10f88f062884 100644 --- a/docs/en/operations/external-authenticators/tokens.md +++ b/docs/en/operations/external-authenticators/tokens.md @@ -256,6 +256,16 @@ All this implies that the SQL-driven [Access Control and Account Management](/do my_profile + + + 8a1b2c3d-4e5f-6789-abcd-ef0123456789 + ch_admin + + + 9f8e7d6c-5b4a-3210-fedc-ba0987654321 + ch_analyst + + \bclickhouse-[a-zA-Z0-9]+\b @@ -274,5 +284,8 @@ For now, no more than one `token` section can be defined inside `user_directorie - `processor` — Name of one of processors defined in `token_processors` config section described above. This parameter is mandatory and cannot be empty. - `common_roles` — Section with a list of locally defined roles that will be assigned to each user retrieved from the IdP. Optional. - `default_profile` — Name of a locally defined settings profile that will be assigned to each user retrieved from the IdP. If the profile does not exist, a warning will be logged and the user will be created without a profile. Optional. 
-- `roles_filter` — Regex string for groups filtering. Only groups matching this regex will be mapped to roles. Optional. -- `roles_transform` — Sed-style transform pattern to apply to group names before mapping to roles. Format: `s/pattern/replacement/flags`. The `g` flag applies the replacement globally (all occurrences). Example: `s/-/_/g` converts `clickhouse-grp-dba` to `clickhouse_grp_dba`. Optional. +- `roles_mapping` — Explicit map from incoming group identifier (e.g. an Entra security-group object ID) to a ClickHouse role name. Each entry is an element with `<from>` and `<to>` children. Applied **before** `roles_filter` and `roles_transform`; groups absent from the map pass through unchanged, so the filter stage can be used to drop unmapped entries. Optional. +- `roles_filter` — Regex string for groups filtering. Only groups (after `roles_mapping` is applied) that match this regex will be considered. Optional. +- `roles_transform` — Sed-style transform pattern applied to group names (after `roles_mapping` and `roles_filter`) before mapping to roles. Format: `s/pattern/replacement/flags`. The `g` flag applies the replacement globally (all occurrences). Example: `s/-/_/g` converts `clickhouse-grp-dba` to `clickhouse_grp_dba`. Optional. + +The three stages run in this order: `roles_mapping` → `roles_filter` → `roles_transform`. Stages are independent and any of them may be omitted. diff --git a/src/Access/TokenAccessStorage.cpp b/src/Access/TokenAccessStorage.cpp index fed2e86e1e0a..b37b91223b46 100644 --- a/src/Access/TokenAccessStorage.cpp +++ b/src/Access/TokenAccessStorage.cpp @@ -148,6 +148,38 @@ TokenAccessStorage::TokenAccessStorage(const String & storage_name_, AccessContr roles_transform_global = parsed.global; } + /// Explicit `roles_mapping` entries are read as a list of elements with `<from>X</from>` and `<to>Y</to>` + /// children. The mapping rewrites incoming group names BEFORE `roles_filter` / `roles_transform`, + /// so each subsequent stage operates on the mapped value. 
Groups not listed here pass through + /// to filter/transform unchanged. + if (config.has(prefix_str + "roles_mapping")) + { + Poco::Util::AbstractConfiguration::Keys map_keys; + config.keys(prefix_str + "roles_mapping", map_keys); + + for (const auto & key : map_keys) + { + const String entry_prefix = prefix_str + "roles_mapping." + key; + if (!config.has(entry_prefix + ".from") || !config.has(entry_prefix + ".to")) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "roles_mapping entry '{}' must contain both 'from' and 'to' subelements", key); + + const String from = config.getString(entry_prefix + ".from"); + const String to = config.getString(entry_prefix + ".to"); + + if (from.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "roles_mapping entry '{}': 'from' must not be empty", key); + if (to.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "roles_mapping entry '{}': 'to' must not be empty", key); + + auto [it, inserted] = roles_mapping.emplace(from, to); + if (!inserted) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "roles_mapping has duplicate 'from' value '{}' (already mapped to '{}', cannot remap to '{}')", + from, it->second, to); + } + } + provider_name = config.getString(prefix_str + "processor"); if (provider_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "'processor' must be specified for Token user directory"); @@ -525,37 +557,44 @@ std::optional TokenAccessStorage::authenticateImpl( if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); + /// Pipeline: incoming group --(roles_mapping)--> mapped name --(roles_filter)--> kept/dropped --(roles_transform)--> CH role name. + /// Each stage is independent and optional; groups absent from `roles_mapping` pass through unchanged. 
+ const bool has_filter = roles_filter.has_value() && roles_filter.value().ok(); + const bool has_transform = roles_transform_pattern.has_value() && roles_transform_replacement.has_value(); + std::set external_roles; - if (roles_filter.has_value() && roles_filter.value().ok()) + for (const auto & group : token_credentials.getGroups()) { - LOG_TRACE(getLogger(), "{}: External role filter found, applying only matching groups", getStorageName()); - for (const auto & group: token_credentials.getGroups()) { - if (RE2::FullMatch(group, roles_filter.value())) + String name = group; + + if (!roles_mapping.empty()) + { + const auto it = roles_mapping.find(group); + if (it != roles_mapping.end()) { - String transformed_group = group; - if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) - { - transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); - LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); - } - external_roles.insert(transformed_group); - LOG_TRACE(getLogger(), "{}: Granted role (group) {} to user", getStorageName(), transformed_group); + name = it->second; + LOG_TRACE(getLogger(), "{}: Mapped group '{}' to '{}'", getStorageName(), group, name); } } - } - else - { - LOG_TRACE(getLogger(), "{}: No external role filtering set, applying all available groups", getStorageName()); - for (const auto & group: token_credentials.getGroups()) + + if (has_filter && !RE2::FullMatch(name, roles_filter.value())) { - String transformed_group = group; - if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) + LOG_TRACE(getLogger(), "{}: Group '{}' (after mapping) did not match roles_filter, skipping", getStorageName(), name); + continue; + } + + if (has_transform) + { + String transformed = applyTransform(name, roles_transform_pattern.value(), roles_transform_replacement.value(), 
roles_transform_global); + if (transformed != name) { - transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); - LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); + LOG_TRACE(getLogger(), "{}: Transformed '{}' to '{}'", getStorageName(), name, transformed); + name = std::move(transformed); } - external_roles.insert(transformed_group); } + + external_roles.insert(name); + LOG_TRACE(getLogger(), "{}: Granted role (group) {} to user", getStorageName(), name); } if (new_user) diff --git a/src/Access/TokenAccessStorage.h b/src/Access/TokenAccessStorage.h index aedf8843f2b9..110af07e13d9 100644 --- a/src/Access/TokenAccessStorage.h +++ b/src/Access/TokenAccessStorage.h @@ -48,6 +48,10 @@ class TokenAccessStorage : public IAccessStorage const String & prefix; String provider_name; + /// Explicit mapping from incoming group (e.g. Entra group object ID) to a ClickHouse role name. + /// Applied BEFORE `roles_filter` and `roles_transform`. Groups absent from this map pass through + /// unchanged, so the filter stage can be used to drop unmapped entries. 
+ std::map roles_mapping; std::optional roles_filter = std::nullopt; std::optional roles_transform_pattern = std::nullopt; std::optional roles_transform_replacement = std::nullopt; diff --git a/tests/integration/test_token_roles_mapping/__init__.py b/tests/integration/test_token_roles_mapping/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_token_roles_mapping/configs/users.xml b/tests/integration/test_token_roles_mapping/configs/users.xml new file mode 100644 index 000000000000..7e7e96a262b5 --- /dev/null +++ b/tests/integration/test_token_roles_mapping/configs/users.xml @@ -0,0 +1,11 @@ + + + + + + + 1 + 1 + + + diff --git a/tests/integration/test_token_roles_mapping/configs/validators.xml b/tests/integration/test_token_roles_mapping/configs/validators.xml new file mode 100644 index 000000000000..e52b92574d49 --- /dev/null +++ b/tests/integration/test_token_roles_mapping/configs/validators.xml @@ -0,0 +1,29 @@ + + + + jwt_static_key + HS256 + roles_mapping_test_secret + false + true + + + + + + hs256_groups + default + + + 8a1b2c3d-4e5f-6789-abcd-ef0123456789 + ch_admin + + + 9f8e7d6c-5b4a-3210-fedc-ba0987654321 + ch_analyst + + + ^ch_[a-z_]+$ + + + diff --git a/tests/integration/test_token_roles_mapping/test.py b/tests/integration/test_token_roles_mapping/test.py new file mode 100644 index 000000000000..60c18d138c47 --- /dev/null +++ b/tests/integration/test_token_roles_mapping/test.py @@ -0,0 +1,89 @@ +""" +Smoke tests for the `` stage in TokenAccessStorage. + +The mapping rewrites incoming group identifiers (e.g. Entra security-group object IDs) +to ClickHouse role names BEFORE `roles_filter` and `roles_transform` run. The processor +under test is `jwt_static_key` with HS256 so tokens can be crafted inline without an IdP. 
+ +Run: + pytest tests/integration/test_token_roles_mapping/test.py -v +""" + +import jwt +import pytest + +from helpers.cluster import ClickHouseCluster + +SECRET = "roles_mapping_test_secret" + +GUID_ADMIN = "8a1b2c3d-4e5f-6789-abcd-ef0123456789" +GUID_ANALYST = "9f8e7d6c-5b4a-3210-fedc-ba0987654321" +GUID_UNMAPPED = "11111111-2222-3333-4444-555555555555" + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/validators.xml"], + user_configs=["configs/users.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + node.query("DROP ROLE IF EXISTS ch_admin") + node.query("DROP ROLE IF EXISTS ch_analyst") + node.query("CREATE ROLE ch_admin") + node.query("CREATE ROLE ch_analyst") + yield cluster + finally: + cluster.shutdown() + + +def make_jwt(sub, groups): + return jwt.encode({"sub": sub, "groups": groups}, SECRET, algorithm="HS256") + + +def query_with_token(token, sql): + resp = node.http_request( + "", + method="POST", + data=sql, + headers={"Authorization": f"Bearer {token}"}, + ) + resp.raise_for_status() + return resp.text + + +def current_roles(sub, groups): + token = make_jwt(sub, groups) + raw = query_with_token( + token, + "SELECT role_name FROM system.current_roles ORDER BY role_name FORMAT TabSeparated", + ) + return [line for line in raw.splitlines() if line] + + +def test_mapped_guid_grants_mapped_role(): + """A GUID listed in resolves to the mapped ClickHouse role.""" + assert current_roles("alice", [GUID_ADMIN]) == ["ch_admin"] + + +def test_multiple_guids_grant_multiple_roles(): + assert current_roles("bob", [GUID_ADMIN, GUID_ANALYST]) == ["ch_admin", "ch_analyst"] + + +def test_unmapped_guid_is_dropped_by_filter(): + """An unmapped GUID passes through `roles_mapping` unchanged and is then rejected by + `roles_filter` (^ch_[a-z_]+$ doesn't match a raw GUID), so only the mapped role survives.""" + assert 
current_roles("charlie", [GUID_ADMIN, GUID_UNMAPPED]) == ["ch_admin"] + + +def test_only_unmapped_guids_yield_no_roles(): + """No GUID is in the mapping and the filter rejects all of them: no roles are granted, + but authentication itself still succeeds and the user is created from the token.""" + assert current_roles("dave", [GUID_UNMAPPED]) == [] + token = make_jwt("dave", [GUID_UNMAPPED]) + assert query_with_token(token, "SELECT currentUser() FORMAT TabSeparated").strip() == "dave" From 284618f4d53cab6aa3c588154cb99ace8b9babfc Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Thu, 14 May 2026 15:30:46 +0200 Subject: [PATCH 3/5] upd entra flow to jwks --- src/Access/TokenProcessorsParse.cpp | 53 ++++++++++++++--------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/src/Access/TokenProcessorsParse.cpp b/src/Access/TokenProcessorsParse.cpp index 072f6b165a5c..6788272eb438 100644 --- a/src/Access/TokenProcessorsParse.cpp +++ b/src/Access/TokenProcessorsParse.cpp @@ -135,10 +135,14 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( } else if (provider_type == "entra") { - /// Preset for Microsoft Entra ID built on top of the OpenID Connect processor. - /// Derives the per-tenant OIDC discovery URL from `tenant_id` and lets `OpenIdTokenProcessor` - /// fetch `jwks_uri` (and, when published, `introspection_endpoint`) from it, so future - /// endpoint changes on the Entra side flow through without code changes here. + /// Preset for Microsoft Entra ID built on top of the pure-JWKS JWT processor. + /// Validation is fully local: signature against Entra's published JWKS plus the + /// operator-chosen iss/aud/typ/claims pins. No OIDC discovery fetch, no userinfo + /// endpoint, no Microsoft Graph URL stored on the processor. 
`groups_claim` and + /// `username_claim` are read directly from the JWT payload -- which requires the + /// access token's audience to be the operator's own app, not Microsoft Graph + /// (Graph-audience tokens are not JWKS-verifiable; see TokenProcessorsOpaque's + /// `azure` shortcut for that path). if (!config.hasProperty(prefix + ".tenant_id")) throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'tenant_id' must be specified for 'entra' processor"); @@ -154,9 +158,9 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( "'tenant_id' {} contains invalid characters", tenant_id); } - /// Multi-tenant aliases require templated-issuer validation that the underlying JWKS JWT - /// validator does not implement (it does exact-match on `iss`). Reject explicitly rather - /// than silently failing issuer checks at token-validation time. + /// Multi-tenant aliases require templated-issuer validation that JwksJwtProcessor does not + /// implement (it does exact-match on `iss`). Reject explicitly rather than silently failing + /// issuer checks at token-validation time. 
const String lower_tenant_id = Poco::toLower(tenant_id); if (lower_tenant_id == "common" || lower_tenant_id == "organizations" || lower_tenant_id == "consumers") throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, @@ -164,20 +168,16 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( "exact issuer validation requires a single tenant identifier (GUID or onmicrosoft.com domain).", tenant_id); - const String default_configuration_endpoint = "https://login.microsoftonline.com/" + tenant_id + "/v2.0/.well-known/openid-configuration"; - const String configuration_endpoint = config.getString(prefix + ".configuration_endpoint", default_configuration_endpoint); - require_allowed_url(configuration_endpoint, "configuration_endpoint"); - - /// Symmetric with the 'openid' processor: operators who knowingly mock Entra over plain - /// HTTP (tests, sovereign-cloud reverse proxies) can opt out of the HTTPS-on-discovery - /// check. False by default since real Entra is always HTTPS. - const auto allow_http_discovery_urls = config.getBool(prefix + ".allow_http_discovery_urls", false); + const String default_jwks_uri = "https://login.microsoftonline.com/" + tenant_id + "/discovery/v2.0/keys"; + const String jwks_uri = config.getString(prefix + ".jwks_uri", default_jwks_uri); + require_allowed_url(jwks_uri, "jwks_uri"); - if (expected_issuer.empty()) - LOG_WARNING(getLogger("TokenAuthentication"), - "{}: 'expected_issuer' is not set for 'entra' processor: the 'iss' claim will not be validated, " - "so tokens issued by any tenant will be accepted as long as the signature is valid.", - processor_name); + /// `expected_issuer` is auto-derived from `tenant_id` since the v2.0 issuer URL is fully + /// determined by the tenant. Users can still override -- typically for v1.0 tokens + /// ('https://sts.windows.net/{tenant_id}/') or for sovereign-cloud authorities + /// ('https://login.microsoftonline.us/{tenant_id}/v2.0' etc.). 
+ const String default_issuer = "https://login.microsoftonline.com/" + tenant_id + "/v2.0"; + const String issuer = config.getString(prefix + ".expected_issuer", default_issuer); if (expected_audience.empty()) LOG_WARNING(getLogger("TokenAuthentication"), @@ -185,13 +185,12 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( "so tokens issued for any application will be accepted as long as the signature is valid.", processor_name); - return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, - expected_issuer, expected_audience, allow_no_expiration, - configuration_endpoint, - config.getUInt64(prefix + ".verifier_leeway", 60), - config.getUInt64(prefix + ".jwks_cache_lifetime", 3600), - remote_host_filter, - allow_http_discovery_urls); + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + issuer, expected_audience, expected_typ, allow_no_expiration, + config.getString(prefix + ".claims", ""), + config.getUInt64(prefix + ".verifier_leeway", 60), + jwks_uri, + config.getUInt64(prefix + ".jwks_cache_lifetime", 3600)); } else if (provider_type == "jwt_static_key") { From 678f7a1c5cff727b7a4a1b650e4f6092c0e69710 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Thu, 14 May 2026 21:09:40 +0200 Subject: [PATCH 4/5] better docs for azure --- .../external-authenticators/tokens.md | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/docs/en/operations/external-authenticators/tokens.md b/docs/en/operations/external-authenticators/tokens.md index 10f88f062884..8338a730b9f4 100644 --- a/docs/en/operations/external-authenticators/tokens.md +++ b/docs/en/operations/external-authenticators/tokens.md @@ -146,6 +146,52 @@ Some tokens cannot be decoded and validated locally. External service is needed No additional parameters are required. +### Entra (Microsoft Entra ID, pure OIDC) + +Preset for Microsoft Entra ID that does **not** involve Microsoft Graph. 
Tokens are validated locally against Entra's per-tenant JWKS; `username_claim` and `groups_claim` are read directly from the JWT payload. Use this when the access token's `aud` is your own app (registered via Entra's "Expose an API"), not `https://graph.microsoft.com`. + +Minimum configuration — only `tenant_id` is required; all other parameters have sensible defaults: + +```xml + + + + entra + aaaabbbb-0000-cccc-1111-dddd2222eeee + + + +``` + +Example with common overrides (audience binding to a specific app, Entra-flavored username/groups claims): + +```xml + + entra + aaaabbbb-0000-cccc-1111-dddd2222eeee + api://clickhouse + preferred_username + roles + +``` + +**Parameters:** + +- `tenant_id` — Microsoft Entra tenant identifier (a GUID, or an `*.onmicrosoft.com` domain). **Mandatory.** Multi-tenant aliases (`common`, `organizations`, `consumers`) are rejected because `JwksJwtProcessor` does exact-match issuer validation. + +All remaining parameters are optional: + +- `jwks_uri` — Override for the JWKS endpoint. Default: `https://login.microsoftonline.com/{tenant_id}/discovery/v2.0/keys`. Override only for sovereign clouds (`login.microsoftonline.us`, `login.partner.microsoftonline.cn`). +- `expected_issuer` — Expected value of the `iss` claim. Default: `https://login.microsoftonline.com/{tenant_id}/v2.0` (derived from `tenant_id`). Override for v1.0 tokens (`https://sts.windows.net/{tenant_id}/`) or sovereign clouds. +- `expected_audience` — Expected value of the `aud` claim, normally your app's Application ID URI (e.g. `api://clickhouse`) or client ID. If unset, no audience check is performed (any signature-valid token from the tenant will authenticate); a warning is logged at startup so the gap is visible. +- `username_claim` — JWT claim to use as the ClickHouse username. Default: `sub`. Common Entra alternatives: `preferred_username`, `upn`, `oid`. +- `groups_claim` — JWT claim that carries the array of group identifiers. Default: `groups`. 
Set to `roles` if you use App Roles in Entra instead of security-group claims. +- `expected_typ`, `verifier_leeway`, `jwks_cache_lifetime`, `claims`, `allow_no_expiration`, `token_cache_lifetime` — Same as for `jwt_dynamic_jwks`. + +:::note +The `groups` claim must be enabled in the app registration's manifest (`"groupMembershipClaims": "ApplicationGroup"` is recommended) and exposed in access tokens via `optionalClaims.accessToken`. Group identifiers in the token are object IDs (GUIDs) by default; map them to ClickHouse roles via the user-directory's `roles_mapping` block (see [Identity Provider as an External User Directory](#idp-external-user-directory)). +::: + ### OpenID ```xml From b755c732106d954f2f1b2413503dc502b3ff6675 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Thu, 14 May 2026 21:18:50 +0200 Subject: [PATCH 5/5] remove old azure flow --- .../external-authenticators/tokens.md | 27 +-- src/Access/TokenProcessors.h | 15 -- src/Access/TokenProcessorsOpaque.cpp | 173 ------------------ src/Access/TokenProcessorsParse.cpp | 17 +- 4 files changed, 21 insertions(+), 211 deletions(-) diff --git a/docs/en/operations/external-authenticators/tokens.md b/docs/en/operations/external-authenticators/tokens.md index 8338a730b9f4..aefa7e6eb549 100644 --- a/docs/en/operations/external-authenticators/tokens.md +++ b/docs/en/operations/external-authenticators/tokens.md @@ -29,7 +29,7 @@ To use token-based authentication, add `token_processors` section to `config.xml Its contents are different for different token processor types. **Common parameters** -- `type` -- type of token processor. Supported values: "jwt_static_key", "jwt_static_jwks", "jwt_dynamic_jwks", "azure", "openid". Mandatory. Case-insensitive. +- `type` -- type of token processor. Supported values: `jwt_static_key`, `jwt_static_jwks`, `jwt_dynamic_jwks`, `entra` (`azure` is accepted as a back-compat alias and resolves to the same `entra` processor — see the [Entra](#entra) section), `openid`. Mandatory. 
Case-insensitive. - `token_cache_lifetime` -- maximum lifetime of cached token (in seconds). Optional, default: 3600. - `username_claim` -- name of claim (field) that will be treated as ClickHouse username. Optional, default: "sub". - `groups_claim` -- name of claim (field) that contains list of groups user belongs to. This claim will be looked up in the token itself (in case token is a valid JWT, e.g. in Keycloak) or in response from `/userinfo`. Optional, default: "groups". @@ -129,26 +129,19 @@ For JWKS-based validators (`jwt_static_jwks` and `jwt_dynamic_jwks`), RS* and ES - `allow_no_expiration` - If `true`, tokens without the `exp` (expiration) claim are accepted. Otherwise they are rejected. Optional, default: `false`. -## Processors with external providers +## IdP-specific presets and generic external providers -Some tokens cannot be decoded and validated locally. External service is needed in this case. "Azure" and "OpenID" (a generic type) are supported now. +This section covers two related kinds of processor: per-IdP convenience presets built on top of the generic JWT processors (currently `entra`), and the generic `openid` processor that talks to an arbitrary OIDC-compliant identity provider. -### Azure -```xml - - - - azure - - - -``` +### Entra (Microsoft Entra ID, pure OIDC) {#entra} -No additional parameters are required. +`entra` is a preset for Microsoft Entra ID built on top of `jwt_dynamic_jwks`. Tokens are validated **locally** against Entra's per-tenant JWKS — no Microsoft Graph call, no userinfo round trip, no OIDC discovery fetch. `username_claim` and `groups_claim` are read directly from the JWT payload. Use this when the access token's `aud` is your own app (registered via Entra's *Expose an API* blade), not `https://graph.microsoft.com`. 
-### Entra (Microsoft Entra ID, pure OIDC) +:::note Migrating from the legacy `azure` processor +`azure` is now an **alias** for `entra` — at config-parse time the type string is rewritten and the rest of the pipeline is identical. The previous `azure` implementation (which round-tripped every token through Microsoft Graph's `/oidc/userinfo` and `/v1.0/me/memberOf` endpoints) has been removed entirely. -Preset for Microsoft Entra ID that does **not** involve Microsoft Graph. Tokens are validated locally against Entra's per-tenant JWKS; `username_claim` and `groups_claim` are read directly from the JWT payload. Use this when the access token's `aud` is your own app (registered via Entra's "Expose an API"), not `https://graph.microsoft.com`. +For operators upgrading: an `azure` block that previously had no other parameters will now fail to load with `'tenant_id' must be specified for 'entra' processor`. To migrate, add `` (and ideally ``) and make sure your application is configured to mint tokens whose `aud` is your own app, not Microsoft Graph. The setup recipe lives in `docs/entra-setup-draft.md`. +::: Minimum configuration — only `tenant_id` is required; all other parameters have sensible defaults: @@ -258,7 +251,7 @@ Example (goes into `users.xml`): Here, the JWT payload must contain `["view-profile"]` on path `resource_access.account.roles`, otherwise authentication will not succeed even with a valid JWT. :::note -Per-user `claims` are enforced only when the token is a JWT (validated by a JWT processor such as `jwt_static_key` or `jwt_dynamic_jwks`). When the user authenticates with an opaque (access) token (e.g. via Azure, OpenID, or Google token processors), claims are not checked and authentication succeeds if the token is otherwise valid. +Per-user `claims` are enforced only when the token is a JWT (validated by a JWT processor such as `jwt_static_key`, `jwt_dynamic_jwks`, or `entra`). When the user authenticates with an opaque (access) token (e.g. 
via OpenID or Google token processors), claims are not checked and authentication succeeds if the token is otherwise valid. ::: ``` diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h index f2ee32d90887..35c6a218b5b1 100644 --- a/src/Access/TokenProcessors.h +++ b/src/Access/TokenProcessors.h @@ -194,21 +194,6 @@ class GoogleTokenProcessor : public ITokenProcessor const String expected_audience; }; -class AzureTokenProcessor : public ITokenProcessor -{ -public: - AzureTokenProcessor(const String & processor_name_, - UInt64 token_cache_lifetime_, - const String & username_claim_, - const String & groups_claim_, - const String & expected_audience_); - - bool resolveAndValidate(TokenCredentials & credentials) const override; - -private: - const String expected_audience; -}; - class OpenIdTokenProcessor : public ITokenProcessor { public: diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index cc3ec9755b07..06471bf9af99 100644 --- a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -283,179 +283,6 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co return true; } -AzureTokenProcessor::AzureTokenProcessor(const String & processor_name_, - UInt64 token_cache_lifetime_, - const String & username_claim_, - const String & groups_claim_, - const String & expected_audience_) - : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) - , expected_audience(expected_audience_) -{ - /// Without an audience pin, this processor accepts any Azure AD access token - /// that Microsoft Graph happens to honor -- which includes tokens minted for - /// other applications inside the same tenant. Surface the gap so operators - /// can lock the processor to their own application's audience. 
- if (expected_audience.empty()) - LOG_WARNING(getLogger("TokenAuthentication"), - "{}: 'expected_audience' is not configured for Azure token processor. " - "Any Azure access token Microsoft Graph accepts will authenticate here, " - "regardless of which application it was issued for; set 'expected_audience' " - "to the audience this processor should accept.", - processor_name); -} - -bool AzureTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const -{ - /// Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS. - /// We will not trust user data in this token except for 'exp' value to determine caching duration. - /// Explanation here: https://stackoverflow.com/questions/60778634/failing-signature-validation-of-jwt-tokens-from-azure-ad - /// Let Azure validate it: only valid tokens will be accepted. - /// Use GET https://graph.microsoft.com/oidc/userinfo to verify token and get user info at the same time - - const String & token = credentials.getToken(); - - String username; - try - { - picojson::object user_info_json = getObjectFromURI(Poco::URI("https://graph.microsoft.com/oidc/userinfo"), token); - username = getValueByKey(user_info_json, username_claim).value(); - } - catch (...) - { - return false; - } - - /// Audience binding (H-10): only after Microsoft Graph has accepted the - /// token (proving it is a real, signed Azure AD token) do we trust its - /// claims. We then enforce that the 'aud' claim matches the operator-pinned - /// audience -- without this check, *any* token issued for *any* application - /// in the tenant that has Graph access would authenticate. With the check, - /// tokens minted for other applications are rejected even though Graph - /// itself would honor them. 
- if (!expected_audience.empty()) - { - try - { - auto decoded_token = jwt::decode(token); - if (!decoded_token.has_audience()) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Azure access token has no 'aud' claim; cannot enforce 'expected_audience' '{}'; rejecting", - processor_name, expected_audience); - return false; - } - const auto auds = decoded_token.get_audience(); - if (auds.find(expected_audience) == auds.end()) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Azure access token audience does not contain configured 'expected_audience' '{}'; rejecting", - processor_name, expected_audience); - return false; - } - } - catch (const std::exception & e) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Failed to decode Azure access token while enforcing 'expected_audience': {}; rejecting", - processor_name, e.what()); - return false; - } - } - - /// Reject empty resolved username (M-27). Previously this branch only - /// logged the gap and proceeded to return true at the end of the function, - /// which would cache an entry under user_name "" and collapse every - /// empty-username token across all IdPs into the same dynamic user. - if (username.empty()) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Resolved username from token is empty; rejecting", processor_name); - return false; - } - credentials.setUserName(username); - - try - { - credentials.setExpiresAt(jwt::decode(token).get_expires_at()); - } - catch (...) 
{ - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: No expiration data found in a valid token, will use default cache lifetime", processor_name); - } - - std::set external_groups_names; - const Poco::URI get_groups_uri = Poco::URI("https://graph.microsoft.com/v1.0/me/memberOf"); - - try - { - auto groups_response = getObjectFromURI(get_groups_uri, token); - - if (!groups_response.contains("value") || !groups_response["value"].is()) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Failed to get Azure groups: invalid content in response from server", processor_name); - return true; - } - - picojson::array groups_array = groups_response["value"].get(); - - for (const auto & group: groups_array) - { - /// Got some invalid response. Ignore this, log this. - if (!group.is()) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Failed to get Azure groups: invalid content in response from server", processor_name); - continue; - } - - auto group_data = group.get(); - - /// Use the immutable `id` (GUID), not the mutable `displayName`, - /// for role-mapping. `displayName` can be renamed by an Azure AD - /// admin -- and on rename, every ClickHouse role-mapping regex - /// that referenced the old name silently stops matching, while - /// every regex that matches the new name silently starts. Two - /// distinct AAD groups can also share a display name and merge - /// into a single ClickHouse group; deleting and recreating a - /// group with the same name silently inherits the old grants. - /// `id` is a GUID assigned by AAD at group creation; it never - /// changes, never collides, and is never reused. - /// - /// Operators upgrading from a build that emitted `displayName` - /// must update their `roles_filter` / `roles_transform` regex - /// to reference the GUIDs Azure AD assigns to the groups they - /// want to map. The role identifier is not human-friendly -- - /// that is the cost of using an immutable handle. 
- if (!group_data.contains("id")) - continue; - - String group_name = getValueByKey(group_data, "id").value_or(""); - if (!group_name.empty()) - { - external_groups_names.insert(group_name); - String display_name = getValueByKey(group_data, "displayName").value_or(""); - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: User {}: new external group id={} (displayName={})", - processor_name, quoteString(credentials.getUserName()), - quoteString(group_name), quoteString(display_name)); - } - } - } - catch (const std::exception & e) - { - /// Defense in depth (M-10 sibling): broadened to `std::exception` so a - /// picojson `std::bad_cast` from a malformed response degrades to "no - /// roles mapped" rather than aborting the whole authentication. - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Failed to get Azure groups, no external roles will be mapped. reason: {}", processor_name, e.what()); - return true; - } - - credentials.setGroups(external_groups_names); - return true; -} - OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, const String & username_claim_, diff --git a/src/Access/TokenProcessorsParse.cpp b/src/Access/TokenProcessorsParse.cpp index 6788272eb438..f48ee3fddd07 100644 --- a/src/Access/TokenProcessorsParse.cpp +++ b/src/Access/TokenProcessorsParse.cpp @@ -24,6 +24,14 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( auto provider_type = Poco::toLower(config.getString(prefix + ".type")); + /// `azure` is a back-compat alias for `entra`. The legacy `azure` processor + /// validated tokens by round-tripping through Microsoft Graph; the `entra` + /// processor does pure local JWKS validation, which is what every operator + /// actually wants. Treat both names as the same processor type so the legacy + /// type name is still accepted; an `azure` block without `tenant_id` is now rejected.
+ if (provider_type == "azure") + provider_type = "entra"; + auto token_cache_lifetime = config.getUInt64(prefix + ".token_cache_lifetime", 3600); auto username_claim = config.getString(prefix + ".username_claim", "sub"); auto groups_claim = config.getString(prefix + ".groups_claim", "groups"); @@ -78,10 +86,6 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( { return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); } - else if (provider_type == "azure") - { - return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); - } else if (provider_type == "openid") { auto verifier_leeway = config.getUInt64(prefix + ".verifier_leeway", 60); @@ -141,8 +145,9 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( /// endpoint, no Microsoft Graph URL stored on the processor. `groups_claim` and /// `username_claim` are read directly from the JWT payload -- which requires the /// access token's audience to be the operator's own app, not Microsoft Graph - /// (Graph-audience tokens are not JWKS-verifiable; see TokenProcessorsOpaque's - /// `azure` shortcut for that path). + /// (Graph-audience tokens are not JWKS-verifiable -- their signing keys are not + /// in the tenant JWKS and their headers carry a `nonce` that breaks third-party + /// validation; see `docs/entra-setup-draft.md` for how to mint app-audience tokens). if (!config.hasProperty(prefix + ".tenant_id")) throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'tenant_id' must be specified for 'entra' processor");