From 98f63e76e1ef66ffd3cc488fadd8c72d3a76dec0 Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Wed, 25 Mar 2026 17:05:39 +0100 Subject: [PATCH 01/14] chore: Describe RBAC rules, remove unnecessary rules --- .../spark-k8s-operator/templates/roles.yaml | 116 ++++++++++++++---- .../templates/spark-clusterrole.yaml | 8 ++ .../templates/spark-connect-clusterrole.yaml | 8 ++ .../templates/spark-history-clusterrole.yaml | 7 ++ 4 files changed, 113 insertions(+), 26 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/roles.yaml b/deploy/helm/spark-k8s-operator/templates/roles.yaml index 5599763f..6b1a86b8 100644 --- a/deploy/helm/spark-k8s-operator/templates/roles.yaml +++ b/deploy/helm/spark-k8s-operator/templates/roles.yaml @@ -6,6 +6,9 @@ metadata: labels: {{- include "operator.labels" . | nindent 4 }} rules: + # For automatic cluster domain detection: the operator lists and watches nodes to + # determine the Kubernetes cluster domain (e.g. cluster.local), and reads kubelet + # information via the nodes/proxy subresource. - apiGroups: - "" resources: @@ -13,44 +16,77 @@ rules: verbs: - list - watch - # For automatic cluster domain detection - apiGroups: - "" resources: - nodes/proxy verbs: - get + # The pod-driver controller (Controller::new(Pod)) watches Spark driver pods + # (labelled spark-role=driver) to track SparkApplication completion. It also deletes + # driver pods once the application reaches a terminal phase (Succeeded or Failed). - apiGroups: - "" resources: - - persistentvolumeclaims + - pods + verbs: + - delete + - get + - list + - watch + # ConfigMaps hold pod templates and Spark configuration. All three controllers apply + # them via Server-Side Apply (create + patch). The history and connect controllers + # track them for orphan cleanup (list + delete). All controllers watch ConfigMaps via + # .owns(ConfigMap) so that changes trigger re-reconciliation. + # get is required for the ReconciliationPaused strategy in cluster_resources.add(). + - apiGroups: + - "" + resources: + - configmaps verbs: - create - delete - - deletecollection - get - list - patch - - update - watch + # Services expose Spark History Server and Spark Connect Server for metrics and + # inter-component communication. Applied via Server-Side Apply and tracked for orphan + # cleanup by the history and connect controllers. The history and connect controllers + # watch Services via .owns(Service) to trigger re-reconciliation on change. + # get is required for the ReconciliationPaused strategy in cluster_resources.add(). - apiGroups: - "" resources: - - pods - - configmaps - - secrets - services - - endpoints - - serviceaccounts verbs: - create - delete - - deletecollection - get - list - patch - - update - watch + # ServiceAccounts are created per SparkApplication (directly via client.apply_patch, + # referencing spark-k8s-clusterrole) and per SparkHistoryServer/SparkConnectServer + # (via cluster_resources.add). The history and connect controllers track them for + # orphan cleanup (list + delete). No controller watches ServiceAccounts via .owns(). + # get is required for the ReconciliationPaused strategy in cluster_resources.add(). + - apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - delete + - get + - list + - patch + # RoleBindings are created per SparkApplication (directly via client.apply_patch, + # binding to spark-k8s-clusterrole) and per SparkHistoryServer/SparkConnectServer + # (via cluster_resources.add, binding to their respective ClusterRoles). The history + # and connect controllers track them for orphan cleanup (list + delete). + # No controller watches RoleBindings via .owns(). + # get is required for the ReconciliationPaused strategy in cluster_resources.add(). - apiGroups: - rbac.authorization.k8s.io resources: @@ -61,32 +97,36 @@ rules: - get - list - patch - - update - - watch + # StatefulSets run the Spark History Server and Spark Connect Server. Applied via + # Server-Side Apply (create + patch), tracked for orphan cleanup (list + delete), + # and watched by the history and connect controllers via .owns(StatefulSet). + # get is required for the ReconciliationPaused strategy in cluster_resources.add(). - apiGroups: - apps resources: - statefulsets - - deployments verbs: - create - delete + - get - list - patch - - update - watch + # A Kubernetes Job is created per SparkApplication via Server-Side Apply to run + # spark-submit. The app controller applies Jobs directly (not via cluster_resources), + # so only create + patch (SSA) are needed. Jobs are not watched and not tracked for + # orphan cleanup by any controller. - apiGroups: - batch resources: - jobs verbs: - create - - delete - - get - - list - patch - - update - - watch + # PodDisruptionBudgets limit voluntary disruptions to Spark History Server pods. + # Applied via Server-Side Apply and tracked for orphan cleanup by the history + # controller. No controller watches PDBs via .owns(). + # get is required for the ReconciliationPaused strategy in cluster_resources.add(). - apiGroups: - policy resources: @@ -97,8 +137,6 @@ rules: - get - list - patch - - update - - watch - apiGroups: - apiextensions.k8s.io resources: @@ -114,6 +152,8 @@ rules: - list - watch {{- end }} + # The operator emits Kubernetes events for controller reconciliation outcomes + # (create for new events, patch to aggregate/update existing events). - apiGroups: - events.k8s.io resources: @@ -121,6 +161,11 @@ rules: verbs: - create - patch + # The operator reconciles SparkApplication, SparkHistoryServer, SparkConnectServer, + # and SparkApplicationTemplate objects as the primary resources for their respective + # controllers. get + list + watch are required for Controller::new() and .owns(). + # The main resource objects are never patched directly; only the /status subresources + # are patched (see the separate rule below). - apiGroups: - spark.stackable.tech resources: @@ -131,8 +176,12 @@ rules: verbs: - get - list - - patch - watch + # The app controller patches SparkApplication status after creating the Job (to prevent + # duplicate job creation on restart). The pod-driver controller also patches it when the + # driver pod transitions to a terminal phase. The connect controller patches + # SparkConnectServer status each reconciliation with readiness conditions. + # The history controller does not update SparkHistoryServer status. - apiGroups: - spark.stackable.tech resources: @@ -140,6 +189,9 @@ rules: - sparkconnectservers/status verbs: - patch + # S3Connection and S3Bucket objects provide S3 configuration for Spark (event log + # storage, data access). The operator reads them during reconciliation and watches them + # so that S3 configuration changes trigger re-reconciliation. - apiGroups: - s3.stackable.tech resources: @@ -149,6 +201,12 @@ rules: - get - list - watch + # The operator creates per-application/per-server RoleBindings that reference the + # product ClusterRoles, granting workload pods the permissions they need at runtime. + # The bind verb is required to create RoleBindings that reference a ClusterRole. + # - {{ include "operator.name" . }}-clusterrole: bound per SparkApplication + # - spark-history-clusterrole: bound per SparkHistoryServer + # - spark-connect-clusterrole: bound per SparkConnectServer - apiGroups: - rbac.authorization.k8s.io resources: @@ -157,17 +215,23 @@ rules: - bind resourceNames: - {{ include "operator.name" . }}-clusterrole + - spark-history-clusterrole + - spark-connect-clusterrole + # Listeners expose the Spark History Server and Spark Connect Server to the network. + # Applied via Server-Side Apply (create + patch) and tracked for orphan cleanup + # (list + delete) by the history and connect controllers. + # get is required for the ReconciliationPaused strategy in cluster_resources.add(). + # No controller watches Listeners via .owns(), so the watch verb is not required. - apiGroups: - listeners.stackable.tech resources: - listeners verbs: + - create + - delete - get - list - - watch - patch - - create - - delete {{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} - apiGroups: - security.openshift.io diff --git a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml index de4beef8..57f6abc3 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml @@ -6,6 +6,13 @@ metadata: labels: {{- include "operator.labels" . | nindent 4 }} rules: + # These permissions are for the Spark driver pod at runtime, not the operator itself. + # The driver uses Kubernetes-native scheduling to create and manage executor pods, + # and needs access to configmaps (executor config), services (driver-executor + # communication), secrets (credentials), persistentvolumeclaims (PVC-based dynamic + # allocation scratch space), and pods (executor lifecycle management). + # serviceaccounts is included from the upstream template but Spark does not create + # service accounts at runtime; it could be removed in a future cleanup. - apiGroups: - "" resources: @@ -24,6 +31,7 @@ rules: - patch - update - watch + # Spark may emit events for executor lifecycle transitions. - apiGroups: - events.k8s.io resources: diff --git a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml index eafc5e73..cbd045df 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml @@ -6,6 +6,13 @@ metadata: labels: {{- include "operator.labels" . | nindent 4 }} rules: + # These permissions are for the Spark Connect Server pod at runtime, not the operator. + # The Spark Connect Server acts as a long-running Spark driver that creates and manages + # executor pods via Kubernetes-native scheduling. It requires access to pods (executor + # lifecycle), configmaps (executor config), services (driver-executor communication), + # secrets (credentials), and persistentvolumeclaims (PVC-based dynamic allocation). + # serviceaccounts is included from the upstream template but Spark Connect does not + # create service accounts at runtime; it could be removed in a future cleanup. - apiGroups: - "" resources: @@ -24,6 +31,7 @@ rules: - patch - update - watch + # Spark Connect Server may emit Kubernetes events for executor lifecycle transitions. - apiGroups: - events.k8s.io resources: diff --git a/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml index 4b9013c6..0e23acc7 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml @@ -6,6 +6,12 @@ metadata: labels: {{- include "operator.labels" . | nindent 4 }} rules: + # These permissions are for the Spark History Server pod at runtime, not the operator. + # The History Server is a read-only web UI that reads completed Spark event logs from + # a shared log directory (typically S3 or HDFS). It does not create pods, services, + # or other Kubernetes resources at runtime — this ClusterRole is significantly + # over-permissioned and should be tightened in a future audit once the minimal + # runtime requirements are confirmed against a live deployment. - apiGroups: - "" resources: @@ -24,6 +30,7 @@ rules: - patch - update - watch + # Spark History Server may emit Kubernetes events. - apiGroups: - events.k8s.io resources: From 8cd0d98f646fcd32fed54e6278777fe1e0d29554 Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 12:20:49 +0200 Subject: [PATCH 02/14] chore: Update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 030a67ba..e57b2b1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +- Document Helm deployed RBAC permissions and remove unnecessary permissions ([#674]). + +[#674]: https://github.com/stackabletech/spark-k8s-operator/pull/674 + ## [26.3.0] - 2026-03-16 ## [26.3.0-rc1] - 2026-03-16 From c311f3df8e079e0365d9b4e95827f4dc5df28b54 Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 12:27:55 +0200 Subject: [PATCH 03/14] chore: Remove the get for customresourcedefinitions for the operator clusterrole --- deploy/helm/spark-k8s-operator/templates/roles.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/deploy/helm/spark-k8s-operator/templates/roles.yaml b/deploy/helm/spark-k8s-operator/templates/roles.yaml index 6b1a86b8..bca1d0d3 100644 --- a/deploy/helm/spark-k8s-operator/templates/roles.yaml +++ b/deploy/helm/spark-k8s-operator/templates/roles.yaml @@ -142,7 +142,6 @@ rules: resources: - customresourcedefinitions verbs: - - get # Required to maintain the CRD. The operator needs to do this, as it needs to enter e.g. it's # generated certificate in the conversion webhook. {{- if .Values.maintenance.customResourceDefinitions.maintain }} From 1e38eb944b74e79ba5b02cfe00a9ddef318811b7 Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 12:29:41 +0200 Subject: [PATCH 04/14] chore: Remove the nodes list/watch rule for the operator clusterrole --- deploy/helm/spark-k8s-operator/templates/roles.yaml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/roles.yaml b/deploy/helm/spark-k8s-operator/templates/roles.yaml index bca1d0d3..b9641dd0 100644 --- a/deploy/helm/spark-k8s-operator/templates/roles.yaml +++ b/deploy/helm/spark-k8s-operator/templates/roles.yaml @@ -6,16 +6,7 @@ metadata: labels: {{- include "operator.labels" . | nindent 4 }} rules: - # For automatic cluster domain detection: the operator lists and watches nodes to - # determine the Kubernetes cluster domain (e.g. cluster.local), and reads kubelet - # information via the nodes/proxy subresource. - - apiGroups: - - "" - resources: - - nodes - verbs: - - list - - watch + # For automatic cluster domain detection. - apiGroups: - "" resources: From 767849908339be82b6cca6da92fbbbe5f4d326ef Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 12:47:22 +0200 Subject: [PATCH 05/14] fix: Always allow customresourcedefinitions list/watch Required for startup condition regardless of CRD maintenance --- deploy/helm/spark-k8s-operator/templates/roles.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/roles.yaml b/deploy/helm/spark-k8s-operator/templates/roles.yaml index b9641dd0..7e687313 100644 --- a/deploy/helm/spark-k8s-operator/templates/roles.yaml +++ b/deploy/helm/spark-k8s-operator/templates/roles.yaml @@ -133,15 +133,15 @@ rules: resources: - customresourcedefinitions verbs: - # Required to maintain the CRD. The operator needs to do this, as it needs to enter e.g. it's + # Required to maintain the CRD. The operator needs to do this, as it needs to enter e.g. its # generated certificate in the conversion webhook. {{- if .Values.maintenance.customResourceDefinitions.maintain }} - create - patch + {{- end }} # Required for startup condition - list - watch - {{- end }} # The operator emits Kubernetes events for controller reconciliation outcomes # (create for new events, patch to aggregate/update existing events). - apiGroups: From 404d1ef21a3c84bb87b26f10158221b71daf8dce Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 21:16:18 +0200 Subject: [PATCH 06/14] chore: Simplify operator clusterrole rule descriptions --- .../spark-k8s-operator/templates/roles.yaml | 41 ++++++------------- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/roles.yaml b/deploy/helm/spark-k8s-operator/templates/roles.yaml index 7e687313..f3eef7a1 100644 --- a/deploy/helm/spark-k8s-operator/templates/roles.yaml +++ b/deploy/helm/spark-k8s-operator/templates/roles.yaml @@ -13,9 +13,10 @@ rules: - nodes/proxy verbs: - get - # The pod-driver controller (Controller::new(Pod)) watches Spark driver pods - # (labelled spark-role=driver) to track SparkApplication completion. It also deletes - # driver pods once the application reaches a terminal phase (Succeeded or Failed). + # The pod-driver controller watches Spark driver pods + # (labelled spark-role=driver) to track SparkApplication completion. It also + # deletes driver pods once the application reaches a terminal phase (Succeeded + # or Failed). - apiGroups: - "" resources: @@ -128,6 +129,7 @@ rules: - get - list - patch + # The operator can maintain its own CRDs, e.g. to update conversion webhook certificates. - apiGroups: - apiextensions.k8s.io resources: @@ -142,8 +144,7 @@ rules: # Required for startup condition - list - watch - # The operator emits Kubernetes events for controller reconciliation outcomes - # (create for new events, patch to aggregate/update existing events). + # The operator emits Kubernetes events. - apiGroups: - events.k8s.io resources: @@ -151,11 +152,7 @@ rules: verbs: - create - patch - # The operator reconciles SparkApplication, SparkHistoryServer, SparkConnectServer, - # and SparkApplicationTemplate objects as the primary resources for their respective - # controllers. get + list + watch are required for Controller::new() and .owns(). - # The main resource objects are never patched directly; only the /status subresources - # are patched (see the separate rule below). + # The custom resources reconciled by this operator. - apiGroups: - spark.stackable.tech resources: @@ -167,11 +164,7 @@ rules: - get - list - watch - # The app controller patches SparkApplication status after creating the Job (to prevent - # duplicate job creation on restart). The pod-driver controller also patches it when the - # driver pod transitions to a terminal phase. The connect controller patches - # SparkConnectServer status each reconciliation with readiness conditions. - # The history controller does not update SparkHistoryServer status. + # Status updates for SparkApplication and SparkConnectServer. - apiGroups: - spark.stackable.tech resources: @@ -179,9 +172,7 @@ rules: - sparkconnectservers/status verbs: - patch - # S3Connection and S3Bucket objects provide S3 configuration for Spark (event log - # storage, data access). The operator reads them during reconciliation and watches them - # so that S3 configuration changes trigger re-reconciliation. + # S3 configuration for event log storage and data access. - apiGroups: - s3.stackable.tech resources: @@ -191,12 +182,7 @@ rules: - get - list - watch - # The operator creates per-application/per-server RoleBindings that reference the - # product ClusterRoles, granting workload pods the permissions they need at runtime. - # The bind verb is required to create RoleBindings that reference a ClusterRole. - # - {{ include "operator.name" . }}-clusterrole: bound per SparkApplication - # - spark-history-clusterrole: bound per SparkHistoryServer - # - spark-connect-clusterrole: bound per SparkConnectServer + # Required to create RoleBindings that reference these ClusterRoles. - apiGroups: - rbac.authorization.k8s.io resources: @@ -207,11 +193,8 @@ rules: - {{ include "operator.name" . }}-clusterrole - spark-history-clusterrole - spark-connect-clusterrole - # Listeners expose the Spark History Server and Spark Connect Server to the network. - # Applied via Server-Side Apply (create + patch) and tracked for orphan cleanup - # (list + delete) by the history and connect controllers. - # get is required for the ReconciliationPaused strategy in cluster_resources.add(). - # No controller watches Listeners via .owns(), so the watch verb is not required. + # Required for managing how the History Server and Connect Server are exposed + # outside of the cluster. - apiGroups: - listeners.stackable.tech resources: From 078e90d7f973ec7bef2ae50abd01d035982530da Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 21:25:11 +0200 Subject: [PATCH 07/14] chore: Remove unused events rules from the product clusterroles The operator creates and patches events, not the product pods. --- .../spark-k8s-operator/templates/spark-clusterrole.yaml | 8 +------- .../templates/spark-connect-clusterrole.yaml | 8 +------- .../templates/spark-history-clusterrole.yaml | 8 +------- 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml index 57f6abc3..a9012df6 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml @@ -31,13 +31,7 @@ rules: - patch - update - watch - # Spark may emit events for executor lifecycle transitions. - - apiGroups: - - events.k8s.io - resources: - - events - verbs: - - create + {{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} - apiGroups: - security.openshift.io diff --git a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml index cbd045df..6b8edafa 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml @@ -31,13 +31,7 @@ rules: - patch - update - watch - # Spark Connect Server may emit Kubernetes events for executor lifecycle transitions. - - apiGroups: - - events.k8s.io - resources: - - events - verbs: - - create + {{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} - apiGroups: - security.openshift.io diff --git a/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml index 0e23acc7..1d415559 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml @@ -30,13 +30,7 @@ rules: - patch - update - watch - # Spark History Server may emit Kubernetes events. - - apiGroups: - - events.k8s.io - resources: - - events - verbs: - - create + {{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} - apiGroups: - security.openshift.io From ff20dfd7b5eb4a6703f0c4c53c0aab204ea77b1b Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 21:27:03 +0200 Subject: [PATCH 08/14] chore: Move rbac.authorization.k8s.io next to each other --- .../spark-k8s-operator/templates/roles.yaml | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/roles.yaml b/deploy/helm/spark-k8s-operator/templates/roles.yaml index f3eef7a1..bf6cd2a7 100644 --- a/deploy/helm/spark-k8s-operator/templates/roles.yaml +++ b/deploy/helm/spark-k8s-operator/templates/roles.yaml @@ -89,6 +89,17 @@ rules: - get - list - patch + # Required to create RoleBindings that reference these ClusterRoles. + - apiGroups: + - rbac.authorization.k8s.io + resources: + - clusterroles + verbs: + - bind + resourceNames: + - {{ include "operator.name" . }}-clusterrole + - spark-history-clusterrole + - spark-connect-clusterrole # StatefulSets run the Spark History Server and Spark Connect Server. Applied via # Server-Side Apply (create + patch), tracked for orphan cleanup (list + delete), # and watched by the history and connect controllers via .owns(StatefulSet). @@ -182,17 +193,6 @@ rules: - get - list - watch - # Required to create RoleBindings that reference these ClusterRoles. - - apiGroups: - - rbac.authorization.k8s.io - resources: - - clusterroles - verbs: - - bind - resourceNames: - - {{ include "operator.name" . }}-clusterrole - - spark-history-clusterrole - - spark-connect-clusterrole # Required for managing how the History Server and Connect Server are exposed # outside of the cluster. - apiGroups: From 9dc37e95ab6166ad521163aa52df3e6ed8be7033 Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 21:28:50 +0200 Subject: [PATCH 09/14] chore: Remove the security.openshift.io rule from the operator clusterrole All other operators have it only for the product clusterrole(s). --- deploy/helm/spark-k8s-operator/templates/roles.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/roles.yaml b/deploy/helm/spark-k8s-operator/templates/roles.yaml index bf6cd2a7..e3fab4ff 100644 --- a/deploy/helm/spark-k8s-operator/templates/roles.yaml +++ b/deploy/helm/spark-k8s-operator/templates/roles.yaml @@ -205,13 +205,3 @@ rules: - get - list - patch -{{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} - - apiGroups: - - security.openshift.io - resources: - - securitycontextconstraints - resourceNames: - - nonroot-v2 - verbs: - - use -{{ end }} From ff184b27eb193174a3bb3cdfe9567eae813c7361 Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 21:50:22 +0200 Subject: [PATCH 10/14] chore: Remove rules from the history clusterrole --- .../templates/spark-history-clusterrole.yaml | 28 ++----------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml index 1d415559..9b23074e 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-history-clusterrole.yaml @@ -1,4 +1,7 @@ --- +# The Spark History Server is a read-only web UI that reads completed Spark event logs +# from a storage backend (S3 or HDFS). It receives all configuration via mounted volumes +# and does not call the Kubernetes API at runtime, so no additional rules are needed here. apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -6,31 +9,6 @@ metadata: labels: {{- include "operator.labels" . | nindent 4 }} rules: - # These permissions are for the Spark History Server pod at runtime, not the operator. - # The History Server is a read-only web UI that reads completed Spark event logs from - # a shared log directory (typically S3 or HDFS). It does not create pods, services, - # or other Kubernetes resources at runtime — this ClusterRole is significantly - # over-permissioned and should be tightened in a future audit once the minimal - # runtime requirements are confirmed against a live deployment. - - apiGroups: - - "" - resources: - - configmaps - - persistentvolumeclaims - - pods - - secrets - - serviceaccounts - - services - verbs: - - create - - delete - - deletecollection - - get - - list - - patch - - update - - watch - {{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} - apiGroups: - security.openshift.io From 7d8be7e9579fd51dd615f13087e98c6e9d7e0cdd Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 22:12:35 +0200 Subject: [PATCH 11/14] chore: Remove rules for serviceaccounts for the driver and connect server --- deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml | 1 - .../spark-k8s-operator/templates/spark-connect-clusterrole.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml index a9012df6..42d7b6a9 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml @@ -20,7 +20,6 @@ rules: - persistentvolumeclaims - pods - secrets - - serviceaccounts - services verbs: - create diff --git a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml index 6b8edafa..210899c1 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml @@ -20,7 +20,6 @@ rules: - persistentvolumeclaims - pods - secrets - - serviceaccounts - services verbs: - create From a15984f04d61308557eaa1c01b41b4790fe09b5a Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 22:13:32 +0200 Subject: [PATCH 12/14] restore: Bring back create events permission for spark driver and connect server --- .../spark-k8s-operator/templates/spark-clusterrole.yaml | 8 +++++++- .../templates/spark-connect-clusterrole.yaml | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml index 42d7b6a9..4414bec0 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml @@ -30,7 +30,13 @@ rules: - patch - update - watch - + # The Spark driver may emit Kubernetes events, e.g. for executor lifecycle transitions. + - apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create {{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} - apiGroups: - security.openshift.io diff --git a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml index 210899c1..18f22fee 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml @@ -30,7 +30,13 @@ rules: - patch - update - watch - + # The Spark Connect Server may emit Kubernetes events, e.g. for executor lifecycle transitions. + - apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create {{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} - apiGroups: - security.openshift.io From efe35b676484c5d4c9311e3d55515ef6ad21392e Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Tue, 7 Apr 2026 22:13:53 +0200 Subject: [PATCH 13/14] chore: Simplify docs on spark driver and connect server clusterroles --- .../templates/spark-clusterrole.yaml | 11 ++++------- .../templates/spark-connect-clusterrole.yaml | 11 ++++------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml index 4414bec0..7a4682f0 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml @@ -1,4 +1,7 @@ --- +# The Spark driver uses Kubernetes-native scheduling to launch and manage executor pods. +# It interacts directly with the Kubernetes API at runtime to create executor pods and +# the supporting resources they need. apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -6,13 +9,7 @@ metadata: labels: {{- include "operator.labels" . | nindent 4 }} rules: - # These permissions are for the Spark driver pod at runtime, not the operator itself. - # The driver uses Kubernetes-native scheduling to create and manage executor pods, - # and needs access to configmaps (executor config), services (driver-executor - # communication), secrets (credentials), persistentvolumeclaims (PVC-based dynamic - # allocation scratch space), and pods (executor lifecycle management). - # serviceaccounts is included from the upstream template but Spark does not create - # service accounts at runtime; it could be removed in a future cleanup. + # The Spark driver manages executor pods and their supporting resources at runtime. - apiGroups: - "" resources: diff --git a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml index 18f22fee..fd3e9feb 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml @@ -1,4 +1,7 @@ --- +# The Spark Connect Server acts as a long-running Spark driver that uses Kubernetes-native +# scheduling to launch and manage executor pods. It interacts directly with the Kubernetes +# API at runtime to create executor pods and the supporting resources they need. apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -6,13 +9,7 @@ metadata: labels: {{- include "operator.labels" . | nindent 4 }} rules: - # These permissions are for the Spark Connect Server pod at runtime, not the operator. - # The Spark Connect Server acts as a long-running Spark driver that creates and manages - # executor pods via Kubernetes-native scheduling. It requires access to pods (executor - # lifecycle), configmaps (executor config), services (driver-executor communication), - # secrets (credentials), and persistentvolumeclaims (PVC-based dynamic allocation). - # serviceaccounts is included from the upstream template but Spark Connect does not - # create service accounts at runtime; it could be removed in a future cleanup. + # The Spark Connect Server manages executor pods and their supporting resources at runtime. - apiGroups: - "" resources: From b397d47eb50ebb8d30e5508893528c75b41f925e Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Wed, 8 Apr 2026 12:11:30 +0200 Subject: [PATCH 14/14] remove rules for "events" --- .../spark-k8s-operator/templates/spark-clusterrole.yaml | 7 ------- .../templates/spark-connect-clusterrole.yaml | 7 ------- 2 files changed, 14 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml index 7a4682f0..5c7386e0 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml @@ -27,13 +27,6 @@ rules: - patch - update - watch - # The Spark driver may emit Kubernetes events, e.g. for executor lifecycle transitions. - - apiGroups: - - events.k8s.io - resources: - - events - verbs: - - create {{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} - apiGroups: - security.openshift.io diff --git a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml index fd3e9feb..4d01114b 100644 --- a/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml +++ b/deploy/helm/spark-k8s-operator/templates/spark-connect-clusterrole.yaml @@ -27,13 +27,6 @@ rules: - patch - update - watch - # The Spark Connect Server may emit Kubernetes events, e.g. for executor lifecycle transitions. - - apiGroups: - - events.k8s.io - resources: - - events - verbs: - - create {{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }} - apiGroups: - security.openshift.io