From 2a150448ac5c8e2205685049306b380411682771 Mon Sep 17 00:00:00 2001 From: Alexander Laye Date: Mon, 20 Apr 2026 13:31:57 -0400 Subject: [PATCH 1/3] Force password use for gateway and certs for PG --- .../deploy-fleet-bicep.sh | 6 ++- .../documentdb-resource-crp.yaml | 39 +++++++++++++++++++ .../crds/documentdb.io_dbs.yaml | 25 ++++++++++-- operator/src/api/preview/documentdb_types.go | 12 ++++++ .../config/crd/bases/documentdb.io_dbs.yaml | 25 ++++++++++-- operator/src/internal/cnpg/cnpg_cluster.go | 5 +-- operator/src/internal/cnpg/cnpg_sync.go | 4 +- .../controller/documentdb_controller.go | 1 - .../controller/physical_replication.go | 31 +++++++++++++-- operator/src/internal/utils/constants.go | 2 +- operator/src/internal/utils/pv_recovery.go | 6 +-- .../src/internal/utils/replication_context.go | 4 ++ 12 files changed, 140 insertions(+), 20 deletions(-) diff --git a/documentdb-playground/aks-fleet-deployment/deploy-fleet-bicep.sh b/documentdb-playground/aks-fleet-deployment/deploy-fleet-bicep.sh index 6770fe16..38b61af2 100755 --- a/documentdb-playground/aks-fleet-deployment/deploy-fleet-bicep.sh +++ b/documentdb-playground/aks-fleet-deployment/deploy-fleet-bicep.sh @@ -153,7 +153,6 @@ helm upgrade --install hub-agent ./charts/hub-agent/ \ # Run the script. 
chmod +x ./hack/membership/joinMC.sh -sed -i 's/--set namespace=fleet-system/--namespace=fleet-system --create-namespace/' hack/membership/joinMC.sh ./hack/membership/joinMC.sh $TAG $HUB_CLUSTER $MEMBER_CLUSTER_NAMES popd @@ -171,12 +170,17 @@ helm install hub-net-controller-manager ./charts/hub-net-controller-manager/ \ --set image.tag=$NETWORKING_TAG HUB_CLUSTER_ADDRESS=$(kubectl config view -o jsonpath="{.clusters[?(@.name==\"$HUB_CLUSTER\")].cluster.server}") +HUB_CA=$(kubectl config view --raw -o jsonpath="{.clusters[?(@.name==\"$HUB_CLUSTER\")].cluster.certificate-authority-data}") while read -r MEMBER_CLUSTER; do kubectl config use-context $MEMBER_CLUSTER kubectl apply -f config/crd/* + # ADD HUB CA to member cluster (temp fix while joinMC.sh is out of date) + kubectl -n fleet-system set env deploy/member-agent \ + HUB_CERTIFICATE_AUTHORITY="$HUB_CA" -c member-agent + echo "Installing mcs-controller-manager..." helm install mcs-controller-manager ./charts/mcs-controller-manager/ \ --set refreshtoken.repository=$REGISTRY/refresh-token \ diff --git a/documentdb-playground/aks-fleet-deployment/documentdb-resource-crp.yaml b/documentdb-playground/aks-fleet-deployment/documentdb-resource-crp.yaml index 93a6a402..42c9fa0e 100644 --- a/documentdb-playground/aks-fleet-deployment/documentdb-resource-crp.yaml +++ b/documentdb-playground/aks-fleet-deployment/documentdb-resource-crp.yaml @@ -18,6 +18,39 @@ stringData: --- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: selfsigned-cross-region-issuer + namespace: documentdb-preview-ns +spec: + selfSigned: {} +--- +apiVersion: v1 +kind: Secret +metadata: + name: cross-region-client-cert + namespace: documentdb-preview-ns + labels: + cnpg.io/reload: "" +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: cross-region-client-cert + namespace: documentdb-preview-ns +spec: + secretName: cross-region-client-cert + usages: + - client auth + commonName: streaming_replica + issuerRef: + 
name: selfsigned-cross-region-issuer + kind: Issuer + group: cert-manager.io + +--- + apiVersion: documentdb.io/preview kind: DocumentDB metadata: @@ -32,6 +65,8 @@ spec: environment: aks clusterReplication: highAvailability: true + replicationTLSSecret: cross-region-client-cert + clientCASecret: cross-region-client-cert crossCloudNetworkingStrategy: AzureFleet primary: {{PRIMARY_CLUSTER}} clusterList: @@ -75,6 +110,10 @@ spec: version: v1 kind: Secret name: documentdb-credentials + - group: "" + version: v1 + kind: Secret + name: cross-region-client-cert policy: placementType: PickAll affinity: diff --git a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml index 36191073..3acadc5a 100644 --- a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml +++ b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml @@ -1091,6 +1091,13 @@ spec: description: ClusterReplication configures cross-cluster replication for DocumentDB. properties: + clientCASecret: + description: |- + ClientCASecret is the name of a Kubernetes Secret containing the CA certificate + used to verify the streaming_replica client certificate. The secret must contain + a "ca.crt" key. When specified, the secret is propagated to all clusters + participating in replication. + type: string clusterList: description: ClusterList is the list of clusters participating in replication. @@ -1131,6 +1138,13 @@ spec: primary: description: Primary is the name of the primary cluster for replication. type: string + replicationTLSSecret: + description: |- + ReplicationTLSSecret is the name of a Kubernetes Secret containing TLS certificates + for the streaming_replica user used in physical replication. The secret must contain + "tls.crt" and "tls.key" keys. When specified, the secret is propagated to all + clusters participating in replication. 
+ type: string required: - clusterList - primary @@ -1268,8 +1282,13 @@ spec: description: |- SchemaVersion controls the desired schema version for the DocumentDB extension. - This field decouples the extension binary (image) update from the schema update - (ALTER EXTENSION documentdb UPDATE), providing a rollback-safe upgrade window. + The operator never changes your database schema unless you ask: + - Set documentDBVersion → updates the binary (safe to roll back) + - Set schemaVersion → updates the database schema (irreversible) + - Set schemaVersion: "auto" → schema auto-updates with binary + + Once the schema has been updated, the operator blocks image rollback below the + installed schema version to prevent running an untested binary/schema combination. Values: - "" (empty, default): Two-phase mode. Image upgrades happen automatically, @@ -1278,7 +1297,7 @@ spec: as it allows rollback by reverting the image before committing the schema change. - "auto": Schema automatically updates to match the binary version whenever the binary is upgraded. This is the simplest mode but provides no rollback - safety window. Recommended for development and testing environments. + safety window. - "" (e.g. "0.112.0"): Schema updates to exactly this version. Must be <= the binary version. pattern: ^(auto|[0-9]+\.[0-9]+\.[0-9]+)?$ diff --git a/operator/src/api/preview/documentdb_types.go b/operator/src/api/preview/documentdb_types.go index 4d95ac9c..85bb1903 100644 --- a/operator/src/api/preview/documentdb_types.go +++ b/operator/src/api/preview/documentdb_types.go @@ -219,6 +219,18 @@ type ClusterReplication struct { ClusterList []MemberCluster `json:"clusterList"` // Whether or not to have replicas on the primary cluster. HighAvailability bool `json:"highAvailability,omitempty"` + // ReplicationTLSSecret is the name of a Kubernetes Secret containing TLS certificates + // for the streaming_replica user used in physical replication. 
The secret must contain + // "tls.crt" and "tls.key" keys. When specified, the secret is propagated to all + // clusters participating in replication. + // +optional + ReplicationTLSSecret string `json:"replicationTLSSecret,omitempty"` + // ClientCASecret is the name of a Kubernetes Secret containing the CA certificate + // used to verify the streaming_replica client certificate. The secret must contain + // a "ca.crt" key. When specified, the secret is propagated to all clusters + // participating in replication. + // +optional + ClientCASecret string `json:"clientCASecret,omitempty"` } type MemberCluster struct { diff --git a/operator/src/config/crd/bases/documentdb.io_dbs.yaml b/operator/src/config/crd/bases/documentdb.io_dbs.yaml index 36191073..3acadc5a 100644 --- a/operator/src/config/crd/bases/documentdb.io_dbs.yaml +++ b/operator/src/config/crd/bases/documentdb.io_dbs.yaml @@ -1091,6 +1091,13 @@ spec: description: ClusterReplication configures cross-cluster replication for DocumentDB. properties: + clientCASecret: + description: |- + ClientCASecret is the name of a Kubernetes Secret containing the CA certificate + used to verify the streaming_replica client certificate. The secret must contain + a "ca.crt" key. When specified, the secret is propagated to all clusters + participating in replication. + type: string clusterList: description: ClusterList is the list of clusters participating in replication. @@ -1131,6 +1138,13 @@ spec: primary: description: Primary is the name of the primary cluster for replication. type: string + replicationTLSSecret: + description: |- + ReplicationTLSSecret is the name of a Kubernetes Secret containing TLS certificates + for the streaming_replica user used in physical replication. The secret must contain + "tls.crt" and "tls.key" keys. When specified, the secret is propagated to all + clusters participating in replication. 
+ type: string required: - clusterList - primary @@ -1268,8 +1282,13 @@ spec: description: |- SchemaVersion controls the desired schema version for the DocumentDB extension. - This field decouples the extension binary (image) update from the schema update - (ALTER EXTENSION documentdb UPDATE), providing a rollback-safe upgrade window. + The operator never changes your database schema unless you ask: + - Set documentDBVersion → updates the binary (safe to roll back) + - Set schemaVersion → updates the database schema (irreversible) + - Set schemaVersion: "auto" → schema auto-updates with binary + + Once the schema has been updated, the operator blocks image rollback below the + installed schema version to prevent running an untested binary/schema combination. Values: - "" (empty, default): Two-phase mode. Image upgrades happen automatically, @@ -1278,7 +1297,7 @@ spec: as it allows rollback by reverting the image before committing the schema change. - "auto": Schema automatically updates to match the binary version whenever the binary is upgraded. This is the simplest mode but provides no rollback - safety window. Recommended for development and testing environments. + safety window. - "" (e.g. "0.112.0"): Schema updates to exactly this version. Must be <= the binary version. 
pattern: ^(auto|[0-9]+\.[0-9]+\.[0-9]+)?$ diff --git a/operator/src/internal/cnpg/cnpg_cluster.go b/operator/src/internal/cnpg/cnpg_cluster.go index 6d7faa17..f2f6cf1a 100644 --- a/operator/src/internal/cnpg/cnpg_cluster.go +++ b/operator/src/internal/cnpg/cnpg_cluster.go @@ -115,9 +115,8 @@ func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, docu return params }(), PgHBA: []string{ - "host all all 0.0.0.0/0 trust", - "host all all ::0/0 trust", - "host replication all all trust", + "host all postgres localhost trust", + "host replication streaming_replica all scram-sha-256", }, }, Bootstrap: getBootstrapConfiguration(documentdb, isPrimaryRegion, log), diff --git a/operator/src/internal/cnpg/cnpg_sync.go b/operator/src/internal/cnpg/cnpg_sync.go index ae542493..db42925f 100644 --- a/operator/src/internal/cnpg/cnpg_sync.go +++ b/operator/src/internal/cnpg/cnpg_sync.go @@ -107,8 +107,8 @@ func SyncCnpgCluster( // JSON Patch "add" requires the parent path to exist. if current.Annotations == nil { patchOps = append(patchOps, JSONPatch{ - Op: PatchOpAdd, - Path: "/metadata/annotations", + Op: PatchOpAdd, + Path: "/metadata/annotations", Value: map[string]string{ "kubectl.kubernetes.io/restartedAt": time.Now().Format(time.RFC3339Nano), }, diff --git a/operator/src/internal/controller/documentdb_controller.go b/operator/src/internal/controller/documentdb_controller.go index f27b3c4c..d6d92d02 100644 --- a/operator/src/internal/controller/documentdb_controller.go +++ b/operator/src/internal/controller/documentdb_controller.go @@ -1001,7 +1001,6 @@ func (r *DocumentDBReconciler) determineSchemaTarget( } } - // updateImageStatus reads the current extension and gateway images from the CNPG cluster // and persists them into the DocumentDB status fields. This is a no-op if both fields // are already up to date. 
diff --git a/operator/src/internal/controller/physical_replication.go b/operator/src/internal/controller/physical_replication.go index d5b0066e..859181ab 100644 --- a/operator/src/internal/controller/physical_replication.go +++ b/operator/src/internal/controller/physical_replication.go @@ -136,15 +136,40 @@ func (r *DocumentDBReconciler) AddClusterReplicationToClusterSpec( }, } for clusterName, serviceName := range replicationContext.GenerateExternalClusterServices(documentdb.Name, documentdb.Namespace, replicationContext.IsAzureFleetNetworking()) { - cnpgCluster.Spec.ExternalClusters = append(cnpgCluster.Spec.ExternalClusters, cnpgv1.ExternalCluster{ + externalCluster := cnpgv1.ExternalCluster{ Name: clusterName, ConnectionParameters: map[string]string{ "host": serviceName, "port": "5432", "dbname": "postgres", - "user": "postgres", + "user": "streaming_replica", }, - }) + } + if replicationContext.ReplicationTLSSecret != "" { + externalCluster.SSLCert = &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: replicationContext.ReplicationTLSSecret, + }, + Key: "tls.crt", + } + externalCluster.SSLKey = &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: replicationContext.ReplicationTLSSecret, + }, + Key: "tls.key", + } + cnpgCluster.Spec.Certificates = &cnpgv1.CertificatesConfiguration{ + ReplicationTLSSecret: replicationContext.ReplicationTLSSecret, + ClientCASecret: replicationContext.ClientCASecret, + } + } else { + // If we don't have a cert, we just need to trust + cnpgCluster.Spec.PostgresConfiguration.PgHBA = []string{ + "host all postgres localhost trust", + "host replication streaming_replica all trust", + } + } + cnpgCluster.Spec.ExternalClusters = append(cnpgCluster.Spec.ExternalClusters, externalCluster) } return nil diff --git a/operator/src/internal/utils/constants.go b/operator/src/internal/utils/constants.go index c28b13a0..ffc6fc39 100644 --- 
a/operator/src/internal/utils/constants.go +++ b/operator/src/internal/utils/constants.go @@ -26,7 +26,7 @@ const ( MinK8sMinorVersion = 35 // DEFAULT_DOCUMENTDB_IMAGE is the extension image used in ImageVolume mode. - DEFAULT_DOCUMENTDB_IMAGE = DOCUMENTDB_EXTENSION_IMAGE_REPO + ":0.109.0" + DEFAULT_DOCUMENTDB_IMAGE = DOCUMENTDB_EXTENSION_IMAGE_REPO + ":0.109.0" // NOTE: Keep in sync with operator/cnpg-plugins/sidecar-injector/internal/config/config.go:applyDefaults() DEFAULT_GATEWAY_IMAGE = GATEWAY_IMAGE_REPO + ":0.109.0" DEFAULT_DOCUMENTDB_CREDENTIALS_SECRET = "documentdb-credentials" diff --git a/operator/src/internal/utils/pv_recovery.go b/operator/src/internal/utils/pv_recovery.go index 6c09796b..16cd387e 100644 --- a/operator/src/internal/utils/pv_recovery.go +++ b/operator/src/internal/utils/pv_recovery.go @@ -13,10 +13,10 @@ import ( const ( // Label for identifying temporary PVCs created for PV recovery LabelRecoveryTemp = "documentdb.io/recovery-temp" - + // Label for identifying the DocumentDB cluster a PV/PVC belongs to - LabelCluster = "documentdb.io/cluster" - LabelNamespace = "documentdb.io/namespace" + LabelCluster = "documentdb.io/cluster" + LabelNamespace = "documentdb.io/namespace" ) // TempPVCNameForPVRecovery generates the name for a temporary PVC used during PV recovery. 
diff --git a/operator/src/internal/utils/replication_context.go b/operator/src/internal/utils/replication_context.go index da76dfd8..2c76feef 100644 --- a/operator/src/internal/utils/replication_context.go +++ b/operator/src/internal/utils/replication_context.go @@ -23,6 +23,8 @@ type ReplicationContext struct { StorageClass string FleetMemberName string OtherFleetMemberNames []string + ReplicationTLSSecret string + ClientCASecret string currentLocalPrimary string targetLocalPrimary string state replicationState @@ -103,6 +105,8 @@ func GetReplicationContext(ctx context.Context, client client.Client, documentdb PrimaryCNPGClusterName: primaryCluster, Environment: environment, StorageClass: storageClass, + ReplicationTLSSecret: documentdb.Spec.ClusterReplication.ReplicationTLSSecret, + ClientCASecret: documentdb.Spec.ClusterReplication.ClientCASecret, state: replicationState, FleetMemberName: self.Name, OtherFleetMemberNames: others, From ec85aed61f0b5875f859c69f565a138eabdd197d Mon Sep 17 00:00:00 2001 From: Alexander Laye Date: Mon, 20 Apr 2026 16:37:36 -0400 Subject: [PATCH 2/3] copilot review Signed-off-by: Alexander Laye --- .../crds/documentdb.io_dbs.yaml | 8 +++++--- operator/src/api/preview/documentdb_types.go | 8 +++++--- .../config/crd/bases/documentdb.io_dbs.yaml | 8 +++++--- operator/src/internal/cnpg/cnpg_cluster.go | 4 ++-- .../src/internal/cnpg/cnpg_cluster_test.go | 2 +- operator/src/internal/cnpg/cnpg_patch.go | 1 + .../controller/physical_replication.go | 20 +++++++++++++------ 7 files changed, 33 insertions(+), 18 deletions(-) diff --git a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml index 3acadc5a..9f379f89 100644 --- a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml +++ b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml @@ -1095,8 +1095,9 @@ spec: description: |- ClientCASecret is the name of a Kubernetes Secret containing the CA certificate used to 
verify the streaming_replica client certificate. The secret must contain - a "ca.crt" key. When specified, the secret is propagated to all clusters - participating in replication. + a "ca.crt" key. When specified, the operator references this secret in + clusters participating in replication. + NOTE: It needs to be the same for all clusters type: string clusterList: description: ClusterList is the list of clusters participating @@ -1142,8 +1143,9 @@ spec: description: |- ReplicationTLSSecret is the name of a Kubernetes Secret containing TLS certificates for the streaming_replica user used in physical replication. The secret must contain - "tls.crt" and "tls.key" keys. When specified, the secret is propagated to all + "tls.crt" and "tls.key" keys. When specified, the operator references this secret in clusters participating in replication. + NOTE: It needs to be the same for all clusters type: string required: - clusterList diff --git a/operator/src/api/preview/documentdb_types.go b/operator/src/api/preview/documentdb_types.go index 85bb1903..71c4a0d4 100644 --- a/operator/src/api/preview/documentdb_types.go +++ b/operator/src/api/preview/documentdb_types.go @@ -221,14 +221,16 @@ type ClusterReplication struct { HighAvailability bool `json:"highAvailability,omitempty"` // ReplicationTLSSecret is the name of a Kubernetes Secret containing TLS certificates // for the streaming_replica user used in physical replication. The secret must contain - // "tls.crt" and "tls.key" keys. When specified, the secret is propagated to all + // "tls.crt" and "tls.key" keys. When specified, the operator references this secret in // clusters participating in replication. + // NOTE: It needs to be the same for all clusters // +optional ReplicationTLSSecret string `json:"replicationTLSSecret,omitempty"` // ClientCASecret is the name of a Kubernetes Secret containing the CA certificate // used to verify the streaming_replica client certificate. The secret must contain - // a "ca.crt" key. 
When specified, the secret is propagated to all clusters - // participating in replication. + // a "ca.crt" key. When specified, the operator references this secret in + // clusters participating in replication. + // NOTE: It needs to be the same for all clusters // +optional ClientCASecret string `json:"clientCASecret,omitempty"` } diff --git a/operator/src/config/crd/bases/documentdb.io_dbs.yaml b/operator/src/config/crd/bases/documentdb.io_dbs.yaml index 3acadc5a..9f379f89 100644 --- a/operator/src/config/crd/bases/documentdb.io_dbs.yaml +++ b/operator/src/config/crd/bases/documentdb.io_dbs.yaml @@ -1095,8 +1095,9 @@ spec: description: |- ClientCASecret is the name of a Kubernetes Secret containing the CA certificate used to verify the streaming_replica client certificate. The secret must contain - a "ca.crt" key. When specified, the secret is propagated to all clusters - participating in replication. + a "ca.crt" key. When specified, the operator references this secret in + clusters participating in replication. + NOTE: It needs to be the same for all clusters type: string clusterList: description: ClusterList is the list of clusters participating @@ -1142,8 +1143,9 @@ spec: description: |- ReplicationTLSSecret is the name of a Kubernetes Secret containing TLS certificates for the streaming_replica user used in physical replication. The secret must contain - "tls.crt" and "tls.key" keys. When specified, the secret is propagated to all + "tls.crt" and "tls.key" keys. When specified, the operator references this secret in clusters participating in replication. 
+ NOTE: It needs to be the same for all clusters type: string required: - clusterList diff --git a/operator/src/internal/cnpg/cnpg_cluster.go b/operator/src/internal/cnpg/cnpg_cluster.go index f2f6cf1a..aca5a9c2 100644 --- a/operator/src/internal/cnpg/cnpg_cluster.go +++ b/operator/src/internal/cnpg/cnpg_cluster.go @@ -115,8 +115,8 @@ func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, docu return params }(), PgHBA: []string{ - "host all postgres localhost trust", - "host replication streaming_replica all scram-sha-256", + "host all all localhost trust", + "hostssl replication streaming_replica all cert", }, }, Bootstrap: getBootstrapConfiguration(documentdb, isPrimaryRegion, log), diff --git a/operator/src/internal/cnpg/cnpg_cluster_test.go b/operator/src/internal/cnpg/cnpg_cluster_test.go index 37384aaf..1652d2fd 100644 --- a/operator/src/internal/cnpg/cnpg_cluster_test.go +++ b/operator/src/internal/cnpg/cnpg_cluster_test.go @@ -213,7 +213,7 @@ var _ = Describe("GetCnpgClusterSpec", func() { Expect(result.Spec.PostgresConfiguration.Extensions[0].LdLibraryPath).To(Equal([]string{"lib", "system"})) Expect(result.Spec.PostgresConfiguration.AdditionalLibraries).To(ConsistOf("pg_cron", "pg_documentdb_core", "pg_documentdb")) Expect(result.Spec.PostgresConfiguration.Parameters).To(HaveKeyWithValue("cron.database_name", "postgres")) - Expect(result.Spec.PostgresConfiguration.PgHBA).To(HaveLen(3)) + Expect(result.Spec.PostgresConfiguration.PgHBA).To(HaveLen(2)) Expect(result.Spec.PostgresUID).To(Equal(int64(0))) Expect(result.Spec.PostgresGID).To(Equal(int64(0))) }) diff --git a/operator/src/internal/cnpg/cnpg_patch.go b/operator/src/internal/cnpg/cnpg_patch.go index 238f231f..dc2739b5 100644 --- a/operator/src/internal/cnpg/cnpg_patch.go +++ b/operator/src/internal/cnpg/cnpg_patch.go @@ -24,6 +24,7 @@ const ( PatchPathPlugins = "/spec/plugins" PatchPathReplicationSlots = "/spec/replicationSlots" PatchPathExternalClusters = 
"/spec/externalClusters" + PatchPathCertificates = "/spec/certificates" PatchPathManagedServices = "/spec/managed/services/additional" PatchPathSynchronous = "/spec/postgresql/synchronous" PatchPathBootstrap = "/spec/bootstrap" diff --git a/operator/src/internal/controller/physical_replication.go b/operator/src/internal/controller/physical_replication.go index 859181ab..deb47411 100644 --- a/operator/src/internal/controller/physical_replication.go +++ b/operator/src/internal/controller/physical_replication.go @@ -139,13 +139,14 @@ func (r *DocumentDBReconciler) AddClusterReplicationToClusterSpec( externalCluster := cnpgv1.ExternalCluster{ Name: clusterName, ConnectionParameters: map[string]string{ - "host": serviceName, - "port": "5432", - "dbname": "postgres", - "user": "streaming_replica", + "host": serviceName, + "port": "5432", + "dbname": "postgres", + "user": "streaming_replica", }, } if replicationContext.ReplicationTLSSecret != "" { + externalCluster.ConnectionParameters["sslmode"] = "require" externalCluster.SSLCert = &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ Name: replicationContext.ReplicationTLSSecret, @@ -160,12 +161,14 @@ func (r *DocumentDBReconciler) AddClusterReplicationToClusterSpec( } cnpgCluster.Spec.Certificates = &cnpgv1.CertificatesConfiguration{ ReplicationTLSSecret: replicationContext.ReplicationTLSSecret, - ClientCASecret: replicationContext.ClientCASecret, + } + if replicationContext.ClientCASecret != "" { + cnpgCluster.Spec.Certificates.ClientCASecret = replicationContext.ClientCASecret } } else { // If we don't have a cert, we just need to trust cnpgCluster.Spec.PostgresConfiguration.PgHBA = []string{ - "host all postgres localhost trust", + "host all all localhost trust", "host replication streaming_replica all trust", } } @@ -518,6 +521,11 @@ func getReplicasChangePatchOps(patchOps *[]cnpg.JSONPatch, desired *cnpgv1.Clust Path: cnpg.PatchPathExternalClusters, Value: desired.Spec.ExternalClusters, }) 
+ *patchOps = append(*patchOps, cnpg.JSONPatch{ + Op: cnpg.PatchOpReplace, + Path: cnpg.PatchPathCertificates, + Value: desired.Spec.Certificates, + }) if replicationContext.IsAzureFleetNetworking() { *patchOps = append(*patchOps, cnpg.JSONPatch{ Op: cnpg.PatchOpReplace, From 865339cca0d180aae515f73c65d9f5c509656d23 Mon Sep 17 00:00:00 2001 From: Alexander Laye Date: Tue, 21 Apr 2026 13:06:50 -0400 Subject: [PATCH 3/3] tests and docs Signed-off-by: Alexander Laye --- .../preview/api-reference.md | 3 + .../multi-region-deployment/overview.md | 2 +- .../preview/multi-region-deployment/setup.md | 41 ++++ .../crds/documentdb.io_dbs.yaml | 4 + operator/src/api/preview/documentdb_types.go | 4 + .../config/crd/bases/documentdb.io_dbs.yaml | 4 + .../controller/physical_replication.go | 38 ++-- .../controller/physical_replication_test.go | 181 ++++++++++++++++++ 8 files changed, 260 insertions(+), 17 deletions(-) diff --git a/docs/operator-public-documentation/preview/api-reference.md b/docs/operator-public-documentation/preview/api-reference.md index 73f5144e..c71c1a8c 100644 --- a/docs/operator-public-documentation/preview/api-reference.md +++ b/docs/operator-public-documentation/preview/api-reference.md @@ -117,6 +117,8 @@ _Appears in:_ | `primary` _string_ | Primary is the name of the primary cluster for replication. | | | | `clusterList` _[MemberCluster](#membercluster) array_ | ClusterList is the list of clusters participating in replication. | | | | `highAvailability` _boolean_ | Whether or not to have replicas on the primary cluster. | | | +| `replicationTLSSecret` _string_ | ReplicationTLSSecret is the name of a Kubernetes Secret containing TLS certificates
for the streaming_replica user used in physical replication. The secret must contain
"tls.crt" and "tls.key" keys. When specified, the operator references this secret in
clusters participating in replication.
NOTE: It needs to be the same for all clusters | | MaxLength: 253
Pattern: `^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`
Optional: \{\}
| +| `clientCASecret` _string_ | ClientCASecret is the name of a Kubernetes Secret containing the CA certificate
used to verify the streaming_replica client certificate. The secret must contain
a "ca.crt" key. When specified, the operator references this secret in
clusters participating in replication.
NOTE: It needs to be the same for all clusters | | MaxLength: 253
Pattern: `^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`
Optional: \{\}
| #### DocumentDB @@ -169,6 +171,7 @@ _Appears in:_ | `bootstrap` _[BootstrapConfiguration](#bootstrapconfiguration)_ | Bootstrap configures the initialization of the DocumentDB cluster. | | Optional: \{\}
| | `backup` _[BackupConfiguration](#backupconfiguration)_ | Backup configures backup settings for DocumentDB. | | Optional: \{\}
| | `featureGates` _object (keys:string, values:boolean)_ | FeatureGates enables or disables optional DocumentDB features.
Keys are PascalCase feature names following the Kubernetes feature gate convention.
Example: \{"ChangeStreams": true\}
IMPORTANT: When adding a new feature gate, update ALL of the following:
1. Add a new FeatureGate* constant in documentdb_types.go
2. Add the key name to the XValidation CEL rule's allowed list below
3. Add a default entry in the featureGateDefaults map in documentdb_types.go | | Optional: \{\}
| +| `schemaVersion` _string_ | SchemaVersion controls the desired schema version for the DocumentDB extension.
The operator never changes your database schema unless you ask:
- Set documentDBVersion → updates the binary (safe to roll back)
- Set schemaVersion → updates the database schema (irreversible)
- Set schemaVersion: "auto" → schema auto-updates with binary
Once the schema has been updated, the operator blocks image rollback below the
installed schema version to prevent running an untested binary/schema combination.
Values:
- "" (empty, default): Two-phase mode. Image upgrades happen automatically,
but ALTER EXTENSION UPDATE does NOT run. Users must explicitly set this
field to finalize the schema upgrade. This is the safest option for production
as it allows rollback by reverting the image before committing the schema change.
- "auto": Schema automatically updates to match the binary version whenever
the binary is upgraded. This is the simplest mode but provides no rollback
safety window.
- "" (e.g. "0.112.0"): Schema updates to exactly this version.
Must be <= the binary version. | | Pattern: `^(auto\|[0-9]+\.[0-9]+\.[0-9]+)?$`
Optional: \{\}
| | `affinity` _[AffinityConfiguration](https://pkg.go.dev/github.com/cloudnative-pg/cloudnative-pg/api/v1#AffinityConfiguration)_ | Affinity/Anti-affinity rules for Pods (cnpg passthrough) | | Optional: \{\}
| diff --git a/docs/operator-public-documentation/preview/multi-region-deployment/overview.md b/docs/operator-public-documentation/preview/multi-region-deployment/overview.md index 341b3e7d..cec802fc 100644 --- a/docs/operator-public-documentation/preview/multi-region-deployment/overview.md +++ b/docs/operator-public-documentation/preview/multi-region-deployment/overview.md @@ -156,7 +156,7 @@ an equal or greater volume of available storage compared to the primary. Enable TLS for all connections: - **Client-to-gateway:** Encrypt application connections (see [TLS configuration](../configuration/tls.md)) -- **Replication traffic:** PostgreSQL SSL for inter-cluster replication +- **Replication traffic:** Cross-Kubernetes-cluster streaming replication is authenticated with mutual TLS using the `streaming_replica` PostgreSQL role. Configure `spec.clusterReplication.replicationTLSSecret` and optionally `spec.clusterReplication.clientCASecret` to wire up the client certificate and CA. See [Securing replication with TLS](setup.md#securing-replication-with-tls) for the complete setup. - **Service mesh:** mTLS for cross-cluster service communication ### Authentication and authorization diff --git a/docs/operator-public-documentation/preview/multi-region-deployment/setup.md b/docs/operator-public-documentation/preview/multi-region-deployment/setup.md index 1b7890d7..fdf88aa9 100644 --- a/docs/operator-public-documentation/preview/multi-region-deployment/setup.md +++ b/docs/operator-public-documentation/preview/multi-region-deployment/setup.md @@ -151,6 +151,47 @@ spec: - name: member-westus3-cluster ``` +#### Securing replication with TLS + +Cross-Kubernetes-cluster streaming replication flows over the network between +member Kubernetes clusters, so the operator secures it with mutual TLS instead +of password or trust-based authentication. 
Each replica connects to the primary +as the dedicated `streaming_replica` PostgreSQL role and presents a client +certificate that the primary verifies against a shared certificate authority (CA). + +!!! important "Insecure by default" + If no cert is provided, the operator defaults to trusting all external replication + connections. + +When you provide a replication cert for your multi-regional setup, the operator +configures PostgreSQL to only accept replication connections over TLS with a valid +client certificate (`hostssl replication streaming_replica all cert` in `pg_hba.conf`). +Each member Kubernetes cluster must use the same replication certificate and CA, +so any replica can authenticate to any primary after a failover. Put the cert into +a Kubernetes Secret, then pass the name in using the following fields. + +| Field | Type | Required | Description | +| --- | --- | --- | --- | +| `replicationTLSSecret` | string | Yes for secure multi-region | Name of a Kubernetes Secret that contains the `streaming_replica` client certificate and key. Must contain `tls.crt` and `tls.key`. Must be the same name in every member Kubernetes cluster. | +| `clientCASecret` | string | Optional | Name of a Kubernetes Secret that contains the CA certificate (`ca.crt`) used to verify the client certificate. If omitted, the operator falls back to the CA embedded in `replicationTLSSecret`. Must be the same name in every member Kubernetes cluster. | + +The operator looks up the secrets by name in the DocumentDB namespace on each member +Kubernetes cluster. Both the secret name and the certificate material must match +across Kubernetes clusters — otherwise the replica can't authenticate to the primary. 
+ +For a working KubeFleet example that propagates a Secret to every member Kubernetes +cluster via `ClusterResourcePlacement`, see [`documentdb-resource-crp.yaml`](https://github.com/documentdb/documentdb-kubernetes-operator/blob/main/documentdb-playground/aks-fleet-deployment/documentdb-resource-crp.yaml) +in the playground. + +!!! tip "Single-region deployments" + The `replicationTLSSecret` and `clientCASecret` fields aren't required for + single-region clusters. Intra-Kubernetes-cluster replication between CloudNative-PG + pods is already secured by the certificates CloudNative-PG provisions for each + cluster. + +See the [ClusterReplication API Reference](../api-reference.md#clusterreplication) +for the full field list. + ## Deployment options Choose a deployment approach based on your infrastructure and operational preferences. diff --git a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml index 9f379f89..37f1c0f1 100644 --- a/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml +++ b/operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml @@ -1098,6 +1098,8 @@ spec: a "ca.crt" key. When specified, the operator references this secret in clusters participating in replication. NOTE: It needs to be the same for all clusters + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string clusterList: description: ClusterList is the list of clusters participating @@ -1146,6 +1148,8 @@ spec: "tls.crt" and "tls.key" keys. When specified, the operator references this secret in clusters participating in replication. 
NOTE: It needs to be the same for all clusters + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string required: - clusterList diff --git a/operator/src/api/preview/documentdb_types.go b/operator/src/api/preview/documentdb_types.go index 71c4a0d4..b9ed32ec 100644 --- a/operator/src/api/preview/documentdb_types.go +++ b/operator/src/api/preview/documentdb_types.go @@ -225,6 +225,8 @@ type ClusterReplication struct { // clusters participating in replication. // NOTE: It needs to be the same for all clusters // +optional + // +kubebuilder:validation:Pattern=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` + // +kubebuilder:validation:MaxLength=253 ReplicationTLSSecret string `json:"replicationTLSSecret,omitempty"` // ClientCASecret is the name of a Kubernetes Secret containing the CA certificate // used to verify the streaming_replica client certificate. The secret must contain @@ -232,6 +234,8 @@ type ClusterReplication struct { // clusters participating in replication. // NOTE: It needs to be the same for all clusters // +optional + // +kubebuilder:validation:Pattern=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` + // +kubebuilder:validation:MaxLength=253 ClientCASecret string `json:"clientCASecret,omitempty"` } diff --git a/operator/src/config/crd/bases/documentdb.io_dbs.yaml b/operator/src/config/crd/bases/documentdb.io_dbs.yaml index 9f379f89..37f1c0f1 100644 --- a/operator/src/config/crd/bases/documentdb.io_dbs.yaml +++ b/operator/src/config/crd/bases/documentdb.io_dbs.yaml @@ -1098,6 +1098,8 @@ spec: a "ca.crt" key. When specified, the operator references this secret in clusters participating in replication. NOTE: It needs to be the same for all clusters + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string clusterList: description: ClusterList is the list of clusters participating @@ -1146,6 +1148,8 @@ spec: "tls.crt" and "tls.key" keys. When specified, the operator references this secret in clusters participating in replication. 
NOTE: It needs to be the same for all clusters + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string required: - clusterList diff --git a/operator/src/internal/controller/physical_replication.go b/operator/src/internal/controller/physical_replication.go index deb47411..3972bf3b 100644 --- a/operator/src/internal/controller/physical_replication.go +++ b/operator/src/internal/controller/physical_replication.go @@ -139,12 +139,14 @@ func (r *DocumentDBReconciler) AddClusterReplicationToClusterSpec( externalCluster := cnpgv1.ExternalCluster{ Name: clusterName, ConnectionParameters: map[string]string{ - "host": serviceName, - "port": "5432", - "dbname": "postgres", - "user": "streaming_replica", + "host": serviceName, + "port": "5432", + "dbname": "postgres", + "user": "streaming_replica", }, } + + // Add certificates to external connections if replicationContext.ReplicationTLSSecret != "" { externalCluster.ConnectionParameters["sslmode"] = "require" externalCluster.SSLCert = &corev1.SecretKeySelector{ @@ -159,22 +161,26 @@ func (r *DocumentDBReconciler) AddClusterReplicationToClusterSpec( }, Key: "tls.key", } - cnpgCluster.Spec.Certificates = &cnpgv1.CertificatesConfiguration{ - ReplicationTLSSecret: replicationContext.ReplicationTLSSecret, - } - if replicationContext.ClientCASecret != "" { - cnpgCluster.Spec.Certificates.ClientCASecret = replicationContext.ClientCASecret - } - } else { - // If we don't have a cert, we just need to trust - cnpgCluster.Spec.PostgresConfiguration.PgHBA = []string{ - "host all all localhost trust", - "host replication streaming_replica all trust", - } } cnpgCluster.Spec.ExternalClusters = append(cnpgCluster.Spec.ExternalClusters, externalCluster) } + // Add certificate configuration for incoming connections + if replicationContext.ReplicationTLSSecret != "" { + cnpgCluster.Spec.Certificates = &cnpgv1.CertificatesConfiguration{ + ReplicationTLSSecret: replicationContext.ReplicationTLSSecret, + } + if 
replicationContext.ClientCASecret != "" { + cnpgCluster.Spec.Certificates.ClientCASecret = replicationContext.ClientCASecret + } + } else { + // If we don't have a cert AND we're multi-regional, we just need to trust (for now) + cnpgCluster.Spec.PostgresConfiguration.PgHBA = []string{ + "host all all localhost trust", + "host replication streaming_replica all trust", + } + } + return nil } diff --git a/operator/src/internal/controller/physical_replication_test.go b/operator/src/internal/controller/physical_replication_test.go index cdc7a42a..74b6fd86 100644 --- a/operator/src/internal/controller/physical_replication_test.go +++ b/operator/src/internal/controller/physical_replication_test.go @@ -546,3 +546,184 @@ var _ = Describe("Physical Replication", func() { Expect(updated.Spec.PostgresConfiguration.Synchronous.Number).To(Equal(2)) }) }) + +var _ = Describe("AddClusterReplicationToClusterSpec - cert management fields", func() { + // Helper to build a minimal cnpgCluster suitable for AddClusterReplicationToClusterSpec. + buildCnpgCluster := func(name, namespace string) *cnpgv1.Cluster { + return &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + Spec: cnpgv1.ClusterSpec{ + InheritedMetadata: &cnpgv1.EmbeddedObjectMetadata{ + Labels: map[string]string{}, + }, + }, + } + } + + // Helper to build a ReplicationContext in primary state (zero value state == NoReplication which + // satisfies IsPrimary()) with two remote cluster members, using the None networking strategy so + // no service import/export objects are required. 
+ buildPrimaryReplicationContext := func(name string, tlsSecret, caSecret string) *util.ReplicationContext { + return &util.ReplicationContext{ + CNPGClusterName: name + "-local", + OtherCNPGClusterNames: []string{name + "-remote-a", name + "-remote-b"}, + PrimaryCNPGClusterName: name + "-local", + CrossCloudNetworkingStrategy: util.None, + ReplicationTLSSecret: tlsSecret, + ClientCASecret: caSecret, + } + } + + It("does not set Certificates and falls back to trust-based pg_hba when ReplicationTLSSecret is empty", func() { + ctx := context.Background() + namespace := "default" + + documentdb := baseDocumentDB("docdb-cert-none", namespace) + documentdb.Spec.ClusterReplication = &dbpreview.ClusterReplication{ + CrossCloudNetworkingStrategy: string(util.None), + Primary: "cluster-a", + ClusterList: []dbpreview.MemberCluster{ + {Name: "cluster-a"}, + {Name: "cluster-b"}, + }, + } + + cnpgCluster := buildCnpgCluster("docdb-cert-none", namespace) + replicationContext := buildPrimaryReplicationContext("docdb-cert-none", "", "") + + reconciler := buildDocumentDBReconciler() + Expect(reconciler.AddClusterReplicationToClusterSpec(ctx, documentdb, replicationContext, cnpgCluster)).To(Succeed()) + + Expect(cnpgCluster.Spec.Certificates).To(BeNil()) + // Self + two remote external clusters + Expect(cnpgCluster.Spec.ExternalClusters).To(HaveLen(3)) + for _, ec := range cnpgCluster.Spec.ExternalClusters { + if ec.Name == replicationContext.CNPGClusterName { + // Self cluster still uses the superuser for self-loopback. + Expect(ec.ConnectionParameters["user"]).To(Equal("postgres")) + continue + } + // External (remote) clusters use the dedicated replication user but no TLS material. + Expect(ec.ConnectionParameters["user"]).To(Equal("streaming_replica")) + Expect(ec.ConnectionParameters).ToNot(HaveKey("sslmode")) + Expect(ec.SSLCert).To(BeNil()) + Expect(ec.SSLKey).To(BeNil()) + } + // Fallback pg_hba configuration is applied when no TLS secret is provided. 
+ Expect(cnpgCluster.Spec.PostgresConfiguration.PgHBA).To(Equal([]string{ + "host all all localhost trust", + "host replication streaming_replica all trust", + })) + }) + + It("propagates ClientCASecret onto the Certificates spec when set alongside ReplicationTLSSecret", func() { + ctx := context.Background() + namespace := "default" + + documentdb := baseDocumentDB("docdb-cert-ca", namespace) + documentdb.Spec.ClusterReplication = &dbpreview.ClusterReplication{ + CrossCloudNetworkingStrategy: string(util.None), + Primary: "cluster-a", + ReplicationTLSSecret: "replication-tls", + ClientCASecret: "client-ca", + ClusterList: []dbpreview.MemberCluster{ + {Name: "cluster-a"}, + {Name: "cluster-b"}, + }, + } + + cnpgCluster := buildCnpgCluster("docdb-cert-ca", namespace) + replicationContext := buildPrimaryReplicationContext("docdb-cert-ca", "replication-tls", "client-ca") + + reconciler := buildDocumentDBReconciler() + Expect(reconciler.AddClusterReplicationToClusterSpec(ctx, documentdb, replicationContext, cnpgCluster)).To(Succeed()) + + Expect(cnpgCluster.Spec.Certificates).ToNot(BeNil()) + Expect(cnpgCluster.Spec.Certificates.ReplicationTLSSecret).To(Equal("replication-tls")) + Expect(cnpgCluster.Spec.Certificates.ClientCASecret).To(Equal("client-ca")) + }) + + It("ignores ClientCASecret when ReplicationTLSSecret is empty", func() { + ctx := context.Background() + namespace := "default" + + documentdb := baseDocumentDB("docdb-cert-ca-only", namespace) + documentdb.Spec.ClusterReplication = &dbpreview.ClusterReplication{ + CrossCloudNetworkingStrategy: string(util.None), + Primary: "cluster-a", + ClientCASecret: "client-ca", + ClusterList: []dbpreview.MemberCluster{ + {Name: "cluster-a"}, + {Name: "cluster-b"}, + }, + } + + cnpgCluster := buildCnpgCluster("docdb-cert-ca-only", namespace) + // A ClientCASecret without a ReplicationTLSSecret should not enable TLS. 
+ replicationContext := buildPrimaryReplicationContext("docdb-cert-ca-only", "", "client-ca") + + reconciler := buildDocumentDBReconciler() + Expect(reconciler.AddClusterReplicationToClusterSpec(ctx, documentdb, replicationContext, cnpgCluster)).To(Succeed()) + + Expect(cnpgCluster.Spec.Certificates).To(BeNil()) + for _, ec := range cnpgCluster.Spec.ExternalClusters { + Expect(ec.ConnectionParameters).ToNot(HaveKey("sslmode")) + Expect(ec.SSLCert).To(BeNil()) + Expect(ec.SSLKey).To(BeNil()) + } + }) +}) + +var _ = Describe("getReplicasChangePatchOps - cert management fields", func() { + It("emits a replace patch for spec.certificates alongside externalClusters", func() { + desired := &cnpgv1.Cluster{ + Spec: cnpgv1.ClusterSpec{ + ExternalClusters: []cnpgv1.ExternalCluster{{Name: "cluster-a"}}, + Certificates: &cnpgv1.CertificatesConfiguration{ + ReplicationTLSSecret: "replication-tls", + ClientCASecret: "client-ca", + }, + }, + } + replicationContext := &util.ReplicationContext{ + CrossCloudNetworkingStrategy: util.None, + } + + var patchOps []cnpg.JSONPatch + getReplicasChangePatchOps(&patchOps, desired, replicationContext) + + hasCerts := false + for _, op := range patchOps { + if op.Path == cnpg.PatchPathCertificates { + Expect(op.Op).To(Equal(cnpg.PatchOpReplace)) + Expect(op.Value).To(Equal(desired.Spec.Certificates)) + hasCerts = true + } + } + Expect(hasCerts).To(BeTrue()) + }) + + It("emits a replace patch for spec.certificates with a nil value when TLS is disabled", func() { + desired := &cnpgv1.Cluster{ + Spec: cnpgv1.ClusterSpec{ + ExternalClusters: []cnpgv1.ExternalCluster{{Name: "cluster-a"}}, + }, + } + replicationContext := &util.ReplicationContext{ + CrossCloudNetworkingStrategy: util.None, + } + + var patchOps []cnpg.JSONPatch + getReplicasChangePatchOps(&patchOps, desired, replicationContext) + + hasCerts := false + for _, op := range patchOps { + if op.Path == cnpg.PatchPathCertificates { + Expect(op.Op).To(Equal(cnpg.PatchOpReplace)) + 
Expect(op.Value).To(BeNil()) + hasCerts = true + } + } + Expect(hasCerts).To(BeTrue()) + }) +})