diff --git a/api/v1alpha1/conditions.go b/api/v1alpha1/conditions.go index bc31f1bb..bfe86c62 100644 --- a/api/v1alpha1/conditions.go +++ b/api/v1alpha1/conditions.go @@ -82,6 +82,10 @@ const ( ClickHouseConditionReasonExternalSecretValid ConditionReason = "ExternalSecretValid" ClickHouseConditionReasonExternalSecretNotFound ConditionReason = "ExternalSecretNotFound" ClickHouseConditionReasonExternalSecretInvalid ConditionReason = "ExternalSecretInvalid" + + // ClickHouseConditionReasonPingImpossible is a special condition reason. It is set in case the operator cannot + // create a client to ping a replica, but it may be healthy. + ClickHouseConditionReasonPingImpossible ConditionReason = "PingImpossible" ) // KeeperCluster specific condition types and reasons. diff --git a/internal/controller/clickhouse/controller_test.go b/internal/controller/clickhouse/controller_test.go index aced0fd7..5ffd3fc8 100644 --- a/internal/controller/clickhouse/controller_test.go +++ b/internal/controller/clickhouse/controller_test.go @@ -133,10 +133,6 @@ var _ = When("reconciling ClickHouseCluster", Ordered, func() { Expect(jobs.Items).To(HaveLen(1)) Expect(jobs.Items[0].Labels[controllerutil.LabelRoleKey]).To(Equal(controllerutil.LabelVersionProbe)) - testutil.AssertEvents(recorder.Events, map[string]int{ - "ClusterNotReady": 1, - }) - By("completing the version probe job") testutil.CompleteVersionProbeJob(ctx, suite, cr.Namespace, cr.SpecificName(), "26.1.1.1") }) @@ -148,6 +144,10 @@ var _ = When("reconciling ClickHouseCluster", Ordered, func() { Expect(err).NotTo(HaveOccurred()) Expect(suite.Client.Get(ctx, cr.NamespacedName(), cr)).To(Succeed()) + testutil.AssertEvents(recorder.Events, map[string]int{ + "ClusterNotReady": 1, + }) + Expect(suite.Client.List(ctx, &services, listOpts)).To(Succeed()) Expect(services.Items).To(HaveLen(1)) @@ -208,15 +208,16 @@ var _ = When("reconciling ClickHouseCluster", Ordered, func() { _, err := controller.Reconcile(ctx, 
ctrl.Request{NamespacedName: crossNamespaceCluster.NamespacedName()}) Expect(err).NotTo(HaveOccurred()) - testutil.AssertEvents(recorder.Events, map[string]int{ - "ClusterNotReady": 1, - }) testutil.CompleteVersionProbeJob(ctx, suite, crossNamespaceCluster.Namespace, crossNamespaceCluster.SpecificName(), "26.1.1.1") _, err = controller.Reconcile(ctx, ctrl.Request{NamespacedName: crossNamespaceCluster.NamespacedName()}) Expect(err).NotTo(HaveOccurred()) + testutil.AssertEvents(recorder.Events, map[string]int{ + "ClusterNotReady": 1, + }) + var config corev1.ConfigMap Expect(suite.Client.Get(ctx, types.NamespacedName{ Namespace: crossNamespaceCluster.Namespace, @@ -631,7 +632,6 @@ var _ = When("reconciling ClickHouseCluster", Ordered, func() { testutil.AssertEvents(recorder.Events, map[string]int{ "ExternalSecretNotFound": 1, - "ClusterNotReady": 1, }) testutil.CompleteVersionProbeJob(ctx, suite, esoCR.Namespace, esoCR.SpecificName(), "26.1.1.1") @@ -652,6 +652,12 @@ var _ = When("reconciling ClickHouseCluster", Ordered, func() { Expect(cond.Message).To(ContainSubstring("plaintext password")) Expect(cond.Message).NotTo(ContainSubstring("interserver-password")) + // commander becomes non-nil once the partial secret is found, + // so Ready transitions Unknown -> False and emits ClusterNotReady. 
+ testutil.AssertEvents(recorder.Events, map[string]int{ + "ClusterNotReady": 1, + }) + By("reconciling with external secret manage policy") esoCR.Spec.ExternalSecret.Policy = v1.ExternalSecretPolicyManage diff --git a/internal/controller/clickhouse/sync.go b/internal/controller/clickhouse/sync.go index 8bab65e4..0558bc0b 100644 --- a/internal/controller/clickhouse/sync.go +++ b/internal/controller/clickhouse/sync.go @@ -927,21 +927,37 @@ func (r *clickhouseReconciler) evaluateReplicaConditions() { expected := int(r.Cluster.Replicas() * r.Cluster.Shards()) r.SetCondition(chctrl.ReplicaStartupCondition(errorIDs)) - r.SetCondition(chctrl.HealthyCondition(notReadyIDs)) r.SetCondition(chctrl.ClusterSizeCondition(exists, expected)) - if len(notReadyShards) == 0 { - r.SetCondition( - metav1.Condition{Type: v1.ConditionTypeReady, Status: metav1.ConditionTrue, Reason: v1.ClickHouseConditionAllShardsReady, Message: "All shards are ready"}, - chctrl.EventSpec{Type: corev1.EventTypeNormal, Reason: v1.EventReasonClusterReady, Action: v1.EventActionBecameReady, Message: "ClickHouse cluster is ready"}, - ) + if r.commander == nil { + r.SetCondition(metav1.Condition{ + Type: v1.ConditionTypeHealthy, + Status: metav1.ConditionUnknown, + Reason: v1.ClickHouseConditionReasonPingImpossible, + Message: "Cannot probe replicas", + }) + r.SetCondition(metav1.Condition{ + Type: v1.ConditionTypeReady, + Status: metav1.ConditionUnknown, + Reason: v1.ClickHouseConditionReasonPingImpossible, + Message: "Cannot probe replicas", + }) } else { - slices.Sort(notReadyShards) - message := fmt.Sprintf("Not Ready shards: %v", notReadyShards) - r.SetCondition( - metav1.Condition{Type: v1.ConditionTypeReady, Status: metav1.ConditionFalse, Reason: v1.ClickHouseConditionSomeShardsNotReady, Message: message}, - chctrl.EventSpec{Type: corev1.EventTypeWarning, Reason: v1.EventReasonClusterNotReady, Action: v1.EventActionBecameNotReady, Message: message}, - ) + 
r.SetCondition(chctrl.HealthyCondition(notReadyIDs)) + + if len(notReadyShards) == 0 { + r.SetCondition( + metav1.Condition{Type: v1.ConditionTypeReady, Status: metav1.ConditionTrue, Reason: v1.ClickHouseConditionAllShardsReady, Message: "All shards are ready"}, + chctrl.EventSpec{Type: corev1.EventTypeNormal, Reason: v1.EventReasonClusterReady, Action: v1.EventActionBecameReady, Message: "ClickHouse cluster is ready"}, + ) + } else { + slices.Sort(notReadyShards) + message := fmt.Sprintf("Not Ready shards: %v", notReadyShards) + r.SetCondition( + metav1.Condition{Type: v1.ConditionTypeReady, Status: metav1.ConditionFalse, Reason: v1.ClickHouseConditionSomeShardsNotReady, Message: message}, + chctrl.EventSpec{Type: corev1.EventTypeWarning, Reason: v1.EventReasonClusterNotReady, Action: v1.EventActionBecameNotReady, Message: message}, + ) + } } }