diff --git a/internal/controller/nodeagent.go b/internal/controller/nodeagent.go index e78e830309..67711bc02f 100644 --- a/internal/controller/nodeagent.go +++ b/internal/controller/nodeagent.go @@ -324,6 +324,7 @@ func (r *DataProtectionApplicationReconciler) ReconcileNodeAgentDaemonset(log lo terms = append(terms, a.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms...) } } + common.SortNodeSelectorTerms(terms) if len(terms) > 0 { ds.Spec.Template.Spec.Affinity = &corev1.Affinity{ NodeAffinity: &corev1.NodeAffinity{ diff --git a/internal/controller/nodeagent_test.go b/internal/controller/nodeagent_test.go index b71470bfa6..0b3f086a1c 100644 --- a/internal/controller/nodeagent_test.go +++ b/internal/controller/nodeagent_test.go @@ -2771,3 +2771,69 @@ func TestDPAReconciler_buildNodeAgentDaemonsetWithAzureWorkloadIdentity(t *testi }) } } + +// TestNodeAgentAffinityMatchExpressionsDeterministicOrder verifies that +// matchExpressions built from a multi-label NodeSelector are always sorted +// by key, preventing non-deterministic DaemonSet spec changes that cause +// unnecessary node-agent pod restarts. See OADP-7541. +func TestNodeAgentAffinityMatchExpressionsDeterministicOrder(t *testing.T) { + tests := []struct { + name string + matchLabels map[string]string + wantKeys []string + }{ + { + name: "two labels matching customer config from OADP-7541", + matchLabels: map[string]string{ + "network": "int", + "node-role.kubernetes.io/infra": "", + }, + wantKeys: []string{"network", "node-role.kubernetes.io/infra"}, + }, + { + name: "three labels to verify sorting with more keys", + matchLabels: map[string]string{ + "zone": "us-east-1a", + "disk-type": "ssd", + "node-role.kubernetes.io/worker": "", + }, + wantKeys: []string{"disk-type", "node-role.kubernetes.io/worker", "zone"}, + }, + { + name: "single label is trivially stable", + matchLabels: map[string]string{ + "node-role.kubernetes.io/infra": "", + }, + wantKeys: []string{"node-role.kubernetes.io/infra"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + for i := 0; i < 100; i++ { + la := &kube.LoadAffinity{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: tt.matchLabels, + }, + } + affinity := kube.ToSystemAffinity(la, nil) + require.NotNil(t, affinity, "iteration %d: affinity should not be nil", i) + + terms := affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms + require.Len(t, terms, 1, "iteration %d: expected 1 NodeSelectorTerm", i) + + common.SortNodeSelectorTerms(terms) + + exprs := terms[0].MatchExpressions + require.Len(t, exprs, len(tt.wantKeys), "iteration %d: wrong number of matchExpressions", i) + + gotKeys := make([]string, len(exprs)) + for j, expr := range exprs { + gotKeys[j] = expr.Key + } + require.Equal(t, tt.wantKeys, gotKeys, + "iteration %d: matchExpressions keys not in sorted order", i) + } + }) + } +} diff --git a/internal/controller/velero.go b/internal/controller/velero.go index 5dffa21d74..6a33d09f1f 100644 --- a/internal/controller/velero.go +++ b/internal/controller/velero.go @@ -295,6 +295,7 @@ func (r *DataProtectionApplicationReconciler) customizeVeleroDeployment(veleroDe terms = append(terms, a.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms...) } } + common.SortNodeSelectorTerms(terms) if len(terms) > 0 { veleroDeployment.Spec.Template.Spec.Affinity = &corev1.Affinity{ NodeAffinity: &corev1.NodeAffinity{ diff --git a/internal/controller/velero_test.go b/internal/controller/velero_test.go index 5cfc3dced0..15599c6e55 100644 --- a/internal/controller/velero_test.go +++ b/internal/controller/velero_test.go @@ -23,7 +23,9 @@ import ( "github.com/onsi/gomega" "github.com/operator-framework/operator-lib/proxy" "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + "github.com/vmware-tanzu/velero/pkg/util/kube" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -2095,11 +2097,6 @@ func TestDPAReconciler_buildVeleroDeployment(t *testing.T) { loadAffinity: []corev1.NodeSelectorTerm{ { MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Key: "zone", - Operator: corev1.NodeSelectorOpIn, - Values: []string{"east"}, - }, { Key: "disk-type", Operator: corev1.NodeSelectorOpIn, @@ -2109,20 +2106,25 @@ func TestDPAReconciler_buildVeleroDeployment(t *testing.T) { Key: "gpu", Operator: corev1.NodeSelectorOpDoesNotExist, }, + { + Key: "zone", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"east"}, + }, }, }, { MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Key: "instance-type", - Operator: corev1.NodeSelectorOpIn, - Values: []string{"m5.large", "m5.xlarge"}, - }, { Key: "environment", Operator: corev1.NodeSelectorOpNotIn, Values: []string{"dev"}, }, + { + Key: "instance-type", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"m5.large", "m5.xlarge"}, + }, }, }, }, @@ -4188,3 +4190,39 @@ func TestApplyResourceAnnotations(t *testing.T) { }) } } + +// TestVeleroAffinityMatchExpressionsDeterministicOrder verifies that +// matchExpressions built from a multi-label NodeSelector are always sorted +// by key, preventing non-deterministic Deployment spec changes. See OADP-7541. +func TestVeleroAffinityMatchExpressionsDeterministicOrder(t *testing.T) { + matchLabels := map[string]string{ + "network": "int", + "node-role.kubernetes.io/infra": "", + } + wantKeys := []string{"network", "node-role.kubernetes.io/infra"} + + for i := 0; i < 100; i++ { + la := &kube.LoadAffinity{ + NodeSelector: metav1.LabelSelector{ + MatchLabels: matchLabels, + }, + } + affinity := kube.ToSystemAffinity(la, nil) + require.NotNil(t, affinity, "iteration %d: affinity should not be nil", i) + + terms := affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms + require.Len(t, terms, 1, "iteration %d: expected 1 NodeSelectorTerm", i) + + common.SortNodeSelectorTerms(terms) + + exprs := terms[0].MatchExpressions + require.Len(t, exprs, len(wantKeys), "iteration %d: wrong number of matchExpressions", i) + + gotKeys := make([]string, len(exprs)) + for j, expr := range exprs { + gotKeys[j] = expr.Key + } + require.Equal(t, wantKeys, gotKeys, + "iteration %d: matchExpressions keys not in sorted order", i) + } +} diff --git a/pkg/common/common.go b/pkg/common/common.go index 32eea51c15..c0ea75bbe2 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -1,8 +1,10 @@ package common import ( + "cmp" "fmt" "regexp" + "slices" "sort" "strings" @@ -374,3 +376,18 @@ func UpdateBackupStorageLocation(bsl *velerov1.BackupStorageLocation, bslSpec ve return nil } + +// SortNodeSelectorTerms sorts MatchExpressions and MatchFields by key +// within each NodeSelectorTerm to ensure deterministic ordering. Without +// this, Go map iteration randomness in upstream kube.ToSystemAffinity +// produces non-deterministic ordering, which Kubernetes treats as a spec +// change and triggers unnecessary DaemonSet rollouts. +func SortNodeSelectorTerms(terms []corev1.NodeSelectorTerm) { + cmpKey := func(a, b corev1.NodeSelectorRequirement) int { + return cmp.Compare(a.Key, b.Key) + } + for i := range terms { + slices.SortFunc(terms[i].MatchExpressions, cmpKey) + slices.SortFunc(terms[i].MatchFields, cmpKey) + } +}