Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions internal/controller/nodeagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ func (r *DataProtectionApplicationReconciler) ReconcileNodeAgentDaemonset(log lo
terms = append(terms, a.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms...)
}
}
common.SortNodeSelectorTerms(terms)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you considered filing an upstream Velero issue (or PR) to sort there? That would fix it for all consumers, not just OADP. This workaround is fine to merge now, but an upstream fix would let us eventually drop it.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok.

if len(terms) > 0 {
ds.Spec.Template.Spec.Affinity = &corev1.Affinity{
NodeAffinity: &corev1.NodeAffinity{
Expand Down
66 changes: 66 additions & 0 deletions internal/controller/nodeagent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2771,3 +2771,69 @@ func TestDPAReconciler_buildNodeAgentDaemonsetWithAzureWorkloadIdentity(t *testi
})
}
}

// TestNodeAgentAffinityMatchExpressionsDeterministicOrder verifies that
// matchExpressions built from a multi-label NodeSelector are always sorted
// by key, preventing non-deterministic DaemonSet spec changes that cause
// unnecessary node-agent pod restarts. See OADP-7541.
func TestNodeAgentAffinityMatchExpressionsDeterministicOrder(t *testing.T) {
tests := []struct {
name string
matchLabels map[string]string
wantKeys []string
}{
{
name: "two labels matching customer config from OADP-7541",
matchLabels: map[string]string{
"network": "int",
"node-role.kubernetes.io/infra": "",
},
wantKeys: []string{"network", "node-role.kubernetes.io/infra"},
},
{
name: "three labels to verify sorting with more keys",
matchLabels: map[string]string{
"zone": "us-east-1a",
"disk-type": "ssd",
"node-role.kubernetes.io/worker": "",
},
wantKeys: []string{"disk-type", "node-role.kubernetes.io/worker", "zone"},
},
{
name: "single label is trivially stable",
matchLabels: map[string]string{
"node-role.kubernetes.io/infra": "",
},
wantKeys: []string{"node-role.kubernetes.io/infra"},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
for i := 0; i < 100; i++ {
la := &kube.LoadAffinity{
NodeSelector: metav1.LabelSelector{
MatchLabels: tt.matchLabels,
},
}
affinity := kube.ToSystemAffinity(la, nil)
require.NotNil(t, affinity, "iteration %d: affinity should not be nil", i)

terms := affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
require.Len(t, terms, 1, "iteration %d: expected 1 NodeSelectorTerm", i)

common.SortNodeSelectorTerms(terms)

exprs := terms[0].MatchExpressions
require.Len(t, exprs, len(tt.wantKeys), "iteration %d: wrong number of matchExpressions", i)

gotKeys := make([]string, len(exprs))
for j, expr := range exprs {
gotKeys[j] = expr.Key
}
require.Equal(t, tt.wantKeys, gotKeys,
"iteration %d: matchExpressions keys not in sorted order", i)
}
})
}
}
1 change: 1 addition & 0 deletions internal/controller/velero.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ func (r *DataProtectionApplicationReconciler) customizeVeleroDeployment(veleroDe
terms = append(terms, a.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms...)
}
}
common.SortNodeSelectorTerms(terms)
if len(terms) > 0 {
veleroDeployment.Spec.Template.Spec.Affinity = &corev1.Affinity{
NodeAffinity: &corev1.NodeAffinity{
Expand Down
58 changes: 48 additions & 10 deletions internal/controller/velero_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ import (
"github.com/onsi/gomega"
"github.com/operator-framework/operator-lib/proxy"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/util/kube"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -2095,11 +2097,6 @@ func TestDPAReconciler_buildVeleroDeployment(t *testing.T) {
loadAffinity: []corev1.NodeSelectorTerm{
{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "zone",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"east"},
},
{
Key: "disk-type",
Operator: corev1.NodeSelectorOpIn,
Expand All @@ -2109,20 +2106,25 @@ func TestDPAReconciler_buildVeleroDeployment(t *testing.T) {
Key: "gpu",
Operator: corev1.NodeSelectorOpDoesNotExist,
},
{
Key: "zone",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"east"},
},
},
},
{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "instance-type",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"m5.large", "m5.xlarge"},
},
{
Key: "environment",
Operator: corev1.NodeSelectorOpNotIn,
Values: []string{"dev"},
},
{
Key: "instance-type",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"m5.large", "m5.xlarge"},
},
},
},
},
Expand Down Expand Up @@ -4188,3 +4190,39 @@ func TestApplyResourceAnnotations(t *testing.T) {
})
}
}

// TestVeleroAffinityMatchExpressionsDeterministicOrder verifies that
// matchExpressions built from a multi-label NodeSelector are always sorted
// by key, preventing non-deterministic Deployment spec changes. See OADP-7541.
func TestVeleroAffinityMatchExpressionsDeterministicOrder(t *testing.T) {
matchLabels := map[string]string{
"network": "int",
"node-role.kubernetes.io/infra": "",
}
wantKeys := []string{"network", "node-role.kubernetes.io/infra"}

for i := 0; i < 100; i++ {
la := &kube.LoadAffinity{
NodeSelector: metav1.LabelSelector{
MatchLabels: matchLabels,
},
}
affinity := kube.ToSystemAffinity(la, nil)
require.NotNil(t, affinity, "iteration %d: affinity should not be nil", i)

terms := affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
require.Len(t, terms, 1, "iteration %d: expected 1 NodeSelectorTerm", i)

common.SortNodeSelectorTerms(terms)

exprs := terms[0].MatchExpressions
require.Len(t, exprs, len(wantKeys), "iteration %d: wrong number of matchExpressions", i)

gotKeys := make([]string, len(exprs))
for j, expr := range exprs {
gotKeys[j] = expr.Key
}
require.Equal(t, wantKeys, gotKeys,
"iteration %d: matchExpressions keys not in sorted order", i)
}
}
17 changes: 17 additions & 0 deletions pkg/common/common.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package common

import (
"cmp"
"fmt"
"regexp"
"slices"
"sort"
"strings"

Expand Down Expand Up @@ -374,3 +376,18 @@ func UpdateBackupStorageLocation(bsl *velerov1.BackupStorageLocation, bslSpec ve

return nil
}

// SortNodeSelectorTerms sorts MatchExpressions and MatchFields by key
// within each NodeSelectorTerm to ensure deterministic ordering. Without
// this, Go map iteration randomness in upstream kube.ToSystemAffinity
// produces non-deterministic ordering, which Kubernetes treats as a spec
// change and triggers unnecessary DaemonSet rollouts.
func SortNodeSelectorTerms(terms []corev1.NodeSelectorTerm) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SortNodeSelectorTerms only sorts MatchExpressions but the name suggests it handles the full NodeSelectorTerm. Two options:

  1. Also sort MatchFields for completeness (same pattern, one extra loop).
  2. Rename to something like SortMatchExpressions to match what it actually does.

I'd lean toward option 1 since it's trivial and future-proofs us if MatchFields ever comes into play.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call — added MatchFields sorting too in 516587c. Extracted the comparator to avoid duplication.

Note

Responses generated with Claude

cmpKey := func(a, b corev1.NodeSelectorRequirement) int {
return cmp.Compare(a.Key, b.Key)
}
for i := range terms {
slices.SortFunc(terms[i].MatchExpressions, cmpKey)
slices.SortFunc(terms[i].MatchFields, cmpKey)
}
}