Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 62 additions & 45 deletions cmd/postgres-operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"time"
"unicode"

"github.com/kelseyhightower/envconfig"
"github.com/pkg/errors"
"go.opentelemetry.io/otel"
uzap "go.uber.org/zap"
Expand Down Expand Up @@ -280,69 +281,68 @@ func initManager(ctx context.Context) (runtime.Options, error) {

options.HealthProbeBindAddress = ":8081"

options.Controller.GroupKindConcurrency = map[string]int{
"PostgresCluster." + v1beta1.GroupVersion.Group: 1,
"PGUpgrade." + v1beta1.GroupVersion.Group: 1,
"PGAdmin." + v1beta1.GroupVersion.Group: 1,
"PerconaPGCluster." + v2.GroupVersion.Group: 1,
"PerconaPGUpgrade." + v2.GroupVersion.Group: 1,
"PerconaPGBackup." + v2.GroupVersion.Group: 1,
"PerconaPGRestore." + v2.GroupVersion.Group: 1,
}

// K8SPG-915
envs := new(envConfig)
if err := envconfig.Process("", envs); err != nil {
return options, errors.Wrap(err, "parse env vars")
}
Comment on lines +294 to +298
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Problem: envconfig.Process now makes initManager fail when PGO_WORKERS is set to a non-integer value (e.g., 3.14), whereas previously invalid values were logged and ignored.
  2. Why it matters: This is a behavior/compatibility change that can prevent the operator from starting if a bad value is present in an existing deployment.
  3. Fix: If fail-fast isn’t intended, parse PGO_WORKERS separately (string -> int) and keep the previous log-and-ignore behavior for invalid values, while still using envconfig for the new leader-election durations.

Copilot uses AI. Check for mistakes.

options.LeaseDuration = &envs.LeaseDuration
options.RenewDeadline = &envs.RenewDeadline
options.RetryPeriod = &envs.RetryPeriod
options.PprofBindAddress = envs.PprofBindAddress

options.LeaderElection = envs.LeaderElection
if options.LeaderElection {
options.LeaderElectionID = perconaRuntime.ElectionID
}

// Enable leader elections when configured with a valid Lease.coordination.k8s.io name.
// - https://docs.k8s.io/concepts/architecture/leases
// - https://releases.k8s.io/v1.30.0/pkg/apis/coordination/validation/validation.go#L26
if lease := os.Getenv("PGO_CONTROLLER_LEASE_NAME"); len(lease) > 0 {
if lease := envs.LeaderElectionID; options.LeaderElection && len(lease) > 0 {
if errs := validation.IsDNS1123Subdomain(lease); len(errs) > 0 {
return options, fmt.Errorf("value for PGO_CONTROLLER_LEASE_NAME is invalid: %v", errs)
}

options.LeaderElection = true
options.LeaderElectionID = lease
options.LeaderElectionNamespace = os.Getenv("PGO_NAMESPACE")
} else {
// K8SPG-761
options.LeaderElection = true
options.LeaderElectionID = perconaRuntime.ElectionID
options.LeaderElectionNamespace = envs.LeaderElectionNamespace
}

// Check PGO_TARGET_NAMESPACE for backwards compatibility with
// "singlenamespace" installations
singlenamespace := strings.TrimSpace(os.Getenv("PGO_TARGET_NAMESPACE"))

// Check PGO_TARGET_NAMESPACES for non-cluster-wide, multi-namespace
// installations
multinamespace := strings.TrimSpace(os.Getenv("PGO_TARGET_NAMESPACES"))

// Initialize DefaultNamespaces if any target namespaces are set
if len(singlenamespace) > 0 || len(multinamespace) > 0 {
if len(envs.SingleNamespace) > 0 || len(envs.MultiNamespaces) > 0 {
// Initialize DefaultNamespaces if any target namespaces are set
options.Cache.DefaultNamespaces = map[string]runtime.CacheConfig{}
}

if len(singlenamespace) > 0 {
options.Cache.DefaultNamespaces[singlenamespace] = runtime.CacheConfig{}
}

if len(multinamespace) > 0 {
for _, namespace := range strings.FieldsFunc(multinamespace, func(c rune) bool {
return c != '-' && !unicode.IsLetter(c) && !unicode.IsNumber(c)
}) {
options.Cache.DefaultNamespaces[namespace] = runtime.CacheConfig{}
if len(envs.SingleNamespace) > 0 {
options.Cache.DefaultNamespaces[envs.SingleNamespace] = runtime.CacheConfig{}
}
}

options.Controller.GroupKindConcurrency = map[string]int{
"PostgresCluster." + v1beta1.GroupVersion.Group: 1,
"PGUpgrade." + v1beta1.GroupVersion.Group: 1,
"PGAdmin." + v1beta1.GroupVersion.Group: 1,
"PerconaPGCluster." + v2.GroupVersion.Group: 1,
"PerconaPGUpgrade." + v2.GroupVersion.Group: 1,
"PerconaPGBackup." + v2.GroupVersion.Group: 1,
"PerconaPGRestore." + v2.GroupVersion.Group: 1,
}

if s := os.Getenv("PGO_WORKERS"); s != "" {
if i, err := strconv.Atoi(s); err == nil && i > 0 {
for kind := range options.Controller.GroupKindConcurrency {
options.Controller.GroupKindConcurrency[kind] = i
if len(envs.MultiNamespaces) > 0 {
for _, namespace := range strings.FieldsFunc(envs.MultiNamespaces, func(c rune) bool {
return c != '-' && !unicode.IsLetter(c) && !unicode.IsNumber(c)
}) {
options.Cache.DefaultNamespaces[namespace] = runtime.CacheConfig{}
}
} else {
log.Error(err, "PGO_WORKERS must be a positive number")
}
}

options.PprofBindAddress = os.Getenv("PPROF_BIND_ADDRESS")
if envs.Workers < 0 {
log.Error(nil, "PGO_WORKERS must be a non-negative number; 0 disables the override")
} else if envs.Workers > 0 {
for kind := range options.Controller.GroupKindConcurrency {
options.Controller.GroupKindConcurrency[kind] = envs.Workers
}
}

return options, nil
}
Expand Down Expand Up @@ -516,3 +516,20 @@ func isOpenshift(ctx context.Context, cfg *rest.Config) bool {

return false
}

type envConfig struct {
LeaderElection bool `default:"true" envconfig:"PGO_CONTROLLER_LEADER_ELECTION_ENABLED"`
LeaderElectionID string `envconfig:"PGO_CONTROLLER_LEASE_NAME"`
LeaderElectionNamespace string `envconfig:"PGO_NAMESPACE"`

Comment on lines +520 to +524
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Problem: envConfig uses unexported (lowercase) struct fields, which github.com/kelseyhightower/envconfig cannot populate via reflection, so defaults/env vars may not be applied.
  2. Why it matters: This can silently disable leader election and set zero durations, potentially breaking operator behavior at runtime.
  3. Fix: Export the fields (e.g., LeaderElection, LeaseDuration, etc.) or switch to a parsing approach that does not rely on setting unexported fields.

Copilot uses AI. Check for mistakes.
LeaseDuration time.Duration `default:"60s" envconfig:"PGO_CONTROLLER_LEASE_DURATION"`
RenewDeadline time.Duration `default:"40s" envconfig:"PGO_CONTROLLER_RENEW_DEADLINE"`
RetryPeriod time.Duration `default:"10s" envconfig:"PGO_CONTROLLER_RETRY_PERIOD"`

SingleNamespace string `envconfig:"PGO_TARGET_NAMESPACE"`
MultiNamespaces string `envconfig:"PGO_TARGET_NAMESPACES"`

PprofBindAddress string `envconfig:"PPROF_BIND_ADDRESS"`

Workers int `envconfig:"PGO_WORKERS"`
}
34 changes: 31 additions & 3 deletions cmd/postgres-operator/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
package main

import (
"context"
"reflect"
"testing"
"time"
Expand All @@ -15,7 +14,7 @@ import (
)

func TestInitManager(t *testing.T) {
ctx := context.Background()
ctx := t.Context()
t.Run("Defaults", func(t *testing.T) {
options, err := initManager(ctx)
assert.NilError(t, err)
Expand All @@ -39,13 +38,19 @@ func TestInitManager(t *testing.T) {

assert.Assert(t, options.Cache.DefaultNamespaces == nil)
assert.Assert(t, options.LeaderElection == true)
assert.Assert(t, options.LeaseDuration.Seconds() == 60)
assert.Assert(t, options.RenewDeadline.Seconds() == 40)
assert.Assert(t, options.RetryPeriod.Seconds() == 10)

{
options.Cache.SyncPeriod = nil
options.Controller.GroupKindConcurrency = nil
options.HealthProbeBindAddress = ""
options.LeaderElection = false
options.LeaderElectionID = ""
options.LeaseDuration = nil
options.RenewDeadline = nil
options.RetryPeriod = nil

assert.Assert(t, reflect.ValueOf(options).IsZero(),
"expected remaining fields to be unset:\n%+v", options)
Expand All @@ -62,6 +67,13 @@ func TestInitManager(t *testing.T) {
assert.ErrorContains(t, err, "PGO_CONTROLLER_LEASE_NAME")
assert.ErrorContains(t, err, "invalid")

assert.Assert(t, options.LeaderElection == true)
assert.Equal(t, options.LeaderElectionNamespace, "")

t.Setenv("PGO_CONTROLLER_LEADER_ELECTION_ENABLED", "false")
options, err = initManager(ctx)
assert.NilError(t, err)

assert.Assert(t, options.LeaderElection == false)
assert.Equal(t, options.LeaderElectionNamespace, "")
})
Expand Down Expand Up @@ -106,7 +118,11 @@ func TestInitManager(t *testing.T) {
t.Setenv("PGO_WORKERS", v)

options, err := initManager(ctx)
assert.NilError(t, err)
if v == "3.14" {
assert.ErrorContains(t, err, "parse env vars: envconfig.Process: assigning PGO_WORKERS to Workers: converting '3.14' to type int. details: strconv.ParseInt: parsing \"3.14\": invalid syntax")
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Problem: The test asserts an overly specific substring for the envconfig parse error, including library-internal wording.
  2. Why it matters: Minor changes in envconfig (or Go’s strconv error text) can break the test even when behavior is still correct.
  3. Fix: Assert on a few stable substrings (e.g., "parse env vars", "PGO_WORKERS", and "invalid syntax") instead of the full error text.
Suggested change
assert.ErrorContains(t, err, "parse env vars: envconfig.Process: assigning PGO_WORKERS to Workers: converting '3.14' to type int. details: strconv.ParseInt: parsing \"3.14\": invalid syntax")
assert.ErrorContains(t, err, "parse env vars")
assert.ErrorContains(t, err, "PGO_WORKERS")
assert.ErrorContains(t, err, "invalid syntax")

Copilot uses AI. Check for mistakes.
} else {
assert.NilError(t, err)
}
assert.DeepEqual(t, options.Controller.GroupKindConcurrency,
map[string]int{
"PGAdmin.postgres-operator.crunchydata.com": 1,
Expand Down Expand Up @@ -148,4 +164,16 @@ func TestInitManager(t *testing.T) {
assert.NilError(t, err)
assert.DeepEqual(t, options.PprofBindAddress, "pprof-addr")
})

t.Run("Duration options", func(t *testing.T) {
t.Setenv("PGO_CONTROLLER_LEASE_DURATION", "1s")
t.Setenv("PGO_CONTROLLER_RENEW_DEADLINE", "2s")
t.Setenv("PGO_CONTROLLER_RETRY_PERIOD", "3s")
options, err := initManager(ctx)
assert.NilError(t, err)

assert.Equal(t, options.LeaseDuration.Seconds(), float64(1))
assert.Equal(t, options.RenewDeadline.Seconds(), float64(2))
assert.Equal(t, options.RetryPeriod.Seconds(), float64(3))
})
}
10 changes: 10 additions & 0 deletions config/manager/default/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@ spec:
value: "1"
- name: PPROF_BIND_ADDRESS
value: "0"
- name: PGO_CONTROLLER_LEADER_ELECTION_ENABLED
value: "true"
- name: PGO_CONTROLLER_LEASE_NAME
value: ""
- name: PGO_CONTROLLER_LEASE_DURATION
value: "60s"
- name: PGO_CONTROLLER_RENEW_DEADLINE
value: "40s"
- name: PGO_CONTROLLER_RETRY_PERIOD
value: "10s"
ports:
- containerPort: 8080
name: metrics
Expand Down
10 changes: 10 additions & 0 deletions deploy/bundle.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54215,6 +54215,16 @@ spec:
value: "1"
- name: PPROF_BIND_ADDRESS
value: "0"
- name: PGO_CONTROLLER_LEADER_ELECTION_ENABLED
value: "true"
- name: PGO_CONTROLLER_LEASE_NAME
value: ""
- name: PGO_CONTROLLER_LEASE_DURATION
value: 60s
- name: PGO_CONTROLLER_RENEW_DEADLINE
value: 40s
- name: PGO_CONTROLLER_RETRY_PERIOD
value: 10s
image: docker.io/perconalab/percona-postgresql-operator:main
imagePullPolicy: Always
livenessProbe:
Expand Down
10 changes: 10 additions & 0 deletions deploy/cw-bundle.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54213,6 +54213,16 @@ spec:
value: "1"
- name: PPROF_BIND_ADDRESS
value: "0"
- name: PGO_CONTROLLER_LEADER_ELECTION_ENABLED
value: "true"
- name: PGO_CONTROLLER_LEASE_NAME
value: ""
- name: PGO_CONTROLLER_LEASE_DURATION
value: 60s
- name: PGO_CONTROLLER_RENEW_DEADLINE
value: 40s
- name: PGO_CONTROLLER_RETRY_PERIOD
value: 10s
image: docker.io/perconalab/percona-postgresql-operator:main
imagePullPolicy: Always
livenessProbe:
Expand Down
10 changes: 10 additions & 0 deletions deploy/cw-operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ spec:
value: "1"
- name: PPROF_BIND_ADDRESS
value: "0"
- name: PGO_CONTROLLER_LEADER_ELECTION_ENABLED
value: "true"
- name: PGO_CONTROLLER_LEASE_NAME
value: ""
- name: PGO_CONTROLLER_LEASE_DURATION
value: 60s
- name: PGO_CONTROLLER_RENEW_DEADLINE
value: 40s
- name: PGO_CONTROLLER_RETRY_PERIOD
value: 10s
image: docker.io/perconalab/percona-postgresql-operator:main
imagePullPolicy: Always
livenessProbe:
Expand Down
10 changes: 10 additions & 0 deletions deploy/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ spec:
value: "1"
- name: PPROF_BIND_ADDRESS
value: "0"
- name: PGO_CONTROLLER_LEADER_ELECTION_ENABLED
value: "true"
- name: PGO_CONTROLLER_LEASE_NAME
value: ""
- name: PGO_CONTROLLER_LEASE_DURATION
value: 60s
- name: PGO_CONTROLLER_RENEW_DEADLINE
value: 40s
- name: PGO_CONTROLLER_RETRY_PERIOD
value: 10s
image: docker.io/perconalab/percona-postgresql-operator:main
imagePullPolicy: Always
livenessProbe:
Expand Down
Loading
Loading