Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 29 additions & 10 deletions .github/workflows/code-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@ on:

env:
# Common versions
GO_VERSION: '1.24.13'
GO_VERSION: "1.24.13"

jobs:

detect-noop:
runs-on: ubuntu-latest
outputs:
Expand Down Expand Up @@ -58,13 +57,33 @@ jobs:
contents: read

steps:
- name: Set up Go ${{ env.GO_VERSION }}
uses: actions/setup-go@v6
with:
go-version: ${{ env.GO_VERSION }}
- name: Set up Go ${{ env.GO_VERSION }}
uses: actions/setup-go@v6
with:
go-version: ${{ env.GO_VERSION }}

- name: Check out code into the Go module directory
uses: actions/checkout@v6.0.2
- name: Check out code into the Go module directory
uses: actions/checkout@v6.0.2

- name: golangci-lint
run: make lint

helm-lint:
name: "Helm Lint"
runs-on: ubuntu-latest
needs: detect-noop
if: needs.detect-noop.outputs.noop != 'true'

steps:
- name: Check out code
uses: actions/checkout@v6.0.2

- name: Set up Helm
uses: azure/setup-helm@v4
with:
version: v3.17.0

- name: golangci-lint
run: make lint
- name: Lint Helm charts
run: |
helm lint charts/hub-agent
helm lint charts/member-agent
8 changes: 8 additions & 0 deletions apis/cluster/v1beta1/membercluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,14 @@ type MemberClusterStatus struct {
// +optional
ResourceUsage ResourceUsage `json:"resourceUsage,omitempty"`

// Namespaces is a map of namespace names to their associated work names for namespaces
// that are managed by Fleet (i.e., have AppliedWork owner references when created).
// The key is the namespace name and the value is the work name from the AppliedWork owner reference.
// If the namespace does not have an AppliedWork owner reference, the value will be an empty string.
// This field is copied from the corresponding InternalMemberCluster object.
// +optional
Namespaces map[string]string `json:"namespaces,omitempty"`

// AgentStatus is an array of current observed status, each corresponding to one member agent running in the member cluster.
// +optional
AgentStatus []AgentStatus `json:"agentStatus,omitempty"`
Expand Down
7 changes: 7 additions & 0 deletions apis/cluster/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 18 additions & 16 deletions cmd/hubagent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,29 +103,31 @@ func main() {
ctrl.SetLogger(zap.New(zap.UseDevMode(true)))

config := ctrl.GetConfigOrDie()
config.QPS, config.Burst = float32(opts.HubQPS), opts.HubBurst
config.QPS, config.Burst = float32(opts.CtrlMgrOpts.HubQPS), opts.CtrlMgrOpts.HubBurst

mgrOpts := ctrl.Options{
Scheme: scheme,
Cache: cache.Options{
SyncPeriod: &opts.ResyncPeriod.Duration,
SyncPeriod: &opts.CtrlMgrOpts.ResyncPeriod.Duration,
DefaultTransform: cache.TransformStripManagedFields(),
},
LeaderElection: opts.LeaderElection.LeaderElect,
LeaderElectionID: opts.LeaderElection.ResourceName,
LeaderElectionNamespace: opts.LeaderElection.ResourceNamespace,
LeaderElectionResourceLock: opts.LeaderElection.ResourceLock,
HealthProbeBindAddress: opts.HealthProbeAddress,
LeaderElection: opts.LeaderElectionOpts.LeaderElect,
LeaderElectionID: "136224848560.hub.fleet.azure.com",
LeaderElectionNamespace: opts.LeaderElectionOpts.ResourceNamespace,
LeaseDuration: &opts.LeaderElectionOpts.LeaseDuration.Duration,
RenewDeadline: &opts.LeaderElectionOpts.RenewDeadline.Duration,
RetryPeriod: &opts.LeaderElectionOpts.RetryPeriod.Duration,
HealthProbeBindAddress: opts.CtrlMgrOpts.HealthProbeBindAddress,
Metrics: metricsserver.Options{
BindAddress: opts.MetricsBindAddress,
BindAddress: opts.CtrlMgrOpts.MetricsBindAddress,
},
WebhookServer: ctrlwebhook.NewServer(ctrlwebhook.Options{
Port: FleetWebhookPort,
CertDir: webhook.FleetWebhookCertDir,
}),
}
if opts.EnablePprof {
mgrOpts.PprofBindAddress = fmt.Sprintf(":%d", opts.PprofPort)
if opts.CtrlMgrOpts.EnablePprof {
mgrOpts.PprofBindAddress = fmt.Sprintf(":%d", opts.CtrlMgrOpts.PprofPort)
}
mgr, err := ctrl.NewManager(config, mgrOpts)
if err != nil {
Expand All @@ -134,13 +136,13 @@ func main() {
}

klog.V(2).InfoS("starting hubagent")
if opts.EnableV1Beta1APIs {
if opts.FeatureFlags.EnableV1Beta1APIs {
klog.Info("Setting up memberCluster v1beta1 controller")
if err = (&mcv1beta1.Reconciler{
Client: mgr.GetClient(),
NetworkingAgentsEnabled: opts.NetworkingAgentsEnabled,
MaxConcurrentReconciles: int(math.Ceil(float64(opts.MaxFleetSizeSupported) / 100)), //one member cluster reconciler routine per 100 member clusters
ForceDeleteWaitTime: opts.ForceDeleteWaitTime.Duration,
NetworkingAgentsEnabled: opts.ClusterMgmtOpts.NetworkingAgentsEnabled,
MaxConcurrentReconciles: int(math.Ceil(float64(opts.PlacementMgmtOpts.MaxFleetSize) / 100)), //one member cluster reconciler routine per 100 member clusters
ForceDeleteWaitTime: opts.ClusterMgmtOpts.ForceDeleteWaitTime.Duration,
}).SetupWithManager(mgr, "membercluster-controller"); err != nil {
klog.ErrorS(err, "unable to create v1beta1 controller", "controller", "MemberCluster")
exitWithErrorFunc()
Expand All @@ -156,7 +158,7 @@ func main() {
exitWithErrorFunc()
}

if opts.EnableWebhook {
if opts.WebhookOpts.EnableWebhooks {
// Generate webhook configuration with certificates
webhookConfig, err := webhook.NewWebhookConfigFromOptions(mgr, opts, FleetWebhookPort)
if err != nil {
Expand All @@ -173,7 +175,7 @@ func main() {
// When using cert-manager, add a readiness check to ensure CA bundles are injected before marking ready.
// This prevents the pod from accepting traffic before cert-manager has populated the webhook CA bundles,
// which would cause webhook calls to fail.
if opts.UseCertManager {
if opts.WebhookOpts.UseCertManager {
if err := mgr.AddReadyzCheck("cert-manager-ca-injection", func(req *http.Request) error {
return webhookConfig.CheckCAInjection(req.Context())
}); err != nil {
Expand Down
111 changes: 111 additions & 0 deletions cmd/hubagent/options/clustermgmt.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
Copyright 2025 The KubeFleet Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package options

import (
"flag"
"fmt"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// ClusterManagementOptions is a set of options the KubeFleet hub agent exposes for
// managing member clusters.
//
// Every field is bound to a command-line flag by AddFlags, which also installs the
// field's default value.
type ClusterManagementOptions struct {
// NetworkingAgentsEnabled tells whether Fleet networking agents are expected to have been
// installed in the fleet. If set to true, the hub agent will start to expect heartbeats from
// the networking agents on the member cluster related resources.
// Bound to the networking-agents-enabled flag; defaults to false.
NetworkingAgentsEnabled bool

// UnhealthyThreshold is the duration the KubeFleet hub agent will wait for new heartbeats
// before marking a member cluster as unhealthy.
// Bound to the cluster-unhealthy-threshold flag; defaults to 60 seconds, and values supplied
// on the command line must fall in the range [30s, 1h].
UnhealthyThreshold metav1.Duration

// ForceDeleteWaitTime is the duration the KubeFleet hub agent will wait before force-deleting
// a member cluster resource after it has been marked for deletion.
// Bound to the force-delete-wait-time flag; defaults to 15 minutes, and values supplied
// on the command line must fall in the range [30s, 1h].
ForceDeleteWaitTime metav1.Duration
}

// AddFlags registers the cluster management flags on the given FlagSet. Each flag is
// bound to the matching field of o, and that field is initialized to the flag's default
// value as part of registration.
func (o *ClusterManagementOptions) AddFlags(flags *flag.FlagSet) {
	const (
		defaultUnhealthyThreshold  = 60 * time.Second
		defaultForceDeleteWaitTime = 15 * time.Minute
	)

	flags.BoolVar(&o.NetworkingAgentsEnabled, "networking-agents-enabled", false,
		"Expect that Fleet networking agents have been installed in the fleet or not. If set to true, the hub agent will start to expect heartbeats from the networking agents on the member cluster related resources.")

	// The duration flags are registered through flag.Value wrappers so that out-of-range
	// input is rejected while the arguments are being parsed.
	unhealthyThreshold := newClusterUnhealthyThresholdValueWithValidation(defaultUnhealthyThreshold, &o.UnhealthyThreshold)
	flags.Var(unhealthyThreshold, "cluster-unhealthy-threshold",
		"The duration the KubeFleet hub agent will wait for new heartbeats before marking a member cluster as unhealthy. Defaults to 60 seconds. Must be a duration in the range [30s, 1h].")

	forceDeleteWaitTime := newForceDeleteWaitTimeValueWithValidation(defaultForceDeleteWaitTime, &o.ForceDeleteWaitTime)
	flags.Var(forceDeleteWaitTime, "force-delete-wait-time",
		"The duration the KubeFleet hub agent will wait before force-deleting a member cluster resource after it has been marked for deletion. Defaults to 15 minutes. Must be a duration in the range [30s, 1h].")
}

// Custom flag value types (flag.Value implementations) that validate their input while the command-line arguments are being parsed.

// ClusterUnhealthyThresholdValueWithValidation is a metav1.Duration whose pointer type
// implements flag.Value. Its Set method rejects any input that is not a valid duration
// in the range [30s, 1h]. It backs the cluster-unhealthy-threshold flag.
type ClusterUnhealthyThresholdValueWithValidation metav1.Duration

// String returns the currently stored threshold formatted as a duration string
// (for example "1m0s").
//
// The flag.Value contract allows String to be invoked with a zero-valued receiver,
// such as a nil pointer, so a nil receiver is reported as the zero duration ("0s")
// rather than panicking on the field access.
func (v *ClusterUnhealthyThresholdValueWithValidation) String() string {
	if v == nil {
		return time.Duration(0).String()
	}
	return v.Duration.String()
}

// Set parses s as a duration string and stores the result in v.
//
// It returns an error, leaving the stored value untouched, when s cannot be parsed
// by time.ParseDuration or when the parsed duration lies outside the range [30s, 1h].
func (v *ClusterUnhealthyThresholdValueWithValidation) Set(s string) error {
	parsed, parseErr := time.ParseDuration(s)
	switch {
	case parseErr != nil:
		return fmt.Errorf("failed to parse duration: %w", parseErr)
	case parsed < 30*time.Second, parsed > time.Hour:
		return fmt.Errorf("duration must be in the range [30s, 1h]")
	}

	v.Duration = parsed
	return nil
}

// newClusterUnhealthyThresholdValueWithValidation writes defaultVal into *p and returns p
// converted to a *ClusterUnhealthyThresholdValueWithValidation. The returned value shares
// storage with p, so durations accepted by Set are written straight into the caller's
// metav1.Duration.
func newClusterUnhealthyThresholdValueWithValidation(defaultVal time.Duration, p *metav1.Duration) *ClusterUnhealthyThresholdValueWithValidation {
	wrapped := (*ClusterUnhealthyThresholdValueWithValidation)(p)
	wrapped.Duration = defaultVal
	return wrapped
}

// ForceDeleteWaitTimeValueWithValidation is a metav1.Duration whose pointer type
// implements flag.Value. Its Set method rejects any input that is not a valid duration
// in the range [30s, 1h]. It backs the force-delete-wait-time flag.
type ForceDeleteWaitTimeValueWithValidation metav1.Duration

// String returns the currently stored wait time formatted as a duration string
// (for example "15m0s").
//
// The flag.Value contract allows String to be invoked with a zero-valued receiver,
// such as a nil pointer, so a nil receiver is reported as the zero duration ("0s")
// rather than panicking on the field access.
func (v *ForceDeleteWaitTimeValueWithValidation) String() string {
	if v == nil {
		return time.Duration(0).String()
	}
	return v.Duration.String()
}

// Set parses s as a duration string and stores the result in v.
//
// It returns an error, leaving the stored value untouched, when s cannot be parsed
// by time.ParseDuration or when the parsed duration lies outside the range [30s, 1h].
func (v *ForceDeleteWaitTimeValueWithValidation) Set(s string) error {
	parsed, parseErr := time.ParseDuration(s)
	switch {
	case parseErr != nil:
		return fmt.Errorf("failed to parse duration: %w", parseErr)
	case parsed < 30*time.Second, parsed > time.Hour:
		return fmt.Errorf("duration must be in the range [30s, 1h]")
	}

	v.Duration = parsed
	return nil
}

// newForceDeleteWaitTimeValueWithValidation writes defaultVal into *p and returns p
// converted to a *ForceDeleteWaitTimeValueWithValidation. The returned value shares
// storage with p, so durations accepted by Set are written straight into the caller's
// metav1.Duration.
func newForceDeleteWaitTimeValueWithValidation(defaultVal time.Duration, p *metav1.Duration) *ForceDeleteWaitTimeValueWithValidation {
	wrapped := (*ForceDeleteWaitTimeValueWithValidation)(p)
	wrapped.Duration = defaultVal
	return wrapped
}
Loading
Loading