@@ -6,40 +6,42 @@ import (
66 "time"
77
88 v1 "k8s.io/api/core/v1"
9+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
910 "k8s.io/apimachinery/pkg/runtime/schema"
11+ "k8s.io/klog/v2"
1012
1113 "github.com/containers/kubernetes-mcp-server/pkg/api"
1214 "github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
1315)
1416
1517// clusterHealthCheckHandler implements the cluster health check prompt
1618func clusterHealthCheckHandler (params api.PromptHandlerParams ) (* api.PromptCallResult , error ) {
19+ // Parse arguments (GetArguments returns map[string]string for prompts)
1720 args := params .GetArguments ()
18-
19- // Parse arguments
2021 namespace := args ["namespace" ]
21- verbose := args ["verbose" ] == "true"
2222 checkEvents := args ["check_events" ] != "false" // default true
2323
24+ klog .Info ("Starting cluster health check..." )
25+
2426 // Check if namespace exists if specified
2527 namespaceWarning := ""
2628 requestedNamespace := namespace
2729 if namespace != "" {
28- nsGVK := & schema.GroupVersionKind {
29- Group : "" ,
30- Version : "v1" ,
31- Kind : "Namespace" ,
32- }
33- _ , err := kubernetes .NewCore (params ).ResourcesGet (params , nsGVK , "" , namespace )
30+ _ , err := params .CoreV1 ().Namespaces ().Get (params .Context , namespace , metav1.GetOptions {})
3431 if err != nil {
3532 // Namespace doesn't exist - show warning and proceed with cluster-wide check
3633 namespaceWarning = fmt .Sprintf ("Namespace '%s' not found or not accessible. Showing cluster-wide information instead." , namespace )
3734 namespace = "" // Fall back to cluster-wide check
35+ klog .Warningf ("Namespace '%s' not found, performing cluster-wide health check" , requestedNamespace )
36+ } else {
37+ klog .Infof ("Performing health check for namespace: %s" , namespace )
3838 }
39+ } else {
40+ klog .Info ("Performing cluster-wide health check" )
3941 }
4042
4143 // Gather cluster diagnostics using the KubernetesClient interface
42- diagnostics , err := gatherClusterDiagnostics (params , namespace , verbose , checkEvents )
44+ diagnostics , err := gatherClusterDiagnostics (params , namespace , checkEvents )
4345 if err != nil {
4446 return nil , fmt .Errorf ("failed to gather cluster diagnostics: %w" , err )
4547 }
@@ -94,134 +96,144 @@ type clusterDiagnostics struct {
9496}
9597
9698// gatherClusterDiagnostics collects comprehensive diagnostic data from the cluster
97- func gatherClusterDiagnostics (params api.PromptHandlerParams , namespace string , verbose bool , checkEvents bool ) (* clusterDiagnostics , error ) {
99+ func gatherClusterDiagnostics (params api.PromptHandlerParams , namespace string , checkEvents bool ) (* clusterDiagnostics , error ) {
98100 diag := & clusterDiagnostics {
99101 CollectionTime : time .Now (),
100102 NamespaceScoped : namespace != "" ,
101103 TargetNamespace : namespace ,
102104 }
103105
104106 // Gather node diagnostics using ResourcesList
107+ klog .Info ("Collecting node diagnostics..." )
105108 nodeDiag , err := gatherNodeDiagnostics (params )
106109 if err == nil {
107110 diag .Nodes = nodeDiag
111+ klog .Info ("Node diagnostics collected" )
112+ } else {
113+ klog .Warningf ("Failed to collect node diagnostics: %v" , err )
108114 }
109115
110116 // Gather pod diagnostics
117+ klog .Info ("Collecting pod diagnostics..." )
111118 podDiag , err := gatherPodDiagnostics (params , namespace )
112119 if err == nil {
113120 diag .Pods = podDiag
121+ klog .Info ("Pod diagnostics collected" )
122+ } else {
123+ klog .Warningf ("Failed to collect pod diagnostics: %v" , err )
114124 }
115125
116126 // Gather workload diagnostics
127+ klog .Info ("Collecting deployment diagnostics..." )
117128 deployDiag , err := gatherWorkloadDiagnostics (params , "Deployment" , namespace )
118129 if err == nil {
119130 diag .Deployments = deployDiag
131+ klog .Info ("Deployment diagnostics collected" )
132+ } else {
133+ klog .Warningf ("Failed to collect deployment diagnostics: %v" , err )
120134 }
121135
136+ klog .Info ("Collecting statefulset diagnostics..." )
122137 stsDiag , err := gatherWorkloadDiagnostics (params , "StatefulSet" , namespace )
123138 if err == nil {
124139 diag .StatefulSets = stsDiag
140+ klog .Info ("StatefulSet diagnostics collected" )
141+ } else {
142+ klog .Warningf ("Failed to collect statefulset diagnostics: %v" , err )
125143 }
126144
145+ klog .Info ("Collecting daemonset diagnostics..." )
127146 dsDiag , err := gatherWorkloadDiagnostics (params , "DaemonSet" , namespace )
128147 if err == nil {
129148 diag .DaemonSets = dsDiag
149+ klog .Info ("DaemonSet diagnostics collected" )
150+ } else {
151+ klog .Warningf ("Failed to collect daemonset diagnostics: %v" , err )
130152 }
131153
132154 // Gather PVC diagnostics
155+ klog .Info ("Collecting PVC diagnostics..." )
133156 pvcDiag , err := gatherPVCDiagnostics (params , namespace )
134157 if err == nil {
135158 diag .PVCs = pvcDiag
159+ klog .Info ("PVC diagnostics collected" )
160+ } else {
161+ klog .Warningf ("Failed to collect PVC diagnostics: %v" , err )
136162 }
137163
138164 // Gather cluster operator diagnostics (OpenShift only)
165+ klog .Info ("Checking for cluster operators (OpenShift)..." )
139166 operatorDiag , err := gatherClusterOperatorDiagnostics (params )
140167 if err == nil {
141168 diag .ClusterOperators = operatorDiag
169+ klog .Info ("Cluster operator diagnostics collected" )
142170 }
143171
144172 // Gather recent events if requested
145173 if checkEvents {
174+ klog .Info ("Collecting recent events..." )
146175 eventDiag , err := gatherEventDiagnostics (params , namespace )
147176 if err == nil {
148177 diag .Events = eventDiag
178+ klog .Info ("Event diagnostics collected" )
179+ } else {
180+ klog .Warningf ("Failed to collect event diagnostics: %v" , err )
149181 }
150182 }
151183
152184 // Count namespaces
185+ klog .Info ("Counting namespaces..." )
153186 namespaceList , err := kubernetes .NewCore (params ).NamespacesList (params , api.ListOptions {})
154187 if err == nil {
155188 if items , ok := namespaceList .UnstructuredContent ()["items" ].([]interface {}); ok {
156189 diag .TotalNamespaces = len (items )
190+ klog .Infof ("Found %d namespaces" , diag .TotalNamespaces )
157191 }
158192 }
159193
194+ klog .Info ("Cluster health check data collection completed" )
160195 return diag , nil
161196}
162197
163- // gatherNodeDiagnostics collects node status using ResourcesList
198+ // gatherNodeDiagnostics collects node status using CoreV1 clientset
164199func gatherNodeDiagnostics (params api.PromptHandlerParams ) (string , error ) {
165- gvk := & schema.GroupVersionKind {
166- Group : "" ,
167- Version : "v1" ,
168- Kind : "Node" ,
169- }
170-
171- nodeList , err := kubernetes .NewCore (params ).ResourcesList (params , gvk , "" , api.ListOptions {})
200+ nodeList , err := params .CoreV1 ().Nodes ().List (params .Context , metav1.ListOptions {})
172201 if err != nil {
173202 return "" , err
174203 }
175204
176- items , ok := nodeList .UnstructuredContent ()["items" ].([]interface {})
177- if ! ok || len (items ) == 0 {
205+ if len (nodeList .Items ) == 0 {
178206 return "No nodes found" , nil
179207 }
180208
181209 var sb strings.Builder
182- totalNodes := len (items )
210+ totalNodes := len (nodeList . Items )
183211 healthyNodes := 0
184212 nodesWithIssues := []string {}
185213
186- for _ , item := range items {
187- nodeMap , ok := item .(map [string ]interface {})
188- if ! ok {
189- continue
190- }
191-
192- metadata , _ := nodeMap ["metadata" ].(map [string ]interface {})
193- name , _ := metadata ["name" ].(string )
194-
195- status , _ := nodeMap ["status" ].(map [string ]interface {})
196- conditions , _ := status ["conditions" ].([]interface {})
197-
214+ for _ , node := range nodeList .Items {
198215 nodeStatus := "Unknown"
199216 issues := []string {}
200217
201218 // Parse node conditions
202- for _ , cond := range conditions {
203- condMap , _ := cond .(map [string ]interface {})
204- condType , _ := condMap ["type" ].(string )
205- condStatus , _ := condMap ["status" ].(string )
206- message , _ := condMap ["message" ].(string )
207-
208- if condType == "Ready" {
209- if condStatus == "True" {
219+ for _ , cond := range node .Status .Conditions {
220+ if cond .Type == v1 .NodeReady {
221+ if cond .Status == v1 .ConditionTrue {
210222 nodeStatus = "Ready"
211223 healthyNodes ++
212224 } else {
213225 nodeStatus = "NotReady"
214- issues = append (issues , fmt .Sprintf ("Not ready: %s" , message ))
226+ issues = append (issues , fmt .Sprintf ("Not ready: %s" , cond . Message ))
215227 }
216- } else if condStatus == "True" && condType != "Ready" {
228+ } else if cond . Status == v1 . ConditionTrue {
217229 // Pressure conditions
218- issues = append (issues , fmt .Sprintf ("%s: %s" , condType , message ))
230+ issues = append (issues , fmt .Sprintf ("%s: %s" , cond . Type , cond . Message ))
219231 }
220232 }
221233
222234 // Only report nodes with issues
223235 if len (issues ) > 0 {
224- nodesWithIssues = append (nodesWithIssues , fmt .Sprintf ("- **%s** (Status: %s)\n %s" , name , nodeStatus , " - " + strings .Join (issues , "\n - " )))
236+ nodesWithIssues = append (nodesWithIssues , fmt .Sprintf ("- **%s** (Status: %s)\n %s" , node . Name , nodeStatus , " - " + strings .Join (issues , "\n - " )))
225237 }
226238 }
227239
@@ -571,46 +583,40 @@ func gatherEventDiagnostics(params api.PromptHandlerParams, namespace string) (s
571583 recentEvents := []string {}
572584
573585 for _ , ns := range namespaces {
574- eventMaps , err := kubernetes . NewCore ( params ). EventsList ( params , ns )
586+ eventList , err := params . CoreV1 (). Events ( ns ). List ( params . Context , metav1. ListOptions {} )
575587 if err != nil {
576588 continue
577589 }
578590
579- for _ , eventMap := range eventMaps {
580- eventType , _ := eventMap ["type" ].(string )
581-
591+ for _ , event := range eventList .Items {
582592 // Only include Warning and Error events
583- if eventType != string (v1 .EventTypeWarning ) && eventType != "Error" {
593+ if event . Type != string (v1 .EventTypeWarning ) && event . Type != "Error" {
584594 continue
585595 }
586596
587- lastSeen , _ := eventMap ["lastTimestamp" ].(string )
588- lastSeenTime , err := time .Parse (time .RFC3339 , lastSeen )
589- if err != nil || lastSeenTime .Before (oneHourAgo ) {
597+ // Check timestamp
598+ lastSeenTime := event .LastTimestamp .Time
599+ if lastSeenTime .IsZero () {
600+ lastSeenTime = event .EventTime .Time
601+ }
602+ if lastSeenTime .Before (oneHourAgo ) {
590603 continue
591604 }
592605
593- reason , _ := eventMap ["reason" ].(string )
594- message , _ := eventMap ["message" ].(string )
595- count , _ := eventMap ["count" ].(int32 )
596-
597- involvedObject , _ := eventMap ["involvedObject" ].(map [string ]interface {})
598- objectKind , _ := involvedObject ["kind" ].(string )
599- objectName , _ := involvedObject ["name" ].(string )
600-
601- if eventType == string (v1 .EventTypeWarning ) {
606+ if event .Type == string (v1 .EventTypeWarning ) {
602607 totalWarnings ++
603608 } else {
604609 totalErrors ++
605610 }
606611
607612 // Limit message length
613+ message := event .Message
608614 if len (message ) > 150 {
609615 message = message [:150 ] + "..."
610616 }
611617
612618 recentEvents = append (recentEvents , fmt .Sprintf ("- **%s/%s** in `%s` (%s, Count: %d)\n - %s" ,
613- objectKind , objectName , ns , reason , count , message ))
619+ event . InvolvedObject . Kind , event . InvolvedObject . Name , ns , event . Reason , event . Count , message ))
614620 }
615621 }
616622
@@ -716,3 +722,29 @@ func formatHealthCheckPrompt(diag *clusterDiagnostics) string {
716722
717723 return sb .String ()
718724}
725+
726+ // initHealthChecks initializes the cluster health check prompts
727+ func initHealthChecks () []api.ServerPrompt {
728+ return []api.ServerPrompt {
729+ {
730+ Prompt : api.Prompt {
731+ Name : "cluster-health-check" ,
732+ Title : "Cluster Health Check" ,
733+ Description : "Perform comprehensive health assessment of Kubernetes/OpenShift cluster" ,
734+ Arguments : []api.PromptArgument {
735+ {
736+ Name : "namespace" ,
737+ Description : "Optional namespace to limit health check scope (default: all namespaces)" ,
738+ Required : false ,
739+ },
740+ {
741+ Name : "check_events" ,
742+ Description : "Include recent warning/error events (true/false, default: true)" ,
743+ Required : false ,
744+ },
745+ },
746+ },
747+ Handler : clusterHealthCheckHandler ,
748+ },
749+ }
750+ }
0 commit comments