Skip to content

Commit 60c6270

Browse files
committed
refactor(core): add logging and improve health check API
- Remove verbose argument, add klog progress logging
- Use CoreV1 clientset directly for cleaner code
- Extract prompt initialization to initHealthChecks()

Signed-off-by: Rohit Patil <ropatil@redhat.com>
1 parent c3f3217 commit 60c6270

File tree

3 files changed

+102
-99
lines changed

3 files changed

+102
-99
lines changed

pkg/toolsets/core/health_check.go

Lines changed: 96 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -6,40 +6,42 @@ import (
66
"time"
77

88
v1 "k8s.io/api/core/v1"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
910
"k8s.io/apimachinery/pkg/runtime/schema"
11+
"k8s.io/klog/v2"
1012

1113
"github.com/containers/kubernetes-mcp-server/pkg/api"
1214
"github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
1315
)
1416

1517
// clusterHealthCheckHandler implements the cluster health check prompt
1618
func clusterHealthCheckHandler(params api.PromptHandlerParams) (*api.PromptCallResult, error) {
19+
// Parse arguments (GetArguments returns map[string]string for prompts)
1720
args := params.GetArguments()
18-
19-
// Parse arguments
2021
namespace := args["namespace"]
21-
verbose := args["verbose"] == "true"
2222
checkEvents := args["check_events"] != "false" // default true
2323

24+
klog.Info("Starting cluster health check...")
25+
2426
// Check if namespace exists if specified
2527
namespaceWarning := ""
2628
requestedNamespace := namespace
2729
if namespace != "" {
28-
nsGVK := &schema.GroupVersionKind{
29-
Group: "",
30-
Version: "v1",
31-
Kind: "Namespace",
32-
}
33-
_, err := kubernetes.NewCore(params).ResourcesGet(params, nsGVK, "", namespace)
30+
_, err := params.CoreV1().Namespaces().Get(params.Context, namespace, metav1.GetOptions{})
3431
if err != nil {
3532
// Namespace doesn't exist - show warning and proceed with cluster-wide check
3633
namespaceWarning = fmt.Sprintf("Namespace '%s' not found or not accessible. Showing cluster-wide information instead.", namespace)
3734
namespace = "" // Fall back to cluster-wide check
35+
klog.Warningf("Namespace '%s' not found, performing cluster-wide health check", requestedNamespace)
36+
} else {
37+
klog.Infof("Performing health check for namespace: %s", namespace)
3838
}
39+
} else {
40+
klog.Info("Performing cluster-wide health check")
3941
}
4042

4143
// Gather cluster diagnostics using the KubernetesClient interface
42-
diagnostics, err := gatherClusterDiagnostics(params, namespace, verbose, checkEvents)
44+
diagnostics, err := gatherClusterDiagnostics(params, namespace, checkEvents)
4345
if err != nil {
4446
return nil, fmt.Errorf("failed to gather cluster diagnostics: %w", err)
4547
}
@@ -94,134 +96,144 @@ type clusterDiagnostics struct {
9496
}
9597

9698
// gatherClusterDiagnostics collects comprehensive diagnostic data from the cluster
97-
func gatherClusterDiagnostics(params api.PromptHandlerParams, namespace string, verbose bool, checkEvents bool) (*clusterDiagnostics, error) {
99+
func gatherClusterDiagnostics(params api.PromptHandlerParams, namespace string, checkEvents bool) (*clusterDiagnostics, error) {
98100
diag := &clusterDiagnostics{
99101
CollectionTime: time.Now(),
100102
NamespaceScoped: namespace != "",
101103
TargetNamespace: namespace,
102104
}
103105

104106
// Gather node diagnostics using the CoreV1 clientset
107+
klog.Info("Collecting node diagnostics...")
105108
nodeDiag, err := gatherNodeDiagnostics(params)
106109
if err == nil {
107110
diag.Nodes = nodeDiag
111+
klog.Info("Node diagnostics collected")
112+
} else {
113+
klog.Warningf("Failed to collect node diagnostics: %v", err)
108114
}
109115

110116
// Gather pod diagnostics
117+
klog.Info("Collecting pod diagnostics...")
111118
podDiag, err := gatherPodDiagnostics(params, namespace)
112119
if err == nil {
113120
diag.Pods = podDiag
121+
klog.Info("Pod diagnostics collected")
122+
} else {
123+
klog.Warningf("Failed to collect pod diagnostics: %v", err)
114124
}
115125

116126
// Gather workload diagnostics
127+
klog.Info("Collecting deployment diagnostics...")
117128
deployDiag, err := gatherWorkloadDiagnostics(params, "Deployment", namespace)
118129
if err == nil {
119130
diag.Deployments = deployDiag
131+
klog.Info("Deployment diagnostics collected")
132+
} else {
133+
klog.Warningf("Failed to collect deployment diagnostics: %v", err)
120134
}
121135

136+
klog.Info("Collecting statefulset diagnostics...")
122137
stsDiag, err := gatherWorkloadDiagnostics(params, "StatefulSet", namespace)
123138
if err == nil {
124139
diag.StatefulSets = stsDiag
140+
klog.Info("StatefulSet diagnostics collected")
141+
} else {
142+
klog.Warningf("Failed to collect statefulset diagnostics: %v", err)
125143
}
126144

145+
klog.Info("Collecting daemonset diagnostics...")
127146
dsDiag, err := gatherWorkloadDiagnostics(params, "DaemonSet", namespace)
128147
if err == nil {
129148
diag.DaemonSets = dsDiag
149+
klog.Info("DaemonSet diagnostics collected")
150+
} else {
151+
klog.Warningf("Failed to collect daemonset diagnostics: %v", err)
130152
}
131153

132154
// Gather PVC diagnostics
155+
klog.Info("Collecting PVC diagnostics...")
133156
pvcDiag, err := gatherPVCDiagnostics(params, namespace)
134157
if err == nil {
135158
diag.PVCs = pvcDiag
159+
klog.Info("PVC diagnostics collected")
160+
} else {
161+
klog.Warningf("Failed to collect PVC diagnostics: %v", err)
136162
}
137163

138164
// Gather cluster operator diagnostics (OpenShift only)
165+
klog.Info("Checking for cluster operators (OpenShift)...")
139166
operatorDiag, err := gatherClusterOperatorDiagnostics(params)
140167
if err == nil {
141168
diag.ClusterOperators = operatorDiag
169+
klog.Info("Cluster operator diagnostics collected")
142170
}
143171

144172
// Gather recent events if requested
145173
if checkEvents {
174+
klog.Info("Collecting recent events...")
146175
eventDiag, err := gatherEventDiagnostics(params, namespace)
147176
if err == nil {
148177
diag.Events = eventDiag
178+
klog.Info("Event diagnostics collected")
179+
} else {
180+
klog.Warningf("Failed to collect event diagnostics: %v", err)
149181
}
150182
}
151183

152184
// Count namespaces
185+
klog.Info("Counting namespaces...")
153186
namespaceList, err := kubernetes.NewCore(params).NamespacesList(params, api.ListOptions{})
154187
if err == nil {
155188
if items, ok := namespaceList.UnstructuredContent()["items"].([]interface{}); ok {
156189
diag.TotalNamespaces = len(items)
190+
klog.Infof("Found %d namespaces", diag.TotalNamespaces)
157191
}
158192
}
159193

194+
klog.Info("Cluster health check data collection completed")
160195
return diag, nil
161196
}
162197

163-
// gatherNodeDiagnostics collects node status using ResourcesList
198+
// gatherNodeDiagnostics collects node status using CoreV1 clientset
164199
func gatherNodeDiagnostics(params api.PromptHandlerParams) (string, error) {
165-
gvk := &schema.GroupVersionKind{
166-
Group: "",
167-
Version: "v1",
168-
Kind: "Node",
169-
}
170-
171-
nodeList, err := kubernetes.NewCore(params).ResourcesList(params, gvk, "", api.ListOptions{})
200+
nodeList, err := params.CoreV1().Nodes().List(params.Context, metav1.ListOptions{})
172201
if err != nil {
173202
return "", err
174203
}
175204

176-
items, ok := nodeList.UnstructuredContent()["items"].([]interface{})
177-
if !ok || len(items) == 0 {
205+
if len(nodeList.Items) == 0 {
178206
return "No nodes found", nil
179207
}
180208

181209
var sb strings.Builder
182-
totalNodes := len(items)
210+
totalNodes := len(nodeList.Items)
183211
healthyNodes := 0
184212
nodesWithIssues := []string{}
185213

186-
for _, item := range items {
187-
nodeMap, ok := item.(map[string]interface{})
188-
if !ok {
189-
continue
190-
}
191-
192-
metadata, _ := nodeMap["metadata"].(map[string]interface{})
193-
name, _ := metadata["name"].(string)
194-
195-
status, _ := nodeMap["status"].(map[string]interface{})
196-
conditions, _ := status["conditions"].([]interface{})
197-
214+
for _, node := range nodeList.Items {
198215
nodeStatus := "Unknown"
199216
issues := []string{}
200217

201218
// Parse node conditions
202-
for _, cond := range conditions {
203-
condMap, _ := cond.(map[string]interface{})
204-
condType, _ := condMap["type"].(string)
205-
condStatus, _ := condMap["status"].(string)
206-
message, _ := condMap["message"].(string)
207-
208-
if condType == "Ready" {
209-
if condStatus == "True" {
219+
for _, cond := range node.Status.Conditions {
220+
if cond.Type == v1.NodeReady {
221+
if cond.Status == v1.ConditionTrue {
210222
nodeStatus = "Ready"
211223
healthyNodes++
212224
} else {
213225
nodeStatus = "NotReady"
214-
issues = append(issues, fmt.Sprintf("Not ready: %s", message))
226+
issues = append(issues, fmt.Sprintf("Not ready: %s", cond.Message))
215227
}
216-
} else if condStatus == "True" && condType != "Ready" {
228+
} else if cond.Status == v1.ConditionTrue {
217229
// Pressure conditions
218-
issues = append(issues, fmt.Sprintf("%s: %s", condType, message))
230+
issues = append(issues, fmt.Sprintf("%s: %s", cond.Type, cond.Message))
219231
}
220232
}
221233

222234
// Only report nodes with issues
223235
if len(issues) > 0 {
224-
nodesWithIssues = append(nodesWithIssues, fmt.Sprintf("- **%s** (Status: %s)\n%s", name, nodeStatus, " - "+strings.Join(issues, "\n - ")))
236+
nodesWithIssues = append(nodesWithIssues, fmt.Sprintf("- **%s** (Status: %s)\n%s", node.Name, nodeStatus, " - "+strings.Join(issues, "\n - ")))
225237
}
226238
}
227239

@@ -571,46 +583,40 @@ func gatherEventDiagnostics(params api.PromptHandlerParams, namespace string) (s
571583
recentEvents := []string{}
572584

573585
for _, ns := range namespaces {
574-
eventMaps, err := kubernetes.NewCore(params).EventsList(params, ns)
586+
eventList, err := params.CoreV1().Events(ns).List(params.Context, metav1.ListOptions{})
575587
if err != nil {
576588
continue
577589
}
578590

579-
for _, eventMap := range eventMaps {
580-
eventType, _ := eventMap["type"].(string)
581-
591+
for _, event := range eventList.Items {
582592
// Only include Warning and Error events
583-
if eventType != string(v1.EventTypeWarning) && eventType != "Error" {
593+
if event.Type != string(v1.EventTypeWarning) && event.Type != "Error" {
584594
continue
585595
}
586596

587-
lastSeen, _ := eventMap["lastTimestamp"].(string)
588-
lastSeenTime, err := time.Parse(time.RFC3339, lastSeen)
589-
if err != nil || lastSeenTime.Before(oneHourAgo) {
597+
// Check timestamp
598+
lastSeenTime := event.LastTimestamp.Time
599+
if lastSeenTime.IsZero() {
600+
lastSeenTime = event.EventTime.Time
601+
}
602+
if lastSeenTime.Before(oneHourAgo) {
590603
continue
591604
}
592605

593-
reason, _ := eventMap["reason"].(string)
594-
message, _ := eventMap["message"].(string)
595-
count, _ := eventMap["count"].(int32)
596-
597-
involvedObject, _ := eventMap["involvedObject"].(map[string]interface{})
598-
objectKind, _ := involvedObject["kind"].(string)
599-
objectName, _ := involvedObject["name"].(string)
600-
601-
if eventType == string(v1.EventTypeWarning) {
606+
if event.Type == string(v1.EventTypeWarning) {
602607
totalWarnings++
603608
} else {
604609
totalErrors++
605610
}
606611

607612
// Limit message length
613+
message := event.Message
608614
if len(message) > 150 {
609615
message = message[:150] + "..."
610616
}
611617

612618
recentEvents = append(recentEvents, fmt.Sprintf("- **%s/%s** in `%s` (%s, Count: %d)\n - %s",
613-
objectKind, objectName, ns, reason, count, message))
619+
event.InvolvedObject.Kind, event.InvolvedObject.Name, ns, event.Reason, event.Count, message))
614620
}
615621
}
616622

@@ -716,3 +722,29 @@ func formatHealthCheckPrompt(diag *clusterDiagnostics) string {
716722

717723
return sb.String()
718724
}
725+
726+
// initHealthChecks initializes the cluster health check prompts
727+
func initHealthChecks() []api.ServerPrompt {
728+
return []api.ServerPrompt{
729+
{
730+
Prompt: api.Prompt{
731+
Name: "cluster-health-check",
732+
Title: "Cluster Health Check",
733+
Description: "Perform comprehensive health assessment of Kubernetes/OpenShift cluster",
734+
Arguments: []api.PromptArgument{
735+
{
736+
Name: "namespace",
737+
Description: "Optional namespace to limit health check scope (default: all namespaces)",
738+
Required: false,
739+
},
740+
{
741+
Name: "check_events",
742+
Description: "Include recent warning/error events (true/false, default: true)",
743+
Required: false,
744+
},
745+
},
746+
},
747+
Handler: clusterHealthCheckHandler,
748+
},
749+
}
750+
}

pkg/toolsets/core/health_check_test.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,18 @@ func (s *ClusterHealthCheckSuite) TestPromptIsRegistered() {
3333
s.Contains(prompt.Prompt.Description, "comprehensive health assessment")
3434

3535
// Verify arguments
36-
s.Require().Len(prompt.Prompt.Arguments, 3, "should have 3 arguments")
36+
s.Require().Len(prompt.Prompt.Arguments, 2, "should have 2 arguments")
3737

3838
// Check namespace argument
3939
s.Equal("namespace", prompt.Prompt.Arguments[0].Name)
4040
s.NotEmpty(prompt.Prompt.Arguments[0].Description)
4141
s.False(prompt.Prompt.Arguments[0].Required)
4242

43-
// Check verbose argument
44-
s.Equal("verbose", prompt.Prompt.Arguments[1].Name)
43+
// Check check_events argument
44+
s.Equal("check_events", prompt.Prompt.Arguments[1].Name)
4545
s.NotEmpty(prompt.Prompt.Arguments[1].Description)
4646
s.False(prompt.Prompt.Arguments[1].Required)
4747

48-
// Check check_events argument
49-
s.Equal("check_events", prompt.Prompt.Arguments[2].Name)
50-
s.NotEmpty(prompt.Prompt.Arguments[2].Description)
51-
s.False(prompt.Prompt.Arguments[2].Required)
52-
5348
// Verify handler is set
5449
s.NotNil(prompt.Handler, "handler should be set")
5550

pkg/toolsets/core/toolset.go

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -30,33 +30,9 @@ func (t *Toolset) GetTools(o api.Openshift) []api.ServerTool {
3030
}
3131

3232
func (t *Toolset) GetPrompts() []api.ServerPrompt {
33-
return []api.ServerPrompt{
34-
{
35-
Prompt: api.Prompt{
36-
Name: "cluster-health-check",
37-
Title: "Cluster Health Check",
38-
Description: "Perform comprehensive health assessment of Kubernetes/OpenShift cluster",
39-
Arguments: []api.PromptArgument{
40-
{
41-
Name: "namespace",
42-
Description: "Optional namespace to limit health check scope (default: all namespaces)",
43-
Required: false,
44-
},
45-
{
46-
Name: "verbose",
47-
Description: "Enable detailed resource-level information (true/false, default: false)",
48-
Required: false,
49-
},
50-
{
51-
Name: "check_events",
52-
Description: "Include recent warning/error events (true/false, default: true)",
53-
Required: false,
54-
},
55-
},
56-
},
57-
Handler: clusterHealthCheckHandler,
58-
},
59-
}
33+
return slices.Concat(
34+
initHealthChecks(),
35+
)
6036
}
6137

6238
func init() {

0 commit comments

Comments
 (0)