Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ pvt.yaml
# Zig build artifacts
tui/zig-out/
tui/.zig-cache/
tui/target/

vitui.md
75 changes: 71 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,83 @@ Pre-flight validation, cluster status, and lifecycle orchestration for Talos clu
go install github.com/OneNoted/pvt@latest
```

## TUI build

The interactive `pvt tui` dashboard now uses a Rust + Ratatui binary named `vitui`.

Build it from the repository root with:

```bash
cd tui
cargo build --release
```

`pvt tui` searches for `vitui` in this order: next to the `pvt` binary, then in
`tui/target/release/` and `tui/target/debug/` relative to the current working directory,
the `pvt` binary's directory, and that directory's parent. Set `PVT_VITUI_BIN` to point at an explicit binary path.

If your system installs helper binaries outside standard locations, you can also override:

- `PVT_KUBECTL_BIN`
- `PVT_TALOSCTL_BIN`
- `PVT_CURL_BIN`

## Usage

```bash
pvt config init # generate starter config
pvt config validate # validate config syntax
pvt doctor # diagnose local config, helper tools, and API access
pvt status summary # per-node cluster overview
pvt drift # compare pvt.yaml with live Proxmox/Talos state
pvt plan remediate # print known remediation commands for drift
pvt validate vms # pre-flight VM checks
pvt validate vm <name> # check a single VM
pvt backups stale # list stale Proxmox backups
pvt node reboot <name> # plan a safe node reboot
pvt machineconfig diff --against <dir> # normalized Talos machine config diff
pvt bootstrap # apply machine configs + bootstrap etcd
pvt upgrade --image <img> # rolling Talos upgrade across all nodes
```

### Doctor, Drift, and Plans

`pvt doctor` checks config discovery, config parsing, helper binaries, Talos and
Kubernetes config files, and Proxmox API reachability. `pvt drift` uses the same
Go health snapshot engine as `pvt status summary` to surface VM, Talos, and
validation drift. `pvt plan remediate` prints known fix commands, but does not
apply them.

```bash
pvt doctor
pvt drift
pvt plan remediate
```

### Node Lifecycle

Node lifecycle commands are plan-first. `drain` and `reboot` can be run with
`--execute`; `add`, `replace`, and `remove` print the ordered operational plan
for review.

```bash
pvt node drain worker-1
pvt node reboot worker-1 --execute
pvt node replace old-worker --replacement new-worker
```

### Backups

The `pvt backups` commands inspect Proxmox storages that support backup content and
include only backups whose VMID matches a node in `pvt.yaml`. Pruning is a dry
run unless `--execute` is provided.

```bash
pvt backups list
pvt backups stale --older-than-days 30
pvt backups prune --older-than-days 30
```

### Bootstrap

Applies Talos machine configs and bootstraps etcd for a new cluster. Nodes must already be booted with the Talos ISO in maintenance mode.
Expand All @@ -51,6 +116,8 @@ Upgrades Talos on all nodes one at a time: workers first, then control plane nod

```bash
pvt upgrade --image ghcr.io/siderolabs/installer:v1.12.5
pvt upgrade preflight --image ghcr.io/siderolabs/installer:v1.12.5
pvt upgrade postflight --image ghcr.io/siderolabs/installer:v1.12.5
pvt upgrade --image <img> --dry-run # preview upgrade plan
pvt upgrade --image <img> --stage # stage upgrade, reboot later
pvt upgrade --image <img> --force # skip pre-flight health check
Expand All @@ -71,7 +138,7 @@ proxmox:
endpoint: "https://pve.local:8006"
token_id: "pvt@pam!automation"
token_secret: "${PVT_PVE_TOKEN}"
tls_verify: false
tls_verify: false # only for self-signed lab setups; prefer true

talos:
config_path: "~/talos/mycluster/talosconfig"
Expand Down Expand Up @@ -110,6 +177,6 @@ Findings include the corresponding `qm set` fix command.
- [x] Cluster status overview
- [x] Bootstrap orchestration
- [x] Rolling upgrades
- [ ] Node lifecycle management
- [ ] Drift detection
- [ ] TUI dashboard
- [x] Node lifecycle management
- [x] Drift detection
- [x] Rust Ratatui TUI dashboard
174 changes: 174 additions & 0 deletions cmd/backups.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
package cmd

import (
"fmt"
"os"
"time"

"github.com/spf13/cobra"

"github.com/OneNoted/pvt/internal/backups"
"github.com/OneNoted/pvt/internal/config"
"github.com/OneNoted/pvt/internal/proxmox"
"github.com/OneNoted/pvt/internal/ui"
)

// backupsOlderThanDays holds the --older-than-days flag value shared by
// `backups stale` and `backups prune`.
var backupsOlderThanDays int

// backupsExecute holds the --execute flag for `backups prune`; when false the
// command only prints the deletion plan.
var backupsExecute bool

// maxBackupRetentionDays caps --older-than-days (roughly 100 years) to reject
// nonsensical retention windows.
const maxBackupRetentionDays = 36500

// backupsCmd is the parent `pvt backups` command; subcommands do the work.
var backupsCmd = &cobra.Command{
	Use:   "backups",
	Short: "Inspect and manage Proxmox backup retention",
}

// backupsListCmd lists every discovered backup entry.
var backupsListCmd = &cobra.Command{
	Use:   "list",
	Short: "List Proxmox backup entries",
	RunE:  runBackupsList,
}

// backupsStaleCmd lists only entries older than the retention threshold.
var backupsStaleCmd = &cobra.Command{
	Use:   "stale",
	Short: "List Proxmox backup entries older than the retention threshold",
	RunE:  runBackupsStale,
}

// backupsPruneCmd deletes stale entries (dry run unless --execute is set).
var backupsPruneCmd = &cobra.Command{
	Use:   "prune",
	Short: "Prune stale Proxmox backups",
	RunE:  runBackupsPrune,
}

// init wires the backups command tree into the root command and registers the
// retention and execution flags on the subcommands that use them.
func init() {
	rootCmd.AddCommand(backupsCmd)
	backupsCmd.AddCommand(backupsListCmd)
	backupsCmd.AddCommand(backupsStaleCmd)
	backupsCmd.AddCommand(backupsPruneCmd)

	// Both stale and prune accept the same retention threshold flag.
	backupsStaleCmd.Flags().IntVar(&backupsOlderThanDays, "older-than-days", 30, "backup age threshold in days")
	backupsPruneCmd.Flags().IntVar(&backupsOlderThanDays, "older-than-days", 30, "backup age threshold in days")
	backupsPruneCmd.Flags().BoolVar(&backupsExecute, "execute", false, "delete stale backups instead of printing the plan")
}

// runBackupsList implements `pvt backups list`: it loads the configuration,
// collects backup entries across all configured clusters, and renders them as
// a table. Collection problems are reported as warnings, not failures.
func runBackupsList(cmd *cobra.Command, args []string) error {
	_, cfg, err := loadConfig()
	if err != nil {
		return err
	}

	ctx, cancel := liveContext()
	defer cancel()

	entries, warnings := backups.List(ctx, cfg)
	printBackupErrors(warnings)
	printBackups(entries)
	return nil
}

// runBackupsStale implements `pvt backups stale`: same collection as `list`,
// but only entries older than the --older-than-days threshold are printed.
func runBackupsStale(cmd *cobra.Command, args []string) error {
	_, cfg, err := loadConfig()
	if err != nil {
		return err
	}

	retention, err := backupRetention()
	if err != nil {
		return err
	}

	ctx, cancel := liveContext()
	defer cancel()

	entries, warnings := backups.List(ctx, cfg)
	printBackupErrors(warnings)

	stale := backups.Stale(entries, retention, time.Now())
	printBackups(stale)
	return nil
}

// runBackupsPrune implements `pvt backups prune`. It lists backups, filters
// them down to entries older than the retention threshold, and either prints
// the deletion plan (default) or deletes the stale backups when --execute is
// set. A delete failure aborts the run with the offending volume ID.
func runBackupsPrune(cmd *cobra.Command, args []string) error {
	_, cfg, err := loadConfig()
	if err != nil {
		return err
	}
	retention, err := backupRetention()
	if err != nil {
		return err
	}

	ctx, cancel := liveContext()
	defer cancel()

	// List returns every backup entry; collection errors are surfaced as
	// warnings so a partial result can still be pruned.
	entries, errs := backups.List(ctx, cfg)
	printBackupErrors(errs)
	stale := backups.Stale(entries, retention, time.Now())
	if len(stale) == 0 {
		fmt.Println("No stale backups found.")
		return nil
	}
	if !backupsExecute {
		printBackups(stale)
		fmt.Println("Dry run. Re-run with --execute to delete these backups.")
		return nil
	}

	clients := proxmoxClientsByName(cfg)
	for _, entry := range stale {
		client := clients[entry.Cluster]
		if client == nil {
			return fmt.Errorf("%s: proxmox client unavailable", entry.Cluster)
		}
		fmt.Printf("Deleting %s on %s/%s\n", entry.VolID, entry.Node, entry.Storage)
		if err := client.DeleteBackup(ctx, entry.BackupEntry); err != nil {
			// Identify which backup failed so a mid-run abort is actionable.
			return fmt.Errorf("deleting %s: %w", entry.VolID, err)
		}
	}
	return nil
}

// backupRetention converts the --older-than-days flag into a time.Duration,
// rejecting values below one day or above maxBackupRetentionDays.
func backupRetention() (time.Duration, error) {
	switch {
	case backupsOlderThanDays < 1:
		return 0, fmt.Errorf("--older-than-days must be at least 1")
	case backupsOlderThanDays > maxBackupRetentionDays:
		return 0, fmt.Errorf("--older-than-days must be at most %d", maxBackupRetentionDays)
	}
	days := time.Duration(backupsOlderThanDays)
	return days * 24 * time.Hour, nil
}

// printBackups renders backup entries as a table on stdout, with each entry's
// age shown in whole days relative to the current time.
func printBackups(entries []backups.Entry) {
	now := time.Now()
	tbl := ui.NewTable("Cluster", "Node", "Storage", "VMID", "Age", "Size", "VolID")
	for _, e := range entries {
		vmid := fmt.Sprintf("%d", e.VMID)
		age := fmt.Sprintf("%dd", backups.AgeDays(e, now))
		size := fmt.Sprintf("%d", e.Size)
		ui.AddRow(tbl, e.Cluster, e.Node, e.Storage, vmid, age, size, e.VolID)
	}
	tbl.Render(os.Stdout)
}

// printBackupErrors writes each collection warning to stderr; it never fails
// the command. Note the slice holds pre-formatted messages, not error values.
func printBackupErrors(errs []string) {
	for _, msg := range errs {
		fmt.Fprintf(os.Stderr, "Warning: %s\n", msg)
	}
}

// proxmoxClientsByName builds one Proxmox API client per cluster defined in
// cfg, keyed by cluster name. Clusters whose client cannot be constructed are
// reported on stderr and omitted from the map, so callers must tolerate
// missing entries.
func proxmoxClientsByName(cfg *config.Config) map[string]*proxmox.Client {
	pxByName := make(map[string]config.ProxmoxCluster, len(cfg.Proxmox.Clusters))
	for _, cluster := range cfg.Proxmox.Clusters {
		pxByName[cluster.Name] = cluster
	}

	out := map[string]*proxmox.Client{}
	for _, cluster := range cfg.Clusters {
		if out[cluster.Name] != nil {
			continue
		}
		client, err := proxmox.NewClient(pxByName[cluster.ProxmoxCluster])
		if err != nil {
			// Previously this error was silently dropped, leaving callers
			// with only a generic "client unavailable" failure; surface the
			// root cause as a warning instead.
			fmt.Fprintf(os.Stderr, "Warning: %s: proxmox client: %s\n", cluster.Name, err)
			continue
		}
		out[cluster.Name] = client
	}
	return out
}
35 changes: 35 additions & 0 deletions cmd/backups_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package cmd

import "testing"

// TestBackupRetentionRejectsZeroAndNegativeValues verifies that retention
// thresholds below one day are rejected with an error.
func TestBackupRetentionRejectsZeroAndNegativeValues(t *testing.T) {
	prev := backupsOlderThanDays
	t.Cleanup(func() { backupsOlderThanDays = prev })

	for _, days := range []int{0, -1} {
		backupsOlderThanDays = days
		_, err := backupRetention()
		if err == nil {
			t.Fatalf("backupRetention() with %d days returned nil error", days)
		}
	}
}

// TestBackupRetentionRejectsUnreasonablyLargeValues verifies the upper bound
// just past maxBackupRetentionDays is rejected.
func TestBackupRetentionRejectsUnreasonablyLargeValues(t *testing.T) {
	prev := backupsOlderThanDays
	t.Cleanup(func() { backupsOlderThanDays = prev })

	backupsOlderThanDays = maxBackupRetentionDays + 1
	_, err := backupRetention()
	if err == nil {
		t.Fatal("backupRetention() returned nil error for excessive retention days")
	}
}

// TestBackupRetentionAcceptsPositiveValues verifies the smallest valid value
// produces a positive duration without error.
func TestBackupRetentionAcceptsPositiveValues(t *testing.T) {
	prev := backupsOlderThanDays
	t.Cleanup(func() { backupsOlderThanDays = prev })

	backupsOlderThanDays = 1
	got, err := backupRetention()
	if err != nil || got == 0 {
		t.Fatalf("backupRetention() = %s, %v; want positive duration", got, err)
	}
}
43 changes: 43 additions & 0 deletions cmd/doctor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package cmd

import (
"fmt"
"os"

"github.com/spf13/cobra"

"github.com/OneNoted/pvt/internal/doctor"
"github.com/OneNoted/pvt/internal/ui"
)

// doctorCmd is the `pvt doctor` command; it runs local environment and API
// access diagnostics via runDoctor.
var doctorCmd = &cobra.Command{
	Use:   "doctor",
	Short: "Diagnose local pvt configuration and tool access",
	RunE:  runDoctor,
}

// init registers the doctor command on the root command.
func init() {
	rootCmd.AddCommand(doctorCmd)
}

// runDoctor implements `pvt doctor`: it executes every diagnostic check,
// prints the results as a table followed by a summary line, and returns an
// error when any error-severity check failed.
func runDoctor(cmd *cobra.Command, args []string) error {
	ctx, cancel := liveContext()
	defer cancel()

	checks := doctor.Run(ctx, cfgFile)

	tbl := ui.NewTable("Severity", "Check", "Status", "Detail")
	for _, c := range checks {
		status := "FAIL"
		if c.OK {
			status = "OK"
		}
		ui.AddRow(tbl, c.Severity.String(), c.Name, status, c.Detail)
	}
	tbl.Render(os.Stdout)
	fmt.Println(doctor.Summary(checks))

	if doctor.HasErrors(checks) {
		return fmt.Errorf("doctor found error-level failures")
	}
	return nil
}
Loading
Loading