diff --git a/.github/workflows/host-status.yml b/.github/workflows/host-status.yml new file mode 100644 index 0000000..098aaeb --- /dev/null +++ b/.github/workflows/host-status.yml @@ -0,0 +1,83 @@ +name: host-status + +on: + push: + branches: [main] + paths: + - "modules/host-status/**" + - ".github/workflows/host-status.yml" + pull_request: + paths: + - "modules/host-status/**" + - ".github/workflows/host-status.yml" + +jobs: + test: + name: Test Go Module + runs-on: ubuntu-latest + defaults: + run: + working-directory: modules/host-status + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: 'modules/host-status/go.mod' + cache-dependency-path: modules/host-status/go.sum + + - name: Download dependencies + run: go mod download + + - name: Run tests + run: go test -v -race -coverprofile=coverage.txt -covermode=atomic ./... + + - name: Upload coverage to artifact + uses: actions/upload-artifact@v4 + with: + name: coverage + path: modules/host-status/coverage.txt + retention-days: 7 + + lint: + name: Go Lint and Format Check + runs-on: ubuntu-latest + defaults: + run: + working-directory: modules/host-status + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: 'modules/host-status/go.mod' + cache-dependency-path: modules/host-status/go.sum + + - name: Check formatting + run: | + if [ "$(gofmt -l . | wc -l)" -gt 0 ]; then + echo "The following files are not formatted:" + gofmt -l . + exit 1 + fi + + - name: Run go vet + run: go vet ./... 
+ + shellcheck: + name: Validate Shell Scripts + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Run ShellCheck + uses: ludeeus/action-shellcheck@master + with: + scandir: 'modules/host-status/examples/providers' + severity: warning + additional_files: 'modules/host-status/install.sh' diff --git a/modules/host-status/.dockerignore b/modules/host-status/.dockerignore new file mode 100644 index 0000000..b7722ee --- /dev/null +++ b/modules/host-status/.dockerignore @@ -0,0 +1,16 @@ +# Binaries +host-status + +# Tests +*_test.go + +# Documentation +README.md +AGENTS.md + +# Config files +test-config.yaml + +# Development +.git +.gitignore diff --git a/modules/host-status/.gitignore b/modules/host-status/.gitignore new file mode 100644 index 0000000..aba51a4 --- /dev/null +++ b/modules/host-status/.gitignore @@ -0,0 +1,16 @@ +# Binaries +host-status + +# Test artifacts +test-config.yaml + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db diff --git a/modules/host-status/AGENTS.md b/modules/host-status/AGENTS.md new file mode 100644 index 0000000..4fc7b62 --- /dev/null +++ b/modules/host-status/AGENTS.md @@ -0,0 +1,135 @@ +# host-status — Agent Guidance + +## Architecture Overview + +The host-status module is designed around three core components: + +1. **Provider System**: Executes external programs to collect metrics +2. **Pull Model**: HTTP server for on-demand status queries +3. 
**Push Model**: Periodic scheduler that sends status to remote endpoints + +## Design Principles + +- **Simplicity**: Providers are just executables that output JSON +- **Flexibility**: Both pull and push can be enabled independently or together +- **Robustness**: Timeouts, retries, and error handling at every layer +- **Observability**: Comprehensive logging and metrics in responses + +## Provider Contract + +Providers MUST: +- Output valid JSON to stdout with fields: `status`, `metrics`, `message` +- Use status values: `ok`, `warn`, or `error` +- Exit with code 0 on success +- Complete within the configured timeout + +Providers MAY: +- Read environment variables for configuration +- Accept command-line arguments +- Write logs to stderr (captured separately) +- Return empty metrics object + +## Code Organization + +- `main.go`: Entry point, signal handling, graceful shutdown +- `config.go`: Configuration parsing and validation +- `provider.go`: Provider execution and registry +- `pusher.go`: Periodic scheduler for push model +- `internal/server`: HTTP server for pull model +- `internal/providers/host`: Built-in system metrics providers +- `examples/`: Example configuration files + +## Development Guidelines + +### Adding New Features + +1. Update configuration structs in `config.go` if needed +2. Add validation logic for new config fields +3. Update example config in `examples/config.toml` +4. Document in `README.md` + +### Testing + +```bash +# Run tests +go test -v ./... + +# Test with module +go run . 
-config examples/config.toml +curl http://localhost:8080/status +``` + +### Error Handling + +- Provider failures should not crash the service +- Failed providers return error status, not panic +- Push failures are logged but don't stop the scheduler +- HTTP errors return appropriate status codes + +### Logging + +Use `log.Printf()` for important events: +- Configuration loading +- Server start/stop +- Provider execution errors +- Push success/failure +- Shutdown events + +Avoid verbose logging for normal operations. + +## Extending the Module + +### Adding Provider Types + +No code changes needed! Just create a new executable that follows the provider contract. + +### New Push Destinations + +The current HTTP POST implementation should work for most cases. For special protocols (e.g., MQTT, gRPC), consider: +1. Adding a `type` field to `PushDestination` +2. Implementing destination-specific clients +3. Maintaining backward compatibility + +### Authentication Methods + +Currently supports: +- Bearer tokens via `auth` field +- Custom headers via `headers` map + +For OAuth2 or other flows, consider: +- Adding auth configuration section +- Token refresh logic +- Credential management + +## Performance Considerations + +- Providers execute serially by design (predictable timing) +- Consider parallel execution for many providers (future enhancement) +- HTTP server handles requests concurrently +- Push scheduler runs in separate goroutine + +## Security Notes + +- Providers execute with service permissions (principle of least privilege) +- No shell expansion in command execution (security) +- Authentication tokens in config (consider vault integration) +- HTTP server has no built-in auth (use reverse proxy) + +## Future Enhancements + +Potential improvements: +- [ ] Parallel provider execution with semaphore +- [ ] Provider result caching +- [ ] Metrics persistence (time-series data) +- [ ] WebSocket support for real-time updates +- [ ] Built-in authentication for HTTP 
# Build stage
# The builder's Go release must satisfy the `go 1.22.2` directive in go.mod.
# NOTE(review): official golang images are understood to pin GOTOOLCHAIN=local,
# so an older toolchain fails the build instead of fetching a newer one.
FROM golang:1.22-alpine AS builder

WORKDIR /build

# Copy go mod files first so the dependency layer is cached across source edits
COPY go.mod go.sum ./
RUN go mod download

# Copy source: the main package plus the internal packages it imports
# (main.go imports internal/server, provider.go imports internal/providers/host)
COPY *.go ./
COPY internal/ ./internal/

# Build binary
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o host-status .

# Runtime stage
FROM alpine:latest

# Install runtime dependencies
RUN apk --no-cache add ca-certificates bc bash coreutils

# Create non-root user
RUN addgroup -g 1000 hoststatus && \
    adduser -D -u 1000 -G hoststatus hoststatus

WORKDIR /app

# Copy binary from builder
COPY --from=builder /build/host-status .

# Copy examples
COPY examples/ ./examples/
RUN chmod +x ./examples/providers/*.sh

# Install the example config as the default. The service parses TOML and the
# only shipped example is examples/config.toml. It is placed at the path the
# README's `docker run -v ...:/etc/host-status/config.toml` mount overrides.
COPY examples/config.toml /etc/host-status/config.toml

# Set ownership
RUN chown -R hoststatus:hoststatus /app

# Switch to non-root user
USER hoststatus

# Expose default port
EXPOSE 8080

# Run the binary
CMD ["./host-status", "-config", "/etc/host-status/config.toml"]
+ +## Quick Start + +```bash +# Build +go build + +# Run with config +./host-status -config config.toml + +# Query status +curl http://localhost:8080/status +``` + +## Configuration + +Example `config.toml`: + +```toml +[pull] +enabled = true +port = 8080 +host = "0.0.0.0" + +[push] +enabled = true +interval = "5m" + +[[push.destinations]] +url = "https://monitoring.example.com/api/status" +auth = "Bearer your-token-here" + +[[providers]] +name = "cpu" +timeout = "10s" + +[[providers]] +name = "memory" +timeout = "10s" + +[[providers]] +name = "disk" +timeout = "10s" + +[[providers]] +name = "uptime" +timeout = "10s" +``` + +## Built-in Providers + +- **cpu**: CPU load and percentage +- **memory**: Memory usage statistics +- **disk**: Disk usage (default: /) +- **uptime**: System uptime + +## External Providers + +Run custom programs that output JSON: + +```toml +[[providers]] +name = "custom" +command = "/path/to/script.sh" +args = ["arg1", "arg2"] +timeout = "30s" +``` + +Expected JSON format: + +```json +{ + "status": "ok", + "metrics": { + "key": "value" + }, + "message": "Optional message" +} +``` + +Status values: `ok`, `warn`, `error` + +## API Endpoints + +### GET /status + +Returns aggregated status from all providers: + +```json +{ + "hostname": "server1", + "timestamp": "2024-03-30T10:00:00Z", + "overall": "ok", + "providers": [ + { + "name": "cpu", + "status": "ok", + "metrics": {...}, + "timestamp": "2024-03-30T10:00:00Z" + } + ] +} +``` + +### GET /health + +Health check endpoint: + +```json +{"status": "ok"} +``` + +## Deployment + +### systemd + +```bash +# Install service +sudo cp host-status /usr/local/bin/ +sudo cp config.toml /etc/host-status/ +sudo cp host-status.service /etc/systemd/system/ +sudo systemctl daemon-reload +sudo systemctl enable --now host-status +``` + +### Docker + +```bash +docker build -t host-status . 
+docker run -p 8080:8080 -v $(pwd)/config.toml:/etc/host-status/config.toml host-status +``` diff --git a/modules/host-status/config.go b/modules/host-status/config.go new file mode 100644 index 0000000..98c4172 --- /dev/null +++ b/modules/host-status/config.go @@ -0,0 +1,69 @@ +package main + +import ( + "time" + + "github.com/BurntSushi/toml" +) + +// Config represents the host-status configuration +type Config struct { + Pull PullConfig `toml:"pull"` + Push PushConfig `toml:"push"` + Providers []ProviderConfig `toml:"providers"` +} + +// PullConfig configures the pull-based HTTP server +type PullConfig struct { + Enabled bool `toml:"enabled"` + Port int `toml:"port"` + Host string `toml:"host"` +} + +// PushConfig configures the push-based reporting +type PushConfig struct { + Enabled bool `toml:"enabled"` + Interval string `toml:"interval"` + Destinations []PushDestination `toml:"destinations"` +} + +// PushDestination represents a push target +type PushDestination struct { + URL string `toml:"url"` + Auth string `toml:"auth"` + Headers map[string]string `toml:"headers"` +} + +// ProviderConfig defines a status provider +type ProviderConfig struct { + Name string `toml:"name"` + Command string `toml:"command"` + Args []string `toml:"args"` + Timeout string `toml:"timeout"` + Env map[string]string `toml:"env"` +} + +// GetParsedInterval returns the push interval as time.Duration +func (p *PushConfig) GetParsedInterval() (time.Duration, error) { + if p.Interval == "" { + return 5 * time.Minute, nil // Default to 5 minutes + } + return time.ParseDuration(p.Interval) +} + +// GetParsedTimeout returns the provider timeout as time.Duration +func (p *ProviderConfig) GetParsedTimeout() (time.Duration, error) { + if p.Timeout == "" { + return 30 * time.Second, nil // Default to 30 seconds + } + return time.ParseDuration(p.Timeout) +} + +// LoadConfig reads and parses the configuration file +func LoadConfig(path string) (*Config, error) { + var config Config + if _, err 
{
  description = "host-status - Host status monitoring module";

  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";

  outputs = { self, nixpkgs }:
    let
      supportedSystems = [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ];
      forAllSystems = nixpkgs.lib.genAttrs supportedSystems;
    in
    {
      # Development shell with the Go toolchain and editor tooling.
      devShells = forAllSystems (system:
        let pkgs = nixpkgs.legacyPackages.${system}; in
        {
          default = pkgs.mkShell {
            buildInputs = with pkgs; [
              go_1_25
              gopls
              gotools
            ];
            # An indented string is terminated by two single quotes. A lone
            # quote is literal text and would leave the string open.
            shellHook = ''
              echo "host-status dev shell"
              echo "Run 'go build' to build, or 'go run . -config examples/config.toml' to test"
            '';
          };
        });

      # Installable package: the binary plus the example files under share/.
      packages = forAllSystems (system:
        let pkgs = nixpkgs.legacyPackages.${system}; in
        {
          default = pkgs.buildGoModule {
            pname = "host-status";
            version = "0.0.1";
            src = ./.;
            vendorHash = "sha256-wJPJlebGAGEHq6UEO16rkPW7CHldKDZjJZQpauVvTog=";

            buildInputs = [ pkgs.bc ];

            postInstall = ''
              mkdir -p $out/share/host-status/examples
              cp -r examples/* $out/share/host-status/examples/
              chmod +x $out/share/host-status/examples/providers/*.sh
            '';

            meta = with pkgs.lib; {
              description = "Host status monitoring with pull and push models";
              license = licenses.mit;
              platforms = supportedSystems;
            };
          };
        });
    };
}
#!/bin/bash
set -euo pipefail

# Installation script for host-status
# Usage: sudo ./install.sh
#
# Builds the binary, creates the hoststatus service account and directories,
# installs the example configuration (only if none exists yet) and registers
# the systemd unit. The service is NOT enabled or started automatically.

# The shipped example configuration is TOML; it is the only example config in
# the repository. CONFIG_PATH is the path the bundled systemd unit passes to
# -config, so the file must be installed under that name for the service to
# find it. The loader parses TOML regardless of the file extension.
EXAMPLE_CONFIG="examples/config.toml"
CONFIG_PATH="/etc/host-status/config.yaml"

if [ "$EUID" -ne 0 ]; then
    echo "Please run as root (sudo ./install.sh)"
    exit 1
fi

echo "Installing host-status..."

# Build the binary
echo "Building binary..."
go build -o host-status

# Create user and group
if ! id -u hoststatus >/dev/null 2>&1; then
    echo "Creating hoststatus user..."
    useradd -r -s /bin/false -d /opt/host-status hoststatus
fi

# Create directories
echo "Creating directories..."
mkdir -p /opt/host-status
mkdir -p /etc/host-status
mkdir -p /var/lib/host-status

# Copy binary and examples
echo "Copying files..."
cp host-status /opt/host-status/
cp -r examples /opt/host-status/
chmod +x /opt/host-status/examples/providers/*.sh

# Copy example config if config doesn't exist (never overwrite local edits)
if [ ! -f "$CONFIG_PATH" ]; then
    echo "Installing example configuration..."
    cp "$EXAMPLE_CONFIG" "$CONFIG_PATH"
    echo "WARNING: Edit $CONFIG_PATH before starting the service"
fi

# Set ownership. The config may contain push credentials, so it is readable
# by root and the service group only.
chown -R hoststatus:hoststatus /opt/host-status
chown -R hoststatus:hoststatus /var/lib/host-status
chown root:hoststatus "$CONFIG_PATH"
chmod 640 "$CONFIG_PATH"

# Install systemd service
echo "Installing systemd service..."
cp host-status.service /etc/systemd/system/
systemctl daemon-reload

echo ""
echo "Installation complete!"
echo ""
echo "Next steps:"
echo "1. Edit $CONFIG_PATH"
echo "2. Update provider paths to /opt/host-status/examples/providers/"
echo "3. Enable and start the service:"
echo "   sudo systemctl enable --now host-status"
echo "4. Check status:"
echo "   sudo systemctl status host-status"
echo "   journalctl -u host-status -f"
echo ""
{ + status = StatusError + } else if loadPct > 60 { + status = StatusWarn + } + + metrics := map[string]interface{}{ + "load_1min": load1, + "load_5min": load5, + "load_15min": load15, + "cpu_count": cpuCount, + "load_percentage": loadPct, + "execution_time_ms": time.Since(start).Milliseconds(), + "message": fmt.Sprintf("CPU load: %.2f (%.2f%%)", load1, loadPct), + } + + return &Result{ + Name: p.Name(), + Status: status, + Metrics: metrics, + Timestamp: time.Now(), + }, nil +} + +// MemoryProvider monitors memory usage +type MemoryProvider struct{} + +func (p *MemoryProvider) Name() string { + return "memory" +} + +func (p *MemoryProvider) Execute(ctx context.Context) (*Result, error) { + start := time.Now() + + var si syscall.Sysinfo_t + if err := syscall.Sysinfo(&si); err != nil { + return &Result{ + Name: p.Name(), + Status: StatusError, + Error: fmt.Sprintf("failed to get system info: %v", err), + Timestamp: time.Now(), + }, nil + } + + // Convert to MB + unit := uint64(si.Unit) + totalMB := (si.Totalram * unit) / (1024 * 1024) + freeMB := (si.Freeram * unit) / (1024 * 1024) + buffersMB := (si.Bufferram * unit) / (1024 * 1024) + + // Calculate available memory (free + buffers is a simple approximation) + availableMB := freeMB + buffersMB + usedMB := totalMB - availableMB + usedPct := (float64(usedMB) / float64(totalMB)) * 100 + + // Determine status + status := StatusOK + if usedPct > 90 { + status = StatusError + } else if usedPct > 80 { + status = StatusWarn + } + + metrics := map[string]interface{}{ + "total_mb": totalMB, + "used_mb": usedMB, + "available_mb": availableMB, + "used_percentage": usedPct, + "execution_time_ms": time.Since(start).Milliseconds(), + "message": fmt.Sprintf("Memory usage: %dMB / %dMB (%.2f%%)", usedMB, totalMB, usedPct), + } + + return &Result{ + Name: p.Name(), + Status: status, + Metrics: metrics, + Timestamp: time.Now(), + }, nil +} + +// DiskProvider monitors disk usage +type DiskProvider struct { + Path string +} + +func (p 
*DiskProvider) Name() string { + return "disk" +} + +func (p *DiskProvider) Execute(ctx context.Context) (*Result, error) { + start := time.Now() + + path := p.Path + if path == "" { + path = "/" + } + + var stat syscall.Statfs_t + if err := syscall.Statfs(path, &stat); err != nil { + return &Result{ + Name: p.Name(), + Status: StatusError, + Error: fmt.Sprintf("failed to get disk stats: %v", err), + Timestamp: time.Now(), + }, nil + } + + // Calculate sizes in GB + totalGB := float64(stat.Blocks*uint64(stat.Bsize)) / (1024 * 1024 * 1024) + availableGB := float64(stat.Bavail*uint64(stat.Bsize)) / (1024 * 1024 * 1024) + usedGB := totalGB - availableGB + usedPct := (usedGB / totalGB) * 100 + + // Determine status + status := StatusOK + if usedPct > 90 { + status = StatusError + } else if usedPct > 80 { + status = StatusWarn + } + + metrics := map[string]interface{}{ + "path": path, + "total_gb": totalGB, + "used_gb": usedGB, + "available_gb": availableGB, + "used_percentage": usedPct, + "execution_time_ms": time.Since(start).Milliseconds(), + "message": fmt.Sprintf("Disk usage (%s): %.2fGB / %.2fGB (%.2f%%)", path, usedGB, totalGB, usedPct), + } + + return &Result{ + Name: p.Name(), + Status: status, + Metrics: metrics, + Timestamp: time.Now(), + }, nil +} + +// UptimeProvider reports system uptime +type UptimeProvider struct{} + +func (p *UptimeProvider) Name() string { + return "uptime" +} + +func (p *UptimeProvider) Execute(ctx context.Context) (*Result, error) { + start := time.Now() + + var si syscall.Sysinfo_t + if err := syscall.Sysinfo(&si); err != nil { + return &Result{ + Name: p.Name(), + Status: StatusError, + Error: fmt.Sprintf("failed to get system info: %v", err), + Timestamp: time.Now(), + }, nil + } + + uptimeSeconds := si.Uptime + days := uptimeSeconds / 86400 + hours := (uptimeSeconds % 86400) / 3600 + minutes := (uptimeSeconds % 3600) / 60 + + metrics := map[string]interface{}{ + "uptime_seconds": uptimeSeconds, + "days": days, + "hours": hours, + 
// IsBuiltin reports whether name is one of the provider names implemented by
// this package. The comparison is exact and case-sensitive.
func IsBuiltin(name string) bool {
	for _, builtin := range [...]string{"cpu", "memory", "disk", "uptime"} {
		if name == builtin {
			return true
		}
	}
	return false
}
represents the aggregated status response +type Response struct { + Hostname string `json:"hostname"` + Timestamp time.Time `json:"timestamp"` + Providers []*ProviderResult `json:"providers"` + Overall ProviderStatus `json:"overall"` +} + +// Server handles HTTP requests for status +type Server struct { + config *Config + executor ProviderExecutor + server *http.Server +} + +// New creates a new HTTP server +func New(config *Config, executor ProviderExecutor) *Server { + return &Server{ + config: config, + executor: executor, + } +} + +// Start begins serving HTTP requests +func (s *Server) Start() error { + mux := http.NewServeMux() + mux.HandleFunc("/status", s.handleStatus) + mux.HandleFunc("/health", s.handleHealth) + + addr := fmt.Sprintf("%s:%d", s.config.Host, s.config.Port) + s.server = &http.Server{ + Addr: addr, + Handler: mux, + } + + log.Printf("Starting HTTP server on %s", addr) + return s.server.ListenAndServe() +} + +// Shutdown gracefully stops the server +func (s *Server) Shutdown(ctx context.Context) error { + if s.server != nil { + return s.server.Shutdown(ctx) + } + return nil +} + +// handleStatus processes /status requests +func (s *Server) handleStatus(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + ctx := r.Context() + results := s.executor.ExecuteAll(ctx) + + // Determine overall status + overall := StatusOK + for _, result := range results { + if result.Status == StatusError { + overall = StatusError + break + } else if result.Status == StatusWarn && overall != StatusError { + overall = StatusWarn + } + } + + hostname, _ := os.Hostname() + response := Response{ + Hostname: hostname, + Timestamp: time.Now(), + Providers: results, + Overall: overall, + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(response); err != nil { + log.Printf("Error encoding response: %v", err) + } +} + +// 
handleHealth processes /health requests +func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(map[string]string{ + "status": "ok", + }) +} diff --git a/modules/host-status/main.go b/modules/host-status/main.go new file mode 100644 index 0000000..da3b6d7 --- /dev/null +++ b/modules/host-status/main.go @@ -0,0 +1,122 @@ +package main + +import ( + "context" + "flag" + "fmt" + "log" + "os" + "os/signal" + "syscall" + "time" + + "github.com/b4fun/smol-modules/modules/host-status/internal/server" +) + +func main() { + configPath := flag.String("config", "config.toml", "Path to configuration file") + flag.Parse() + + if err := run(*configPath); err != nil { + log.Fatalf("Error: %v", err) + } +} + +func run(configPath string) error { + // Load configuration + config, err := LoadConfig(configPath) + if err != nil { + return fmt.Errorf("failed to load config: %w", err) + } + + log.Printf("Loaded configuration from %s", configPath) + log.Printf("Pull enabled: %v, Push enabled: %v, Providers: %d", + config.Pull.Enabled, config.Push.Enabled, len(config.Providers)) + + // Create provider registry + registry := NewProviderRegistry(config.Providers) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Setup signal handling + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) + + errChan := make(chan error, 2) + + // Start pull server if enabled + var srv *server.Server + if config.Pull.Enabled { + srvConfig := &server.Config{ + Enabled: config.Pull.Enabled, + Port: config.Pull.Port, + Host: config.Pull.Host, + } + adapter := &RegistryAdapter{registry: registry} + srv = server.New(srvConfig, adapter) + go func() { + if err := srv.Start(); err != nil { + errChan <- fmt.Errorf("server error: %w", err) + } + }() + } + + // Start pusher if enabled + var pusher *Pusher + if 
config.Push.Enabled { + pusher = NewPusher(&config.Push, registry) + go func() { + if err := pusher.Start(ctx); err != nil && err != context.Canceled { + errChan <- fmt.Errorf("pusher error: %w", err) + } + }() + } + + // Wait for shutdown signal or error + select { + case <-sigChan: + log.Println("Received shutdown signal") + case err := <-errChan: + log.Printf("Error occurred: %v", err) + } + + // Graceful shutdown + log.Println("Shutting down...") + cancel() + + if pusher != nil { + pusher.Stop() + } + + if srv != nil { + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer shutdownCancel() + if err := srv.Shutdown(shutdownCtx); err != nil { + log.Printf("Server shutdown error: %v", err) + } + } + + log.Println("Shutdown complete") + return nil +} + +// RegistryAdapter adapts ProviderRegistry to server.ProviderExecutor +type RegistryAdapter struct { + registry *ProviderRegistry +} + +func (a *RegistryAdapter) ExecuteAll(ctx context.Context) []*server.ProviderResult { + results := a.registry.ExecuteAll(ctx) + serverResults := make([]*server.ProviderResult, len(results)) + for i, r := range results { + serverResults[i] = &server.ProviderResult{ + Name: r.Name, + Status: server.ProviderStatus(r.Status), + Metrics: r.Metrics, + Timestamp: r.Timestamp, + Error: r.Error, + } + } + return serverResults +} diff --git a/modules/host-status/provider.go b/modules/host-status/provider.go new file mode 100644 index 0000000..2ed4ead --- /dev/null +++ b/modules/host-status/provider.go @@ -0,0 +1,192 @@ +package main + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os/exec" + "time" + + "github.com/b4fun/smol-modules/modules/host-status/internal/providers/host" +) + +// Provider executes and manages status providers +type Provider struct { + config ProviderConfig +} + +// NewProvider creates a new Provider instance +func NewProvider(config ProviderConfig) *Provider { + return &Provider{config: config} +} + +// Execute 
runs the provider command and returns the result +func (p *Provider) Execute(ctx context.Context) (*ProviderResult, error) { + timeout, err := p.config.GetParsedTimeout() + if err != nil { + return nil, fmt.Errorf("invalid timeout: %w", err) + } + + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + cmd := exec.CommandContext(ctx, p.config.Command, p.config.Args...) + + // Set environment variables + if len(p.config.Env) > 0 { + for k, v := range p.config.Env { + cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v)) + } + } + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + start := time.Now() + err = cmd.Run() + execTime := time.Since(start) + + result := &ProviderResult{ + Name: p.config.Name, + Timestamp: time.Now(), + } + + if err != nil { + result.Status = StatusError + result.Error = fmt.Sprintf("execution failed: %v (stderr: %s)", err, stderr.String()) + result.Metrics = map[string]interface{}{ + "execution_time_ms": execTime.Milliseconds(), + } + return result, nil + } + + // Parse stdout as JSON + var providerOutput struct { + Status string `json:"status"` + Metrics map[string]interface{} `json:"metrics"` + Message string `json:"message"` + } + + if err := json.Unmarshal(stdout.Bytes(), &providerOutput); err != nil { + result.Status = StatusError + result.Error = fmt.Sprintf("invalid JSON output: %v", err) + result.Metrics = map[string]interface{}{ + "execution_time_ms": execTime.Milliseconds(), + "raw_output": stdout.String(), + } + return result, nil + } + + // Populate result from provider output + result.Status = ProviderStatus(providerOutput.Status) + if result.Status == "" { + result.Status = StatusOK + } + + result.Metrics = providerOutput.Metrics + if result.Metrics == nil { + result.Metrics = make(map[string]interface{}) + } + result.Metrics["execution_time_ms"] = execTime.Milliseconds() + + if providerOutput.Message != "" { + result.Metrics["message"] = providerOutput.Message + } + + return 
result, nil +} + +// ProviderExecutor is the interface for anything that can execute and return provider results +type ProviderExecutor interface { + Execute(ctx context.Context) (*ProviderResult, error) +} + +// ProviderRegistry manages multiple providers +type ProviderRegistry struct { + providers []ProviderExecutor +} + +// NewProviderRegistry creates a new provider registry +func NewProviderRegistry(configs []ProviderConfig) *ProviderRegistry { + providers := make([]ProviderExecutor, 0, len(configs)) + for _, config := range configs { + // Check if this is a builtin provider + if config.Command == "" && host.IsBuiltin(config.Name) { + // Use builtin provider + builtinProvider := host.New(config.Name, config.Args) + if builtinProvider != nil { + providers = append(providers, NewBuiltinProviderAdapter(builtinProvider, config)) + continue + } + } + // Use external command provider + providers = append(providers, NewProvider(config)) + } + return &ProviderRegistry{providers: providers} +} + +// BuiltinProviderAdapter adapts host.Provider to ProviderExecutor +type BuiltinProviderAdapter struct { + provider host.Provider + config ProviderConfig +} + +func NewBuiltinProviderAdapter(provider host.Provider, config ProviderConfig) *BuiltinProviderAdapter { + return &BuiltinProviderAdapter{ + provider: provider, + config: config, + } +} + +func (a *BuiltinProviderAdapter) Execute(ctx context.Context) (*ProviderResult, error) { + timeout, err := a.config.GetParsedTimeout() + if err != nil { + return nil, fmt.Errorf("invalid timeout: %w", err) + } + + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + result, err := a.provider.Execute(ctx) + if err != nil { + return &ProviderResult{ + Name: a.config.Name, + Status: StatusError, + Error: err.Error(), + Timestamp: time.Now(), + Metrics: make(map[string]interface{}), + }, nil + } + + return &ProviderResult{ + Name: result.Name, + Status: ProviderStatus(result.Status), + Metrics: result.Metrics, + Error: 
result.Error, + Timestamp: result.Timestamp, + }, nil +} + +// ExecuteAll runs all providers and returns their results +func (r *ProviderRegistry) ExecuteAll(ctx context.Context) []*ProviderResult { + results := make([]*ProviderResult, 0, len(r.providers)) + + for _, provider := range r.providers { + result, err := provider.Execute(ctx) + if err != nil { + // Create generic error result + result = &ProviderResult{ + Name: "unknown", + Status: StatusError, + Timestamp: time.Now(), + Error: err.Error(), + Metrics: make(map[string]interface{}), + } + } + results = append(results, result) + } + + return results +} diff --git a/modules/host-status/provider_test.go b/modules/host-status/provider_test.go new file mode 100644 index 0000000..7d2ed5f --- /dev/null +++ b/modules/host-status/provider_test.go @@ -0,0 +1,177 @@ +package main + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" +) + +func TestProviderExecution(t *testing.T) { + // Create a simple test provider script + tmpDir := t.TempDir() + scriptPath := filepath.Join(tmpDir, "test-provider.sh") + + script := `#!/bin/bash +cat < 10*time.Second { + t.Errorf("Timeout took too long: %v", duration) + } +} + +func TestProviderInvalidJSON(t *testing.T) { + tmpDir := t.TempDir() + scriptPath := filepath.Join(tmpDir, "invalid-provider.sh") + + script := `#!/bin/bash +echo "not json" +` + if err := os.WriteFile(scriptPath, []byte(script), 0755); err != nil { + t.Fatalf("Failed to create test script: %v", err) + } + + config := ProviderConfig{ + Name: "invalid", + Command: scriptPath, + Timeout: "10s", + } + + provider := NewProvider(config) + ctx := context.Background() + + result, err := provider.Execute(ctx) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + if result.Status != StatusError { + t.Errorf("Expected error status for invalid JSON, got '%s'", result.Status) + } + + if result.Error == "" { + t.Error("Expected error message for invalid JSON") + } +} + +func 
// ProviderStatus is the string-typed status reported by a provider and used
// for the aggregated overall status.
type ProviderStatus string

// Known status values, serialized as the lowercase strings shown.
const (
	// StatusOK serializes as "ok".
	StatusOK ProviderStatus = "ok"
	// StatusWarn serializes as "warn".
	StatusWarn ProviderStatus = "warn"
	// StatusError serializes as "error".
	StatusError ProviderStatus = "error"
)

// ProviderResult is the output of a single provider run. Error is omitted
// from the JSON encoding when empty.
type ProviderResult struct {
	Name      string                 `json:"name"`
	Status    ProviderStatus         `json:"status"`
	Metrics   map[string]interface{} `json:"metrics"`
	Timestamp time.Time              `json:"timestamp"`
	Error     string                 `json:"error,omitempty"`
}

// StatusResponse is the aggregated status document: the reporting host, the
// time of the report, every provider's result, and an overall status.
type StatusResponse struct {
	Hostname  string            `json:"hostname"`
	Timestamp time.Time         `json:"timestamp"`
	Providers []*ProviderResult `json:"providers"`
	Overall   ProviderStatus    `json:"overall"`
}