From 96ac453bd0d623687e854a352395050be5d2550d Mon Sep 17 00:00:00 2001 From: Jack Schultz Date: Wed, 10 Jun 2026 08:44:56 -0500 Subject: [PATCH 1/2] Plan A: Land dba wal command, prep v0.6.0 release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Built: dba wal diagnostic (src/commands/wal.rs) — WAL config, directory size, archiving status, generation rate, issue detection; JSON envelope with pgcrate.diagnostics.wal schema; capabilities wiring; integration tests - Fixed: check_function_access used query_one with LIMIT 0 (always 0 rows -> always errored -> diagnostics.wal capability always reported degraded); switched to query() so access detection is correct - Version 0.6.0 in Cargo.toml/Cargo.lock; added v0.6.0 CHANGELOG section - Validation: cargo fmt --check (pass), cargo clippy --all-targets (exit 0, no findings on touched files), cargo test --test integration (155 pass), wal+capabilities unit tests pass, dba wal/--json verified against live PG18 - Notes: plan's gotcha about an unused import in tests/diagnostics/wal.rs was stale (no such warning); pre-existing failures in doctor/role/extension/ timeout integration tests reproduce on clean main and are out of scope --- CHANGELOG.md | 23 ++ Cargo.lock | 2 +- Cargo.toml | 2 +- schemas/wal.schema.json | 81 ++++++ src/commands/capabilities.rs | 59 ++++ src/commands/mod.rs | 1 + src/commands/wal.rs | 507 +++++++++++++++++++++++++++++++++++ src/main.rs | 18 ++ src/output.rs | 1 + tests/diagnostics/mod.rs | 1 + tests/diagnostics/wal.rs | 245 +++++++++++++++++ 11 files changed, 938 insertions(+), 2 deletions(-) create mode 100644 schemas/wal.schema.json create mode 100644 src/commands/wal.rs create mode 100644 tests/diagnostics/wal.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c721be..5d1d2cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog +## v0.6.0 + +**WAL Monitoring** + +### New Commands + +- **`pgcrate dba wal`**: Monitor WAL generation, archiving, and disk consumption + - WAL configuration: `wal_level`, segment size, current LSN + - WAL directory size and segment count (via `pg_ls_waldir()`; requires `pg_monitor` or superuser) + - Archiving status: mode, command, last archived/failed WAL, failed count, pending file count + - Generation rate estimated from `pg_stat_wal` (PostgreSQL 14+) + - Issue detection with severity: oversized WAL directory, archive failures, archive lag, `wal_level=minimal`, archiving disabled + - Exit codes: 0 healthy, 1 warning, 2 critical + - Full JSON support with `pgcrate.diagnostics.wal` schema + +### Improvements + +- **`pgcrate inspect capabilities`**: Reports the `diagnostics.wal` capability + - Checks `pg_stat_archiver` SELECT and `pg_ls_waldir()` access + - Degrades gracefully when `pg_ls_waldir()` is unavailable (directory size and pending file counts omitted) + +--- + ## v0.5.0 **Query Analysis and Index Remediation** diff --git a/Cargo.lock b/Cargo.lock index 6b0ae51..92e298a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -775,7 +775,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "pgcrate" -version = "0.4.0" +version = "0.6.0" dependencies = [ "anyhow", "bytes", diff --git a/Cargo.toml b/Cargo.toml index f1b9eef..6b825ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pgcrate" -version = "0.4.0" +version = "0.6.0" edition = "2021" description = "PostgreSQL companion for teams not using Rails or Django. Migrations, introspection, diffing, and more." license = "MIT" diff --git a/schemas/wal.schema.json b/schemas/wal.schema.json new file mode 100644 index 0000000..d7ec53e --- /dev/null +++ b/schemas/wal.schema.json @@ -0,0 +1,81 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "pgcrate.diagnostics.wal", + "title": "pgcrate wal output", + "description": "WAL generation, archiving, and disk consumption analysis", + "allOf": [ + { "$ref": "envelope.schema.json" } + ], + "properties": { + "schema_id": { "const": "pgcrate.diagnostics.wal" }, + "data": { "$ref": "#/$defs/walData" } + }, + "$defs": { + "walData": { + "type": "object", + "additionalProperties": false, + "required": ["wal_level", "current_wal_lsn", "wal_segment_size_bytes", "wal_directory", "archiving", "generation_rate", "issues", "overall_status"], + "properties": { + "wal_level": { "type": "string" }, + "current_wal_lsn": { "type": "string" }, + "wal_segment_size_bytes": { "type": "integer" }, + "wal_directory": { "$ref": "#/$defs/walDirectory" }, + "archiving": { "$ref": "#/$defs/archiveStatus" }, + "generation_rate": { "$ref": "#/$defs/generationRate" }, + "issues": { + "type": "array", + "items": { "$ref": "#/$defs/walIssue" } + }, + "overall_status": { "$ref": "#/$defs/walStatus" } + } + }, + "walStatus": { + "type": "string", + "enum": ["healthy", "warning", "critical"] + }, + "walDirectory": { + "type": "object", + "additionalProperties": false, + "required": [], + "properties": { + "size_bytes": { "type": ["integer", "null"] }, + "segment_count": { "type": ["integer", "null"] } + } + }, + "archiveStatus": { + "type": "object", + "additionalProperties": false, + "required": ["enabled", "archive_mode", "failed_count"], + "properties": { + "enabled": { "type": "boolean" }, + "archive_mode": { "type": "string" }, + "archive_command": { "type": ["string", "null"] }, + "last_archived_wal": { "type": ["string", "null"] }, + "last_archived_time": { "type": ["string", "null"] }, + "failed_count": { "type": "integer" }, + "last_failed_wal": { "type": ["string", "null"] }, + "last_failed_time": { "type": ["string", "null"] }, + "pending_count": { "type": ["integer", "null"] } + } + }, + "generationRate": { + "type": "object", + "additionalProperties": false, + "required": ["measurement_note"], + "properties": { + "bytes_per_second": { "type": ["number", "null"] }, + "measurement_note": { "type": "string" } + } + }, + "walIssue": { + "type": "object", + "additionalProperties": false, + "required": ["code", "message", "severity"], + "properties": { + "code": { "type": "string" }, + "message": { "type": "string" }, + "severity": { "$ref": "#/$defs/walStatus" } + } + } + } +} diff --git a/src/commands/capabilities.rs b/src/commands/capabilities.rs index b759c0d..3f1e477 100644 --- a/src/commands/capabilities.rs +++ b/src/commands/capabilities.rs @@ -82,6 +82,8 @@ pub async fn run_capabilities(client: &Client, read_only: bool) -> Result Result bool .unwrap_or(false) } +async fn check_function_access(client: &Client, function: &str) -> bool { + // Try to actually call the function - some functions require specific roles + // (e.g., pg_ls_waldir requires pg_monitor or superuser). LIMIT 0 still + // triggers the permission check at execution without materializing rows; + // query() (not query_one) is required since the result is zero rows. + client + .query(&format!("SELECT 1 FROM {} LIMIT 0", function), &[]) + .await + .is_ok() +} + fn check_locks_capability( has_pg_stat_activity: bool, has_pg_cancel: bool, @@ -381,6 +396,50 @@ fn check_replication_capability(has_pg_stat_replication: bool) -> CapabilityInfo } } +fn check_wal_capability(has_pg_stat_archiver: bool, has_pg_ls_waldir: bool) -> CapabilityInfo { + let requirements = vec![ + Requirement { + what: "pg_stat_archiver SELECT".to_string(), + met: has_pg_stat_archiver, + }, + Requirement { + what: "pg_ls_waldir() access (pg_monitor role)".to_string(), + met: has_pg_ls_waldir, + }, + ]; + + let mut reasons = vec![]; + let mut limitations = vec![]; + + let status = if !has_pg_stat_archiver { + reasons.push(ReasonInfo::new( + ReasonCode::MissingPrivilege, + "Cannot read pg_stat_archiver", + )); + CapabilityStatus::Unavailable + } else if !has_pg_ls_waldir { + reasons.push(ReasonInfo::new( + ReasonCode::MissingPrivilege, + "Cannot access pg_ls_waldir (requires pg_monitor or superuser)", + )); + limitations.push("WAL directory size and segment count unavailable".to_string()); + limitations.push("Archive pending file count unavailable".to_string()); + CapabilityStatus::Degraded + } else { + CapabilityStatus::Available + }; + + CapabilityInfo { + id: "diagnostics.wal", + name: "WAL", + description: "WAL generation, archiving, and disk consumption", + status, + reasons, + requirements, + limitations, + } +} + fn check_bloat_capability(has_pg_stat_user_tables: bool) -> CapabilityInfo { let requirements = vec![Requirement { what: "pg_stat_user_tables SELECT".to_string(), diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 5daeede..21eaf83 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -33,6 +33,7 @@ pub mod stats_age; pub mod storage; pub mod triage; pub mod vacuum; +pub mod wal; pub mod xid; // Re-export snapshot commands from new module diff --git a/src/commands/wal.rs b/src/commands/wal.rs new file mode 100644 index 0000000..153f658 --- /dev/null +++ b/src/commands/wal.rs @@ -0,0 +1,507 @@ +//! WAL command: Monitor Write-Ahead Log health. +//! +//! Shows WAL generation rate, archiving status, and disk consumption. +//! Helps identify WAL accumulation issues and archive lag. + +use anyhow::Result; +use serde::Serialize; +use tokio_postgres::Client; + +// Thresholds for severity +const WAL_DIR_WARNING_BYTES: i64 = 10_737_418_240; // 10GB +const WAL_DIR_CRITICAL_BYTES: i64 = 53_687_091_200; // 50GB +const ARCHIVE_LAG_WARNING_FILES: i64 = 10; +const ARCHIVE_LAG_CRITICAL_FILES: i64 = 100; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum WalStatus { + Healthy, + Warning, + Critical, +} + +#[derive(Debug, Clone, Serialize)] +pub struct WalDirectory { + pub size_bytes: Option, + pub segment_count: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct ArchiveStatus { + pub enabled: bool, + pub archive_mode: String, + pub archive_command: Option, + pub last_archived_wal: Option, + pub last_archived_time: Option, + pub failed_count: i64, + pub last_failed_wal: Option, + pub last_failed_time: Option, + pub pending_count: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct GenerationRate { + pub bytes_per_second: Option, + pub measurement_note: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct WalIssue { + pub code: String, + pub message: String, + pub severity: WalStatus, +} + +#[derive(Debug, Serialize)] +pub struct WalResult { + pub wal_level: String, + pub current_wal_lsn: String, + pub wal_segment_size_bytes: i64, + pub wal_directory: WalDirectory, + pub archiving: ArchiveStatus, + pub generation_rate: GenerationRate, + pub issues: Vec, + pub overall_status: WalStatus, +} + +async fn get_wal_settings(client: &Client) -> Result<(String, i64)> { + let query = r#" +SELECT + current_setting('wal_level') AS wal_level, + pg_size_bytes(current_setting('wal_segment_size'))::bigint AS wal_segment_size +"#; + let row = client.query_one(query, &[]).await?; + let wal_level: String = row.get("wal_level"); + let wal_segment_size: i64 = row.get("wal_segment_size"); + Ok((wal_level, wal_segment_size)) +} + +async fn get_current_lsn(client: &Client) -> Result { + let row = client + .query_one("SELECT pg_current_wal_lsn()::text AS lsn", &[]) + .await?; + Ok(row.get("lsn")) +} + +async fn get_wal_directory_info(client: &Client) -> Result { + // Try to get WAL directory stats using pg_ls_waldir() - requires superuser or pg_monitor + let query = r#" +SELECT + COALESCE(SUM(size), 0)::bigint AS total_size, + COUNT(*)::bigint AS segment_count +FROM pg_ls_waldir() +"#; + + match client.query_one(query, &[]).await { + Ok(row) => Ok(WalDirectory { + size_bytes: Some(row.get("total_size")), + segment_count: Some(row.get("segment_count")), + }), + Err(_) => { + // Fallback: no access to pg_ls_waldir + Ok(WalDirectory { + size_bytes: None, + segment_count: None, + }) + } + } +} + +async fn get_archive_status(client: &Client) -> Result { + // Get archive settings + let settings_query = r#" +SELECT + current_setting('archive_mode') AS archive_mode, + current_setting('archive_command') AS archive_command +"#; + let settings_row = client.query_one(settings_query, &[]).await?; + let archive_mode: String = settings_row.get("archive_mode"); + let archive_command: String = settings_row.get("archive_command"); + + let enabled = archive_mode == "on" || archive_mode == "always"; + + // Get archiver stats + let stats_query = r#" +SELECT + last_archived_wal, + last_archived_time::text, + failed_count, + last_failed_wal, + last_failed_time::text +FROM pg_stat_archiver +"#; + let stats_row = client.query_one(stats_query, &[]).await?; + + // Calculate pending files (WAL files not yet archived) + let pending_count = if enabled { + let pending_query = r#" +SELECT COUNT(*)::bigint AS pending +FROM pg_ls_waldir() +WHERE name ~ '^[0-9A-F]{24}$' + AND name > COALESCE( + (SELECT last_archived_wal FROM pg_stat_archiver), + '000000000000000000000000' + ) +"#; + match client.query_one(pending_query, &[]).await { + Ok(row) => Some(row.get::<_, i64>("pending")), + Err(_) => None, // No access to pg_ls_waldir + } + } else { + None + }; + + Ok(ArchiveStatus { + enabled, + archive_mode, + archive_command: if archive_command.is_empty() { + None + } else { + Some(archive_command) + }, + last_archived_wal: stats_row.get("last_archived_wal"), + last_archived_time: stats_row.get("last_archived_time"), + failed_count: stats_row.get("failed_count"), + last_failed_wal: stats_row.get("last_failed_wal"), + last_failed_time: stats_row.get("last_failed_time"), + pending_count, + }) +} + +async fn get_generation_rate(client: &Client) -> Result { + // Try to estimate WAL generation from pg_stat_wal (PG14+) + // Note: wal_bytes is numeric type, cast to bigint for Rust compatibility + let query = r#" +SELECT + wal_bytes::bigint AS wal_bytes, + EXTRACT(EPOCH FROM (now() - stats_reset))::float8 AS seconds_since_reset +FROM pg_stat_wal +"#; + + match client.query_one(query, &[]).await { + Ok(row) => { + let wal_bytes: i64 = row.get("wal_bytes"); + let seconds: Option = row.get("seconds_since_reset"); + + let bytes_per_second = seconds.and_then(|s| { + if s > 0.0 { + Some(wal_bytes as f64 / s) + } else { + None + } + }); + + let note = match seconds { + Some(s) if s > 86400.0 => { + format!("Average over {:.1} days since stats reset", s / 86400.0) + } + Some(s) if s > 3600.0 => { + format!("Average over {:.1} hours since stats reset", s / 3600.0) + } + Some(s) => format!("Average over {:.0} seconds since stats reset", s), + None => "Stats reset time unknown".to_string(), + }; + + Ok(GenerationRate { + bytes_per_second, + measurement_note: note, + }) + } + Err(_) => { + // pg_stat_wal not available (PG < 14) + Ok(GenerationRate { + bytes_per_second: None, + measurement_note: "pg_stat_wal not available (requires PostgreSQL 14+)".to_string(), + }) + } + } +} + +fn analyze_issues( + wal_dir: &WalDirectory, + archive: &ArchiveStatus, + wal_level: &str, +) -> Vec { + let mut issues = Vec::new(); + + // Check WAL directory size + if let Some(size) = wal_dir.size_bytes { + if size >= WAL_DIR_CRITICAL_BYTES { + issues.push(WalIssue { + code: "wal_dir_critical".to_string(), + message: format!( + "WAL directory is {} - check replication slots and archive status", + format_bytes(size) + ), + severity: WalStatus::Critical, + }); + } else if size >= WAL_DIR_WARNING_BYTES { + issues.push(WalIssue { + code: "wal_dir_large".to_string(), + message: format!( + "WAL directory is {} - monitor for growth", + format_bytes(size) + ), + severity: WalStatus::Warning, + }); + } + } + + // Check archive failures + if archive.enabled && archive.failed_count > 0 { + issues.push(WalIssue { + code: "archive_failures".to_string(), + message: format!( + "{} archive failures - check archive_command and destination", + archive.failed_count + ), + severity: if archive.failed_count > 10 { + WalStatus::Critical + } else { + WalStatus::Warning + }, + }); + } + + // Check archive lag + if let Some(pending) = archive.pending_count { + if pending >= ARCHIVE_LAG_CRITICAL_FILES { + issues.push(WalIssue { + code: "archive_lag_critical".to_string(), + message: format!("{} WAL files pending archive", pending), + severity: WalStatus::Critical, + }); + } else if pending >= ARCHIVE_LAG_WARNING_FILES { + issues.push(WalIssue { + code: "archive_lag".to_string(), + message: format!("{} WAL files pending archive", pending), + severity: WalStatus::Warning, + }); + } + } + + // Check wal_level for replication capability + if wal_level == "minimal" { + issues.push(WalIssue { + code: "wal_level_minimal".to_string(), + message: "wal_level=minimal - replication and PITR not possible".to_string(), + severity: WalStatus::Warning, + }); + } + + // Check archive disabled with replication-capable wal_level + if !archive.enabled && (wal_level == "replica" || wal_level == "logical") { + issues.push(WalIssue { + code: "archive_disabled".to_string(), + message: "Archiving disabled - point-in-time recovery not possible".to_string(), + severity: WalStatus::Warning, + }); + } + + issues +} + +fn calculate_overall_status(issues: &[WalIssue]) -> WalStatus { + if issues.iter().any(|i| i.severity == WalStatus::Critical) { + WalStatus::Critical + } else if issues.iter().any(|i| i.severity == WalStatus::Warning) { + WalStatus::Warning + } else { + WalStatus::Healthy + } +} + +pub async fn get_wal(client: &Client) -> Result { + let (wal_level, wal_segment_size) = get_wal_settings(client).await?; + let current_lsn = get_current_lsn(client).await?; + let wal_directory = get_wal_directory_info(client).await?; + let archiving = get_archive_status(client).await?; + let generation_rate = get_generation_rate(client).await?; + + let issues = analyze_issues(&wal_directory, &archiving, &wal_level); + let overall_status = calculate_overall_status(&issues); + + Ok(WalResult { + wal_level, + current_wal_lsn: current_lsn, + wal_segment_size_bytes: wal_segment_size, + wal_directory, + archiving, + generation_rate, + issues, + overall_status, + }) +} + +fn format_bytes(bytes: i64) -> String { + if bytes >= 1_073_741_824 { + format!("{:.1} GB", bytes as f64 / 1_073_741_824.0) + } else if bytes >= 1_048_576 { + format!("{:.1} MB", bytes as f64 / 1_048_576.0) + } else if bytes >= 1024 { + format!("{:.1} KB", bytes as f64 / 1024.0) + } else { + format!("{} B", bytes) + } +} + +fn status_emoji(status: &WalStatus) -> &'static str { + match status { + WalStatus::Healthy => "✓", + WalStatus::Warning => "⚠", + WalStatus::Critical => "✗", + } +} + +pub fn print_human(result: &WalResult, quiet: bool) { + if !quiet { + println!( + "WAL HEALTH: {} {}", + status_emoji(&result.overall_status), + match result.overall_status { + WalStatus::Healthy => "healthy", + WalStatus::Warning => "warning", + WalStatus::Critical => "critical", + } + ); + println!(); + } + + // WAL Settings + println!("WAL CONFIGURATION:"); + println!(" wal_level: {}", result.wal_level); + println!( + " segment_size: {}", + format_bytes(result.wal_segment_size_bytes) + ); + println!(" current_lsn: {}", result.current_wal_lsn); + println!(); + + // WAL Directory + println!("WAL DIRECTORY:"); + match ( + result.wal_directory.size_bytes, + result.wal_directory.segment_count, + ) { + (Some(size), Some(count)) => { + println!(" size: {}", format_bytes(size)); + println!(" segment_count: {}", count); + } + _ => { + println!(" (requires pg_monitor or superuser to read pg_ls_waldir)"); + } + } + println!(); + + // Archive Status + println!("ARCHIVING:"); + println!( + " mode: {}", + if result.archiving.enabled { + "enabled" + } else { + "disabled" + } + ); + if let Some(ref cmd) = result.archiving.archive_command { + let cmd_display = if cmd.len() > 50 { + format!("{}...", &cmd[..47]) + } else { + cmd.clone() + }; + println!(" command: {}", cmd_display); + } + if result.archiving.enabled { + if let Some(ref wal) = result.archiving.last_archived_wal { + println!(" last_archived: {}", wal); + } + if let Some(pending) = result.archiving.pending_count { + println!(" pending_files: {}", pending); + } + if result.archiving.failed_count > 0 { + println!(" failed_count: {}", result.archiving.failed_count); + if let Some(ref wal) = result.archiving.last_failed_wal { + println!(" last_failed: {}", wal); + } + } + } + println!(); + + // Generation Rate + println!("WAL GENERATION:"); + match result.generation_rate.bytes_per_second { + Some(rate) => { + println!(" rate: {}/s", format_bytes(rate as i64)); + println!( + " note: {}", + result.generation_rate.measurement_note + ); + } + None => { + println!(" {}", result.generation_rate.measurement_note); + } + } + + // Issues + if !result.issues.is_empty() { + println!(); + println!("ISSUES:"); + for issue in &result.issues { + println!(" {} {}", status_emoji(&issue.severity), issue.message); + } + } +} + +pub fn print_json( + result: &WalResult, + timeouts: Option, +) -> Result<()> { + use crate::output::{schema, DiagnosticOutput, Severity}; + + let severity = match result.overall_status { + WalStatus::Healthy => Severity::Healthy, + WalStatus::Warning => Severity::Warning, + WalStatus::Critical => Severity::Critical, + }; + + let output = match timeouts { + Some(t) => DiagnosticOutput::with_timeouts(schema::WAL, result, severity, t), + None => DiagnosticOutput::new(schema::WAL, result, severity), + }; + output.print()?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_format_bytes() { + assert_eq!(format_bytes(500), "500 B"); + assert_eq!(format_bytes(1024), "1.0 KB"); + assert_eq!(format_bytes(1_500_000), "1.4 MB"); + assert_eq!(format_bytes(2_000_000_000), "1.9 GB"); + } + + #[test] + fn test_calculate_overall_status() { + let empty: Vec = vec![]; + assert_eq!(calculate_overall_status(&empty), WalStatus::Healthy); + + let warning = vec![WalIssue { + code: "test".to_string(), + message: "test".to_string(), + severity: WalStatus::Warning, + }]; + assert_eq!(calculate_overall_status(&warning), WalStatus::Warning); + + let critical = vec![WalIssue { + code: "test".to_string(), + message: "test".to_string(), + severity: WalStatus::Critical, + }]; + assert_eq!(calculate_overall_status(&critical), WalStatus::Critical); + } +} diff --git a/src/main.rs b/src/main.rs index 0c84344..d195c6d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -848,6 +848,8 @@ enum DbaCommands { AutovacuumProgress, /// Review PostgreSQL configuration settings Config, + /// Monitor WAL generation, archiving, and disk consumption + Wal, } /// Schema and permission inspection commands @@ -2007,6 +2009,22 @@ async fn run(cli: Cli, output: &Output) -> Result<()> { std::process::exit(code); } } + DbaCommands::Wal => { + let result = commands::wal::get_wal(client).await?; + + if cli.json { + commands::wal::print_json(&result, timeouts)?; + } else { + commands::wal::print_human(&result, cli.quiet); + } + + // Exit code based on overall status + let is_critical = result.overall_status == commands::wal::WalStatus::Critical; + let is_warning = result.overall_status == commands::wal::WalStatus::Warning; + if let Some(code) = exit_codes::for_finding(cli.json, is_critical, is_warning) { + std::process::exit(code); + } + } } } Commands::Inspect { command } => { diff --git a/src/output.rs b/src/output.rs index 2a9aa43..12fa83d 100644 --- a/src/output.rs +++ b/src/output.rs @@ -438,6 +438,7 @@ pub mod schema { pub const CHECKPOINTS: &str = "pgcrate.diagnostics.checkpoints"; pub const AUTOVACUUM_PROGRESS: &str = "pgcrate.diagnostics.autovacuum_progress"; pub const CONFIG: &str = "pgcrate.diagnostics.config"; + pub const WAL: &str = "pgcrate.diagnostics.wal"; } // ============================================================================= diff --git a/tests/diagnostics/mod.rs b/tests/diagnostics/mod.rs index 9c79ffb..003f767 100644 --- a/tests/diagnostics/mod.rs +++ b/tests/diagnostics/mod.rs @@ -6,3 +6,4 @@ mod locks; mod maintenance; mod replication; mod sequences_scenarios; +mod wal; diff --git a/tests/diagnostics/wal.rs b/tests/diagnostics/wal.rs new file mode 100644 index 0000000..2a538f0 --- /dev/null +++ b/tests/diagnostics/wal.rs @@ -0,0 +1,245 @@ +//! Integration tests for WAL diagnostic command. +//! +//! Tests run against a standard PostgreSQL instance (typically with archiving disabled), +//! testing the basic WAL status path and JSON structure. + +use crate::common::{parse_json, stdout, TestDatabase, TestProject}; + +#[test] +fn test_wal_basic() { + skip_if_no_db!(); + let db = TestDatabase::new(); + let project = TestProject::empty(&db); + + std::fs::write( + project.path("pgcrate.toml"), + format!( + r#"[database] +url = "{}" +"#, + db.url() + ), + ) + .unwrap(); + + // WAL command should succeed (may return warning for disabled archiving) + let output = project.run_pgcrate(&["dba", "wal"]); + + // Exit code 0 (healthy) or 1 (warning) are both acceptable + assert!( + output.status.code() == Some(0) || output.status.code() == Some(1), + "WAL command should return healthy or warning status, got: {:?}", + output.status.code() + ); +} + +#[test] +fn test_wal_json_structure() { + skip_if_no_db!(); + let db = TestDatabase::new(); + let project = TestProject::empty(&db); + + std::fs::write( + project.path("pgcrate.toml"), + format!( + r#"[database] +url = "{}" +"#, + db.url() + ), + ) + .unwrap(); + + let output = project.run_pgcrate(&["dba", "wal", "--json"]); + + // May be warning (exit code 1) if archiving is disabled + assert!( + output.status.code() == Some(0) || output.status.code() == Some(1), + "WAL command should return healthy or warning status" + ); + + let json = parse_json(&output); + assert!(json.is_object(), "Should return JSON object"); + + // Schema versioning fields + assert_eq!(json.get("ok"), Some(&serde_json::json!(true))); + assert_eq!( + json.get("schema_id"), + Some(&serde_json::json!("pgcrate.diagnostics.wal")) + ); + assert!(json.get("schema_version").is_some()); + assert!(json.get("tool_version").is_some()); + + // Data fields + let data = json.get("data").expect("JSON should have data field"); + assert!(data.get("wal_level").is_some(), "Should have wal_level"); + assert!( + data.get("current_wal_lsn").is_some(), + "Should have current_wal_lsn" + ); + assert!( + data.get("wal_segment_size_bytes").is_some(), + "Should have wal_segment_size_bytes" + ); + assert!( + data.get("wal_directory").is_some(), + "Should have wal_directory" + ); + assert!(data.get("archiving").is_some(), "Should have archiving"); + assert!( + data.get("generation_rate").is_some(), + "Should have generation_rate" + ); + assert!(data.get("issues").is_some(), "Should have issues array"); + assert!( + data.get("overall_status").is_some(), + "Should have overall_status" + ); +} + +#[test] +fn test_wal_shows_wal_level() { + skip_if_no_db!(); + let db = TestDatabase::new(); + let project = TestProject::empty(&db); + + std::fs::write( + project.path("pgcrate.toml"), + format!( + r#"[database] +url = "{}" +"#, + db.url() + ), + ) + .unwrap(); + + let output = project.run_pgcrate(&["dba", "wal", "--json"]); + let json = parse_json(&output); + let data = json.get("data").expect("JSON should have data field"); + + // wal_level should be a valid PostgreSQL wal_level + let wal_level = data + .get("wal_level") + .and_then(|v| v.as_str()) + .expect("wal_level should be a string"); + + assert!( + ["minimal", "replica", "logical"].contains(&wal_level), + "wal_level should be minimal, replica, or logical, got: {}", + wal_level + ); +} + +#[test] +fn test_wal_archiving_structure() { + skip_if_no_db!(); + let db = TestDatabase::new(); + let project = TestProject::empty(&db); + + std::fs::write( + project.path("pgcrate.toml"), + format!( + r#"[database] +url = "{}" +"#, + db.url() + ), + ) + .unwrap(); + + let output = project.run_pgcrate(&["dba", "wal", "--json"]); + let json = parse_json(&output); + let data = json.get("data").expect("JSON should have data field"); + let archiving = data.get("archiving").expect("Should have archiving object"); + + // Check archiving fields + assert!( + archiving.get("enabled").is_some(), + "archiving should have enabled field" + ); + assert!( + archiving.get("archive_mode").is_some(), + "archiving should have archive_mode field" + ); + assert!( + archiving.get("failed_count").is_some(), + "archiving should have failed_count field" + ); +} + +#[test] +fn test_wal_human_output() { + skip_if_no_db!(); + let db = TestDatabase::new(); + let project = TestProject::empty(&db); + + std::fs::write( + project.path("pgcrate.toml"), + format!( + r#"[database] +url = "{}" +"#, + db.url() + ), + ) + .unwrap(); + + let output = project.run_pgcrate(&["dba", "wal"]); + let out = stdout(&output); + + // Should show WAL health header + assert!( + out.contains("WAL HEALTH") || out.contains("WAL CONFIGURATION"), + "Should show WAL sections: {}", + out + ); + + // Should show wal_level + assert!( + out.contains("wal_level") || out.contains("replica") || out.contains("logical"), + "Should show wal_level info: {}", + out + ); + + // Should show archiving section + assert!( + out.contains("ARCHIVING") || out.contains("archive"), + "Should show archiving info: {}", + out + ); +} + +#[test] +fn test_wal_segment_size_is_valid() { + skip_if_no_db!(); + let db = TestDatabase::new(); + let project = TestProject::empty(&db); + + std::fs::write( + project.path("pgcrate.toml"), + format!( + r#"[database] +url = "{}" +"#, + db.url() + ), + ) + .unwrap(); + + let output = project.run_pgcrate(&["dba", "wal", "--json"]); + let json = parse_json(&output); + let data = json.get("data").expect("JSON should have data field"); + + let segment_size = data + .get("wal_segment_size_bytes") + .and_then(|v| v.as_i64()) + .expect("wal_segment_size_bytes should be an integer"); + + // Default is 16MB, but can be configured between 1MB and 1GB + assert!( + (1_048_576..=1_073_741_824).contains(&segment_size), + "wal_segment_size should be between 1MB and 1GB, got: {}", + segment_size + ); +} From d3d1f9760da3b05939e9c935d623997191d54dcb Mon Sep 17 00:00:00 2001 From: Jack Schultz Date: Wed, 10 Jun 2026 08:51:43 -0500 Subject: [PATCH 2/2] Review: land dba wal v0.6.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fixed: CHANGELOG v0.6.0 referenced nonexistent 'pgcrate inspect capabilities'; corrected to 'pgcrate capabilities' - Noted: check_function_access is net-new code in this commit (not a repair of pre-existing code as the task framing implied); it correctly uses query() not query_one() — no LIMIT 0 trap remains in capabilities.rs - Noted: 18 pre-existing integration failures (doctor/extension/role/timeout) confirmed identical on main — environmental drift, out of scope --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d1d2cd..445499f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ ### Improvements -- **`pgcrate inspect capabilities`**: Reports the `diagnostics.wal` capability +- **`pgcrate capabilities`**: Reports the `diagnostics.wal` capability - Checks `pg_stat_archiver` SELECT and `pg_ls_waldir()` access - Degrades gracefully when `pg_ls_waldir()` is unavailable (directory size and pending file counts omitted)