From 4ec88f452b33d90c5e65af49bec38ca92e07a809 Mon Sep 17 00:00:00 2001
From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com>
Date: Thu, 7 May 2026 12:55:58 -0500
Subject: [PATCH] =?UTF-8?q?Fix=20#938=20=E2=80=94=20preserve=20mute=20rule?=
 =?UTF-8?q?s=20across=20size-triggered=20DB=20reset?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When monitor.duckdb exceeds 512 MB, ArchiveAllAndResetAsync deletes the file
and reinitializes empty tables. config_mute_rules was not in ArchivableTables,
so all mute rules — including permanent rules with expires_at_utc = NULL —
were silently lost.

Export config_mute_rules and dismissed_archive_alerts to a temp Parquet dir
before the reset and re-import after. Parquet roundtrip keeps this
schema-agnostic. On restore failure, the temp dir is retained for manual
recovery.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 Lite.Tests/MuteRulesSurviveResetTests.cs | 153 +++++++++++++++++++++++
 Lite/Services/ArchiveService.cs          |  87 ++++++++++++-
 2 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 Lite.Tests/MuteRulesSurviveResetTests.cs

diff --git a/Lite.Tests/MuteRulesSurviveResetTests.cs b/Lite.Tests/MuteRulesSurviveResetTests.cs
new file mode 100644
index 0000000..b8a1eb2
--- /dev/null
+++ b/Lite.Tests/MuteRulesSurviveResetTests.cs
@@ -0,0 +1,153 @@
+using System;
+using System.IO;
+using System.Threading.Tasks;
+using DuckDB.NET.Data;
+using PerformanceMonitorLite.Database;
+using PerformanceMonitorLite.Services;
+using Xunit;
+
+namespace PerformanceMonitorLite.Tests;
+
+/// <summary>
+/// Issue #938 — mute rules (especially expires_at_utc = NULL "permanent" rules)
+/// were silently lost when ArchiveAllAndResetAsync fired due to the 512 MB size threshold.
+/// The reset deletes monitor.duckdb outright, and config_mute_rules was not preserved.
+/// </summary>
+public class MuteRulesSurviveResetTests : IDisposable
+{
+    private readonly string _tempDir;
+    private readonly string _dbPath;
+    private readonly string _archiveDir;
+
+    public MuteRulesSurviveResetTests()
+    {
+        _tempDir = Path.Combine(Path.GetTempPath(), "LiteTests_" + Guid.NewGuid().ToString("N")[..8]);
+        Directory.CreateDirectory(_tempDir);
+        _dbPath = Path.Combine(_tempDir, "test.duckdb");
+        _archiveDir = Path.Combine(_tempDir, "archive");
+        Directory.CreateDirectory(_archiveDir);
+    }
+
+    public void Dispose()
+    {
+        try
+        {
+            if (Directory.Exists(_tempDir))
+                Directory.Delete(_tempDir, recursive: true);
+        }
+        catch
+        {
+            /* Best-effort cleanup */
+        }
+    }
+
+    [Fact]
+    public async Task PermanentMuteRule_SurvivesArchiveAllAndReset()
+    {
+        var initializer = new DuckDbInitializer(_dbPath);
+        await initializer.InitializeAsync();
+
+        var ruleId = Guid.NewGuid().ToString();
+        var createdAt = new DateTime(2026, 5, 1, 12, 0, 0, DateTimeKind.Utc);
+
+        await InsertMuteRuleAsync(ruleId, createdAt, expiresAtUtc: null,
+            serverName: "ProdSql01", metricName: "Blocking Detected");
+
+        var archiveService = new ArchiveService(initializer, _archiveDir);
+        await archiveService.ArchiveAllAndResetAsync();
+
+        var (count, expiresIsNull, serverName) = await ReadMuteRuleAsync(ruleId);
+
+        Assert.Equal(1, count);
+        Assert.True(expiresIsNull);
+        Assert.Equal("ProdSql01", serverName);
+    }
+
+    [Fact]
+    public async Task ExpiringMuteRule_SurvivesArchiveAllAndReset()
+    {
+        var initializer = new DuckDbInitializer(_dbPath);
+        await initializer.InitializeAsync();
+
+        var ruleId = Guid.NewGuid().ToString();
+        var createdAt = DateTime.UtcNow;
+        var expiresAt = createdAt.AddDays(7);
+
+        await InsertMuteRuleAsync(ruleId, createdAt, expiresAt,
+            serverName: "ProdSql02", metricName: "Long-Running Job");
+
+        var archiveService = new ArchiveService(initializer, _archiveDir);
+        await archiveService.ArchiveAllAndResetAsync();
+
+        var (count, expiresIsNull, serverName) = await ReadMuteRuleAsync(ruleId);
+
+        Assert.Equal(1, count);
+        Assert.False(expiresIsNull);
+        Assert.Equal("ProdSql02", serverName);
+    }
+
+    [Fact]
+    public async Task EmptyMuteRulesTable_DoesNotBreakReset()
+    {
+        var initializer = new DuckDbInitializer(_dbPath);
+        await initializer.InitializeAsync();
+
+        var archiveService = new ArchiveService(initializer, _archiveDir);
+        await archiveService.ArchiveAllAndResetAsync();
+
+        using var connection = new DuckDBConnection($"Data Source={_dbPath}");
+        await connection.OpenAsync(TestContext.Current.CancellationToken);
+        using var cmd = connection.CreateCommand();
+        cmd.CommandText = "SELECT COUNT(*) FROM config_mute_rules";
+        var count = Convert.ToInt64(await cmd.ExecuteScalarAsync(TestContext.Current.CancellationToken));
+
+        Assert.Equal(0, count);
+    }
+
+    private async Task InsertMuteRuleAsync(
+        string id,
+        DateTime createdAt,
+        DateTime? expiresAtUtc,
+        string serverName,
+        string metricName)
+    {
+        using var connection = new DuckDBConnection($"Data Source={_dbPath}");
+        await connection.OpenAsync(TestContext.Current.CancellationToken);
+        using var cmd = connection.CreateCommand();
+        cmd.CommandText = @"
+INSERT INTO config_mute_rules
+    (id, enabled, created_at_utc, expires_at_utc, reason,
+     server_name, metric_name, database_pattern,
+     query_text_pattern, wait_type_pattern, job_name_pattern)
+VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)";
+        cmd.Parameters.Add(new DuckDBParameter { Value = id });
+        cmd.Parameters.Add(new DuckDBParameter { Value = true });
+        cmd.Parameters.Add(new DuckDBParameter { Value = createdAt });
+        cmd.Parameters.Add(new DuckDBParameter { Value = (object?)expiresAtUtc ?? DBNull.Value });
+        cmd.Parameters.Add(new DuckDBParameter { Value = "test rule" });
+        cmd.Parameters.Add(new DuckDBParameter { Value = serverName });
+        cmd.Parameters.Add(new DuckDBParameter { Value = metricName });
+        cmd.Parameters.Add(new DuckDBParameter { Value = DBNull.Value });
+        cmd.Parameters.Add(new DuckDBParameter { Value = DBNull.Value });
+        cmd.Parameters.Add(new DuckDBParameter { Value = DBNull.Value });
+        cmd.Parameters.Add(new DuckDBParameter { Value = DBNull.Value });
+        await cmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken);
+    }
+
+    private async Task<(int Count, bool ExpiresIsNull, string ServerName)> ReadMuteRuleAsync(string id)
+    {
+        using var connection = new DuckDBConnection($"Data Source={_dbPath}");
+        await connection.OpenAsync(TestContext.Current.CancellationToken);
+        using var cmd = connection.CreateCommand();
+        cmd.CommandText = "SELECT expires_at_utc, server_name FROM config_mute_rules WHERE id = $1";
+        cmd.Parameters.Add(new DuckDBParameter { Value = id });
+
+        using var reader = await cmd.ExecuteReaderAsync(TestContext.Current.CancellationToken);
+        if (!await reader.ReadAsync(TestContext.Current.CancellationToken))
+            return (0, false, "");
+
+        var expiresIsNull = reader.IsDBNull(0);
+        var serverName = reader.GetString(1);
+        return (1, expiresIsNull, serverName);
+    }
+}
diff --git a/Lite/Services/ArchiveService.cs b/Lite/Services/ArchiveService.cs
index ec69121..f0f55cc 100644
--- a/Lite/Services/ArchiveService.cs
+++ b/Lite/Services/ArchiveService.cs
@@ -42,6 +42,16 @@ public static bool IsArchiving
         private set => s_isArchiving = value;
     }
 
+    /* Config tables that must be preserved through ArchiveAllAndResetAsync.
+       These hold user configuration (not time-series) and must survive when the
+       size threshold trips a database reset. Issue #938 — permanent mute rules
+       were silently lost because ResetDatabaseAsync deletes monitor.duckdb. */
+    private static readonly string[] PreservedConfigTables =
+    [
+        "config_mute_rules",
+        "dismissed_archive_alerts"
+    ];
+
     /* Tables eligible for archival with their time column.
        IMPORTANT: Every table with time-series data must be listed here,
        or it will grow unbounded and push the DB past the 512 MB reset threshold. */
@@ -502,12 +512,16 @@ public async Task ArchiveAllAndResetAsync()
         }
 
         IsArchiving = true;
+        var preserveDir = Path.Combine(Path.GetTempPath(), $"pm_preserve_{Guid.NewGuid():N}");
+        var preservedFiles = new Dictionary<string, string>();
 
         try
         {
             var timestamp = DateTime.UtcNow.ToString("yyyyMMdd_HHmm");
             _logger?.LogInformation("Archiving ALL data to Parquet (prefix: {Timestamp}) and resetting database", timestamp);
 
+            Directory.CreateDirectory(preserveDir);
+
             /* Export everything under write lock */
             using (_duckDb.AcquireWriteLock())
             {
@@ -541,6 +555,32 @@ Archive views use glob (*_table.parquet) to pick up all files. */
                         _logger?.LogError(ex, "Failed to archive table {Table}", table);
                     }
                 }
+
+                /* Preserve config tables that must survive the reset (issue #938).
+                   Written to a temp dir, not the archive dir — these are restored
+                   into the new database, not exposed via archive views. */
+                foreach (var table in PreservedConfigTables)
+                {
+                    try
+                    {
+                        using var countCmd = connection.CreateCommand();
+                        countCmd.CommandText = $"SELECT COUNT(*) FROM {table}";
+                        var rowCount = Convert.ToInt64(await countCmd.ExecuteScalarAsync());
+                        if (rowCount == 0) continue;
+
+                        var preservePath = Path.Combine(preserveDir, $"{table}.parquet").Replace("\\", "/");
+                        using var exportCmd = connection.CreateCommand();
+                        exportCmd.CommandText = $"COPY (SELECT * FROM {table}) TO '{EscapeSqlPath(preservePath)}' (FORMAT PARQUET)";
+                        await exportCmd.ExecuteNonQueryAsync();
+                        preservedFiles[table] = preservePath;
+
+                        _logger?.LogInformation("Preserved {Count} rows from {Table} for restoration after reset", rowCount, table);
+                    }
+                    catch (Exception ex)
+                    {
+                        _logger?.LogError(ex, "Failed to preserve {Table} before reset — rows will be lost", table);
+                    }
+                }
             }
 
             /* Compact per-cycle files into monthly parquet files before reset.
@@ -553,11 +593,56 @@ and only touches filesystem files — no contention with collectors. */
             _logger?.LogInformation("Deleting and reinitializing database");
             await _duckDb.ResetDatabaseAsync();
 
+            /* Restore preserved config rows into the freshly initialized tables. */
+            var allRestoresSucceeded = true;
+            if (preservedFiles.Count > 0)
+            {
+                using (_duckDb.AcquireWriteLock())
+                {
+                    using var connection = _duckDb.CreateConnection();
+                    await connection.OpenAsync();
+                    foreach (var (table, path) in preservedFiles)
+                    {
+                        try
+                        {
+                            using var insertCmd = connection.CreateCommand();
+                            insertCmd.CommandText = $"INSERT INTO {table} SELECT * FROM read_parquet('{EscapeSqlPath(path)}')";
+                            await insertCmd.ExecuteNonQueryAsync();
+                            _logger?.LogInformation("Restored rows to {Table} after database reset", table);
+                        }
+                        catch (Exception ex)
+                        {
+                            allRestoresSucceeded = false;
+                            _logger?.LogError(ex, "Failed to restore {Table} from {Path} — preservation files retained for manual recovery", table, path);
+                        }
+                    }
+                }
+            }
+
             _logger?.LogInformation("Database reset complete — archive views now serve all historical data from Parquet");
+
+            /* Clean up temp preservation dir only if every restore succeeded.
+               On failure, leave the parquet files so the user can recover manually. */
+            if (allRestoresSucceeded)
+            {
+                try
+                {
+                    if (Directory.Exists(preserveDir))
+                        Directory.Delete(preserveDir, recursive: true);
+                }
+                catch (Exception ex)
+                {
+                    _logger?.LogWarning(ex, "Could not clean up preservation temp dir {Dir}", preserveDir);
+                }
+            }
+            else
+            {
+                _logger?.LogWarning("Preservation files retained at {Dir} for manual recovery", preserveDir);
+            }
         }
         catch (Exception ex)
         {
-            _logger?.LogError(ex, "Archive-all-and-reset failed");
+            _logger?.LogError(ex, "Archive-all-and-reset failed — preservation files (if any) retained at {Dir}", preserveDir);
         }
         finally
         {