From 200d0ea91b4f42b50f08c83da0c5712c8d6d8335 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Thu, 19 Feb 2026 11:33:18 +0000 Subject: [PATCH 1/5] replication wal cleaner docs --- .../_replication.config.json | 52 +++ documentation/high-availability/setup.md | 28 +- .../high-availability/wal-cleanup.md | 324 ++++++++++++++++++ documentation/operations/backup.md | 16 + documentation/query/sql/checkpoint.md | 13 + documentation/sidebars.js | 5 + 6 files changed, 420 insertions(+), 18 deletions(-) create mode 100644 documentation/high-availability/wal-cleanup.md diff --git a/documentation/configuration/configuration-utils/_replication.config.json b/documentation/configuration/configuration-utils/_replication.config.json index b78eb20a6..a42104620 100644 --- a/documentation/configuration/configuration-utils/_replication.config.json +++ b/documentation/configuration/configuration-utils/_replication.config.json @@ -94,5 +94,57 @@ "native.max.blocking.threads": { "default": "cpuCount * 4", "description": "Maximum number of threads for parallel blocking disk IO read/write operations for replication (and other). These threads are ephemeral: They are spawned per need and shut down after a short duration if no longer in use. These are not cpu-bound threads, hence the relative large number. The default should be appropriate for most use cases." + }, + "replication.primary.cleaner.enabled": { + "default": "true", + "description": "Master switch for the WAL cleaner." + }, + "replication.primary.cleaner.interval": { + "default": "10m", + "description": "Time between cleanup cycles. Range: 1s – 24h." + }, + "replication.primary.cleaner.checkpoint.source": { + "default": "true", + "description": "Use checkpoint history as a cleanup trigger source." + }, + "replication.primary.cleaner.backup.window.count": { + "default": "backup.cleanup.keep.latest.n or 5", + "description": "Minimum complete backups/checkpoints per instance before cleanup starts. 
Defaults to `backup.cleanup.keep.latest.n` if backups are enabled, otherwise `5`." + }, + "replication.primary.cleaner.delete.concurrency": { + "default": "4 – 12 (auto)", + "description": "Concurrent deletion tasks. Derived from `replication.requests.max.concurrent`. Range: 4 – 32." + }, + "replication.primary.cleaner.max.requests.per.second": { + "default": "service-dependent", + "description": "Rate limit for object store delete requests. Set to `0` for unlimited. Range: 0 – 10000." + }, + "replication.primary.cleaner.progress.write.interval": { + "default": "5s", + "description": "How often progress is persisted during a cleanup cycle. Lower values mean less re-work after a crash but more writes. Range: 100ms – 60s." + }, + "replication.primary.cleaner.dropped.table.cooloff": { + "default": "1h", + "description": "Wait time after `DROP TABLE` before removing the table's data from object storage. Guards against clock skew." + }, + "replication.primary.cleaner.retry.attempts": { + "default": "20", + "description": "Retries for transient object store failures during cleanup. Range: 0 – 100." + }, + "replication.primary.cleaner.retry.interval": { + "default": "2s", + "description": "Delay between cleanup retries. Range: 0 – 5m." + }, + "checkpoint.history.enabled": { + "default": "true (when replication is enabled)", + "description": "Enable the checkpoint history tracker. Requires replication." + }, + "checkpoint.history.keep.count": { + "default": "100", + "description": "Maximum checkpoint records retained per instance." + }, + "checkpoint.history.long.retry.interval": { + "default": "1m", + "description": "Retry interval for syncing checkpoint history to the object store after burst retries fail." 
} } diff --git a/documentation/high-availability/setup.md b/documentation/high-availability/setup.md index 8dfebfe89..e279346e6 100644 --- a/documentation/high-availability/setup.md +++ b/documentation/high-availability/setup.md @@ -40,9 +40,6 @@ Create an S3 bucket following **Recommendations:** - Select a region close to your primary node - Disable blob versioning -- Set up a - [lifecycle policy](https://docs.aws.amazon.com/AmazonS3/latest/userguide/how-to-set-lifecycle-configuration-intro.html) - to manage WAL file retention (see [Snapshot and expiration policies](#snapshot-and-expiration-policies)) **Connection string:** @@ -73,9 +70,6 @@ then create a Blob Container. **Recommendations:** - Select a region close to your primary node - Disable blob versioning -- Set up - [Lifecycle Management](https://learn.microsoft.com/en-us/azure/storage/blobs/lifecycle-management-policy-configure?tabs=azure-portal) - for WAL file retention **Connection string:** @@ -158,9 +152,7 @@ nodes. See [Backup and restore](/docs/operations/backup/) for the full procedure. :::tip -Set up regular snapshots (daily or weekly). See -[Snapshot and expiration policies](#snapshot-and-expiration-policies) for -guidance. +Set up regular snapshots (daily or weekly). ::: ## 4. Configure replica node(s) @@ -197,18 +189,18 @@ export QDB_REPLICATION_OBJECT_STORE="azblob::..." For tuning options, see the [Tuning guide](/docs/high-availability/tuning/). -## Snapshot and expiration policies +## WAL data cleanup -WAL files are typically read by replicas shortly after upload. To optimize -costs, move files to cooler storage tiers after 1-7 days. +Replicated WAL data accumulates in object storage over time. The **WAL +cleaner** runs on the primary node and automatically removes data that is no +longer needed, based on your backup and checkpoint history. 
-**Recommendations:** -- Take snapshots every 1-7 days -- Keep WAL files for at least 30 days -- Ensure snapshot interval is shorter than WAL expiration +The cleaner is enabled by default and requires no configuration when backups +or checkpoint history are active. By default, it retains replication data +for the most recent 5 backups or checkpoints and deletes everything older. -Example: Weekly snapshots + 30-day WAL retention = ability to restore up to 23 -days back. Daily snapshots restore faster but use more storage. +See the [WAL Cleanup guide](/docs/high-availability/wal-cleanup/) for +configuration options, tuning, and troubleshooting. ## Disaster recovery diff --git a/documentation/high-availability/wal-cleanup.md b/documentation/high-availability/wal-cleanup.md new file mode 100644 index 000000000..51b9c0efc --- /dev/null +++ b/documentation/high-availability/wal-cleanup.md @@ -0,0 +1,324 @@ +--- +title: WAL cleanup +sidebar_label: WAL Cleanup +description: + Configure automatic cleanup of replicated WAL data in object storage to + control storage costs. +--- + +import { EnterpriseNote } from "@site/src/components/EnterpriseNote" + + + Automatic cleanup of replicated WAL data in object storage. + + +QuestDB's [replication feature](/docs/high-availability/setup/) streams +write-ahead log (WAL) data from a primary node to object storage, where replica +nodes consume it. Without cleanup, this replicated WAL data accumulates +indefinitely. The WAL cleaner runs on the primary node and automatically deletes +data that is no longer needed, based on your backup and checkpoint history, +keeping storage usage under control. 
+ +The WAL cleaner is _enabled by default_ and keeps as much replication data as your latest N backups or checkpoints: + +```ini +# server.conf (the are defaults — no action needed) +replication.primary.cleaner.enabled=true +replication.primary.cleaner.backup.window.count=5 +``` + +Each WAL cleaner cycle runs every 10 minutes by default +(`replication.primary.cleaner.interval`). + +The cleaner requires at least one **cleanup trigger source** before it will delete +anything. The two supported sources are: + +- **[Enterprise backups](/docs/operations/backup/)** — the cleaner reads backup + manifests to determine what can be safely deleted +- **[Checkpoint history](#integrating-with-the-sql-checkpoint-commands)** — the cleaner reads + `CHECKPOINT RELEASE` records synced to the replication object store + +Both sources are enabled by default when replication is active. If you only +use one backup method, the cleaner simply ignores the source that has no +history. + +The core principle is simple: the cleaner retains enough WAL data to support +your most recent N backups or checkpoints, and deletes everything older. + + +## Integrating with Enterprise backups + +The cleaner automatically reads your backup manifests to determine what can be +safely deleted. The backup feature must be enabled and configured on the +primary, even if you only run backups from a replica. + +```ini +# server.conf (primary) +replication.role=primary +replication.object.store=... +backup.enabled=true +backup.object.store=s3::bucket=my-backup-bucket;... # same on all cluster nodes +``` + +The cleaner waits until at least N complete backups exist before it starts +deleting anything. N defaults to your +[`backup.cleanup.keep.latest.n`](/docs/operations/backup/#backup-retention) +setting (itself default 5) and can be overridden with +`replication.primary.cleaner.backup.window.count`. For example, with the default +of 5 the cleaner deletes data older than the 5th-newest complete backup. 
+ +:::warning +All nodes in a replication cluster should use the **same `backup.object.store`** +connection string. The cleaner on the primary reads backup manifests from every +node to compute the cleanup boundary. If nodes back up to different object +stores, the cleaner cannot see all manifests and will not trigger correctly. +::: + +## Integrating with the SQL CHECKPOINT commands + +If you take filesystem snapshots, AWS EBS volume snapshots, or use custom backup +scripts that issue `CHECKPOINT` / `CHECKPOINT RELEASE`, checkpoint history +tracking is all you need. + +Both `checkpoint.history.enabled` and +`replication.primary.cleaner.checkpoint.source` default to `true` when +replication is enabled, so no extra configuration is required: + +```ini +# server.conf — checkpoint history works out of the box +replication.role=primary # or replica +replication.object.store=... +``` + +Checkpoint history does not need to be configured on the primary. It only needs +to be enabled on the node(s) where you actually run checkpoints. For example, +you might run a primary and two replicas, and back up both replicas but not the +primary. As long as each node that issues checkpoints is part of the same +replication cluster and has checkpoint history enabled, the cleaner on the +primary will see their checkpoint records. + +Each time `CHECKPOINT RELEASE` runs on any node with checkpoint history enabled, +QuestDB records the per-table transaction state to the shared replication object +store. The cleaner uses these records the same way it uses backup manifests. + +As with backups, the cleaner waits until at least N complete checkpoints exist +before deleting anything. N is controlled by +`replication.primary.cleaner.backup.window.count` (default 5). + +Checkpoint records are synced to the replication object store at +`checkpoint_history/{instance_name}/history.msgpack`. 
If the sync fails +transiently, QuestDB retries in the background (controlled by +`checkpoint.history.long.retry.interval`). + +:::note +`CHECKPOINT` itself is available in both OSS and Enterprise, but checkpoint +history tracking, the mechanism that syncs checkpoint records to the +replication object store for WAL cleanup, requires QuestDB Enterprise with +replication enabled. +::: + +## Operational Notes + +### Mixing Backups and Checkpoints + +By default, both trigger sources are enabled: + +```ini +backup.enabled=true +checkpoint.history.enabled=true +replication.primary.cleaner.checkpoint.source=true +``` + +The cleaner merges both sources and always picks the more conservative (older) +boundary, so data is not deleted until it is safe according to **both** your +backups and your checkpoints. + +### Disabling the cleaner + +```ini +replication.primary.cleaner.enabled=false +``` + +With the cleaner disabled, WAL data accumulates indefinitely. Useful for +debugging, not recommended for production. + +## Cleanup boundary and recovery range + +The cleanup boundary determines the oldest point from which you can restore a +backup and still replay WAL data to rejoin the replication cluster. Any +[point-in-time recovery](/docs/high-availability/setup/#point-in-time-recovery) +target must be at or after this boundary. + +Each node in a replication cluster has a unique **backup instance name**. You can +find a node's backup instance name by running: + +```questdb-sql +SELECT backup_instance_name; +``` + +Backup manifests and checkpoint history records are stored per backup instance +name. The cleaner computes the boundary as follows: + +1. For each instance name, read the most recent N complete entries (backups or + checkpoints, regardless of source). N is + `replication.primary.cleaner.backup.window.count` (default 5). +2. Ignore any instance that has fewer than N entries. +3. From all remaining entries across eligible instances, pick the **oldest** + one. 
That is the cleanup boundary — WAL data before it is deleted. + + +In this example with N=5: + +```mermaid +--- +displayMode: compact +config: + gantt: + useWidth: 1000 + barHeight: 30 + barGap: 6 + topPadding: 50 + bottomPadding: 40 + sidePadding: 40 + leftPadding: 200 + gridLineStartPadding: 30 + fontSize: 14 + sectionFontSize: 16 + themeVariables: + fontFamily: sans-serif + fontSize: 14px +--- +gantt + tickInterval 1day + axisFormat %b %d + + section door-echo-yoyo + B :done, milestone, d1, 2026-01-01, 0d + B :done, milestone, d2, 2026-01-02, 0d + B :crit, milestone, d3, 2026-01-03, 0d + B :milestone, d4, 2026-01-04, 0d + B :milestone, d5, 2026-01-05, 0d + B :milestone, d6, 2026-01-06, 0d + B :milestone, d7, 2026-01-07, 0d + + section park-sugar-system + B :done, milestone, p1, 2026-01-06, 0d + B :milestone, p2, 2026-01-07, 0d + B :milestone, p3, 2026-01-08, 0d + B :milestone, p4, 2026-01-09, 0d + B :milestone, p5, 2026-01-10, 0d + B :milestone, p6, 2026-01-11, 0d + + section apple-parrot-baby + B :milestone, a1, 2026-01-10, 0d + B :milestone, a2, 2026-01-11, 0d + B :milestone, a3, 2026-01-12, 0d + + B :crit, vert, cb, 2026-01-03, 0d +``` + +- Considered entries (backups or checkpoints): + - **door-echo-yoyo** has 7 entries. Its 5th newest entry of the 3rd of Jan is considered. + - **park-sugar-system** has 6 entries. Its 5th entry (Jan 7th) is considered. + - **apple-parrot-baby** has only 3, fewer than N, so it is skipped. +- The oldest most recent 5th entry is of **door-echo-yoyo**, so the cleanup boundary falls on Jan 3. All replication WAL data before it is deleted. +- After cleanup, restoring from a backup older than Jan 3 (such as door-echo-yoyo's Jan 1 or Jan 2 backups) is only possible as a standalone instance and not as part of the replication cluster. +- Any point-in-time recovery target must be **on or after** Jan 3. + +### Abandoned backup instance names + +Notice that door-echo-yoyo has no backups after Jan 7. 
If it is a +decommissioned node, its old history is dragging the cleanup boundary back to +Jan 3. Without it, the boundary would jump forward to Jan 7 +(park-sugar-system's oldest eligible entry), freeing several days of WAL +data. The cleaner is conservative and assumes every instance is still active. + +If a node is decommissioned without removing its history from the object store, +its stale entries hold back the cleanup boundary indefinitely. + +You can identify this from the cleaner's log. Each cleanup cycle logs: + +``` +prune requested [c=1, trigger=backup, instance=door-echo-yoyo, backup_ts=1771597937483926 (2026-02-20T14:32:17.483926Z), tables=42] +``` + +If `instance` shows a name you don't recognise or one that belongs to a +decommissioned node, that instance is holding things back. If `backup_ts` +is unexpectedly old, that confirms the boundary is being dragged behind by +abandoned history. + +To unblock cleanup, delete the abandoned backup instance name's directory. The +location depends on the source: + +In the **backup** object store: + +``` +backup/{backup_instance_name}/ +``` + +In the **replication** object store: + +``` +checkpoint_history/{backup_instance_name}/ +``` + +You can discover which backup instance names exist by listing these prefixes in +your object store. + +## Configuration reference + +All settings go in `server.conf`. Defaults are tuned for typical production +use — most deployments only need the quick-start settings above. + +### Core settings + +| Property | Default | Description | +|---|---|---| +| `replication.primary.cleaner.enabled` | `true` | Master switch for the cleaner. | +| `replication.primary.cleaner.interval` | `10m` | Time between cleanup cycles. Range: 1s – 24h. | +| `replication.primary.cleaner.checkpoint.source` | `true` | Use checkpoint history as a cleanup trigger source. 
| +| `replication.primary.cleaner.backup.window.count` | `backup.cleanup.keep.latest.n` (if backups enabled) or 5 | Minimum complete backups/checkpoints per instance before cleanup starts. | + +### Performance tuning + +| Property | Default | Description | +|---|---|---| +| `replication.primary.cleaner.delete.concurrency` | 4 – 12 (auto) | Concurrent deletion tasks. Derived from `replication.requests.max.concurrent`. Range: 4 – 32. | +| `replication.primary.cleaner.max.requests.per.second` | Service-dependent | Rate limit for object store requests. Set to 0 for unlimited. Range: 0 – 10000. | +| `replication.primary.cleaner.progress.write.interval` | `5s` | How often progress is persisted during a cycle. Lower = less re-work after crash, more writes. Range: 100ms – 60s. | + +Default rate limits per object store: + +| Service | Default | Basis | +|---|---|---| +| GCS | 500 req/s | 50% of ~1,000 write ops/s per bucket | +| Azure Blob | 10,000 req/s | 50% of ~20,000 requests/s per account | +| S3 / R2 / DO Spaces | 1,750 req/s | 50% of ~3,500 DELETE/s per prefix | +| Filesystem | 100 req/s | Conservative default for potential NFS | + +### Safety settings + +| Property | Default | Description | +|---|---|---| +| `replication.primary.cleaner.dropped.table.cooloff` | `1h` | Wait time after `DROP TABLE` before removing the table's data from object storage. Guards against clock skew. | +| `replication.primary.cleaner.retry.attempts` | `20` | Retries for transient object store failures. Range: 0 – 100. | +| `replication.primary.cleaner.retry.interval` | `2s` | Delay between retries. Range: 0 – 5m. | + +### Checkpoint history settings + +Only relevant when `checkpoint.history.enabled=true`. + +| Property | Default | Description | +|---|---|---| +| `checkpoint.history.enabled` | `true` (when replication is enabled) | Enable the checkpoint history tracker. Requires replication. | +| `checkpoint.history.keep.count` | `100` | Maximum checkpoint records retained per instance. 
| +| `checkpoint.history.long.retry.interval` | `1m` | Retry interval for syncing to object store after burst retries fail. | + +The remaining checkpoint history settings (`requests.retry.attempts`, +`requests.retry.interval`, `requests.max.concurrent`, timeouts, throughput) +default to the corresponding `replication.requests.*` values and rarely need +to be overridden. + +## Logging + +You can find the WAL cleaner's logs grepping for `wal::uploader::cleaner`. diff --git a/documentation/operations/backup.md b/documentation/operations/backup.md index bb066ea39..08bfe029f 100644 --- a/documentation/operations/backup.md +++ b/documentation/operations/backup.md @@ -246,6 +246,16 @@ SELECT backup_instance_name; Returns `null` if no backup has been run yet. +### Replication WAL cleanup integration + +When replication is enabled, the +[WAL cleaner](/docs/high-availability/wal-cleanup/) uses backup manifests to +determine which replicated WAL data in object storage can be safely deleted. +By default, the cleaner retains replication data for as many backups as your +[`backup.cleanup.keep.latest.n`](#backup-retention) setting (default 5) and +deletes everything older. No additional configuration is required — enabling +backups on a replicated instance is sufficient. + ### Performance characteristics Backup is designed to prioritize database availability over backup speed. Key @@ -334,6 +344,12 @@ To find your instance name, see [Backup instance name](#backup-instance-name). a primary and its replica creates two separate backup sets in the object store. Typically, backing up the primary is sufficient since replicas sync from the same data. +- **Same backup object store for all nodes**: When using replication, all + nodes in the cluster should use the same `backup.object.store` connection + string. The [WAL cleaner](/docs/high-availability/wal-cleanup/) reads + backup manifests from every node to determine what replication data can be + safely deleted. 
If nodes back up to different object stores, the cleaner + cannot see all manifests and will not trigger correctly. ### Backup validation diff --git a/documentation/query/sql/checkpoint.md b/documentation/query/sql/checkpoint.md index e919ba145..5e3a5fa5a 100644 --- a/documentation/query/sql/checkpoint.md +++ b/documentation/query/sql/checkpoint.md @@ -80,6 +80,19 @@ The restore procedure will use `/var/lib/questdb/.checkpoint` to adjust the database files and remove extra data copies. After the restore is successful the database is avaialble as normal with no extra intervantion required. +## Checkpoint history (Enterprise) + +In QuestDB Enterprise with replication enabled, each `CHECKPOINT RELEASE` +automatically records the per-table transaction state to the shared replication +object store. The [WAL cleaner](/docs/high-availability/wal-cleanup/) uses these +records to determine which replicated WAL data can be safely deleted from object +storage. + +Checkpoint history tracking is enabled by default when replication is active. +No additional configuration is required. See the +[WAL Cleanup guide](/docs/high-availability/wal-cleanup/#integrating-with-the-sql-checkpoint-commands) +for details. 
+ ## CHECKPOINT examples To enter checkpoint mode: diff --git a/documentation/sidebars.js b/documentation/sidebars.js index 7ebaea7b2..7b24527d4 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -625,6 +625,11 @@ module.exports = { type: "doc", label: "Tuning", }, + { + id: "high-availability/wal-cleanup", + type: "doc", + label: "WAL Cleanup", + }, ], }, From 5d54eab187763afbbba24680be5bfdb17b2a0f2f Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Thu, 19 Feb 2026 14:32:06 +0000 Subject: [PATCH 2/5] diagram fix --- documentation/high-availability/wal-cleanup.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/documentation/high-availability/wal-cleanup.md b/documentation/high-availability/wal-cleanup.md index 51b9c0efc..c7b888b1d 100644 --- a/documentation/high-availability/wal-cleanup.md +++ b/documentation/high-availability/wal-cleanup.md @@ -210,9 +210,9 @@ gantt B :milestone, p6, 2026-01-11, 0d section apple-parrot-baby - B :milestone, a1, 2026-01-10, 0d - B :milestone, a2, 2026-01-11, 0d - B :milestone, a3, 2026-01-12, 0d + B :done, milestone, a1, 2026-01-10, 0d + B :done, milestone, a2, 2026-01-11, 0d + B :done, milestone, a3, 2026-01-12, 0d B :crit, vert, cb, 2026-01-03, 0d ``` From 277acd0c8a06970201527d9d9b1c7415f96ad8a9 Mon Sep 17 00:00:00 2001 From: Vlad Ilyushchenko Date: Mon, 23 Feb 2026 15:45:35 +0000 Subject: [PATCH 3/5] Restructure WAL cleanup docs, fix factual errors, fix sql-parser webpack alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Restructure wal-cleanup.md: action-first layout (quick start, verify, integrate, troubleshoot, config reference) - Fix incorrect claim that both trigger sources make cleanup less aggressive (code merges sources per instance, making it more aggressive) - Fix boundary inclusivity: boundary entry is deleted (inclusive), not retained - Fix recovery target: must be after boundary, not on-or-after - Fix broken anchor in 
checkpoint.md (#integrating-with-the-sql-checkpoint-commands → #checkpoint-history) - Add troubleshooting section with common operational scenarios - Add "Verifying cleanup is running" section with log format reference - Add prominent warning that cleaner needs N entries before acting - Fix "recognise" → "recognize" for American English consistency - Fix webpack alias for @questdb/sql-parser/grammar subpath export - Fix pre-existing path.resolve("/node_modules/react") → use __dirname Co-Authored-By: Claude Opus 4.6 --- documentation/high-availability/setup.md | 2 +- .../high-availability/wal-cleanup.md | 228 ++++++++++++------ documentation/operations/backup.md | 7 +- documentation/query/sql/checkpoint.md | 2 +- docusaurus.config.js | 3 +- 5 files changed, 157 insertions(+), 85 deletions(-) diff --git a/documentation/high-availability/setup.md b/documentation/high-availability/setup.md index e279346e6..b87072468 100644 --- a/documentation/high-availability/setup.md +++ b/documentation/high-availability/setup.md @@ -197,7 +197,7 @@ longer needed, based on your backup and checkpoint history. The cleaner is enabled by default and requires no configuration when backups or checkpoint history are active. By default, it retains replication data -for the most recent 5 backups or checkpoints and deletes everything older. +needed by the 4 most recent backups or checkpoints and deletes the rest. See the [WAL Cleanup guide](/docs/high-availability/wal-cleanup/) for configuration options, tuning, and troubleshooting. 
diff --git a/documentation/high-availability/wal-cleanup.md b/documentation/high-availability/wal-cleanup.md index c7b888b1d..4a7da3d09 100644 --- a/documentation/high-availability/wal-cleanup.md +++ b/documentation/high-availability/wal-cleanup.md @@ -15,38 +15,74 @@ import { EnterpriseNote } from "@site/src/components/EnterpriseNote" QuestDB's [replication feature](/docs/high-availability/setup/) streams write-ahead log (WAL) data from a primary node to object storage, where replica nodes consume it. Without cleanup, this replicated WAL data accumulates -indefinitely. The WAL cleaner runs on the primary node and automatically deletes -data that is no longer needed, based on your backup and checkpoint history, -keeping storage usage under control. +indefinitely. The **WAL cleaner** runs on the primary node and automatically +deletes data that is no longer needed, keeping storage usage under control. -The WAL cleaner is _enabled by default_ and keeps as much replication data as your latest N backups or checkpoints: +Requires: QuestDB Enterprise with replication enabled. + +:::warning +The WAL cleaner is enabled by default, **but it will not delete anything until +at least 5 completed backups or checkpoints exist.** If you have not configured +[Enterprise backups](/docs/operations/backup/) or run +[`CHECKPOINT`](/docs/query/sql/checkpoint/) commands, WAL data accumulates +indefinitely regardless of this setting. +::: + +## Quick start + +The WAL cleaner is _enabled by default_. With either backups or checkpoint +history active, no additional configuration is needed: ```ini -# server.conf (the are defaults — no action needed) +# server.conf (these are defaults — no action needed) replication.primary.cleaner.enabled=true replication.primary.cleaner.backup.window.count=5 ``` -Each WAL cleaner cycle runs every 10 minutes by default +The cleaner retains WAL data needed by your 4 most recent backups or +checkpoints, and deletes the rest (including the 5th-newest entry). 
It runs every 10 minutes (`replication.primary.cleaner.interval`). -The cleaner requires at least one **cleanup trigger source** before it will delete -anything. The two supported sources are: +The cleaner requires at least one **trigger source** with sufficient history +before it will delete anything. The two supported sources are: - **[Enterprise backups](/docs/operations/backup/)** — the cleaner reads backup - manifests to determine what can be safely deleted -- **[Checkpoint history](#integrating-with-the-sql-checkpoint-commands)** — the cleaner reads - `CHECKPOINT RELEASE` records synced to the replication object store + manifests to determine what can be safely deleted. +- **[Checkpoint history](#checkpoint-history)** — the cleaner reads + `CHECKPOINT RELEASE` records synced to the replication object store. -Both sources are enabled by default when replication is active. If you only -use one backup method, the cleaner simply ignores the source that has no -history. +Both sources are enabled by default when replication is active. If you only use +one backup method, the cleaner simply ignores the source that has no history. -The core principle is simple: the cleaner retains enough WAL data to support -your most recent N backups or checkpoints, and deletes everything older. +## Verifying cleanup is running +Search the QuestDB logs for `wal::uploader::cleaner`. Each cleanup cycle logs a +line like: -## Integrating with Enterprise backups +``` +prune requested [c=1, trigger=backup, instance=door-echo-yoyo, backup_ts=1771597937483926 (2026-02-20T14:32:17.483926Z), tables=42] +``` + +Key fields: + +| Field | Meaning | +|---|---| +| `trigger` | Which source determined the boundary (`backup` or `checkpoint`). | +| `instance` | The backup instance name whose entry set the boundary. | +| `backup_ts` | Timestamp of the boundary entry. Data up to and including this entry is deleted. | +| `tables` | Number of tables processed in this cycle. 
| + +If you see no `prune requested` lines, the cleaner has not yet accumulated +enough history to act. Check that backups or checkpoints are running +successfully. + +You can find a node's backup instance name by running: + +```questdb-sql +SELECT backup_instance_name; +``` + +## Enterprise backup integration The cleaner automatically reads your backup manifests to determine what can be safely deleted. The backup feature must be enabled and configured on the @@ -65,7 +101,7 @@ deleting anything. N defaults to your [`backup.cleanup.keep.latest.n`](/docs/operations/backup/#backup-retention) setting (itself default 5) and can be overridden with `replication.primary.cleaner.backup.window.count`. For example, with the default -of 5 the cleaner deletes data older than the 5th-newest complete backup. +of 5 the cleaner deletes data up to and including the 5th-newest complete backup. :::warning All nodes in a replication cluster should use the **same `backup.object.store`** @@ -74,7 +110,7 @@ node to compute the cleanup boundary. If nodes back up to different object stores, the cleaner cannot see all manifests and will not trigger correctly. ::: -## Integrating with the SQL CHECKPOINT commands +## Checkpoint history If you take filesystem snapshots, AWS EBS volume snapshots, or use custom backup scripts that issue `CHECKPOINT` / `CHECKPOINT RELEASE`, checkpoint history @@ -112,16 +148,14 @@ transiently, QuestDB retries in the background (controlled by :::note `CHECKPOINT` itself is available in both OSS and Enterprise, but checkpoint -history tracking, the mechanism that syncs checkpoint records to the -replication object store for WAL cleanup, requires QuestDB Enterprise with +history tracking — the mechanism that syncs checkpoint records to the +replication object store for WAL cleanup — requires QuestDB Enterprise with replication enabled. 
::: -## Operational Notes - -### Mixing Backups and Checkpoints +## Using both backups and checkpoints -By default, both trigger sources are enabled: +When both trigger sources are active (the default): ```ini backup.enabled=true @@ -129,45 +163,44 @@ checkpoint.history.enabled=true replication.primary.cleaner.checkpoint.source=true ``` -The cleaner merges both sources and always picks the more conservative (older) -boundary, so data is not deleted until it is safe according to **both** your -backups and your checkpoints. +The cleaner **merges entries from both sources into a single list per backup +instance name**, then keeps the newest N entries. This means enabling both +sources can make cleanup **more** aggressive: an instance with 3 backups and 3 +checkpoints has 6 entries total, crossing the N=5 threshold, whereas neither +source alone would be sufficient to trigger cleanup. -### Disabling the cleaner +:::tip +If you want the cleaner to act only on backup history and ignore checkpoints +(or vice versa), disable the unwanted source: ```ini -replication.primary.cleaner.enabled=false +replication.primary.cleaner.checkpoint.source=false ``` +::: -With the cleaner disabled, WAL data accumulates indefinitely. Useful for -debugging, not recommended for production. - -## Cleanup boundary and recovery range +## How the cleanup boundary works -The cleanup boundary determines the oldest point from which you can restore a -backup and still replay WAL data to rejoin the replication cluster. Any +The cleanup boundary determines how far back you can restore. WAL data up to +and including the boundary is deleted; data after the boundary is retained. Any [point-in-time recovery](/docs/high-availability/setup/#point-in-time-recovery) -target must be at or after this boundary. - -Each node in a replication cluster has a unique **backup instance name**. 
You can -find a node's backup instance name by running: - -```questdb-sql -SELECT backup_instance_name; -``` +target must be **after** this boundary. Backup manifests and checkpoint history records are stored per backup instance name. The cleaner computes the boundary as follows: -1. For each instance name, read the most recent N complete entries (backups or +1. For each backup instance name, collect the most recent N entries (backups or checkpoints, regardless of source). N is `replication.primary.cleaner.backup.window.count` (default 5). -2. Ignore any instance that has fewer than N entries. -3. From all remaining entries across eligible instances, pick the **oldest** - one. That is the cleanup boundary — WAL data before it is deleted. +2. Skip any instance that has fewer than N entries. +3. Compare the Nth-newest entry from each eligible instance. The entry with the + **earliest timestamp** is the cleanup boundary — WAL data up to and including + that entry's transactions is deleted. +### Example -In this example with N=5: +Consider three nodes with N=5: **door-echo-yoyo** has 7 entries (Jan 1–7), +**park-sugar-system** has 6 entries (Jan 6–11), and **apple-parrot-baby** has 3 +entries (Jan 10–12). ```mermaid --- @@ -202,7 +235,7 @@ gantt B :milestone, d7, 2026-01-07, 0d section park-sugar-system - B :done, milestone, p1, 2026-01-06, 0d + B :milestone, p1, 2026-01-06, 0d B :milestone, p2, 2026-01-07, 0d B :milestone, p3, 2026-01-08, 0d B :milestone, p4, 2026-01-09, 0d @@ -210,45 +243,67 @@ gantt B :milestone, p6, 2026-01-11, 0d section apple-parrot-baby - B :done, milestone, a1, 2026-01-10, 0d - B :done, milestone, a2, 2026-01-11, 0d - B :done, milestone, a3, 2026-01-12, 0d + B :milestone, a1, 2026-01-10, 0d + B :milestone, a2, 2026-01-11, 0d + B :milestone, a3, 2026-01-12, 0d B :crit, vert, cb, 2026-01-03, 0d ``` -- Considered entries (backups or checkpoints): - - **door-echo-yoyo** has 7 entries. Its 5th newest entry of the 3rd of Jan is considered. 
- - **park-sugar-system** has 6 entries. Its 5th entry (Jan 7th) is considered. - - **apple-parrot-baby** has only 3, fewer than N, so it is skipped. -- The oldest most recent 5th entry is of **door-echo-yoyo**, so the cleanup boundary falls on Jan 3. All replication WAL data before it is deleted. -- After cleanup, restoring from a backup older than Jan 3 (such as door-echo-yoyo's Jan 1 or Jan 2 backups) is only possible as a standalone instance and not as part of the replication cluster. -- Any point-in-time recovery target must be **on or after** Jan 3. +- **door-echo-yoyo** has 7 entries. Its 5th newest entry is **Jan 3**. +- **park-sugar-system** has 6 entries. Its 5th newest entry is **Jan 7**. +- **apple-parrot-baby** has only 3 entries, fewer than N=5, so it is skipped. +- Comparing the Nth-newest entries: Jan 3 (door-echo-yoyo) vs Jan 7 + (park-sugar-system). The earliest is **Jan 3**, so the cleanup boundary falls + there. All replication WAL data up to and including Jan 3 is deleted. +- After cleanup, restoring from the Jan 3 backup or older (such as + door-echo-yoyo's Jan 1 or Jan 2 backups) is only possible as a standalone + instance, not as part of the replication cluster. +- Any point-in-time recovery target must be **after** Jan 3. -### Abandoned backup instance names +## Troubleshooting + +### Storage growing despite cleaner being enabled + +1. **Check that trigger sources have enough history.** The cleaner needs at + least N entries (default 5) from at least one backup instance name. If you + recently set up replication and have fewer than 5 backups or checkpoints, the + cleaner has not started yet. Run `SELECT * FROM backups();` or check your + checkpoint schedule. -Notice that door-echo-yoyo has no backups after Jan 7. If it is a -decommissioned node, its old history is dragging the cleanup boundary back to -Jan 3. Without it, the boundary would jump forward to Jan 7 -(park-sugar-system's oldest eligible entry), freeing several days of WAL -data. 
The cleaner is conservative and assumes every instance is still active. +2. **Check the logs.** Search for `wal::uploader::cleaner`. If there are no + `prune requested` lines, the cleaner is not finding enough history to act. -If a node is decommissioned without removing its history from the object store, -its stale entries hold back the cleanup boundary indefinitely. +3. **Check for abandoned backup instance names.** A decommissioned node whose + history remains in the object store drags the cleanup boundary backward + indefinitely. See [Abandoned backup instance names](#abandoned-backup-instance-names). -You can identify this from the cleaner's log. Each cleanup cycle logs: +4. **Verify both sources are producing entries.** When both backups and + checkpoints are enabled, entries from both are merged per instance. If one + source stopped producing entries (e.g., scheduled backups are failing), the + total entry count per instance may drop below N, preventing cleanup. Check + that all configured sources are running on schedule. + +### Abandoned backup instance names + +A decommissioned node whose history is still in the object store holds back the +cleanup boundary for the entire cluster. In the [example above](#example), +door-echo-yoyo has no entries after Jan 7. If it is decommissioned, its old +history pins the boundary to Jan 3. Without it, the boundary would advance to +Jan 7 (park-sugar-system's 5th newest entry). + +You can identify this from the cleaner log: ``` prune requested [c=1, trigger=backup, instance=door-echo-yoyo, backup_ts=1771597937483926 (2026-02-20T14:32:17.483926Z), tables=42] ``` -If `instance` shows a name you don't recognise or one that belongs to a -decommissioned node, that instance is holding things back. If `backup_ts` -is unexpectedly old, that confirms the boundary is being dragged behind by -abandoned history. 
+If `instance` shows a name you don't recognize or one belonging to a +decommissioned node, and `backup_ts` is unexpectedly old, that instance is the +problem. -To unblock cleanup, delete the abandoned backup instance name's directory. The -location depends on the source: +To unblock cleanup, delete the abandoned instance's directory from the object +store: In the **backup** object store: @@ -265,6 +320,25 @@ checkpoint_history/{backup_instance_name}/ You can discover which backup instance names exist by listing these prefixes in your object store. +### Cleanup boundary not advancing + +If the `backup_ts` in the cleaner log stays the same across cycles: + +- An instance may have stopped producing new backups or checkpoints. Check that + all active nodes are backing up on schedule. +- The N-entry threshold may be too high. Lowering + `replication.primary.cleaner.backup.window.count` reduces how many entries + are required before cleanup starts, but also reduces your recovery window. + +### Disabling the cleaner + +```ini +replication.primary.cleaner.enabled=false +``` + +With the cleaner disabled, WAL data accumulates indefinitely. Useful for +debugging, not recommended for production. + ## Configuration reference All settings go in `server.conf`. Defaults are tuned for typical production @@ -318,7 +392,3 @@ The remaining checkpoint history settings (`requests.retry.attempts`, `requests.retry.interval`, `requests.max.concurrent`, timeouts, throughput) default to the corresponding `replication.requests.*` values and rarely need to be overridden. - -## Logging - -You can find the WAL cleaner's logs grepping for `wal::uploader::cleaner`. diff --git a/documentation/operations/backup.md b/documentation/operations/backup.md index 08bfe029f..99cd49f9e 100644 --- a/documentation/operations/backup.md +++ b/documentation/operations/backup.md @@ -251,9 +251,10 @@ Returns `null` if no backup has been run yet. 
When replication is enabled, the [WAL cleaner](/docs/high-availability/wal-cleanup/) uses backup manifests to determine which replicated WAL data in object storage can be safely deleted. -By default, the cleaner retains replication data for as many backups as your -[`backup.cleanup.keep.latest.n`](#backup-retention) setting (default 5) and -deletes everything older. No additional configuration is required — enabling +By default, the cleaner retains replication data needed by your most recent +backups, controlled by the +[`backup.cleanup.keep.latest.n`](#backup-retention) setting (default 5), and +deletes the rest. No additional configuration is required — enabling backups on a replicated instance is sufficient. ### Performance characteristics diff --git a/documentation/query/sql/checkpoint.md b/documentation/query/sql/checkpoint.md index 5e3a5fa5a..1c511251b 100644 --- a/documentation/query/sql/checkpoint.md +++ b/documentation/query/sql/checkpoint.md @@ -90,7 +90,7 @@ storage. Checkpoint history tracking is enabled by default when replication is active. No additional configuration is required. See the -[WAL Cleanup guide](/docs/high-availability/wal-cleanup/#integrating-with-the-sql-checkpoint-commands) +[WAL Cleanup guide](/docs/high-availability/wal-cleanup/#checkpoint-history) for details. 
## CHECKPOINT examples diff --git a/docusaurus.config.js b/docusaurus.config.js index 5885aa063..70df07404 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -149,7 +149,8 @@ const config = { return { resolve: { alias: { - react: path.resolve("/node_modules/react"), + react: path.resolve(__dirname, "node_modules/react"), + "@questdb/sql-parser/grammar": path.resolve(__dirname, "node_modules/@questdb/sql-parser/dist/grammar/index.js"), }, }, } From bf13611e1fcd35069df58c5fb2bb797ff81d18a4 Mon Sep 17 00:00:00 2001 From: Vlad Ilyushchenko Date: Mon, 23 Feb 2026 17:46:56 +0000 Subject: [PATCH 4/5] Fix off-by-one: cleaner retains 5 backups, not 4 The backup.window.count default is 5, meaning the cleaner retains data needed by the 5 most recent backups/checkpoints. The boundary entry itself is retained, with only older data deleted. Co-Authored-By: Claude Opus 4.6 --- documentation/high-availability/setup.md | 2 +- .../high-availability/wal-cleanup.md | 23 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/documentation/high-availability/setup.md b/documentation/high-availability/setup.md index b87072468..e083efcfe 100644 --- a/documentation/high-availability/setup.md +++ b/documentation/high-availability/setup.md @@ -197,7 +197,7 @@ longer needed, based on your backup and checkpoint history. The cleaner is enabled by default and requires no configuration when backups or checkpoint history are active. By default, it retains replication data -needed by the 4 most recent backups or checkpoints and deletes the rest. +needed by the 5 most recent backups or checkpoints and deletes older data. See the [WAL Cleanup guide](/docs/high-availability/wal-cleanup/) for configuration options, tuning, and troubleshooting. 
diff --git a/documentation/high-availability/wal-cleanup.md b/documentation/high-availability/wal-cleanup.md index 4a7da3d09..be7275d4d 100644 --- a/documentation/high-availability/wal-cleanup.md +++ b/documentation/high-availability/wal-cleanup.md @@ -39,8 +39,8 @@ replication.primary.cleaner.enabled=true replication.primary.cleaner.backup.window.count=5 ``` -The cleaner retains WAL data needed by your 4 most recent backups or -checkpoints, and deletes the rest (including the 5th-newest entry). It runs every 10 minutes +The cleaner retains WAL data needed by your 5 most recent backups or +checkpoints and deletes older data. It runs every 10 minutes (`replication.primary.cleaner.interval`). The cleaner requires at least one **trigger source** with sufficient history @@ -101,7 +101,7 @@ deleting anything. N defaults to your [`backup.cleanup.keep.latest.n`](/docs/operations/backup/#backup-retention) setting (itself default 5) and can be overridden with `replication.primary.cleaner.backup.window.count`. For example, with the default -of 5 the cleaner deletes data up to and including the 5th-newest complete backup. +of 5 the cleaner retains data needed by the 5 most recent complete backups and deletes anything older. :::warning All nodes in a replication cluster should use the **same `backup.object.store`** @@ -180,10 +180,10 @@ replication.primary.cleaner.checkpoint.source=false ## How the cleanup boundary works -The cleanup boundary determines how far back you can restore. WAL data up to -and including the boundary is deleted; data after the boundary is retained. Any +The cleanup boundary determines how far back you can restore. WAL data older +than the boundary is deleted; data from the boundary onward is retained. Any [point-in-time recovery](/docs/high-availability/setup/#point-in-time-recovery) -target must be **after** this boundary. +target must be **on or after** this boundary. 
Backup manifests and checkpoint history records are stored per backup instance name. The cleaner computes the boundary as follows: @@ -193,8 +193,8 @@ name. The cleaner computes the boundary as follows: `replication.primary.cleaner.backup.window.count` (default 5). 2. Skip any instance that has fewer than N entries. 3. Compare the Nth-newest entry from each eligible instance. The entry with the - **earliest timestamp** is the cleanup boundary — WAL data up to and including - that entry's transactions is deleted. + **earliest timestamp** is the cleanup boundary — WAL data older than that + entry is deleted, while the boundary entry itself is retained. ### Example @@ -255,11 +255,12 @@ gantt - **apple-parrot-baby** has only 3 entries, fewer than N=5, so it is skipped. - Comparing the Nth-newest entries: Jan 3 (door-echo-yoyo) vs Jan 7 (park-sugar-system). The earliest is **Jan 3**, so the cleanup boundary falls - there. All replication WAL data up to and including Jan 3 is deleted. -- After cleanup, restoring from the Jan 3 backup or older (such as + there. All replication WAL data older than Jan 3 is deleted (Jan 1 and Jan 2). + The Jan 3 entry and everything newer is retained. +- After cleanup, restoring from backups older than the boundary (such as door-echo-yoyo's Jan 1 or Jan 2 backups) is only possible as a standalone instance, not as part of the replication cluster. -- Any point-in-time recovery target must be **after** Jan 3. +- Any point-in-time recovery target must be **on or after** Jan 3. 
## Troubleshooting From e78be03139bb28132f279a21141e428417bea442 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 23 Feb 2026 18:48:28 +0000 Subject: [PATCH 5/5] fixed factual errors introduced by the previous commit --- documentation/high-availability/setup.md | 2 +- .../high-availability/wal-cleanup.md | 113 ++++++++++-------- documentation/operations/backup.md | 7 +- documentation/query/sql/checkpoint.md | 2 +- 4 files changed, 66 insertions(+), 58 deletions(-) diff --git a/documentation/high-availability/setup.md b/documentation/high-availability/setup.md index b87072468..e279346e6 100644 --- a/documentation/high-availability/setup.md +++ b/documentation/high-availability/setup.md @@ -197,7 +197,7 @@ longer needed, based on your backup and checkpoint history. The cleaner is enabled by default and requires no configuration when backups or checkpoint history are active. By default, it retains replication data -needed by the 4 most recent backups or checkpoints and deletes the rest. +for the most recent 5 backups or checkpoints and deletes everything older. See the [WAL Cleanup guide](/docs/high-availability/wal-cleanup/) for configuration options, tuning, and troubleshooting. diff --git a/documentation/high-availability/wal-cleanup.md b/documentation/high-availability/wal-cleanup.md index 4a7da3d09..dc70330d6 100644 --- a/documentation/high-availability/wal-cleanup.md +++ b/documentation/high-availability/wal-cleanup.md @@ -22,9 +22,8 @@ Requires: QuestDB Enterprise with replication enabled. 
:::warning

The WAL cleaner is enabled by default, **but it will not delete anything until
-at least 5 completed backups or checkpoints exist.** If you have not configured
-[Enterprise backups](/docs/operations/backup/) or run
-[`CHECKPOINT`](/docs/query/sql/checkpoint/) commands, WAL data accumulates
+at least 5 completed backups or checkpoints exist.** Without configuring backups
+or checkpoint history, and running them regularly, WAL data accumulates
indefinitely regardless of this setting.
:::

@@ -39,16 +38,14 @@ replication.primary.cleaner.enabled=true
replication.primary.cleaner.backup.window.count=5
```

-The cleaner retains WAL data needed by your 4 most recent backups or
-checkpoints, and deletes the rest (including the 5th-newest entry). It runs every 10 minutes
-(`replication.primary.cleaner.interval`).
+It runs every 10 minutes (`replication.primary.cleaner.interval`).

The cleaner requires at least one **trigger source** with sufficient history
before it will delete anything. The two supported sources are:

- **[Enterprise backups](/docs/operations/backup/)** — the cleaner reads
  backup manifests to determine what can be safely deleted.
-- **[Checkpoint history](#checkpoint-history)** — the cleaner reads
+- **[Checkpoint integration](#checkpoint-integration)** — the cleaner reads
  `CHECKPOINT RELEASE` records synced to the replication object store.

Both sources are enabled by default when replication is active. If you only use
@@ -72,9 +69,14 @@

Key fields:

| `backup_ts` | Timestamp of the boundary entry. Data up to and including this entry is deleted. |
| `tables` | Number of tables processed in this cycle. |

-If you see no `prune requested` lines, the cleaner has not yet accumulated
-enough history to act. Check that backups or checkpoints are running
-successfully. 
+If the cleaner does not have enough history to act, it logs: + +``` +insufficient backup history, skipping WAL cleanup [backup_window_count=5, history={door-echo-yoyo:3, park-sugar-system:2}] +``` + +This means no instance has reached the N-entry threshold yet. Check that +backups or checkpoints are running successfully. You can find a node's backup instance name by running: @@ -82,7 +84,7 @@ You can find a node's backup instance name by running: SELECT backup_instance_name; ``` -## Enterprise backup integration +## Backup integration The cleaner automatically reads your backup manifests to determine what can be safely deleted. The backup feature must be enabled and configured on the @@ -101,7 +103,7 @@ deleting anything. N defaults to your [`backup.cleanup.keep.latest.n`](/docs/operations/backup/#backup-retention) setting (itself default 5) and can be overridden with `replication.primary.cleaner.backup.window.count`. For example, with the default -of 5 the cleaner deletes data up to and including the 5th-newest complete backup. +of 5 the cleaner deletes data up to and including the 5th-newest complete backup, which becomes the oldest backup from which a node can be restored into the replication cluster. :::warning All nodes in a replication cluster should use the **same `backup.object.store`** @@ -110,7 +112,7 @@ node to compute the cleanup boundary. If nodes back up to different object stores, the cleaner cannot see all manifests and will not trigger correctly. ::: -## Checkpoint history +## Checkpoint integration If you take filesystem snapshots, AWS EBS volume snapshots, or use custom backup scripts that issue `CHECKPOINT` / `CHECKPOINT RELEASE`, checkpoint history @@ -153,31 +155,6 @@ replication object store for WAL cleanup — requires QuestDB Enterprise with replication enabled. 
::: -## Using both backups and checkpoints - -When both trigger sources are active (the default): - -```ini -backup.enabled=true -checkpoint.history.enabled=true -replication.primary.cleaner.checkpoint.source=true -``` - -The cleaner **merges entries from both sources into a single list per backup -instance name**, then keeps the newest N entries. This means enabling both -sources can make cleanup **more** aggressive: an instance with 3 backups and 3 -checkpoints has 6 entries total, crossing the N=5 threshold, whereas neither -source alone would be sufficient to trigger cleanup. - -:::tip -If you want the cleaner to act only on backup history and ignore checkpoints -(or vice versa), disable the unwanted source: - -```ini -replication.primary.cleaner.checkpoint.source=false -``` -::: - ## How the cleanup boundary works The cleanup boundary determines how far back you can restore. WAL data up to @@ -235,7 +212,7 @@ gantt B :milestone, d7, 2026-01-07, 0d section park-sugar-system - B :milestone, p1, 2026-01-06, 0d + B :done, milestone, p1, 2026-01-06, 0d B :milestone, p2, 2026-01-07, 0d B :milestone, p3, 2026-01-08, 0d B :milestone, p4, 2026-01-09, 0d @@ -243,23 +220,54 @@ gantt B :milestone, p6, 2026-01-11, 0d section apple-parrot-baby - B :milestone, a1, 2026-01-10, 0d - B :milestone, a2, 2026-01-11, 0d - B :milestone, a3, 2026-01-12, 0d + B :done, milestone, a1, 2026-01-10, 0d + B :done, milestone, a2, 2026-01-11, 0d + B :done, milestone, a3, 2026-01-12, 0d B :crit, vert, cb, 2026-01-03, 0d ``` +Greyed-out entries in the diagram are not considered by the algorithm. + +Skipped and not considered: +- **apple-parrot-baby** has only 3 entries, fewer than N=5, so it is skipped. + +The following entries are considered by the algorithm: - **door-echo-yoyo** has 7 entries. Its 5th newest entry is **Jan 3**. - **park-sugar-system** has 6 entries. Its 5th newest entry is **Jan 7**. -- **apple-parrot-baby** has only 3 entries, fewer than N=5, so it is skipped. 
-- Comparing the Nth-newest entries: Jan 3 (door-echo-yoyo) vs Jan 7
-  (park-sugar-system). The earliest is **Jan 3**, so the cleanup boundary falls
-  there. All replication WAL data up to and including Jan 3 is deleted.
-- After cleanup, restoring from the Jan 3 backup or older (such as
-  door-echo-yoyo's Jan 1 or Jan 2 backups) is only possible as a standalone
-  instance, not as part of the replication cluster.
-- Any point-in-time recovery target must be **after** Jan 3.
+
+Comparing the Nth-newest considered entries: Jan 3 (door-echo-yoyo) vs Jan 7
+(park-sugar-system). The earliest is **Jan 3**, so the cleanup boundary falls
+there. All replication WAL data up to and including Jan 3 is deleted.
+After cleanup, restoring from a backup older than Jan 3 (such as
+door-echo-yoyo's Jan 1 or Jan 2 backups) is only possible as a standalone
+instance, not as part of the replication cluster.
+Any point-in-time recovery target must be **after** Jan 3.
+
+## Using both backups and checkpoints
+
+By default, both trigger sources are active:
+
+```ini
+backup.enabled=true
+checkpoint.history.enabled=true
+replication.primary.cleaner.checkpoint.source=true
+```
+
+Backups and checkpoints are merged into a single list per backup instance name
+before the boundary is computed. The effect depends on your cluster topology:
+
+- **Same instance produces both backups and checkpoints** — more entries
+  accumulate per instance, crossing the N threshold sooner and pushing the
+  Nth-newest entry forward in time. This shrinks the window of data kept in
+  the replication object store more quickly.
+- **Different instances produce backups vs checkpoints** — each instance has
+  fewer entries individually, and the cross-instance comparison in step 3 picks
+  the oldest boundary. This increases the time window of data retained. 
+ +To restrict the cleaner to a single source, set +`replication.primary.cleaner.checkpoint.source=false` to ignore checkpoint +history, or disable `backup.enabled` to ignore backup manifests. ## Troubleshooting @@ -271,8 +279,9 @@ gantt cleaner has not started yet. Run `SELECT * FROM backups();` or check your checkpoint schedule. -2. **Check the logs.** Search for `wal::uploader::cleaner`. If there are no - `prune requested` lines, the cleaner is not finding enough history to act. +2. **Check the logs.** Search for `wal::uploader::cleaner`. If you see + `insufficient backup history, skipping WAL cleanup`, the cleaner is not + finding enough history to act. 3. **Check for abandoned backup instance names.** A decommissioned node whose history remains in the object store drags the cleanup boundary backward diff --git a/documentation/operations/backup.md b/documentation/operations/backup.md index 99cd49f9e..08bfe029f 100644 --- a/documentation/operations/backup.md +++ b/documentation/operations/backup.md @@ -251,10 +251,9 @@ Returns `null` if no backup has been run yet. When replication is enabled, the [WAL cleaner](/docs/high-availability/wal-cleanup/) uses backup manifests to determine which replicated WAL data in object storage can be safely deleted. -By default, the cleaner retains replication data needed by your most recent -backups, controlled by the -[`backup.cleanup.keep.latest.n`](#backup-retention) setting (default 5), and -deletes the rest. No additional configuration is required — enabling +By default, the cleaner retains replication data for as many backups as your +[`backup.cleanup.keep.latest.n`](#backup-retention) setting (default 5) and +deletes everything older. No additional configuration is required — enabling backups on a replicated instance is sufficient. 
### Performance characteristics diff --git a/documentation/query/sql/checkpoint.md b/documentation/query/sql/checkpoint.md index 1c511251b..05aee042b 100644 --- a/documentation/query/sql/checkpoint.md +++ b/documentation/query/sql/checkpoint.md @@ -90,7 +90,7 @@ storage. Checkpoint history tracking is enabled by default when replication is active. No additional configuration is required. See the -[WAL Cleanup guide](/docs/high-availability/wal-cleanup/#checkpoint-history) +[WAL Cleanup guide](/docs/high-availability/wal-cleanup/#checkpoint-integration) for details. ## CHECKPOINT examples