@@ -94,5 +94,57 @@
"native.max.blocking.threads": {
"default": "cpuCount * 4",
"description": "Maximum number of threads for parallel blocking disk IO read/write operations for replication (and other operations). These threads are ephemeral: they are spawned on demand and shut down after a short idle period if no longer in use. These are not CPU-bound threads, hence the relatively large number. The default should be appropriate for most use cases."
},
"replication.primary.cleaner.enabled": {
"default": "true",
"description": "Master switch for the WAL cleaner."
},
"replication.primary.cleaner.interval": {
"default": "10m",
"description": "Time between cleanup cycles. Range: 1s – 24h."
},
"replication.primary.cleaner.checkpoint.source": {
"default": "true",
"description": "Use checkpoint history as a cleanup trigger source."
},
"replication.primary.cleaner.backup.window.count": {
"default": "backup.cleanup.keep.latest.n or 5",
"description": "Minimum complete backups/checkpoints per instance before cleanup starts. Defaults to `backup.cleanup.keep.latest.n` if backups are enabled, otherwise `5`."
},
"replication.primary.cleaner.delete.concurrency": {
"default": "4 – 12 (auto)",
"description": "Concurrent deletion tasks. Derived from `replication.requests.max.concurrent`. Range: 4 – 32."
},
"replication.primary.cleaner.max.requests.per.second": {
"default": "service-dependent",
"description": "Rate limit for object store delete requests. Set to `0` for unlimited. Range: 0 – 10000."
},
"replication.primary.cleaner.progress.write.interval": {
"default": "5s",
"description": "How often progress is persisted during a cleanup cycle. Lower values mean less re-work after a crash but more writes. Range: 100ms – 60s."
},
"replication.primary.cleaner.dropped.table.cooloff": {
"default": "1h",
"description": "Wait time after `DROP TABLE` before removing the table's data from object storage. Guards against clock skew."
},
"replication.primary.cleaner.retry.attempts": {
"default": "20",
"description": "Retries for transient object store failures during cleanup. Range: 0 – 100."
},
"replication.primary.cleaner.retry.interval": {
"default": "2s",
"description": "Delay between cleanup retries. Range: 0 – 5m."
},
"checkpoint.history.enabled": {
"default": "true (when replication is enabled)",
"description": "Enable the checkpoint history tracker. Requires replication."
},
"checkpoint.history.keep.count": {
"default": "100",
"description": "Maximum checkpoint records retained per instance."
},
"checkpoint.history.long.retry.interval": {
"default": "1m",
"description": "Retry interval for syncing checkpoint history to the object store after burst retries fail."
}
}
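The reference entries above can be combined into a concrete tuning sketch. QuestDB's documented convention maps a key such as `replication.primary.cleaner.interval` to the environment variable `QDB_REPLICATION_PRIMARY_CLEANER_INTERVAL` (prefix `QDB_`, dots to underscores, uppercase); the values below are purely illustrative, not recommendations:

```shell
# Hypothetical cleaner tuning. Key names come from the reference above;
# values are illustrative and stay within the documented ranges.
export QDB_REPLICATION_PRIMARY_CLEANER_ENABLED=true
export QDB_REPLICATION_PRIMARY_CLEANER_INTERVAL=30m                 # range: 1s - 24h
export QDB_REPLICATION_PRIMARY_CLEANER_DELETE_CONCURRENCY=8         # range: 4 - 32
export QDB_REPLICATION_PRIMARY_CLEANER_MAX_REQUESTS_PER_SECOND=500  # 0 = unlimited
export QDB_REPLICATION_PRIMARY_CLEANER_RETRY_ATTEMPTS=20            # range: 0 - 100
```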
documentation/high-availability/setup.md: 10 additions & 18 deletions
@@ -40,9 +40,6 @@ Create an S3 bucket following
**Recommendations:**
- Select a region close to your primary node
- Disable blob versioning
-- Set up a
-[lifecycle policy](https://docs.aws.amazon.com/AmazonS3/latest/userguide/how-to-set-lifecycle-configuration-intro.html)
-to manage WAL file retention (see [Snapshot and expiration policies](#snapshot-and-expiration-policies))

**Connection string:**

@@ -73,9 +70,6 @@ then create a Blob Container.
**Recommendations:**
- Select a region close to your primary node
- Disable blob versioning
-- Set up
-[Lifecycle Management](https://learn.microsoft.com/en-us/azure/storage/blobs/lifecycle-management-policy-configure?tabs=azure-portal)
-for WAL file retention

**Connection string:**

@@ -158,9 +152,7 @@ nodes.
See [Backup and restore](/docs/operations/backup/) for the full procedure.

:::tip
-Set up regular snapshots (daily or weekly). See
-[Snapshot and expiration policies](#snapshot-and-expiration-policies) for
-guidance.
+Set up regular snapshots (daily or weekly).
:::
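A scheduled snapshot could be sketched as follows. The `CHECKPOINT CREATE`/`CHECKPOINT RELEASE` statements, the port, and the paths are assumptions for illustration; follow the linked Backup and restore procedure for the authoritative steps.

```shell
#!/bin/sh
# Hypothetical daily snapshot sketch. The /exec REST endpoint, the
# CHECKPOINT statements, and the data/backup paths are assumptions;
# see the Backup and restore docs for the real procedure.
set -e
QDB="http://localhost:9000/exec"

curl -G "$QDB" --data-urlencode "query=CHECKPOINT CREATE"      # freeze a consistent on-disk view
rsync -a /var/lib/questdb/db/ "/backups/questdb-$(date +%F)/"  # copy data files elsewhere
curl -G "$QDB" --data-urlencode "query=CHECKPOINT RELEASE"     # resume normal operation
```

Scheduled with cron, e.g. `0 2 * * 0 /usr/local/bin/questdb-snapshot.sh` for weekly runs.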

## 4. Configure replica node(s)
@@ -197,18 +189,18 @@ export QDB_REPLICATION_OBJECT_STORE="azblob::..."

For tuning options, see the [Tuning guide](/docs/high-availability/tuning/).
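For contrast with the primary's settings, a replica's minimal environment might look like the sketch below. The `replication.role` key is an assumption here, and the `...` placeholder in the connection string must come from your deployment:

```shell
# Hypothetical replica-side settings. The role key is illustrative;
# the connection string placeholder must be filled in for a real setup.
export QDB_REPLICATION_ROLE=replica
export QDB_REPLICATION_OBJECT_STORE="azblob::..."
```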

-## Snapshot and expiration policies
+## WAL data cleanup

-WAL files are typically read by replicas shortly after upload. To optimize
-costs, move files to cooler storage tiers after 1-7 days.
+Replicated WAL data accumulates in object storage over time. The **WAL
+cleaner** runs on the primary node and automatically removes data that is no
+longer needed, based on your backup and checkpoint history.

-**Recommendations:**
-- Take snapshots every 1-7 days
-- Keep WAL files for at least 30 days
-- Ensure snapshot interval is shorter than WAL expiration
+The cleaner is enabled by default and requires no configuration when backups
+or checkpoint history are active. By default, it retains replication data
+for the most recent 5 backups or checkpoints and deletes everything older.

-Example: Weekly snapshots + 30-day WAL retention = ability to restore up to 23
-days back. Daily snapshots restore faster but use more storage.
+See the [WAL Cleanup guide](/docs/high-availability/wal-cleanup/) for
+configuration options, tuning, and troubleshooting.
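As a single illustrative override (value hypothetical), the retention window from the configuration reference could be widened so that more history survives cleanup:

```shell
# Retain WAL data covering the 10 most recent backups/checkpoints
# instead of the default 5 (illustrative value).
export QDB_REPLICATION_PRIMARY_CLEANER_BACKUP_WINDOW_COUNT=10
```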

## Disaster recovery
