From 83d0375300828f5da5b148a514e6e17ccb684964 Mon Sep 17 00:00:00 2001 From: Jie Yao Date: Fri, 8 May 2026 16:56:18 +0800 Subject: [PATCH] SDSTOR-21465: scrubber phase 1 --- CHANGELOG.md | 13 - conanfile.py | 2 +- src/include/homeobject/common.hpp | 2 +- src/lib/homeobject_impl.hpp | 6 +- src/lib/homestore_backend/CMakeLists.txt | 23 +- .../homestore_backend/MPMCPriorityQueue.hpp | 192 ++ src/lib/homestore_backend/gc_manager.cpp | 16 +- src/lib/homestore_backend/gc_manager.hpp | 12 +- .../homestore_backend/heap_chunk_selector.cpp | 2 - src/lib/homestore_backend/hs_blob_manager.cpp | 16 +- src/lib/homestore_backend/hs_homeobject.cpp | 30 +- src/lib/homestore_backend/hs_homeobject.hpp | 41 +- .../hs_backend_config.fbs | 4 + .../resync_blob_data.fbs | 0 .../resync_pg_data.fbs | 0 .../resync_shard_data.fbs | 0 .../hs_homeobject_fbs/scrub_common.fbs | 25 + .../hs_homeobject_fbs/scrub_req.fbs | 17 + .../hs_homeobject_fbs/scrub_result.fbs | 11 + src/lib/homestore_backend/hs_http_manager.cpp | 400 +++- src/lib/homestore_backend/hs_http_manager.hpp | 54 + src/lib/homestore_backend/hs_pg_manager.cpp | 268 ++- .../homestore_backend/hs_shard_manager.cpp | 14 +- .../replication_state_machine.cpp | 35 +- .../replication_state_machine.hpp | 8 + src/lib/homestore_backend/scrub_manager.cpp | 2123 +++++++++++++++++ src/lib/homestore_backend/scrub_manager.hpp | 366 +++ .../homestore_backend/tests/CMakeLists.txt | 9 + .../tests/homeobj_fixture.hpp | 23 +- .../tests/hs_scrubber_tests.cpp | 1251 ++++++++++ .../tests/test_mpmc_priority_queue.cpp | 413 ++++ 31 files changed, 5274 insertions(+), 102 deletions(-) delete mode 100644 CHANGELOG.md create mode 100644 src/lib/homestore_backend/MPMCPriorityQueue.hpp rename src/lib/homestore_backend/{ => hs_homeobject_fbs}/hs_backend_config.fbs (93%) rename src/lib/homestore_backend/{ => hs_homeobject_fbs}/resync_blob_data.fbs (100%) rename src/lib/homestore_backend/{ => hs_homeobject_fbs}/resync_pg_data.fbs (100%) rename src/lib/homestore_backend/{ => hs_homeobject_fbs}/resync_shard_data.fbs (100%) create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/scrub_common.fbs create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/scrub_req.fbs create mode 100644 src/lib/homestore_backend/hs_homeobject_fbs/scrub_result.fbs create mode 100644 src/lib/homestore_backend/scrub_manager.cpp create mode 100644 src/lib/homestore_backend/scrub_manager.hpp create mode 100644 src/lib/homestore_backend/tests/hs_scrubber_tests.cpp create mode 100644 src/lib/homestore_backend/tests/test_mpmc_priority_queue.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index 51f00cd2e..000000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,13 +0,0 @@ -# Changelog -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
- -## [Unreleased] - -### Added - -- Created repository - -[Unreleased]: https://github.com/eBay/HomeObject/compare/...HEAD diff --git a/conanfile.py b/conanfile.py index 917669267..81eac46ed 100644 --- a/conanfile.py +++ b/conanfile.py @@ -10,7 +10,7 @@ class HomeObjectConan(ConanFile): name = "homeobject" - version = "4.1.10" + version = "4.2.0" homepage = "https://github.com/eBay/HomeObject" description = "Blob Store built on HomeStore" diff --git a/src/include/homeobject/common.hpp b/src/include/homeobject/common.hpp index 29a0589a9..63eff1305 100644 --- a/src/include/homeobject/common.hpp +++ b/src/include/homeobject/common.hpp @@ -14,7 +14,7 @@ SISL_LOGGING_DECL(homeobject); -#define HOMEOBJECT_LOG_MODS homeobject, blobmgr, shardmgr, gcmgr +#define HOMEOBJECT_LOG_MODS homeobject, blobmgr, shardmgr, gcmgr, scrubmgr #ifndef Ki constexpr uint64_t Ki = 1024ul; diff --git a/src/lib/homeobject_impl.hpp b/src/lib/homeobject_impl.hpp index 4eb2af48f..b905cbc96 100644 --- a/src/lib/homeobject_impl.hpp +++ b/src/lib/homeobject_impl.hpp @@ -88,7 +88,8 @@ class HomeObjectImpl : public HomeObject, public std::enable_shared_from_this< HomeObjectImpl > { /// Implementation defines these - virtual ShardManager::AsyncResult< ShardInfo > _create_shard(pg_id_t, uint64_t size_bytes, std::string meta, trace_id_t tid) = 0; + virtual ShardManager::AsyncResult< ShardInfo > _create_shard(pg_id_t, uint64_t size_bytes, std::string meta, + trace_id_t tid) = 0; virtual ShardManager::AsyncResult< ShardInfo > _seal_shard(ShardInfo const&, trace_id_t tid) = 0; virtual BlobManager::AsyncResult< blob_id_t > _put_blob(ShardInfo const&, Blob&&, trace_id_t tid) = 0; @@ -189,7 +190,8 @@ class HomeObjectImpl : public HomeObject, /// ShardManager ShardManager::AsyncResult< ShardInfo > get_shard(shard_id_t id, trace_id_t tid) const final; - ShardManager::AsyncResult< ShardInfo > create_shard(pg_id_t pg_owner, uint64_t size_bytes, std::string meta, trace_id_t tid) final; + ShardManager::AsyncResult< ShardInfo > create_shard(pg_id_t pg_owner, uint64_t size_bytes, std::string meta, + trace_id_t tid) final; ShardManager::AsyncResult< InfoList > list_shards(pg_id_t pg, trace_id_t tid) const final; ShardManager::AsyncResult< ShardInfo > seal_shard(shard_id_t id, trace_id_t tid) final; uint64_t get_current_timestamp(); diff --git a/src/lib/homestore_backend/CMakeLists.txt b/src/lib/homestore_backend/CMakeLists.txt index 7fd3d6fe2..88a132ada 100644 --- a/src/lib/homestore_backend/CMakeLists.txt +++ b/src/lib/homestore_backend/CMakeLists.txt @@ -30,6 +30,8 @@ target_sources("${PROJECT_NAME}_homestore" PRIVATE hs_cp_callbacks.cpp hs_http_manager.cpp gc_manager.cpp + scrub_manager.cpp + MPMCPriorityQueue.hpp $ ) target_link_libraries("${PROJECT_NAME}_homestore" PUBLIC @@ -42,10 +44,14 @@ settings_gen_cpp( ${FLATBUFFERS_FLATC_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/generated/ "${PROJECT_NAME}_homestore" - hs_backend_config.fbs - resync_pg_data.fbs - resync_shard_data.fbs - resync_blob_data.fbs + hs_homeobject_fbs/hs_backend_config.fbs + hs_homeobject_fbs/resync_pg_data.fbs + hs_homeobject_fbs/resync_shard_data.fbs + hs_homeobject_fbs/resync_blob_data.fbs + + hs_homeobject_fbs/scrub_common.fbs + hs_homeobject_fbs/scrub_req.fbs + hs_homeobject_fbs/scrub_result.fbs ) # Unit test objects @@ -165,3 +171,12 @@ add_test(NAME HomestoreTestGC COMMAND homestore_test_gc -csv error --executor im --override_config hs_backend_config.gc_enable_read_verify=true --override_config hs_backend_config.gc_garbage_rate_threshold=0 --override_config 
hs_backend_config.gc_scan_interval_sec=5) + +add_executable(homestore_test_scrubber) +target_sources(homestore_test_scrubber PRIVATE $) +target_link_libraries(homestore_test_scrubber PUBLIC homeobject_homestore ${COMMON_TEST_DEPS}) +add_test(NAME HomestoreTestScrubber COMMAND homestore_test_scrubber -csv error --executor immediate --config_path ./ + --override_config hs_backend_config.enable_scrubber=true + --override_config nuraft_mesg_config.mesg_factory_config.data_request_deadline_secs:10) + + diff --git a/src/lib/homestore_backend/MPMCPriorityQueue.hpp b/src/lib/homestore_backend/MPMCPriorityQueue.hpp new file mode 100644 index 000000000..9b0ba02d5 --- /dev/null +++ b/src/lib/homestore_backend/MPMCPriorityQueue.hpp @@ -0,0 +1,192 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace homeobject { + +/** + * @brief Multi-Producer Multi-Consumer Priority Queue (C++20) + * + * Thread-safe priority queue that supports: + * - Concurrent push operations from multiple producers + * - Concurrent pop operations from multiple consumers + * - Blocking pop when queue is empty + * - Graceful shutdown via close() method + * + * @tparam T Element type (must be comparable) + * @tparam Compare Comparison function (default: std::less for max-heap) + */ +template < typename T, typename Compare = std::less< T > > + requires std::movable< T > && std::predicate< Compare, T, T > +class MPMCPriorityQueue { +public: + using value_type = T; + using size_type = std::size_t; + using comparator_type = Compare; + + /** + * @brief Status codes returned by pop operations + */ + enum class Status : uint8_t { + Ok, ///< Successfully popped an element + Closed ///< Queue is closed, no more elements available + }; + + /** + * @brief Result of a pop operation + */ + struct PopResult { + Status status; + std::optional< T > value; ///< Has value only if status == Ok + + // Convenience methods + [[nodiscard]] constexpr bool is_ok() const noexcept { return status == Status::Ok; } + [[nodiscard]] constexpr bool is_closed() const noexcept { return status == Status::Closed; } + }; + + /** + * @brief Construct an empty priority queue + */ + constexpr MPMCPriorityQueue() noexcept(std::is_nothrow_default_constructible_v< Compare >) = default; + + /** + * @brief Destructor - automatically closes the queue + */ + ~MPMCPriorityQueue() { close(); } + + // Disable copy and move to prevent issues with condition variables + MPMCPriorityQueue(const MPMCPriorityQueue&) = delete; + MPMCPriorityQueue& operator=(const MPMCPriorityQueue&) = delete; + MPMCPriorityQueue(MPMCPriorityQueue&&) = delete; + MPMCPriorityQueue& operator=(MPMCPriorityQueue&&) = delete; + + /** + * @brief Thread-safe push operation (copy) + * + * @param value Element to insert + * @return true if pushed successfully, false if queue is closed + */ + bool push(const T& value) + requires std::copy_constructible< T > + { + { + std::scoped_lock lock(mutex_); + if (closed_) [[unlikely]] { + return false; // Queue is closed, cannot push + } + pq_.push(value); + } + cv_.notify_one(); // Wake one waiting consumer + return true; + } + + /** + * @brief Thread-safe push operation (move) + * + * @param value Element to insert (will be moved) + * @return true if pushed successfully, false if queue is closed + */ + bool push(T&& value) { + { + std::scoped_lock lock(mutex_); + if (closed_) [[unlikely]] { return false; } + pq_.push(std::move(value)); + } + cv_.notify_one(); + return true; + } + + /** + * @brief 
Thread-safe pop operation + * + * Blocks if queue is empty and not closed. + * Returns immediately if queue is closed. + * + * @return PopResult containing status and optional value + * @note Thread-safe for multiple concurrent consumers + */ + [[nodiscard]] PopResult pop() { + std::unique_lock lock(mutex_); + + // Wait until queue has elements or is closed + cv_.wait(lock, [this] { return closed_ || !pq_.empty(); }); + + // Try to pop an element + if (!pq_.empty()) { + T top = std::move(const_cast< T& >(pq_.top())); + pq_.pop(); + return PopResult{.status = Status::Ok, .value = std::move(top)}; + } + + // Queue is empty and closed + return PopResult{.status = Status::Closed, .value = std::nullopt}; + } + + /** + * @brief Close the queue + * + * After calling close(): + * - All blocked pop() calls will wake up + * - Existing elements can still be popped + * - New push() calls will be ignored + * - pop() returns Status::Closed when queue becomes empty + * + * @note Thread-safe and idempotent + */ + void close() noexcept { + { + std::scoped_lock lock(mutex_); + closed_ = true; + } + cv_.notify_all(); // Wake all waiting consumers + } + + /** + * @brief Get current number of elements + * + * @return Number of elements in the queue + * @note Thread-safe + */ + [[nodiscard]] size_type size() const { + std::scoped_lock lock(mutex_); + return pq_.size(); + } + + /** + * @brief Check if queue is empty + * + * @return true if queue has no elements + * @note Thread-safe + */ + [[nodiscard]] bool empty() const { + std::scoped_lock lock(mutex_); + return pq_.empty(); + } + + /** + * @brief Check if queue is closed + * + * @return true if close() has been called + * @note Thread-safe + */ + [[nodiscard]] bool is_closed() const { + std::scoped_lock lock(mutex_); + return closed_; + } + +private: + mutable std::mutex mutex_; + std::condition_variable cv_; + bool closed_{false}; + std::priority_queue< T, std::vector< T >, Compare > pq_; +}; + +} // namespace homeobject diff --git a/src/lib/homestore_backend/gc_manager.cpp b/src/lib/homestore_backend/gc_manager.cpp index 8076d92f3..83fcaf1df 100644 --- a/src/lib/homestore_backend/gc_manager.cpp +++ b/src/lib/homestore_backend/gc_manager.cpp @@ -25,14 +25,14 @@ SISL_LOGGING_DECL(gcmgr) GCManager::GCManager(HSHomeObject* homeobject) : m_chunk_selector{homeobject->chunk_selector()}, m_hs_home_object{homeobject} { homestore::meta_service().register_handler( - _gc_actor_meta_name, + gc_actor_meta_name, [this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) { on_gc_actor_meta_blk_found(std::move(buf), voidptr_cast(mblk)); }, nullptr, true); homestore::meta_service().register_handler( - _gc_reserved_chunk_meta_name, + gc_reserved_chunk_meta_name, [this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) { on_reserved_chunk_meta_blk_found(std::move(buf), voidptr_cast(mblk)); }, @@ -44,7 +44,7 @@ GCManager::GCManager(HSHomeObject* homeobject) : true); homestore::meta_service().register_handler( - _gc_task_meta_name, + gc_task_meta_name, [this](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) { on_gc_task_meta_blk_found(std::move(buf), voidptr_cast(mblk)); }, @@ -64,7 +64,7 @@ void GCManager::on_gc_task_meta_blk_found(sisl::byte_view const& buf, void* meta // here, we are under the protection of the lock of metaservice. however, we will also try to update pg and shard // metablk and then destroy the gc_task_sb, which will also try to acquire the lock of metaservice, as a result, a // dead lock will happen. 
so here we will handle all the gc tasks after read all the metablks - m_recovered_gc_tasks.emplace_back(_gc_task_meta_name); + m_recovered_gc_tasks.emplace_back(gc_task_meta_name); m_recovered_gc_tasks.back().load(buf, meta_cookie); } @@ -89,7 +89,7 @@ void GCManager::handle_all_recovered_gc_tasks() { } void GCManager::on_gc_actor_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie) { - m_gc_actor_sbs.emplace_back(_gc_actor_meta_name); + m_gc_actor_sbs.emplace_back(gc_actor_meta_name); auto& gc_actor_sb = m_gc_actor_sbs.back(); gc_actor_sb.load(buf, meta_cookie); auto pdev_id = gc_actor_sb->pdev_id; @@ -100,7 +100,7 @@ void GCManager::on_gc_actor_meta_blk_found(sisl::byte_view const& buf, void* met } void GCManager::on_reserved_chunk_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie) { - homestore::superblk< gc_reserved_chunk_superblk > reserved_chunk_sb(_gc_reserved_chunk_meta_name); + homestore::superblk< gc_reserved_chunk_superblk > reserved_chunk_sb(gc_reserved_chunk_meta_name); auto chunk_id = reserved_chunk_sb.load(buf, meta_cookie)->chunk_id; auto EXVchunk = m_chunk_selector->get_extend_vchunk(chunk_id); if (EXVchunk == nullptr) { @@ -976,7 +976,7 @@ bool GCManager::pdev_gc_actor::copy_valid_data( if (err) { // we will come here if: - // 1 any blob copy fails, then err is operation_canceled + // 1 any blob copy fails, then err is operation_cancelled // 2 write footer fails, then err is the error code of write footer GCLOGE(task_id, pg_id, shard_id, "Failed to copy some blos or failed to write shard footer for move_to_chunk={}, " @@ -1271,7 +1271,7 @@ void GCManager::pdev_gc_actor::process_gc_task(chunk_id_t move_from_chunk, uint8 // after data copy, we persist the gc task meta blk. now, we can make sure all the valid blobs are successfully // copyed and new blob indexes have be written to gc index table before gc task superblk is persisted. 
- homestore::superblk< GCManager::gc_task_superblk > gc_task_sb{GCManager::_gc_task_meta_name}; + homestore::superblk< GCManager::gc_task_superblk > gc_task_sb{GCManager::gc_task_meta_name}; gc_task_sb.create(sizeof(GCManager::gc_task_superblk)); gc_task_sb->move_from_chunk = move_from_chunk; gc_task_sb->move_to_chunk = move_to_chunk; diff --git a/src/lib/homestore_backend/gc_manager.hpp b/src/lib/homestore_backend/gc_manager.hpp index 7fd2a46be..6a0415023 100644 --- a/src/lib/homestore_backend/gc_manager.hpp +++ b/src/lib/homestore_backend/gc_manager.hpp @@ -46,9 +46,9 @@ class GCManager { GCManager& operator=(GCManager&&) = delete; public: - inline static auto const _gc_actor_meta_name = std::string("GCActor"); - inline static auto const _gc_task_meta_name = std::string("GCTask"); - inline static auto const _gc_reserved_chunk_meta_name = std::string("GCReservedChunk"); + inline static auto const gc_actor_meta_name = std::string("GCActor"); + inline static auto const gc_task_meta_name = std::string("GCTask"); + inline static auto const gc_reserved_chunk_meta_name = std::string("GCReservedChunk"); inline static atomic_uint64_t _gc_task_id{1}; // 0 is used for crash recovery #pragma pack(1) @@ -61,7 +61,7 @@ class GCManager { uint64_t failed_egc_task_count{0ull}; uint64_t total_reclaimed_blk_count_by_gc{0ull}; uint64_t total_reclaimed_blk_count_by_egc{0ull}; - static std::string name() { return _gc_actor_meta_name; } + static std::string name() { return gc_actor_meta_name; } }; struct gc_task_superblk { @@ -70,12 +70,12 @@ class GCManager { chunk_id_t vchunk_id; pg_id_t pg_id; uint8_t priority; - static std::string name() { return _gc_task_meta_name; } + static std::string name() { return gc_task_meta_name; } }; struct gc_reserved_chunk_superblk { chunk_id_t chunk_id; - static std::string name() { return _gc_reserved_chunk_meta_name; } + static std::string name() { return gc_reserved_chunk_meta_name; } }; #pragma pack() diff --git a/src/lib/homestore_backend/heap_chunk_selector.cpp b/src/lib/homestore_backend/heap_chunk_selector.cpp index 1068ebb02..ba04d276c 100644 --- a/src/lib/homestore_backend/heap_chunk_selector.cpp +++ b/src/lib/homestore_backend/heap_chunk_selector.cpp @@ -381,8 +381,6 @@ void HeapChunkSelector::switch_chunks_for_pg(const pg_id_t pg_id, const chunk_nu std::unique_lock lk(pg_chunk_collection->mtx); auto& pg_chunks = pg_chunk_collection->m_pg_chunks; - // LOGDEBUGMOD(homeobject, "gc: before switch chunks for pg_id={}, pg_chunks={}", pg_chunks); - if (sisl_unlikely(pg_chunks[v_chunk_id]->get_chunk_id() == new_chunk_id)) { // this might happens when crash recovery. the crash happens after pg metablk is updated but before gc task // metablk is destroyed. 
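A minimal usage sketch of the MPMCPriorityQueue introduced earlier in this patch (single producer, single consumer, close-based shutdown). The ScrubWorkItem element type, the priorities, and the thread counts here are illustrative only and not part of the patch:

    #include <cstdint>
    #include <thread>
    #include "MPMCPriorityQueue.hpp"

    struct ScrubWorkItem {
        uint8_t priority{0};
        uint64_t shard_id{0};
        // std::less< ScrubWorkItem > gives a max-heap on priority: higher-priority items pop first
        bool operator<(ScrubWorkItem const& other) const { return priority < other.priority; }
    };

    int main() {
        homeobject::MPMCPriorityQueue< ScrubWorkItem > queue;

        // producer: push() returns false once the queue has been closed
        std::thread producer([&queue] {
            for (uint64_t i = 0; i < 10; ++i) {
                queue.push(ScrubWorkItem{.priority = static_cast< uint8_t >(i % 3), .shard_id = i});
            }
        });

        // consumer: pop() blocks until an item is available or the queue is closed and drained
        std::thread consumer([&queue] {
            for (;;) {
                auto res = queue.pop();
                if (res.is_closed()) break; // closed and empty: stop consuming
                // process res.value->shard_id ...
            }
        });

        producer.join();
        queue.close(); // wakes the consumer; items already queued can still be popped
        consumer.join();
        return 0;
    }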
diff --git a/src/lib/homestore_backend/hs_blob_manager.cpp b/src/lib/homestore_backend/hs_blob_manager.cpp index c89c32ec5..1c01c1125 100644 --- a/src/lib/homestore_backend/hs_blob_manager.cpp +++ b/src/lib/homestore_backend/hs_blob_manager.cpp @@ -88,7 +88,7 @@ BlobManager::AsyncResult< blob_id_t > HSHomeObject::_put_blob(ShardInfo const& s return folly::makeUnexpected(BlobErrorCode::SHUTTING_DOWN); } incr_pending_request_num(); - // check user key size + // check user key size if (blob.user_key.size() > BlobHeader::max_user_key_length) { BLOGE(tid, shard.id, 0, "input user key length > max_user_key_length {}", blob.user_key.size(), BlobHeader::max_user_key_length); @@ -167,8 +167,7 @@ BlobManager::AsyncResult< blob_id_t > HSHomeObject::_put_blob(ShardInfo const& s // Set offset of actual data after the blob header and user key (rounded off) req->blob_header()->data_offset = req->blob_header_buf().size(); - RELEASE_ASSERT(req->blob_header()->data_offset == _data_block_size, - "blob header should equals _data_block_size"); + RELEASE_ASSERT(req->blob_header()->data_offset == _data_block_size, "blob header should equals _data_block_size"); // In case blob body is not aligned, create a new aligned buffer and copy the blob body. if (((r_cast< uintptr_t >(blob.body.cbytes()) % io_align) != 0) || ((blob_size % io_align) != 0)) { // If address or size is not aligned, create a separate aligned buffer and do expensive memcpy. @@ -248,6 +247,13 @@ bool HSHomeObject::local_add_blob_info(pg_id_t const pg_id, BlobInfo const& blob } else { BLOGT(tid, blob_info.shard_id, blob_info.blob_id, "blob already exists in index table, skip it."); } + + hs_pg->last_committed_blob_id.store(blob_info.blob_id); + + // local_add_blob_info will also be called if br happens, in this case, last_committed_blob_id will be finally + // updated to the correct value after br is done, so we don't need to worry about the case where + // last_committed_blob_id is updated to a smaller value than the current last_committed_blob_id + return true; } @@ -367,9 +373,7 @@ BlobManager::AsyncResult< Blob > HSHomeObject::_get_blob_data(const shared< home } auto verify_result = do_verify_blob(read_buf.cbytes(), shard_id, 0 /* no blob_id check */); - if (!verify_result.hasValue()) { - return folly::makeUnexpected(verify_result.error()); - } + if (!verify_result.hasValue()) { return folly::makeUnexpected(verify_result.error()); } std::string user_key = std::move(verify_result.value()); BlobHeader const* header = r_cast< BlobHeader const* >(read_buf.cbytes()); diff --git a/src/lib/homestore_backend/hs_homeobject.cpp b/src/lib/homestore_backend/hs_homeobject.cpp index ef84a4c27..b030815cd 100644 --- a/src/lib/homestore_backend/hs_homeobject.cpp +++ b/src/lib/homestore_backend/hs_homeobject.cpp @@ -259,6 +259,14 @@ void HSHomeObject::init_homestore() { } else { LOGI("GC is disabled"); } + + // start scrubber + if (HS_BACKEND_DYNAMIC_CONFIG(enable_scrubber)) { + LOGI("Starting scrub manager"); + scrub_mgr_->start(); + } else { + LOGI("scrub manager is disabled"); + } } void HSHomeObject::on_replica_restart() { @@ -309,7 +317,6 @@ void HSHomeObject::on_replica_restart() { // gc_manager will be created only once here. we need make sure gc manager is created after all the pg meta blk // are replayed since we build pdev chunk heap in the constructor of gc manager , which depends on the pg meta. 
- // gc metablk handlers are registered in the constructor of gc manager gc_mgr_ = std::make_shared< GCManager >(this); @@ -326,7 +333,7 @@ void HSHomeObject::on_replica_restart() { gc_index_table_map.emplace(boost::uuids::to_string(uuid), gc_index_table); // 2 create gc actor superblk for each pdev, which contains the pdev_id and index table uuid. - homestore::superblk< GCManager::gc_actor_superblk > gc_actor_sb{GCManager::_gc_actor_meta_name}; + homestore::superblk< GCManager::gc_actor_superblk > gc_actor_sb{GCManager::gc_actor_meta_name}; gc_actor_sb.create(sizeof(GCManager::gc_actor_superblk)); gc_actor_sb->pdev_id = pdev_id; gc_actor_sb->index_table_uuid = uuid; @@ -340,7 +347,7 @@ void HSHomeObject::on_replica_restart() { for (size_t i = 0; i < reserved_chunk_num_per_pdev; ++i) { auto chunk = chunks[i]; homestore::superblk< GCManager::gc_reserved_chunk_superblk > reserved_chunk_sb{ - GCManager::_gc_reserved_chunk_meta_name}; + GCManager::gc_reserved_chunk_meta_name}; reserved_chunk_sb.create(sizeof(GCManager::gc_reserved_chunk_superblk)); reserved_chunk_sb->chunk_id = chunk; reserved_chunk_sb.write(); @@ -356,9 +363,9 @@ void HSHomeObject::on_replica_restart() { // when initializing, there is not gc task. we need to recover reserved chunks here, so that the reserved chunks // will not be put into pdev heap when built - homestore::meta_service().read_sub_sb(GCManager::_gc_actor_meta_name); - homestore::meta_service().read_sub_sb(GCManager::_gc_reserved_chunk_meta_name); - homestore::meta_service().read_sub_sb(GCManager::_gc_task_meta_name); + homestore::meta_service().read_sub_sb(GCManager::gc_actor_meta_name); + homestore::meta_service().read_sub_sb(GCManager::gc_reserved_chunk_meta_name); + homestore::meta_service().read_sub_sb(GCManager::gc_task_meta_name); // At this point, log replay has not started yet. We must process all recovered GC tasks before replay begins. // After log replay completes, ReplicationStateMachine::on_log_replay_done() calls select_specific_chunk() for @@ -377,6 +384,9 @@ void HSHomeObject::on_replica_restart() { gc_mgr_->handle_all_recovered_gc_tasks(); }); + + // initialize scrub manager + scrub_mgr_ = std::make_shared< ScrubManager >(this); } #if 0 @@ -446,16 +456,20 @@ void HSHomeObject::shutdown() { LOGI("waiting for {} pending requests to complete", pending_reqs); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); }; - LOGI("start stopping GC"); + LOGI("stopping GC"); // we need stop gc before shutting down homestore(where metaservice is shutdown), because gc mgr needs metaservice // to persist gc task metablk if there is any ongoing gc task. after stopping gc manager, there is no gc task // anymore, and thus now new gc task will be written to metaservice during homestore shutdown. 
- gc_mgr_->stop(); + if (gc_mgr_) gc_mgr_->stop(); + + LOGI("stopping scrubbing"); + if (scrub_mgr_) scrub_mgr_->stop(); LOGI("start shutting down HomeStore"); homestore::HomeStore::instance()->shutdown(); homestore::HomeStore::reset_instance(); gc_mgr_.reset(); + scrub_mgr_.reset(); iomanager.stop(); LOGI("complete shutting down HomeStore"); } diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp index d4a1d25f4..3fced7559 100644 --- a/src/lib/homestore_backend/hs_homeobject.hpp +++ b/src/lib/homestore_backend/hs_homeobject.hpp @@ -14,11 +14,17 @@ #include "homeobject/common.hpp" #include "index_kv.hpp" #include "gc_manager.hpp" +#include "scrub_manager.hpp" #include "hs_backend_config.hpp" #include "generated/resync_pg_data_generated.h" #include "generated/resync_shard_data_generated.h" #include "generated/resync_blob_data_generated.h" +// scrubber fbs headers. +#include "generated/scrub_common_generated.h" +#include "generated/scrub_req_generated.h" +#include "generated/scrub_result_generated.h" + namespace homestore { struct meta_blk; class IndexTableBase; @@ -364,7 +370,10 @@ class HSHomeObject : public HomeObjectImpl { shared< homestore::ReplDev > repl_dev_; std::shared_ptr< BlobIndexTable > index_table_; PGMetrics metrics_; + HSHomeObject& home_obj_; mutable pg_state pg_state_{0}; + mutable std::atomic_bool in_scrubbing{false}; + mutable std::atomic_uint64_t last_committed_blob_id{0}; // Snapshot receiver progress info, used as a checkpoint for recovery // Placed within HS_PG since HomeObject is unable to locate the ReplicationStateMachine @@ -372,8 +381,8 @@ class HSHomeObject : public HomeObjectImpl { mutable homestore::superblk< snapshot_rcvr_shard_list_superblk > snp_rcvr_shard_list_sb_; HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table, - std::shared_ptr< const std::vector< homestore::chunk_num_t > > pg_chunk_ids); - HS_PG(homestore::superblk< pg_info_superblk >&& sb, shared< homestore::ReplDev > rdev); + std::shared_ptr< const std::vector< homestore::chunk_num_t > > pg_chunk_ids, HSHomeObject& home_obj); + HS_PG(homestore::superblk< pg_info_superblk >&& sb, shared< homestore::ReplDev > rdev, HSHomeObject& home_obj); ~HS_PG() override = default; static PGInfo pg_info_from_sb(homestore::superblk< pg_info_superblk > const& sb); @@ -396,6 +405,13 @@ class HSHomeObject : public HomeObjectImpl { */ uint32_t get_snp_progress() const; + /** + * Returns the blob_id of the last committed put_blob. + */ + blob_id_t get_last_committed_blob_id() const; + + pg_id_t pg_id() const { return pg_sb_->id; } + /** * Returns all replication info of all peers. */ @@ -416,6 +432,19 @@ class HSHomeObject : public HomeObjectImpl { * Update membership in pg's superblock. */ void update_membership(const MemberSet& members); + + /* + * RPC handlers for scrub: + * 1. on_scrub_req_received: receive the scrub req from leader + * 2. 
on_scrub_result_received: receive the scrub map from followers + */ + void on_scrub_req_received(boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data); + void on_scrub_result_received(boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data); + + /** + * Register data RPC handlers for this PG + */ + void register_data_rpc_handlers(); }; struct HS_Shard : public Shard { @@ -537,6 +566,11 @@ class HSHomeObject : public HomeObjectImpl { inline const static homestore::MultiBlkId tombstone_pbas{0, 0, 0}; inline const static std::string delete_marker_blob_data{"HOMEOBJECT_BLOB_DELETE_MARKER"}; + // ask followers to scrub + inline const static std::string PUSH_SCRUB_REQ{"PUSH_SCRUB_REQ"}; + // return scrub map to leader + inline const static std::string PUSH_SCRUB_RESULT{"PUSH_SCRUB_RESULT"}; + class PGBlobIterator { public: struct blob_read_result { @@ -732,6 +766,7 @@ class HSHomeObject : public HomeObjectImpl { mutable std::shared_mutex snp_sbs_lock_; shared< HeapChunkSelector > chunk_selector_; shared< GCManager > gc_mgr_; + shared< ScrubManager > scrub_mgr_; unique< HttpManager > http_mgr_; static constexpr size_t max_zpad_bufs = _data_block_size / io_align; @@ -986,6 +1021,7 @@ class HSHomeObject : public HomeObjectImpl { cshared< HeapChunkSelector > chunk_selector() const { return chunk_selector_; } cshared< GCManager > gc_manager() const { return gc_mgr_; } + cshared< ScrubManager > scrub_manager() const { return scrub_mgr_; } /** * @brief Reconciles the leaders for all PGs or a specific PG identified by pg_id. @@ -1057,6 +1093,7 @@ class HSHomeObject : public HomeObjectImpl { // Refresh PG statistics (called after log replay) void refresh_pg_statistics(pg_id_t pg_id); + shard_id_t get_last_shard_id_in_pg(pg_id_t pg_id) const; private: BlobManager::Result< std::string > do_verify_blob(const void* blob, shard_id_t expected_shard_id, diff --git a/src/lib/homestore_backend/hs_backend_config.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/hs_backend_config.fbs similarity index 93% rename from src/lib/homestore_backend/hs_backend_config.fbs rename to src/lib/homestore_backend/hs_homeobject_fbs/hs_backend_config.fbs index bd6991db9..983d19208 100644 --- a/src/lib/homestore_backend/hs_backend_config.fbs +++ b/src/lib/homestore_backend/hs_homeobject_fbs/hs_backend_config.fbs @@ -23,6 +23,10 @@ table HSBackendSettings { //TODO: make this hotswap after gc is well tested enable_gc: bool = true; + //Enable scrubber + //TODO: make this hotswap after scrubber is well tested + enable_scrubber: bool = false; + //Total reserved chunk num (dedicated for gc/egc) per pdev reserved_chunk_num_per_pdev: uint8 = 6; diff --git a/src/lib/homestore_backend/resync_blob_data.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/resync_blob_data.fbs similarity index 100% rename from src/lib/homestore_backend/resync_blob_data.fbs rename to src/lib/homestore_backend/hs_homeobject_fbs/resync_blob_data.fbs diff --git a/src/lib/homestore_backend/resync_pg_data.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/resync_pg_data.fbs similarity index 100% rename from src/lib/homestore_backend/resync_pg_data.fbs rename to src/lib/homestore_backend/hs_homeobject_fbs/resync_pg_data.fbs diff --git a/src/lib/homestore_backend/resync_shard_data.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/resync_shard_data.fbs similarity index 100% rename from src/lib/homestore_backend/resync_shard_data.fbs rename to src/lib/homestore_backend/hs_homeobject_fbs/resync_shard_data.fbs diff --git 
a/src/lib/homestore_backend/hs_homeobject_fbs/scrub_common.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/scrub_common.fbs new file mode 100644 index 000000000..c08fdab0e --- /dev/null +++ b/src/lib/homestore_backend/hs_homeobject_fbs/scrub_common.fbs @@ -0,0 +1,25 @@ +native_include "sisl/utility/non_null_ptr.hpp"; + +namespace homeobject; + +enum ScrubStatus : uint8 { + NONE = 0, + IO_ERROR = 1, + MISMATCH = 2, + NOT_FOUND = 3 +} + +enum ScrubType : uint8 { + META = 0, + SHALLOW_BLOB = 1, + DEEP_BLOB = 2, + CHECK_BLOB_EXISTENCE = 3, + CHECK_SHARD_EXISTENCE = 4 +} + +table ScrubResultEntry { + shard_id: uint64; + blob_id: uint64; + scrub_result: ScrubStatus; + hash: uint64; +} \ No newline at end of file diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/scrub_req.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/scrub_req.fbs new file mode 100644 index 000000000..23e8d5452 --- /dev/null +++ b/src/lib/homestore_backend/hs_homeobject_fbs/scrub_req.fbs @@ -0,0 +1,17 @@ +include "scrub_common.fbs"; + +namespace homeobject; + +table ScrubReq { + pg_id: uint16; + req_id: uint64; + scrub_lsn: int64; + start_shard_id: uint64; + start_blob_id: uint64; + end_shard_id: uint64; + end_blob_id: uint64; + issuer_uuid: [ubyte]; + scrub_type: ScrubType; +} + +root_type ScrubReq; \ No newline at end of file diff --git a/src/lib/homestore_backend/hs_homeobject_fbs/scrub_result.fbs b/src/lib/homestore_backend/hs_homeobject_fbs/scrub_result.fbs new file mode 100644 index 000000000..f342344cc --- /dev/null +++ b/src/lib/homestore_backend/hs_homeobject_fbs/scrub_result.fbs @@ -0,0 +1,11 @@ +include "scrub_common.fbs"; + +namespace homeobject; + +table ScrubResult { + req_id: uint64; + issuer_uuid: [ubyte]; + scrub_results: [ScrubResultEntry]; +} + +root_type ScrubResult; \ No newline at end of file diff --git a/src/lib/homestore_backend/hs_http_manager.cpp b/src/lib/homestore_backend/hs_http_manager.cpp index b77718bad..8ae007603 100644 --- a/src/lib/homestore_backend/hs_http_manager.cpp +++ b/src/lib/homestore_backend/hs_http_manager.cpp @@ -17,12 +17,29 @@ #include #include #include +#include +#include +#include +#include #include "hs_http_manager.hpp" #include "hs_homeobject.hpp" namespace homeobject { +namespace { +// Helper function to format time as ISO 8601 +std::string format_iso8601_time(const std::chrono::system_clock::time_point& tp) { + auto time_t = std::chrono::system_clock::to_time_t(tp); + std::tm tm; + gmtime_r(&time_t, &tm); // Thread-safe version + char buf[32]; + std::strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &tm); + return std::string(buf); +} + +} // anonymous namespace + HttpManager::HttpManager(HSHomeObject& ho) : ho_(ho) { using namespace Pistache; using namespace Pistache::Rest; @@ -74,7 +91,13 @@ HttpManager::HttpManager(HSHomeObject& ho) : ho_(ho) { {Pistache::Http::Method::Post, "/api/v1/trigger_gc", Pistache::Rest::Routes::bind(&HttpManager::trigger_gc, this)}, {Pistache::Http::Method::Get, "/api/v1/gc_job_status", - Pistache::Rest::Routes::bind(&HttpManager::get_gc_job_status, this)}}; + Pistache::Rest::Routes::bind(&HttpManager::get_gc_job_status, this)}, + {Pistache::Http::Method::Post, "/api/v1/trigger_pg_scrub", + Pistache::Rest::Routes::bind(&HttpManager::trigger_pg_scrub, this)}, + {Pistache::Http::Method::Get, "/api/v1/scrub_job_status", + Pistache::Rest::Routes::bind(&HttpManager::get_scrub_job_status, this)}, + {Pistache::Http::Method::Post, "/api/v1/cancel_scrub_job", + Pistache::Rest::Routes::bind(&HttpManager::cancel_scrub_job, this)}}; auto 
http_server = ioenvironment.get_http_server(); if (!http_server) { @@ -486,6 +509,196 @@ void HttpManager::exit_pg(const Pistache::Rest::Request& request, Pistache::Http response.send(Pistache::Http::Code::Ok, "Exit pg request submitted"); } +void HttpManager::trigger_pg_scrub(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response) { + auto scrub_mgr = ho_.scrub_manager(); + if (!scrub_mgr) { + response.send(Pistache::Http::Code::Internal_Server_Error, "Scrub manager not available"); + return; + } + + // Get query parameters + const auto pg_id_param = request.query().get("pg_id"); + const auto is_deep_param = request.query().get("deep"); + + // Validate pg_id parameter (required) + if (!pg_id_param || pg_id_param.value().empty()) { + nlohmann::json error; + error["error"] = "Missing required parameter: pg_id"; + error["usage"] = "POST /api/v1/trigger_pg_scrub?pg_id=&deep="; + response.send(Pistache::Http::Code::Bad_Request, error.dump()); + return; + } + + uint16_t pg_id; + try { + auto val = std::stoul(pg_id_param.value()); + if (val > std::numeric_limits< uint16_t >::max()) { + nlohmann::json error; + error["error"] = "pg_id out of range"; + error["pg_id"] = pg_id_param.value(); + response.send(Pistache::Http::Code::Bad_Request, error.dump()); + return; + } + pg_id = static_cast< uint16_t >(val); + } catch (const std::invalid_argument& e) { + nlohmann::json error; + error["error"] = "Invalid pg_id format: not a number"; + error["pg_id"] = pg_id_param.value(); + response.send(Pistache::Http::Code::Bad_Request, error.dump()); + return; + } catch (const std::out_of_range& e) { + nlohmann::json error; + error["error"] = "pg_id out of range"; + error["pg_id"] = pg_id_param.value(); + response.send(Pistache::Http::Code::Bad_Request, error.dump()); + return; + } + + // Parse optional parameters + bool is_deep = false; + if (is_deep_param && !is_deep_param.value().empty()) { + const auto& value = is_deep_param.value(); + is_deep = (value == "true" || value == "1" || value == "yes"); + } + + LOGINFO("Received trigger_pg_scrub request for pg_id={}, deep={}", pg_id, is_deep); + + // Verify PG exists + auto hs_pg = ho_.get_hs_pg(pg_id); + if (!hs_pg) { + nlohmann::json error; + error["error"] = "PG not found"; + error["pg_id"] = pg_id; + response.send(Pistache::Http::Code::Not_Found, error.dump()); + return; + } + + // Generate job ID and create job info + const auto job_id = generate_job_id(); + auto job_info = std::make_shared< ScrubJobInfo >(job_id, pg_id, is_deep); + + { + std::lock_guard< std::shared_mutex > lock(scrub_job_mutex_); + scrub_jobs_map_.set(job_id, job_info); + } + + // Prepare immediate response + nlohmann::json result; + result["job_id"] = job_id; + result["pg_id"] = pg_id; + result["scrub_type"] = is_deep ? 
"deep" : "shallow"; + result["message"] = "Scrub task submitted, query status using /api/v1/scrub_job_status?job_id=" + job_id; + + // Return immediately with HTTP 202 Accepted + response.send(Pistache::Http::Code::Accepted, result.dump()); + + // Submit scrub task (MANUALLY trigger type) - runs asynchronously + scrub_mgr->submit_scrub_task(pg_id, is_deep, SCRUB_TRIGGER_TYPE::MANUALLY) + .via(&folly::InlineExecutor::instance()) + .thenValue([job_info, is_deep](std::shared_ptr< ScrubManager::ShallowScrubReport > report) { + if (!report) { + job_info->try_complete(ScrubJobStatus::FAILED, "Scrub task failed or was cancelled"); + return; + } + + // Build report summary + nlohmann::json report_summary; + report_summary["pg_id"] = report->get_pg_id(); + + // Add missing shards info + const auto& missing_shards = report->get_missing_shard_ids(); + if (!missing_shards.empty()) { + nlohmann::json missing_shards_json; + for (const auto& [shard_id, peer_ids] : missing_shards) { + nlohmann::json peer_list = nlohmann::json::array(); + for (const auto& peer_id : peer_ids) { + peer_list.push_back(boost::uuids::to_string(peer_id)); + } + missing_shards_json[std::to_string(shard_id)] = peer_list; + } + report_summary["missing_shards"] = missing_shards_json; + } + + // Add missing blobs info + const auto& missing_blobs = report->get_missing_blobs(); + if (!missing_blobs.empty()) { + nlohmann::json missing_blobs_json; + for (const auto& [blob_route, peer_ids] : missing_blobs) { + nlohmann::json peer_list = nlohmann::json::array(); + for (const auto& peer_id : peer_ids) { + peer_list.push_back(boost::uuids::to_string(peer_id)); + } + missing_blobs_json[fmt::format("{}", blob_route)] = peer_list; + } + report_summary["missing_blobs"] = missing_blobs_json; + } + + // If it's a deep scrub report, add additional info + if (is_deep) { + auto deep_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(report); + if (deep_report) { + // Add corrupted blobs info + const auto& corrupted_blobs = deep_report->get_corrupted_blobs(); + if (!corrupted_blobs.empty()) { + nlohmann::json corrupted_blobs_json; + for (const auto& [peer_id, blob_map] : corrupted_blobs) { + nlohmann::json blob_status_json; + for (const auto& [blob_route, status] : blob_map) { + blob_status_json[fmt::format("{}", blob_route)] = scrub_result_to_string(status); + } + corrupted_blobs_json[boost::uuids::to_string(peer_id)] = blob_status_json; + } + report_summary["corrupted_blobs"] = corrupted_blobs_json; + } + + // Add inconsistent blobs info + const auto& inconsistent_blobs = deep_report->get_inconsistent_blobs(); + if (!inconsistent_blobs.empty()) { + nlohmann::json inconsistent_blobs_json; + for (const auto& [blob_route, peer_hash_map] : inconsistent_blobs) { + nlohmann::json peer_hash_json; + for (const auto& [peer_id, hash] : peer_hash_map) { + peer_hash_json[boost::uuids::to_string(peer_id)] = fmt::format("{:016x}", hash); + } + inconsistent_blobs_json[fmt::format("{}", blob_route)] = peer_hash_json; + } + report_summary["inconsistent_blobs"] = inconsistent_blobs_json; + } + + // Add corrupted shards info + const auto& corrupted_shards = deep_report->get_corrupted_shards(); + if (!corrupted_shards.empty()) { + nlohmann::json corrupted_shards_json; + for (const auto& [peer_id, shard_map] : corrupted_shards) { + nlohmann::json shard_status_json; + for (const auto& [shard_id, status] : shard_map) { + shard_status_json[std::to_string(shard_id)] = scrub_result_to_string(status); + } + 
corrupted_shards_json[boost::uuids::to_string(peer_id)] = shard_status_json; + } + report_summary["corrupted_shards"] = corrupted_shards_json; + } + + // Add corrupted PG meta info + const auto& corrupted_pg_metas = deep_report->get_corrupted_pg_metas(); + if (!corrupted_pg_metas.empty()) { + nlohmann::json corrupted_pg_metas_json; + for (const auto& [peer_id, status] : corrupted_pg_metas) { + corrupted_pg_metas_json[boost::uuids::to_string(peer_id)] = scrub_result_to_string(status); + } + report_summary["corrupted_pg_metas"] = corrupted_pg_metas_json; + } + } + } + + // Complete the job with success status and report + job_info->try_complete(ScrubJobStatus::COMPLETED, "", report_summary); + }) + .thenError([job_info](const folly::exception_wrapper& ew) { + job_info->try_complete(ScrubJobStatus::FAILED, ew.what().c_str()); + }); +} + void HttpManager::trigger_gc(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response) { auto gc_mgr = ho_.gc_manager(); if (!gc_mgr) { @@ -651,7 +864,7 @@ void HttpManager::trigger_gc(const Pistache::Rest::Request& request, Pistache::H std::string HttpManager::generate_job_id() { auto counter = job_counter_.fetch_add(1, std::memory_order_relaxed); - return fmt::format("trigger-gc-task-{}", counter); + return fmt::format("job-{}", counter); } void HttpManager::get_job_status(const std::string& job_id, nlohmann::json& result) { @@ -783,6 +996,189 @@ folly::Future< folly::Unit > HttpManager::trigger_gc_for_pg(uint16_t pg_id, cons }); } +void HttpManager::get_scrub_job_status(const Pistache::Rest::Request& request, + Pistache::Http::ResponseWriter response) { + auto job_id_param = request.query().get("job_id"); + + if (job_id_param && !job_id_param.value().empty()) { + // Query specific job + const auto job_id = job_id_param.value(); + LOGINFO("Query scrub job {} status", job_id); + + std::shared_ptr< ScrubJobInfo > job_info; + { + std::shared_lock lock(scrub_job_mutex_); + job_info = scrub_jobs_map_.get(job_id); + } + + if (!job_info) { + nlohmann::json error; + error["error"] = "Job not found"; + error["job_id"] = job_id; + response.send(Pistache::Http::Code::Not_Found, error.dump()); + return; + } + + nlohmann::json result = build_scrub_job_json(job_info); + response.send(Pistache::Http::Code::Ok, result.dump()); + return; + } + + // Query all jobs + LOGINFO("Query all scrub job status"); + nlohmann::json result; + std::vector< std::shared_ptr< ScrubJobInfo > > all_jobs; + + { + std::shared_lock lock(scrub_job_mutex_); + for (const auto& [k, v] : scrub_jobs_map_) { + all_jobs.push_back(v); + } + } + + for (const auto& job_info : all_jobs) { + result["jobs"].push_back(build_scrub_job_json(job_info)); + } + + response.send(Pistache::Http::Code::Ok, result.dump()); +} + +nlohmann::json HttpManager::build_scrub_job_json(const std::shared_ptr< ScrubJobInfo >& job_info) { + nlohmann::json result; + + // Helper to convert status enum to string + auto status_to_string = [](ScrubJobStatus status) -> std::string { + switch (status) { + case ScrubJobStatus::RUNNING: + return "running"; + case ScrubJobStatus::COMPLETED: + return "completed"; + case ScrubJobStatus::FAILED: + return "failed"; + case ScrubJobStatus::CANCELLED: + return "cancelled"; + default: + return "unknown"; + } + }; + + // Thread-unsafe fields (read-only after construction) + result["job_id"] = job_info->job_id; + result["pg_id"] = job_info->pg_id; + result["scrub_type"] = job_info->is_deep ? 
"deep" : "shallow"; + + // Thread-safe fields (protected by mutex) + { + std::lock_guard< std::mutex > lock(job_info->mtx_); + + // Status + result["status"] = status_to_string(job_info->status); + + // Timestamps - convert to ISO 8601 format (no newline) + result["start_time"] = format_iso8601_time(job_info->start_time); + + if (job_info->status != ScrubJobStatus::RUNNING) { + result["end_time"] = format_iso8601_time(job_info->end_time); + + auto duration = + std::chrono::duration_cast< std::chrono::seconds >(job_info->end_time - job_info->start_time); + result["duration_seconds"] = duration.count(); + } + + // Error message (if any) + if (!job_info->error_message.empty()) { result["error_message"] = job_info->error_message; } + + // Report summary (if completed) + if (job_info->status == ScrubJobStatus::COMPLETED && !job_info->report_summary.empty()) { + result["report"] = job_info->report_summary; + } + } + + return result; +} + +void HttpManager::cancel_scrub_job(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response) { + auto job_id_param = request.query().get("job_id"); + + if (!job_id_param || job_id_param.value().empty()) { + nlohmann::json error; + error["error"] = "Missing required parameter: job_id"; + error["usage"] = "POST /api/v1/cancel_scrub_job?job_id="; + response.send(Pistache::Http::Code::Bad_Request, error.dump()); + return; + } + + const auto job_id = job_id_param.value(); + LOGINFO("Cancel scrub job {}", job_id); + + std::shared_ptr< ScrubJobInfo > job_info; + { + std::shared_lock lock(scrub_job_mutex_); + job_info = scrub_jobs_map_.get(job_id); + } + + if (!job_info) { + nlohmann::json error; + error["error"] = "Job not found"; + error["job_id"] = job_id; + response.send(Pistache::Http::Code::Not_Found, error.dump()); + return; + } + + // Check if job is still running (thread-safe) + bool can_cancel = false; + std::string current_status_str; + { + std::lock_guard< std::mutex > lock(job_info->mtx_); + can_cancel = (job_info->status == ScrubJobStatus::RUNNING); + if (!can_cancel) { + // Get status string for error message + switch (job_info->status) { + case ScrubJobStatus::COMPLETED: + current_status_str = "completed"; + break; + case ScrubJobStatus::FAILED: + current_status_str = "failed"; + break; + case ScrubJobStatus::CANCELLED: + current_status_str = "cancelled"; + break; + default: + current_status_str = "unknown"; + } + } + } + + if (!can_cancel) { + nlohmann::json result; + result["job_id"] = job_id; + result["message"] = "Job is not running, cannot cancel"; + result["current_status"] = current_status_str; + response.send(Pistache::Http::Code::Bad_Request, result.dump()); + return; + } + + // Cancel the scrub task + auto scrub_mgr = ho_.scrub_manager(); + if (!scrub_mgr) { + nlohmann::json error; + error["error"] = "Scrub manager not available"; + response.send(Pistache::Http::Code::Internal_Server_Error, error.dump()); + return; + } + + // Cancel in scrub manager first (this will stop ongoing work) + scrub_mgr->cancel_scrub_task(job_info->pg_id); + + // Update job status (thread-safe) + job_info->cancel(); + + nlohmann::json result; + result["job_id"] = job_id; + result["message"] = "Scrub job cancelled successfully"; + response.send(Pistache::Http::Code::Ok, result.dump()); +} + #ifdef _PRERELEASE void HttpManager::crash_system(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response) { std::string crash_type; diff --git a/src/lib/homestore_backend/hs_http_manager.hpp 
b/src/lib/homestore_backend/hs_http_manager.hpp index 016c537cc..02a24dcc4 100644 --- a/src/lib/homestore_backend/hs_http_manager.hpp +++ b/src/lib/homestore_backend/hs_http_manager.hpp @@ -50,6 +50,9 @@ class HttpManager { void get_gc_job_status(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response); folly::Future< folly::Unit > trigger_gc_for_pg(uint16_t pg_id, const std::string& job_id); void get_job_status(const std::string& job_id, nlohmann::json& result); + void trigger_pg_scrub(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response); + void get_scrub_job_status(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response); + void cancel_scrub_job(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response); #ifdef _PRERELEASE void crash_system(const Pistache::Rest::Request& request, Pistache::Http::ResponseWriter response); @@ -74,15 +77,66 @@ class HttpManager { job_id(id), status(GCJobStatus::RUNNING), pg_id(pgid), chunk_id(cid) {} }; + enum class ScrubJobStatus { RUNNING, COMPLETED, FAILED, CANCELLED }; + + struct ScrubJobInfo { + std::string job_id; + uint16_t pg_id; + bool is_deep; + + // Mutable fields protected by mutex + mutable std::mutex mtx_; + ScrubJobStatus status; + std::chrono::system_clock::time_point start_time; + std::chrono::system_clock::time_point end_time; + std::string error_message; + nlohmann::json report_summary; + + // Flag to prevent status update after cancellation + std::atomic< bool > is_cancelled{false}; + + ScrubJobInfo(const std::string& id, uint16_t pgid, bool deep) : + job_id(id), + pg_id(pgid), + is_deep(deep), + status(ScrubJobStatus::RUNNING), + start_time(std::chrono::system_clock::now()) {} + + // Thread-safe status update - returns false if already cancelled + bool try_complete(ScrubJobStatus new_status, const std::string& error_msg = "", + const nlohmann::json& summary = nlohmann::json()) { + std::lock_guard< std::mutex > lock(mtx_); + if (is_cancelled.load(std::memory_order_acquire)) { return false; } // Already cancelled, reject update + + status = new_status; + end_time = std::chrono::system_clock::now(); + error_message = error_msg; + if (!summary.empty()) { report_summary = summary; } + return true; + } + + // Thread-safe cancel + void cancel() { + std::lock_guard< std::mutex > lock(mtx_); + is_cancelled.store(true, std::memory_order_release); + status = ScrubJobStatus::CANCELLED; + end_time = std::chrono::system_clock::now(); + error_message = "Cancelled by user"; + } + }; + std::string generate_job_id(); + nlohmann::json build_scrub_job_json(const std::shared_ptr< ScrubJobInfo >& job_info); private: HSHomeObject& ho_; std::atomic< uint64_t > job_counter_{0}; std::shared_mutex gc_job_mutex_; + std::shared_mutex scrub_job_mutex_; // we don`t have an external DB to store the job status, so we only keep the status of the lastest 100 jobs for // query. or, we can evict the job after it is completed after a timeout period. 
folly::EvictingCacheMap< std::string, std::shared_ptr< GCJobInfo > > gc_jobs_map_{100}; + folly::EvictingCacheMap< std::string, std::shared_ptr< ScrubJobInfo > > scrub_jobs_map_{100}; }; } // namespace homeobject \ No newline at end of file diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp index 605d5f872..442ec4078 100644 --- a/src/lib/homestore_backend/hs_pg_manager.cpp +++ b/src/lib/homestore_backend/hs_pg_manager.cpp @@ -223,7 +223,7 @@ folly::Expected< HSHomeObject::HS_PG*, PGError > HSHomeObject::local_create_pg(s auto uuid_str = boost::uuids::to_string(index_table->uuid()); repl_dev->set_custom_rdev_name(fmt::format("rdev{}", pg_info.id)); - auto hs_pg = std::make_unique< HS_PG >(std::move(pg_info), std::move(repl_dev), index_table, chunk_ids); + auto hs_pg = std::make_unique< HS_PG >(std::move(pg_info), std::move(repl_dev), index_table, chunk_ids, *this); auto ret = hs_pg.get(); { scoped_lock lck(index_lock_); @@ -236,6 +236,9 @@ folly::Expected< HSHomeObject::HS_PG*, PGError > HSHomeObject::local_create_pg(s // Add to index service, so that it gets cleaned up when index service is shutdown. hs()->index_service().add_index_table(index_table); add_pg_to_map(std::move(hs_pg)); + + // when local_create_pg is called by BR, the pg scrub superblk will not be overwritten if it already exists + scrub_mgr_->add_pg(pg_info.id); } return ret; } @@ -350,7 +353,6 @@ void HSHomeObject::on_pg_start_replace_member(group_id_t group_id, const std::st auto hs_pg = static_cast< HSHomeObject::HS_PG* >(pg.get()); pg->pg_info_.members.emplace(std::move(to_pg_member(member_in))); pg->pg_info_.members.emplace(std::move(to_pg_member(member_out))); - uint32_t i{0}; pg_members* sb_members = hs_pg->pg_sb_->get_pg_members_mutable(); for (auto const& m : pg->pg_info_.members) { @@ -368,6 +370,12 @@ void HSHomeObject::on_pg_start_replace_member(group_id_t group_id, const std::st LOGI("PG start replace member done, task_id={} member_out={} member_in={}, member_nums={}, trace_id={}", task_id, boost::uuids::to_string(member_out.id), boost::uuids::to_string(member_in.id), pg->pg_info_.members.size(), tid); + + // TODO:: change the pg state to indicate it's under member replacement, so that we can reject some + // operations (like scrub) which may conflict with replace member. Currently we just cancel the scrub task if + // it's running + scrub_mgr_->cancel_scrub_task(pg->pg_info_.id); + return; } } @@ -698,7 +706,8 @@ bool HSHomeObject::pg_destroy(pg_id_t pg_id, bool need_to_pause_pg_state_machine // we have the assumption that after pg is marked as destroyed, it will not be marked as alive again. // TODO:: if this assumption is broken, we need to handle it.
- gc_mgr_->drain_pg_pending_gc_task(pg_id); + if (gc_mgr_) gc_mgr_->drain_pg_pending_gc_task(pg_id); + if (scrub_mgr_) scrub_mgr_->remove_pg(pg_id); destroy_shards(pg_id); destroy_hs_resources(pg_id); @@ -815,7 +824,6 @@ void HSHomeObject::destroy_hs_resources(pg_id_t pg_id) { chunk_selector_->reset_ void HSHomeObject::destroy_pg_index_table(pg_id_t pg_id) { std::shared_ptr< BlobIndexTable > index_table; - { // index_table->destroy() will trigger a cp_flush, which will call homeobject#cp_flush and try to acquire // `_pg_lock`, so we need to release the lock here to avoid a dead lock @@ -935,7 +943,7 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c std::vector< chunk_num_t > p_chunk_ids(pg_sb->get_chunk_ids(), pg_sb->get_chunk_ids() + pg_sb->num_chunks); bool set_pg_chunks_res = chunk_selector_->recover_pg_chunks(pg_id, std::move(p_chunk_ids)); auto uuid_str = boost::uuids::to_string(pg_sb->index_table_uuid); - auto hs_pg = std::make_unique< HS_PG >(std::move(pg_sb), std::move(v.value())); + auto hs_pg = std::make_unique< HS_PG >(std::move(pg_sb), std::move(v.value()), *this); if (!set_pg_chunks_res) { hs_pg->pg_state_.set_state(PGStateMask::DISK_DOWN); hs_pg->repl_dev_->set_stage(homestore::repl_dev_stage_t::UNREADY); @@ -971,12 +979,13 @@ PGInfo HSHomeObject::HS_PG::pg_info_from_sb(homestore::superblk< pg_info_superbl } HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, shared< BlobIndexTable > index_table, - std::shared_ptr< const std::vector< chunk_num_t > > pg_chunk_ids) : + std::shared_ptr< const std::vector< chunk_num_t > > pg_chunk_ids, HSHomeObject& home_obj) : PG{std::move(info)}, pg_sb_{_pg_meta_name}, repl_dev_{std::move(rdev)}, index_table_{std::move(index_table)}, metrics_{*this}, + home_obj_{home_obj}, snp_rcvr_info_sb_{_snp_rcvr_meta_name}, snp_rcvr_shard_list_sb_{_snp_rcvr_shard_list_meta_name} { RELEASE_ASSERT(pg_chunk_ids != nullptr, "PG chunks null, pg={}", pg_info_.id); @@ -1011,19 +1020,29 @@ HSHomeObject::HS_PG::HS_PG(PGInfo info, shared< homestore::ReplDev > rdev, share pg_sb_chunk_ids[i] = pg_chunk_ids->at(i); } pg_sb_.write(); + + register_data_rpc_handlers(); } -HSHomeObject::HS_PG::HS_PG(superblk< pg_info_superblk >&& sb, shared< ReplDev > rdev) : - PG{pg_info_from_sb(sb)}, pg_sb_{std::move(sb)}, repl_dev_{std::move(rdev)}, metrics_{*this} { +HSHomeObject::HS_PG::HS_PG(superblk< pg_info_superblk >&& sb, shared< ReplDev > rdev, HSHomeObject& home_obj) : + PG{pg_info_from_sb(sb)}, + pg_sb_{std::move(sb)}, + repl_dev_{std::move(rdev)}, + metrics_{*this}, + home_obj_{home_obj} { durable_entities_.blob_sequence_num = pg_sb_->blob_sequence_num; durable_entities_.active_blob_count = pg_sb_->active_blob_count; durable_entities_.tombstone_blob_count = pg_sb_->tombstone_blob_count; durable_entities_.total_occupied_blk_count = pg_sb_->total_occupied_blk_count; durable_entities_.total_reclaimed_blk_count = pg_sb_->total_reclaimed_blk_count; + + register_data_rpc_handlers(); } uint32_t HSHomeObject::HS_PG::total_shards() const { return shards_.size(); } +blob_id_t HSHomeObject::HS_PG::get_last_committed_blob_id() const { return last_committed_blob_id.load(); } + uint32_t HSHomeObject::HS_PG::open_shards() const { return std::count_if(shards_.begin(), shards_.end(), [](auto const& s) { return s->is_open(); }); } @@ -1115,6 +1134,170 @@ void HSHomeObject::HS_PG::update_membership(const MemberSet& members) { LOGI("PG membership updated, member_nums={}", pg_sb_->num_dynamic_members); } +void 
HSHomeObject::HS_PG::register_data_rpc_handlers() {
+    const auto& pg_id = pg_info_.id;
+    bool success;
+
+    success = repl_dev_->add_data_rpc_service(PUSH_SCRUB_REQ, bind_this(HS_PG::on_scrub_req_received, 1));
+    if (success) {
+        LOGI("Successfully registered PUSH_SCRUB_REQ RPC handler for pg={}", pg_id);
+    } else {
+        LOGW("PUSH_SCRUB_REQ RPC handler already registered for pg={}", pg_id);
+    }
+
+    success = repl_dev_->add_data_rpc_service(PUSH_SCRUB_RESULT, bind_this(HS_PG::on_scrub_result_received, 1));
+    if (success) {
+        LOGI("Successfully registered PUSH_SCRUB_RESULT RPC handler for pg={}", pg_id);
+    } else {
+        LOGW("PUSH_SCRUB_RESULT RPC handler already registered for pg={}", pg_id);
+    }
+}
+
+void HSHomeObject::HS_PG::on_scrub_req_received(boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data) {
+    const auto pg_id = pg_info_.id;
+    LOGD("Received scrub req for pg={}", pg_id);
+
+    auto const& incoming_buf = rpc_data->request_blob();
+    const auto buf_size = incoming_buf.size();
+    const auto buf_ptr = incoming_buf.cbytes();
+
+    if (!buf_ptr || !buf_size) {
+        LOGW("scrub req received with empty buffer for pg={}", pg_id);
+        rpc_data->send_response();
+        return;
+    }
+
+    flatbuffers::Verifier verifier(buf_ptr, buf_size);
+    if (!VerifySizePrefixedScrubReqBuffer(verifier)) {
+        LOGW("scrub req received with invalid flatbuffer for pg={}", pg_id);
+        rpc_data->send_response();
+        return;
+    }
+
+    std::shared_ptr< ScrubManager::scrub_req > scrub_req = std::make_shared< ScrubManager::scrub_req >();
+
+    if (!scrub_req->load(buf_ptr, buf_size)) {
+        LOGW("Failed to load scrub req from flatbuffer for pg={}", pg_id);
+        rpc_data->send_response();
+        return;
+    }
+
+    LOGD("Scrub req loaded from flatbuffer for pg={}, scrub_type:{}, issuer_peer_id:{}", pg_id, scrub_req->scrub_type,
+         scrub_req->issuer_peer_id);
+
+    const bool is_check_existence_req = scrub_req->scrub_type == SCRUB_TYPE::CHECK_BLOB_EXISTENCE ||
+        scrub_req->scrub_type == SCRUB_TYPE::CHECK_SHARD_EXISTENCE;
+
+    // handle check existence req
+    if (is_check_existence_req) {
+        const auto& shard_id = scrub_req->start_shard_id;
+
+        LOGD("handle check existence req for pg={}, shard_id={}, blob_id={}, req_type={}", pg_id,
+             scrub_req->start_shard_id, scrub_req->start_blob_id, scrub_req->scrub_type);
+
+        // since checking existence is lightweight, we can handle it immediately without adding it to the scrub
+        // manager, and reply with the result in the rpc response directly.
+        bool exists = false;
+
+        if (scrub_req->scrub_type == SCRUB_TYPE::CHECK_BLOB_EXISTENCE) {
+            const auto blob_id = scrub_req->start_blob_id;
+            BlobRouteKey key{BlobRoute{shard_id, blob_id}};
+            BlobRouteValue value;
+            homestore::BtreeSingleGetRequest get_req{&key, &value};
+            auto ret = index_table_->get(get_req);
+            if (homestore::btree_status_t::success == ret && value.pbas() != HSHomeObject::tombstone_pbas) {
+                exists = true;
+            }
+        } else {
+            // check if shard exists in pg_index_table
+            auto start_key = BlobRouteKey{BlobRoute{shard_id, 0}};
+            auto end_key = BlobRouteKey{BlobRoute{shard_id, UINT64_MAX}};
+
+            homestore::BtreeQueryRequest< BlobRouteKey > qr{
+                homestore::BtreeKeyRange< BlobRouteKey >{start_key, true, end_key, true},
+                homestore::BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY, 1,
+                [](homestore::BtreeKey const& /*key*/, homestore::BtreeValue const& value) mutable -> bool {
+                    BlobRouteValue blob_value{value};
+                    // we consider the shard to exist only if we can find at least one blob route entry for that
+                    // shard and the blob is not deleted (tombstone)
+                    return blob_value.pbas() != HSHomeObject::tombstone_pbas;
+                }};
+
+            std::vector< std::pair< BlobRouteKey, BlobRouteValue > > out;
+            index_table_->query(qr, out);
+
+            // TODO:: handle the case where query returns an error; currently we just return false to indicate the
+            // blob does not exist, but a separate error code for the failure case would be better.
+            exists = !out.empty();
+        }
+
+        // TODO:: handle the case where index_table returns an error; currently we just report the blob/shard as
+        // not existing, but a separate error code for the failure case would be better.
+
+        auto resp = std::make_shared< bool >(exists);
+        sisl::io_blob_list_t blob_list;
+        blob_list.emplace_back(reinterpret_cast< uint8_t* >(resp.get()), static_cast< uint32_t >(sizeof(bool)), false);
+
+        rpc_data->set_comp_cb([resp](boost::intrusive_ptr< sisl::GenericRpcData >&) {});
+        // we only send a boolean back to indicate whether the blob exists
+        rpc_data->send_response(blob_list);
+
+        return;
+    }
+
+    // handle scrub req
+    rpc_data->send_response();
+    auto scrub_mgr = home_obj_.scrub_manager();
+    if (!scrub_mgr) {
+        LOGW("ScrubManager is not initialized in HS_PG::on_scrub_req_received for pg={}", pg_id);
+        return;
+    }
+    scrub_mgr->add_scrub_req(scrub_req);
+}
+
+void HSHomeObject::HS_PG::on_scrub_result_received(boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data) {
+    const auto pg_id = pg_info_.id;
+    LOGD("Received scrub result for pg={}", pg_id);
+
+    struct rpc_cleanup {
+        boost::intrusive_ptr< sisl::GenericRpcData >& rpc_data_;
+        ~rpc_cleanup() {
+            if (rpc_data_) { rpc_data_->send_response(); }
+        }
+    } rpc_cleanup{rpc_data};
+
+    auto const& incoming_buf = rpc_data->request_blob();
+    const auto buf_size = incoming_buf.size();
+    const auto buf_ptr = incoming_buf.cbytes();
+
+    if (!buf_ptr || !buf_size) {
+        LOGW("scrub result received with empty buffer for pg={}, buffer_size={}", pg_id, buf_size);
+        return;
+    }
+    flatbuffers::Verifier verifier(buf_ptr, buf_size);
+    if (!VerifySizePrefixedScrubResultBuffer(verifier)) {
+        LOGW("scrub result received with invalid flatbuffer for pg={}, buffer_size={}", pg_id, buf_size);
+        return;
+    }
+
+    std::shared_ptr< ScrubManager::scrub_result > scrub_result = std::make_shared< ScrubManager::scrub_result >();
+    if (!scrub_result->load(buf_ptr, buf_size)) {
+        LOGW("Failed to load scrub result from flatbuffer for pg={}", pg_id);
+        return;
+    }
+    LOGD("Scrub result loaded from flatbuffer for pg={}, req_id:{}, issuer_peer_id:{}", pg_id, scrub_result->req_id,
+         scrub_result->issuer_peer_id);
+
+    auto scrub_mgr = home_obj_.scrub_manager();
+    if (!scrub_mgr) {
+        LOGW("ScrubManager is not initialized in HS_PG::on_scrub_result_received for pg={}", pg_id);
+        return;
+    }
+    scrub_mgr->add_scrub_result(pg_id, scrub_result);
+}
+
 // NOTE: caller should hold the _pg_lock
 const HSHomeObject::HS_PG* HSHomeObject::_get_hs_pg_unlocked(pg_id_t pg_id) const {
     auto iter = _pg_map.find(pg_id);
@@ -1254,24 +1437,26 @@ void HSHomeObject::refresh_pg_statistics(pg_id_t pg_id) {
     uint64_t active_count = 0;
     uint64_t tombstone_count = 0;
 
-    auto start_key =
-        BlobRouteKey{BlobRoute{uint64_t(pg_id) << homeobject::shard_width, std::numeric_limits< uint64_t >::min()}};
-    auto end_key =
-        BlobRouteKey{BlobRoute{uint64_t(pg_id + 1) << homeobject::shard_width, std::numeric_limits< uint64_t >::min()}};
+    auto start_key = BlobRouteKey{BlobRoute{uint64_t(pg_id) << homeobject::shard_width, 0}};
+    auto end_key = BlobRouteKey{BlobRoute{uint64_t(pg_id + 1) << homeobject::shard_width, 0}};
+    uint64_t last_blob_id = 0;
 
     homestore::BtreeQueryRequest< BlobRouteKey > query_req{
         homestore::BtreeKeyRange< BlobRouteKey >{std::move(start_key), true /* inclusive */, std::move(end_key),
                                                  false /* inclusive */},
         homestore::BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY,
         std::numeric_limits< uint32_t >::max() /* blob count in a pg will not exceed uint32_t_max*/,
-        [&active_count, &tombstone_count](homestore::BtreeKey const& key,
-                                          homestore::BtreeValue const& value) mutable -> bool {
+        [&active_count, &tombstone_count, &last_blob_id](homestore::BtreeKey const& key,
+                                                         homestore::BtreeValue const&
value) mutable -> bool { BlobRouteValue blob_value{value}; if (blob_value.pbas() == HSHomeObject::tombstone_pbas) { tombstone_count++; } else { active_count++; + BlobRouteKey blob_key{key}; + last_blob_id = std::max(last_blob_id, blob_key.key().blob); } + return false; // Continue scanning }}; @@ -1309,6 +1494,8 @@ void HSHomeObject::refresh_pg_statistics(pg_id_t pg_id) { de.total_occupied_blk_count.store(total_occupied, std::memory_order_relaxed); }); + hs_pg->last_committed_blob_id.store(last_blob_id); + LOGI("Refreshed statistics for pg={}: active_blobs={} (original={}), tombstone_blobs={} (original={}), " "occupied_blocks={} (original={})", pg_id, active_count, original_active_count, tombstone_count, original_tombstone_count, total_occupied, @@ -1325,9 +1512,9 @@ void HSHomeObject::update_pg_meta_after_gc(const pg_id_t pg_id, const homestore: auto hs_pg = dynamic_cast< HS_PG* >(iter->second.get()); auto move_from_v_chunk = chunk_selector()->get_extend_vchunk(move_from_chunk); - // TODO:: for now, when updating pchunk for a vchunk, we have to update the whole pg super blk. we can optimize this - // by persist a single superblk for each vchunk in the pg, so that we only need to update the vchunk superblk - // itself. + // TODO:: for now, when updating pchunk for a vchunk, we have to update the whole pg super blk. we can optimize + // this by persist a single superblk for each vchunk in the pg, so that we only need to update the vchunk + // superblk itself. auto pg_chunks = hs_pg->pg_sb_->get_chunk_ids_mutable(); @@ -1339,7 +1526,7 @@ void HSHomeObject::update_pg_meta_after_gc(const pg_id_t pg_id, const homestore: if (sisl_unlikely(pg_chunks[v_chunk_id] == move_to_chunk)) { // this might happens when crash recovery. the crash happens after pg metablk is updated but before gc task // metablk is destroyed. - LOGD("gc task_id={}, the pchunk_id for vchunk={} for pg_id={} is already {}, update pg metablk again!", + LOGD("gc task_id={}, the pchunk_id for vchunk={} for pg_id={} is already {}, skip updating pg metablk!", task_id, v_chunk_id, pg_id, move_to_chunk); } else { RELEASE_ASSERT(pg_chunks[v_chunk_id] == move_from_chunk, @@ -1350,35 +1537,36 @@ void HSHomeObject::update_pg_meta_after_gc(const pg_id_t pg_id, const homestore: LOGD("gc task_id={}, pchunk for vchunk={} of pg_id={} is updated from {} to {}", task_id, v_chunk_id, pg_id, move_from_chunk, move_to_chunk); - // TODO:hs_pg->shards_.size() will be decreased by 1 in delete_shard if gc finds a empty shard, which will be - // implemented later - hs_pg->durable_entities_update([this, move_from_v_chunk, &move_to_chunk, &move_from_chunk, &pg_id, - &task_id](auto& de) { - // active_blob_count is updated by put/delete blob, not change it here. + // TODO:hs_pg->shards_.size() will be decreased by 1 in delete_shard if gc finds a empty shard, which will + // be implemented later + hs_pg->durable_entities_update( + [this, move_from_v_chunk, &move_to_chunk, &move_from_chunk, &pg_id, &task_id](auto& de) { + // active_blob_count is updated by put/delete blob, not change it here. - // considering the complexity of gc crash recovery for tombstone_blob_count, we get it directly from index - // table , which is the most accurate. + // considering the complexity of gc crash recovery for tombstone_blob_count, we get it directly from + // index table , which is the most accurate. - // TODO::do we need this as durable entity? remove it and get all the from pg index in real time. 
- de.tombstone_blob_count = get_pg_tombstone_blob_count(pg_id); + // TODO::do we need this as durable entity? remove it and get all the from pg index in real time. + de.tombstone_blob_count = get_pg_tombstone_blob_count(pg_id); - auto move_to_v_chunk = chunk_selector()->get_extend_vchunk(move_to_chunk); + auto move_to_v_chunk = chunk_selector()->get_extend_vchunk(move_to_chunk); - auto total_occupied_blk_count_by_move_from_chunk = move_from_v_chunk->get_used_blks(); - auto total_occupied_blk_count_by_move_to_chunk = move_to_v_chunk->get_used_blks(); + auto total_occupied_blk_count_by_move_from_chunk = move_from_v_chunk->get_used_blks(); + auto total_occupied_blk_count_by_move_to_chunk = move_to_v_chunk->get_used_blks(); - // TODO::in recovery case , this might be updated again , fix me later. - const auto reclaimed_blk_count = - total_occupied_blk_count_by_move_from_chunk - total_occupied_blk_count_by_move_to_chunk; + // TODO::in recovery case , this might be updated again , fix me later. + const auto reclaimed_blk_count = + total_occupied_blk_count_by_move_from_chunk - total_occupied_blk_count_by_move_to_chunk; - de.total_occupied_blk_count -= reclaimed_blk_count; - de.total_reclaimed_blk_count += reclaimed_blk_count; + de.total_occupied_blk_count -= reclaimed_blk_count; + de.total_reclaimed_blk_count += reclaimed_blk_count; - LOGD("gc task_id={}, move_from_chunk={}, total_occupied_blk_count_by_move_from_chunk={}, move_to_chunk={}, " - "total_occupied_blk_count_by_move_to_chunk={}, total_occupied_blk_count={}", - task_id, move_from_chunk, total_occupied_blk_count_by_move_from_chunk, move_to_chunk, - total_occupied_blk_count_by_move_to_chunk, de.total_occupied_blk_count.load()); - }); + LOGD("gc task_id={}, move_from_chunk={}, total_occupied_blk_count_by_move_from_chunk={}, " + "move_to_chunk={}, " + "total_occupied_blk_count_by_move_to_chunk={}, total_occupied_blk_count={}", + task_id, move_from_chunk, total_occupied_blk_count_by_move_from_chunk, move_to_chunk, + total_occupied_blk_count_by_move_to_chunk, de.total_occupied_blk_count.load()); + }); hs_pg->pg_sb_->total_occupied_blk_count = hs_pg->durable_entities().total_occupied_blk_count.load(std::memory_order_relaxed); diff --git a/src/lib/homestore_backend/hs_shard_manager.cpp b/src/lib/homestore_backend/hs_shard_manager.cpp index 8c949cb3e..9f2bf8ce5 100644 --- a/src/lib/homestore_backend/hs_shard_manager.cpp +++ b/src/lib/homestore_backend/hs_shard_manager.cpp @@ -63,15 +63,15 @@ uint64_t ShardManager::max_shard_size() { return Gi; } uint64_t ShardManager::max_shard_num_in_pg() { return ((uint64_t)0x01) << shard_width; } -shard_id_t HSHomeObject::generate_new_shard_id(pg_id_t pgid) { +shard_id_t HSHomeObject::generate_new_shard_id(pg_id_t pg_id) { std::scoped_lock lock_guard(_pg_lock); - auto hs_pg = const_cast< HS_PG* >(_get_hs_pg_unlocked(pgid)); + auto hs_pg = const_cast< HS_PG* >(_get_hs_pg_unlocked(pg_id)); RELEASE_ASSERT(hs_pg, "Missing pg info"); auto new_sequence_num = ++hs_pg->shard_sequence_num_; RELEASE_ASSERT(new_sequence_num < ShardManager::max_shard_num_in_pg(), "new shard id must be less than ShardManager::max_shard_num_in_pg()"); - return make_new_shard_id(pgid, new_sequence_num); + return make_new_shard_id(pg_id, new_sequence_num); } uint64_t HSHomeObject::get_sequence_num_from_shard_id(uint64_t shard_id) { @@ -704,6 +704,14 @@ void HSHomeObject::write_migrated_shard_metablks() { } } +shard_id_t HSHomeObject::get_last_shard_id_in_pg(pg_id_t pg_id) const { + std::scoped_lock lock_guard(_pg_lock, _shard_lock); + auto 
hs_pg = const_cast< HS_PG* >(_get_hs_pg_unlocked(pg_id));
+    RELEASE_ASSERT(hs_pg, "Missing pg info, pg={}", pg_id);
+    auto& shards = hs_pg->shards_;
+    return shards.empty() ? 0 : shards.back()->info.id;
+}
+
 void HSHomeObject::add_new_shard_to_map(std::unique_ptr< HS_Shard > shard) {
     // TODO: We are taking a global lock for all pgs to create shard. Is it really needed??
     // We need to have fine grained per PG lock and take only that.
diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp
index 81ec3e6d0..991352b6b 100644
--- a/src/lib/homestore_backend/replication_state_machine.cpp
+++ b/src/lib/homestore_backend/replication_state_machine.cpp
@@ -293,9 +293,10 @@ void ReplicationStateMachine::on_destroy(const homestore::group_id_t& group_id)
         LOGW("do not have pg mapped by group_id={}", boost::uuids::to_string(group_id));
         return;
     }
-    home_object_->pg_destroy(PG_ID.value());
-    LOGI("replica destroyed, cleared pg={} resources with group_id={}", PG_ID.value(),
-         boost::uuids::to_string(group_id));
+
+    const auto pg_id = PG_ID.value();
+    home_object_->pg_destroy(pg_id);
+    LOGI("replica destroyed, cleared pg={} resources with group_id={}", pg_id, boost::uuids::to_string(group_id));
 }
 
 void ReplicationStateMachine::on_remove_member(const homestore::replica_id_t& member, trace_id_t tid) {
@@ -1049,4 +1050,32 @@ void ReplicationStateMachine::on_log_replay_done(const homestore::group_id_t& gr
     home_object_->refresh_pg_statistics(pg_id);
 }
 
+void ReplicationStateMachine::on_become_leader(const homestore::group_id_t& group_id) {
+    auto pg_id_opt = home_object_->get_pg_id_with_group_id(group_id);
+    if (!pg_id_opt.has_value()) {
+        LOGE("become leader but cannot find any pg for group={}!", group_id);
+        return;
+    }
+    const auto pg_id = pg_id_opt.value();
+    RELEASE_ASSERT(home_object_->pg_exists(pg_id), "pg={} should exist, but not! fatal error!", pg_id);
+    // TODO:: add whatever actions need to be taken.
+}
+
+void ReplicationStateMachine::on_become_follower(const homestore::group_id_t& group_id) {
+    auto pg_id_opt = home_object_->get_pg_id_with_group_id(group_id);
+    if (!pg_id_opt.has_value()) {
+        LOGE("become follower but cannot find any pg for group={}!", group_id);
+        return;
+    }
+    const auto pg_id = pg_id_opt.value();
+    RELEASE_ASSERT(home_object_->pg_exists(pg_id), "pg={} should exist, but not! fatal error!", pg_id);
+
+    LOGI("become follower of group {}, cancel scrub task for pg={}", group_id, pg_id);
+    // TODO:: add whatever actions need to be taken.
+
+    // cancel the scrub task since this node is no longer the leader.
+    auto& scrub_mgr = home_object_->scrub_manager();
+    if (scrub_mgr) scrub_mgr->cancel_scrub_task(pg_id);
+}
+
 } // namespace homeobject
diff --git a/src/lib/homestore_backend/replication_state_machine.hpp b/src/lib/homestore_backend/replication_state_machine.hpp
index 724f091d0..75d2d4187 100644
--- a/src/lib/homestore_backend/replication_state_machine.hpp
+++ b/src/lib/homestore_backend/replication_state_machine.hpp
@@ -240,6 +240,14 @@ class ReplicationStateMachine : public homestore::ReplDevListener {
     ///
     void on_log_replay_done(const homestore::group_id_t& group_id) override;
 
+    /// @brief this is called when this node becomes the leader for the group
+    /// @param group_id - the group for which this node has become the leader
+    virtual void on_become_leader(const homestore::group_id_t& group_id) override;
+
+    /// @brief this is called when this node becomes a follower for the group
+    /// @param group_id - the group for which this node has become a follower
+    virtual void on_become_follower(const homestore::group_id_t& group_id) override;
+
 private:
     HSHomeObject* home_object_{nullptr};
 
diff --git a/src/lib/homestore_backend/scrub_manager.cpp b/src/lib/homestore_backend/scrub_manager.cpp
new file mode 100644
index 000000000..4d5ca9ef4
--- /dev/null
+++ b/src/lib/homestore_backend/scrub_manager.cpp
@@ -0,0 +1,2123 @@
+#include "hs_homeobject.hpp"
+#include
+#include
+#include
+#include
+#include
+
+namespace homeobject {
+
+SISL_LOGGING_DECL(scrubmgr)
+
+#define SCRUBLOG(level, pg_id, task_id, msg, ...)                                                                     \
+    LOG##level##MOD(scrubmgr, "[pg_id={}, task_id={}] " msg, pg_id, task_id, ##__VA_ARGS__)
+
+#define SCRUBLOGD(pg_id, task_id, msg, ...) SCRUBLOG(DEBUG, pg_id, task_id, msg, ##__VA_ARGS__)
+#define SCRUBLOGI(pg_id, task_id, msg, ...) SCRUBLOG(INFO, pg_id, task_id, msg, ##__VA_ARGS__)
+#define SCRUBLOGW(pg_id, task_id, msg, ...) SCRUBLOG(WARN, pg_id, task_id, msg, ##__VA_ARGS__)
+#define SCRUBLOGE(pg_id, task_id, msg, ...) SCRUBLOG(ERROR, pg_id, task_id, msg, ##__VA_ARGS__)
+#define SCRUBLOGC(pg_id, task_id, msg, ...)
SCRUBLOG(CRITICAL, pg_id, task_id, msg, ##__VA_ARGS__) + +class ScrubManager::PGScrubContext { +public: + PGScrubContext(uint64_t task_id, const HSHomeObject::HS_PG* hs_pg); + ~PGScrubContext(); + + bool scrub_meta_batch(std::shared_ptr< ScrubManager::MetaScrubReport > scrub_report, shard_id_t start_shard_id, + shard_id_t end_shard_id, blob_id_t last_blob_id, int64_t scrub_lsn, + std::map< shard_id_t, uint32_t >& shard_blob_count_in_batch); + + bool scrub_blob_batch(std::shared_ptr< ScrubManager::ShallowScrubReport > scrub_report, shard_id_t start_shard_id, + shard_id_t end_shard_id, blob_id_t last_blob_id, int64_t scrub_lsn, bool is_deep_scrub); + + void reconcile_scrub_report(std::shared_ptr< ScrubManager::ShallowScrubReport > scrub_report); + + folly::Future< bool > check_existence_in_peer(peer_id_t peer_id, BlobRoute blob, bool check_blob); + + void add_scrub_result(std::shared_ptr< ScrubManager::scrub_result > result); + uint64_t random_req_id() const; + void send_req_to_peer(const ScrubManager::scrub_req& req, const peer_id_t& peer_id); + void cancel() { + cancelled.store(true); + const auto pg_id = hs_pg->pg_id(); + SCRUBLOGI(pg_id, task_id, "scrub task is cancelled"); + } + + uint64_t task_id{0}; + std::atomic_bool cancelled{false}; + +private: + std::shared_ptr< folly::IOThreadPoolExecutor > m_scrub_executor; + const HSHomeObject::HS_PG* hs_pg; + folly::ConcurrentHashMap< peer_id_t, + std::shared_ptr< folly::MPMCQueue< std::shared_ptr< ScrubManager::scrub_result > > > > + peer_scrub_result_queue_map_; +}; + +ScrubManager::ScrubManager(HSHomeObject* homeobject) : m_hs_home_object{homeobject} { + // Register meta_service handlers to recover pg scrub superblocks + std::vector< homestore::superblk< pg_scrub_superblk > > stale_pg_scrub_sbs; + homestore::meta_service().register_handler( + pg_scrub_meta_name, + [this, &stale_pg_scrub_sbs](homestore::meta_blk* mblk, sisl::byte_view buf, size_t size) { + on_pg_scrub_meta_blk_found(std::move(buf), voidptr_cast(mblk), stale_pg_scrub_sbs); + }, + nullptr, true); + homestore::meta_service().read_sub_sb(pg_scrub_meta_name); + + // remove stale pg scrub superblocks + for (auto& sb : stale_pg_scrub_sbs) + sb.destroy(); +} + +ScrubManager::~ScrubManager() { stop(); } + +void ScrubManager::scan_pg_for_scrub() { + for (auto const& [pg_id, _] : m_pg_scrub_sb_map) { + if (is_eligible_for_deep_scrub(pg_id)) { + LOGINFOMOD(scrubmgr, "pg={} is eligible for deep scrub, submit scrub task", pg_id); + submit_scrub_task(pg_id, true) + .via(&folly::InlineExecutor::instance()) + .thenValue([this, pg_id](std::shared_ptr< ShallowScrubReport > report) { + if (!report) { + LOGERRORMOD(scrubmgr, "deep scrub failed for pg={}", pg_id); + return; + } + LOGINFOMOD(scrubmgr, "deep scrub is completed for pg={}", pg_id); + auto deep_report = std::dynamic_pointer_cast< DeepScrubReport >(report); + if (!deep_report) { + LOGERRORMOD(scrubmgr, "report for deep scrub cannot be casted to DeepScrubReport for pg={}", + pg_id); + return; + } + handle_deep_pg_scrub_report(std::move(deep_report)); + }); + return; + } + + if (is_eligible_for_shallow_scrub(pg_id)) { + LOGINFOMOD(scrubmgr, "pg={} is eligible for shallow scrub, submit scrub task", pg_id); + submit_scrub_task(pg_id, false) + .via(&folly::InlineExecutor::instance()) + .thenValue([this, pg_id](std::shared_ptr< ShallowScrubReport > report) { + if (!report) { + LOGERRORMOD(scrubmgr, "shallow scrub failed for pg={}", pg_id); + return; + } + LOGINFOMOD(scrubmgr, "shallow scrub is completed for pg={}", pg_id); + 
handle_shallow_pg_scrub_report(std::move(report)); + }); + return; + } + + LOGDEBUGMOD(scrubmgr, "pg={} is not eligible for any scrubbing", pg_id); + } +} + +void ScrubManager::handle_shallow_pg_scrub_report(std::shared_ptr< ShallowScrubReport > report) { + if (!report) { + LOGERRORMOD(scrubmgr, "Shallow scrub report is null!"); + return; + } + + report->print(); + // TODO:: add more logic, log event for notification, report to metrics?. +} + +void ScrubManager::handle_deep_pg_scrub_report(std::shared_ptr< DeepScrubReport > report) { + if (!report) { + LOGERRORMOD(scrubmgr, "Deep scrub report is null!"); + return; + } + + report->print(); + // TODO:: add more logic, log event for notification, report to metrics?. +} + +bool ScrubManager::is_eligible_for_deep_scrub(const pg_id_t& pg_id) { + // TODO:: add the real eligibility check logic + return false; +} + +bool ScrubManager::is_eligible_for_shallow_scrub(const pg_id_t& pg_id) { + // TODO:: add the real eligibility check logic + return false; +} + +void ScrubManager::start() { + // 1 set scrub task handling threads. + // TODO :: make thread count configurable, thread number is the most concurrent scrub tasks that can be handled + // concurrently. Too many concurrent scrub tasks may bring too much pressure to the node + const auto most_concurrent_scrub_task_num = 2; + m_scrub_executor = std::make_shared< folly::IOThreadPoolExecutor >(most_concurrent_scrub_task_num); + for (int i = 0; i < most_concurrent_scrub_task_num; ++i) { + m_scrub_executor->add([this]() { + while (true) { + // if no available scrub task, it will be blocked here. + auto pop_result = m_scrub_task_queue.pop(); + if (pop_result.is_closed()) { + LOGINFOMOD(scrubmgr, "scrub task queue is stopped, no need to handle scrub task anymore!"); + break; + } + RELEASE_ASSERT(pop_result.value.has_value() && pop_result.is_ok(), + "pop from scrub task queue should not fail when it is not closed!"); + auto task = std::move(pop_result.value.value()); + // we handle pg scrub task in a single thread , so that we can control the concurrent scrub tasks by + // controlling the thread number of m_scrub_executor. + handle_pg_scrub_task(std::move(task)); + } + }); + } + + // 2 set scrub req handling threads. + const auto most_concurrent_scrub_req_num = 2; + // we don't set priority for req as that of task, only control the concurrency to not bring too much io/cpu pressure + // to this node. 
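start() above drains the scrub task queue with a fixed pool of blocking consumers that exit once the queue is closed. A self-contained sketch of that pop-until-closed pattern using only std:: primitives; ClosableQueue below is illustrative and not the patch's MPMCPriorityQueue:

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <optional>
#include <queue>
#include <thread>
#include <vector>

// Minimal closable queue: push() fails once closed, pop() blocks until an item
// arrives or the queue is closed and drained (then returns std::nullopt).
template < typename T >
class ClosableQueue {
public:
    bool push(T item) {
        std::lock_guard lk(m_);
        if (closed_) return false;
        q_.push(std::move(item));
        cv_.notify_one();
        return true;
    }
    std::optional< T > pop() {
        std::unique_lock lk(m_);
        cv_.wait(lk, [this] { return closed_ || !q_.empty(); });
        if (q_.empty()) return std::nullopt; // closed and drained
        T item = std::move(q_.front());
        q_.pop();
        return item;
    }
    void close() {
        std::lock_guard lk(m_);
        closed_ = true;
        cv_.notify_all();
    }

private:
    std::mutex m_;
    std::condition_variable cv_;
    std::queue< T > q_;
    bool closed_{false};
};

int main() {
    ClosableQueue< int > tasks;
    std::vector< std::thread > workers;
    for (int w = 0; w < 2; ++w) {
        workers.emplace_back([&tasks, w] {
            // mirrors the worker loop in ScrubManager::start(): block, handle, exit on close
            while (auto task = tasks.pop()) { std::printf("worker %d handled task %d\n", w, *task); }
            std::printf("worker %d: queue closed, exiting\n", w);
        });
    }
    for (int i = 0; i < 6; ++i) tasks.push(i);
    tasks.close();
    for (auto& t : workers) t.join();
}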
+ m_scrub_req_executor = std::make_shared< folly::IOThreadPoolExecutor >(most_concurrent_scrub_req_num); + + iomanager.run_on_wait(iomgr::reactor_regex::random_worker, [&]() { + m_scrub_timer_fiber = iomanager.iofiber_self(); + // TODO: make the interval configurable, for now set it to 60 seconds + m_scrub_timer_hdl = iomanager.schedule_thread_timer(60ull * 1000 * 1000 * 1000, true, nullptr /*cookie*/, + [this](void*) { scan_pg_for_scrub(); }); + }); + LOGINFOMOD(scrubmgr, "scrub manager started!"); +} + +void ScrubManager::stop() { + // shutdown timer — only if it was ever started + if (m_scrub_timer_hdl != iomgr::null_timer_handle) { + RELEASE_ASSERT(m_scrub_timer_fiber, + "m_scrub_timer_hdl is not null_timer_handle, but m_scrub_timer_fiber is null, fatal error!"); + LOGINFOMOD(scrubmgr, "stop scrub scheduler timer"); + iomanager.run_on_wait(m_scrub_timer_fiber, [&]() { + iomanager.cancel_timer(m_scrub_timer_hdl, true); + m_scrub_timer_hdl = iomgr::null_timer_handle; + }); + m_scrub_timer_fiber = nullptr; + } else { + LOGINFOMOD(scrubmgr, "scrub scheduler timer is not running, no need to stop it"); + } + + // cancel all the running scrub tasks and clear the scrub task queue. + // TODO:: add a stopped flag to avoid adding new scrub task if stopped. + if (!m_scrub_task_queue.is_closed()) { m_scrub_task_queue.close(); } + for (auto& [_, pg_scrub_ctx] : m_pg_scrub_ctx_map) { + pg_scrub_ctx->cancel(); + } + + if (m_scrub_executor) { + m_scrub_executor->stop(); + m_scrub_executor.reset(); + } + if (m_scrub_req_executor) { + m_scrub_req_executor->stop(); + m_scrub_req_executor.reset(); + } + LOGINFOMOD(scrubmgr, "scrub manager stopped!"); +} + +void ScrubManager::add_scrub_req(std::shared_ptr< scrub_req > req) { + m_scrub_req_executor->add([this, req = std::move(req)]() { handle_scrub_req(req); }); +} + +void ScrubManager::add_scrub_result(const pg_id_t pg_id, std::shared_ptr< scrub_result > result) { + auto pg_scrub_ctx_it = m_pg_scrub_ctx_map.find(pg_id); + if (pg_scrub_ctx_it == m_pg_scrub_ctx_map.end()) { + LOGERRORMOD(scrubmgr, "cannot find scrub context for pg_id={}, fail to add scrub map!", pg_id); + return; + } + + pg_scrub_ctx_it->second->add_scrub_result(std::move(result)); +} + +void ScrubManager::handle_scrub_req(std::shared_ptr< scrub_req > req) { + if (!req) { + LOGERRORMOD(scrubmgr, "scrub req is null, cannot handle it!"); + return; + } + + const auto& pg_id = req->pg_id; + const auto hs_pg = m_hs_home_object->get_hs_pg(pg_id); + if (!hs_pg) { + LOGERRORMOD(scrubmgr, "cannot find hs_pg for pg {}, fail to handle scrub req!", pg_id); + return; + } + + const auto& pg_repl_dev = hs_pg->repl_dev_; + if (!pg_repl_dev) { + LOGERRORMOD(scrubmgr, "repl_dev is null for pg {}, fail to handle scrub req!", pg_id); + return; + } + + // leader still need to handle the scrub req, as leader also needs to do scrub and send scrub result to itself to + // trigger the logic after receiving scrub result. + + std::shared_ptr< scrub_result > range_result; + auto& remote_peer_id = req->issuer_peer_id; + + // sleep for a while to avoid handling scrub req immediately, which may cause high IOPS to the node. + // for example, handling a deep blob scrub req will take some io resource. we sleep 1s here so that there is a + // interval in the middle of handing two deep blob scrub reqs. + + // TODO:: for different scrub req, we sleep different duration. 
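One possible shape of the per-request-type pause the TODO above mentions; the enum mirrors SCRUB_TYPE but the durations and the helper name are hypothetical, not part of this patch:

#include <chrono>
#include <thread>

// Hypothetical: pick a throttle interval per scrub request type before handling it.
enum class ScrubReqType { META, SHALLOW_BLOB, DEEP_BLOB };

std::chrono::milliseconds throttle_interval(ScrubReqType t) {
    switch (t) {
    case ScrubReqType::DEEP_BLOB: return std::chrono::milliseconds(1000);   // reads data blocks, most expensive
    case ScrubReqType::SHALLOW_BLOB: return std::chrono::milliseconds(200); // index-only scan
    case ScrubReqType::META: return std::chrono::milliseconds(100);         // lightest
    }
    return std::chrono::milliseconds(0);
}

int main() { std::this_thread::sleep_for(throttle_interval(ScrubReqType::DEEP_BLOB)); }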
+ std::this_thread::sleep_for(std::chrono::seconds(1)); + + // 1 do scrub + const auto& scrub_type = req->scrub_type; + switch (scrub_type) { + case SCRUB_TYPE::META: { + LOGDEBUGMOD(scrubmgr, "handling meta scrub req for pg {}", pg_id); + range_result = local_scrub_meta(req); + break; + } + case SCRUB_TYPE::DEEP_BLOB: + case SCRUB_TYPE::SHALLOW_BLOB: { + LOGDEBUGMOD(scrubmgr, "handling blob scrub req for pg {}, scrub_type={}", pg_id, scrub_type); + range_result = local_scrub_blob(req); + break; + } + default: + RELEASE_ASSERT(false, "unknown scrub req type: {}!", scrub_type); + } + + if (!range_result) { + LOGERRORMOD(scrubmgr, "fail to handle scrub req for pg {}, scrub_type={}, drop it!", pg_id, scrub_type); + return; + } + + // 2 send scrub result back to leader + auto flatbuffer = range_result->build_flat_buffer(); + sisl::io_blob_list_t blob_list; + blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false); + // no need to retry, leader will handle retries + pg_repl_dev->data_request_unidirectional(remote_peer_id, HSHomeObject::PUSH_SCRUB_RESULT, blob_list) + .via(&folly::InlineExecutor::instance()) + .thenValue([pg_id, remote_peer_id, flatbuffer = std::move(flatbuffer), scrub_type](auto&& response) { + if (response.hasError()) { + LOGERRORMOD(scrubmgr, "failed to send scrub result to peer {} in pg {}, scrub_type:{}, error={}", + remote_peer_id, pg_id, scrub_type, response.error()); + return; + } + + LOGDEBUGMOD(scrubmgr, "successfully sent scrub map to peer {} in pg {}, scrub_type:{}", remote_peer_id, + pg_id, scrub_type); + }); +} + +bool ScrubManager::wait_for_scrub_lsn_commit(shared< homestore::ReplDev > repl_dev, int64_t scrub_lsn) { + if (!repl_dev) { + LOGERRORMOD(scrubmgr, "repl_dev is null, cannot wait for scrub lsn commit!"); + return false; + } + + // TODO:: make this configurable + const auto wait_retry_times = 5; + for (auto i = 0; i < wait_retry_times; ++i) { + auto commit_lsn = repl_dev->get_last_commit_lsn(); + if (commit_lsn >= scrub_lsn) { + LOGINFOMOD(scrubmgr, "commit lsn {} is greater than or equal to scrub lsn {}, wait successfully", + commit_lsn, scrub_lsn); + return true; + } + LOGDEBUGMOD(scrubmgr, + "commit lsn {} is less than scrub lsn {}, wait for 1 second before retrying, retry times {}/{}", + commit_lsn, scrub_lsn, i + 1, wait_retry_times); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + + return false; +} + +uint64_t ScrubManager::compute_crc64(const void* data, size_t len, uint64_t crc) const { + static constexpr uint64_t kCrc64Poly = 0x42F0E1EBA9EA3693ULL; + static constexpr auto kCrc64Table = []() { + std::array< uint64_t, 256 > t{}; + for (int i = 0; i < 256; ++i) { + uint64_t c = static_cast< uint64_t >(i) << 56; + for (int b = 0; b < 8; ++b) { + c = (c & 0x8000000000000000ULL) ? 
((c << 1) ^ kCrc64Poly) : (c << 1); + } + t[i] = c; + } + return t; + }(); + + const uint8_t* p = static_cast< const uint8_t* >(data); + while (len--) { + uint8_t idx = static_cast< uint8_t >((crc >> 56) ^ *p++); + crc = kCrc64Table[idx] ^ (crc << 8); + } + return crc; +} + +std::shared_ptr< ScrubManager::scrub_result > ScrubManager::local_scrub_blob(std::shared_ptr< scrub_req > req) { + if (!req) { + LOGERRORMOD(scrubmgr, "blob scrub req is null, cannot handle it!"); + return nullptr; + } + + const auto& req_id = req->req_id; + const auto& scrub_lsn = req->scrub_lsn; + const auto& pg_id = req->pg_id; + const auto& scrub_type = req->scrub_type; + + if (scrub_type != SCRUB_TYPE::DEEP_BLOB && scrub_type != SCRUB_TYPE::SHALLOW_BLOB) { + LOGERRORMOD(scrubmgr, + "invalid scrub req type for local_scrub_blob, pg_id={}, req_id={}, scrub_type={}, scrub_lsn={}", + pg_id, req_id, scrub_type, scrub_lsn); + return nullptr; + } + + LOGDEBUGMOD(scrubmgr, "handling blob scrub req for pg {}, req_id={}, scrub_lsn={}, scrub_type={}", pg_id, req_id, + scrub_lsn, scrub_type); + + auto hs_pg = m_hs_home_object->get_hs_pg(pg_id); + if (!hs_pg) { + LOGERRORMOD(scrubmgr, "req_id={} cannot find hs_pg for pg={}, fail to do deep blob scrub!", req_id, pg_id); + return nullptr; + } + + if (!wait_for_scrub_lsn_commit(hs_pg->repl_dev_, scrub_lsn)) { + LOGERRORMOD(scrubmgr, + "pg_id={}, req_id={}, commit lsn is not advanced to scrub lsn {} after waiting for a while, fail " + "to do {} blob scrub", + pg_id, req_id, scrub_lsn, scrub_type == SCRUB_TYPE::DEEP_BLOB ? "deep" : "shallow"); + return nullptr; + } + + if (req->start_shard_id > req->end_shard_id) { + LOGERRORMOD( + scrubmgr, + "received incorrect blob scrub req, start_shard_id={}, end_shard_id={}, start_blob_id={}, end_blob_id={}", + req->start_shard_id, req->end_shard_id, req->start_blob_id, req->end_blob_id); + return nullptr; + } + + // refer to docs/adr/scrub-blob-range-coverage.md + // TODO:: make this configurable. + uint32_t batch_capacity = static_cast< uint32_t >( + scrub_type == SCRUB_TYPE::SHALLOW_BLOB ? max_scrub_batch_size : deep_blob_scrub_batch_size); + + const auto start = BlobRouteKey{BlobRoute{req->start_shard_id, req->start_blob_id}}; + const auto end = BlobRouteKey{BlobRoute{req->end_shard_id, req->end_blob_id}}; + homestore::BtreeQueryRequest< BlobRouteKey > query_req{ + homestore::BtreeKeyRange< BlobRouteKey >{start, true /* inclusive */, end, true /* inclusive */}, + homestore::BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY, batch_capacity, + [last_blob_id = req->end_blob_id](homestore::BtreeKey const& key, homestore::BtreeValue const& value) -> bool { + BlobRouteValue blob_value{value}; + BlobRouteKey blob_key{key}; + + return blob_value.pbas() != HSHomeObject::tombstone_pbas && blob_key.key().blob <= last_blob_id; + }}; + + std::vector< std::pair< BlobRouteKey, BlobRouteValue > > out; + auto const status = hs_pg->index_table_->query(query_req, out); + + // if there are more blobs to be scrubbed, we will handle them in the next scrub req, so we don't consider has_more + // as an error here. 
+ if (status != homestore::btree_status_t::success && status != homestore::btree_status_t::has_more) { + LOGERRORMOD( + scrubmgr, + "pg_id={}, req_id={}, scrub_type={}, scrub_lsn={}, Failed to query blobs in index table for status={}", + pg_id, req_id, scrub_type, scrub_lsn, status); + return nullptr; + } + + auto blob_scrub_result = std::make_shared< ScrubManager::scrub_result >(req_id, m_hs_home_object->our_uuid()); + + if (scrub_type == SCRUB_TYPE::SHALLOW_BLOB) { + // for shallow blob scrubbing, we only check the existence of blobs, no io will be issued to hard drive. + for (const auto& [k, _] : out) { + blob_scrub_result->add_entry({k.key().shard, k.key().blob, ScrubStatus::NONE}); + } + LOGDEBUGMOD(scrubmgr, + "pg_id={}, req_id={}, scrub_lsn={}, shallow blob scrub completed, return {} blobs in range [{},{})", + pg_id, req_id, scrub_lsn, blob_scrub_result->entries.size(), start, end); + return blob_scrub_result; + } + + // Sort blobs by PBA (physical block address) for sequential disk access, this is a best effort, not guaranteed, + // since client io will move the disk pointer and break the sequence of io. + std::sort(out.begin(), out.end(), [](const auto& a, const auto& b) { + // Compare by PBA single blkid for ordering + const auto pba_a = a.second.pbas().to_single_blkid(); + const auto pba_b = b.second.pbas().to_single_blkid(); + return pba_a.blk_num() < pba_b.blk_num(); + }); + + // deep scrub: read and check blobs. + auto& data_service = homestore::data_service(); + const auto blk_size = data_service.get_blk_size(); + std::vector< folly::Future< folly::Unit > > futs; + + for (const auto& [k, v] : out) { + auto pba = v.pbas(); + auto total_size = pba.blk_count() * blk_size; + sisl::sg_list data_sgs; + data_sgs.size = total_size; + data_sgs.iovs.emplace_back( + iovec{.iov_base = iomanager.iobuf_alloc(blk_size, total_size), .iov_len = total_size}); + + const auto& shard_id = k.key().shard; + const auto& blob_id = k.key().blob; + + futs.emplace_back(std::move( + data_service.async_read(pba, data_sgs, total_size) + .thenValue([this, shard_id, blob_id, data_sgs = std::move(data_sgs), blob_scrub_result](auto&& err) { + auto blob = data_sgs.iovs[0].iov_base; + struct buffer_free_guard { + uint8_t* buf; + ~buffer_free_guard() { iomanager.iobuf_free(buf); } + } guard{reinterpret_cast< uint8_t* >(blob)}; + + ScrubManager::scrub_result_entry entry{shard_id, blob_id, ScrubStatus::NONE}; + + if (err) { + LOGERRORMOD(scrubmgr, "Failed to read blob for deep scrub, shard_id={}, blob_id={}, error={}", + shard_id, blob_id, err.message()); + entry.status_or_hash = ScrubStatus::IO_ERROR; + } else { + const auto blob_verify_succeed = m_hs_home_object->verify_blob(blob, shard_id, blob_id, true); + if (!blob_verify_succeed) { + // note that, if gc kicks in, the pba might be overwritten and lead to verification + // failure. + + // FIXME:: handle this case by query and read the blob again. + LOGERRORMOD(scrubmgr, "Blob verification failed for deep scrub, shard_id={}, blob_id={}", + shard_id, blob_id); + entry.status_or_hash = ScrubStatus::MISMATCH; + } else { + // we only calculate crc64 for data part. 
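compute_crc64() above is a plain MSB-first, table-driven CRC-64 with the ECMA-182 polynomial, zero initial value and no final xor. A standalone sketch of the same routine; with the conventional "123456789" test input it should print 6C40DF5F0B497347, the published check value for CRC-64/ECMA-182:

#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Same algorithm as compute_crc64(): MSB-first table-driven CRC-64, ECMA-182 polynomial.
static uint64_t crc64(const void* data, size_t len, uint64_t crc = 0) {
    static constexpr uint64_t kPoly = 0x42F0E1EBA9EA3693ULL;
    static const auto kTable = [] {
        std::array< uint64_t, 256 > t{};
        for (int i = 0; i < 256; ++i) {
            uint64_t c = static_cast< uint64_t >(i) << 56;
            for (int b = 0; b < 8; ++b) { c = (c & 0x8000000000000000ULL) ? ((c << 1) ^ kPoly) : (c << 1); }
            t[i] = c;
        }
        return t;
    }();
    const auto* p = static_cast< const uint8_t* >(data);
    while (len--) { crc = kTable[static_cast< uint8_t >((crc >> 56) ^ *p++)] ^ (crc << 8); }
    return crc;
}

int main() {
    const char* msg = "123456789";
    std::printf("%016llX\n", static_cast< unsigned long long >(crc64(msg, std::strlen(msg))));
}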
+ const auto* header = reinterpret_cast< const HSHomeObject::BlobHeader* >(blob); + const auto* blob_data = reinterpret_cast< const uint8_t* >(blob) + header->data_offset; + entry.status_or_hash = compute_crc64(blob_data, header->blob_size); + } + } + + LOGDEBUGMOD(scrubmgr, "add entry to blob scrub result: shard_id={}, blob_id={}", entry.shard_id, + entry.blob_id); + blob_scrub_result->add_entry(entry); + }))); + } + + folly::collectAllUnsafe(futs).wait(); + + LOGDEBUGMOD(scrubmgr, "pg_id={}, req_id={}, deep blob scrub completed, found {} blobs in range [{},{}] to [{},{})", + pg_id, req_id, out.size(), req->start_shard_id, req->start_blob_id, req->end_shard_id, + req->end_blob_id); + + return blob_scrub_result; +} + +std::shared_ptr< ScrubManager::scrub_result > ScrubManager::local_scrub_meta(std::shared_ptr< scrub_req > req) { + if (!req) { + LOGERRORMOD(scrubmgr, "meta scrub req is null, cannot handle it!"); + return nullptr; + } + + const auto& req_id = req->req_id; + const auto& scrub_lsn = req->scrub_lsn; + const auto& pg_id = req->pg_id; + const auto& end_shard_id = req->end_shard_id; + const auto& start_shard_id = req->start_shard_id; + + if (req->scrub_type != SCRUB_TYPE::META) { + LOGERRORMOD(scrubmgr, + "invalid scrub req type for local_scrub_meta, pg_id={}, req_id={}, scrub_type={}, scrub_lsn={}", + pg_id, req_id, req->scrub_type, scrub_lsn); + return nullptr; + } + + LOGDEBUGMOD(scrubmgr, "handling meta scrub req for pg {}, req_id={}, scrub_lsn={}", pg_id, req_id, scrub_lsn); + + auto hs_pg = m_hs_home_object->get_hs_pg(pg_id); + if (!hs_pg) { + LOGERRORMOD(scrubmgr, "cannot find hs_pg for pg={}, fail to scrub meta!", pg_id); + return nullptr; + } + + if (start_shard_id > end_shard_id) { + LOGERRORMOD(scrubmgr, "received incorrect meta scrub req, start_shard_id={} > end_shard_id={}", start_shard_id, + end_shard_id); + return nullptr; + } + + if (!wait_for_scrub_lsn_commit(hs_pg->repl_dev_, scrub_lsn)) { + LOGERRORMOD( + scrubmgr, + "commit lsn is not advanced to scrub lsn {} after waiting for a while, fail to do local shard scrub, pg={}", + scrub_lsn, pg_id); + return nullptr; + } + + RELEASE_ASSERT(0 == req->start_blob_id, "for meta scrub, start_blob_id should be 0, pg_id={}, req_id={}", pg_id, + req_id); + const auto& end_blob_id = req->end_blob_id; + + LOGDEBUGMOD(scrubmgr, + "received meta scrub req for pg {}, req_id={}, scrub_lsn={}, start_shard_id={}, end_shard_id={}, " + "end_blob_id={}", + pg_id, req_id, scrub_lsn, start_shard_id, end_shard_id, end_blob_id); + + auto meta_scrub_result = std::make_shared< ScrubManager::scrub_result >(req_id, m_hs_home_object->our_uuid()); + + // we don't have a specific method to directly read a specific pg/shard meta_blk. the only way we can read metablk + // for now is registering a handler and then call meta_service read_sub_sb. we just skip this step for now + // TODO:: + // 1 to add a new method to directly read a specific meta_blk in meta_service. or we do this by registering handler + // and scan!! + // 2 add the ScrubResult for metablk to scrub_result_entry + // 3 calculate the hash of metablk. + + // for empty shard, without deleting shard(to be done), we can find the shard meta blk and shard meta data in + // memory, but cannot find any blob of this shard in pg_index_table. we consider the pg_index_table, not the + // shard_meta_blk, as the source of truth for meta scrub, so we will scan the index table to do meta scrub and don't + // care about empty shard, since no valid data in it. 
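The `start_shard_id & homeobject::shard_mask` test below leans on shard ids packing the pg id into the high bits and a per-PG sequence number (starting at 1) into the low shard_width bits, so a zero sequence can stand in for the PG itself. A small sketch of that packing; the 48-bit width here is illustrative, the real constants live in the HomeObject headers:

#include <cstdint>
#include <cstdio>

// Illustrative only: the real shard_width/shard_mask come from the homeobject headers.
constexpr uint64_t kShardWidth = 48;
constexpr uint64_t kShardMask = (uint64_t{1} << kShardWidth) - 1;

constexpr uint64_t make_shard_id(uint16_t pg_id, uint64_t seq) { return (uint64_t{pg_id} << kShardWidth) | seq; }

int main() {
    const auto shard = make_shard_id(7, 3);
    std::printf("pg=%llu seq=%llu is_pg_meta_sentinel=%d\n",
                static_cast< unsigned long long >(shard >> kShardWidth),
                static_cast< unsigned long long >(shard & kShardMask), (shard & kShardMask) == 0);
}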
+ + // scrub pg meta if start_shard_id is 0. since shard_id starts from 1, we use shard_id 0 to represent pg meta for + // convenience. + if (0 == (start_shard_id & homeobject::shard_mask)) { + LOGDEBUGMOD(scrubmgr, "scrubbing pg meta of pg={}", pg_id); + // blob_id here means the shard count of this pg. Since this is useless ATM, we just do it like this. + + // TODO:: do real pg meta blk scrub and calculate the hash for pg meta blk, for now we just skip this step and + // set its hash to 0. + meta_scrub_result->add_entry({0, hs_pg->total_shards() /*pg shard count*/, uint64_t(0) /*pg metablk hash*/}); + } + + // FIXME:: after we have shard sealed lsn, we only scrub those shards whose sealed_lsn is less than or equal to + // scrub_lsn. + auto start_key = BlobRouteKey{BlobRoute{std::max(uint64_t{1}, start_shard_id), 0}}; + auto end_key = BlobRouteKey{BlobRoute{end_shard_id, end_blob_id}}; + + ScrubManager::scrub_result_entry entry{0, 0, ScrubStatus::NONE}; + // max_scrub_batch_size here means how many shards we want to scrub in one batch, referring to + // docs/adr/scrub-blob-range-coverage.md + + // TODO: make this configurable + uint32_t batch_capacity = max_scrub_batch_size; + + homestore::BtreeQueryRequest< BlobRouteKey > qr{ + homestore::BtreeKeyRange< BlobRouteKey >{start_key, true, end_key, true}, + homestore::BtreeQueryType::SWEEP_NON_INTRUSIVE_PAGINATION_QUERY, std::numeric_limits< uint32_t >::max(), + [&entry, &batch_capacity, end_blob_id, meta_scrub_result](homestore::BtreeKey const& key, + homestore::BtreeValue const& value) mutable -> bool { + if (batch_capacity) { + BlobRouteKey blob_key{key}; + BlobRouteValue blob_value{value}; + + const auto shard_id = blob_key.key().shard; + if (shard_id != entry.shard_id) { + // coming to a new shard + if (entry.shard_id) { + // TODO:: do real shard meta blk scrub and calculate the hash for shard meta blk, for now we + // just set its hash to 0. + entry.status_or_hash = uint64_t(0); + meta_scrub_result->add_entry(entry); + if (--batch_capacity == 0) { + // so that it will not be added to meta_scrub_result again outside of the query loop. + entry.shard_id = 0; + return false; // Continue scanning + } + } + // reset entry for the new shard + entry.shard_id = shard_id; + entry.blob_id = 0; + } + + // there might be some deletion happens when we do meta scrub and lead to the inconsistency of the blob + // count of a specific shard among different replicas. this does not matter, since we will use the max + // blobs count of this shard in different replicas as the actual blob count of this shard, referring to + // docs/adr/scrub-blob-range-coverage.md + const auto blob_id = blob_key.key().blob; + if (blob_value.pbas() != HSHomeObject::tombstone_pbas && blob_id <= end_blob_id) { entry.blob_id++; } + } + + return false; // Continue scanning + }}; + + std::vector< std::pair< BlobRouteKey, BlobRouteValue > > out; + auto const ret = hs_pg->index_table_->query(qr, out); + if (ret != homestore::btree_status_t::success && ret != homestore::btree_status_t::has_more) { + LOGERRORMOD(scrubmgr, "[pg={}] failed to query index table, error={}", pg_id, ret); + return nullptr; + } + + // if we the last scrubbed shard in this batch is not 0, we add the scrub_result_entry for it here since it cannot + // be added in the query loop. 
+ if (entry.shard_id) { + LOGDEBUGMOD(scrubmgr, "add last entry, shard_id={}, blob_id={}", entry.shard_id, entry.blob_id); + meta_scrub_result->add_entry(entry); + } + + LOGDEBUGMOD(scrubmgr, "meta scrub completed, checked {} shards in range [{},{}) to [{}, {}] in pg={}", + meta_scrub_result->entries.size(), start_shard_id, 0, end_shard_id, end_blob_id, pg_id); + + return meta_scrub_result; +} + +folly::SemiFuture< std::shared_ptr< ScrubManager::ShallowScrubReport > > +ScrubManager::submit_scrub_task(const pg_id_t& pg_id, const bool is_deep, SCRUB_TRIGGER_TYPE trigger_type) { + LOGINFOMOD(scrubmgr, "submit a scrub task for pg={}, deep_scrub={}, trigger_type={}", pg_id, is_deep, trigger_type); + + // Check if a scrub task is already running for this PG. + // Note: There's still a small race window between this check and task execution in handle_pg_scrub_task, + // but the in_scrubbing CAS below provides the final guard. This check prevents unnecessary work. + auto it = m_pg_scrub_ctx_map.find(pg_id); + if (it != m_pg_scrub_ctx_map.end()) { + LOGWARNMOD(scrubmgr, "a scrub task is already running for pg={}, no need to submit another one!", pg_id); + return folly::makeSemiFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr)); + } + + const auto ps_scrub_super_blk_it = m_pg_scrub_sb_map.find(pg_id); + if (ps_scrub_super_blk_it == m_pg_scrub_sb_map.end()) { + LOGERRORMOD(scrubmgr, "cannot find scrub superblk for pg={}, fail to submit scrub task!", pg_id); + return folly::makeSemiFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr)); + } + + // Get the PG and check its state + const auto hs_pg = m_hs_home_object->get_hs_pg(pg_id); + if (!hs_pg) { + LOGERRORMOD(scrubmgr, "cannot find hs_pg for pg={}, fail to submit scrub task!", pg_id); + return folly::makeSemiFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr)); + } + + // Check if pg_state is HEALTHY (state must be 0) + const auto current_state = hs_pg->pg_state_.get(); + if (current_state != 0) { + LOGWARNMOD(scrubmgr, "pg={} is not in HEALTHY state (current_state={}), cannot submit scrub task!", pg_id, + current_state); + return folly::makeSemiFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr)); + } + + // TODO:: use PGStateMask::SCRUBBING state to replace the in_scrubbing flag after cm supports + // PGStateMask::SCRUBBING. in_scrubbing here is used to indicate whether there is a scrub task pending/running for + // this pg. + bool expected = false; + if (!hs_pg->in_scrubbing.compare_exchange_strong(expected, true)) { + LOGWARNMOD(scrubmgr, "pg={} scrub submission already in-flight, skip!", pg_id); + return folly::makeSemiFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr)); + } + + const auto& pg_scrub_sb = *(ps_scrub_super_blk_it->second); + const auto last_scrub_time = + is_deep ? pg_scrub_sb->last_deep_scrub_timestamp : pg_scrub_sb->last_shallow_scrub_timestamp; + + auto [promise, future] = folly::makePromiseContract< std::shared_ptr< ShallowScrubReport > >(); + ScrubManager::scrub_task task(last_scrub_time, pg_id, is_deep, trigger_type, std::move(promise)); + if (!m_scrub_task_queue.push(std::move(task))) { + // Queue is closed (scrub manager is stopped); roll back in_scrubbing so future submissions are not blocked. 
+ hs_pg->in_scrubbing.store(false); + LOGWARNMOD(scrubmgr, "pg={} scrub task queue is closed/stopped, skip!", pg_id); + return folly::makeSemiFuture(std::shared_ptr< ScrubManager::ShallowScrubReport >(nullptr)); + } + return std::move(future); +} + +void ScrubManager::cancel_scrub_task(const pg_id_t& pg_id) { + auto it = m_pg_scrub_ctx_map.find(pg_id); + if (it == m_pg_scrub_ctx_map.end()) { + LOGWARNMOD(scrubmgr, "no running scrub task for pg={}, no need to cancel!", pg_id); + return; + } + it->second->cancel(); + LOGINFOMOD(scrubmgr, "cancel scrub task for pg={}", pg_id); +} + +void ScrubManager::handle_pg_scrub_task(scrub_task task) { + const auto& pg_id = task.pg_id; + const auto& task_id = task.task_id; + const auto& is_deep_scrub = task.is_deep_scrub; + + SCRUBLOGD(pg_id, task_id, + "Starting handling {} scrub task, last_scrub_time={} =====", is_deep_scrub ? "deep" : "shallow", + task.last_scrub_time); + + std::shared_ptr< ShallowScrubReport > pg_scrub_report = + is_deep_scrub ? std::make_shared< DeepScrubReport >(pg_id) : std::make_shared< ShallowScrubReport >(pg_id); + + struct scrub_task_guard { + HSHomeObject* home_obj; + folly::ConcurrentHashMap< pg_id_t, std::shared_ptr< PGScrubContext > >& pg_scrub_ctx_map; + scrub_task& task; + std::shared_ptr< ShallowScrubReport >& scrub_report; + const pg_id_t& pg_id; + + ~scrub_task_guard() { + pg_scrub_ctx_map.erase(pg_id); + task.scrub_report_promise->setValue(scrub_report); + auto hs_pg = home_obj->get_hs_pg(pg_id); + if (hs_pg) { + hs_pg->in_scrubbing.store(false); + LOGINFOMOD(scrubmgr, "cleared SCRUBBING state for pg={}", pg_id); + } else { + // pg destroyed during scrubbing + LOGWARNMOD(scrubmgr, "cannot find hs_pg to clear SCRUBBING state for pg={}!", pg_id); + } + } + } guard{m_hs_home_object, m_pg_scrub_ctx_map, task, pg_scrub_report, pg_id}; + + const auto hs_pg = m_hs_home_object->get_hs_pg(pg_id); + if (!hs_pg) { + SCRUBLOGE(pg_id, task_id, "cannot find hs_pg for this pg, fail this scrub task!"); + return; + } + + auto [ctx_it, happened] = m_pg_scrub_ctx_map.try_emplace(pg_id, std::make_shared< PGScrubContext >(task_id, hs_pg)); + RELEASE_ASSERT(happened, + "pg={} should not have a running scrub task since we set in_scrubbing in submit_scrub_task", pg_id); + auto& scrub_ctx = ctx_it->second; + + // this is the last committed shard_id. we cannot get shard_sequence_num here since some of the shard might be + // not committed yet. note that, this depends on the fact that the last committed shard is always at the end of + // the shard list. + const auto last_committed_shard_id = m_hs_home_object->get_last_shard_id_in_pg(pg_id); + const auto last_committed_blob_id = hs_pg->get_last_committed_blob_id(); + + // we get scrub_lsn after we get last_committed_shard_id and last_committed_blob_id, so we can guarantee for any + // replica, if it has committed to scrub_lsn , it can at least see last_committed_shard_id and + // last_committed_blob_id. 
+ // Now, the scrub range is finalized to [{0,0}, {last_committed_shard_id, last_committed_blob_id}] + const int64_t scrub_lsn = hs_pg->repl_dev_->get_last_commit_lsn(); + + // Step 1: Scrub META + SCRUBLOGD(pg_id, task_id, "Starting META scrubbing"); + std::map< shard_id_t, uint32_t > shard_blob_count; + for (shard_id_t start_shard_id = 0; start_shard_id <= last_committed_shard_id;) { + if (scrub_ctx->cancelled.load()) { + SCRUBLOGD(pg_id, task_id, "scrub task cancelled after meta scrub, skip blob scrub"); + return; + } + + std::map< shard_id_t, uint32_t > shard_blob_count_in_batch; + if (!scrub_ctx->scrub_meta_batch(pg_scrub_report, start_shard_id, last_committed_shard_id, + last_committed_blob_id, scrub_lsn, shard_blob_count_in_batch)) { + SCRUBLOGE(pg_id, task_id, "meta scrub failed for batch in range: {} to {}, scrub_lsn={}", start_shard_id, + last_committed_shard_id, scrub_lsn); + return; + } + + SCRUBLOGD(pg_id, task_id, "meta scrub batch completed in range: {} to {}, scrub_lsn={}", start_shard_id, + last_committed_shard_id, scrub_lsn); + + if (shard_blob_count_in_batch.empty()) { + SCRUBLOGD(pg_id, task_id, "no more shard to scrub, end meta scrub"); + break; + } + + // next shard_id after the last shard_id in this batch + start_shard_id = shard_blob_count_in_batch.rbegin()->first + 1; + shard_blob_count.merge(shard_blob_count_in_batch); + } + + // Step 2: Scrub BLOB + if (!shard_blob_count.empty()) { + SCRUBLOGD(pg_id, task_id, "Starting {} blob scrubbing", is_deep_scrub ? "deep" : "shallow"); + auto it = shard_blob_count.begin(); + shard_id_t start_shard_id = it->first; + shard_id_t end_shard_id = start_shard_id; + uint64_t total_blob_count_in_batch = it->second; + + // start from the second shard + for (++it; it != shard_blob_count.end(); ++it) { + if (scrub_ctx->cancelled.load()) { + SCRUBLOGD(pg_id, task_id, "scrub task cancelled during blob batch accumulation, stop"); + return; + } + auto blob_count = it->second; + if (total_blob_count_in_batch + blob_count >= max_scrub_batch_size) { + // scrub current batch + if (!scrub_ctx->scrub_blob_batch(pg_scrub_report, start_shard_id, end_shard_id, last_committed_blob_id, + scrub_lsn, is_deep_scrub)) { + SCRUBLOGE(pg_id, task_id, "{} blob scrub failed for shard range: {} to {}, scrub_lsn={}", + is_deep_scrub ? "deep" : "shallow", start_shard_id, end_shard_id, scrub_lsn); + return; + } + + // start a new batch + start_shard_id = it->first; + total_blob_count_in_batch = 0; + } + + total_blob_count_in_batch += blob_count; + end_shard_id = it->first; + } + + // scrub last batch + if (!scrub_ctx->scrub_blob_batch(pg_scrub_report, start_shard_id, end_shard_id, last_committed_blob_id, + scrub_lsn, is_deep_scrub)) { + SCRUBLOGE(pg_id, task_id, "{} blob scrub batch failed for shard range: {} to {}, scrub_lsn={}", + is_deep_scrub ? "deep" : "shallow", start_shard_id, end_shard_id, scrub_lsn); + return; + } + } + +#ifdef _PRERELEASE + // Trigger the callback flip to delete missing blob during scrub if enabled + iomgr_flip::instance()->callback_flip("delete_missing_blob_through_raft"); +#endif + + // when scrubbing is on going, blob or shard deletion probably happens and lead to false-positive missing blobs(and + // shards after we have delete shard). we reconcile the missing blobs in scrub report after all the scrubbing is + // completed to reduce the false-positive item. + + // if we reach here, we can make sure the other replicas have committed to scrub_lsn. 
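Step 2 above greedily groups consecutive shards into batches whose accumulated blob count stays under the batch limit before issuing each blob-scrub round. A stripped-down sketch of that grouping; the counts and kMaxBatch stand in for max_scrub_batch_size:

#include <cstdint>
#include <cstdio>
#include <map>

int main() {
    // shard id -> blob count, as produced by the meta scrub phase
    const std::map< uint64_t, uint32_t > shard_blob_count{{1, 40}, {2, 10}, {3, 70}, {4, 5}, {5, 90}};
    constexpr uint32_t kMaxBatch = 100; // stand-in for max_scrub_batch_size

    auto it = shard_blob_count.begin();
    uint64_t start = it->first, end = start;
    uint64_t total = it->second;
    for (++it; it != shard_blob_count.end(); ++it) {
        if (total + it->second >= kMaxBatch) {
            // flush the current batch, then start a new one at this shard
            std::printf("scrub blob batch: shards [%llu, %llu], ~%llu blobs\n", (unsigned long long)start,
                        (unsigned long long)end, (unsigned long long)total);
            start = it->first;
            total = 0;
        }
        total += it->second;
        end = it->first;
    }
    std::printf("scrub blob batch: shards [%llu, %llu], ~%llu blobs\n", (unsigned long long)start,
                (unsigned long long)end, (unsigned long long)total);
}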
+ scrub_ctx->reconcile_scrub_report(pg_scrub_report); + + // only if pg is successfully scrubbed, we persist scrub metablk. + + // FIXME:: spread this to all followers, so that if leader changes, the new leader konws the last scrub time and can + // trigger the next scrub in time. + save_scrub_superblk(pg_id, is_deep_scrub, true); + SCRUBLOGD(pg_id, task_id, "successfully complete {} scrub task!", is_deep_scrub ? "deep" : "shallow"); +} + +void ScrubManager::add_pg(const pg_id_t pg_id) { + // TODO:: make this thread safe. + LOGINFOMOD(scrubmgr, "added new scrub superblock for pg={}", pg_id); + if (nullptr == m_hs_home_object->get_hs_pg(pg_id)) { + LOGINFOMOD(scrubmgr, "cannot find pg={}!", pg_id); + return; + } + + // to avoid create-pg log replay overriding existing scrub superblock, we only create new superblock when there is + // no existing one + save_scrub_superblk(pg_id, false, false); +} + +void ScrubManager::remove_pg(const pg_id_t pg_id) { + cancel_scrub_task(pg_id); + m_pg_scrub_ctx_map.erase(pg_id); + + auto it = m_pg_scrub_sb_map.find(pg_id); + if (it == m_pg_scrub_sb_map.end()) { + LOGINFOMOD(scrubmgr, "no scrub superblock found for pg={}, no need to remove", pg_id); + return; + } + + LOGINFOMOD(scrubmgr, "removed pg={} in scrub manager!", pg_id); + it->second->destroy(); + m_pg_scrub_sb_map.erase(it); +} + +// this function is called in meta_service thread context +void ScrubManager::on_pg_scrub_meta_blk_found( + sisl::byte_view const& buf, void* meta_cookie, + std::vector< homestore::superblk< pg_scrub_superblk > >& stale_pg_scrub_sbs) { + auto sb = std::make_shared< homestore::superblk< pg_scrub_superblk > >(); + (*sb).load(buf, meta_cookie); + const auto pg_id = (*sb)->pg_id; + + auto hs_pg = m_hs_home_object->get_hs_pg(pg_id); + if (!hs_pg) { + // this is a stale pg scrub superblock, we just log and destroy it. 
+ LOGINFOMOD(scrubmgr, "cannot find pg={}, destroy stale scrub superblock", pg_id); + stale_pg_scrub_sbs.emplace_back(std::move(*sb)); + return; + } + const auto last_deep_scrub_time = (*sb)->last_deep_scrub_timestamp; + const auto last_shallow_scrub_time = (*sb)->last_shallow_scrub_timestamp; + + m_pg_scrub_sb_map.emplace(pg_id, std::move(sb)); + LOGINFOMOD(scrubmgr, "loaded scrub superblock for pg={}, last_deep_scrub_time={}, last_shallow_scrub_time={}", + pg_id, last_deep_scrub_time, last_shallow_scrub_time); +} + +void ScrubManager::save_scrub_superblk(const pg_id_t pg_id, const bool is_deep_scrub, bool force_update) { + const auto current_time = + std::chrono::duration_cast< std::chrono::seconds >(std::chrono::system_clock::now().time_since_epoch()).count(); + + auto it = m_pg_scrub_sb_map.find(pg_id); + if (it == m_pg_scrub_sb_map.end()) { + // Create new superblock for this PG + auto sb = std::make_shared< homestore::superblk< pg_scrub_superblk > >(pg_scrub_meta_name); + (*sb).create(sizeof(pg_scrub_superblk)); + (*sb)->pg_id = pg_id; + (*sb)->last_deep_scrub_timestamp = current_time; + (*sb)->last_shallow_scrub_timestamp = current_time; + (*sb).write(); + m_pg_scrub_sb_map.emplace(pg_id, std::move(sb)); + return; + } + + if (force_update) { + // Update existing superblock + if (is_deep_scrub) { + (*(it->second))->last_deep_scrub_timestamp = current_time; + } else { + (*(it->second))->last_shallow_scrub_timestamp = current_time; + } + (*(it->second)).write(); + } else { + LOGINFOMOD(scrubmgr, "skip updating scrub superblock for pg={} since there is no scrub progress update", pg_id); + } +} + +std::optional< ScrubManager::pg_scrub_superblk > ScrubManager::get_scrub_superblk(const pg_id_t pg_id) const { + auto it = m_pg_scrub_sb_map.find(pg_id); + if (it == m_pg_scrub_sb_map.end()) { + LOGWARNMOD(scrubmgr, "scrub superblk not found for pg {}", pg_id); + return std::nullopt; + } + + return *(*(it->second)); +} + +ScrubManager::PGScrubContext::PGScrubContext(uint64_t task_id, const HSHomeObject::HS_PG* hs_pg) : + task_id(task_id), hs_pg(hs_pg) { + + const auto& members = (hs_pg->pg_info_).members; + for (const auto& member : members) { + // TODO::make the queue size configurable + peer_scrub_result_queue_map_.emplace( + member.id, std::make_shared< folly::MPMCQueue< std::shared_ptr< scrub_result > > >(10)); + } + + m_scrub_executor = std::make_shared< folly::IOThreadPoolExecutor >(peer_scrub_result_queue_map_.size()); + // TODO: handle the following cases: + // 1 the node is removed from the raft group? handle this case later + // 2 pg is destroyed during scrubbing? 
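The timestamps persisted in pg_scrub_superblk above are what the is_eligible_for_*_scrub() TODOs would naturally consume. A hypothetical shape of that check; the intervals are illustrative and not values defined by this patch:

#include <chrono>
#include <cstdint>
#include <cstdio>

// Hypothetical eligibility check built on the persisted last-scrub timestamps (epoch seconds).
bool due_for_scrub(uint64_t last_scrub_epoch_secs, std::chrono::seconds interval) {
    const auto now =
        std::chrono::duration_cast< std::chrono::seconds >(std::chrono::system_clock::now().time_since_epoch())
            .count();
    return now - static_cast< int64_t >(last_scrub_epoch_secs) >= interval.count();
}

int main() {
    const uint64_t last_shallow = 0, last_deep = 0; // e.g. loaded from pg_scrub_superblk
    std::printf("shallow due=%d deep due=%d\n", due_for_scrub(last_shallow, std::chrono::hours(24 * 7)),
                due_for_scrub(last_deep, std::chrono::hours(24 * 30)));
}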
+} + +ScrubManager::PGScrubContext::~PGScrubContext() { + m_scrub_executor->stop(); + m_scrub_executor.reset(); +} + +void ScrubManager::PGScrubContext::add_scrub_result(std::shared_ptr< ScrubManager::scrub_result > result) { + auto it = peer_scrub_result_queue_map_.find(result->issuer_peer_id); + if (it != peer_scrub_result_queue_map_.end()) { + SCRUBLOGD(hs_pg->pg_id(), task_id, "add scrub result from peer {}, req_id={}, result entries count={}", + result->issuer_peer_id, result->req_id, result->entries.size()); + it->second->blockingWrite(result); + } else { + SCRUBLOGW(hs_pg->pg_id(), task_id, "received scrub result from unknown peer {}, req_id={}, dropping", + result->issuer_peer_id, result->req_id); + } +} + +void ScrubManager::PGScrubContext::send_req_to_peer(const ScrubManager::scrub_req& req, const peer_id_t& peer_id) { + const auto pg_id = hs_pg->pg_id(); + auto& repl_dev = hs_pg->repl_dev_; + if (!repl_dev) { + cancel(); + SCRUBLOGE(pg_id, task_id, + "replication device is not available, cannot send scrub req to peer {}, req_id={}, " + "scrub_type={}", + peer_id, req.req_id, req.scrub_type); + return; + } + + auto flatbuffer = req.build_flat_buffer(); + sisl::io_blob_list_t blob_list; + blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false); + + repl_dev->data_request_unidirectional(peer_id, HSHomeObject::PUSH_SCRUB_REQ, blob_list) + .via(&folly::InlineExecutor::instance()) + .thenValue([pg_id, peer_id, task_id = this->task_id, flatbuffer = std::move(flatbuffer), req_id = req.req_id, + scrub_type = req.scrub_type](auto&& response) { + if (response.hasError()) { + SCRUBLOGE(pg_id, task_id, "failed to send scrub req to peer {}, req_id={}, error={}, scrub_type={}", + peer_id, req_id, response.error(), scrub_type); + } else { + SCRUBLOGD(pg_id, task_id, "successfully sent scrub req to peer {}, req_id={}, scrub_type={}", peer_id, + req_id, scrub_type); + } + }); +} + +bool ScrubManager::PGScrubContext::scrub_meta_batch(std::shared_ptr< ScrubManager::MetaScrubReport > scrub_report, + shard_id_t start_shard_id, shard_id_t end_shard_id, + blob_id_t last_blob_id, int64_t scrub_lsn, + std::map< shard_id_t, uint32_t >& shard_blob_count_in_batch) { + const auto pg_id = hs_pg->pg_id(); + SCRUBLOGD(pg_id, task_id, "start scrubbing meta for shard range: {} to {}, last_blob_id={}, scrub_lsn={}", + start_shard_id, end_shard_id, last_blob_id, scrub_lsn); + + std::vector< folly::Future< std::shared_ptr< ScrubManager::range_scrub_result > > > futs; + for (const auto& [peer_id, scrub_result_queue] : peer_scrub_result_queue_map_) { + auto [promise, future] = folly::makePromiseContract< std::shared_ptr< ScrubManager::range_scrub_result > >(); + futs.emplace_back(std::move(future).via(&folly::InlineExecutor::instance())); + m_scrub_executor->add([this, pg_id, peer_id, start_shard_id, end_shard_id, last_blob_id, scrub_lsn, + promise = std::move(promise), scrub_result_queue]() mutable { + std::shared_ptr< ScrubManager::scrub_result > scrub_result; + ScrubManager::scrub_req current_req(pg_id, random_req_id(), scrub_lsn, start_shard_id, 0, end_shard_id, + last_blob_id, SCRUB_TYPE::META, hs_pg->home_obj_.our_uuid()); + auto range_scrub_result = std::make_shared< ScrubManager::range_scrub_result >( + start_shard_id, 0, end_shard_id, last_blob_id, SCRUB_TYPE::META, peer_id); + + for (uint8_t retry_count = 0;;) { + if (cancelled.load()) { + SCRUBLOGD(pg_id, task_id, + "scrub task is cancelled, stop waiting for scrub result from peer {}, " + "shard range: {} to {}, last_blob_id={}, scrub_lsn={}", + 
peer_id, start_shard_id, end_shard_id, last_blob_id, scrub_lsn); + break; + } + + send_req_to_peer(current_req, peer_id); + + // Drain stale results until we get the expected req_id or time out. + // Do NOT re-send on stale results — the peer already has the outstanding request. + bool got_expected = false; + while (!got_expected) { + // TODO::make the timeout here configurable + if (!scrub_result_queue->tryReadUntil(std::chrono::steady_clock::now() + std::chrono::seconds{10}, + scrub_result)) { + // timeout + SCRUBLOGD(pg_id, task_id, + "did not receive scrub result from peer {} for shard range: {} to {}, " + "last_blob_id={}, scrub_lsn={}, try again", + peer_id, start_shard_id, end_shard_id, last_blob_id, scrub_lsn); + retry_count++; + + // TODO::make the max retry count configurable + if (retry_count > 5) { + SCRUBLOGE(pg_id, task_id, + "did not receive scrub result from peer {} after {} retries, " + "shard range: {} to {}, last_blob_id={}, scrub_lsn={}, " + "cancel this scrub task", + peer_id, retry_count, start_shard_id, end_shard_id, last_blob_id, scrub_lsn); + // this will cancel the entire task + cancel(); + } + break; // timed out — outer loop will re-send or detect cancellation + } + + // TODO:: add more logic to check if the scrub result is the expected one, for example, we can add + // the shard range and scrub_lsn in the scrub req and check if they are consistent with the received + // scrub result + if (scrub_result->req_id != current_req.req_id) { + SCRUBLOGD(pg_id, task_id, + "received scrub result with unexpected req_id from peer {}, expected req_id={}, " + "actual req_id={}, drain and wait again", + peer_id, current_req.req_id, scrub_result->req_id); + continue; // drain stale result and wait again — do NOT re-send + } + + got_expected = true; + } + + if (!got_expected) { + continue; // timed out — outer loop re-sends or cancels + } + + // Got the expected result. Reset retry count since the peer is responsive. + retry_count = 0; + SCRUBLOGD(pg_id, task_id, "meta scrub: received scrub result from peer {}, result entries count={}", + peer_id, scrub_result->entries.size()); + + if (scrub_result->entries.empty()) { + SCRUBLOGD(pg_id, task_id, + "received empty scrub result from peer {}, shard range: {} to {}, " + "last_blob_id={}, scrub_lsn={}, we consider this range scrub is completed and break " + "the loop!", + peer_id, start_shard_id, end_shard_id, last_blob_id, scrub_lsn); + break; + } + + const auto received_batch_size = scrub_result->entries.size(); + SCRUBLOGD(pg_id, task_id, + "received expected meta scrub result from peer {}, req_id={}, start_shard_id={}, " + "start_blob_id={}, end_shard_id={}, end_blob_id={}, scrub_lsn={}, details={}", + peer_id, current_req.req_id, current_req.start_shard_id, current_req.start_blob_id, + current_req.end_shard_id, current_req.end_blob_id, scrub_lsn, scrub_result->to_string()); + + const auto last_shard_id_in_result = (scrub_result->entries).rbegin()->first.shard; + range_scrub_result->add_scrub_result(*scrub_result); + SCRUBLOGD(pg_id, task_id, "after adding new scrub_result for meta scrub, {}", + range_scrub_result->to_string()); + + if (received_batch_size < max_scrub_batch_size) { + // if the received batch size is smaller than the max batch size, it means the peer has no more data + // to scrub in this range, we can consider the range scrub is completed for this peer and break the + // loop to avoid unnecessary scrub req sending and waiting. 
+ SCRUBLOGD(pg_id, task_id,
+ "received meta scrub result with batch size {} smaller than max batch size {} from peer "
+ "{}, shard range: {} to {}, last_blob_id={}, scrub_lsn={}, we consider this range scrub "
+ "is completed and break the loop!",
+ received_batch_size, max_scrub_batch_size, peer_id, start_shard_id, end_shard_id,
+ last_blob_id, scrub_lsn);
+ break;
+ }
+
+ RELEASE_ASSERT(received_batch_size == max_scrub_batch_size,
+ "the received batch size {} should be equal to max scrub batch size {} for meta scrub, "
+ "but it is not, peer {}, shard range: {} to {}, last_blob_id={}, scrub_lsn={}",
+ received_batch_size, max_scrub_batch_size, peer_id, start_shard_id, end_shard_id,
+ last_blob_id, scrub_lsn);
+
+ current_req.start_shard_id = last_shard_id_in_result + 1;
+ if (current_req.start_shard_id > end_shard_id) {
+ // the range scrub is completed for this batch, we can break the loop and return the result.
+ SCRUBLOGD(pg_id, task_id,
+ "completed scrubbing meta for shard range: {} to {}, last_blob_id={}, scrub_lsn={}, "
+ "peer {}",
+ start_shard_id, end_shard_id, last_blob_id, scrub_lsn, peer_id);
+ break;
+ }
+
+ // new req_id for the next scrub req
+ current_req.req_id = random_req_id();
+ }
+
+ if (cancelled.load()) {
+ promise.setException(folly::make_exception_wrapper< std::runtime_error >("cancelled"));
+ } else {
+ promise.setValue(range_scrub_result);
+ }
+ });
+ }
+
+ return folly::collectAllUnsafe(futs)
+ .thenValue([this, pg_id, &scrub_report, &shard_blob_count_in_batch](auto&& results) {
+ std::map< peer_id_t, std::shared_ptr< range_scrub_result > > peer_scrub_result_map;
+ for (auto& r : results) {
+ if (r.hasException()) {
+ SCRUBLOGE(pg_id, task_id, "scrub meta batch failed, error={}", r.exception().what());
+ return false;
+ }
+ auto range_scrub_result = r.value();
+ if (!range_scrub_result) {
+ SCRUBLOGE(pg_id, task_id, "scrub meta batch failed, received nullptr scrub result");
+ return false;
+ }
+ peer_scrub_result_map[range_scrub_result->peer_id] = range_scrub_result;
+
+ SCRUBLOGD(pg_id, task_id, "completed meta range scrub: {}", range_scrub_result->to_string());
+ }
+
+ // 1 consolidate peer_scrub_result_map into shard_blob_count_in_batch
+ std::map< shard_id_t, std::pair< size_t /*occurrence count*/, uint32_t /*max blob count*/ > >
+ shard_count_map;
+
+ for (const auto& [_, range_scrub_result] : peer_scrub_result_map) {
+ for (const auto& [route, _] : range_scrub_result->results) {
+ // shard occurrence count
+ shard_count_map[route.shard].first++;
+
+ // blob count. in a meta scrub result, route.blob stands for the blob count in this shard; we need to
+ // take the max blob count across all peers since some peers might be missing some blobs
+ const auto blob_count = shard_count_map[route.shard].second;
+ shard_count_map[route.shard].second = std::max(blob_count, static_cast< uint32_t >(route.blob));
+ }
+ }
+
+ for (const auto& [shard_id, count_pair] : shard_count_map) {
+ // if shard_id is 0, this is a pg meta scrub result, which does not represent a real shard, so we can
+ // skip it in the blob scrub phase.
+ if (!shard_id) continue;
+
+ if (count_pair.first == peer_scrub_result_map.size()) {
+ // all peers have this shard, so we consider it successfully scrubbed and update the
+ // blob count. for an empty shard, we set the blob count to 1. Actually, an empty shard should not
+ // appear here since it is not in pg_index_table.
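+ // e.g. blob counts {120, 118, 120} reported by three peers for the same shard resolve to 120, so the
+ // blob phase will not under-scan the peer that holds the most blobs.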
+ shard_blob_count_in_batch[shard_id] = std::max(count_pair.second, uint32_t{1}); + } else { + // missing shard + RELEASE_ASSERT( + count_pair.first < peer_scrub_result_map.size(), + "the occurrence count of shard_id {} should not be larger than peer count, but it is {}", + shard_id, count_pair.first); + // not all peers have this shard, we consider this shard is missing, and set blob count to max value + // to make sure it will be scrubbed in a single batch in blob scrub phase + shard_blob_count_in_batch[shard_id] = UINT32_MAX; + } + } + + // 2 consolidate peer_scrub_result_map into scrub_report + scrub_report->merge(peer_scrub_result_map); + + return true; + }) + .get(); +} + +bool ScrubManager::PGScrubContext::scrub_blob_batch(std::shared_ptr< ScrubManager::ShallowScrubReport > scrub_report, + shard_id_t start_shard_id, shard_id_t end_shard_id, + blob_id_t last_blob_id, int64_t scrub_lsn, bool is_deep_scrub) { + const auto pg_id = hs_pg->pg_id(); + const auto scrub_type = is_deep_scrub ? SCRUB_TYPE::DEEP_BLOB : SCRUB_TYPE::SHALLOW_BLOB; + + SCRUBLOGD(pg_id, task_id, + "start scrubbing blob for shard range: {} to {}, last_blob_id={}, scrub_lsn={}, scrub_type={}", + start_shard_id, end_shard_id, last_blob_id, scrub_lsn, scrub_type); + + std::vector< folly::Future< std::shared_ptr< ScrubManager::range_scrub_result > > > futs; + for (const auto& [peer_id, scrub_result_queue] : peer_scrub_result_queue_map_) { + auto [promise, future] = folly::makePromiseContract< std::shared_ptr< ScrubManager::range_scrub_result > >(); + futs.emplace_back(std::move(future).via(&folly::InlineExecutor::instance())); + m_scrub_executor->add([this, pg_id, peer_id, start_shard_id, end_shard_id, last_blob_id, scrub_lsn, scrub_type, + promise = std::move(promise), scrub_result_queue]() mutable { + std::shared_ptr< ScrubManager::scrub_result > scrub_result; + ScrubManager::scrub_req current_req(pg_id, random_req_id(), scrub_lsn, start_shard_id, 0, end_shard_id, + last_blob_id, scrub_type, hs_pg->home_obj_.our_uuid()); + auto range_scrub_result = std::make_shared< ScrubManager::range_scrub_result >( + start_shard_id, 0, end_shard_id, last_blob_id, scrub_type, peer_id); + + for (uint8_t retry_count = 0;;) { + if (cancelled.load()) { + SCRUBLOGD(pg_id, task_id, + "scrub task is cancelled, stop waiting for scrub result from peer {}, " + "shard range: {} to {}, last_blob_id={}, scrub_lsn={}", + peer_id, start_shard_id, end_shard_id, last_blob_id, scrub_lsn); + break; + } + + send_req_to_peer(current_req, peer_id); + + // Drain stale results until we get the expected req_id or time out. + // Do NOT re-send on stale results — the peer already has the outstanding request. 
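+ // (stale results can appear when an earlier request timed out locally but the peer still replied to it later.)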
+ // TODO::make the timeout here configurable + bool got_expected = false; + while (!got_expected) { + if (!scrub_result_queue->tryReadUntil(std::chrono::steady_clock::now() + std::chrono::seconds{10}, + scrub_result)) { + // timeout + SCRUBLOGD(pg_id, task_id, + "did not receive scrub result from peer {} for shard range: {} to {}, " + "last_blob_id={}, scrub_lsn={}, try again", + peer_id, start_shard_id, end_shard_id, last_blob_id, scrub_lsn); + retry_count++; + + // TODO::make the max retry count configurable + if (retry_count > 5) { + SCRUBLOGE(pg_id, task_id, + "did not receive scrub result from peer {} after {} retries, " + "shard range: {} to {}, last_blob_id={}, scrub_lsn={}, " + "cancel this scrub task", + peer_id, retry_count, start_shard_id, end_shard_id, last_blob_id, scrub_lsn); + // this will cancel the entire task + cancel(); + } + break; // timed out — outer loop will re-send or detect cancellation + } + + // TODO:: add more logic to check if the scrub result is the expected one, for example, we can + // add the shard range and scrub_lsn in the scrub req and check if they are consistent with the + // received scrub result + if (scrub_result->req_id != current_req.req_id) { + SCRUBLOGD(pg_id, task_id, + "received scrub result with unexpected req_id from peer {}, expected req_id={}, " + "actual req_id={}, scrub_type={}, drain and wait again", + peer_id, current_req.req_id, scrub_result->req_id, scrub_type); + continue; // drain stale result and wait again — do NOT re-send + } + + got_expected = true; + } + + if (!got_expected) { + continue; // timed out — outer loop re-sends or cancels + } + + // Got the expected result. Reset retry count since the peer is responsive. + retry_count = 0; + SCRUBLOGD(pg_id, task_id, "blob scrub: received scrub result from peer {}, result entries count={}", + peer_id, scrub_result->entries.size()); + + if (scrub_result->entries.empty()) { + SCRUBLOGD(pg_id, task_id, + "received empty scrub result from peer {}, shard range: {} to {}, " + "last_blob_id={}, scrub_lsn={}, we consider this range scrub is completed and break " + "the loop!", + peer_id, start_shard_id, end_shard_id, last_blob_id, scrub_lsn); + break; + } + + const auto received_batch_size = scrub_result->entries.size(); + SCRUBLOGD(pg_id, task_id, + "received expected blob scrub result from peer {}, req_id={}, start_shard_id={}, " + "start_blob_id={}, end_shard_id={}, end_blob_id={}, scrub_lsn={}, details={}", + peer_id, current_req.req_id, current_req.start_shard_id, current_req.start_blob_id, + current_req.end_shard_id, current_req.end_blob_id, scrub_lsn, scrub_result->to_string()); + + const auto last_shard_id_in_result = (scrub_result->entries).rbegin()->first.shard; + const auto last_blob_id_in_result = (scrub_result->entries).rbegin()->first.blob; + + range_scrub_result->add_scrub_result(*scrub_result); + SCRUBLOGD(pg_id, task_id, "after adding new scrub_result for blob scrub, {}", + range_scrub_result->to_string()); + + uint32_t batch_capacity = static_cast< uint32_t >( + scrub_type == SCRUB_TYPE::SHALLOW_BLOB ? max_scrub_batch_size : deep_blob_scrub_batch_size); + if (received_batch_size < batch_capacity) { + // if the received batch size is smaller than the batch capacity, it means the peer has no more data + // to scrub in this range, we can consider the range scrub is completed for this peer and break the + // loop to avoid unnecessary scrub req sending and waiting. 
+ SCRUBLOGD(pg_id, task_id, + "received blob scrub result with batch size {} smaller than batch capacity {} from peer " + "{}, shard range: {} to {}, last_blob_id={}, scrub_lsn={}, we consider this range scrub " + "is completed and break the loop!", + received_batch_size, batch_capacity, peer_id, start_shard_id, end_shard_id, last_blob_id, + scrub_lsn); + break; + } + + RELEASE_ASSERT( + received_batch_size == batch_capacity, + "the received batch size {} should be equal to batch capacity {} for blob scrub, but it is not, " + "peer {}, shard range: {} to {}, last_blob_id={}, scrub_lsn={}, scrub_type={}", + received_batch_size, batch_capacity, peer_id, start_shard_id, end_shard_id, last_blob_id, scrub_lsn, + scrub_type); + + RELEASE_ASSERT(last_blob_id_in_result <= last_blob_id, + "the last_blob_id_in_result {} should not be larger than last_blob_id {}, but it is, " + "peer {}, shard range: {} to {}, scrub_lsn={}, scrub_type={}", + last_blob_id_in_result, last_blob_id, peer_id, start_shard_id, end_shard_id, scrub_lsn, + scrub_type); + + if (last_blob_id_in_result == last_blob_id) { + current_req.start_shard_id = last_shard_id_in_result + 1; + current_req.start_blob_id = 0; + } else { + current_req.start_shard_id = last_shard_id_in_result; + current_req.start_blob_id = last_blob_id_in_result + 1; + } + + // Completed when we've passed the last blob of the last shard in this batch. + if (current_req.start_shard_id > end_shard_id) { + SCRUBLOGD(pg_id, task_id, + "completed scrubbing blob for shard range: {} to {}, last_blob_id={}, scrub_lsn={}, " + "peer {}", + start_shard_id, end_shard_id, last_blob_id, scrub_lsn, peer_id); + break; + } + + // new req_id for the next scrub req + current_req.req_id = random_req_id(); + } + + if (cancelled.load()) { + promise.setException(folly::make_exception_wrapper< std::runtime_error >("cancelled")); + } else { + promise.setValue(range_scrub_result); + } + }); + } + + return folly::collectAllUnsafe(futs) + .thenValue([this, pg_id, &scrub_report](auto&& results) { + std::map< peer_id_t, std::shared_ptr< range_scrub_result > > peer_scrub_result_map; + for (auto& r : results) { + if (r.hasException()) { + SCRUBLOGE(pg_id, task_id, "scrub blob batch is failed, error={}", r.exception().what()); + return false; + } + auto range_scrub_result = r.value(); + if (!range_scrub_result) { + SCRUBLOGE(pg_id, task_id, "scrub blob batch is failed, receive nullptr scrub result"); + return false; + } + peer_scrub_result_map.emplace(range_scrub_result->peer_id, range_scrub_result); + + SCRUBLOGD(pg_id, task_id, "complete blob range scrub: {}", range_scrub_result->to_string()); + } + + // consolidate peer_scrub_result_map into scrub_report + scrub_report->merge(peer_scrub_result_map); + + return true; + }) + .get(); +} + +void ScrubManager::PGScrubContext::reconcile_scrub_report(std::shared_ptr< ShallowScrubReport > scrub_report) { + // A shard/blob reported missing may be a false positive from a concurrent deletion during scrubbing. + // For each peer that is tracked as HAVING the shard/blob (in the existence-tracking set), we re-check + // whether it still holds the item. If it no longer does (concurrent deletion), we remove that peer from + // the existence-tracking set; when the set empties the entry is dropped, eliminating the false positive. 
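+ // e.g. if blob B was reported only by peer P1, we re-check P1; when P1 confirms B was deleted in the
+ // meantime, the tracking set for B becomes empty and B disappears from missing_blobs.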
+ const auto pg_id = hs_pg->pg_id(); + + // TODO::make this configurable + const uint8_t max_reconcile_retry_count = 3; + for (uint8_t retry_count = 0; retry_count < max_reconcile_retry_count; ++retry_count) { + if (cancelled.load()) { + SCRUBLOGD(pg_id, task_id, "scrub task cancelled, skip reconciliation"); + return; + } + + const auto missing_shards = scrub_report->get_missing_shard_ids(); + const auto missing_blobs = scrub_report->get_missing_blobs(); + + if (missing_shards.empty() && missing_blobs.empty()) { + SCRUBLOGD(pg_id, task_id, "no missing shard/blob in scrub report, no need to reconcile"); + return; + } + + std::vector< folly::Future< folly::Unit > > reconcile_futs; + + for (const auto& [shard_id, peer_set] : missing_shards) { + for (const auto& peer_id : peer_set) { + reconcile_futs.emplace_back(std::move( + check_existence_in_peer(peer_id, {shard_id, 0}, false /* check blob */) + .thenTry([this, pg_id, peer_id, shard_id, &scrub_report](folly::Try< bool > result) { + if (result.hasException()) { + SCRUBLOGE(pg_id, task_id, + "failed to check shard existence in peer {}, shard {}, error: {}", peer_id, + shard_id, result.exception().what()); + return; + } + + const auto& exists = result.value(); + if (!exists) { + SCRUBLOGD(pg_id, task_id, + "reconcile check: shard {} confirmed absent on peer {}, removing from " + "existence-tracking set", + shard_id, peer_id); + scrub_report->remove_shard_existence_from_peer(shard_id, peer_id); + } else { + SCRUBLOGD(pg_id, task_id, + "reconcile check: shard {} still present on peer {}, no change", shard_id, + peer_id); + } + }))); + } + } + + for (const auto& [blob_route, peer_set] : missing_blobs) { + for (const auto& peer_id : peer_set) { + reconcile_futs.emplace_back(std::move( + check_existence_in_peer(peer_id, blob_route, true /* check blob */) + .thenTry([this, pg_id, peer_id, blob_route, &scrub_report](folly::Try< bool > result) { + if (result.hasException()) { + SCRUBLOGE( + pg_id, task_id, + "failed to check blob existence in peer {}, shard_id={}, blob_id={}, error: {}", + peer_id, blob_route.shard, blob_route.blob, result.exception().what()); + return; + } + + const auto& exists = result.value(); + if (!exists) { + SCRUBLOGD(pg_id, task_id, + "reconcile check: shard_id={}, blob_id={} confirmed absent on peer {}, " + "removing from existence-tracking set", + blob_route.shard, blob_route.blob, peer_id); + scrub_report->remove_blob_existence_from_peer(blob_route, peer_id); + } else { + SCRUBLOGD( + pg_id, task_id, + "reconcile check: shard_id={}, blob_id={} still present on peer {}, no change", + blob_route.shard, blob_route.blob, peer_id); + } + }))); + } + } + + folly::collectAllUnsafe(reconcile_futs).wait(); + } + + const auto remaining_missing_shards = scrub_report->get_missing_shard_ids().size(); + const auto remaining_missing_blobs = scrub_report->get_missing_blobs().size(); + if (remaining_missing_shards || remaining_missing_blobs) { + SCRUBLOGW(pg_id, task_id, + "reconciliation finished after {} retries but {} missing shards and {} missing blobs remain", + max_reconcile_retry_count, remaining_missing_shards, remaining_missing_blobs); + } else { + SCRUBLOGD(pg_id, task_id, "reconciliation cleared all missing items after {} retries", + max_reconcile_retry_count); + } +} + +folly::Future< bool > ScrubManager::PGScrubContext::check_existence_in_peer(peer_id_t peer_id, BlobRoute blob, + bool check_blob) { + auto [promise, future] = folly::makePromiseContract< bool >(); + const auto pg_id = hs_pg->pg_id(); + + auto repl_dev = 
hs_pg->repl_dev_; + if (!repl_dev) { + promise.setException(folly::make_exception_wrapper< std::runtime_error >("repl dev is not available")); + return std::move(future).via(&folly::InlineExecutor::instance()); + } + + ScrubManager::scrub_req check_blob_req; + check_blob_req.start_shard_id = blob.shard; + check_blob_req.start_blob_id = blob.blob; + check_blob_req.scrub_type = check_blob ? SCRUB_TYPE::CHECK_BLOB_EXISTENCE : SCRUB_TYPE::CHECK_SHARD_EXISTENCE; + + auto flatbuffer = check_blob_req.build_flat_buffer(); + sisl::io_blob_list_t blob_list; + blob_list.emplace_back(flatbuffer.data(), flatbuffer.size(), false); + + const auto check_type_str = check_blob ? "blob" : "shard"; + + // this is a bidirectional request, no need to add a req_id. + repl_dev->data_request_bidirectional(peer_id, HSHomeObject::PUSH_SCRUB_REQ, blob_list) + .via(&folly::InlineExecutor::instance()) + .thenValue([pg_id, peer_id, task_id = this->task_id, blob, check_type_str, flatbuffer = std::move(flatbuffer), + promise = std::move(promise)](auto&& response) mutable { + if (response.hasError()) { + SCRUBLOGE(pg_id, task_id, "failed to check {} existence in peer {}, blob {}, error code: {}", + check_type_str, peer_id, blob, static_cast< int >(response.error())); + promise.setException( + folly::make_exception_wrapper< std::runtime_error >("rpc bidirectional request failed")); + } else { + const auto& resp_blob = response.value().response_blob(); + if (resp_blob.size() != sizeof(bool)) { + SCRUBLOGE(pg_id, task_id, + "invalid response for {} existence check from peer {}, blob {}, response size={}", + check_type_str, peer_id, blob, resp_blob.size()); + promise.setException( + folly::make_exception_wrapper< std::runtime_error >("invalid response for existence check")); + } else { + const bool exists = *reinterpret_cast< const bool* >(resp_blob.cbytes()); + SCRUBLOGD(pg_id, task_id, + "successfully checked {} existence in peer {}, shard_id={}, blob_id={}, exists={}", + check_type_str, peer_id, blob.shard, blob.blob, exists); + promise.setValue(exists); + } + } + }); + + return std::move(future).via(&folly::InlineExecutor::instance()); +} + +uint64_t ScrubManager::PGScrubContext::random_req_id() const { + static std::atomic< uint64_t > ctr{0}; + + static const uint64_t seed = []() -> uint64_t { + std::random_device rd; + uint64_t s = (uint64_t(rd()) << 32) ^ uint64_t(rd()); + return s ? s : 0x123456789abcdef0ULL; + }(); + + auto splitmix64 = [](uint64_t x) -> uint64_t { + x += 0x9e3779b97f4a7c15ULL; + x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL; + x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL; + return x ^ (x >> 31); + }; + + uint64_t x = ctr.fetch_add(1, std::memory_order_relaxed) ^ seed; + return splitmix64(x); +} + +flatbuffers::DetachedBuffer ScrubManager::scrub_req::build_flat_buffer() const { + flatbuffers::FlatBufferBuilder builder; + + // Convert peer_id_t (boost::uuids::uuid, 16 bytes) to a byte vector for the issuer_uuid field. + // issuer_uuid is [ubyte:16] in the schema; if issuer_peer_id is nil (all-zero), it is still + // serialized so the receiver can always read a consistent value. + std::vector< uint8_t > uuid_bytes(issuer_peer_id.begin(), issuer_peer_id.end()); + auto issuer_uuid_offset = builder.CreateVector(uuid_bytes); + + // Build the ScrubReq table. All scalar fields are written unconditionally; absent values on + // the reader side will fall back to FlatBuffers defaults (0 / ScrubType::META). 
+ auto req_offset = + CreateScrubReq(builder, static_cast< uint16_t >(pg_id), req_id, scrub_lsn, start_shard_id, start_blob_id, + end_shard_id, end_blob_id, issuer_uuid_offset, static_cast< ScrubType >(scrub_type)); + + builder.FinishSizePrefixed(req_offset); + return builder.Release(); +} + +bool ScrubManager::scrub_req::load(uint8_t const* buf_ptr, const uint32_t buf_size) { + if (!buf_ptr || buf_size == 0) { + LOGERRORMOD(scrubmgr, "scrub_req::load called with null or empty buffer"); + return false; + } + + auto scrub_req_fb = GetSizePrefixedScrubReq(buf_ptr); + if (!scrub_req_fb) { + LOGERRORMOD(scrubmgr, "scrub_req::load: GetSizePrefixedScrubReq returned null"); + return false; + } + + // Scalar fields always carry a value (FlatBuffers default = 0 when absent in the wire format). + pg_id = static_cast< pg_id_t >(scrub_req_fb->pg_id()); + req_id = scrub_req_fb->req_id(); + scrub_lsn = scrub_req_fb->scrub_lsn(); + start_shard_id = scrub_req_fb->start_shard_id(); + start_blob_id = scrub_req_fb->start_blob_id(); + end_shard_id = scrub_req_fb->end_shard_id(); + end_blob_id = scrub_req_fb->end_blob_id(); + scrub_type = static_cast< SCRUB_TYPE >(scrub_req_fb->scrub_type()); + + // issuer_uuid is a vector field and may be absent (nullptr) when the sender did not set it. + // In that case issuer_peer_id keeps its default-initialized nil UUID. + if (auto uuid_vec = scrub_req_fb->issuer_uuid(); uuid_vec != nullptr) { + const auto copy_len = std::min(static_cast< size_t >(uuid_vec->size()), issuer_peer_id.size()); + std::copy_n(uuid_vec->data(), copy_len, issuer_peer_id.begin()); + } + + return true; +} + +flatbuffers::DetachedBuffer ScrubManager::scrub_result::build_flat_buffer() const { + flatbuffers::FlatBufferBuilder builder; + + std::vector< flatbuffers::Offset< ScrubResultEntry > > entry_offsets; + entry_offsets.reserve(entries.size()); + + { + std::lock_guard lock(mutex_); + for (const auto& [route, result_variant] : entries) { + ScrubStatus status; + uint64_t hash = 0; + if (std::holds_alternative< uint64_t >(result_variant)) { + status = ScrubStatus::NONE; + hash = std::get< uint64_t >(result_variant); + } else { + status = std::get< ScrubStatus >(result_variant); + } + entry_offsets.push_back(CreateScrubResultEntry(builder, route.shard, route.blob, status, hash)); + } + } + + auto entries_offset = builder.CreateVector(entry_offsets); + + std::vector< uint8_t > uuid_bytes(issuer_peer_id.begin(), issuer_peer_id.end()); + auto uuid_offset = builder.CreateVector(uuid_bytes); + + auto result_offset = CreateScrubResult(builder, req_id, uuid_offset, entries_offset); + builder.FinishSizePrefixed(result_offset); + return builder.Release(); +} + +void ScrubManager::scrub_result::add_entry(const scrub_result_entry& entry) { + BlobRoute route{entry.shard_id, entry.blob_id}; + std::lock_guard lock(mutex_); + RELEASE_ASSERT(entries.find(route) == entries.end(), "duplicate scrub result entry for {}", route); + entries[route] = entry.status_or_hash; +} + +bool ScrubManager::scrub_result::load(uint8_t const* buf_ptr, uint32_t buf_size) { + if (!buf_ptr || buf_size == 0) { + LOGERRORMOD(scrubmgr, "scrub_result::load called with null or empty buffer"); + return false; + } + + auto result_fb = GetSizePrefixedScrubResult(buf_ptr); + if (!result_fb) { + LOGERRORMOD(scrubmgr, "scrub_result::load: GetSizePrefixedScrubResult returned null"); + return false; + } + + req_id = result_fb->req_id(); + + // issuer_uuid is a vector field — absent (nullptr) when the sender omitted it; + // in that case issuer_peer_id 
keeps its default-initialized nil UUID. + if (auto uuid_vec = result_fb->issuer_uuid(); uuid_vec != nullptr) { + const auto copy_len = std::min(static_cast< size_t >(uuid_vec->size()), issuer_peer_id.size()); + std::copy_n(uuid_vec->data(), copy_len, issuer_peer_id.begin()); + } + + { + std::lock_guard lock(mutex_); + entries.clear(); + if (auto results_vec = result_fb->scrub_results(); results_vec != nullptr) { + for (const auto* entry_fb : *results_vec) { + if (!entry_fb) { continue; } + BlobRoute route{entry_fb->shard_id(), entry_fb->blob_id()}; + std::variant< ScrubStatus, uint64_t > result_variant; + if (entry_fb->scrub_result() == ScrubStatus::NONE) { + result_variant = entry_fb->hash(); + } else { + result_variant = static_cast< ScrubStatus >(entry_fb->scrub_result()); + } + entries.emplace(route, std::move(result_variant)); + } + } + } + + return true; +} + +std::string ScrubManager::scrub_result::to_string() const { + std::lock_guard lock(mutex_); + std::stringstream ss; + ss << "scrub_result[req_id=" << req_id << ",issuer=" << issuer_peer_id << ",entries_count=" << entries.size() + << ",entries={"; + bool first = true; + for (const auto& [route, result_variant] : entries) { + if (!first) ss << ","; + ss << "shard=" << static_cast< uint64_t >(route.shard) << ",blob=" << static_cast< uint64_t >(route.blob) + << ":"; + if (std::holds_alternative< uint64_t >(result_variant)) { + ss << fmt::format("{:016x}", std::get< uint64_t >(result_variant)); + } else { + ss << scrub_result_to_string(std::get< ScrubStatus >(result_variant)); + } + first = false; + } + ss << "}]"; + return ss.str(); +} + +std::string ScrubManager::range_scrub_result::to_string() const { + std::stringstream ss; + ss << "range_scrub_result[peer=" << peer_id << ",scrub_type=" << scrub_type << ",range=[shard=" << start_shard_id + << ",blob=" << start_blob_id << "]->[shard=" << end_shard_id << ",blob=" << end_blob_id + << "],results_count=" << results.size() << ",results={"; + bool first = true; + for (const auto& [route, result_variant] : results) { + if (!first) ss << ","; + ss << "shard=" << static_cast< uint64_t >(route.shard) << ",blob=" << static_cast< uint64_t >(route.blob) + << ":"; + if (std::holds_alternative< uint64_t >(result_variant)) { + ss << fmt::format("{:016x}", std::get< uint64_t >(result_variant)); + } else { + ss << scrub_result_to_string(std::get< ScrubStatus >(result_variant)); + } + first = false; + } + ss << "}]"; + return ss.str(); +} + +//=========================== Scrub Report Merge Functions ===========================// + +void ScrubManager::MetaScrubReport::print() const { + std::stringstream ss; + ss << "MetaScrubReport for pg=" << pg_id_ << " | "; + + ss << "CorruptedPgMeta={"; + bool first = true; + for (const auto& [peer_id, scrub_status] : corrupted_pg_metas) { + if (!first) ss << ","; + ss << "peer=" << peer_id << "(" << scrub_result_to_string(scrub_status) << ")"; + first = false; + } + ss << "} | "; + + ss << "CorruptedShardMeta={"; + first = true; + for (const auto& [peer_id, shard_map] : corrupted_shard_metas) { + if (!first) ss << ","; + ss << "peer=" << peer_id << ":["; + bool inner_first = true; + for (const auto& [shard_id, scrub_status] : shard_map) { + if (!inner_first) ss << ","; + ss << shard_id << "(" << scrub_result_to_string(scrub_status) << ")"; + inner_first = false; + } + ss << "]"; + first = false; + } + ss << "} | "; + + ss << "InconsistentShardMeta={"; + first = true; + for (const auto& [shard_id, peer_hash_map] : inconsistent_shard_metas) { + if (!first) ss << 
","; + ss << "shard=" << shard_id << ":["; + bool inner_first = true; + for (const auto& [peer_id, hash] : peer_hash_map) { + if (!inner_first) ss << ","; + ss << "peer=" << peer_id << fmt::format("(hash={:016x})", hash); + inner_first = false; + } + ss << "]"; + first = false; + } + ss << "} | "; + + ss << "MissingShards={"; + first = true; + for (const auto& [shard_id, peer_set] : missing_shard_ids) { + if (!first) ss << ","; + ss << "shard=" << shard_id << ":["; + bool inner_first = true; + for (const auto& peer_id : peer_set) { + if (!inner_first) ss << ","; + ss << peer_id; + inner_first = false; + } + ss << "]"; + first = false; + } + ss << "}"; + + LOGINFOMOD(scrubmgr, "{}", ss.str()); +} + +bool ScrubManager::MetaScrubReport::merge( + const std::map< peer_id_t, std::shared_ptr< range_scrub_result > >& peer_scrub_result_map) { + if (peer_scrub_result_map.empty()) { + LOGWARNMOD(scrubmgr, "[pg={}] MetaScrubReport::merge: empty map, skip", pg_id_); + return false; + } + + // All entries must have scrub_type == META and identical range fields. + const auto& ref = peer_scrub_result_map.begin()->second; + if (!ref || ref->scrub_type != SCRUB_TYPE::META) { + LOGWARNMOD(scrubmgr, "[pg={}] MetaScrubReport::merge: first entry is null or not META, skip", pg_id_); + return false; + } + for (auto it = std::next(peer_scrub_result_map.cbegin()); it != peer_scrub_result_map.cend(); ++it) { + const auto& [peer_id, rsr] = *it; + if (!ref->match(rsr)) { + LOGWARNMOD(scrubmgr, + "[pg={}] MetaScrubReport::merge: null, wrong type, or mismatched range from peer {}, skip", + pg_id_, peer_id); + return false; + } + } + + std::map< shard_id_t, std::map< peer_id_t, uint64_t > > shard_hash_map; + std::set< peer_id_t > shard_reporting_peers; + std::map< shard_id_t, std::set< peer_id_t > > shard_present_map; + + for (const auto& [peer_id, range_result] : peer_scrub_result_map) { + shard_reporting_peers.insert(peer_id); + for (const auto& [route, result_variant] : range_result->results) { + const auto shard_id = route.shard; + shard_present_map[shard_id].insert(peer_id); + + // For META results, healthy shards always carry uint64_t (the active-blob count used + // as a placeholder hash); only a non-NONE ScrubStatus indicates corruption. + // ScrubStatus::NONE does not appear in META results: local_scrub_meta stores uint64_t(0) + // directly, and scrub_result::load converts any on-wire NONE to uint64_t via the hash field. + if (std::holds_alternative< ScrubStatus >(result_variant)) { + auto status = std::get< ScrubStatus >(result_variant); + if (status != ScrubStatus::NONE) { + if (shard_id == 0) { + corrupted_pg_metas[peer_id] = status; + } else { + corrupted_shard_metas[peer_id][shard_id] = status; + } + LOGWARNMOD(scrubmgr, "[pg={}] find corruption for META shard={} peer={}", pg_id_, shard_id, + peer_id); + } + continue; + } + + // uint64_t: shard is healthy; use its hash value for cross-peer consistency comparison. + shard_hash_map[shard_id][peer_id] = std::get< uint64_t >(result_variant); + } + } + + // Detect shard meta inconsistency across peers. + // Note: peers that reported corruption (IO_ERROR/MISMATCH) are excluded from shard_hash_map and therefore + // from this check. Corruption and inconsistency are tracked separately; callers must correlate + // corrupted_shard_metas with inconsistent_shard_metas to get the full picture. 
+ for (const auto& [shard_id, peer_hash_map] : shard_hash_map) { + if (peer_hash_map.size() > 1) { + const uint64_t ref_hash = peer_hash_map.begin()->second; + bool consistent = std::all_of(peer_hash_map.begin(), peer_hash_map.end(), + [ref_hash](const auto& kv) { return kv.second == ref_hash; }); + if (!consistent) { + for (const auto& [peer_id, hash] : peer_hash_map) { + inconsistent_shard_metas[shard_id][peer_id] = hash; + } + } + } + } + + // Detect missing shards: shards seen by some peers but absent on others. + // Record which peers HAVE the shard (existence-tracking set; see missing_shard_ids semantics in hpp). + // shard_id=0 (pg_meta) is intentionally excluded: its corruption is captured in + // corrupted_pg_metas, and reconcile_scrub_report uses {shard_id, 0} as the existence- + // check route, which would be wrong for pg_meta whose route is {0, total_shards}. + for (const auto& [shard_id, peer_set] : shard_present_map) { + if (shard_id) { + if (peer_set.size() < shard_reporting_peers.size()) { + RELEASE_ASSERT(missing_shard_ids.find(shard_id) == missing_shard_ids.end(), + "shard_id {} should not already exist in missing_shard_ids", shard_id); + + missing_shard_ids[shard_id] = peer_set; + } + } + + // shard_id == 0 represents pg_meta, which is not a real shard and should not be treated as missing even if some + // peers don't report it. + } + + LOGINFOMOD(scrubmgr, + "[pg={}] Meta scrub merge completed: {} corrupted shard metas, {} inconsistent shard metas, " + "{} peers with missing shards", + pg_id_, corrupted_shard_metas.size(), inconsistent_shard_metas.size(), missing_shard_ids.size()); + return true; +} + +void ScrubManager::MetaScrubReport::remove_shard_existence_from_peer(shard_id_t shard_id, peer_id_t peer) { + std::lock_guard lock(mutex_); + auto it = missing_shard_ids.find(shard_id); + if (it != missing_shard_ids.end()) { + it->second.erase(peer); + if (it->second.empty()) { missing_shard_ids.erase(it); } + } +} + +void ScrubManager::ShallowScrubReport::print() const { + MetaScrubReport::print(); + std::stringstream ss; + ss << "ShallowScrubReport for pg=" << pg_id_ << " | MissingBlobs={"; + bool first = true; + for (const auto& [blob_route, peer_set] : missing_blobs) { + if (!first) ss << ","; + ss << fmt::format("{}", blob_route) << ":["; + bool inner = true; + for (const auto& peer_id : peer_set) { + if (!inner) ss << ","; + ss << peer_id; + inner = false; + } + ss << "]"; + first = false; + } + ss << "}"; + LOGINFOMOD(scrubmgr, "{}", ss.str()); +} + +bool ScrubManager::ShallowScrubReport::merge( + const std::map< peer_id_t, std::shared_ptr< range_scrub_result > >& peer_scrub_result_map) { + if (peer_scrub_result_map.empty()) { + LOGWARNMOD(scrubmgr, "[pg={}] ShallowScrubReport::merge: empty map, skip", pg_id_); + return false; + } + + const auto& ref = peer_scrub_result_map.begin()->second; + if (!ref) { + LOGWARNMOD(scrubmgr, "[pg={}] ShallowScrubReport::merge: first entry is null, skip", pg_id_); + return false; + } + + // META results are fully handled by the base class. 
+ if (ref->scrub_type == SCRUB_TYPE::META) { return MetaScrubReport::merge(peer_scrub_result_map); } + + RELEASE_ASSERT(ref->scrub_type == SCRUB_TYPE::SHALLOW_BLOB || ref->scrub_type == SCRUB_TYPE::DEEP_BLOB, + "unexpected scrub_type {} in ShallowScrubReport::merge", (int)ref->scrub_type); + + for (auto it = std::next(peer_scrub_result_map.cbegin()); it != peer_scrub_result_map.cend(); ++it) { + const auto& [peer_id, rsr] = *it; + if (!ref->match(rsr)) { + LOGWARNMOD(scrubmgr, + "[pg={}] ShallowScrubReport::merge: null, wrong type, or mismatched range from peer {}, skip", + pg_id_, peer_id); + return false; + } + } + + // Detect missing blobs: track which peers reported each blob, find absent ones + std::map< BlobRoute, std::set< peer_id_t > > blob_peers_map; + for (const auto& [peer_id, range_result] : peer_scrub_result_map) { + if (!range_result) continue; + for (const auto& [route, result_variant] : range_result->results) { + blob_peers_map[route].insert(peer_id); + } + } + + for (const auto& [blob_route, peer_set] : blob_peers_map) { + if (peer_set.size() < peer_scrub_result_map.size()) { + RELEASE_ASSERT(missing_blobs.find(blob_route) == missing_blobs.end(), + "blob_route {} should not already exist in missing_blobs", blob_route); + missing_blobs[blob_route] = peer_set; + } + } + + LOGDEBUGMOD(scrubmgr, "[pg={}] Shallow scrub merge completed!", pg_id_); + return true; +} + +void ScrubManager::ShallowScrubReport::remove_blob_existence_from_peer(BlobRoute blob_route, peer_id_t peer) { + std::lock_guard lock(mutex_); + auto it = missing_blobs.find(blob_route); + if (it != missing_blobs.end()) { + it->second.erase(peer); + if (it->second.empty()) { missing_blobs.erase(it); } + } +} + +void ScrubManager::DeepScrubReport::print() const { + ShallowScrubReport::print(); + + std::stringstream ss; + ss << "DeepScrubReport for pg=" << pg_id_ << " | CorruptedBlobs={"; + bool first = true; + for (const auto& [peer_id, blob_map] : corrupted_blobs) { + if (!first) ss << ","; + ss << "peer=" << peer_id << ":["; + bool inner = true; + for (const auto& [blob_route, scrub_result] : blob_map) { + if (!inner) ss << ","; + ss << fmt::format("{}", blob_route) << "(" << scrub_result_to_string(scrub_result) << ")"; + inner = false; + } + ss << "]"; + first = false; + } + ss << "} | InconsistentBlobs={"; + first = true; + for (const auto& [blob_route, peer_hash_map] : inconsistent_blobs) { + if (!first) ss << ","; + ss << fmt::format("{}", blob_route) << ":["; + bool inner = true; + for (const auto& [peer_id, hash] : peer_hash_map) { + if (!inner) ss << ","; + ss << "peer=" << peer_id << fmt::format("(hash={:016x})", hash); + inner = false; + } + ss << "]"; + first = false; + } + ss << "}"; + LOGINFOMOD(scrubmgr, "{}", ss.str()); +} + +bool ScrubManager::DeepScrubReport::merge( + const std::map< peer_id_t, std::shared_ptr< range_scrub_result > >& peer_scrub_result_map) { + if (peer_scrub_result_map.empty()) { + LOGWARNMOD(scrubmgr, "[pg={}] DeepScrubReport::merge: empty map, skip", pg_id_); + return false; + } + + const auto& ref = peer_scrub_result_map.begin()->second; + if (!ref) { + LOGWARNMOD(scrubmgr, "[pg={}] DeepScrubReport::merge: first entry is null, skip", pg_id_); + return false; + } + + const auto scrub_type = ref->scrub_type; + + // META and SHALLOW_BLOB results are fully handled by parent classes; no deep-specific work needed. 
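+ // (ShallowScrubReport::merge in turn forwards META results to MetaScrubReport::merge, so this single
+ // dispatch covers all scrub types.)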
+ if (scrub_type != SCRUB_TYPE::DEEP_BLOB) { return ShallowScrubReport::merge(peer_scrub_result_map); } + + // DEEP_BLOB: first detect missing blobs via ShallowScrubReport (which also validates the range), + // then add corrupted-blob and hash-inconsistency detection. + if (!ShallowScrubReport::merge(peer_scrub_result_map)) { return false; } + + // Detect corrupted blobs (non-NONE scrub_result) reported by any peer. + // Detect hash inconsistencies among healthy blobs in the same pass. + std::map< BlobRoute, std::map< peer_id_t, uint64_t > > hash_map_per_blob; + for (const auto& [peer_id, range_result] : peer_scrub_result_map) { + if (!range_result) continue; + for (const auto& [blob_route, result_variant] : range_result->results) { + if (std::holds_alternative< ScrubStatus >(result_variant)) { + auto status = std::get< ScrubStatus >(result_variant); + if (status != ScrubStatus::NONE) { + corrupted_blobs[peer_id][blob_route] = status; + LOGWARNMOD(scrubmgr, "[pg={}] find corruption for blob shard_id={}, blob_id={}, peer={}", pg_id_, + blob_route.shard, blob_route.blob, peer_id); + } + } else { + hash_map_per_blob[blob_route][peer_id] = std::get< uint64_t >(result_variant); + } + } + } + + // Note: peers that reported corruption (IO_ERROR/MISMATCH) have no hash entry in hash_map_per_blob. + // Hash inconsistency therefore requires ≥2 healthy peers for detection. Corruption is captured + // separately in corrupted_blobs. + for (const auto& [blob_route, hash_map] : hash_map_per_blob) { + if (hash_map.size() > 1) { + uint64_t ref_hash = hash_map.begin()->second; + bool consistent = std::all_of(hash_map.begin(), hash_map.end(), + [ref_hash](const auto& kv) { return kv.second == ref_hash; }); + if (!consistent) { + for (const auto& [peer_id, hash_val] : hash_map) { + inconsistent_blobs[blob_route][peer_id] = hash_val; + } + } + } + } + + LOGINFOMOD(scrubmgr, + "[pg={}] Deep blob scrub merge completed: {} missing blobs, {} corrupted blobs, {} inconsistent blobs", + pg_id_, missing_blobs.size(), corrupted_blobs.size(), inconsistent_blobs.size()); + return true; +} + +} // namespace homeobject \ No newline at end of file diff --git a/src/lib/homestore_backend/scrub_manager.hpp b/src/lib/homestore_backend/scrub_manager.hpp new file mode 100644 index 000000000..0dd651645 --- /dev/null +++ b/src/lib/homestore_backend/scrub_manager.hpp @@ -0,0 +1,366 @@ +#pragma once + +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wuninitialized" +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#include +#include +#include +#include +#pragma GCC diagnostic pop + +#include +#include "homeobject/common.hpp" +#include +#include +#include "lib/blob_route.hpp" +#include "MPMCPriorityQueue.hpp" +#include "generated/scrub_common_generated.h" +#include "generated/scrub_req_generated.h" +#include "generated/scrub_result_generated.h" + +namespace homeobject { + +class HSHomeObject; + +ENUM(SCRUB_TRIGGER_TYPE, uint8_t, PERIODICALLY = 0, MANUALLY); +ENUM(SCRUB_TYPE, uint8_t, META = 0, DEEP_BLOB, SHALLOW_BLOB, CHECK_BLOB_EXISTENCE, CHECK_SHARD_EXISTENCE); + +inline const char* scrub_result_to_string(ScrubStatus type) { + switch (type) { + case ScrubStatus::NONE: + return "NONE"; + case ScrubStatus::IO_ERROR: + return "IO_ERROR"; + case ScrubStatus::MISMATCH: + return "MISMATCH"; + case ScrubStatus::NOT_FOUND: + return "NOT_FOUND"; + default: + return "UNKNOWN"; + } +} + +class ScrubManager { +public: + explicit ScrubManager(HSHomeObject* homeobject); + ~ScrubManager(); + + // Disallow 
copy and move
+ ScrubManager(const ScrubManager&) = delete;
+ ScrubManager(ScrubManager&&) = delete;
+ ScrubManager& operator=(const ScrubManager&) = delete;
+ ScrubManager& operator=(ScrubManager&&) = delete;
+
+ // pg scrub superblk
+ // TODO:: move this into pg_super_blk with a separate PR, since this is a backward incompatible change.
+ // other backward incompatible meta changes will be:
+ // 1 shard sealed lsn.
+
+#pragma pack(1)
+ struct pg_scrub_superblk {
+ uint64_t last_deep_scrub_timestamp{0};
+ uint64_t last_shallow_scrub_timestamp{0};
+ pg_id_t pg_id{0};
+ static std::string name() { return pg_scrub_meta_name; }
+ };
+#pragma pack()
+
+ // scrub req
+ struct scrub_req {
+ scrub_req() = default;
+ ~scrub_req() = default;
+
+ scrub_req(pg_id_t pg_id, uint64_t req_id, int64_t scrub_lsn, uint64_t start_shard_id, uint64_t start_blob_id,
+ uint64_t end_shard_id, uint64_t end_blob_id, SCRUB_TYPE scrub_type, peer_id_t issuer_peer_id) :
+ pg_id{pg_id},
+ req_id{req_id},
+ scrub_lsn{scrub_lsn},
+ start_shard_id{start_shard_id},
+ start_blob_id{start_blob_id},
+ end_shard_id{end_shard_id},
+ end_blob_id{end_blob_id},
+ scrub_type{scrub_type},
+ issuer_peer_id{issuer_peer_id} {}
+
+ flatbuffers::DetachedBuffer build_flat_buffer() const;
+ bool load(uint8_t const* buf_ptr, uint32_t buf_size);
+
+ pg_id_t pg_id{0};
+ uint64_t req_id{0};
+ int64_t scrub_lsn{0};
+ uint64_t start_shard_id{0};
+ uint64_t start_blob_id{0};
+ uint64_t end_shard_id{0};
+ uint64_t end_blob_id{0};
+ SCRUB_TYPE scrub_type{SCRUB_TYPE::META};
+ peer_id_t issuer_peer_id{};
+ };
+
+ class range_scrub_result;
+
+ // Maps to the ScrubResult / ScrubResultEntry tables in scrub_result.fbs.
+ // One entry inside a ScrubResult — maps to the ScrubResultEntry FlatBuffers table.
+ struct scrub_result_entry {
+ shard_id_t shard_id{0};
+ blob_id_t blob_id{0};
+
+ // only when ScrubStatus is NONE do we store the CRC64 hash
+ std::variant< ScrubStatus, uint64_t > status_or_hash{ScrubStatus::NONE};
+ };
+
+ // Full result for one scrub request — maps to the ScrubResult FlatBuffers table.
+ class scrub_result {
+ public:
+ scrub_result() = default;
+ scrub_result(uint64_t req_id, peer_id_t issuer_peer_id) : req_id{req_id}, issuer_peer_id{issuer_peer_id} {}
+ ~scrub_result() = default;
+
+ void add_entry(const scrub_result_entry& entry);
+ flatbuffers::DetachedBuffer build_flat_buffer() const;
+ bool load(uint8_t const* buf_ptr, uint32_t buf_size);
+ std::string to_string() const;
+
+ uint64_t req_id{0};
+ peer_id_t issuer_peer_id{};
+
+ private:
+ friend class ScrubManager;
+ friend class PGScrubContext;
+ friend class range_scrub_result;
+ std::map< BlobRoute, std::variant< ScrubStatus, uint64_t > > entries;
+ mutable std::mutex mutex_;
+ };
+
+ // Aggregated results across a contiguous range of shards/blobs, which collects multiple scrub_result objects (one
+ // per scrub req) that together cover [start_shard_id, start_blob_id] to [end_shard_id, end_blob_id].
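+ // e.g. a SHALLOW_BLOB range over shards [5, 20] is typically assembled from several scrub_result batches of
+ // up to max_scrub_batch_size entries each, added one by one via add_scrub_result().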
+ class range_scrub_result { + public: + range_scrub_result(uint64_t start_shard_id, uint64_t start_blob_id, uint64_t end_shard_id, uint64_t end_blob_id, + SCRUB_TYPE scrub_type, peer_id_t peer_id) : + start_shard_id{start_shard_id}, + start_blob_id{start_blob_id}, + end_shard_id{end_shard_id}, + end_blob_id{end_blob_id}, + scrub_type{scrub_type}, + peer_id{peer_id} {} + ~range_scrub_result() = default; + + bool match(const std::shared_ptr< range_scrub_result >& other) const { + return other && scrub_type == other->scrub_type && start_shard_id == other->start_shard_id && + start_blob_id == other->start_blob_id && end_shard_id == other->end_shard_id && + end_blob_id == other->end_blob_id; + } + + std::string to_string() const; + + // this will only be called in a single thread, add a lock if we want to make it thread safe. + // TODO:: add a lock if we want to make it thread safe, but currently the design is one range_scrub_result per + // scrub req, so it should be only accessed by one thread. + void add_scrub_result(scrub_result& result) { + results.merge(result.entries); + + // the newly added result should not overlap with existing results in the range_scrub_result, otherwise it + // means there are duplicate scrub reqs covering the same shard/blob range, which should not happen. + RELEASE_ASSERT(result.entries.empty(), + "should not have duplicate blob route in range_scrub_result, scrub_type={}, " + "start_shard_id={}, start_blob_id={}, end_shard_id={}, end_blob_id={}, req_id={}", + (int)scrub_type, start_shard_id, start_blob_id, end_shard_id, end_blob_id, result.req_id); + } + + uint64_t start_shard_id{0}; + uint64_t start_blob_id{0}; + uint64_t end_shard_id{0}; + uint64_t end_blob_id{0}; + SCRUB_TYPE scrub_type{SCRUB_TYPE::META}; + peer_id_t peer_id{}; + + private: + friend class ScrubManager; + friend class PGScrubContext; + friend class MetaScrubReport; + friend class ShallowScrubReport; + friend class DeepScrubReport; + std::map< BlobRoute, std::variant< ScrubStatus, uint64_t > > results; + }; + + // scrub report + // base class for all scrub reports — handles PG and shard scrub results (SCRUB_TYPE::META) + class MetaScrubReport { + public: + MetaScrubReport(pg_id_t pg_id) : pg_id_(pg_id) {} + virtual ~MetaScrubReport() = default; + + pg_id_t get_pg_id() const { return pg_id_; } + // shard_id starts from 1, so we use shard_id=0 for pg meta + const auto& get_corrupted_shards() const { return corrupted_shard_metas; } + const auto& get_corrupted_pg_metas() const { return corrupted_pg_metas; } + const auto& get_inconsistent_shard_metas() const { return inconsistent_shard_metas; } + const auto& get_missing_shard_ids() const { return missing_shard_ids; } + + virtual bool + merge(const std::map< peer_id_t, std::shared_ptr< range_scrub_result > >& peer_scrub_result_map) = 0; + virtual void print() const = 0; + + protected: + void remove_shard_existence_from_peer(shard_id_t shard_id, peer_id_t peer); + + private: + friend class ScrubManager; + std::map< peer_id_t, std::map< shard_id_t, ScrubStatus > > corrupted_shard_metas; + std::map< peer_id_t, ScrubStatus > corrupted_pg_metas; + std::map< shard_id_t, std::map< peer_id_t, uint64_t > > inconsistent_shard_metas; + // peer set means shard only exists on these peers. 
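+ // e.g. missing_shard_ids[7] = {A, B} means shard 7 was reported by peers A and B but is absent on at
+ // least one other peer of the pg.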
+ std::map< shard_id_t, std::set< peer_id_t > > missing_shard_ids; + pg_id_t pg_id_; + std::mutex mutex_; + }; + + // shallow scrub report for a pg — extends MetaScrubReport with missing blob tracking (SHALLOW_BLOB) + class ShallowScrubReport : public MetaScrubReport { + public: + ShallowScrubReport(pg_id_t pg_id) : MetaScrubReport(pg_id) {} + ~ShallowScrubReport() override = default; + + const auto& get_missing_blobs() const { return missing_blobs; } + bool merge(const std::map< peer_id_t, std::shared_ptr< range_scrub_result > >& peer_scrub_result_map) override; + void print() const override; + + protected: + void remove_blob_existence_from_peer(BlobRoute blob_route, peer_id_t peer); + + private: + friend class ScrubManager; + // peer set means blob only exists on these peers. + std::map< BlobRoute, std::set< peer_id_t > > missing_blobs; + }; + + // deep scrub report for a pg — extends ShallowScrubReport with blob corruption and inconsistency (DEEP_BLOB) + class DeepScrubReport : public ShallowScrubReport { + public: + DeepScrubReport(pg_id_t pg_id) : ShallowScrubReport(pg_id) {} + ~DeepScrubReport() override = default; + + const auto& get_corrupted_blobs() const { return corrupted_blobs; } + const auto& get_inconsistent_blobs() const { return inconsistent_blobs; } + bool merge(const std::map< peer_id_t, std::shared_ptr< range_scrub_result > >& peer_scrub_result_map) override; + void print() const override; + + private: + std::map< peer_id_t, std::map< BlobRoute, ScrubStatus > > corrupted_blobs; + std::map< BlobRoute, std::map< peer_id_t, uint64_t > > inconsistent_blobs; + }; + + // PG Scrub Context — full definition lives in scrub_manager.cpp to avoid referencing + // HSHomeObject::HS_PG while HSHomeObject is still an incomplete type in this header. +private: + class PGScrubContext; + + // scrub scheduler +public: + void start(); + void stop(); + + folly::SemiFuture< std::shared_ptr< ShallowScrubReport > > + submit_scrub_task(const pg_id_t& pg_id, const bool is_deep, + SCRUB_TRIGGER_TYPE trigger_type = SCRUB_TRIGGER_TYPE::PERIODICALLY); + + // cancel will only cancel a running scrub task. for those submitted but not running tasks in the queue, cancel will + // not remove them from the queue. + void cancel_scrub_task(const pg_id_t& pg_id); + + void add_scrub_result(const pg_id_t pg_id, std::shared_ptr< scrub_result > scrub_result); + // new pg is created + void add_pg(const pg_id_t pg_id); + // new pg permanently removed + void remove_pg(const pg_id_t pg_id); + std::optional< pg_scrub_superblk > get_scrub_superblk(const pg_id_t pg_id) const; + void save_scrub_superblk(const pg_id_t pg_id, const bool is_deep_scrub, bool force_update = true); + void add_scrub_req(std::shared_ptr< scrub_req > req); + + // local scrub + std::shared_ptr< scrub_result > local_scrub_blob(std::shared_ptr< scrub_req > req); + std::shared_ptr< scrub_result > local_scrub_meta(std::shared_ptr< scrub_req > req); + +private: + inline static auto const pg_scrub_meta_name = std::string("PG_SCRUB"); + // TODO: persist this into metablk. 
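+ // until then, task ids restart from 1 on every process start and are only unique within one run.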
+ inline static atomic_uint64_t scrub_task_id{1}; + + // refer to docs/adr/scrub-blob-range-coverage.md + static constexpr uint64_t max_scrub_batch_size = 100'000; + static constexpr uint64_t deep_blob_scrub_batch_size = 10; + + struct scrub_task { + scrub_task(uint64_t last_scrub_time, pg_id_t pg_id, bool is_deep_scrub, SCRUB_TRIGGER_TYPE trigger_type, + folly::Promise< std::shared_ptr< ShallowScrubReport > > promise) : + task_id{scrub_task_id.fetch_add(1)}, + last_scrub_time{last_scrub_time}, + pg_id{pg_id}, + is_deep_scrub{is_deep_scrub}, + triggered{trigger_type}, + scrub_report_promise{ + std::make_shared< folly::Promise< std::shared_ptr< ShallowScrubReport > > >(std::move(promise))} {} + + ~scrub_task() { + if (scrub_report_promise && !scrub_report_promise->isFulfilled()) { + scrub_report_promise->setValue(nullptr); + } + } + + scrub_task(scrub_task&&) = default; + scrub_task& operator=(scrub_task&&) = default; + scrub_task(const scrub_task&) = delete; + scrub_task& operator=(const scrub_task&) = delete; + + uint64_t task_id; + uint64_t last_scrub_time; + pg_id_t pg_id; + bool is_deep_scrub; + SCRUB_TRIGGER_TYPE triggered; + std::shared_ptr< folly::Promise< std::shared_ptr< ShallowScrubReport > > > scrub_report_promise; + + bool operator==(const scrub_task& other) const noexcept { return task_id == other.task_id; } + + // manually > periodically; among equal triggers, earlier task_id wins. + bool operator<(const scrub_task& other) const noexcept { + using U = std::underlying_type_t< SCRUB_TRIGGER_TYPE >; + if (static_cast< U >(triggered) != static_cast< U >(other.triggered)) { + return static_cast< U >(triggered) < static_cast< U >(other.triggered); + } + return task_id > other.task_id; + } + }; + + void scan_pg_for_scrub(); + void handle_pg_scrub_task(scrub_task task); + bool is_eligible_for_deep_scrub(const pg_id_t& pg_id); + bool is_eligible_for_shallow_scrub(const pg_id_t& pg_id); + void on_pg_scrub_meta_blk_found(sisl::byte_view const& buf, void* meta_cookie, + std::vector< homestore::superblk< pg_scrub_superblk > >& stale_pg_scrub_sbs); + void handle_deep_pg_scrub_report(std::shared_ptr< DeepScrubReport > report); + void handle_shallow_pg_scrub_report(std::shared_ptr< ShallowScrubReport > report); + void handle_scrub_req(std::shared_ptr< scrub_req > req); + bool wait_for_scrub_lsn_commit(shared< homestore::ReplDev > repl_dev, int64_t scrub_lsn); + uint64_t compute_crc64(const void* data, size_t len, uint64_t crc = 0) const; + + iomgr::timer_handle_t m_scrub_timer_hdl{iomgr::null_timer_handle}; + iomgr::io_fiber_t m_scrub_timer_fiber{nullptr}; + HSHomeObject* m_hs_home_object{nullptr}; + MPMCPriorityQueue< scrub_task > m_scrub_task_queue; + std::shared_ptr< folly::IOThreadPoolExecutor > m_scrub_executor; + folly::ConcurrentHashMap< pg_id_t, std::shared_ptr< PGScrubContext > > m_pg_scrub_ctx_map; + folly::ConcurrentHashMap< pg_id_t, std::shared_ptr< homestore::superblk< pg_scrub_superblk > > > m_pg_scrub_sb_map; + + std::shared_ptr< folly::IOThreadPoolExecutor > m_scrub_req_executor; +}; +} // namespace homeobject + +// TODO:: consider the following scenarios and decide how we want to handle them in scrub manager +// 1 baseline resync +// 2 replace member +// 3 permanently destroy pg +// 4 GC \ No newline at end of file diff --git a/src/lib/homestore_backend/tests/CMakeLists.txt b/src/lib/homestore_backend/tests/CMakeLists.txt index a40812ab3..8eceb3d1f 100644 --- a/src/lib/homestore_backend/tests/CMakeLists.txt +++ b/src/lib/homestore_backend/tests/CMakeLists.txt @@ -30,3 +30,12 
@@ add_test(NAME HeapChunkSelectorTest COMMAND test_heap_chunk_selector) add_library(homestore_tests_gc OBJECT) target_sources(homestore_tests_gc PRIVATE test_homestore_backend.cpp hs_gc_tests.cpp) target_link_libraries(homestore_tests_gc homeobject_homestore ${COMMON_TEST_DEPS}) + +add_library(homestore_tests_scrubber OBJECT) +target_sources(homestore_tests_scrubber PRIVATE test_homestore_backend.cpp hs_scrubber_tests.cpp) +target_link_libraries(homestore_tests_scrubber homeobject_homestore ${COMMON_TEST_DEPS}) + +add_executable(test_mpmc_priority_queue) +target_sources(test_mpmc_priority_queue PRIVATE test_mpmc_priority_queue.cpp) +target_link_libraries(test_mpmc_priority_queue homeobject_homestore ${COMMON_TEST_DEPS}) +add_test(NAME MPMCPriorityQueueTest COMMAND test_mpmc_priority_queue) diff --git a/src/lib/homestore_backend/tests/homeobj_fixture.hpp b/src/lib/homestore_backend/tests/homeobj_fixture.hpp index 499968ab3..955884847 100644 --- a/src/lib/homestore_backend/tests/homeobj_fixture.hpp +++ b/src/lib/homestore_backend/tests/homeobj_fixture.hpp @@ -49,7 +49,7 @@ class HomeObjectFixture : public ::testing::Test { HSHomeObject::_hs_chunk_size = SISL_OPTIONS["chunk_size"].as< uint64_t >() * Mi; _obj_inst = std::dynamic_pointer_cast< HSHomeObject >(g_helper->build_new_homeobject()); - + // Used to export metrics, it should be called after init_homeobject if (SISL_OPTIONS["enable_http"].as< bool >()) { g_helper->app->start_http_server(); } if (!g_helper->is_current_testcase_restarted()) { @@ -906,6 +906,27 @@ class HomeObjectFixture : public ::testing::Test { LOGINFO("Flip {} set", flip_name); } + void set_callback_flip(const std::string flip_name, std::function< void() > callback, uint32_t count = 1, + uint32_t percent = 100) { + flip::FlipCondition null_cond; + flip::FlipFrequency freq; + freq.set_count(count); + freq.set_percent(percent); + m_fc.inject_callback_flip(flip_name, {null_cond}, freq, callback); + LOGINFO("Flip {} with callback set", flip_name); + } + + template < typename T > + void set_callback_retval_flip(const std::string flip_name, std::function< T() > callback, uint32_t count = 1, + uint32_t percent = 100) { + flip::FlipCondition null_cond; + flip::FlipFrequency freq; + freq.set_count(count); + freq.set_percent(percent); + ASSERT_TRUE(m_fc.inject_callback_retval_flip(flip_name, {null_cond}, freq, callback)); + LOGINFO("Flip {} with callback retval set", flip_name); + } + void remove_flip(const std::string flip_name) { m_fc.remove_flip(flip_name); LOGINFO("Flip {} removed", flip_name); diff --git a/src/lib/homestore_backend/tests/hs_scrubber_tests.cpp b/src/lib/homestore_backend/tests/hs_scrubber_tests.cpp new file mode 100644 index 000000000..4ca4f9797 --- /dev/null +++ b/src/lib/homestore_backend/tests/hs_scrubber_tests.cpp @@ -0,0 +1,1251 @@ +#include "homeobj_fixture.hpp" +#include +#include +#include +#include +#include "lib/homestore_backend/hs_homeobject.hpp" + +using namespace homeobject; +using BlobHeader = HSHomeObject::BlobHeader; + +// Helper function to delete a blob from index table +static void delete_blob_from_index(shared< homestore::IndexTable< BlobRouteKey, BlobRouteValue > > pg_index_table, + shard_id_t shard_id, blob_id_t blob_id) { + LOGINFO("Deleting blob from index, shard_id={}, blob_id={}", shard_id, blob_id); + BlobRouteKey blob_key{BlobRoute{shard_id, blob_id}}; + BlobRouteValue out_value; + homestore::BtreeSingleRemoveRequest remove_req{&blob_key, &out_value}; + auto status = pg_index_table->remove(remove_req); + ASSERT_TRUE(status == 
homestore::btree_status_t::success) + << "Failed to remove blob key from index table, status=" << status; +} + +static void delete_shard_from_index(shared< homestore::IndexTable< BlobRouteKey, BlobRouteValue > > pg_index_table, + shard_id_t missing_shard_id) { + LOGINFO("Deleting shard from index, shard_id={}", missing_shard_id); + auto start_key = BlobRouteKey{BlobRoute{missing_shard_id, 0}}; + auto end_key = BlobRouteKey{BlobRoute{missing_shard_id, std::numeric_limits< uint64_t >::max()}}; + homestore::BtreeRangeRemoveRequest< BlobRouteKey > range_remove_req{ + homestore::BtreeKeyRange< BlobRouteKey >{ + std::move(start_key), true /* inclusive */, std::move(end_key), true /* inclusive */ + }, + nullptr, std::numeric_limits< uint32_t >::max(), + [](homestore::BtreeKey const& key, homestore::BtreeValue const& value) -> bool { return true; }}; + + auto status = pg_index_table->remove(range_remove_req); + ASSERT_TRUE(status == homestore::btree_status_t::success || status == homestore::btree_status_t::not_found) + << "Failed to remove shard keys from index table, status=" << status; +} + +// Helper function to corrupt a blob's data +static void corrupt_blob_data(shared< homestore::IndexTable< BlobRouteKey, BlobRouteValue > > pg_index_table, + shard_id_t shard_id, blob_id_t blob_id) { + auto& data_service = homestore::data_service(); + const auto blk_size = data_service.get_blk_size(); + + BlobRouteKey blob_key{BlobRoute{shard_id, blob_id}}; + BlobRouteValue out_value; + homestore::BtreeSingleGetRequest blob_get_req{&blob_key, &out_value}; + + auto status = pg_index_table->get(blob_get_req); + ASSERT_TRUE(status == homestore::btree_status_t::success) + << "Failed to get blob key from index table, status=" << status; + + auto pbas = out_value.pbas(); + auto total_size = pbas.blk_count() * blk_size; + sisl::sg_list data_sgs; + data_sgs.size = total_size; + data_sgs.iovs.emplace_back(iovec{.iov_base = iomanager.iobuf_alloc(blk_size, total_size), .iov_len = total_size}); + + data_service.async_read(pbas, data_sgs, total_size) + .thenValue([&](auto&& err) { + if (err) { + LOGE("Failed to read blob data, blob_id={}, err={}", blob_id, err.message()); + iomanager.iobuf_free(reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base)); + throw std::runtime_error(fmt::format("Failed to read blob data: {}", err.message())); + } + + auto* data_ptr = reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base); + for (size_t i = 0; i < data_sgs.iovs[0].iov_len / 2; i++) { + data_ptr[i] ^= 0xFF; // Flip first half of data + } + + return data_service.async_write(data_sgs, pbas).thenValue([data_sgs = std::move(data_sgs)](auto&& err) { + iomanager.iobuf_free(reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base)); + ASSERT_FALSE(err) << "Failed to write corrupted blob data"; + }); + }) + .get(); +} + +// Helper function to make a blob inconsistent (valid but different hash) +static void make_blob_inconsistent(shared< homestore::IndexTable< BlobRouteKey, BlobRouteValue > > pg_index_table, + shard_id_t shard_id, blob_id_t blob_id, HSHomeObject* obj_inst) { + auto& data_service = homestore::data_service(); + const auto blk_size = data_service.get_blk_size(); + + BlobRouteKey blob_key{BlobRoute{shard_id, blob_id}}; + BlobRouteValue out_value; + homestore::BtreeSingleGetRequest blob_get_req{&blob_key, &out_value}; + + auto status = pg_index_table->get(blob_get_req); + ASSERT_TRUE(status == homestore::btree_status_t::success) << "Failed to get blob key from index table"; + + auto pbas = out_value.pbas(); + auto total_size = 
pbas.blk_count() * blk_size; + sisl::sg_list data_sgs; + data_sgs.size = total_size; + data_sgs.iovs.emplace_back(iovec{.iov_base = iomanager.iobuf_alloc(blk_size, total_size), .iov_len = total_size}); + + data_service.async_read(pbas, data_sgs, total_size) + .thenValue([&](auto&& err) { + if (err) { + LOGE("Failed to read blob data, blob_id={}, err={}", blob_id, err.message()); + iomanager.iobuf_free(reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base)); + throw std::runtime_error(fmt::format("Failed to read blob data: {}", err.message())); + } + + // Modify blob data and recompute valid hash + uint8_t* read_buf = r_cast< uint8_t* >(data_sgs.iovs[0].iov_base); + auto header = r_cast< BlobHeader* >(read_buf); + uint8_t* blob_bytes = read_buf + header->data_offset; + + std::mt19937 rng{std::random_device{}()}; + std::uniform_int_distribution< int > dist(0, 255); + + for (size_t i = 0; i < header->blob_size / 2; i++) { + blob_bytes[i] ^= static_cast< uint8_t >(dist(rng)); + } + + uint8_t computed_hash[BlobHeader::blob_max_hash_len]{}; + obj_inst->compute_blob_payload_hash(header->hash_algorithm, blob_bytes, header->blob_size, computed_hash, + BlobHeader::blob_max_hash_len); + + std::memcpy(header->hash, computed_hash, BlobHeader::blob_max_hash_len); + std::memset(header->header_hash, 0, BlobHeader::blob_max_hash_len); + uint32_t computed_header_hash = crc32_ieee(0, (uint8_t*)header, sizeof(BlobHeader)); + std::memcpy(header->header_hash, &computed_header_hash, sizeof(uint32_t)); + + if (!obj_inst->verify_blob(data_sgs.iovs[0].iov_base, header->shard_id, header->blob_id)) { + LOGE("Blob verification failed after modification, blob_id={}", blob_id); + iomanager.iobuf_free(reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base)); + throw std::runtime_error(fmt::format("Blob verification failed for blob_id={}", blob_id)); + } + + return data_service.async_write(data_sgs, pbas).thenValue([data_sgs = std::move(data_sgs)](auto&& err) { + iomanager.iobuf_free(reinterpret_cast< uint8_t* >(data_sgs.iovs[0].iov_base)); + ASSERT_FALSE(err) << "Failed to write inconsistent blob data"; + }); + }) + .get(); +} + +// Helper function to verify missing blobs in scrub report +// missing_blobs[blob_route] = set of peers that HAVE the blob; peer_id is missing it iff not in that set. 
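+// Example (hypothetical ids): in a 3-member PG {A, B, C}, missing_blobs[{shard=1, blob=7}] == {A, B}
+// reports the blob as present on A and B and therefore missing only on C.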
+static void verify_missing_blobs(std::shared_ptr< ScrubManager::DeepScrubReport > report, const peer_id_t& peer_id, + const BlobRoute& expected_blob) { + const auto& missing_blobs = report->get_missing_blobs(); + auto it = missing_blobs.find(expected_blob); + EXPECT_TRUE(it != missing_blobs.end()) + << "Missing blob should be reported for shard_id=" << expected_blob.shard << ", blob_id=" << expected_blob.blob; + if (it != missing_blobs.end()) { + EXPECT_TRUE(it->second.count(peer_id) == 0) + << "peer_id=" << peer_id << " should not have the blob (it is missing on this peer)"; + } +} + +// Helper function to verify corrupted blobs in scrub report +static void verify_corrupted_blobs(std::shared_ptr< ScrubManager::DeepScrubReport > report, const peer_id_t& peer_id, + const BlobRoute& expected_blob) { + const auto& corrupted_blobs = report->get_corrupted_blobs(); + auto it = corrupted_blobs.find(peer_id); + EXPECT_TRUE(it != corrupted_blobs.end()) << "Corrupted blob should be reported for peer_id=" << peer_id; + if (it != corrupted_blobs.end()) { + EXPECT_TRUE(it->second.count(expected_blob) == 1) << "Expected corrupted blob should be in the report"; + } +} + +// Helper function to verify missing shards in scrub report +// missing_shard_ids[shard_id] = set of peers that HAVE the shard; peer_id is missing it if not in that set. +static void verify_missing_shards(std::shared_ptr< ScrubManager::DeepScrubReport > report, const peer_id_t& peer_id, + shard_id_t expected_shard) { + const auto& missing_shards = report->get_missing_shard_ids(); + auto it = missing_shards.find(expected_shard); + EXPECT_TRUE(it != missing_shards.end()) << "Missing shard should be reported for shard_id=" << expected_shard; + if (it != missing_shards.end()) { + EXPECT_TRUE(it->second.count(peer_id) == 0) + << "peer_id=" << peer_id << " should not have the shard (it is missing on this peer)"; + } +} + +TEST_F(HomeObjectFixture, BasicScrubTest) { + const pg_id_t pg_id = 1; + create_pg(pg_id); + auto scrub_mgr = _obj_inst->scrub_manager(); + + // empty pg scrub should report no issues + run_on_pg_leader(pg_id, [&]() { + // Deep scrub on empty PG should complete without errors + auto scrub_report = scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null for empty PG"; + auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report); + ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport"; + + // Empty PG should have no issues + EXPECT_TRUE(deep_scrub_report->get_corrupted_shards().empty()) << "Empty PG should have no corrupted shards"; + EXPECT_TRUE(deep_scrub_report->get_corrupted_pg_metas().empty()) + << "No PG metas should be corrupted in normal case"; + EXPECT_TRUE(deep_scrub_report->get_inconsistent_shard_metas().empty()) + << "No shard metas should be inconsistent in normal case"; + EXPECT_TRUE(deep_scrub_report->get_missing_shard_ids().empty()) << "Empty PG should have no missing shards"; + EXPECT_TRUE(deep_scrub_report->get_missing_blobs().empty()) << "Empty PG should have no missing blobs"; + + EXPECT_TRUE(deep_scrub_report->get_corrupted_blobs().empty()) << "Empty PG should have no corrupted blobs"; + EXPECT_TRUE(deep_scrub_report->get_inconsistent_blobs().empty()) + << "Empty PG should have no inconsistent blobs"; + + // Shallow scrub on empty PG + scrub_report = scrub_mgr->submit_scrub_task(pg_id, false /* is_deep */, 
SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + EXPECT_TRUE(scrub_report->get_corrupted_shards().empty()) << "Empty PG should have no corrupted shards"; + EXPECT_TRUE(scrub_report->get_corrupted_pg_metas().empty()) << "No PG metas should be corrupted in normal case"; + EXPECT_TRUE(scrub_report->get_inconsistent_shard_metas().empty()) + << "No shard metas should be inconsistent in normal case"; + EXPECT_TRUE(scrub_report->get_missing_shard_ids().empty()) << "Empty PG should have no missing shards"; + EXPECT_TRUE(scrub_report->get_missing_blobs().empty()) << "Empty PG should have no missing blobs"; + }); + + const uint64_t num_shards = SISL_OPTIONS["num_shards"].as< uint64_t >(); + // follower test uses indices 0-2, leader test uses indices 3-5 in blob_op_shard_id + const uint64_t num_blobs_per_shard = std::max(SISL_OPTIONS["num_blobs"].as< uint64_t >(), uint64_t{6}); + const uint64_t shard_size = 64 * Mi; + + std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec; + std::map< pg_id_t, blob_id_t > pg_blob_id; + pg_blob_id[pg_id] = 0; + + std::map< shard_id_t, std::map< blob_id_t, uint64_t > > shard_blob_ids_map; + + // Create multiple shards + for (uint64_t i = 0; i < num_shards; i++) { + auto shard_info = create_shard(pg_id, shard_size, "shard meta"); + pg_shard_id_vec[pg_id].push_back(shard_info.id); + LOGINFO("Created pg={} shard={} (shard {}/{})", pg_id, shard_info.id, i + 1, num_shards); + } + + // pg with empty shard scrub should report no issues + run_on_pg_leader(pg_id, [&]() { + // Deep scrub on PG with empty shards should complete without errors + auto scrub_report = scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null for PG with empty shards"; + auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report); + ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport"; + + // PG with empty shards should have no issues + EXPECT_TRUE(deep_scrub_report->get_corrupted_shards().empty()) + << "PG with empty shards should have no corrupted shards"; + EXPECT_TRUE(deep_scrub_report->get_corrupted_pg_metas().empty()) + << "No PG metas should be corrupted in normal case"; + EXPECT_TRUE(deep_scrub_report->get_inconsistent_shard_metas().empty()) + << "No shard metas should be inconsistent in normal case"; + EXPECT_TRUE(deep_scrub_report->get_missing_shard_ids().empty()) + << "PG with empty shards should have no missing shards"; + EXPECT_TRUE(deep_scrub_report->get_missing_blobs().empty()) + << "PG with empty shards should have no missing blobs"; + + EXPECT_TRUE(deep_scrub_report->get_corrupted_blobs().empty()) + << "PG with empty shards should have no corrupted blobs"; + EXPECT_TRUE(deep_scrub_report->get_inconsistent_blobs().empty()) + << "PG with empty shards should have no inconsistent blobs"; + + // Shallow scrub on PG with empty shards should complete without errors + scrub_report = scrub_mgr->submit_scrub_task(pg_id, false /* is_deep */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + EXPECT_TRUE(scrub_report->get_corrupted_shards().empty()) + << "PG with empty shards should have no corrupted shards"; + EXPECT_TRUE(scrub_report->get_corrupted_pg_metas().empty()) << "No PG metas should be corrupted in normal case"; + EXPECT_TRUE(scrub_report->get_inconsistent_shard_metas().empty()) + << "No shard metas should be inconsistent in normal case"; + EXPECT_TRUE(scrub_report->get_missing_shard_ids().empty()) + << "PG with empty shards 
should have no missing shards"; + EXPECT_TRUE(scrub_report->get_missing_blobs().empty()) << "PG with empty shards should have no missing blobs"; + }); + + // Create blobs in all shards + shard_blob_ids_map = put_blobs(pg_shard_id_vec, num_blobs_per_shard, pg_blob_id); + LOGINFO("Created {} blobs per shard, total {} blobs", num_blobs_per_shard, num_shards * num_blobs_per_shard); + + // Verify blobs were created + verify_get_blob(pg_shard_id_vec, num_blobs_per_shard); + + // everything is healthy, deep scrub should report no issues. + run_on_pg_leader(pg_id, [&]() { + // Deep scrub on healthy PG should complete without errors + auto scrub_report = scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null for healthy PG"; + auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report); + ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport"; + + // Healthy PG with blobs should have no issues + EXPECT_TRUE(deep_scrub_report->get_corrupted_shards().empty()) << "Healthy PG should have no corrupted shards"; + EXPECT_TRUE(deep_scrub_report->get_corrupted_pg_metas().empty()) + << "No PG metas should be corrupted in normal case"; + EXPECT_TRUE(deep_scrub_report->get_inconsistent_shard_metas().empty()) + << "No shard metas should be inconsistent in normal case"; + EXPECT_TRUE(deep_scrub_report->get_missing_shard_ids().empty()) << "Healthy PG should have no missing shards"; + EXPECT_TRUE(deep_scrub_report->get_missing_blobs().empty()) << "Healthy PG should have no missing blobs"; + EXPECT_TRUE(deep_scrub_report->get_corrupted_blobs().empty()) << "Healthy PG should have no corrupted blobs"; + EXPECT_TRUE(deep_scrub_report->get_inconsistent_blobs().empty()) + << "Healthy PG should have no inconsistent blobs"; + + // Shallow scrub on healthy PG should complete without errors + scrub_report = scrub_mgr->submit_scrub_task(pg_id, false /* is_deep */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + EXPECT_TRUE(scrub_report->get_corrupted_shards().empty()) << "Healthy PG should have no corrupted shards"; + EXPECT_TRUE(scrub_report->get_corrupted_pg_metas().empty()) << "No PG metas should be corrupted in normal case"; + EXPECT_TRUE(scrub_report->get_inconsistent_shard_metas().empty()) + << "No shard metas should be inconsistent in normal case"; + EXPECT_TRUE(scrub_report->get_missing_shard_ids().empty()) << "Healthy PG should have no missing shards"; + EXPECT_TRUE(scrub_report->get_missing_blobs().empty()) << "Healthy PG should have no missing blobs"; + }); + + g_helper->sync(); + + const auto hs_pg = _obj_inst->get_hs_pg(pg_id); + ASSERT_TRUE(hs_pg) << "PG should exist for pg_id=" << pg_id; + + ASSERT_GE(num_shards, 2u) << "BasicScrubTest requires at least 2 shards"; + // First shard: simulate a missing shard (entire shard deleted from followers) + const auto missing_shard_id = shard_blob_ids_map.begin()->first; + // Second shard: simulate blob-level issues; this shard still exists on followers + const auto blob_op_shard_id = std::next(shard_blob_ids_map.begin())->first; + auto it = shard_blob_ids_map[blob_op_shard_id].begin(); + const auto missing_blob_id = it->first; + const auto corrupted_blob_id = (++it)->first; + const auto inconsistent_blob_id = (++it)->first; + + // TODO:: add corrupted shard and corrupted pg meta after we have the implementation for corrupting them.
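+
+ // Fault-injection plan for the follower side, checked against the scrub reports below:
+ //   - missing_shard_id: entire shard removed from the follower index, expected in missing_shard_ids
+ //     (and all of its blobs in missing_blobs for the shallow scrub)
+ //   - blob_op_shard_id / missing_blob_id: single blob removed from the follower index, expected in missing_blobs
+ //   - blob_op_shard_id / corrupted_blob_id: on-disk data flipped so the hash check fails, expected in corrupted_blobs
+ //   - blob_op_shard_id / inconsistent_blob_id: data changed with a recomputed valid hash, so the copy is
+ //     self-consistent yet differs across replicas, expected in inconsistent_blobs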
+ + // Corrupt data on followers + run_on_pg_follower(pg_id, [&]() { + auto& pg_index_table = hs_pg->index_table_; + + // 1. Remove missing_shard_id to simulate missing shard + delete_shard_from_index(pg_index_table, missing_shard_id); + + // 2. Delete missing_blob_id from blob_op_shard_id (different shard, still exists on follower) + delete_blob_from_index(pg_index_table, blob_op_shard_id, missing_blob_id); + + // 3. Make corrupted_blob_id corrupted + corrupt_blob_data(pg_index_table, blob_op_shard_id, corrupted_blob_id); + + // 4. Make inconsistent_blob_id inconsistent (valid but different hash) + make_blob_inconsistent(pg_index_table, blob_op_shard_id, inconsistent_blob_id, _obj_inst.get()); + }); + + g_helper->sync(); + + run_on_pg_leader(pg_id, [&]() { + // do deep scrub and check the scrub report + auto scrub_report = scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null"; + auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report); + ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport"; + + const auto& members = (hs_pg->pg_info_).members; + std::set< peer_id_t > follower_peer_ids; + const auto& leader_uuid = _obj_inst->our_uuid(); + for (const auto& member : members) { + if (member.id == leader_uuid) { continue; } + follower_peer_ids.insert(member.id); + } + + // Verify missing blobs, missing shards, and corrupted blobs for all followers + for (const auto& peer_id : follower_peer_ids) { + verify_missing_blobs(deep_scrub_report, peer_id, BlobRoute{blob_op_shard_id, missing_blob_id}); + verify_missing_shards(deep_scrub_report, peer_id, missing_shard_id); + verify_corrupted_blobs(deep_scrub_report, peer_id, BlobRoute{blob_op_shard_id, corrupted_blob_id}); + } + + // False positive guard: exactly one shard should be missing, no healthy shard must leak in. + EXPECT_EQ(deep_scrub_report->get_missing_shard_ids().size(), 1u) + << "Exactly one shard should be reported missing after follower corruption"; + + // False positive guard: each follower must have exactly the one blob we corrupted. + { + const auto& all_corrupted = deep_scrub_report->get_corrupted_blobs(); + for (const auto& peer_id : follower_peer_ids) { + auto cit = all_corrupted.find(peer_id); + if (cit != all_corrupted.end()) { + EXPECT_EQ(cit->second.size(), 1u) + << "Follower peer_id=" << peer_id << " should have exactly 1 corrupted blob"; + } + } + } + + const auto inconsistent_blobs = deep_scrub_report->get_inconsistent_blobs(); + EXPECT_TRUE(inconsistent_blobs.size() == 1) + << "Inconsistent blob should be reported in deep scrub report for one of the followers"; + const auto it = inconsistent_blobs.find(BlobRoute{blob_op_shard_id, inconsistent_blob_id}); + EXPECT_TRUE(it != inconsistent_blobs.end()) + << "The inconsistent blob should be reported in deep scrub report for blob_id=" << inconsistent_blob_id; + auto& inconsistent_blob_peers = it->second; + + // inconsistent_blob_peers should contain all the peers.
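+ // Every member, leader included, is expected in the peer set: each replica's copy still passes its
+ // own hash check, so the scrubber only sees that the copies disagree and reports all of them.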
+ EXPECT_TRUE(inconsistent_blob_peers.size() == follower_peer_ids.size() + 1) + << "Inconsistent blob should be reported in deep scrub report for all followers"; + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(inconsistent_blob_peers.count(peer_id) == 1) + << "The inconsistent blob should be reported in deep scrub report for peer_id=" << peer_id; + } + EXPECT_TRUE(inconsistent_blob_peers.count(leader_uuid) == 1) + << "The inconsistent blob should be reported in deep scrub report for leader peer_id=" << leader_uuid; + + // do shallow scrub, shallow scrub can only find missing blob/shard + auto shallow_scrub_report = scrub_mgr->submit_scrub_task(pg_id, false, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + ASSERT_NE(shallow_scrub_report, nullptr) << "Shallow scrub report should not be null"; + + auto miss_blob_in_shallow_report = shallow_scrub_report->get_missing_blobs(); + EXPECT_TRUE(miss_blob_in_shallow_report.size() == num_blobs_per_shard + 1) + << "Should report all blobs from missing_shard_id (" << num_blobs_per_shard + << ") plus 1 from blob_op_shard_id, got " << miss_blob_in_shallow_report.size(); + { + auto it = miss_blob_in_shallow_report.find(BlobRoute{blob_op_shard_id, missing_blob_id}); + EXPECT_TRUE(it != miss_blob_in_shallow_report.end()) + << "The missing blob should be reported in shallow scrub report"; + if (it != miss_blob_in_shallow_report.end()) { + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(it->second.count(peer_id) == 0) + << "Follower peer_id=" << peer_id << " should not have the missing blob"; + } + } + } + + // Verify each individual blob from missing_shard_id appears in the report. + // Checking only the total count is insufficient: if the reported blob_ids were wrong + // but the count matched, the test would still pass. + for (const auto& [blob_id, _] : shard_blob_ids_map[missing_shard_id]) { + auto it = miss_blob_in_shallow_report.find(BlobRoute{missing_shard_id, blob_id}); + EXPECT_TRUE(it != miss_blob_in_shallow_report.end()) + << "Blob " << blob_id << " from missing_shard_id=" << missing_shard_id + << " should be reported in shallow scrub report"; + if (it != miss_blob_in_shallow_report.end()) { + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(it->second.count(peer_id) == 0) + << "Follower peer_id=" << peer_id << " should not have blob " << blob_id + << " from missing_shard_id"; + } + } + } + + // missing_shard_ids[shard_id] = peers that have the shard; followers are absent from that set. + const auto missing_shards_in_shallow_report = shallow_scrub_report->get_missing_shard_ids(); + EXPECT_TRUE(missing_shards_in_shallow_report.size() == 1) + << "One missing shard should be reported in shallow scrub report"; + { + auto it = missing_shards_in_shallow_report.find(missing_shard_id); + EXPECT_TRUE(it != missing_shards_in_shallow_report.end()) + << "The missing shard should be reported in shallow scrub report"; + if (it != missing_shards_in_shallow_report.end()) { + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(it->second.count(peer_id) == 0) + << "Follower peer_id=" << peer_id << " should not have the missing shard"; + } + } + } + }); + + g_helper->sync(); + + // Test case for leader missing/corrupted + LOGINFO("Starting leader missing/corrupted test case"); + + // Get new blob ids for leader corruption test. + // Must use blob_op_shard_id (not missing_shard_id) because followers deleted the entire + // missing_shard_id — no hash comparison is possible for blobs in that shard. 
+ // blob_op_shard_id exists on both leader and followers; skip the first 3 blobs already + // used by the follower test (missing/corrupted/inconsistent at indices 0/1/2). + auto& leader_shard_blobs = shard_blob_ids_map[blob_op_shard_id]; + auto leader_it = leader_shard_blobs.begin(); + std::advance(leader_it, 3); + const auto leader_missing_blob_id = leader_it->first; + const auto leader_corrupted_blob_id = (++leader_it)->first; + const auto leader_inconsistent_blob_id = (++leader_it)->first; + + // Corrupt data on leader + run_on_pg_leader(pg_id, [&]() { + auto& pg_index_table = hs_pg->index_table_; + + // 1. Delete leader_missing_blob_id from pg_index table on leader + delete_blob_from_index(pg_index_table, blob_op_shard_id, leader_missing_blob_id); + LOGINFO("Deleted blob {} from leader index table", leader_missing_blob_id); + + // 2. Make leader_corrupted_blob_id corrupted on leader + corrupt_blob_data(pg_index_table, blob_op_shard_id, leader_corrupted_blob_id); + LOGINFO("Corrupted blob {} on leader", leader_corrupted_blob_id); + + // 3. Make leader_inconsistent_blob_id inconsistent on leader + make_blob_inconsistent(pg_index_table, blob_op_shard_id, leader_inconsistent_blob_id, _obj_inst.get()); + LOGINFO("Made blob {} inconsistent on leader", leader_inconsistent_blob_id); + }); + + g_helper->sync(); + + // Run scrub and verify both leader and follower corruptions are detected + run_on_pg_leader(pg_id, [&]() { + LOGINFO("Running deep scrub to detect both leader and follower corruptions"); + auto scrub_report = scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null"; + auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report); + ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport"; + + const auto& leader_uuid = _obj_inst->our_uuid(); + const auto& members = (hs_pg->pg_info_).members; + std::set< peer_id_t > follower_peer_ids; + for (const auto& member : members) { + if (member.id != leader_uuid) { follower_peer_ids.insert(member.id); } + } + + // ========== Verify Missing Blobs ========== + LOGINFO("Verifying missing blobs detection"); + verify_missing_blobs(deep_scrub_report, leader_uuid, BlobRoute{blob_op_shard_id, leader_missing_blob_id}); + for (const auto& peer_id : follower_peer_ids) { + verify_missing_blobs(deep_scrub_report, peer_id, BlobRoute{blob_op_shard_id, missing_blob_id}); + } + + // ========== Verify Missing Shards ========== + LOGINFO("Verifying missing shards detection"); + for (const auto& peer_id : follower_peer_ids) { + verify_missing_shards(deep_scrub_report, peer_id, missing_shard_id); + } + + // ========== Verify Corrupted Blobs ========== + LOGINFO("Verifying corrupted blobs detection"); + verify_corrupted_blobs(deep_scrub_report, leader_uuid, BlobRoute{blob_op_shard_id, leader_corrupted_blob_id}); + for (const auto& peer_id : follower_peer_ids) { + verify_corrupted_blobs(deep_scrub_report, peer_id, BlobRoute{blob_op_shard_id, corrupted_blob_id}); + } + + // False positive guard: still only one missing shard (follower's missing_shard_id); + // leader corruption was blob-level only, no shard should be newly added. + EXPECT_EQ(deep_scrub_report->get_missing_shard_ids().size(), 1u) + << "Exactly one shard should be reported missing in combined leader+follower test"; + + // False positive guard: each node should have exactly the one blob we corrupted. 
+ { + const auto& all_corrupted = deep_scrub_report->get_corrupted_blobs(); + auto lit = all_corrupted.find(leader_uuid); + if (lit != all_corrupted.end()) { + EXPECT_EQ(lit->second.size(), 1u) << "Leader should have exactly 1 corrupted blob"; + } + for (const auto& peer_id : follower_peer_ids) { + auto fit = all_corrupted.find(peer_id); + if (fit != all_corrupted.end()) { + EXPECT_EQ(fit->second.size(), 1u) + << "Follower peer_id=" << peer_id << " should have exactly 1 corrupted blob"; + } + } + } + + // ========== Verify Inconsistent Blobs ========== + const auto inconsistent_blobs = deep_scrub_report->get_inconsistent_blobs(); + LOGINFO("Verifying inconsistent blobs detection, inconsistent_blobs.size()={}", inconsistent_blobs.size()); + + // Should have 2 inconsistent blobs: one from follower test, one from leader test + EXPECT_TRUE(inconsistent_blobs.size() == 2) + << "Should have 2 inconsistent blobs (1 from follower, 1 from leader)"; + + // Verify leader's inconsistent blob + auto leader_inconsistent_it = inconsistent_blobs.find(BlobRoute{blob_op_shard_id, leader_inconsistent_blob_id}); + EXPECT_TRUE(leader_inconsistent_it != inconsistent_blobs.end()) + << "The leader's inconsistent blob should be reported in deep scrub report"; + if (leader_inconsistent_it != inconsistent_blobs.end()) { + auto& inconsistent_blob_peers = leader_inconsistent_it->second; + // All peers including leader should be in the inconsistent blob report + EXPECT_TRUE(inconsistent_blob_peers.size() == follower_peer_ids.size() + 1) + << "Leader's inconsistent blob should be reported for all peers including leader"; + EXPECT_TRUE(inconsistent_blob_peers.count(leader_uuid) == 1) + << "Leader should be in the inconsistent blob peers"; + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(inconsistent_blob_peers.count(peer_id) == 1) + << "Follower peer_id=" << peer_id << " should be in leader's inconsistent blob peers"; + } + } + + // Verify follower's inconsistent blob (from earlier test) + auto follower_inconsistent_it = inconsistent_blobs.find(BlobRoute{blob_op_shard_id, inconsistent_blob_id}); + EXPECT_TRUE(follower_inconsistent_it != inconsistent_blobs.end()) + << "The follower's inconsistent blob should be reported in deep scrub report"; + if (follower_inconsistent_it != inconsistent_blobs.end()) { + auto& inconsistent_blob_peers = follower_inconsistent_it->second; + // All peers should be in the inconsistent blob report + EXPECT_TRUE(inconsistent_blob_peers.size() == follower_peer_ids.size() + 1) + << "Follower's inconsistent blob should be reported for all peers"; + EXPECT_TRUE(inconsistent_blob_peers.count(leader_uuid) == 1) + << "Leader should be in follower's inconsistent blob peers"; + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(inconsistent_blob_peers.count(peer_id) == 1) + << "Follower peer_id=" << peer_id << " should be in follower's inconsistent blob peers"; + } + } + }); + + g_helper->sync(); +} + +// Test leader missing an entire shard: followers have the shard but leader's index doesn't. +// Verifies that deep and shallow scrub both detect the missing shard and all its blobs. 
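+// The shard is removed only from the leader's local B-tree index (its data blocks are untouched),
+// using the same delete_shard_from_index helper exercised against the followers in BasicScrubTest.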
+TEST_F(HomeObjectFixture, LeaderMissingShardTest) { + const pg_id_t pg_id = 1; + create_pg(pg_id); + auto scrub_mgr = _obj_inst->scrub_manager(); + + const uint64_t num_blobs_per_shard = SISL_OPTIONS["num_blobs"].as< uint64_t >(); + const uint64_t shard_size = 64 * Mi; + + std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec; + std::map< pg_id_t, blob_id_t > pg_blob_id; + pg_blob_id[pg_id] = 0; + + // Two shards: one will be deleted from the leader's index, one stays healthy on all peers. + auto missing_shard_info = create_shard(pg_id, shard_size, "leader missing shard"); + auto healthy_shard_info = create_shard(pg_id, shard_size, "healthy shard"); + pg_shard_id_vec[pg_id] = {missing_shard_info.id, healthy_shard_info.id}; + + const auto leader_missing_shard_id = missing_shard_info.id; + const auto healthy_shard_id = healthy_shard_info.id; + + auto shard_blob_ids_map = put_blobs(pg_shard_id_vec, num_blobs_per_shard, pg_blob_id); + verify_get_blob(pg_shard_id_vec, num_blobs_per_shard); + + g_helper->sync(); + + const auto hs_pg = _obj_inst->get_hs_pg(pg_id); + ASSERT_TRUE(hs_pg) << "PG should exist for pg_id=" << pg_id; + + // Simulate leader losing the shard by removing it from the local B-tree index only. + // Followers still have the shard, so scrub should detect the leader as missing it. + run_on_pg_leader(pg_id, [&]() { + delete_shard_from_index(hs_pg->index_table_, leader_missing_shard_id); + LOGINFO("Deleted shard {} from leader index to simulate leader missing shard", leader_missing_shard_id); + }); + + g_helper->sync(); + + run_on_pg_leader(pg_id, [&]() { + const auto& leader_uuid = _obj_inst->our_uuid(); + std::set< peer_id_t > follower_peer_ids; + for (const auto& member : hs_pg->pg_info_.members) { + if (member.id != leader_uuid) { follower_peer_ids.insert(member.id); } + } + + // ===== Deep scrub ===== + auto scrub_report = scrub_mgr->submit_scrub_task(pg_id, true, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + ASSERT_NE(scrub_report, nullptr) << "Deep scrub report should not be null"; + auto deep_scrub_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(scrub_report); + ASSERT_NE(deep_scrub_report, nullptr) << "Should be DeepScrubReport"; + + // The missing shard must appear in missing_shard_ids. + // missing_shard_ids[shard_id] = peers that HAVE the shard; leader absent from this set. + verify_missing_shards(deep_scrub_report, leader_uuid, leader_missing_shard_id); + { + const auto& missing_shards = deep_scrub_report->get_missing_shard_ids(); + auto it = missing_shards.find(leader_missing_shard_id); + ASSERT_TRUE(it != missing_shards.end()) << "leader_missing_shard_id must be in missing_shard_ids"; + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(it->second.count(peer_id) == 1) + << "Follower peer_id=" << peer_id << " should be in peer set (it has the shard)"; + } + // Healthy shard must not appear as missing. + EXPECT_TRUE(missing_shards.find(healthy_shard_id) == missing_shards.end()) + << "Healthy shard should not be reported as missing"; + } + + // Every blob in the missing shard must be reported as missing on the leader. + for (const auto& [blob_id, _] : shard_blob_ids_map[leader_missing_shard_id]) { + verify_missing_blobs(deep_scrub_report, leader_uuid, BlobRoute{leader_missing_shard_id, blob_id}); + } + + // Blobs from the healthy shard must not appear in missing_blobs. 
+ { + const auto& missing_blobs = deep_scrub_report->get_missing_blobs(); + for (const auto& [blob_id, _] : shard_blob_ids_map[healthy_shard_id]) { + EXPECT_TRUE(missing_blobs.find(BlobRoute{healthy_shard_id, blob_id}) == missing_blobs.end()) + << "Healthy blob " << blob_id << " should not be reported as missing"; + } + } + + // ===== Shallow scrub ===== + auto shallow_scrub_report = scrub_mgr->submit_scrub_task(pg_id, false, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + ASSERT_NE(shallow_scrub_report, nullptr) << "Shallow scrub report should not be null"; + + // Missing shard must be detected in shallow scrub as well. + { + const auto& shallow_missing_shards = shallow_scrub_report->get_missing_shard_ids(); + auto it = shallow_missing_shards.find(leader_missing_shard_id); + EXPECT_TRUE(it != shallow_missing_shards.end()) << "Missing shard should appear in shallow scrub report"; + if (it != shallow_missing_shards.end()) { + EXPECT_TRUE(it->second.count(leader_uuid) == 0) << "Leader should not have the missing shard"; + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(it->second.count(peer_id) == 1) + << "Follower peer_id=" << peer_id << " should have the shard"; + } + } + } + + // Shallow scrub missing_blobs should contain exactly the blobs from the missing shard. + EXPECT_EQ(shallow_scrub_report->get_missing_blobs().size(), num_blobs_per_shard) + << "Missing blob count should equal num_blobs_per_shard"; + { + const auto& shallow_missing_blobs = shallow_scrub_report->get_missing_blobs(); + for (const auto& [blob_id, _] : shard_blob_ids_map[leader_missing_shard_id]) { + auto it = shallow_missing_blobs.find(BlobRoute{leader_missing_shard_id, blob_id}); + EXPECT_TRUE(it != shallow_missing_blobs.end()) + << "Blob " << blob_id << " from missing shard should be in shallow scrub report"; + if (it != shallow_missing_blobs.end()) { + EXPECT_TRUE(it->second.count(leader_uuid) == 0) << "Leader should not have blob " << blob_id; + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(it->second.count(peer_id) == 1) + << "Follower peer_id=" << peer_id << " should have blob " << blob_id; + } + } + } + } + }); + + g_helper->sync(); +} + +// Test scrub superblock persistence across deep and shallow scrubs +TEST_F(HomeObjectFixture, ScrubSuperblockPersistenceTest) { + const pg_id_t pg_id = 1; + create_pg(pg_id); + + const uint64_t shard_size = 64 * Mi; + create_shard(pg_id, shard_size, "shard_meta"); + auto scrub_mgr = _obj_inst->scrub_manager(); + + run_on_pg_leader(pg_id, [&]() { + // Get initial scrub superblock (should be newly created) + auto initial_sb = scrub_mgr->get_scrub_superblk(pg_id); + ASSERT_TRUE(initial_sb.has_value()) << "Should have scrub superblock"; + + auto initial_deep_scrub_time = initial_sb->last_deep_scrub_timestamp; + auto initial_shallow_scrub_time = initial_sb->last_shallow_scrub_timestamp; + + // Give some time to ensure timestamps will be different + std::this_thread::sleep_for(std::chrono::seconds(2)); + + // Run a deep scrub + scrub_mgr->submit_scrub_task(pg_id, true /* is_deep */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + // Check that deep scrub timestamp updated + auto after_deep_sb = scrub_mgr->get_scrub_superblk(pg_id); + ASSERT_TRUE(after_deep_sb.has_value()); + EXPECT_GT(after_deep_sb->last_deep_scrub_timestamp, initial_deep_scrub_time) + << "Deep scrub timestamp should be updated"; + EXPECT_EQ(after_deep_sb->last_shallow_scrub_timestamp, initial_shallow_scrub_time) + << "Shallow scrub timestamp should not change after deep scrub"; + + 
std::this_thread::sleep_for(std::chrono::seconds(2)); + + // Run a shallow scrub + scrub_mgr->submit_scrub_task(pg_id, false /* is_deep */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + // Check that shallow scrub timestamp updated + auto after_shallow_sb = scrub_mgr->get_scrub_superblk(pg_id); + ASSERT_TRUE(after_shallow_sb.has_value()); + EXPECT_EQ(after_shallow_sb->last_deep_scrub_timestamp, after_deep_sb->last_deep_scrub_timestamp) + << "Deep scrub timestamp should not change after shallow scrub"; + EXPECT_GT(after_shallow_sb->last_shallow_scrub_timestamp, after_deep_sb->last_shallow_scrub_timestamp) + << "Shallow scrub timestamp should be updated"; + }); + + g_helper->sync(); +} + +// Test cancel scrub task +TEST_F(HomeObjectFixture, CancelScrubTaskTest) { + const pg_id_t pg_id = 1; + create_pg(pg_id); + auto scrub_mgr = _obj_inst->scrub_manager(); + + const uint64_t shard_size = 64 * Mi; + auto shard_info = create_shard(pg_id, shard_size, "shard meta"); + + std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec; + std::map< pg_id_t, blob_id_t > pg_blob_id; + pg_shard_id_vec[pg_id].push_back(shard_info.id); + pg_blob_id[pg_id] = 0; + + const uint64_t num_blobs = 10; + put_blobs(pg_shard_id_vec, num_blobs, pg_blob_id); + g_helper->sync(); + + // Submit a scrub task and then cancel it + run_on_pg_leader(pg_id, [&]() { + auto scrub_future = scrub_mgr->submit_scrub_task(pg_id, true, SCRUB_TRIGGER_TYPE::MANUALLY); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + scrub_mgr->cancel_scrub_task(pg_id); + LOGINFO("Cancelled scrub task for pg={}", pg_id); + auto scrub_report = std::move(scrub_future).get(); + LOGINFO("Scrub task cancelled, report: {}", scrub_report ? "present" : "null"); + + // The critical invariant: cancel must clear in_scrubbing so that a subsequent + // submit_scrub_task is accepted. A null return here means the state was not cleaned up. 
+ auto followup_report = scrub_mgr->submit_scrub_task(pg_id, true, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + EXPECT_NE(followup_report, nullptr) << "A new scrub task should be accepted after cancellation; " + "null means in_scrubbing was not cleared"; + scrub_mgr->cancel_scrub_task(pg_id); + LOGINFO("Cancel non-existent scrub task for pg={} - should not crash", pg_id); + }); + + g_helper->sync(); +} + +// Test concurrent scrubs on multiple PGs +TEST_F(HomeObjectFixture, ConcurrentScrubsOnMultiplePGsTest) { + const uint64_t num_pgs = 3; + const uint64_t shard_size = 64 * Mi; + + std::vector< pg_id_t > pg_ids; + std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec; + std::map< pg_id_t, blob_id_t > pg_blob_id; + + // Create multiple PGs with shards and blobs + for (uint64_t i = 1; i <= num_pgs; ++i) { + pg_id_t pg_id = i; + pg_ids.push_back(pg_id); + create_pg(pg_id); + auto shard_info = create_shard(pg_id, shard_size, "shard meta " + std::to_string(pg_id)); + pg_shard_id_vec[pg_id].push_back(shard_info.id); + pg_blob_id[pg_id] = 0; + put_blobs(pg_shard_id_vec, 5, pg_blob_id); + } + + auto scrub_mgr = _obj_inst->scrub_manager(); + + // Submit scrub tasks for all PGs concurrently + std::vector< std::pair< pg_id_t, folly::SemiFuture< std::shared_ptr< ScrubManager::ShallowScrubReport > > > > + scrub_futures; + + for (const auto& pg_id : pg_ids) { + run_on_pg_leader(pg_id, [&]() { + auto future = scrub_mgr->submit_scrub_task(pg_id, true, SCRUB_TRIGGER_TYPE::MANUALLY); + scrub_futures.emplace_back(pg_id, std::move(future)); + LOGINFO("Submitted deep scrub for pg={}", pg_id); + }); + } + + // Wait for all scrub tasks to complete and verify each report is clean + for (auto& [pg_id, future] : scrub_futures) { + auto report = std::move(future).get(); + ASSERT_NE(report, nullptr) << "Scrub report should not be null for pg=" << pg_id; + + auto deep_report = std::dynamic_pointer_cast< ScrubManager::DeepScrubReport >(report); + ASSERT_NE(deep_report, nullptr) << "Should be DeepScrubReport for pg=" << pg_id; + + EXPECT_TRUE(deep_report->get_missing_shard_ids().empty()) << "pg=" << pg_id << " should have no missing shards"; + EXPECT_TRUE(deep_report->get_missing_blobs().empty()) << "pg=" << pg_id << " should have no missing blobs"; + EXPECT_TRUE(deep_report->get_corrupted_blobs().empty()) << "pg=" << pg_id << " should have no corrupted blobs"; + EXPECT_TRUE(deep_report->get_inconsistent_blobs().empty()) + << "pg=" << pg_id << " should have no inconsistent blobs"; + LOGINFO("PG {} concurrent scrub completed cleanly", pg_id); + } + + g_helper->sync(); +} + +// Test deleted blob filter in scrub report +TEST_F(HomeObjectFixture, ReconcileScrubReportTest) { + const pg_id_t pg_id = 1; + create_pg(pg_id); + auto scrub_mgr = _obj_inst->scrub_manager(); + + const uint64_t shard_size = 64 * Mi; + auto shard_info = create_shard(pg_id, shard_size, "shard meta"); + + std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec; + std::map< pg_id_t, blob_id_t > pg_blob_id; + pg_shard_id_vec[pg_id].push_back(shard_info.id); + pg_blob_id[pg_id] = 0; + + std::map< shard_id_t, std::map< blob_id_t, uint64_t > > shard_blob_ids_map; + + // Create some blobs + const uint64_t num_blobs = 10; + shard_blob_ids_map = put_blobs(pg_shard_id_vec, num_blobs, pg_blob_id); + const auto hs_pg = _obj_inst->get_hs_pg(pg_id); + ASSERT_TRUE(hs_pg) << "PG should exist for pg_id=" << pg_id; + + const auto shard_id = shard_info.id; + auto& shard_blobs = shard_blob_ids_map[shard_id]; + + // Select blobs to test: + // - 
missing_blob_to_delete: will be missing from leader index AND deleted via blob delete + // - missing_blob_not_deleted: will be missing from leader index but NOT deleted + auto it = shard_blobs.begin(); + const auto missing_blob_to_delete = it->first; // First blob: will be deleted via blob delete + const auto missing_blob_not_deleted = (++it)->first; // Second blob: will NOT be deleted + + // Delete both blobs from index table to simulate missing blobs on followers + run_on_pg_follower(pg_id, [&]() { + auto& pg_index_table = hs_pg->index_table_; + delete_blob_from_index(pg_index_table, shard_id, missing_blob_to_delete); + delete_blob_from_index(pg_index_table, shard_id, missing_blob_not_deleted); + LOGINFO("Deleted blobs {} and {} from follower index table", missing_blob_to_delete, missing_blob_not_deleted); + }); + + g_helper->sync(); + + run_on_pg_leader(pg_id, [&]() { + // only the blob that was deleted via blob delete should be filtered out, the other missing blob should be + // reported in the scrub report + std::set< peer_id_t > follower_peer_ids; + const auto& leader_uuid = _obj_inst->our_uuid(); + const auto& members = (hs_pg->pg_info_).members; + for (const auto& member : members) { + if (member.id == leader_uuid) { continue; } + follower_peer_ids.insert(member.id); + } + + auto scrub_report = + scrub_mgr->submit_scrub_task(pg_id, false /* shallow */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + // missing_blobs[blob_route] = peers that have the blob; followers are absent from that set. + auto missing_blobs = scrub_report->get_missing_blobs(); + EXPECT_TRUE(missing_blobs.size() == 2) << "There should be two missing blobs in scrub report"; + for (const auto& blob_route : + {BlobRoute{shard_id, missing_blob_to_delete}, BlobRoute{shard_id, missing_blob_not_deleted}}) { + auto it = missing_blobs.find(blob_route); + ASSERT_TRUE(it != missing_blobs.end()) << "Missing blob should be reported in scrub report"; + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(it->second.count(peer_id) == 0) + << "Follower peer_id=" << peer_id << " should not have the missing blob"; + } + } + +#ifdef _PRERELEASE + set_callback_flip( + "delete_missing_blob_through_raft", std::function< void() >([this, missing_blob_to_delete, shard_id]() { + auto ret = + _obj_inst->blob_manager()->del(shard_id, missing_blob_to_delete, generateRandomTraceId()).get(); + if (!ret) { + FAIL() << "Blob deletion via raft failed for shard=" << shard_id + << " blob=" << missing_blob_to_delete << ", error=" << fmt::format("{}", ret.error()); + } else { + LOGINFO("Successfully deleted blob {} in shard {} via raft", missing_blob_to_delete, shard_id); + } + // wait until all the pending gc tasks for this pg are completed + std::this_thread::sleep_for(std::chrono::seconds(2)); + })); + + scrub_report = scrub_mgr->submit_scrub_task(pg_id, false /* shallow */, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + + remove_flip("delete_missing_blob_through_raft"); + + // Verify the scrub report + ASSERT_NE(scrub_report, nullptr) << "Scrub report should not be null"; + + missing_blobs = scrub_report->get_missing_blobs(); + EXPECT_TRUE(missing_blobs.size() == 1) << "There should be one missing blob in scrub report after deletion"; + { + auto it = missing_blobs.find(BlobRoute{shard_id, missing_blob_not_deleted}); + ASSERT_TRUE(it != missing_blobs.end()) + << "The missing blob that was not deleted should be reported in scrub report"; + for (const auto& peer_id : follower_peer_ids) { + EXPECT_TRUE(it->second.count(peer_id) == 0) + << "Follower 
peer_id=" << peer_id << " should not have the missing blob"; + } + } +#endif + }); + + g_helper->sync(); +} + +// Test add and remove PG from scrub manager +TEST_F(HomeObjectFixture, AddRemovePGScrubTest) { + const pg_id_t pg_id = 1; + const uint64_t shard_size = 64 * Mi; + + // Create PG and verify scrub superblock is created + create_pg(pg_id); + create_shard(pg_id, shard_size, "shard meta"); + + auto scrub_mgr = _obj_inst->scrub_manager(); + + // Verify scrub superblock exists + run_on_pg_leader(pg_id, [&]() { + auto sb = scrub_mgr->get_scrub_superblk(pg_id); + ASSERT_TRUE(sb.has_value()) << "Scrub superblock should exist after PG creation"; + LOGINFO("Scrub superblock created for pg={}", pg_id); + }); + + // Run a scrub to update timestamps + run_on_pg_leader(pg_id, [&]() { + // Get initial timestamp before scrub + auto sb_before = scrub_mgr->get_scrub_superblk(pg_id); + ASSERT_TRUE(sb_before.has_value()) << "Scrub superblock should exist before scrub"; + uint64_t timestamp_before = sb_before->last_shallow_scrub_timestamp; + LOGINFO("Timestamp before scrub: {}", timestamp_before); + + // Wait a bit to ensure timestamp will be different + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + + auto report = scrub_mgr->submit_scrub_task(pg_id, false, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + ASSERT_NE(report, nullptr) << "Scrub report should not be null"; + + // Verify timestamp was updated after scrub + auto sb_after = scrub_mgr->get_scrub_superblk(pg_id); + ASSERT_TRUE(sb_after.has_value()) << "Scrub superblock should exist after scrub"; + uint64_t timestamp_after = sb_after->last_shallow_scrub_timestamp; + EXPECT_GT(timestamp_after, timestamp_before) << "Shallow scrub timestamp should be updated after scrub"; + LOGINFO("Timestamp after scrub: {} (updated from {})", timestamp_after, timestamp_before); + }); + g_helper->sync(); + + // Now delete the PG - this should cancel any running scrub and remove superblock + _obj_inst->pg_manager()->destroy_pg(pg_id); + auto report = scrub_mgr->submit_scrub_task(pg_id, false, SCRUB_TRIGGER_TYPE::MANUALLY).get(); + ASSERT_EQ(report, nullptr) << "Scrub report should be null after PG deletion"; + LOGINFO("Scrub task for deleted pg={} returned null report as expected", pg_id); + + // Verify scrub superblock is cleaned up: remove_pg erases the entry from m_pg_scrub_sb_map, + // so get_scrub_superblk must return nullopt after the PG is fully deleted. 
+ auto cleaned_sb = scrub_mgr->get_scrub_superblk(pg_id); + EXPECT_FALSE(cleaned_sb.has_value()) << "Scrub superblock should be removed from scrub manager after PG deletion"; + LOGINFO("PG deleted, scrub superblock correctly cleaned up"); +} + +// Test local scrub methods +TEST_F(HomeObjectFixture, LocalScrubMethodsTest) { + const pg_id_t pg_id = 1; + create_pg(pg_id); + auto scrub_mgr = _obj_inst->scrub_manager(); + + const uint64_t shard_size = 64 * Mi; + auto shard_info = create_shard(pg_id, shard_size, "shard meta"); + + std::map< pg_id_t, std::vector< shard_id_t > > pg_shard_id_vec; + std::map< pg_id_t, blob_id_t > pg_blob_id; + pg_shard_id_vec[pg_id].push_back(shard_info.id); + pg_blob_id[pg_id] = 0; + + // Create blobs first + const uint64_t num_blobs = 10; + auto shard_blob_ids_map = put_blobs(pg_shard_id_vec, num_blobs, pg_blob_id); + LOGINFO("Created {} blobs for local scrub test", num_blobs); + const auto hs_pg = _obj_inst->get_hs_pg(pg_id); + ASSERT_TRUE(hs_pg) << "PG should exist for pg_id=" << pg_id; + + const auto shard_id = shard_info.id; + auto& shard_blobs = shard_blob_ids_map[shard_id]; + + auto it = shard_blobs.begin(); + const auto corrupted_blob_id = it->first; + + // corrupt one blob's data in the index table to simulate a blob-level corruption that should be detected by local + // blob scrub. + auto& pg_index_table = hs_pg->index_table_; + corrupt_blob_data(pg_index_table, shard_id, corrupted_blob_id); + + auto my_uuid = _obj_inst->our_uuid(); + const int64_t scrub_lsn = hs_pg->repl_dev_->get_last_commit_lsn(); + + // Test local_scrub_meta: covers range [0, shard_id] + auto meta_req = std::make_shared< ScrubManager::scrub_req >(pg_id, 1, scrub_lsn, 0, 0, shard_id, UINT64_MAX, + SCRUB_TYPE::META, my_uuid); + auto meta_result = scrub_mgr->local_scrub_meta(meta_req); + ASSERT_NE(meta_result, nullptr) << "local_scrub_meta should return a result"; + LOGINFO("Meta scrub completed, {} entries", meta_result->entries.size()); + + // Test local_scrub_blob (shallow): all entries should carry ScrubStatus::NONE + auto shallow_req = std::make_shared< ScrubManager::scrub_req >(pg_id, 2, scrub_lsn, shard_id, 0, shard_id, + UINT64_MAX, SCRUB_TYPE::SHALLOW_BLOB, my_uuid); + auto shallow_result = scrub_mgr->local_scrub_blob(shallow_req); + ASSERT_NE(shallow_result, nullptr) << "local_scrub_blob (shallow) should return a result"; + LOGINFO("Shallow blob scrub completed, {} entries", shallow_result->entries.size()); + for (const auto& [route, val] : shallow_result->entries) { + auto* status = std::get_if< ScrubStatus >(&val); + ASSERT_TRUE(status != nullptr) << "Shallow entry should carry ScrubStatus"; + EXPECT_EQ(*status, ScrubStatus::NONE) << "Shallow blob entry should be NONE"; + } + + // Test local_scrub_blob (deep): should detect the corrupted blob + auto deep_req = std::make_shared< ScrubManager::scrub_req >(pg_id, 3, scrub_lsn, shard_id, 0, shard_id, UINT64_MAX, + SCRUB_TYPE::DEEP_BLOB, my_uuid); + auto deep_result = scrub_mgr->local_scrub_blob(deep_req); + ASSERT_NE(deep_result, nullptr) << "local_scrub_blob (deep) should return a result"; + LOGINFO("Deep blob scrub completed, {} entries", deep_result->entries.size()); + + auto corrupted_it = deep_result->entries.find(BlobRoute{shard_id, corrupted_blob_id}); + EXPECT_TRUE(corrupted_it != deep_result->entries.end()) << "Corrupted blob should appear in deep scrub result"; + if (corrupted_it != deep_result->entries.end()) { + auto* status = std::get_if< ScrubStatus >(&corrupted_it->second); + ASSERT_TRUE(status != nullptr) << 
"Corrupted blob result should be ScrubStatus"; + EXPECT_EQ(*status, ScrubStatus::MISMATCH) << "Corrupted blob should have MISMATCH status"; + LOGINFO("Deep scrub correctly detected corrupted blob {}", corrupted_blob_id); + } +} + +// Test scrub request serialization and deserialization +TEST_F(HomeObjectFixture, ScrubRequestSerializationTest) { + const pg_id_t pg_id = 10; + auto my_uuid = _obj_inst->our_uuid(); + + // Test META scrub_req serialization + { + auto req = std::make_shared< ScrubManager::scrub_req >(pg_id, 1, 100, 0, 0, UINT64_MAX, UINT64_MAX, + SCRUB_TYPE::META, my_uuid); + auto buffer = req->build_flat_buffer(); + EXPECT_GT(buffer.size(), 0) << "Serialized buffer should not be empty"; + auto req_loaded = std::make_shared< ScrubManager::scrub_req >(); + bool load_success = req_loaded->load(buffer.data(), buffer.size()); + EXPECT_TRUE(load_success) << "Deserialization should succeed"; + + EXPECT_EQ(req_loaded->pg_id, pg_id); + EXPECT_EQ(req_loaded->req_id, 1u); + EXPECT_EQ(req_loaded->scrub_lsn, 100); + EXPECT_EQ(req_loaded->start_shard_id, 0u); + EXPECT_EQ(req_loaded->end_shard_id, UINT64_MAX); + EXPECT_EQ(req_loaded->scrub_type, SCRUB_TYPE::META); + EXPECT_EQ(req_loaded->issuer_peer_id, my_uuid); + } + + // Test DEEP_BLOB scrub_req serialization + { + auto req = std::make_shared< ScrubManager::scrub_req >(pg_id, 2, 200, 100, 0, 200, UINT64_MAX, + SCRUB_TYPE::DEEP_BLOB, my_uuid); + auto buffer = req->build_flat_buffer(); + EXPECT_GT(buffer.size(), 0); + auto req_loaded = std::make_shared< ScrubManager::scrub_req >(); + bool load_success = req_loaded->load(buffer.data(), buffer.size()); + EXPECT_TRUE(load_success); + + EXPECT_EQ(req_loaded->pg_id, pg_id); + EXPECT_EQ(req_loaded->req_id, 2u); + EXPECT_EQ(req_loaded->scrub_lsn, 200); + EXPECT_EQ(req_loaded->start_shard_id, 100u); + EXPECT_EQ(req_loaded->end_shard_id, 200u); + EXPECT_EQ(req_loaded->scrub_type, SCRUB_TYPE::DEEP_BLOB); + } + + // Test SHALLOW_BLOB scrub_req serialization + { + auto req = std::make_shared< ScrubManager::scrub_req >(pg_id, 3, 300, 0, 0, 100, UINT64_MAX, + SCRUB_TYPE::SHALLOW_BLOB, my_uuid); + auto buffer = req->build_flat_buffer(); + EXPECT_GT(buffer.size(), 0); + auto req_loaded = std::make_shared< ScrubManager::scrub_req >(); + bool load_success = req_loaded->load(buffer.data(), buffer.size()); + EXPECT_TRUE(load_success); + + EXPECT_EQ(req_loaded->pg_id, pg_id); + EXPECT_EQ(req_loaded->req_id, 3u); + EXPECT_EQ(req_loaded->scrub_lsn, 300); + EXPECT_EQ(req_loaded->start_shard_id, 0u); + EXPECT_EQ(req_loaded->end_shard_id, 100u); + EXPECT_EQ(req_loaded->scrub_type, SCRUB_TYPE::SHALLOW_BLOB); + } +} + +// Test scrub_result serialization and deserialization. 
+// scrub_result carries three distinct entry kinds that follow different wire/load paths: +// - uint64_t hash : written as (status=NONE, hash=value); loaded back as uint64_t +// - ScrubStatus::NONE : written as (status=NONE, hash=0); loaded back as uint64_t(0) +// - non-NONE ScrubStatus: written as (status=X, hash=0); loaded back as ScrubStatus +TEST_F(HomeObjectFixture, ScrubResultSerializationTest) { + const auto my_uuid = _obj_inst->our_uuid(); + const uint64_t req_id = 99; + + // ---- Case 1: hash entries (deep blob scrub) ---- + { + auto result = std::make_shared< ScrubManager::scrub_result >(req_id, my_uuid); + result->add_entry({10, 1, uint64_t{0xDEADBEEFCAFEBABEULL}}); + result->add_entry({10, 2, uint64_t{0x123456789ABCDEF0ULL}}); + + auto buf = result->build_flat_buffer(); + EXPECT_GT(buf.size(), 0u); + + auto loaded = std::make_shared< ScrubManager::scrub_result >(); + EXPECT_TRUE(loaded->load(buf.data(), buf.size())); + + EXPECT_EQ(loaded->req_id, req_id); + EXPECT_EQ(loaded->issuer_peer_id, my_uuid); + EXPECT_EQ(loaded->entries.size(), 2u); + + auto it1 = loaded->entries.find(BlobRoute{10, 1}); + ASSERT_NE(it1, loaded->entries.end()); + auto* h1 = std::get_if< uint64_t >(&it1->second); + ASSERT_NE(h1, nullptr) << "Hash entry should deserialize as uint64_t"; + EXPECT_EQ(*h1, 0xDEADBEEFCAFEBABEULL); + + auto it2 = loaded->entries.find(BlobRoute{10, 2}); + ASSERT_NE(it2, loaded->entries.end()); + auto* h2 = std::get_if< uint64_t >(&it2->second); + ASSERT_NE(h2, nullptr); + EXPECT_EQ(*h2, 0x123456789ABCDEF0ULL); + } + + // ---- Case 2: ScrubStatus::NONE (shallow blob scrub existence entries) ---- + // On the wire NONE maps to (status=NONE, hash=0) and loads back as uint64_t(0). + { + auto result = std::make_shared< ScrubManager::scrub_result >(req_id + 1, my_uuid); + result->add_entry({20, 1, ScrubStatus::NONE}); + result->add_entry({20, 2, ScrubStatus::NONE}); + + auto buf = result->build_flat_buffer(); + auto loaded = std::make_shared< ScrubManager::scrub_result >(); + EXPECT_TRUE(loaded->load(buf.data(), buf.size())); + + EXPECT_EQ(loaded->entries.size(), 2u); + for (const BlobRoute& route : {BlobRoute{20, 1}, BlobRoute{20, 2}}) { + auto it = loaded->entries.find(route); + ASSERT_NE(it, loaded->entries.end()); + auto* h = std::get_if< uint64_t >(&it->second); + ASSERT_NE(h, nullptr) << "NONE entry should deserialize as uint64_t(0)"; + EXPECT_EQ(*h, 0u); + } + } + + // ---- Case 3: error status entries (IO_ERROR, MISMATCH) ---- + { + auto result = std::make_shared< ScrubManager::scrub_result >(req_id + 2, my_uuid); + result->add_entry({30, 1, ScrubStatus::IO_ERROR}); + result->add_entry({30, 2, ScrubStatus::MISMATCH}); + + auto buf = result->build_flat_buffer(); + auto loaded = std::make_shared< ScrubManager::scrub_result >(); + EXPECT_TRUE(loaded->load(buf.data(), buf.size())); + + EXPECT_EQ(loaded->entries.size(), 2u); + + auto it1 = loaded->entries.find(BlobRoute{30, 1}); + ASSERT_NE(it1, loaded->entries.end()); + auto* s1 = std::get_if< ScrubStatus >(&it1->second); + ASSERT_NE(s1, nullptr) << "IO_ERROR entry should deserialize as ScrubStatus"; + EXPECT_EQ(*s1, ScrubStatus::IO_ERROR); + + auto it2 = loaded->entries.find(BlobRoute{30, 2}); + ASSERT_NE(it2, loaded->entries.end()); + auto* s2 = std::get_if< ScrubStatus >(&it2->second); + ASSERT_NE(s2, nullptr) << "MISMATCH entry should deserialize as ScrubStatus"; + EXPECT_EQ(*s2, ScrubStatus::MISMATCH); + } + + // ---- Case 4: empty result ---- + { + auto result = std::make_shared< ScrubManager::scrub_result >(req_id + 3, 
my_uuid);
+        auto buf = result->build_flat_buffer();
+        auto loaded = std::make_shared< ScrubManager::scrub_result >();
+        EXPECT_TRUE(loaded->load(buf.data(), buf.size()));
+        EXPECT_EQ(loaded->req_id, req_id + 3);
+        EXPECT_TRUE(loaded->entries.empty()) << "Empty result should round-trip with no entries";
+    }
+}
diff --git a/src/lib/homestore_backend/tests/test_mpmc_priority_queue.cpp b/src/lib/homestore_backend/tests/test_mpmc_priority_queue.cpp
new file mode 100644
index 000000000..c90ad7d1b
--- /dev/null
+++ b/src/lib/homestore_backend/tests/test_mpmc_priority_queue.cpp
@@ -0,0 +1,413 @@
+#include <gtest/gtest.h>
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <thread>
+#include <vector>
+
+#include "../MPMCPriorityQueue.hpp"
+
+using namespace homeobject;
+using namespace std::chrono_literals;
+
+// ============================================================================
+// Basic Functionality Tests
+// ============================================================================
+
+TEST(MPMCPriorityQueueTest, BasicPushPop) {
+    MPMCPriorityQueue< int > queue;
+
+    // Push elements
+    queue.push(5);
+    queue.push(2);
+    queue.push(8);
+    queue.push(1);
+
+    EXPECT_EQ(queue.size(), 4);
+    EXPECT_FALSE(queue.empty());
+
+    // Pop in priority order (max heap by default)
+    auto r1 = queue.pop();
+    EXPECT_TRUE(r1.is_ok());
+    EXPECT_EQ(r1.value.value(), 8);
+
+    auto r2 = queue.pop();
+    EXPECT_TRUE(r2.is_ok());
+    EXPECT_EQ(r2.value.value(), 5);
+
+    auto r3 = queue.pop();
+    EXPECT_TRUE(r3.is_ok());
+    EXPECT_EQ(r3.value.value(), 2);
+
+    auto r4 = queue.pop();
+    EXPECT_TRUE(r4.is_ok());
+    EXPECT_EQ(r4.value.value(), 1);
+
+    EXPECT_EQ(queue.size(), 0);
+    EXPECT_TRUE(queue.empty());
+}
+
+TEST(MPMCPriorityQueueTest, CustomComparator) {
+    // Min-heap using std::greater
+    MPMCPriorityQueue< int, std::greater< int > > queue;
+
+    queue.push(5);
+    queue.push(2);
+    queue.push(8);
+    queue.push(1);
+
+    // Pop in ascending order
+    EXPECT_EQ(queue.pop().value.value(), 1);
+    EXPECT_EQ(queue.pop().value.value(), 2);
+    EXPECT_EQ(queue.pop().value.value(), 5);
+    EXPECT_EQ(queue.pop().value.value(), 8);
+}
+
+TEST(MPMCPriorityQueueTest, MoveSemantics) {
+    struct MoveOnly {
+        int value;
+
+        explicit MoveOnly(int v) : value(v) {}
+        MoveOnly(const MoveOnly&) = delete;
+        MoveOnly& operator=(const MoveOnly&) = delete;
+        MoveOnly(MoveOnly&&) = default;
+        MoveOnly& operator=(MoveOnly&&) = default;
+
+        bool operator<(const MoveOnly& other) const { return value < other.value; }
+    };
+
+    MPMCPriorityQueue< MoveOnly > queue;
+
+    queue.push(MoveOnly(5));
+    queue.push(MoveOnly(2));
+    queue.push(MoveOnly(8));
+
+    EXPECT_EQ(queue.pop().value.value().value, 8);
+    EXPECT_EQ(queue.pop().value.value().value, 5);
+    EXPECT_EQ(queue.pop().value.value().value, 2);
+}
+
+// ============================================================================
+// Close Operation Tests
+// ============================================================================
+
+TEST(MPMCPriorityQueueTest, Close) {
+    MPMCPriorityQueue< int > queue;
+
+    queue.push(1);
+    queue.push(2);
+    queue.push(3);
+
+    EXPECT_FALSE(queue.is_closed());
+    queue.close();
+    EXPECT_TRUE(queue.is_closed());
+
+    // Can still pop existing elements
+    EXPECT_EQ(queue.pop().value.value(), 3);
+    EXPECT_EQ(queue.pop().value.value(), 2);
+    EXPECT_EQ(queue.pop().value.value(), 1);
+
+    // Now should return Closed status
+    auto result = queue.pop();
+    EXPECT_TRUE(result.is_closed());
+    EXPECT_FALSE(result.value.has_value());
+}
+
+TEST(MPMCPriorityQueueTest, PushAfterClose) {
+    MPMCPriorityQueue< int > queue;
+
+    queue.push(1);
+    
queue.close(); + + // Pushes after close are ignored + queue.push(2); + queue.push(3); + + EXPECT_EQ(queue.size(), 1); + + auto r1 = queue.pop(); + EXPECT_TRUE(r1.is_ok()); + EXPECT_EQ(r1.value.value(), 1); + + auto r2 = queue.pop(); + EXPECT_TRUE(r2.is_closed()); +} + +TEST(MPMCPriorityQueueTest, CloseIdempotent) { + MPMCPriorityQueue< int > queue; + + queue.push(1); + queue.close(); + queue.close(); // Should be safe + queue.close(); + + EXPECT_TRUE(queue.is_closed()); + EXPECT_EQ(queue.size(), 1); +} + +// ============================================================================ +// Blocking Behavior Tests +// ============================================================================ + +TEST(MPMCPriorityQueueTest, BlockingPop) { + MPMCPriorityQueue< int > queue; + std::atomic< bool > pop_started{false}; + std::atomic< bool > pop_completed{false}; + + // Consumer thread that will block + std::thread consumer([&]() { + pop_started = true; + auto result = queue.pop(); + pop_completed = true; + + EXPECT_TRUE(result.is_ok()); + EXPECT_EQ(result.value.value(), 42); + }); + + // Wait for consumer to start + while (!pop_started) { + std::this_thread::yield(); + } + + std::this_thread::sleep_for(50ms); + EXPECT_FALSE(pop_completed); + + // Unblock consumer by pushing + queue.push(42); + + consumer.join(); + EXPECT_TRUE(pop_completed); +} + +TEST(MPMCPriorityQueueTest, CloseUnblocksWaiters) { + MPMCPriorityQueue< int > queue; + std::atomic< int > closed_count{0}; + + // Start multiple waiting consumers + std::vector< std::thread > consumers; + for (int i = 0; i < 5; ++i) { + consumers.emplace_back([&]() { + auto result = queue.pop(); + if (result.is_closed()) { closed_count.fetch_add(1, std::memory_order_relaxed); } + }); + } + + std::this_thread::sleep_for(100ms); + + // Close should wake all waiters + queue.close(); + + for (auto& t : consumers) { + t.join(); + } + + EXPECT_EQ(closed_count.load(), 5); +} + +// ============================================================================ +// Multi-threaded Producer Tests +// ============================================================================ + +TEST(MPMCPriorityQueueTest, MultipleProducers) { + MPMCPriorityQueue< int > queue; + constexpr int num_producers = 4; + constexpr int items_per_producer = 250; + + std::vector< std::thread > producers; + for (int i = 0; i < num_producers; ++i) { + producers.emplace_back([&, i]() { + for (int j = 0; j < items_per_producer; ++j) { + queue.push(i * items_per_producer + j); + } + }); + } + + for (auto& t : producers) { + t.join(); + } + + EXPECT_EQ(queue.size(), num_producers * items_per_producer); + + // Verify all elements come out in descending order + std::vector< int > popped; + for (int i = 0; i < num_producers * items_per_producer; ++i) { + auto result = queue.pop(); + ASSERT_TRUE(result.is_ok()); + popped.push_back(result.value.value()); + } + + EXPECT_TRUE(std::is_sorted(popped.rbegin(), popped.rend())); +} + +// ============================================================================ +// Multi-threaded Consumer Tests +// ============================================================================ + +TEST(MPMCPriorityQueueTest, MultipleConsumers) { + MPMCPriorityQueue< int > queue; + constexpr int num_items = 1000; + + // Fill queue + for (int i = 0; i < num_items; ++i) { + queue.push(i); + } + + constexpr int num_consumers = 4; + std::vector< std::thread > consumers; + std::atomic< int > total_consumed{0}; + + for (int i = 0; i < num_consumers; ++i) { + consumers.emplace_back([&]() 
{ + int count = 0; + while (true) { + auto result = queue.pop(); + if (result.is_closed()) { break; } + ++count; + } + total_consumed.fetch_add(count, std::memory_order_relaxed); + }); + } + + // Give consumers time to start + std::this_thread::sleep_for(50ms); + + // Close to signal completion + queue.close(); + + for (auto& t : consumers) { + t.join(); + } + + EXPECT_EQ(total_consumed.load(), num_items); +} + +// ============================================================================ +// Concurrent Producers and Consumers +// ============================================================================ + +TEST(MPMCPriorityQueueTest, ConcurrentProducersConsumers) { + MPMCPriorityQueue< int > queue; + constexpr int num_producers = 3; + constexpr int num_consumers = 3; + constexpr int items_per_producer = 200; + + std::atomic< int > total_consumed{0}; + std::vector< std::thread > threads; + + // Start consumers + for (int i = 0; i < num_consumers; ++i) { + threads.emplace_back([&]() { + int count = 0; + while (true) { + auto result = queue.pop(); + if (result.is_closed()) { break; } + ++count; + } + total_consumed.fetch_add(count, std::memory_order_relaxed); + }); + } + + // Start producers + for (int i = 0; i < num_producers; ++i) { + threads.emplace_back([&, i]() { + for (int j = 0; j < items_per_producer; ++j) { + queue.push(i * items_per_producer + j); + std::this_thread::sleep_for(10us); // Simulate work + } + }); + } + + // Wait for producers + for (int i = num_consumers; i < num_consumers + num_producers; ++i) { + threads[i].join(); + } + + // Close and wait for consumers + queue.close(); + for (int i = 0; i < num_consumers; ++i) { + threads[i].join(); + } + + EXPECT_EQ(total_consumed.load(), num_producers * items_per_producer); +} + +// ============================================================================ +// Stress Test +// ============================================================================ + +TEST(MPMCPriorityQueueTest, StressTest) { + MPMCPriorityQueue< int > queue; + constexpr int num_threads = 8; + constexpr int operations_per_thread = 1000; + + std::atomic< int > push_count{0}; + std::atomic< int > pop_count{0}; + std::vector< std::thread > threads; + + // Half producers, half consumers + for (int i = 0; i < num_threads / 2; ++i) { + threads.emplace_back([&]() { + for (int j = 0; j < operations_per_thread; ++j) { + queue.push(j); + push_count.fetch_add(1, std::memory_order_relaxed); + } + }); + } + + for (int i = 0; i < num_threads / 2; ++i) { + threads.emplace_back([&]() { + for (int j = 0; j < operations_per_thread; ++j) { + auto result = queue.pop(); + if (result.is_ok()) { pop_count.fetch_add(1, std::memory_order_relaxed); } + } + }); + } + + for (auto& t : threads) { + t.join(); + } + + EXPECT_EQ(push_count.load(), (num_threads / 2) * operations_per_thread); + + // Pop remaining elements + while (!queue.empty()) { + auto result = queue.pop(); + if (result.is_ok()) { pop_count.fetch_add(1, std::memory_order_relaxed); } + } + + EXPECT_EQ(pop_count.load(), push_count.load()); +} + +// ============================================================================ +// Destructor Test +// ============================================================================ + +TEST(MPMCPriorityQueueTest, DestructorClosesQueue) { + std::atomic< bool > consumer_unblocked{false}; + + std::thread consumer([&]() { + auto queue = std::make_unique< MPMCPriorityQueue< int > >(); + queue->push(1); + + std::thread waiter([&, q = queue.get()]() { + auto first_result = q->pop(); // 
Pop the 1 + (void)first_result; // Explicitly ignore the result + auto result = q->pop(); // This will block until destructor closes queue + if (result.is_closed()) { consumer_unblocked = true; } + }); + + std::this_thread::sleep_for(100ms); + // Destructor will be called here + queue.reset(); + + waiter.join(); + }); + + consumer.join(); + EXPECT_TRUE(consumer_unblocked); +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +}
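
Note for reviewers reading the tests without the new header open: MPMCPriorityQueue.hpp itself (added by this patch) is not reproduced above, so the following is a minimal, self-contained sketch of the interface these tests exercise (push/pop/close/is_closed/size/empty, with pop() returning a status plus an optional value). It only illustrates the assumed contract; the result-type name (PopResult) and all internals here are assumptions, not the patch's actual implementation.

#include <condition_variable>
#include <cstddef>
#include <functional>
#include <mutex>
#include <optional>
#include <queue>
#include <utility>
#include <vector>

namespace sketch {

// What pop() hands back: either a value, or a signal that the queue was closed and drained.
template < typename T >
struct PopResult {
    enum class Status { Ok, Closed };
    Status status;
    std::optional< T > value;

    bool is_ok() const { return status == Status::Ok; }
    bool is_closed() const { return status == Status::Closed; }
};

template < typename T, typename Compare = std::less< T > >
class MPMCPriorityQueue {
public:
    // Pushes after close() are silently dropped; otherwise enqueue and wake one waiter.
    void push(T item) {
        {
            std::lock_guard< std::mutex > lk(mtx_);
            if (closed_) { return; }
            heap_.push(std::move(item));
        }
        cv_.notify_one();
    }

    // Blocks until an element is available or the queue is closed and empty.
    PopResult< T > pop() {
        std::unique_lock< std::mutex > lk(mtx_);
        cv_.wait(lk, [this] { return !heap_.empty() || closed_; });
        if (heap_.empty()) { return {PopResult< T >::Status::Closed, std::nullopt}; }
        // priority_queue::top() is const; move out via const_cast before popping.
        T item = std::move(const_cast< T& >(heap_.top()));
        heap_.pop();
        return {PopResult< T >::Status::Ok, std::move(item)};
    }

    // Idempotent; wakes every blocked consumer. Already-queued elements remain poppable.
    void close() {
        {
            std::lock_guard< std::mutex > lk(mtx_);
            closed_ = true;
        }
        cv_.notify_all();
    }

    bool is_closed() const {
        std::lock_guard< std::mutex > lk(mtx_);
        return closed_;
    }

    std::size_t size() const {
        std::lock_guard< std::mutex > lk(mtx_);
        return heap_.size();
    }

    bool empty() const { return size() == 0; }

    // Destructor closes the queue so blocked consumers are released (see DestructorClosesQueue).
    ~MPMCPriorityQueue() { close(); }

private:
    mutable std::mutex mtx_;
    std::condition_variable cv_;
    std::priority_queue< T, std::vector< T >, Compare > heap_;
    bool closed_{false};
};

} // namespace sketch

With std::less the default behaves as a max-heap (BasicPushPop pops 8, 5, 2, 1); passing std::greater turns it into a min-heap, which is what the CustomComparator test relies on.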