diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock
index 330e6591856..5902c1024be 100644
--- a/quickwit/Cargo.lock
+++ b/quickwit/Cargo.lock
@@ -8551,6 +8551,29 @@ dependencies = [
  "tracing-opentelemetry",
 ]
 
+[[package]]
+name = "quickwit-compaction"
+version = "0.8.0"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "itertools 0.14.0",
+ "quickwit-actors",
+ "quickwit-common",
+ "quickwit-config",
+ "quickwit-doc-mapper",
+ "quickwit-indexing",
+ "quickwit-metastore",
+ "quickwit-metrics",
+ "quickwit-proto",
+ "quickwit-storage",
+ "serde_json",
+ "time",
+ "tokio",
+ "tracing",
+ "ulid",
+]
+
 [[package]]
 name = "quickwit-config"
 version = "0.8.0"
@@ -8956,6 +8979,7 @@ dependencies = [
  "mockall",
  "quickwit-actors",
  "quickwit-common",
+ "quickwit-compaction",
  "quickwit-config",
  "quickwit-doc-mapper",
  "quickwit-index-management",
@@ -9330,6 +9354,7 @@ dependencies = [
  "quickwit-actors",
  "quickwit-cluster",
  "quickwit-common",
+ "quickwit-compaction",
  "quickwit-config",
  "quickwit-control-plane",
  "quickwit-datafusion",
diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml
index 74b7f6069b9..e42092ed7ae 100644
--- a/quickwit/Cargo.toml
+++ b/quickwit/Cargo.toml
@@ -14,6 +14,7 @@ members = [
   "quickwit-datetime",
   "quickwit-df-core",
   "quickwit-directories",
+  "quickwit-compaction",
   "quickwit-doc-mapper",
   "quickwit-dst",
   "quickwit-index-management",
@@ -60,6 +61,7 @@ default-members = [
   "quickwit-control-plane",
   "quickwit-datetime",
   "quickwit-directories",
+  "quickwit-compaction",
   "quickwit-doc-mapper",
   "quickwit-index-management",
   "quickwit-indexing",
@@ -242,6 +244,7 @@ sea-query = { version = "0.32" }
 sea-query-binder = { version = "0.7", features = [
   "runtime-tokio-rustls",
   "sqlx-postgres",
+  "postgres-array",
 ] }
 # ^1.0.184 due to serde-rs/serde#2538
 serde = { version = "1.0.228", features = ["derive", "rc"] }
@@ -372,6 +375,7 @@ quickwit-cluster = { path = "quickwit-cluster" }
 quickwit-codegen = { path = "quickwit-codegen" }
 quickwit-codegen-example = { path = "quickwit-codegen/example" }
 quickwit-common = { path = "quickwit-common" }
+quickwit-compaction = { path = "quickwit-compaction" }
 quickwit-config = { path = "quickwit-config" }
 quickwit-control-plane = { path = "quickwit-control-plane" }
 quickwit-datafusion = { path = "quickwit-datafusion" }
diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs
index 0fd6b1917ca..88bb53887bf 100644
--- a/quickwit/quickwit-cli/src/tool.rs
+++ b/quickwit/quickwit-cli/src/tool.rs
@@ -17,6 +17,7 @@ use std::io::{IsTerminal, Stdout, Write, stdout};
 use std::num::NonZeroUsize;
 use std::path::PathBuf;
 use std::str::FromStr;
+use std::sync::Arc;
 use std::time::{Duration, Instant};
 use std::{env, fmt, io};
 
@@ -37,11 +38,11 @@ use quickwit_config::{
     TransformConfig, VecSourceParams,
 };
 use quickwit_index_management::{IndexService, clear_cache_directory};
-use quickwit_indexing::BoxedPipelineHandle;
 use quickwit_indexing::actors::{IndexingService, MergePipeline, MergeSchedulerService};
 use quickwit_indexing::models::{
     DetachIndexingPipeline, DetachMergePipeline, IndexingStatistics, SpawnPipeline,
 };
+use quickwit_indexing::{BoxedPipelineHandle, IndexingSplitCache};
 use quickwit_ingest::IngesterPool;
 use quickwit_metastore::IndexMetadataResponseExt;
 use quickwit_proto::indexing::CpuCapacity;
@@ -55,12 +56,12 @@ use quickwit_serve::{
 };
 use quickwit_storage::{BundleStorage, Storage};
 use thousands::Separable;
-use tracing::{debug, info};
+use tracing::debug;
 
 use crate::checklist::{GREEN_COLOR, RED_COLOR};
 use crate::{
-    THROUGHPUT_WINDOW_SIZE, config_cli_arg, get_resolvers, load_node_config, run_index_checklist,
-    start_actor_runtimes,
+    THROUGHPUT_WINDOW_SIZE, config_cli_arg, get_resolvers, info, load_node_config,
+    run_index_checklist, start_actor_runtimes,
 };
 
 pub fn build_tool_command() -> Command {
@@ -221,8 +222,8 @@ pub enum ToolCliCommand {
     GarbageCollect(GarbageCollectIndexArgs),
     LocalIngest(LocalIngestDocsArgs),
     LocalSearch(LocalSearchArgs),
-    Merge(MergeArgs),
     ExtractSplit(ExtractSplitArgs),
+    Merge(MergeArgs),
 }
 
 impl ToolCliCommand {
@@ -448,6 +449,8 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result<
     )?;
     let universe = Universe::new();
     let merge_scheduler_service_mailbox = universe.get_or_spawn_one();
+    let split_cache =
+        Arc::new(IndexingSplitCache::from_config(&indexer_config, &config.data_dir_path).await?);
     let indexing_server = IndexingService::new(
         config.node_id.clone(),
         config.data_dir_path.clone(),
@@ -456,10 +459,11 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result<
         cluster,
         metastore,
         None,
-        merge_scheduler_service_mailbox,
+        Some(merge_scheduler_service_mailbox),
         IngesterPool::default(),
         storage_resolver,
         EventBroker::default(),
+        split_cache,
     )
     .await?;
     let (indexing_server_mailbox, indexing_server_handle) =
@@ -493,11 +497,13 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result<
     let statistics =
         start_statistics_reporting_loop(indexing_pipeline_handle, args.input_path_opt.is_none())
             .await?;
+
     merge_pipeline_handle
         .mailbox()
         .ask(quickwit_indexing::FinishPendingMergesAndShutdownPipeline)
         .await?;
     merge_pipeline_handle.join().await;
+
     // Shutdown the indexing server.
     universe
         .send_exit_with_success(&indexing_server_mailbox)
@@ -593,10 +599,11 @@ pub async fn merge_cli(args: MergeArgs) -> anyhow::Result<()> {
         cluster,
         metastore,
         None,
-        merge_scheduler_service,
+        Some(merge_scheduler_service),
         IngesterPool::default(),
         storage_resolver,
         EventBroker::default(),
+        Arc::new(IndexingSplitCache::no_caching()),
     )
     .await?;
     let (indexing_service_mailbox, indexing_service_handle) =
diff --git a/quickwit/quickwit-compaction/Cargo.toml b/quickwit/quickwit-compaction/Cargo.toml
new file mode 100644
index 00000000000..caf7fde7e1b
--- /dev/null
+++ b/quickwit/quickwit-compaction/Cargo.toml
@@ -0,0 +1,37 @@
+[package]
+name = "quickwit-compaction"
+description = "Compactor implementation and CompactionService"
+
+version.workspace = true
+edition.workspace = true
+homepage.workspace = true
+documentation.workspace = true
+repository.workspace = true
+authors.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow = { workspace = true }
+async-trait = { workspace = true }
+quickwit-actors = { workspace = true }
+quickwit-common = { workspace = true }
+quickwit-config = { workspace = true }
+quickwit-doc-mapper = { workspace = true }
+quickwit-indexing = { workspace = true }
+quickwit-metastore = { workspace = true }
+quickwit-metrics = { workspace = true }
+quickwit-proto = { workspace = true }
+quickwit-storage = { workspace = true }
+serde_json = { workspace = true }
+time = { workspace = true }
+tracing = { workspace = true }
+tokio = { workspace = true }
+ulid = { workspace = true }
+itertools = "0.14.0"
+
+[dev-dependencies]
+quickwit-actors = { workspace = true, features = ["testsuite"] }
+quickwit-doc-mapper = { workspace = true, features = ["testsuite"] }
+quickwit-metastore = { workspace = true, features = ["testsuite"] }
+quickwit-proto = { workspace = true, features = ["testsuite"] }
+quickwit-storage = { workspace = true, features = ["testsuite"] }
diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs
new file mode 100644
index 00000000000..9fc2b261967
--- /dev/null
+++ b/quickwit/quickwit-compaction/src/compaction_pipeline.rs
@@ -0,0 +1,469 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+use std::time::Instant;
+
+use quickwit_actors::{ActorHandle, HEARTBEAT, Health, QueueCapacity, SpawnContext, Supervisable};
+use quickwit_common::KillSwitch;
+use quickwit_common::io::{IoControls, Limiter};
+use quickwit_common::pubsub::EventBroker;
+use quickwit_common::temp_dir::TempDirectory;
+use quickwit_config::RetentionPolicy;
+use quickwit_doc_mapper::DocMapper;
+use quickwit_indexing::actors::{
+    MergeExecutor, MergeSplitDownloader, Packager, Publisher, Uploader, UploaderType,
+};
+use quickwit_indexing::merge_policy::MergeOperation;
+use quickwit_indexing::{IndexingSplitStore, SplitsUpdateMailbox};
+use quickwit_metrics::{counter, gauge, histogram, label_values};
+use quickwit_proto::indexing::MergePipelineId;
+use quickwit_proto::metastore::MetastoreServiceClient;
+use quickwit_proto::types::{IndexUid, SourceId, SplitId};
+use tokio::sync::Semaphore;
+use tracing::{error, info};
+
+use crate::metrics::{
+    COMPACTION_DURATION, COMPACTIONS_FAILED, COMPACTIONS_IN_PROGRESS, COMPACTIONS_SUCCEEDED,
+    SOURCE_UID_MERGE_LEVEL,
+};
+use crate::{TaskId, source_uid_metrics_label};
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum PipelineStatus {
+    InProgress,
+    Completed,
+    Failed { error: String },
+}
+
+impl PipelineStatus {
+    pub fn is_terminal(&self) -> bool {
+        matches!(
+            self,
+            PipelineStatus::Completed | PipelineStatus::Failed { .. }
+        )
+    }
+}
+
+pub struct PipelineStatusUpdate {
+    pub task_id: TaskId,
+    pub index_uid: IndexUid,
+    pub source_id: SourceId,
+    pub split_ids: Vec<SplitId>,
+    pub status: PipelineStatus,
+}
+
+struct CompactionPipelineHandles {
+    merge_split_downloader: ActorHandle<MergeSplitDownloader>,
+    merge_executor: ActorHandle<MergeExecutor>,
+    merge_packager: ActorHandle<Packager>,
+    merge_uploader: ActorHandle<Uploader>,
+    merge_publisher: ActorHandle<Publisher>,
+    next_check_for_progress: Instant,
+}
+
+impl CompactionPipelineHandles {
+    /// Returns true once per HEARTBEAT interval. Between heartbeats we only
+    /// check whether actors are alive, not whether they are making progress.
+    /// This gives CPU-intensive actors (like the packager) up to 30s to
+    /// complete before being declared unhealthy.
+    fn should_check_for_progress(&mut self) -> bool {
+        let now = Instant::now();
+        let check_for_progress = now > self.next_check_for_progress;
+        if check_for_progress {
+            self.next_check_for_progress = now + *HEARTBEAT;
+        }
+        check_for_progress
+    }
+}
+
+/// A single-use compaction pipeline. Processes one merge task and terminates.
+///
+/// Owned by the `CompactorSupervisor`, which periodically calls
+/// `check_actor_health()` to collect status updates. The pipeline manages
+/// its own retry logic internally.
+pub struct CompactionPipeline {
+    task_id: TaskId,
+    merge_operation: MergeOperation,
+    pipeline_id: MergePipelineId,
+    status: PipelineStatus,
+    kill_switch: KillSwitch,
+    scratch_directory: TempDirectory,
+    handles: Option<CompactionPipelineHandles>,
+    doc_mapper: Arc<DocMapper>,
+    merge_policy: Arc<dyn quickwit_indexing::merge_policy::MergePolicy>,
+    retention_policy: Option<RetentionPolicy>,
+    metastore: MetastoreServiceClient,
+    split_store: IndexingSplitStore,
+    io_throughput_limiter: Option<Limiter>,
+    max_concurrent_split_uploads: usize,
+    event_broker: EventBroker,
+    merge_execution_semaphore: Arc<Semaphore>,
+    pipeline_start: Option<Instant>,
+}
+
+impl CompactionPipeline {
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        task_id: TaskId,
+        scratch_directory: TempDirectory,
+        merge_operation: MergeOperation,
+        pipeline_id: MergePipelineId,
+        doc_mapper: Arc<DocMapper>,
+        merge_policy: Arc<dyn quickwit_indexing::merge_policy::MergePolicy>,
+        retention_policy: Option<RetentionPolicy>,
+        metastore: MetastoreServiceClient,
+        split_store: IndexingSplitStore,
+        io_throughput_limiter: Option<Limiter>,
+        max_concurrent_split_uploads: usize,
+        event_broker: EventBroker,
+        merge_execution_semaphore: Arc<Semaphore>,
+    ) -> Self {
+        CompactionPipeline {
+            task_id,
+            status: PipelineStatus::InProgress,
+            kill_switch: KillSwitch::default(),
+            scratch_directory,
+            handles: None,
+            merge_operation,
+            pipeline_id,
+            doc_mapper,
+            merge_policy,
+            retention_policy,
+            metastore,
+            split_store,
+            io_throughput_limiter,
+            max_concurrent_split_uploads,
+            event_broker,
+            merge_execution_semaphore,
+            pipeline_start: None,
+        }
+    }
+
+    pub fn status(&self) -> &PipelineStatus {
+        &self.status
+    }
+
+    fn supervisables(&self) -> Vec<&dyn Supervisable> {
+        let Some(handles) = &self.handles else {
+            return Vec::new();
+        };
+        vec![
+            &handles.merge_split_downloader,
+            &handles.merge_executor,
+            &handles.merge_packager,
+            &handles.merge_uploader,
+            &handles.merge_publisher,
+        ]
+    }
+
+    /// Returns pipeline status update by checking the health of individual pipeline actors.
+    ///
+    /// If the pipeline is already completed or failed (terminal status), returns the status
+    /// without re-checking actors. The pipeline sits in this "completed" state (finished or failed)
+    /// until the supervisor cleans up the spot in favor of a new pipeline. This is done to
+    /// ensure that the planner acks the success/failure.
+    pub fn pipeline_status_update(&mut self) -> PipelineStatusUpdate {
+        self.update_status();
+        self.build_status_update()
+    }
+
+    fn update_status(&mut self) {
+        // Pipeline is finished but yet to be cleaned up.
+        if matches!(
+            self.status,
+            PipelineStatus::Completed | PipelineStatus::Failed { .. }
+        ) {
+            return;
+        }
+        let Some(handles) = self.handles.as_mut() else {
+            return;
+        };
+
+        // We check whether actors are alive on every tick (1s), but only
+        // check whether they are making progress once per HEARTBEAT (30s).
+        // This gives CPU-intensive actors like the packager time to finish
+        // without being falsely declared unhealthy.
+        let check_for_progress = handles.should_check_for_progress();
+
+        let mut has_healthy = false;
+        let mut failure_actor_names: Vec<String> = Vec::new();
+
+        for supervisable in self.supervisables() {
+            match supervisable.check_health(check_for_progress) {
+                Health::Healthy => {
+                    has_healthy = true;
+                }
+                Health::FailureOrUnhealthy => {
+                    failure_actor_names.push(supervisable.name().to_string());
+                }
+                Health::Success => {}
+            }
+        }
+
+        if !failure_actor_names.is_empty() {
+            self.record_pipeline_duration();
+            let error_msg = format!("failed actors: {:?}", failure_actor_names);
+            error!(task_id=%self.task_id, "{error_msg}");
+            self.status = PipelineStatus::Failed { error: error_msg };
+            self.record_terminal_metrics(false);
+            return;
+        }
+        if !has_healthy {
+            self.record_pipeline_duration();
+            info!(task_id=%self.task_id, "all compaction pipeline actors completed");
+            self.status = PipelineStatus::Completed;
+            self.record_terminal_metrics(true);
+        }
+    }
+
+    /// Fires once when the pipeline transitions from InProgress to a terminal state.
+    /// Pairs with the `COMPACTIONS_IN_PROGRESS.inc()` in `CompactorSupervisor::spawn_task`.
+    fn record_terminal_metrics(&self, succeeded: bool) {
+        let merge_level = self.merge_operation.merge_level().to_string();
+        let index_label =
+            source_uid_metrics_label(&self.pipeline_id.index_uid, &self.pipeline_id.source_id);
+        let labels = label_values!(SOURCE_UID_MERGE_LEVEL => index_label, merge_level);
+        gauge!(parent: COMPACTIONS_IN_PROGRESS, labels: [labels.clone()]).dec();
+        if succeeded {
+            counter!(parent: COMPACTIONS_SUCCEEDED, labels: [labels]).inc();
+        } else {
+            counter!(parent: COMPACTIONS_FAILED, labels: [labels]).inc();
+        }
+    }
+
+    fn record_pipeline_duration(&self) {
+        if let Some(pipeline_start_time) = self.pipeline_start {
+            let elapsed = pipeline_start_time.elapsed().as_secs_f64();
+            let merge_level = self.merge_operation.merge_level().to_string();
+            let index_label =
+                source_uid_metrics_label(&self.pipeline_id.index_uid, &self.pipeline_id.source_id);
+            let labels = label_values!(SOURCE_UID_MERGE_LEVEL => index_label, merge_level);
+            histogram!(parent: COMPACTION_DURATION, labels: [labels]).observe(elapsed);
+        }
+    }
+
+    fn build_status_update(&self) -> PipelineStatusUpdate {
+        PipelineStatusUpdate {
+            task_id: self.task_id.clone(),
+            index_uid: self.pipeline_id.index_uid.clone(),
+            source_id: self.pipeline_id.source_id.clone(),
+            split_ids: self
+                .merge_operation
+                .splits_as_slice()
+                .iter()
+                .map(|split| split.split_id().to_string())
+                .collect(),
+            status: self.status.clone(),
+        }
+    }
+
+    /// Spawns the 5-actor merge execution chain and sends the `MergeOperation`
+    /// to the downloader to kick off execution.
+    pub(crate) fn spawn_pipeline(&mut self, spawn_ctx: &SpawnContext) -> anyhow::Result<()> {
+        info!(
+            task_id=%self.task_id,
+            pipeline_id=%self.pipeline_id,
+            "spawning compaction pipeline"
+        );
+
+        // Publisher (no merge planner feedback, no source)
+        let merge_publisher = Publisher::new(
+            quickwit_indexing::MERGE_PUBLISHER_NAME,
+            QueueCapacity::Unbounded,
+            self.metastore.clone(),
+            None,
+            None,
+        );
+        let (merge_publisher_mailbox, merge_publisher_handle) = spawn_ctx
+            .spawn_builder()
+            .set_kill_switch(self.kill_switch.child())
+            .spawn(merge_publisher);
+
+        // Uploader
+        let merge_uploader = Uploader::new(
+            UploaderType::MergeUploader,
+            self.metastore.clone(),
+            self.merge_policy.clone(),
+            self.retention_policy.clone(),
+            self.split_store.clone(),
+            SplitsUpdateMailbox::from(merge_publisher_mailbox),
+            self.max_concurrent_split_uploads,
+            self.event_broker.clone(),
+        );
+        let (merge_uploader_mailbox, merge_uploader_handle) = spawn_ctx
+            .spawn_builder()
+            .set_kill_switch(self.kill_switch.child())
+            .spawn(merge_uploader);
+
+        // Packager
+        let tag_fields = self.doc_mapper.tag_named_fields()?;
+        let merge_packager = Packager::new("MergePackager", tag_fields, merge_uploader_mailbox);
+        let (merge_packager_mailbox, merge_packager_handle) = spawn_ctx
+            .spawn_builder()
+            .set_kill_switch(self.kill_switch.child())
+            .spawn(merge_packager);
+
+        // MergeExecutor
+        let split_downloader_io_controls = IoControls::default()
+            .set_throughput_limiter_opt(self.io_throughput_limiter.clone())
+            .set_component("split_downloader_merge");
+        let merge_executor_io_controls =
+            split_downloader_io_controls.clone().set_component("merger");
+
+        let merge_executor = MergeExecutor::new(
+            self.pipeline_id.clone(),
+            self.metastore.clone(),
+            self.doc_mapper.clone(),
+            merge_executor_io_controls,
+            merge_packager_mailbox,
+            Some(self.merge_execution_semaphore.clone()),
+        );
+        let (merge_executor_mailbox, merge_executor_handle) = spawn_ctx
+            .spawn_builder()
+            .set_kill_switch(self.kill_switch.child())
+            .spawn(merge_executor);
+
+        // MergeSplitDownloader
+        let merge_split_downloader = MergeSplitDownloader {
+            scratch_directory: self.scratch_directory.clone(),
+            split_store: self.split_store.clone(),
+            executor_mailbox: merge_executor_mailbox,
+            io_controls: split_downloader_io_controls,
+        };
+        let (merge_split_downloader_mailbox, merge_split_downloader_handle) = spawn_ctx
+            .spawn_builder()
+            .set_kill_switch(self.kill_switch.child())
+            .spawn(merge_split_downloader);
+
+        let now = Instant::now();
+        self.pipeline_start = Some(now);
+        // Kick off the pipeline.
+        merge_split_downloader_mailbox
+            .try_send_message(self.merge_operation.clone())
+            .map_err(|err| {
+                anyhow::anyhow!("failed to send merge operation to downloader: {err:?}")
+            })?;
+
+        self.handles = Some(CompactionPipelineHandles {
+            merge_split_downloader: merge_split_downloader_handle,
+            merge_executor: merge_executor_handle,
+            merge_packager: merge_packager_handle,
+            merge_uploader: merge_uploader_handle,
+            merge_publisher: merge_publisher_handle,
+            next_check_for_progress: Instant::now() + *HEARTBEAT,
+        });
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+pub(crate) mod tests {
+    use std::sync::Arc;
+
+    use quickwit_actors::Universe;
+    use quickwit_common::pubsub::EventBroker;
+    use quickwit_common::temp_dir::TempDirectory;
+    use quickwit_doc_mapper::default_doc_mapper_for_test;
+    use quickwit_indexing::IndexingSplitStore;
+    use quickwit_indexing::merge_policy::{MergeOperation, default_merge_policy};
+    use quickwit_metastore::SplitMetadata;
+    use quickwit_proto::indexing::MergePipelineId;
+    use quickwit_proto::metastore::{MetastoreServiceClient, MockMetastoreService};
+    use quickwit_proto::types::{IndexUid, NodeId};
+    use quickwit_storage::RamStorage;
+    use tokio::sync::Semaphore;
+
+    use super::{CompactionPipeline, PipelineStatus};
+
+    pub fn test_pipeline(task_id: &str, split_ids: &[&str]) -> CompactionPipeline {
+        let storage = Arc::new(RamStorage::default());
+        let split_store = IndexingSplitStore::create_without_local_store_for_test(storage);
+        let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new());
+        let splits: Vec<SplitMetadata> = split_ids
+            .iter()
+            .map(|id| SplitMetadata::for_test(id.to_string()))
+            .collect();
+        let merge_operation = MergeOperation::new_merge_operation(splits);
+        let pipeline_id = MergePipelineId {
+            node_id: NodeId::from("test-node"),
+            index_uid: IndexUid::for_test("test-index", 0),
+            source_id: "test-source".to_string(),
+        };
+        CompactionPipeline::new(
+            task_id.to_string(),
+            TempDirectory::for_test(),
+            merge_operation,
+            pipeline_id,
+            Arc::new(default_doc_mapper_for_test()),
+            default_merge_policy(),
+            None,
+            metastore,
+            split_store,
+            None,
+            2,
+            EventBroker::default(),
+            Arc::new(Semaphore::new(2)),
+        )
+    }
+
+    #[tokio::test]
+    async fn test_spawn_pipeline_creates_handles() {
+        let universe = Universe::new();
+        let mut pipeline = test_pipeline("task-1", &["split-1", "split-2"]);
+        assert!(pipeline.handles.is_none());
+        pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap();
+        assert!(pipeline.handles.is_some());
+        universe.assert_quit().await;
+    }
+
+    #[tokio::test]
+    async fn test_status_update_unspawned_pipeline() {
+        let mut pipeline = test_pipeline("task-1", &["split-1"]);
+        let update = pipeline.pipeline_status_update();
+        assert_eq!(update.status, PipelineStatus::InProgress);
+        assert_eq!(update.task_id, "task-1");
+        assert_eq!(update.split_ids, vec!["split-1"]);
+        assert_eq!(update.source_id, "test-source");
+        assert_eq!(update.index_uid, IndexUid::for_test("test-index", 0));
+    }
+
+    #[tokio::test]
+    async fn test_status_update_healthy_pipeline() {
+        let universe = Universe::new();
+        let mut pipeline = test_pipeline("task-1", &["split-1"]);
+        pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap();
+        let update = pipeline.pipeline_status_update();
+        assert_eq!(update.status, PipelineStatus::InProgress);
+        universe.assert_quit().await;
+    }
+
+    #[tokio::test]
+    async fn test_killed_pipeline_fails() {
+        let universe = Universe::new();
+        let mut pipeline = test_pipeline("task-1", &["split-1"]);
+        pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap();
+
+        pipeline.kill_switch.kill();
+        tokio::task::yield_now().await;
+        let update = pipeline.pipeline_status_update();
+        assert!(matches!(update.status, PipelineStatus::Failed { .. }));
+
+        // Calling again still returns Failed (sticky).
+        let update = pipeline.pipeline_status_update();
+        assert!(matches!(update.status, PipelineStatus::Failed { .. }));
+        universe.assert_quit().await;
+    }
+}
diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs
new file mode 100644
index 00000000000..128770b02f5
--- /dev/null
+++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs
@@ -0,0 +1,653 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::str::FromStr;
+use std::sync::Arc;
+use std::time::Duration;
+
+use async_trait::async_trait;
+use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, SpawnContext};
+use quickwit_common::io::{self, Limiter};
+use quickwit_common::pubsub::EventBroker;
+use quickwit_common::temp_dir::TempDirectory;
+use quickwit_common::uri::Uri;
+use quickwit_config::{
+    CompactorConfig, IndexingSettings, RetentionPolicy, SearchSettings, build_doc_mapper,
+};
+use quickwit_doc_mapper::DocMapping;
+use quickwit_indexing::merge_policy::{MergeOperation, merge_policy_from_settings};
+use quickwit_indexing::{IndexingSplitCache, IndexingSplitStore};
+use quickwit_metastore::SplitMetadata;
+use quickwit_metrics::{gauge, label_values};
+use quickwit_proto::compaction::{
+    CompactionFailure, CompactionInProgress, CompactionPlannerService,
+    CompactionPlannerServiceClient, CompactionSuccess, MergeTaskAssignment, ReportStatusRequest,
+};
+use quickwit_proto::indexing::MergePipelineId;
+use quickwit_proto::metastore::MetastoreServiceClient;
+use quickwit_proto::types::NodeId;
+use quickwit_storage::StorageResolver;
+use tokio::sync::Semaphore;
+use tracing::{error, info};
+
+use crate::compaction_pipeline::{CompactionPipeline, PipelineStatus, PipelineStatusUpdate};
+use crate::metrics::{AVAILABLE_SLOTS, COMPACTIONS_IN_PROGRESS, SOURCE_UID_MERGE_LEVEL};
+use crate::source_uid_metrics_label;
+
+const CHECK_PIPELINE_STATUSES_INTERVAL: Duration = Duration::from_secs(1);
+
+#[derive(Debug)]
+struct CheckPipelineStatuses;
+
+/// Manages a pool of `CompactionPipeline`s, each executing a single merge task.
+///
+/// Periodically collects pipeline status updates and forwards them to the
+/// compaction planner. Pipelines manage their own retry logic internally.
+pub struct CompactorSupervisor {
+    node_id: NodeId,
+    planner_client: CompactionPlannerServiceClient,
+    pipelines: Vec<Option<CompactionPipeline>>,
+    // Bounds the number of pipelines running Tantivy merge step concurrently. There are 2x as many
+    // pipeline slots as permits, so half the pipelines can be doing IO while the other half hold a
+    // permit.
+    merge_execution_semaphore: Arc<Semaphore>,
+    // Shared resources distributed to pipelines when spawning actor chains.
+    io_throughput_limiter: Option<Limiter>,
+    metastore: MetastoreServiceClient,
+    storage_resolver: StorageResolver,
+    split_cache: Arc<IndexingSplitCache>,
+    max_concurrent_split_uploads: usize,
+    event_broker: EventBroker,
+    // Scratch directory root (<data_dir>/compaction/).
+    compaction_root_directory: TempDirectory,
+}
+
+impl CompactorSupervisor {
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        node_id: NodeId,
+        planner_client: CompactionPlannerServiceClient,
+        compactor_config: &CompactorConfig,
+        metastore: MetastoreServiceClient,
+        storage_resolver: StorageResolver,
+        split_cache: Arc<IndexingSplitCache>,
+        event_broker: EventBroker,
+        compaction_root_directory: TempDirectory,
+    ) -> Self {
+        let &CompactorConfig {
+            max_concurrent_merge_executions,
+            pipeline_slots_per_merge_execution,
+            max_concurrent_split_uploads,
+            max_merge_write_throughput,
+        } = compactor_config;
+        let num_pipeline_slots =
+            max_concurrent_merge_executions.get() * pipeline_slots_per_merge_execution.get();
+        let pipelines = (0..num_pipeline_slots).map(|_| None).collect();
+        let merge_execution_semaphore =
+            Arc::new(Semaphore::new(max_concurrent_merge_executions.get()));
+        let io_throughput_limiter = max_merge_write_throughput.map(io::limiter);
+        CompactorSupervisor {
+            node_id,
+            planner_client,
+            pipelines,
+            merge_execution_semaphore,
+            io_throughput_limiter,
+            metastore,
+            storage_resolver,
+            split_cache,
+            max_concurrent_split_uploads,
+            event_broker,
+            compaction_root_directory,
+        }
+    }
+
+    fn check_pipeline_statuses(&mut self) -> Vec<PipelineStatusUpdate> {
+        let mut statuses = Vec::new();
+        for slot in &mut self.pipelines {
+            let Some(pipeline) = slot else {
+                continue;
+            };
+            statuses.push(pipeline.pipeline_status_update());
+        }
+        statuses
+    }
+
+    async fn process_new_tasks(
+        &mut self,
+        assignments: Vec<MergeTaskAssignment>,
+        spawn_ctx: &SpawnContext,
+    ) {
+        // The planner has acknowledged the statuses we just sent — drop any
+        // Completed/Failed pipelines so their scratch directories and actor
+        // handles are released.
+        for slot in &mut self.pipelines {
+            slot.take_if(|p| p.status().is_terminal());
+        }
+        for assignment in assignments {
+            let task_id = assignment.task_id.clone();
+            if let Err(error) = self.spawn_task(assignment, spawn_ctx).await {
+                error!(%task_id, %error, "failed to spawn compaction task");
+            }
+        }
+        let available_slots = self
+            .pipelines
+            .iter()
+            .filter(|pipeline_opt| pipeline_opt.is_none())
+            .count();
+        AVAILABLE_SLOTS.set(available_slots as f64);
+    }
+
+    async fn spawn_task(
+        &mut self,
+        assignment: MergeTaskAssignment,
+        spawn_ctx: &SpawnContext,
+    ) -> anyhow::Result<()> {
+        let source_uid_label = source_uid_metrics_label(
+            assignment.index_uid.as_ref().unwrap(),
+            &assignment.source_id,
+        );
+        let merge_level = assignment.merge_level.to_string();
+        let labels = label_values!(SOURCE_UID_MERGE_LEVEL => source_uid_label, merge_level);
+
+        let slot_idx = self
+            .pipelines
+            .iter()
+            .position(Option::is_none)
+            .ok_or_else(|| anyhow::anyhow!("no free pipeline slot"))?;
+        let scratch_directory = self
+            .compaction_root_directory
+            .named_temp_child(&assignment.task_id)?;
+        let mut pipeline = self
+            .build_compaction_pipeline(assignment, scratch_directory)
+            .await?;
+        pipeline.spawn_pipeline(spawn_ctx)?;
+        self.pipelines[slot_idx] = Some(pipeline);
+        gauge!(parent: COMPACTIONS_IN_PROGRESS, labels: [labels]).inc();
+        Ok(())
+    }
+
+    async fn build_compaction_pipeline(
+        &self,
+        assignment: MergeTaskAssignment,
+        scratch_directory: TempDirectory,
+    ) -> anyhow::Result<CompactionPipeline> {
+        let splits: Vec<SplitMetadata> = assignment
+            .splits_metadata_json
+            .iter()
+            .map(|json| serde_json::from_str(json))
+            .collect::<Result<Vec<SplitMetadata>, serde_json::Error>>()?;
+        let doc_mapping: DocMapping = serde_json::from_str(&assignment.doc_mapping_json)?;
+        let search_settings: SearchSettings =
+            serde_json::from_str(&assignment.search_settings_json)?;
+        let indexing_settings: IndexingSettings =
+            serde_json::from_str(&assignment.indexing_settings_json)?;
+        let retention_policy: Option<RetentionPolicy> =
+            if assignment.retention_policy_json.is_empty() {
+                None
+            } else {
+                Some(serde_json::from_str(&assignment.retention_policy_json)?)
+            };
+        let index_uid = assignment
+            .index_uid
+            .ok_or_else(|| anyhow::anyhow!("missing index_uid in MergeTaskAssignment"))?;
+
+        let index_storage_uri = Uri::from_str(&assignment.index_storage_uri)?;
+        let index_storage = self.storage_resolver.resolve(&index_storage_uri).await?;
+        let split_store = IndexingSplitStore::new(index_storage, self.split_cache.clone());
+
+        let doc_mapper = build_doc_mapper(&doc_mapping, &search_settings)?;
+        let merge_policy = merge_policy_from_settings(&indexing_settings);
+        let merge_operation = MergeOperation::new_merge_operation(splits);
+        let pipeline_id = MergePipelineId {
+            node_id: self.node_id.clone(),
+            index_uid,
+            source_id: assignment.source_id,
+        };
+
+        Ok(CompactionPipeline::new(
+            assignment.task_id,
+            scratch_directory,
+            merge_operation,
+            pipeline_id,
+            doc_mapper,
+            merge_policy,
+            retention_policy,
+            self.metastore.clone(),
+            split_store,
+            self.io_throughput_limiter.clone(),
+            self.max_concurrent_split_uploads,
+            self.event_broker.clone(),
+            self.merge_execution_semaphore.clone(),
+        ))
+    }
+
+    fn build_report_status_request(
+        &self,
+        statuses: &[PipelineStatusUpdate],
+    ) -> ReportStatusRequest {
+        let in_progress_count = statuses
+            .iter()
+            .filter(|s| matches!(s.status, PipelineStatus::InProgress))
+            .count();
+        let available_slots = (self.pipelines.len() - in_progress_count) as i64;
+
+        let mut in_progress = Vec::new();
+        let mut successes = Vec::new();
+        let mut failures = Vec::new();
+
+        for update in statuses {
+            match &update.status {
+                PipelineStatus::InProgress => {
+                    in_progress.push(CompactionInProgress {
+                        task_id: update.task_id.clone(),
+                        index_uid: Some(update.index_uid.clone()),
+                        source_id: update.source_id.clone(),
+                        split_ids: update.split_ids.clone(),
+                    });
+                }
+                PipelineStatus::Completed => {
+                    successes.push(CompactionSuccess {
+                        task_id: update.task_id.clone(),
+                    });
+                }
+                PipelineStatus::Failed { error } => {
+                    failures.push(CompactionFailure {
+                        task_id: update.task_id.clone(),
+                        error_message: error.clone(),
+                    });
+                }
+            }
+        }
+
+        ReportStatusRequest {
+            node_id: self.node_id.to_string(),
+            available_slots: available_slots as u32,
+            in_progress,
+            successes,
+            failures,
+        }
+    }
+}
+
+#[async_trait]
+impl Actor for CompactorSupervisor {
+    type ObservableState = ();
+
+    fn name(&self) -> String {
+        "CompactorSupervisor".to_string()
+    }
+
+    fn observable_state(&self) -> Self::ObservableState {}
+
+    async fn initialize(&mut self, ctx: &ActorContext<Self>) -> Result<(), ActorExitStatus> {
+        info!(
+            num_pipeline_slots=%self.pipelines.len(),
+            "compactor supervisor started"
+        );
+        ctx.schedule_self_msg(CHECK_PIPELINE_STATUSES_INTERVAL, CheckPipelineStatuses);
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl Handler<CheckPipelineStatuses> for CompactorSupervisor {
+    type Reply = ();
+
+    async fn handle(
+        &mut self,
+        _msg: CheckPipelineStatuses,
+        ctx: &ActorContext<Self>,
+    ) -> Result<(), ActorExitStatus> {
+        let statuses = self.check_pipeline_statuses();
+        let request = self.build_report_status_request(&statuses);
+        match ctx
+            .protect_future(self.planner_client.report_status(request))
+            .await
+        {
+            Ok(response) => {
+                ctx.protect_future(self.process_new_tasks(response.new_tasks, ctx.spawn_ctx()))
+                    .await;
+            }
+            Err(error) => {
+                error!(%error, "failed to report status to compaction planner");
+            }
+        }
+        ctx.schedule_self_msg(CHECK_PIPELINE_STATUSES_INTERVAL, CheckPipelineStatuses);
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::num::NonZeroUsize;
+
+    use quickwit_actors::Universe;
+    use quickwit_common::temp_dir::TempDirectory;
+    use quickwit_proto::compaction::{
+        CompactionPlannerServiceClient, MockCompactionPlannerService,
+    };
+    use quickwit_proto::metastore::{MetastoreServiceClient, MockMetastoreService};
+    use quickwit_proto::types::NodeId;
+    use quickwit_storage::StorageResolver;
+
+    use super::*;
+    use crate::compaction_pipeline::tests::test_pipeline;
+
+    /// Builds a test supervisor with `max_concurrent_merge_executions` permits
+    /// and `2 * max_concurrent_merge_executions` pipeline slots.
+    fn test_supervisor(max_concurrent_merge_executions: usize) -> CompactorSupervisor {
+        let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new());
+        let compaction_client =
+            CompactionPlannerServiceClient::from_mock(MockCompactionPlannerService::new());
+        let compactor_config = CompactorConfig {
+            max_concurrent_merge_executions: NonZeroUsize::new(max_concurrent_merge_executions)
+                .expect("max_concurrent_merge_executions must be non-zero"),
+            ..CompactorConfig::for_test()
+        };
+        CompactorSupervisor::new(
+            NodeId::from("test-node"),
+            compaction_client,
+            &compactor_config,
+            metastore,
+            StorageResolver::for_test(),
+            Arc::new(IndexingSplitCache::no_caching()),
+            EventBroker::default(),
+            TempDirectory::for_test(),
+        )
+    }
+
+    #[test]
+    fn test_check_pipeline_statuses_empty_slots() {
+        let mut supervisor = test_supervisor(4);
+        let statuses = supervisor.check_pipeline_statuses();
+        assert!(statuses.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_check_pipeline_statuses_with_pipelines() {
+        let universe = Universe::new();
+        let mut supervisor = test_supervisor(4);
+
+        let mut pipeline = test_pipeline("task-1", &["split-a", "split-b"]);
+        pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap();
+        supervisor.pipelines[0] = Some(pipeline);
+
+        let mut pipeline = test_pipeline("task-2", &["split-c"]);
+        pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap();
+        supervisor.pipelines[2] = Some(pipeline);
+
+        let statuses = supervisor.check_pipeline_statuses();
+        assert_eq!(statuses.len(), 2);
+        assert_eq!(statuses[0].task_id, "task-1");
+        assert_eq!(statuses[0].split_ids, vec!["split-a", "split-b"]);
+        assert_eq!(statuses[1].task_id, "task-2");
+        assert_eq!(statuses[1].split_ids, vec!["split-c"]);
+        universe.assert_quit().await;
+    }
+
+    #[tokio::test]
+    async fn test_end_to_end_statuses_to_proto() {
+        let universe = Universe::new();
+        // 3 merge-executions → 6 pipeline slots
+        let mut supervisor = test_supervisor(3);
+
+        let mut pipeline = test_pipeline("task-1", &["s1", "s2"]);
+        pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap();
+        supervisor.pipelines[0] = Some(pipeline);
+
+        let statuses = supervisor.check_pipeline_statuses();
+        let request = supervisor.build_report_status_request(&statuses);
+
+        assert_eq!(request.node_id, "test-node");
+        // 6 slots, 1 in-progress = 5 available
+        assert_eq!(request.available_slots, 5);
+        assert_eq!(request.in_progress.len(), 1);
+        assert_eq!(request.in_progress[0].task_id, "task-1");
+        assert_eq!(request.in_progress[0].split_ids, vec!["s1", "s2"]);
+        assert!(request.successes.is_empty());
+        assert!(request.failures.is_empty());
+        universe.assert_quit().await;
+    }
+
+    #[test]
+    fn test_build_report_status_request_empty() {
+        // 4 merge-executions → 8 pipeline slots
+        let supervisor = test_supervisor(4);
+        let request = supervisor.build_report_status_request(&[]);
+        assert_eq!(request.node_id, "test-node");
+        assert_eq!(request.available_slots, 8);
+        assert!(request.in_progress.is_empty());
+        assert!(request.successes.is_empty());
+        assert!(request.failures.is_empty());
+    }
+
+    fn test_assignment(task_id: &str, split_ids: &[&str]) -> MergeTaskAssignment {
+        let index_metadata =
+            quickwit_metastore::IndexMetadata::for_test("test-index", "ram:///test-index");
+        let config = &index_metadata.index_config;
+        let splits: Vec<SplitMetadata> = split_ids
+            .iter()
+            .map(|id| SplitMetadata::for_test(id.to_string()))
+            .collect();
+        MergeTaskAssignment {
+            task_id: task_id.to_string(),
+            splits_metadata_json: splits
+                .iter()
+                .map(|s| serde_json::to_string(s).unwrap())
+                .collect(),
+            doc_mapping_json: serde_json::to_string(&config.doc_mapping).unwrap(),
+            search_settings_json: serde_json::to_string(&config.search_settings).unwrap(),
+            indexing_settings_json: serde_json::to_string(&config.indexing_settings).unwrap(),
+            retention_policy_json: String::new(),
+            index_uid: Some(index_metadata.index_uid.clone()),
+            source_id: "test-source".to_string(),
+            index_storage_uri: config.index_uri.to_string(),
+            merge_level: 1,
+        }
+    }
+
+    #[tokio::test]
+    async fn test_build_compaction_pipeline_deserialization_errors() {
+        let supervisor = test_supervisor(4);
+        let scratch = TempDirectory::for_test;
+
+        // Bad splits JSON.
+        let mut assignment = test_assignment("t", &["s1"]);
+        assignment.splits_metadata_json = vec!["not json".to_string()];
+        assert!(
+            supervisor
+                .build_compaction_pipeline(assignment, scratch())
+                .await
+                .is_err()
+        );
+
+        // Bad doc mapping JSON.
+        let mut assignment = test_assignment("t", &["s1"]);
+        assignment.doc_mapping_json = "not json".to_string();
+        assert!(
+            supervisor
+                .build_compaction_pipeline(assignment, scratch())
+                .await
+                .is_err()
+        );
+
+        // Bad search settings JSON.
+        let mut assignment = test_assignment("t", &["s1"]);
+        assignment.search_settings_json = "not json".to_string();
+        assert!(
+            supervisor
+                .build_compaction_pipeline(assignment, scratch())
+                .await
+                .is_err()
+        );
+
+        // Bad indexing settings JSON.
+        let mut assignment = test_assignment("t", &["s1"]);
+        assignment.indexing_settings_json = "not json".to_string();
+        assert!(
+            supervisor
+                .build_compaction_pipeline(assignment, scratch())
+                .await
+                .is_err()
+        );
+
+        // Bad retention policy JSON (non-empty but invalid).
+        let mut assignment = test_assignment("t", &["s1"]);
+        assignment.retention_policy_json = "not json".to_string();
+        assert!(
+            supervisor
+                .build_compaction_pipeline(assignment, scratch())
+                .await
+                .is_err()
+        );
+
+        // Missing index_uid.
+        let mut assignment = test_assignment("t", &["s1"]);
+        assignment.index_uid = None;
+        assert!(
+            supervisor
+                .build_compaction_pipeline(assignment, scratch())
+                .await
+                .is_err()
+        );
+    }
+
+    #[tokio::test]
+    async fn test_spawn_task_fails_when_all_slots_occupied() {
+        let universe = Universe::new();
+        // 1 merge-execution → 2 pipeline slots
+        let mut supervisor = test_supervisor(1);
+
+        supervisor
+            .spawn_task(test_assignment("task-1", &["s1"]), universe.spawn_ctx())
+            .await
+            .unwrap();
+        supervisor
+            .spawn_task(test_assignment("task-2", &["s2"]), universe.spawn_ctx())
+            .await
+            .unwrap();
+
+        // Both slots InProgress — no room.
+        assert!(
+            supervisor
+                .spawn_task(test_assignment("task-3", &["s3"]), universe.spawn_ctx())
+                .await
+                .is_err()
+        );
+
+        universe.assert_quit().await;
+    }
+
+    #[tokio::test]
+    async fn test_end_to_end_report_status_and_spawn() {
+        let universe = Universe::new();
+
+        // Mock planner that returns one assignment on the first call.
+        let assignment = test_assignment("planner-task-1", &["s1", "s2"]);
+        let assignments = vec![assignment];
+        let assignments_clone = assignments.clone();
+        let mut mock = MockCompactionPlannerService::new();
+        mock.expect_report_status().times(1).returning(move |_req| {
+            Ok(quickwit_proto::compaction::ReportStatusResponse {
+                new_tasks: assignments_clone.clone(),
+            })
+        });
+
+        let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new());
+        let client = CompactionPlannerServiceClient::from_mock(mock);
+        // 3 merge-executions → 6 pipeline slots
+        let compactor_config = CompactorConfig {
+            max_concurrent_merge_executions: NonZeroUsize::new(3).unwrap(),
+            ..CompactorConfig::for_test()
+        };
+        let mut supervisor = CompactorSupervisor::new(
+            NodeId::from("test-node"),
+            client,
+            &compactor_config,
+            metastore,
+            StorageResolver::for_test(),
+            Arc::new(IndexingSplitCache::no_caching()),
+            EventBroker::default(),
+            TempDirectory::for_test(),
+        );
+
+        // Simulate what the handler does: collect statuses, report, process response.
+        let statuses = supervisor.check_pipeline_statuses();
+        let request = supervisor.build_report_status_request(&statuses);
+        assert_eq!(request.available_slots, 6);
+
+        let response = supervisor
+            .planner_client
+            .report_status(request)
+            .await
+            .unwrap();
+        supervisor
+            .process_new_tasks(response.new_tasks, universe.spawn_ctx())
+            .await;
+
+        // Verify the pipeline was spawned.
+        let statuses = supervisor.check_pipeline_statuses();
+        let request = supervisor.build_report_status_request(&statuses);
+        assert_eq!(request.in_progress.len(), 1);
+        assert_eq!(request.in_progress[0].task_id, "planner-task-1");
+        assert_eq!(request.in_progress[0].split_ids.len(), 2);
+        assert_eq!(request.available_slots, 5);
+
+        universe.assert_quit().await;
+    }
+
+    #[test]
+    fn test_build_report_status_request_mixed_statuses() {
+        // 4 merge-executions → 8 pipeline slots
+        let supervisor = test_supervisor(4);
+        let statuses = vec![
+            PipelineStatusUpdate {
+                task_id: "task-1".to_string(),
+                index_uid: quickwit_proto::types::IndexUid::for_test("test-index", 0),
+                source_id: "src".to_string(),
+                split_ids: vec!["s1".to_string(), "s2".to_string()],
+                status: PipelineStatus::InProgress,
+            },
+            PipelineStatusUpdate {
+                task_id: "task-2".to_string(),
+                index_uid: quickwit_proto::types::IndexUid::for_test("test-index", 0),
+                source_id: "src".to_string(),
+                split_ids: vec!["s3".to_string()],
+                status: PipelineStatus::Completed,
+            },
+            PipelineStatusUpdate {
+                task_id: "task-3".to_string(),
+                index_uid: quickwit_proto::types::IndexUid::for_test("test-index", 0),
+                source_id: "src".to_string(),
+                split_ids: vec!["s4".to_string()],
+                status: PipelineStatus::Failed {
+                    error: "boom".to_string(),
+                },
+            },
+        ];
+
+        let request = supervisor.build_report_status_request(&statuses);
+
+        // 8 slots, 1 in-progress = 7 available
+        assert_eq!(request.available_slots, 7);
+        assert_eq!(request.in_progress.len(), 1);
+        assert_eq!(request.in_progress[0].task_id, "task-1");
+        assert_eq!(request.in_progress[0].split_ids, vec!["s1", "s2"]);
+        assert_eq!(request.successes.len(), 1);
+        assert_eq!(request.successes[0].task_id, "task-2");
+        assert_eq!(request.failures.len(), 1);
+        assert_eq!(request.failures[0].task_id, "task-3");
+        assert_eq!(request.failures[0].error_message, "boom");
+    }
+}
diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs
new file mode 100644
index 00000000000..ada03eb1294
--- /dev/null
+++ b/quickwit/quickwit-compaction/src/lib.rs
@@ -0,0 +1,67 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#![deny(clippy::disallowed_methods)]
+
+mod compaction_pipeline;
+mod compactor_supervisor;
+mod metrics;
+pub mod planner;
+
+pub type TaskId = String;
+
+use std::sync::Arc;
+
+pub use compactor_supervisor::CompactorSupervisor;
+use quickwit_actors::{Mailbox, Universe};
+use quickwit_common::pubsub::EventBroker;
+use quickwit_common::temp_dir::TempDirectory;
+use quickwit_config::CompactorConfig;
+use quickwit_indexing::IndexingSplitCache;
+use quickwit_proto::compaction::CompactionPlannerServiceClient;
+use quickwit_proto::metastore::MetastoreServiceClient;
+use quickwit_proto::types::{IndexUid, NodeId, SourceId};
+use quickwit_storage::StorageResolver;
+use tracing::info;
+
+pub fn source_uid_metrics_label(index_uid: &IndexUid, source_id: &SourceId) -> String {
+    format!("{}-{}", index_uid, source_id)
+}
+
+#[allow(clippy::too_many_arguments)]
+pub async fn start_compactor_service(
+    universe: &Universe,
+    node_id: NodeId,
+    compaction_client: CompactionPlannerServiceClient,
+    compactor_config: &CompactorConfig,
+    metastore: MetastoreServiceClient,
+    storage_resolver: StorageResolver,
+    split_cache: Arc<IndexingSplitCache>,
+    event_broker: EventBroker,
+    compaction_root_directory: TempDirectory,
+) -> anyhow::Result<Mailbox<CompactorSupervisor>> {
+    info!("starting compactor service");
+    let supervisor = CompactorSupervisor::new(
+        node_id,
+        compaction_client,
+        compactor_config,
+        metastore,
+        storage_resolver,
+        split_cache,
+        event_broker,
+        compaction_root_directory,
+    );
+    let (mailbox, _handle) = universe.spawn_builder().spawn(supervisor);
+    Ok(mailbox)
+}
diff --git a/quickwit/quickwit-compaction/src/metrics.rs b/quickwit/quickwit-compaction/src/metrics.rs
new file mode 100644
index 00000000000..64243204e91
--- /dev/null
+++ b/quickwit/quickwit-compaction/src/metrics.rs
@@ -0,0 +1,53 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use quickwit_common::metrics::exponential_buckets;
+use quickwit_metrics::{
+    LabelNames, LazyCounter, LazyGauge, LazyHistogram, label_names, lazy_counter, lazy_gauge,
+    lazy_histogram,
+};
+
+/// Shared label template for the per-compaction metrics keyed by source and merge level.
+pub(crate) const SOURCE_UID_MERGE_LEVEL: LabelNames<2> = label_names!("source_uid", "merge_level");
+
+pub(crate) static COMPACTIONS_IN_PROGRESS: LazyGauge = lazy_gauge!(
+    name: "compactions_in_progress",
+    description: "number of compaction merge operations currently running on this compactor",
+    subsystem: "compactor",
+);
+
+pub(crate) static COMPACTIONS_FAILED: LazyCounter = lazy_counter!(
+    name: "compactions_failed",
+    description: "total number of compaction merge operations that have failed",
+    subsystem: "compactor",
+);
+
+pub(crate) static COMPACTIONS_SUCCEEDED: LazyCounter = lazy_counter!(
+    name: "compactions_succeeded",
+    description: "total number of compaction merge operations that have completed successfully",
+    subsystem: "compactor",
+);
+
+pub(crate) static AVAILABLE_SLOTS: LazyGauge = lazy_gauge!(
+    name: "available_slots",
+    description: "number of compaction slots currently available on this compactor",
+    subsystem: "compactor",
+);
+
+pub(crate) static COMPACTION_DURATION: LazyHistogram = lazy_histogram!(
+    name: "compaction_duration_seconds",
+    description: "duration of compaction merge operations in seconds",
+    subsystem: "compactor",
+    buckets: exponential_buckets(0.5, 2.0, 14).expect("compaction duration buckets should be valid"),
+);
diff --git a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs
new file mode 100644
index 00000000000..77546099715
--- /dev/null
+++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs
@@ -0,0 +1,643 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::Debug;
+use std::time::Duration;
+
+use anyhow::Result;
+use async_trait::async_trait;
+use itertools::Itertools;
+use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler};
+use quickwit_metastore::{
+    ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, Split, SplitState,
+};
+use quickwit_metrics::{counter, label_values};
+use quickwit_proto::compaction::{
+    CompactionResult, MergeTaskAssignment, ReportStatusRequest, ReportStatusResponse,
+};
+use quickwit_proto::metastore::{ListSplitsRequest, MetastoreService, MetastoreServiceClient};
+use quickwit_proto::types::NodeId;
+use time::OffsetDateTime;
+use tracing::{error, info};
+use ulid::Ulid;
+
+use super::PendingMerge;
+use super::compaction_state::CompactionState;
+use super::index_config_metastore::{IndexConfigMetastore, IndexEntry};
+use crate::planner::metrics::{METASTORE_ERRORS, NEW_SPLITS_SCANNED, OPERATION, SOURCE_UID};
+
+/// Cap on splits fetched per tick. Every tick, the planner re-scans the immature published set,
+/// sorted by `maturity_timestamp` ASC so the most-urgent splits are processed first when a backlog
+/// exists. Splits beyond this cap aren't lost -- they bubble into range as the front of the queue
+/// is merged off.
+const SCAN_PAGE_SIZE: usize = 5_000;
+
+/// Cap on the size of the `excluded_split_ids` list we send to the metastore.
+/// It's a sanity max rather than some invariant.
+const MAX_EXCLUDED_SPLIT_IDS: usize = 50_000;
+
+#[derive(Debug)]
+pub struct CompactionPlanner {
+    state: CompactionState,
+    index_config_metastore: IndexConfigMetastore,
+    metastore: MetastoreServiceClient,
+}
+
+const SCAN_AND_PLAN_INTERVAL: Duration = Duration::from_secs(5);
+/// On initialization, we want to wait for two intervals to allow any in-progress workers to report
+/// their progress, preventing us from frivolously rescheduling work.
+const INITIAL_SCAN_AND_PLAN_INTERVAL: Duration = SCAN_AND_PLAN_INTERVAL.saturating_mul(2);
+
+#[derive(Debug)]
+struct ScanAndPlan;
+
+#[async_trait]
+impl Actor for CompactionPlanner {
+    type ObservableState = ();
+
+    fn name(&self) -> String {
+        "CompactionPlanner".to_string()
+    }
+
+    fn observable_state(&self) -> Self::ObservableState {}
+
+    async fn initialize(&mut self, ctx: &ActorContext<Self>) -> Result<(), ActorExitStatus> {
+        info!("compaction planner starting, scanning metastore for immature splits");
+        ctx.schedule_self_msg(INITIAL_SCAN_AND_PLAN_INTERVAL, ScanAndPlan);
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl Handler<ScanAndPlan> for CompactionPlanner {
+    type Reply = ();
+
+    async fn handle(
+        &mut self,
+        _msg: ScanAndPlan,
+        ctx: &ActorContext<Self>,
+    ) -> Result<(), ActorExitStatus> {
+        if let Err(error) = ctx.protect_future(self.scan_and_plan()).await {
+            error!(%error, "error scanning metastore and planning merges");
+        }
+        self.state.check_heartbeat_timeouts();
+        ctx.schedule_self_msg(SCAN_AND_PLAN_INTERVAL, ScanAndPlan);
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl Handler<ReportStatusRequest> for CompactionPlanner {
+    type Reply = CompactionResult<ReportStatusResponse>;
+
+    async fn handle(
+        &mut self,
+        msg: ReportStatusRequest,
+        _ctx: &ActorContext<Self>,
+    ) -> Result<CompactionResult<ReportStatusResponse>, ActorExitStatus> {
+        let node_id = NodeId::from(msg.node_id);
+        self.state.process_successes(&msg.successes);
+        self.state.process_failures(&msg.failures);
+        self.state.update_heartbeats(&node_id, &msg.in_progress);
+        let new_tasks = self.assign_tasks(&node_id, msg.available_slots);
+        Ok(Ok(ReportStatusResponse { new_tasks }))
+    }
+}
+
+impl CompactionPlanner {
+    pub fn new(metastore: MetastoreServiceClient) -> Self {
+        CompactionPlanner {
+            state: CompactionState::new(),
+            index_config_metastore: IndexConfigMetastore::new(metastore.clone()),
+            metastore,
+        }
+    }
+
+    async fn ingest_splits(&mut self, splits: Vec<Split>) {
+        for split in splits {
+            if self.state.is_split_tracked(&split.split_metadata.split_id) {
+                continue;
+            }
+            let Ok(index_entry) = self
+                .index_config_metastore
+                .get_for_split(&split.split_metadata)
+                .await
+            else {
+                error!(split_id=%split.split_metadata.split_id, "failed to load index config, skipping split");
+                continue;
+            };
+            if index_entry.is_split_mature(&split.split_metadata) {
+                continue;
+            }
+            self.state.track_split(split.split_metadata);
+        }
+    }
+
+    async fn scan_metastore(&self) -> Result<Vec<Split>> {
+        let excluded_split_ids = self.state.tracked_split_ids(MAX_EXCLUDED_SPLIT_IDS);
+        let query = ListSplitsQuery::for_all_indexes()
+            .with_split_state(SplitState::Published)
+            .retain_immature(OffsetDateTime::now_utc())
+            .sort_by_maturity_timestamp()
+            .with_limit(SCAN_PAGE_SIZE)
+            .with_excluded_split_ids(excluded_split_ids);
+        let request = ListSplitsRequest::try_from_list_splits_query(&query)?;
+        let splits = self
+            .metastore
+            .list_splits(request)
+            .await
+            .inspect_err(|error| {
+                error!(%error, "[compaction-planner] error calling metastore list_splits");
+                let labels = label_values!(OPERATION => "scan");
+                counter!(parent: METASTORE_ERRORS, labels: [labels]).inc();
+            })?
+            .collect_splits()
+            .await
+            .inspect_err(|error| {
+                error!(%error, "[compaction-planner] error collecting metastore splits");
+                let labels = label_values!(OPERATION => "collect_splits");
+                counter!(parent: METASTORE_ERRORS, labels: [labels]).inc();
+            })?;
+        emit_metastore_scan_metrics(&splits);
+        Ok(splits)
+    }
+
+    async fn scan_and_plan(&mut self) -> Result<()> {
+        let splits = self.scan_metastore().await?;
+        self.ingest_splits(splits).await;
+        self.run_merge_policies();
+        self.state.emit_metrics();
+        Ok(())
+    }
+
+    fn run_merge_policies(&mut self) {
+        for partition_key in self.state.partition_keys() {
+            if let Some(index_entry) = self.index_config_metastore.get(&partition_key.index_uid) {
+                self.state
+                    .plan_partition(&partition_key, index_entry.merge_policy());
+            }
+        }
+    }
+
+    fn assign_tasks(&mut self, node_id: &NodeId, available_slots: u32) -> Vec<MergeTaskAssignment> {
+        let pending_merge_ops = self.state.pop_pending(available_slots as usize);
+        let mut assignments = Vec::with_capacity(pending_merge_ops.len());
+
+        for merge_op in pending_merge_ops {
+            let task_id = Ulid::new().to_string();
+            let Some(index_entry) = self.index_config_metastore.get(&merge_op.index_uid) else {
+                error!(index_uid=%merge_op.index_uid, "index config not found for pending operation, skipping");
+                continue;
+            };
+            let assignment = build_task_assignment(&task_id, index_entry, &merge_op);
+
+            let split_ids = merge_op
+                .operation
+                .splits_as_slice()
+                .iter()
+                .map(|s| s.split_id().to_string())
+                .collect();
+            self.state
+                .record_assignment(task_id, split_ids, node_id.clone());
+
+            assignments.push(assignment);
+        }
+        assignments
+    }
+}
+
+fn emit_metastore_scan_metrics(new_splits: &[Split]) {
+    let size = new_splits.len();
+    info!(%size, "[compaction planner] new splits scanned from metastore");
+    let counts = new_splits
+        .iter()
+        .counts_by(|split| &split.split_metadata.index_uid);
+    for (&index_uid, &count) in counts.iter() {
+        let labels = label_values!(SOURCE_UID => index_uid.to_string());
+        counter!(parent: NEW_SPLITS_SCANNED, labels: [labels]).inc_by(count as u64);
+    }
+}
+
+fn build_task_assignment(
+    task_id: &str,
+    index_entry: &IndexEntry,
+    merge_op: &PendingMerge,
+) -> MergeTaskAssignment {
+    MergeTaskAssignment {
+        task_id: task_id.to_string(),
+        splits_metadata_json: merge_op
+            .operation
+            .splits_as_slice()
+            .iter()
+            .map(|s| {
+                serde_json::to_string(s).expect("split metadata serialization should not fail")
+            })
+            .collect(),
+        doc_mapping_json: index_entry.doc_mapping_json(),
+        search_settings_json: index_entry.search_settings_json(),
+        indexing_settings_json: index_entry.indexing_settings_json(),
+        retention_policy_json: index_entry.retention_policy_json(),
+        index_uid: Some(merge_op.index_uid.clone()),
+        source_id: merge_op.source_id.clone(),
+        index_storage_uri: index_entry.index_storage_uri(),
+        merge_level: merge_op.operation.merge_level() as u64,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Bound;
+    use std::time::Duration;
+
+    use quickwit_common::ServiceStream;
+    use quickwit_config::IndexingSettings;
+    use quickwit_config::merge_policy_config::{
+        ConstWriteAmplificationMergePolicyConfig, MergePolicyConfig,
+    };
+    use quickwit_metastore::{
+        IndexMetadata, IndexMetadataResponseExt, ListSplitsRequestExt, ListSplitsResponseExt,
+        SortBy, Split, SplitMaturity, SplitMetadata, SplitState,
+    };
+    use quickwit_proto::compaction::{CompactionInProgress, CompactionSuccess};
+    use quickwit_proto::metastore::{
+        IndexMetadataResponse, ListSplitsResponse, MetastoreError, MockMetastoreService,
+    };
+    use quickwit_proto::types::IndexUid;
+    use time::OffsetDateTime;
+
+    use super::*;
+
+    fn test_split(split_id: &str, index_uid: &IndexUid, update_timestamp: i64) -> Split {
+        Split {
+            split_state: SplitState::Published,
+            update_timestamp,
+            publish_timestamp: Some(update_timestamp),
+            split_metadata: SplitMetadata {
+                split_id: split_id.to_string(),
+                index_uid: index_uid.clone(),
+                source_id: "test-source".to_string(),
+                node_id: "test-node".to_string(),
+                num_docs: 100,
+                create_timestamp: OffsetDateTime::now_utc().unix_timestamp(),
+                maturity: SplitMaturity::Immature {
+                    maturation_period: Duration::from_secs(3600),
+                },
+                ..Default::default()
+            },
+        }
+    }
+
+    fn test_index_metadata() -> IndexMetadata {
+        IndexMetadata::for_test("test-index", "ram:///test-index")
+    }
+
+    /// Returns an IndexMetadata with merge_factor=2 so two splits trigger a merge.
+    fn test_index_metadata_with_merge_factor_2() -> IndexMetadata {
+        let mut metadata = test_index_metadata();
+        metadata.index_config.indexing_settings = IndexingSettings {
+            merge_policy: MergePolicyConfig::ConstWriteAmplification(
+                ConstWriteAmplificationMergePolicyConfig {
+                    merge_factor: 2,
+                    max_merge_factor: 2,
+                    ..Default::default()
+                },
+            ),
+            ..Default::default()
+        };
+        metadata
+    }
+
+    fn test_index_metadata_response(index_metadata: &IndexMetadata) -> IndexMetadataResponse {
+        IndexMetadataResponse::try_from_index_metadata(index_metadata).unwrap()
+    }
+
+    #[tokio::test]
+    async fn test_scan_metastore_query_shape_and_passthrough() {
+        let index_uid = IndexUid::for_test("test-index", 0);
+        let returned_splits = vec![
+            test_split("a", &index_uid, 1000),
+            test_split("b", &index_uid, 2000),
+        ];
+        let returned_clone = returned_splits.clone();
+        let scan_started_at = OffsetDateTime::now_utc().unix_timestamp();
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_list_splits().returning(move |req| {
+            let query = req.deserialize_list_splits_query().unwrap();
+
+            assert_eq!(query.split_states, vec![SplitState::Published]);
+            assert_eq!(query.limit, Some(SCAN_PAGE_SIZE));
+            assert_eq!(query.sort_by, SortBy::MaturityTimestamp);
+
+            let Bound::Excluded(mature_at) = query.mature else {
+                panic!("expected Excluded mature bound, got {:?}", query.mature);
+            };
+            let now_secs = OffsetDateTime::now_utc().unix_timestamp();
+            assert!(mature_at.unix_timestamp() >= scan_started_at);
+            assert!(mature_at.unix_timestamp() <= now_secs);
+
+            assert_eq!(query.update_timestamp.start, Bound::Unbounded);
+            assert_eq!(query.update_timestamp.end, Bound::Unbounded);
+
+            // No tracked splits → empty exclude list. The non-empty case is
+            // covered by `test_scan_metastore_excludes_tracked_splits`.
+            assert!(query.excluded_split_ids.is_empty());
+
+            let response = ListSplitsResponse::try_from_splits(returned_clone.clone()).unwrap();
+            Ok(ServiceStream::from(vec![Ok(response)]))
+        });
+
+        let planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock));
+        let result = planner.scan_metastore().await.unwrap();
+
+        assert_eq!(result.len(), 2);
+        assert_eq!(result[0].split_metadata.split_id, "a");
+        assert_eq!(result[1].split_metadata.split_id, "b");
+    }
+
+    #[tokio::test]
+    async fn test_scan_metastore_excludes_tracked_splits() {
+        let index_uid = IndexUid::for_test("test-index", 0);
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_list_splits().returning(move |req| {
+            let query = req.deserialize_list_splits_query().unwrap();
+            let mut excluded = query.excluded_split_ids.clone();
+            excluded.sort();
+            assert_eq!(
+                excluded,
+                vec!["in-flight".to_string(), "tracked".to_string()]
+            );
+
+            let response = ListSplitsResponse::try_from_splits(Vec::new()).unwrap();
+            Ok(ServiceStream::from(vec![Ok(response)]))
+        });
+
+        let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock));
+        planner.state.track_split(SplitMetadata {
+            split_id: "tracked".to_string(),
+            index_uid: index_uid.clone(),
+            ..Default::default()
+        });
+        planner.state.update_heartbeats(
+            &NodeId::from("test-node"),
+            &[CompactionInProgress {
+                task_id: "t-1".to_string(),
+                index_uid: Some(index_uid.clone()),
+                source_id: "src".to_string(),
+                split_ids: vec!["in-flight".to_string()],
+            }],
+        );
+
+        planner.scan_metastore().await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_ingest_splits_dedups_and_skips_mature() {
+        let index_metadata = test_index_metadata();
+        let response = test_index_metadata_response(&index_metadata);
+        let index_uid = index_metadata.index_uid.clone();
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_index_metadata()
+            .returning(move |_| Ok(response.clone()));
+
+        let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock));
+
+        // Pre-populate: "in-flight" is already being compacted.
+        planner.state.track_split(SplitMetadata {
+            split_id: "in-flight".to_string(),
+            index_uid: index_uid.clone(),
+            ..Default::default()
+        });
+
+        let mut mature_split = test_split("mature", &index_uid, 4000);
+        mature_split.split_metadata.num_docs = 20_000_000;
+
+        let splits = vec![
+            test_split("in-flight", &index_uid, 1000),
+            test_split("fresh", &index_uid, 3000),
+            mature_split,
+        ];
+
+        planner.ingest_splits(splits).await;
+
+        assert!(planner.state.is_split_tracked("fresh"));
+        assert!(planner.state.is_split_tracked("in-flight"));
+        assert!(!planner.state.is_split_tracked("mature"));
+    }
+
+    #[tokio::test]
+    async fn test_scan_and_plan_propagates_metastore_error() {
+        let mut mock = MockMetastoreService::new();
+        mock.expect_list_splits().returning(|_| {
+            Err(MetastoreError::Internal {
+                message: "test error".to_string(),
+                cause: String::new(),
+            })
+        });
+
+        let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock));
+        assert!(planner.scan_and_plan().await.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_ingest_splits_skips_on_config_error() {
+        let index_uid = IndexUid::for_test("missing-index", 0);
+        let splits = vec![test_split("orphan", &index_uid, 1000)];
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_index_metadata().returning(|_| {
+            Err(MetastoreError::Internal {
+                message: "test error".to_string(),
+                cause: String::new(),
+            })
+        });
+
+        let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock));
+        planner.ingest_splits(splits).await;
+
+        assert!(!planner.state.is_split_tracked("orphan"));
+    }
+
+    #[tokio::test]
+    async fn test_scan_and_plan_happy_path() {
+        let index_metadata = test_index_metadata();
+        let index_metadata_response = test_index_metadata_response(&index_metadata);
+        let index_uid = index_metadata.index_uid.clone();
+
+        let splits = vec![
+            test_split("s1", &index_uid, 5000),
+            test_split("s2", &index_uid, 6000),
+        ];
+        let splits_clone = splits.clone();
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_list_splits().returning(move |_| {
+            let response = ListSplitsResponse::try_from_splits(splits_clone.clone()).unwrap();
+            Ok(ServiceStream::from(vec![Ok(response)]))
+        });
+        mock.expect_index_metadata()
+            .returning(move |_| Ok(index_metadata_response.clone()));
+
+        let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock));
+        planner.scan_and_plan().await.unwrap();
+
+        assert!(planner.state.is_split_tracked("s1"));
+        assert!(planner.state.is_split_tracked("s2"));
+    }
+
+    #[tokio::test]
+    async fn test_failed_task_is_retracked_on_next_scan() {
+        // After a worker reports failure (or times out), planner-local
+        // tracking is cleared. Because there is no cursor, the next scan
+        // rediscovers the still-Published, still-immature splits and
+        // re-tracks them.
+        let index_metadata = test_index_metadata_with_merge_factor_2();
+        let index_metadata_response = test_index_metadata_response(&index_metadata);
+        let index_uid = index_metadata.index_uid.clone();
+
+        let splits = vec![
+            test_split("s1", &index_uid, 1000),
+            test_split("s2", &index_uid, 2000),
+        ];
+        let splits_clone = splits.clone();
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_list_splits().returning(move |_| {
+            let response = ListSplitsResponse::try_from_splits(splits_clone.clone()).unwrap();
+            Ok(ServiceStream::from(vec![Ok(response)]))
+        });
+        mock.expect_index_metadata()
+            .returning(move |_| Ok(index_metadata_response.clone()));
+
+        let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock));
+        let node_id = NodeId::from("worker-1");
+
+        planner.scan_and_plan().await.unwrap();
+        let assignments = planner.assign_tasks(&node_id, 10);
+        assert_eq!(assignments.len(), 1);
+        let task_id = assignments[0].task_id.clone();
+        assert!(planner.state.is_split_tracked("s1"));
+        assert!(planner.state.is_split_tracked("s2"));
+
+        // Worker reports failure; planner forgets the splits.
+        planner
+            .state
+            .process_failures(&[quickwit_proto::compaction::CompactionFailure {
+                task_id,
+                error_message: "boom".to_string(),
+            }]);
+        assert!(!planner.state.is_split_tracked("s1"));
+        assert!(!planner.state.is_split_tracked("s2"));
+
+        // Next scan rediscovers them and re-tracks them.
+        planner.scan_and_plan().await.unwrap();
+        assert!(planner.state.is_split_tracked("s1"));
+        assert!(planner.state.is_split_tracked("s2"));
+    }
+
+    /// Helper: creates a planner with merge_factor=2, ingests the given splits,
+    /// and runs merge policies. Returns the planner ready for `assign_tasks`.
+    async fn planner_with_pending_merges(split_ids: &[&str]) -> (CompactionPlanner, IndexUid) {
+        let index_metadata = test_index_metadata_with_merge_factor_2();
+        let index_uid = index_metadata.index_uid.clone();
+        let response = test_index_metadata_response(&index_metadata);
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_index_metadata()
+            .returning(move |_| Ok(response.clone()));
+        mock.expect_list_splits().returning(|_| {
+            Ok(ServiceStream::from(vec![Ok(
+                ListSplitsResponse::try_from_splits(Vec::new()).unwrap(),
+            )]))
+        });
+
+        let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock));
+
+        let splits: Vec<Split> = split_ids
+            .iter()
+            .enumerate()
+            .map(|(i, id)| test_split(id, &index_uid, (i + 1) as i64 * 1000))
+            .collect();
+        planner.ingest_splits(splits).await;
+        planner.run_merge_policies();
+        (planner, index_uid)
+    }
+
+    #[tokio::test]
+    async fn test_assign_tasks_returns_assignments_and_drains_queue() {
+        let (mut planner, index_uid) = planner_with_pending_merges(&["s1", "s2"]).await;
+        let node_id = NodeId::from("worker-1");
+
+        // First call: get the assignment.
+        let assignments = planner.assign_tasks(&node_id, 10);
+        assert_eq!(assignments.len(), 1);
+
+        let assignment = &assignments[0];
+        assert!(!assignment.task_id.is_empty());
+        assert_eq!(assignment.splits_metadata_json.len(), 2);
+        assert_eq!(assignment.index_uid, Some(index_uid));
+        assert_eq!(assignment.source_id, "test-source");
+        assert!(!assignment.doc_mapping_json.is_empty());
+        assert!(!assignment.index_storage_uri.is_empty());
+
+        // Second call: queue is drained, no more assignments.
+        let assignments = planner.assign_tasks(&node_id, 10);
+        assert!(assignments.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_assign_tasks_respects_available_slots() {
+        // 4 splits with merge_factor=2 produces 2 merge operations.
+        let (mut planner, _) = planner_with_pending_merges(&["s1", "s2", "s3", "s4"]).await;
+        let node_id = NodeId::from("worker-1");
+
+        // Request only 1 slot.
+        let assignments = planner.assign_tasks(&node_id, 1);
+        assert_eq!(assignments.len(), 1);
+
+        // The remaining operation is still pending.
+        let assignments = planner.assign_tasks(&node_id, 10);
+        assert_eq!(assignments.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn test_report_status_success_frees_splits_for_future_merges() {
+        let (mut planner, index_uid) = planner_with_pending_merges(&["s1", "s2"]).await;
+        let node_id = NodeId::from("worker-1");
+
+        let assignments = planner.assign_tasks(&node_id, 10);
+        assert_eq!(assignments.len(), 1);
+        let task_id = assignments[0].task_id.clone();
+
+        // Report success for the task.
+        planner
+            .state
+            .process_successes(&[CompactionSuccess { task_id }]);
+
+        // The original splits are no longer tracked. Re-ingesting them
+        // (simulating the merged output being immature) creates new work.
+        let new_splits = vec![
+            test_split("s5", &index_uid, 5000),
+            test_split("s6", &index_uid, 6000),
+        ];
+        planner.ingest_splits(new_splits).await;
+        planner.run_merge_policies();
+
+        let assignments = planner.assign_tasks(&node_id, 10);
+        assert_eq!(assignments.len(), 1);
+        assert_eq!(assignments[0].splits_metadata_json.len(), 2);
+    }
+}
diff --git a/quickwit/quickwit-compaction/src/planner/compaction_state.rs b/quickwit/quickwit-compaction/src/planner/compaction_state.rs
new file mode 100644
index 00000000000..59113070da8
--- /dev/null
+++ b/quickwit/quickwit-compaction/src/planner/compaction_state.rs
@@ -0,0 +1,467 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::{HashMap, HashSet};
+use std::fmt::Debug;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+use itertools::Itertools;
+use quickwit_indexing::merge_policy::MergePolicy;
+use quickwit_metastore::SplitMetadata;
+use quickwit_metrics::{gauge, label_values};
+use quickwit_proto::compaction::{CompactionFailure, CompactionInProgress, CompactionSuccess};
+use quickwit_proto::types::{DocMappingUid, IndexUid, NodeId, SourceId, SplitId};
+use tracing::{error, info, warn};
+
+use crate::planner::metrics::{SOURCE_UID, SPLITS_NEEDING_COMPACTION, TIMED_OUT_OPERATIONS};
+use crate::planner::{PendingMerge, PendingOperations};
+use crate::{TaskId, source_uid_metrics_label};
+
+const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(60);
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct CompactionPartitionKey {
+    pub index_uid: IndexUid,
+    pub source_id: SourceId,
+    pub partition_id: u64,
+    pub doc_mapping_uid: DocMappingUid,
+}
+
+impl CompactionPartitionKey {
+    pub fn from_split(split: &SplitMetadata) -> Self {
+        CompactionPartitionKey {
+            index_uid: split.index_uid.clone(),
+            source_id: split.source_id.clone(),
+            partition_id: split.partition_id,
+            doc_mapping_uid: split.doc_mapping_uid,
+        }
+    }
+}
+
+#[derive(Debug)]
+struct InFlightCompaction {
+    split_ids: Vec<SplitId>,
+    node_id: NodeId,
+    last_heartbeat: Instant,
+}
+
+/// Tracks all split-level state for the compaction planner:
+/// which splits need compaction, which are in-flight, and which
+/// operations are pending assignment to workers.
+#[derive(Debug)]
+pub struct CompactionState {
+    needs_compaction: HashMap<CompactionPartitionKey, Vec<SplitMetadata>>,
+    needs_compaction_split_ids: HashSet<SplitId>,
+    in_flight: HashMap<TaskId, InFlightCompaction>,
+    in_flight_split_ids: HashSet<SplitId>,
+    pending_operations: PendingOperations,
+}
+
+impl CompactionState {
+    pub fn new() -> Self {
+        CompactionState {
+            needs_compaction: HashMap::new(),
+            needs_compaction_split_ids: HashSet::new(),
+            in_flight: HashMap::new(),
+            in_flight_split_ids: HashSet::new(),
+            pending_operations: PendingOperations::new(),
+        }
+    }
+
+    /// Returns true if the split is already tracked (either awaiting compaction or in-flight).
+    pub fn is_split_tracked(&self, split_id: &str) -> bool {
+        self.needs_compaction_split_ids.contains(split_id)
+            || self.in_flight_split_ids.contains(split_id)
+    }
+
+    /// All split IDs the planner is currently tracking. Used by the metastore
+    /// scan to exclude already-known splits so each tick returns fresh work.
+    pub fn tracked_split_ids(&self, max_size: usize) -> Vec<SplitId> {
+        let mut out = Vec::with_capacity(
+            (self.needs_compaction_split_ids.len() + self.in_flight_split_ids.len()).min(max_size),
+        );
+        out.extend(
+            self.needs_compaction_split_ids
+                .iter()
+                .chain(self.in_flight_split_ids.iter())
+                .take(max_size)
+                .cloned(),
+        );
+        out
+    }
+
+    /// Adds a split to the needs_compaction set.
+    pub fn track_split(&mut self, split: SplitMetadata) {
+        let split_id = split.split_id().to_string();
+        let key = CompactionPartitionKey::from_split(&split);
+        self.needs_compaction_split_ids.insert(split_id);
+        self.needs_compaction.entry(key).or_default().push(split);
+    }
+
+    /// Returns the partition keys for iteration. The caller drives the loop.
+    pub fn partition_keys(&self) -> Vec<CompactionPartitionKey> {
+        self.needs_compaction.keys().cloned().collect()
+    }
+
+    /// Runs a merge policy on a single partition and queues the resulting operations.
+    pub fn plan_partition(
+        &mut self,
+        partition_key: &CompactionPartitionKey,
+        merge_policy: &Arc<dyn MergePolicy>,
+    ) {
+        let Some(splits) = self.needs_compaction.get_mut(partition_key) else {
+            return;
+        };
+        // `MergePolicy::operations` emits at most one op per level per call, which under a backlog
+        // leaves the bulk of `splits` untouched per tick. Loop until no new operations are created.
+        loop {
+            let operations = merge_policy.operations(splits);
+            if operations.is_empty() {
+                break;
+            }
+            for operation in operations {
+                for split in &operation.splits {
+                    self.needs_compaction_split_ids.remove(split.split_id());
+                    self.in_flight_split_ids
+                        .insert(split.split_id().to_string());
+                }
+                self.pending_operations.push(PendingMerge::new(operation));
+            }
+        }
+        if splits.is_empty() {
+            self.needs_compaction.remove(partition_key);
+        }
+    }
+
+    pub fn process_successes(&mut self, successes: &[CompactionSuccess]) {
+        for success in successes {
+            if let Some(inflight) = self.in_flight.remove(&success.task_id) {
+                info!(task_id=%success.task_id, "compaction task completed");
+                for split_id in &inflight.split_ids {
+                    self.in_flight_split_ids.remove(split_id.as_str());
+                }
+            }
+        }
+    }
+
+    pub fn process_failures(&mut self, failures: &[CompactionFailure]) {
+        for failure in failures {
+            if let Some(inflight) = self.in_flight.remove(&failure.task_id) {
+                warn!(task_id=%failure.task_id, error=%failure.error_message, "compaction task failed");
+                for split_id in &inflight.split_ids {
+                    // these splits will be picked up again on the next metastore scan.
+                    self.in_flight_split_ids.remove(split_id.as_str());
+                }
+            }
+        }
+    }
+
+    pub fn update_heartbeats(&mut self, node_id: &NodeId, in_progress: &[CompactionInProgress]) {
+        for task in in_progress {
+            if let Some(inflight) = self.in_flight.get_mut(&task.task_id) {
+                inflight.last_heartbeat = Instant::now();
+            } else {
+                // Task not tracked — start tracking it. Happens when
+                // workers report tasks the planner doesn't know about yet
+                // (e.g. after planner start).
+                for split_id in &task.split_ids {
+                    self.in_flight_split_ids.insert(split_id.clone());
+                    self.needs_compaction_split_ids.remove(split_id.as_str());
+                }
+                self.in_flight.insert(
+                    task.task_id.clone(),
+                    InFlightCompaction {
+                        split_ids: task.split_ids.clone(),
+                        node_id: node_id.clone(),
+                        last_heartbeat: Instant::now(),
+                    },
+                );
+            }
+        }
+    }
+
+    pub fn check_heartbeat_timeouts(&mut self) {
+        let now = Instant::now();
+        let timed_out_task_ids: Vec<TaskId> = self
+            .in_flight
+            .iter()
+            .filter(|(_, inflight)| now.duration_since(inflight.last_heartbeat) > HEARTBEAT_TIMEOUT)
+            .map(|(task_id, _)| task_id.clone())
+            .collect();
+
+        for task_id in timed_out_task_ids {
+            if let Some(inflight) = self.in_flight.remove(&task_id) {
+                error!(%task_id, node_id=%inflight.node_id, "compaction task timed out");
+                TIMED_OUT_OPERATIONS.inc();
+                for split_id in &inflight.split_ids {
+                    // these splits will be picked up again on the next metastore scan.
+                    self.in_flight_split_ids.remove(split_id.as_str());
+                }
+            }
+        }
+    }
+
+    /// Pops up to `count` pending merges for assignment, highest-priority first.
+    pub fn pop_pending(&mut self, count: usize) -> Vec<PendingMerge> {
+        let count = count.min(self.pending_operations.len());
+        let mut pending = Vec::with_capacity(count);
+        for _ in 0..count {
+            pending.push(self.pending_operations.pop().unwrap());
+        }
+        pending
+    }
+
+    /// Records that an operation has been assigned to a worker.
+    pub fn record_assignment(&mut self, task_id: TaskId, split_ids: Vec<SplitId>, node_id: NodeId) {
+        self.in_flight.insert(
+            task_id,
+            InFlightCompaction {
+                split_ids,
+                node_id,
+                last_heartbeat: Instant::now(),
+            },
+        );
+    }
+
+    pub fn emit_metrics(&self) {
+        // total number of splits that need to be merged by compaction partition key
+        self.needs_compaction
+            .iter()
+            .map(|(compaction_partition_key, splits)| {
+                (
+                    source_uid_metrics_label(
+                        &compaction_partition_key.index_uid,
+                        &compaction_partition_key.source_id,
+                    ),
+                    splits.len() as i64,
+                )
+            })
+            .into_grouping_map()
+            .sum()
+            .iter()
+            .for_each(|(partition_key, &total_splits)| {
+                let labels = label_values!(SOURCE_UID => partition_key.clone());
+                gauge!(parent: SPLITS_NEEDING_COMPACTION, labels: [labels])
+                    .set(total_splits as f64);
+            });
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::time::Duration;
+
+    use quickwit_config::IndexingSettings;
+    use quickwit_config::merge_policy_config::{
+        ConstWriteAmplificationMergePolicyConfig, MergePolicyConfig,
+    };
+    use quickwit_indexing::merge_policy::merge_policy_from_settings;
+    use quickwit_proto::types::IndexUid;
+
+    use super::*;
+
+    fn test_merge_policy() -> Arc<dyn MergePolicy> {
+        let settings = IndexingSettings {
+            merge_policy: MergePolicyConfig::ConstWriteAmplification(
+                ConstWriteAmplificationMergePolicyConfig {
+                    merge_factor: 2,
+                    max_merge_factor: 2,
+                    ..Default::default()
+                },
+            ),
+            ..Default::default()
+        };
+        merge_policy_from_settings(&settings)
+    }
+
+    fn test_split(split_id: &str, index_uid: &IndexUid) -> SplitMetadata {
+        SplitMetadata {
+            split_id: split_id.to_string(),
+            index_uid: index_uid.clone(),
+            source_id: "test-source".to_string(),
+            node_id: "test-node".to_string(),
+            num_docs: 1000,
+            create_timestamp: time::OffsetDateTime::now_utc().unix_timestamp(),
+            maturity: quickwit_metastore::SplitMaturity::Immature {
+                maturation_period: Duration::from_secs(3600),
+            },
+            ..Default::default()
+        }
+    }
+
+    #[test]
+    fn test_track_and_is_known() {
+        let index_uid = IndexUid::for_test("test-index", 0);
+        let mut state = CompactionState::new();
+
+        assert!(!state.is_split_tracked("s1"));
+
+        state.track_split(test_split("s1", &index_uid));
+        assert!(state.is_split_tracked("s1"));
+        assert!(!state.is_split_tracked("s2"));
+    }
+
+    #[test]
+    fn test_track_split_partitions_correctly() {
+        let index_uid = IndexUid::for_test("test-index", 0);
+        let mut state = CompactionState::new();
+
+        state.track_split(test_split("s1", &index_uid));
+        state.track_split(test_split("s2", &index_uid));
+
+        assert_eq!(state.partition_keys().len(), 1);
+    }
+
+    #[test]
+    fn test_plan_partition_moves_splits_to_in_flight() {
+        let index_uid = IndexUid::for_test("test-index", 0);
+        let merge_policy = test_merge_policy();
+        let mut state = CompactionState::new();
+
+        state.track_split(test_split("s0", &index_uid));
+        state.track_split(test_split("s1", &index_uid));
+
+        let keys = state.partition_keys();
+        assert_eq!(keys.len(), 1);
+
+        state.plan_partition(&keys[0], &merge_policy);
+
+        // Splits moved from needs_compaction to in_flight.
+        assert!(!state.pending_operations.is_empty());
+        for pending in state.pending_operations.iter() {
+            for split in pending.operation.splits_as_slice() {
+                assert!(!state.needs_compaction_split_ids.contains(split.split_id()));
+                assert!(state.in_flight_split_ids.contains(split.split_id()));
+            }
+        }
+    }
+
+    #[test]
+    fn test_process_successes_and_failures_clear_in_flight() {
+        let node_id = NodeId::from("worker-1");
+        let mut state = CompactionState::new();
+
+        // Simulate what plan_partition + record_assignment does:
+        // split IDs go into in_flight_split_ids, then into InFlightCompaction.
+        state.in_flight_split_ids.insert("s1".to_string());
+        state.in_flight_split_ids.insert("s2".to_string());
+        state.record_assignment(
+            "task-1".to_string(),
+            vec!["s1".to_string(), "s2".to_string()],
+            node_id.clone(),
+        );
+
+        state.in_flight_split_ids.insert("s3".to_string());
+        state.record_assignment("task-2".to_string(), vec!["s3".to_string()], node_id);
+
+        assert!(state.is_split_tracked("s1"));
+        assert!(state.is_split_tracked("s3"));
+
+        state.process_successes(&[CompactionSuccess {
+            task_id: "task-1".to_string(),
+        }]);
+        assert!(!state.is_split_tracked("s1"));
+        assert!(!state.is_split_tracked("s2"));
+
+        state.process_failures(&[CompactionFailure {
+            task_id: "task-2".to_string(),
+            error_message: "boom".to_string(),
+        }]);
+        assert!(!state.is_split_tracked("s3"));
+    }
+
+    #[test]
+    fn test_update_heartbeats_adopts_unknown_tasks() {
+        let node_id = NodeId::from("worker-1");
+        let mut state = CompactionState::new();
+
+        // Simulate a split that was tracked as needing compaction.
+        let index_uid = IndexUid::for_test("test-index", 0);
+        state.track_split(test_split("s1", &index_uid));
+        assert!(state.needs_compaction_split_ids.contains("s1"));
+
+        // Worker reports an in-progress task the planner doesn't know about.
+        state.update_heartbeats(
+            &node_id,
+            &[CompactionInProgress {
+                task_id: "task-1".to_string(),
+                index_uid: Some(index_uid),
+                source_id: "test-source".to_string(),
+                split_ids: vec!["s1".to_string()],
+            }],
+        );
+
+        // Split moved from needs_compaction to in_flight.
+        assert!(state.in_flight_split_ids.contains("s1"));
+        assert!(!state.needs_compaction_split_ids.contains("s1"));
+    }
+
+    #[test]
+    fn test_pop_pending_and_record_assignment() {
+        let index_uid = IndexUid::for_test("test-index", 0);
+        let merge_policy = test_merge_policy();
+        let mut state = CompactionState::new();
+
+        state.track_split(test_split("s0", &index_uid));
+        state.track_split(test_split("s1", &index_uid));
+
+        let keys = state.partition_keys();
+        state.plan_partition(&keys[0], &merge_policy);
+
+        let pending = state.pop_pending(1);
+        assert_eq!(pending.len(), 1);
+
+        let operation = &pending[0].operation;
+        let split_ids: Vec<String> = operation
+            .splits_as_slice()
+            .iter()
+            .map(|s| s.split_id().to_string())
+            .collect();
+
+        // Splits are already in in_flight_split_ids from plan_partition.
+        state.record_assignment("task-1".to_string(), split_ids.clone(), NodeId::from("w1"));
+
+        for split_id in &split_ids {
+            assert!(state.is_split_tracked(split_id));
+        }
+    }
+
+    #[test]
+    fn test_pop_pending_respects_count() {
+        let index_uid = IndexUid::for_test("test-index", 0);
+        let merge_policy = test_merge_policy();
+        let mut state = CompactionState::new();
+
+        // With merge_factor=2, 4 splits produces 2 operations.
+        for i in 0..4 {
+            state.track_split(test_split(&format!("s{i}"), &index_uid));
+        }
+
+        let keys = state.partition_keys();
+        state.plan_partition(&keys[0], &merge_policy);
+
+        let total_pending = state.pending_operations.len();
+        assert!(total_pending >= 2);
+
+        let popped = state.pop_pending(1);
+        assert_eq!(popped.len(), 1);
+        assert_eq!(state.pending_operations.len(), total_pending - 1);
+
+        // Asking for more than available returns what's there.
+        let rest = state.pop_pending(100);
+        assert_eq!(rest.len(), total_pending - 1);
+        assert!(state.pending_operations.is_empty());
+    }
+}
diff --git a/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs b/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs
new file mode 100644
index 00000000000..2c409daea7e
--- /dev/null
+++ b/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs
@@ -0,0 +1,318 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use quickwit_config::{IndexConfig, build_doc_mapper};
+use quickwit_doc_mapper::DocMapper;
+use quickwit_indexing::merge_policy::{MergePolicy, merge_policy_from_settings};
+use quickwit_metastore::{IndexMetadataResponseExt, SplitMaturity, SplitMetadata};
+use quickwit_metrics::{counter, label_values};
+use quickwit_proto::metastore::{IndexMetadataRequest, MetastoreService, MetastoreServiceClient};
+use quickwit_proto::types::{DocMappingUid, IndexUid};
+use tracing::error;
+
+use crate::planner::metrics::{METASTORE_ERRORS, OPERATION};
+
+/// Everything the planner needs to know about a single index.
+#[derive(Debug)]
+pub struct IndexEntry {
+    config: IndexConfig,
+    merge_policy: Arc<dyn MergePolicy>,
+    doc_mappers: HashMap<DocMappingUid, Arc<DocMapper>>,
+}
+
+impl IndexEntry {
+    pub fn is_split_mature(&self, split: &SplitMetadata) -> bool {
+        matches!(
+            self.merge_policy
+                .split_maturity(split.num_docs, split.num_merge_ops),
+            SplitMaturity::Mature
+        )
+    }
+
+    pub fn merge_policy(&self) -> &Arc<dyn MergePolicy> {
+        &self.merge_policy
+    }
+
+    pub fn doc_mapping_json(&self) -> String {
+        serde_json::to_string(&self.config.doc_mapping)
+            .expect("doc mapping serialization should not fail")
+    }
+
+    pub fn search_settings_json(&self) -> String {
+        serde_json::to_string(&self.config.search_settings)
+            .expect("search settings serialization should not fail")
+    }
+
+    pub fn indexing_settings_json(&self) -> String {
+        serde_json::to_string(&self.config.indexing_settings)
+            .expect("indexing settings serialization should not fail")
+    }
+
+    pub fn retention_policy_json(&self) -> String {
+        match &self.config.retention_policy_opt {
+            Some(policy) => serde_json::to_string(policy)
+                .expect("retention policy serialization should not fail"),
+            None => String::new(),
+        }
+    }
+
+    pub fn index_storage_uri(&self) -> String {
+        self.config.index_uri.to_string()
+    }
+}
+
+/// Caches per-index configuration, merge policies, and doc mappers.
+/// Fetches from the metastore on demand. All accessors panic if called
+/// for an index that hasn't been loaded.
+#[derive(Debug)]
+pub struct IndexConfigMetastore {
+    indexes: HashMap<IndexUid, IndexEntry>,
+    metastore_client: MetastoreServiceClient,
+}
+
+impl IndexConfigMetastore {
+    pub fn new(metastore_client: MetastoreServiceClient) -> Self {
+        IndexConfigMetastore {
+            indexes: HashMap::new(),
+            metastore_client,
+        }
+    }
+
+    /// Fetches an index config from the metastore and builds the derived
+    /// artifacts.
+    async fn fetch_index_config(
+        &mut self,
+        index_uid: &IndexUid,
+        doc_mapping_uid: &DocMappingUid,
+    ) -> anyhow::Result<()> {
+        let response = self
+            .metastore_client
+            .index_metadata(IndexMetadataRequest {
+                index_uid: Some(index_uid.clone()),
+                index_id: None,
+            })
+            .await
+            .inspect_err(|error| {
+                error!(%error, "[compaction-planner] error getting index metadata from metastore");
+                let labels = label_values!(OPERATION => "index_metadata");
+                counter!(parent: METASTORE_ERRORS, labels: [labels]).inc();
+            })?;
+
+        let index_metadata = response.deserialize_index_metadata()?;
+
+        let doc_mapper = build_doc_mapper(
+            &index_metadata.index_config.doc_mapping,
+            &index_metadata.index_config.search_settings,
+        )?;
+        let merge_policy =
+            merge_policy_from_settings(&index_metadata.index_config.indexing_settings);
+
+        let mut doc_mappers = HashMap::new();
+        doc_mappers.insert(*doc_mapping_uid, doc_mapper);
+        let entry = IndexEntry {
+            config: index_metadata.index_config,
+            merge_policy,
+            doc_mappers,
+        };
+        self.indexes.insert(index_uid.clone(), entry);
+        Ok(())
+    }
+
+    /// Gets the index entry, fetching from the metastore if not cached.
+    pub async fn get_or_fetch(
+        &mut self,
+        index_uid: &IndexUid,
+        doc_mapping_uid: &DocMappingUid,
+    ) -> anyhow::Result<&IndexEntry> {
+        match self.indexes.get(index_uid) {
+            Some(entry) if entry.doc_mappers.contains_key(doc_mapping_uid) => {}
+            _ => self.fetch_index_config(index_uid, doc_mapping_uid).await?,
+        }
+        Ok(&self.indexes[index_uid])
+    }
+
+    pub async fn get_for_split(&mut self, split: &SplitMetadata) -> anyhow::Result<&IndexEntry> {
+        self.get_or_fetch(&split.index_uid, &split.doc_mapping_uid)
+            .await
+    }
+
+    pub fn get(&self, index_uid: &IndexUid) -> Option<&IndexEntry> {
+        self.indexes.get(index_uid)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use quickwit_metastore::{IndexMetadata, IndexMetadataResponseExt};
+    use quickwit_proto::metastore::{IndexMetadataResponse, MetastoreError, MockMetastoreService};
+
+    use super::*;
+
+    fn test_index_metadata() -> IndexMetadata {
+        IndexMetadata::for_test("test-index", "ram:///test-index")
+    }
+
+    fn test_index_metadata_response(index_metadata: &IndexMetadata) -> IndexMetadataResponse {
+        IndexMetadataResponse::try_from_index_metadata(index_metadata).unwrap()
+    }
+
+    #[tokio::test]
+    async fn test_get_or_fetch_loads_and_caches() {
+        let index_metadata = test_index_metadata();
+        let response = test_index_metadata_response(&index_metadata);
+        let index_uid = index_metadata.index_uid.clone();
+        let doc_mapping_uid = DocMappingUid::default();
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_index_metadata()
+            .times(1)
+            .returning(move |_| Ok(response.clone()));
+
+        let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock));
+
+        // First call fetches from metastore.
+        let entry = store
+            .get_or_fetch(&index_uid, &doc_mapping_uid)
+            .await
+            .unwrap();
+        assert!(!entry.doc_mapping_json().is_empty());
+        assert!(!entry.search_settings_json().is_empty());
+        assert!(!entry.indexing_settings_json().is_empty());
+        assert!(!entry.index_storage_uri().is_empty());
+
+        // Second call hits cache (times(1) would panic otherwise).
+        store
+            .get_or_fetch(&index_uid, &doc_mapping_uid)
+            .await
+            .unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_get_or_fetch_metastore_error() {
+        let mut mock = MockMetastoreService::new();
+        mock.expect_index_metadata().returning(|_| {
+            Err(MetastoreError::Internal {
+                message: "test error".to_string(),
+                cause: String::new(),
+            })
+        });
+
+        let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock));
+        let result = store
+            .get_or_fetch(&IndexUid::for_test("missing", 0), &DocMappingUid::default())
+            .await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_get_returns_none_before_fetch() {
+        let mock = MockMetastoreService::new();
+        let store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock));
+        assert!(store.get(&IndexUid::for_test("test-index", 0)).is_none());
+    }
+
+    #[tokio::test]
+    async fn test_get_returns_some_after_fetch() {
+        let index_metadata = test_index_metadata();
+        let response = test_index_metadata_response(&index_metadata);
+        let index_uid = index_metadata.index_uid.clone();
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_index_metadata()
+            .returning(move |_| Ok(response.clone()));
+
+        let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock));
+        store
+            .get_or_fetch(&index_uid, &DocMappingUid::default())
+            .await
+            .unwrap();
+
+        assert!(store.get(&index_uid).is_some());
+    }
+
+    #[tokio::test]
+    async fn test_get_for_split() {
+        let index_metadata = test_index_metadata();
+        let response = test_index_metadata_response(&index_metadata);
+        let index_uid = index_metadata.index_uid.clone();
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_index_metadata()
+            .returning(move |_| Ok(response.clone()));
+
+        let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock));
+        let split = SplitMetadata {
+            split_id: "split-1".to_string(),
+            index_uid: index_uid.clone(),
+            ..Default::default()
+        };
+
+        let entry = store.get_for_split(&split).await.unwrap();
+        assert!(!entry.index_storage_uri().is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_is_split_mature() {
+        let index_metadata = test_index_metadata();
+        let response = test_index_metadata_response(&index_metadata);
+        let index_uid = index_metadata.index_uid.clone();
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_index_metadata()
+            .returning(move |_| Ok(response.clone()));
+
+        let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock));
+        let entry = store
+            .get_or_fetch(&index_uid, &DocMappingUid::default())
+            .await
+            .unwrap();
+
+        let small_split = SplitMetadata {
+            num_docs: 100,
+            num_merge_ops: 0,
+            ..Default::default()
+        };
+        assert!(!entry.is_split_mature(&small_split));
+
+        let large_split = SplitMetadata {
+            num_docs: 20_000_000,
+            num_merge_ops: 0,
+            ..Default::default()
+        };
+        assert!(entry.is_split_mature(&large_split));
+    }
+
+    #[tokio::test]
+    async fn test_retention_policy_json_empty_when_none() {
+        let index_metadata = test_index_metadata();
+        let response = test_index_metadata_response(&index_metadata);
+        let index_uid = index_metadata.index_uid.clone();
+
+        let mut mock = MockMetastoreService::new();
+        mock.expect_index_metadata()
+            .returning(move |_| Ok(response.clone()));
+
+        let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock));
+        let entry = store
+            .get_or_fetch(&index_uid, &DocMappingUid::default())
+            .await
+            .unwrap();
+
+        // Default test index has no retention policy.
+        assert!(entry.retention_policy_json().is_empty());
+    }
+}
diff --git a/quickwit/quickwit-compaction/src/planner/metrics.rs b/quickwit/quickwit-compaction/src/planner/metrics.rs
new file mode 100644
index 00000000000..2333434b8df
--- /dev/null
+++ b/quickwit/quickwit-compaction/src/planner/metrics.rs
@@ -0,0 +1,49 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use quickwit_metrics::{LabelNames, LazyCounter, LazyGauge, label_names, lazy_counter, lazy_gauge};
+
+pub(crate) const SOURCE_UID: LabelNames<1> = label_names!("source_uid");
+pub(crate) const SOURCE_UID_MERGE_LEVEL: LabelNames<2> = label_names!("source_uid", "merge_level");
+pub(crate) const OPERATION: LabelNames<1> = label_names!("operation");
+
+pub(crate) static NEW_SPLITS_SCANNED: LazyCounter = lazy_counter!(
+    name: "new_splits_scanned",
+    description: "cumulative number of immature splits scanned from the metastore",
+    subsystem: "compaction_planner",
+);
+
+pub(crate) static SPLITS_NEEDING_COMPACTION: LazyGauge = lazy_gauge!(
+    name: "splits_needing_compaction",
+    description: "number of splits currently tracked as needing compaction",
+    subsystem: "compaction_planner",
+);
+
+pub(crate) static PENDING_MERGE_OPERATIONS: LazyGauge = lazy_gauge!(
+    name: "pending_merge_operations",
+    description: "number of pending merge operations awaiting assignment",
+    subsystem: "compaction_planner",
+);
+
+pub(crate) static TIMED_OUT_OPERATIONS: LazyCounter = lazy_counter!(
+    name: "timed_out_operations",
+    description: "cumulative number of merge operations that timed out waiting for a worker heartbeat",
+    subsystem: "compaction_planner",
+);
+
+pub(crate) static METASTORE_ERRORS: LazyCounter = lazy_counter!(
+    name: "metastore_errors",
+    description: "cumulative number of metastore errors encountered by the compaction planner",
+    subsystem: "compaction_planner",
+);
diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs
new file mode 100644
index 00000000000..0cc7c3132b9
--- /dev/null
+++ b/quickwit/quickwit-compaction/src/planner/mod.rs
@@ -0,0 +1,138 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod compaction_planner;
+mod compaction_state;
+mod index_config_metastore;
+pub(crate) mod metrics;
+
+use std::cmp::Ordering;
+use std::collections::BinaryHeap;
+
+pub use compaction_planner::CompactionPlanner;
+use quickwit_indexing::merge_policy::{MergeOperation, compute_merge_score};
+use quickwit_metrics::{gauge, label_values};
+use quickwit_proto::types::{IndexUid, SourceId};
+
+use crate::planner::metrics::{PENDING_MERGE_OPERATIONS, SOURCE_UID_MERGE_LEVEL};
+use crate::source_uid_metrics_label;
+
+/// A `MergeOperation` waiting to be assigned, with `priority_score` used to order the heap.
+#[derive(Debug)]
+pub(super) struct PendingMerge {
+    pub(super) operation: MergeOperation,
+    pub(super) index_uid: IndexUid,
+    pub(super) source_id: SourceId,
+    pub(super) priority_score: u64,
+}
+
+impl PendingMerge {
+    pub(super) fn new(operation: MergeOperation) -> Self {
+        let first_split = operation
+            .splits
+            .first()
+            .expect("merge operation must have splits");
+        let index_uid = first_split.index_uid.clone();
+        let source_id = first_split.source_id.clone();
+        let total_num_bytes: u64 = operation
+            .splits
+            .iter()
+            .map(|split| split.footer_offsets.end)
+            .sum();
+        let priority_score = compute_merge_score(operation.splits.len(), total_num_bytes);
+        Self {
+            operation,
+            index_uid,
+            source_id,
+            priority_score,
+        }
+    }
+}
+
+impl PartialEq for PendingMerge {
+    fn eq(&self, other: &Self) -> bool {
+        self.operation.merge_split_id == other.operation.merge_split_id
+    }
+}
+
+impl Eq for PendingMerge {}
+
+impl PartialOrd for PendingMerge {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for PendingMerge {
+    /// Higher `priority_score` sorts first (max-heap); ties broken by
+    /// `merge_split_id` so `Ord::cmp == Equal` agrees with `PartialEq`.
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.priority_score
+            .cmp(&other.priority_score)
+            .then_with(|| {
+                self.operation
+                    .merge_split_id
+                    .cmp(&other.operation.merge_split_id)
+            })
+    }
+}
+
+/// Max-heap of pending merges awaiting assignment, ordered by
+/// `PendingMerge`'s priority score. The `pending_merge_operations` gauge is
+/// maintained inline; push/pop are the only mutation paths so the metric
+/// stays consistent with `len()`.
+#[derive(Debug)]
+pub(super) struct PendingOperations {
+    inner: BinaryHeap<PendingMerge>,
+}
+
+impl PendingOperations {
+    pub(super) fn new() -> Self {
+        Self {
+            inner: BinaryHeap::new(),
+        }
+    }
+
+    pub(super) fn push(&mut self, pending: PendingMerge) {
+        Self::adjust_gauge(&pending, 1);
+        self.inner.push(pending);
+    }
+
+    pub(super) fn pop(&mut self) -> Option<PendingMerge> {
+        let pending = self.inner.pop()?;
+        Self::adjust_gauge(&pending, -1);
+        Some(pending)
+    }
+
+    pub(super) fn len(&self) -> usize {
+        self.inner.len()
+    }
+
+    #[cfg(test)]
+    fn is_empty(&self) -> bool {
+        self.inner.is_empty()
+    }
+
+    #[cfg(test)]
+    fn iter(&self) -> impl Iterator<Item = &PendingMerge> {
+        self.inner.iter()
+    }
+
+    fn adjust_gauge(pending: &PendingMerge, delta: i64) {
+        let source_uid_label = source_uid_metrics_label(&pending.index_uid, &pending.source_id);
+        let merge_level = pending.operation.merge_level().to_string();
+        let labels = label_values!(SOURCE_UID_MERGE_LEVEL => source_uid_label, merge_level);
+        gauge!(parent: PENDING_MERGE_OPERATIONS, labels: [labels]).inc_by(delta as f64);
+    }
+}
diff --git a/quickwit/quickwit-config/src/lib.rs b/quickwit/quickwit-config/src/lib.rs
index b10afbc8b0b..330c3e9fe8c 100644
--- a/quickwit/quickwit-config/src/lib.rs
+++ b/quickwit/quickwit-config/src/lib.rs
@@ -74,9 +74,9 @@ pub use crate::metastore_config::{
     MetastoreBackend, MetastoreConfig, MetastoreConfigs, PostgresMetastoreConfig,
 };
 pub use crate::node_config::{
-    CacheConfig, CachePolicy, DEFAULT_QW_CONFIG_PATH, GrpcConfig, IndexerConfig, IngestApiConfig,
-    JaegerConfig, KeepAliveConfig, LambdaConfig, LambdaDeployConfig, NodeConfig, RestConfig,
-    SearcherConfig, SplitCacheLimits, StorageTimeoutPolicy, TlsConfig,
+    CacheConfig, CachePolicy, CompactorConfig, DEFAULT_QW_CONFIG_PATH, GrpcConfig, IndexerConfig,
+    IngestApiConfig, JaegerConfig, KeepAliveConfig, LambdaConfig, LambdaDeployConfig, NodeConfig,
+    RestConfig, SearcherConfig, SplitCacheLimits, StorageTimeoutPolicy, TlsConfig,
 };
 use crate::source_config::serialize::{SourceConfigV0_7, SourceConfigV0_8, VersionedSourceConfig};
 pub use crate::storage_config::{
diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs
index fff19960e57..a66d9ae714a 100644
--- a/quickwit/quickwit-config/src/node_config/mod.rs
+++ b/quickwit/quickwit-config/src/node_config/mod.rs
@@ -163,6 +163,11 @@ pub struct IndexerConfig {
     pub enable_cooperative_indexing: bool,
     #[serde(default = "IndexerConfig::default_cpu_capacity")]
     pub cpu_capacity: CpuCapacity,
+    /// When true, the compactor service is not implicitly started on indexer
+    /// nodes. Dedicated compactor nodes must be deployed separately.
+    /// When false (default), every indexer node also runs the compactor.
+    #[serde(default = "IndexerConfig::default_enable_standalone_compactors")]
+    pub enable_standalone_compactors: bool,
     /// If true, run Parquet merges through the streaming column-major engine
     /// (`execute_merge_operation`). If false (default), use the in-memory
     /// `merge_sorted_parquet_files` engine. The legacy in-memory engine is
@@ -211,6 +216,10 @@ impl IndexerConfig {
         CpuCapacity::one_cpu_thread() * (quickwit_common::num_cpus() as u32)
     }
 
+    fn default_enable_standalone_compactors() -> bool {
+        false
+    }
+
     fn default_parquet_merge_use_streaming_engine() -> bool {
         false
     }
@@ -227,6 +236,7 @@ impl IndexerConfig {
             cpu_capacity: PIPELINE_FULL_CAPACITY * 4u32,
             max_merge_write_throughput: None,
             merge_concurrency: NonZeroUsize::new(3).unwrap(),
+            enable_standalone_compactors: false,
             parquet_merge_use_streaming_engine: Self::default_parquet_merge_use_streaming_engine(),
         };
         Ok(indexer_config)
@@ -244,11 +254,67 @@ impl Default for IndexerConfig {
             cpu_capacity: Self::default_cpu_capacity(),
             merge_concurrency: Self::default_merge_concurrency(),
             max_merge_write_throughput: None,
+            enable_standalone_compactors: Self::default_enable_standalone_compactors(),
             parquet_merge_use_streaming_engine: Self::default_parquet_merge_use_streaming_engine(),
         }
     }
 }
 
+#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct CompactorConfig {
+    /// Maximum number of concurrent merges, which hold the CPU for
+    /// a long time. Defaults to `num_cpus - 1`.
+    #[serde(default = "CompactorConfig::default_max_concurrent_merge_executions")]
+    pub max_concurrent_merge_executions: NonZeroUsize,
+    /// Number of pipelines to run per merge executions. Scalar. Since merges perform a lot
+    /// of IO, multiple concurrent merges can be interleaved.
+    #[serde(default = "CompactorConfig::default_pipeline_slots_per_merge_execution")]
+    pub pipeline_slots_per_merge_execution: NonZeroUsize,
+    /// Maximum number of concurrent split uploads across all pipelines.
+    #[serde(default = "CompactorConfig::default_max_concurrent_split_uploads")]
+    pub max_concurrent_split_uploads: usize,
+    /// Limits the IO throughput of the split downloader and the merge executor.
+    #[serde(default)]
+    pub max_merge_write_throughput: Option<ByteSize>,
+}
+
+impl CompactorConfig {
+    fn default_max_concurrent_merge_executions() -> NonZeroUsize {
+        let cpus = quickwit_common::num_cpus().saturating_sub(1);
+        NonZeroUsize::new(cpus).unwrap_or(NonZeroUsize::MIN)
+    }
+
+    fn default_pipeline_slots_per_merge_execution() -> NonZeroUsize {
+        NonZeroUsize::new(2).unwrap()
+    }
+
+    fn default_max_concurrent_split_uploads() -> usize {
+        12
+    }
+
+    #[cfg(any(test, feature = "testsuite"))]
+    pub fn for_test() -> Self {
+        CompactorConfig {
+            max_concurrent_merge_executions: NonZeroUsize::new(2).unwrap(),
+            pipeline_slots_per_merge_execution: Self::default_pipeline_slots_per_merge_execution(),
+            max_concurrent_split_uploads: 4,
+            max_merge_write_throughput: None,
+        }
+    }
+}
+
+impl Default for CompactorConfig {
+    fn default() -> Self {
+        Self {
+            max_concurrent_merge_executions: Self::default_max_concurrent_merge_executions(),
+            pipeline_slots_per_merge_execution: Self::default_pipeline_slots_per_merge_execution(),
+            max_concurrent_split_uploads: Self::default_max_concurrent_split_uploads(),
+            max_merge_write_throughput: None,
+        }
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct SplitCacheLimits {
@@ -794,6 +860,7 @@ pub struct NodeConfig {
     pub searcher_config: SearcherConfig,
     pub ingest_api_config: IngestApiConfig,
     pub jaeger_config: JaegerConfig,
+    pub compactor_config: CompactorConfig,
 }
 
 impl NodeConfig {
@@ -892,23 +959,6 @@ mod tests {
                 "1500m"
             );
         }
-        {
-            let indexer_config: IndexerConfig =
-                serde_yaml::from_str(r#"merge_concurrency: 5"#).unwrap();
-            assert_eq!(
-                indexer_config.merge_concurrency,
-                NonZeroUsize::new(5).unwrap()
-            );
-            let indexer_config_json = serde_json::to_value(&indexer_config).unwrap();
-            assert_eq!(
-                indexer_config_json
-                    .get("merge_concurrency")
-                    .unwrap()
-                    .as_u64()
-                    .unwrap(),
-                5
-            );
-        }
         {
             let indexer_config: IndexerConfig =
                 serde_yaml::from_str(r#"cpu_capacity: 1500m"#).unwrap();
diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs
index c7740e7146e..5830c292d47 100644
--- a/quickwit/quickwit-config/src/node_config/serialize.rs
+++ b/quickwit/quickwit-config/src/node_config/serialize.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::net::{IpAddr, SocketAddr};
 use std::str::FromStr;
 use std::time::Duration;
@@ -35,8 +35,8 @@ use crate::service::QuickwitService;
 use crate::storage_config::StorageConfigs;
 use crate::templating::render_config;
 use crate::{
-    ConfigFormat, IndexerConfig, IngestApiConfig, JaegerConfig, MetastoreConfigs, NodeConfig,
-    SearcherConfig, TlsConfig, validate_identifier, validate_node_id,
+    CompactorConfig, ConfigFormat, IndexerConfig, IngestApiConfig, JaegerConfig, MetastoreConfigs,
+    NodeConfig, SearcherConfig, TlsConfig, validate_identifier, validate_node_id,
 };
 
 pub const DEFAULT_CLUSTER_ID: &str = "quickwit-default-cluster";
@@ -87,9 +87,12 @@ impl FromStr for List {
 }
 
 fn default_enabled_services() -> ConfigValue<List, QW_ENABLED_SERVICES> {
+    // The compactor is excluded by default — it only runs in standalone mode,
+    // which an operator must explicitly opt into via `enable_standalone_compactors`.
     ConfigValue::with_default(List(
         QuickwitService::supported_services()
             .into_iter()
+            .filter(|service| *service != QuickwitService::Compactor)
             .map(|service| service.to_string())
             .collect(),
     ))
@@ -192,6 +195,8 @@ struct NodeConfigBuilder {
     data_dir_uri: ConfigValue<Uri, QW_DATA_DIR>,
     metastore_uri: ConfigValue<Uri, QW_METASTORE_URI>,
     default_index_root_uri: ConfigValue<Uri, QW_DEFAULT_INDEX_ROOT_URI>,
+    #[serde(default)]
+    enable_standalone_compactors: ConfigValue<bool, QW_ENABLE_STANDALONE_COMPACTORS>,
     #[serde(rename = "rest")]
     #[serde(default)]
     rest_config_builder: RestConfigBuilder,
@@ -216,6 +221,9 @@ struct NodeConfigBuilder {
     #[serde(rename = "jaeger")]
     #[serde(default)]
     jaeger_config: JaegerConfig,
+    #[serde(rename = "compactor")]
+    #[serde(default)]
+    compactor_config: CompactorConfig,
 }
 
 impl NodeConfigBuilder {
@@ -226,7 +234,10 @@ impl NodeConfigBuilder {
         let node_id = self.node_id.resolve(env_vars).map(NodeId::new)?;
         let availability_zone = self.availability_zone.resolve_optional(env_vars)?;
 
-        let enabled_services = self
+        self.indexer_config.enable_standalone_compactors =
+            self.enable_standalone_compactors.resolve(env_vars)?;
+
+        let enabled_services: HashSet<QuickwitService> = self
             .enabled_services
             .resolve(env_vars)?
             .0
@@ -331,6 +342,7 @@ impl NodeConfigBuilder {
             searcher_config: self.searcher_config,
             ingest_api_config: self.ingest_api_config,
             jaeger_config: self.jaeger_config,
+            compactor_config: self.compactor_config,
         };
 
         validate(&node_config)?;
@@ -348,6 +360,15 @@ fn validate(node_config: &NodeConfig) -> anyhow::Result<()> {
     if node_config.peer_seeds.is_empty() {
         warn!("peer seeds are empty");
     }
+    if node_config.is_service_enabled(QuickwitService::Compactor)
+        && !node_config.indexer_config.enable_standalone_compactors
+    {
+        bail!(
+            "the `compactor` service can only be enabled when `enable_standalone_compactors` is \
+             true (or `QW_ENABLE_STANDALONE_COMPACTORS=true`). With the default indexer-local \
+             merge pipeline, the compactor service must not be enabled."
+        );
+    }
     validate_disk_usage(node_config);
     Ok(())
 }
@@ -421,6 +442,7 @@ impl Default for NodeConfigBuilder {
             data_dir_uri: default_data_dir_uri(),
             metastore_uri: ConfigValue::none(),
             default_index_root_uri: ConfigValue::none(),
+            enable_standalone_compactors: Default::default(),
             rest_config_builder: RestConfigBuilder::default(),
             grpc_config: GrpcConfig::default(),
             storage_configs: StorageConfigs::default(),
@@ -429,6 +451,7 @@ impl Default for NodeConfigBuilder {
             searcher_config: SearcherConfig::default(),
             ingest_api_config: IngestApiConfig::default(),
             jaeger_config: JaegerConfig::default(),
+            compactor_config: CompactorConfig::default(),
         }
     }
 }
@@ -528,6 +551,7 @@ pub fn node_config_for_tests_from_ports(
         searcher_config: SearcherConfig::default(),
         ingest_api_config: IngestApiConfig::default(),
         jaeger_config: JaegerConfig::default(),
+        compactor_config: CompactorConfig::default(),
     }
 }
 
@@ -657,6 +681,7 @@ mod tests {
                 cpu_capacity: IndexerConfig::default_cpu_capacity(),
                 enable_cooperative_indexing: false,
                 max_merge_write_throughput: Some(ByteSize::mb(100)),
+                enable_standalone_compactors: false,
                 parquet_merge_use_streaming_engine: true,
             }
         );
@@ -768,6 +793,9 @@ mod tests {
         assert_eq!(
             config.enabled_services,
             QuickwitService::supported_services()
+                .into_iter()
+                .filter(|service| *service != QuickwitService::Compactor)
+                .collect::<HashSet<_>>(),
         );
         assert_eq!(
             config.rest_config.listen_addr,
@@ -822,6 +850,10 @@ mod tests {
             "test-peer-seed-0,test-peer-seed-1".to_string(),
         );
         env_vars.insert("QW_DATA_DIR".to_string(), "test-data-dir".to_string());
+        env_vars.insert(
+            "QW_ENABLE_STANDALONE_COMPACTORS".to_string(),
+            "true".to_string(),
+        );
         env_vars.insert(
             "QW_METASTORE_URI".to_string(),
             "postgresql://test-user:test-password@test-host:4321/test-db".to_string(),
@@ -1355,4 +1387,48 @@ mod tests {
         .to_string();
         assert!(error_message.contains("replication factor"));
     }
+
+    #[tokio::test]
+    async fn test_standalone_compactors_prevents_implicit_compactor() {
+        let config_yaml = r#"
+            version: 0.8
+            enabled_services: [indexer]
+        "#;
+        let env_vars = HashMap::from([(
+            "QW_ENABLE_STANDALONE_COMPACTORS".to_string(),
+            "true".to_string(),
+        )]);
+        let config =
+            load_node_config_with_env(ConfigFormat::Yaml, config_yaml.as_bytes(), &env_vars)
+                .await
+                .unwrap();
+        assert!(config.enabled_services.contains(&QuickwitService::Indexer));
+        assert!(
+            !config
+                .enabled_services
+                .contains(&QuickwitService::Compactor)
+        );
+    }
+
+    #[tokio::test]
+    async fn test_standalone_compactors_env_var_override() {
+        let env_vars = HashMap::from([(
+            "QW_ENABLE_STANDALONE_COMPACTORS".to_string(),
+            "true".to_string(),
+        )]);
+        let config_yaml = r#"
+            version: 0.8
+            enabled_services: [indexer]
+        "#;
+        let config =
+            load_node_config_with_env(ConfigFormat::Yaml, config_yaml.as_bytes(), &env_vars)
+                .await
+                .unwrap();
+        assert!(config.enabled_services.contains(&QuickwitService::Indexer));
+        assert!(
+            !config
+                .enabled_services
+                .contains(&QuickwitService::Compactor)
+        );
+    }
 }
diff --git a/quickwit/quickwit-config/src/qw_env_vars.rs b/quickwit/quickwit-config/src/qw_env_vars.rs
index 75f891d332c..4f69448d26c 100644
--- a/quickwit/quickwit-config/src/qw_env_vars.rs
+++ b/quickwit/quickwit-config/src/qw_env_vars.rs
@@ -55,7 +55,8 @@ qw_env_vars!(
     QW_PEER_SEEDS,
     QW_DATA_DIR,
     QW_METASTORE_URI,
-    QW_DEFAULT_INDEX_ROOT_URI
+    QW_DEFAULT_INDEX_ROOT_URI,
+    QW_ENABLE_STANDALONE_COMPACTORS
 );
 
 #[cfg(test)]
diff --git a/quickwit/quickwit-config/src/service.rs b/quickwit/quickwit-config/src/service.rs
index d323331b510..0382231330d 100644
--- a/quickwit/quickwit-config/src/service.rs
+++ b/quickwit/quickwit-config/src/service.rs
@@ -29,6 +29,7 @@ pub enum QuickwitService {
     Searcher,
     Janitor,
     Metastore,
+    Compactor,
 }
 
 #[allow(clippy::from_over_into)]
@@ -46,6 +47,7 @@ impl QuickwitService {
             QuickwitService::Searcher => "searcher",
             QuickwitService::Janitor => "janitor",
             QuickwitService::Metastore => "metastore",
+            QuickwitService::Compactor => "compactor",
         }
     }
 
@@ -70,6 +72,7 @@ impl FromStr for QuickwitService {
             "searcher" => Ok(QuickwitService::Searcher),
             "janitor" => Ok(QuickwitService::Janitor),
             "metastore" => Ok(QuickwitService::Metastore),
+            "compactor" => Ok(QuickwitService::Compactor),
             _ => {
                 bail!(
                     "failed to parse service `{service_str}`. supported services are: `{}`",
diff --git a/quickwit/quickwit-indexing/failpoints/mod.rs b/quickwit/quickwit-indexing/failpoints/mod.rs
index d8c5ab0e418..25b6522851b 100644
--- a/quickwit/quickwit-indexing/failpoints/mod.rs
+++ b/quickwit/quickwit-indexing/failpoints/mod.rs
@@ -285,9 +285,10 @@ async fn test_merge_executor_controlled_directory_kill_switch() -> anyhow::Resul
         tantivy_dirs.push(get_tantivy_directory_from_split_bundle(&dest_filepath).unwrap());
     }
     let merge_operation = MergeOperation::new_merge_operation(split_metadatas);
-    let merge_task = MergeTask::from_merge_operation_for_test(merge_operation);
+    let merge_task = MergeTask::from_merge_operation_for_test(merge_operation.clone());
     let merge_scratch = MergeScratch {
-        merge_task,
+        merge_operation,
+        merge_task: Some(merge_task),
         merge_scratch_directory,
         downloaded_splits_directory,
         tantivy_dirs,
@@ -307,6 +308,7 @@ async fn test_merge_executor_controlled_directory_kill_switch() -> anyhow::Resul
         doc_mapper,
         io_controls,
         merge_packager_mailbox,
+        None,
     );
 
     let (merge_executor_mailbox, _merge_executor_handle) =
diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs
index 686bd942495..b35bb1901bc 100644
--- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs
+++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs
@@ -304,7 +304,7 @@ impl IndexingPipeline {
             super::PUBLISHER_NAME,
             QueueCapacity::Bounded(1),
             self.params.metastore.clone(),
-            Some(self.params.merge_planner_mailbox.clone()),
+            self.params.merge_planner_mailbox_opt.clone(),
             Some(source_mailbox.clone()),
         );
         let (publisher_mailbox, publisher_handle) = ctx
@@ -530,7 +530,7 @@ pub struct IndexingPipelineParams {
     // Merge-related parameters
     pub merge_policy: Arc<dyn MergePolicy>,
     pub retention_policy: Option<RetentionPolicy>,
-    pub merge_planner_mailbox: Mailbox<MergePlanner>,
+    pub merge_planner_mailbox_opt: Option<Mailbox<MergePlanner>>,
     pub max_concurrent_split_uploads_merge: usize,
 
     // Source-related parameters
@@ -668,7 +668,7 @@ mod tests {
             max_concurrent_split_uploads_index: 4,
             max_concurrent_split_uploads_merge: 5,
             cooperative_indexing_permits: None,
-            merge_planner_mailbox,
+            merge_planner_mailbox_opt: Some(merge_planner_mailbox),
             event_broker: EventBroker::default(),
             params_fingerprint: 42u64,
         };
@@ -773,8 +773,8 @@ mod tests {
 
         let universe = Universe::new();
         let storage = Arc::new(RamStorage::default());
-        let split_store = IndexingSplitStore::create_without_local_store_for_test(storage.clone());
         let (merge_planner_mailbox, _) = universe.create_test_mailbox();
+        let split_store = IndexingSplitStore::create_without_local_store_for_test(storage.clone());
         let pipeline_params = IndexingPipelineParams {
             pipeline_id,
             doc_mapper: Arc::new(default_doc_mapper_for_test()),
@@ -792,7 +792,7 @@ mod tests {
             max_concurrent_split_uploads_index: 4,
             max_concurrent_split_uploads_merge: 5,
             cooperative_indexing_permits: None,
-            merge_planner_mailbox,
+            merge_planner_mailbox_opt: Some(merge_planner_mailbox),
             event_broker: Default::default(),
             params_fingerprint: 42u64,
         };
@@ -816,7 +816,6 @@ mod tests {
     async fn test_indexing_pipeline_simple_gz() -> anyhow::Result<()> {
         indexing_pipeline_simple("data/test_corpus.json.gz").await
     }
-
     #[tokio::test]
     async fn test_merge_pipeline_does_not_stop_on_indexing_pipeline_failure() {
         let node_id = NodeId::from("test-node");
@@ -893,7 +892,7 @@ mod tests {
             max_concurrent_split_uploads_index: 4,
             max_concurrent_split_uploads_merge: 5,
             cooperative_indexing_permits: None,
-            merge_planner_mailbox: merge_planner_mailbox.clone(),
+            merge_planner_mailbox_opt: Some(merge_planner_mailbox.clone()),
             event_broker: Default::default(),
             params_fingerprint: 42u64,
         };
@@ -1021,7 +1020,7 @@ mod tests {
             max_concurrent_split_uploads_index: 4,
             max_concurrent_split_uploads_merge: 5,
             cooperative_indexing_permits: None,
-            merge_planner_mailbox,
+            merge_planner_mailbox_opt: Some(merge_planner_mailbox),
             params_fingerprint: 42u64,
             event_broker: Default::default(),
         };
diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs
index 4453ffbfe00..8fe95f79cf7 100644
--- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs
+++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs
@@ -26,8 +26,6 @@ use quickwit_actors::{
     Observation,
 };
 use quickwit_cluster::Cluster;
-use quickwit_common::fs::get_cache_directory_path;
-use quickwit_common::io::Limiter;
 use quickwit_common::pubsub::EventBroker;
 use quickwit_common::{io, temp_dir};
 use quickwit_config::{
@@ -64,7 +62,7 @@ use super::pipeline_shared::{ActorPipeline, PipelineHandle};
 use super::{FinishPendingMergesAndShutdownPipeline, MergePlanner, MergeSchedulerService};
 use crate::models::{DetachIndexingPipeline, DetachMergePipeline, ObservePipeline, SpawnPipeline};
 use crate::source::{AssignShards, Assignment};
-use crate::split_store::{IndexingSplitCache, SplitStoreQuota};
+use crate::split_store::IndexingSplitCache;
 use crate::{IndexingPipeline, IndexingPipelineParams, IndexingSplitStore, IndexingStatistics};
 
 /// Name of the indexing directory, usually located at `<data_dir_path>/indexing`.
@@ -106,13 +104,13 @@ pub struct IndexingService {
     cluster: Cluster,
     pub(crate) metastore: MetastoreServiceClient,
     ingest_api_service_opt: Option<Mailbox<IngestApiService>>,
-    merge_scheduler_service: Mailbox<MergeSchedulerService>,
     pub(crate) ingester_pool: IngesterPool,
     pub(crate) storage_resolver: StorageResolver,
     indexing_pipelines: HashMap<PipelineUid, BoxedPipelineHandle>,
     counters: IndexingServiceCounters,
-    local_split_store: Arc<IndexingSplitCache>,
     pub(crate) max_concurrent_split_uploads: usize,
+    merge_scheduler_service_opt: Option<Mailbox<MergeSchedulerService>>,
+    split_cache: Arc<IndexingSplitCache>,
     /// Cached from `IndexerConfig`. Selects whether new Parquet merge
     /// pipelines route regular merges through the streaming engine or
     /// the in-memory fallback. Promotion merges always use the
@@ -123,7 +121,7 @@ pub struct IndexingService {
     #[cfg(feature = "metrics")]
     parquet_merge_pipeline_handles: HashMap<IndexUid, ParquetMergePipelineHandle>,
     cooperative_indexing_permits: Option<Arc<Semaphore>>,
-    merge_io_throughput_limiter_opt: Option<Limiter>,
+    merge_io_throughput_limiter_opt: Option<io::Limiter>,
     pub(crate) event_broker: EventBroker,
 }
 
@@ -148,20 +146,14 @@ impl IndexingService {
         cluster: Cluster,
         metastore: MetastoreServiceClient,
         ingest_api_service_opt: Option<Mailbox<IngestApiService>>,
-        merge_scheduler_service: Mailbox<MergeSchedulerService>,
+        merge_scheduler_service_opt: Option<Mailbox<MergeSchedulerService>>,
         ingester_pool: IngesterPool,
         storage_resolver: StorageResolver,
         event_broker: EventBroker,
+        split_cache: Arc<IndexingSplitCache>,
     ) -> anyhow::Result<IndexingService> {
-        let split_store_space_quota = SplitStoreQuota::try_new(
-            indexer_config.split_store_max_num_splits,
-            indexer_config.split_store_max_num_bytes,
-        )?;
         let merge_io_throughput_limiter_opt =
             indexer_config.max_merge_write_throughput.map(io::limiter);
-        let split_cache_dir_path = get_cache_directory_path(&data_dir_path);
-        let local_split_store =
-            IndexingSplitCache::open(split_cache_dir_path, split_store_space_quota).await?;
         let indexing_root_directory =
             temp_dir::create_or_purge_directory(&data_dir_path.join(INDEXING_DIR_NAME)).await?;
         let queue_dir_path = data_dir_path.join(QUEUES_DIR_NAME);
@@ -177,10 +169,10 @@ impl IndexingService {
             cluster,
             metastore,
             ingest_api_service_opt,
-            merge_scheduler_service,
+            merge_scheduler_service_opt,
             ingester_pool,
             storage_resolver,
-            local_split_store: Arc::new(local_split_store),
+            split_cache,
             indexing_pipelines: Default::default(),
             counters: Default::default(),
             max_concurrent_split_uploads: indexer_config.max_concurrent_split_uploads,
@@ -365,32 +357,43 @@ impl IndexingService {
         let merge_policy =
             crate::merge_policy::merge_policy_from_settings(&index_config.indexing_settings);
         let retention_policy = index_config.retention_policy_opt.clone();
-        let split_store = IndexingSplitStore::new(storage.clone(), self.local_split_store.clone());
+        let split_store = IndexingSplitStore::new(storage.clone(), self.split_cache.clone());
 
         let doc_mapper = build_doc_mapper(&index_config.doc_mapping, &index_config.search_settings)
             .map_err(|error| IndexingError::Internal(error.to_string()))?;
 
-        let merge_pipeline_id = indexing_pipeline_id.merge_pipeline_id();
-        let merge_pipeline_params = MergePipelineParams {
-            pipeline_id: merge_pipeline_id.clone(),
-            doc_mapper: doc_mapper.clone(),
-            indexing_directory: indexing_directory.clone(),
-            metastore: self.metastore.clone(),
-            split_store: split_store.clone(),
-            merge_scheduler_service: self.merge_scheduler_service.clone(),
-            merge_policy: merge_policy.clone(),
-            retention_policy: retention_policy.clone(),
-            merge_io_throughput_limiter_opt: self.merge_io_throughput_limiter_opt.clone(),
-            max_concurrent_split_uploads: self.max_concurrent_split_uploads,
-            event_broker: self.event_broker.clone(),
+        let merge_planner_mailbox_opt =
+            if let Some(merge_scheduler_service) = self.merge_scheduler_service_opt.clone() {
+                let merge_pipeline_id = indexing_pipeline_id.merge_pipeline_id();
+                let merge_pipeline_params = MergePipelineParams {
+                    pipeline_id: merge_pipeline_id,
+                    doc_mapper: doc_mapper.clone(),
+                    indexing_directory: indexing_directory.clone(),
+                    metastore: self.metastore.clone(),
+                    split_store: split_store.clone(),
+                    merge_scheduler_service,
+                    merge_policy: merge_policy.clone(),
+                    retention_policy: retention_policy.clone(),
+                    merge_io_throughput_limiter_opt: self.merge_io_throughput_limiter_opt.clone(),
+                    max_concurrent_split_uploads: self.max_concurrent_split_uploads,
+                    event_broker: self.event_broker.clone(),
+                };
+                Some(self.get_or_create_merge_pipeline(
+                    merge_pipeline_params,
+                    immature_splits_opt,
+                    ctx,
+                )?)
+            } else {
+                None
+            };
+
+        let max_concurrent_split_uploads_index = if self.merge_scheduler_service_opt.is_some() {
+            (self.max_concurrent_split_uploads / 2).max(1)
+        } else {
+            self.max_concurrent_split_uploads
         };
-        let merge_planner_mailbox =
-            self.get_or_create_merge_pipeline(merge_pipeline_params, immature_splits_opt, ctx)?;
-        // The concurrent uploads budget is split in 2: 1/2 for the indexing pipeline, 1/2 for the
-        // merge pipeline.
-        let max_concurrent_split_uploads_index = (self.max_concurrent_split_uploads / 2).max(1);
         let max_concurrent_split_uploads_merge =
-            (self.max_concurrent_split_uploads - max_concurrent_split_uploads_index).max(1);
+            self.max_concurrent_split_uploads - max_concurrent_split_uploads_index;
 
         let pipeline_params = IndexingPipelineParams {
             pipeline_id: indexing_pipeline_id.clone(),
@@ -405,7 +408,7 @@ impl IndexingService {
             merge_policy,
             retention_policy,
             max_concurrent_split_uploads_merge,
-            merge_planner_mailbox,
+            merge_planner_mailbox_opt,
             source_config,
             ingester_pool: self.ingester_pool.clone(),
             queues_dir_path: self.queue_dir_path.clone(),
@@ -472,6 +475,9 @@ impl IndexingService {
         indexing_pipeline_ids: &[IndexingPipelineId],
         ctx: &ActorContext<Self>,
     ) -> MetastoreResult<HashMap<MergePipelineId, Vec<SplitMetadata>>> {
+        if self.merge_scheduler_service_opt.is_none() {
+            return Ok(Default::default());
+        }
         let mut index_uids = Vec::new();
 
         for indexing_pipeline_id in indexing_pipeline_ids {
@@ -685,6 +691,10 @@ impl IndexingService {
     /// Returns the Parquet merge planner mailbox for the given index, creating
     /// a new ParquetMergePipeline if one isn't already running.
     ///
+    /// Returns `Ok(None)` when there is no `merge_scheduler_service_opt` on the
+    /// service — mirrors the log pipeline path, which does not spawn a merge
+    /// pipeline in that case.
+    ///
     /// Keyed by IndexUid (not MergePipelineId) because Parquet merge pipelines
     /// are shared across all sources for the same index — unlike Tantivy merge
     /// pipelines which are per-source.
@@ -697,9 +707,12 @@ impl IndexingService {
         indexing_directory: quickwit_common::temp_dir::TempDirectory,
         immature_splits_opt: Option<Vec<quickwit_parquet_engine::split::ParquetSplitMetadata>>,
         ctx: &ActorContext<Self>,
-    ) -> Result<Mailbox<super::parquet_pipeline::ParquetMergePlanner>, IndexingError> {
+    ) -> Result<Option<Mailbox<super::parquet_pipeline::ParquetMergePlanner>>, IndexingError> {
+        let Some(merge_scheduler_service) = self.merge_scheduler_service_opt.clone() else {
+            return Ok(None);
+        };
         if let Some(handle) = self.parquet_merge_pipeline_handles.get(&index_uid) {
-            return Ok(handle.mailbox.clone());
+            return Ok(Some(handle.mailbox.clone()));
         }
 
         // Convert the config-crate merge policy into the engine-crate type.
@@ -727,7 +740,7 @@ impl IndexingService {
             metastore: self.metastore.clone(),
             storage,
             merge_policy,
-            merge_scheduler_service: self.merge_scheduler_service.clone(),
+            merge_scheduler_service,
             max_concurrent_split_uploads: self.max_concurrent_split_uploads,
             event_broker: self.event_broker.clone(),
             writer_config,
@@ -748,7 +761,7 @@ impl IndexingService {
         };
         self.parquet_merge_pipeline_handles
             .insert(index_uid, handle);
-        Ok(merge_planner_mailbox)
+        Ok(Some(merge_planner_mailbox))
     }
 
     /// For all Ingest V2 pipelines, assigns the set of shards they should be working on.
@@ -1209,10 +1222,11 @@ mod tests {
             cluster,
             metastore,
             Some(ingest_api_service),
-            merge_scheduler_mailbox,
+            Some(merge_scheduler_mailbox),
             IngesterPool::default(),
             storage_resolver.clone(),
             EventBroker::default(),
+            Arc::new(IndexingSplitCache::no_caching()),
         )
         .await
         .unwrap();
@@ -1384,9 +1398,9 @@ mod tests {
         // change whenever IndexingSettings fields are added/removed. Recompute
         // by temporarily adding a test that prints
         // `indexing_pipeline_params_fingerprint(&index_config, &source_config)`.
-        const PARAMS_FINGERPRINT_INGEST_API: u64 = 7973087274884969148;
-        const PARAMS_FINGERPRINT_SOURCE_1: u64 = 9420938500552890840;
-        const PARAMS_FINGERPRINT_SOURCE_2: u64 = 16199199787360162635;
+        const PARAMS_FINGERPRINT_INGEST_API: u64 = 1637744865450232394;
+        const PARAMS_FINGERPRINT_SOURCE_1: u64 = 1705211905504908791;
+        const PARAMS_FINGERPRINT_SOURCE_2: u64 = 8706667372658059428;
 
         quickwit_common::setup_logging_for_tests();
         let transport = ChannelTransport::default();
@@ -1428,7 +1442,6 @@ mod tests {
             .unwrap()
             .deserialize_index_metadata()
             .unwrap();
-
         let source_config_1 = SourceConfig {
             source_id: "test-indexing-service--source-1".to_string(),
             num_pipelines: NonZeroUsize::MIN,
@@ -1726,10 +1739,11 @@ mod tests {
             cluster.clone(),
             metastore.clone(),
             Some(ingest_api_service),
-            merge_scheduler_service,
+            Some(merge_scheduler_service),
             IngesterPool::default(),
             storage_resolver.clone(),
             EventBroker::default(),
+            Arc::new(IndexingSplitCache::no_caching()),
         )
         .await
         .unwrap();
@@ -1770,6 +1784,167 @@ mod tests {
         universe.quit().await;
     }
 
+    #[tokio::test]
+    async fn test_indexing_service_no_merge_pipeline_when_no_merge_scheduler() {
+        quickwit_common::setup_logging_for_tests();
+        let transport = ChannelTransport::default();
+        let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true)
+            .await
+            .unwrap();
+        let metastore = metastore_for_test();
+
+        let index_id = append_random_suffix("test-indexing-service-no-merge");
+        let index_uri = format!("ram:///indexes/{index_id}");
+        let index_config = IndexConfig::for_test(&index_id, &index_uri);
+
+        let source_config = SourceConfig {
+            source_id: "test-source".to_string(),
+            num_pipelines: NonZeroUsize::MIN,
+            enabled: true,
+            source_params: SourceParams::void(),
+            transform_config: None,
+            input_format: SourceInputFormat::Json,
+        };
+        let create_index_request =
+            CreateIndexRequest::try_from_index_config(&index_config).unwrap();
+        let index_uid: IndexUid = metastore
+            .create_index(create_index_request)
+            .await
+            .unwrap()
+            .index_uid()
+            .clone();
+        let add_source_request =
+            AddSourceRequest::try_from_source_config(index_uid.clone(), &source_config).unwrap();
+        metastore.add_source(add_source_request).await.unwrap();
+
+        let temp_dir = tempfile::tempdir().unwrap();
+        let data_dir_path = temp_dir.path().to_path_buf();
+        let indexer_config = IndexerConfig::for_test().unwrap();
+        let num_blocking_threads = 1;
+        let storage_resolver = StorageResolver::unconfigured();
+        let universe = Universe::with_accelerated_time();
+        let queues_dir_path = data_dir_path.join(QUEUES_DIR_NAME);
+        let ingest_api_service =
+            init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default())
+                .await
+                .unwrap();
+        let indexing_server = IndexingService::new(
+            NodeId::from("test-node"),
+            data_dir_path,
+            indexer_config,
+            num_blocking_threads,
+            cluster.clone(),
+            metastore.clone(),
+            Some(ingest_api_service),
+            None, // No merge scheduler — external merge service handles compaction.
+            IngesterPool::default(),
+            storage_resolver.clone(),
+            EventBroker::default(),
+            Arc::new(IndexingSplitCache::no_caching()),
+        )
+        .await
+        .unwrap();
+        let (indexing_server_mailbox, indexing_server_handle) =
+            universe.spawn_builder().spawn(indexing_server);
+
+        indexing_server_mailbox
+            .ask_for_res(SpawnPipeline {
+                index_id: index_id.clone(),
+                source_config,
+                pipeline_uid: PipelineUid::default(),
+            })
+            .await
+            .unwrap();
+
+        let observation = indexing_server_handle.observe().await;
+        assert_eq!(observation.num_running_pipelines, 1);
+        assert_eq!(observation.num_running_merge_pipelines, 0);
+        assert!(universe.get_one::<MergePipeline>().is_none());
+
+        universe.quit().await;
+    }
+
+    #[tokio::test]
+    async fn test_indexing_service_spawns_merge_pipeline_with_merge_scheduler() {
+        quickwit_common::setup_logging_for_tests();
+        let transport = ChannelTransport::default();
+        let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true)
+            .await
+            .unwrap();
+        let metastore = metastore_for_test();
+
+        let index_id = append_random_suffix("test-indexing-service-with-merge");
+        let index_uri = format!("ram:///indexes/{index_id}");
+        let index_config = IndexConfig::for_test(&index_id, &index_uri);
+
+        let source_config = SourceConfig {
+            source_id: "test-source".to_string(),
+            num_pipelines: NonZeroUsize::MIN,
+            enabled: true,
+            source_params: SourceParams::void(),
+            transform_config: None,
+            input_format: SourceInputFormat::Json,
+        };
+        let create_index_request =
+            CreateIndexRequest::try_from_index_config(&index_config).unwrap();
+        let index_uid: IndexUid = metastore
+            .create_index(create_index_request)
+            .await
+            .unwrap()
+            .index_uid()
+            .clone();
+        let add_source_request =
+            AddSourceRequest::try_from_source_config(index_uid.clone(), &source_config).unwrap();
+        metastore.add_source(add_source_request).await.unwrap();
+
+        let temp_dir = tempfile::tempdir().unwrap();
+        let data_dir_path = temp_dir.path().to_path_buf();
+        let indexer_config = IndexerConfig::for_test().unwrap();
+        let num_blocking_threads = 1;
+        let storage_resolver = StorageResolver::unconfigured();
+        let universe = Universe::with_accelerated_time();
+        let queues_dir_path = data_dir_path.join(QUEUES_DIR_NAME);
+        let ingest_api_service =
+            init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default())
+                .await
+                .unwrap();
+        let merge_scheduler_mailbox: Mailbox<MergeSchedulerService> = universe.get_or_spawn_one();
+        let indexing_server = IndexingService::new(
+            NodeId::from("test-node"),
+            data_dir_path,
+            indexer_config,
+            num_blocking_threads,
+            cluster.clone(),
+            metastore.clone(),
+            Some(ingest_api_service),
+            Some(merge_scheduler_mailbox),
+            IngesterPool::default(),
+            storage_resolver.clone(),
+            EventBroker::default(),
+            Arc::new(IndexingSplitCache::no_caching()),
+        )
+        .await
+        .unwrap();
+        let (indexing_server_mailbox, indexing_server_handle) =
+            universe.spawn_builder().spawn(indexing_server);
+
+        indexing_server_mailbox
+            .ask_for_res(SpawnPipeline {
+                index_id: index_id.clone(),
+                source_config,
+                pipeline_uid: PipelineUid::default(),
+            })
+            .await
+            .unwrap();
+
+        let observation = indexing_server_handle.observe().await;
+        assert_eq!(observation.num_running_pipelines, 1);
+        assert_eq!(observation.num_running_merge_pipelines, 1);
+        assert!(universe.get_one::<MergePipeline>().is_some());
+
+        universe.quit().await;
+    }
+
     #[derive(Debug)]
     struct FreezePipeline;
     #[async_trait]
@@ -1927,10 +2102,11 @@ mod tests {
             cluster.clone(),
             metastore.clone(),
             Some(ingest_api_service.clone()),
-            merge_scheduler_service,
+            Some(merge_scheduler_service),
             IngesterPool::default(),
             storage_resolver.clone(),
             EventBroker::default(),
+            Arc::new(IndexingSplitCache::no_caching()),
         )
         .await
         .unwrap();
@@ -1995,6 +2171,7 @@ mod tests {
                 let response = IndexesMetadataResponse::for_test(indexes_metadata, failures);
                 Ok(response)
             });
+
         mock_metastore
             .expect_list_splits()
             .withf(|request| {
diff --git a/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs b/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs
index 02a4bca1661..e3924c0d8a6 100644
--- a/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs
+++ b/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs
@@ -28,7 +28,7 @@ use crate::actors::publisher::{
 use crate::models::{NewSplits, SplitsUpdate};
 
 pub(crate) const PUBLISHER_NAME: &str = "Publisher";
-pub(crate) const MERGE_PUBLISHER_NAME: &str = "MergePublisher";
+pub const MERGE_PUBLISHER_NAME: &str = "MergePublisher";
 
 #[async_trait]
 impl Handler<DisconnectMergePlanner> for Publisher {
diff --git a/quickwit/quickwit-indexing/src/actors/merge_executor.rs b/quickwit/quickwit-indexing/src/actors/merge_executor.rs
index 39305ca1935..d51796e9774 100644
--- a/quickwit/quickwit-indexing/src/actors/merge_executor.rs
+++ b/quickwit/quickwit-indexing/src/actors/merge_executor.rs
@@ -42,6 +42,7 @@ use tantivy::index::SegmentId;
 use tantivy::tokenizer::TokenizerManager;
 use tantivy::{DateTime, Directory, Index, IndexMeta, IndexWriter, SegmentReader};
 use tokio::runtime::Handle;
+use tokio::sync::Semaphore;
 use tracing::{debug, error, info, instrument, warn};
 
 use crate::actors::Packager;
@@ -56,6 +57,9 @@ pub struct MergeExecutor {
     doc_mapper: Arc<DocMapper>,
     io_controls: IoControls,
     merge_packager_mailbox: Mailbox<Packager>,
+    /// Bounds concurrent Tantivy merges across pipelines on this node.
+    /// `None` in the legacy indexing-side merge pipeline.
+    merge_execution_semaphore: Option<Arc<Semaphore>>,
 }
 
 #[async_trait]
@@ -81,22 +85,38 @@ impl Actor for MergeExecutor {
 impl Handler<MergeScratch> for MergeExecutor {
     type Reply = ();
 
-    #[instrument(level = "info", name = "merge_executor", parent = merge_scratch.merge_task.merge_parent_span.id(), skip_all)]
+    #[instrument(level = "info", name = "merge_executor", parent = merge_scratch.merge_operation.merge_parent_span.id(), skip_all)]
     async fn handle(
         &mut self,
         merge_scratch: MergeScratch,
         ctx: &ActorContext<Self>,
     ) -> Result<(), ActorExitStatus> {
         let start = Instant::now();
-        let merge_task = merge_scratch.merge_task;
-        let indexed_split_opt: Option<IndexedSplit> = match merge_task.operation_type {
+        let MergeScratch {
+            merge_operation,
+            merge_task,
+            tantivy_dirs,
+            merge_scratch_directory,
+            ..
+        } = merge_scratch;
+        // On nodes running the split compaction architecture, merge pipelines are ephemeral, and we
+        // need to make sure there aren't too many CPU-bound operations occurring concurrently.
+        let _cpu_permit = match &self.merge_execution_semaphore {
+            Some(semaphore) => Some(
+                ctx.protect_future(semaphore.clone().acquire_owned())
+                    .await
+                    .expect("merge execution semaphore should never be closed"),
+            ),
+            None => None,
+        };
+        let indexed_split_opt: Option<IndexedSplit> = match merge_operation.operation_type {
             MergeOperationType::Merge => {
                 let merge_res = self
                     .process_merge(
-                        merge_task.merge_split_id.clone(),
-                        merge_task.splits.clone(),
-                        merge_scratch.tantivy_dirs,
-                        merge_scratch.merge_scratch_directory,
+                        merge_operation.merge_split_id.clone(),
+                        merge_operation.splits.clone(),
+                        tantivy_dirs,
+                        merge_scratch_directory,
                         ctx,
                     )
                     .await;
@@ -114,24 +134,24 @@ impl Handler<MergeScratch> for MergeExecutor {
                         // With a merge policy that marks splits as mature after a day or so, this
                         // limits the noise associated to those failed
                         // merges.
-                        error!(task=?merge_task, err=?err, "failed to merge splits");
+                        error!(task=?merge_operation, err=?err, "failed to merge splits");
                         return Ok(());
                     }
                 }
             }
             MergeOperationType::DeleteAndMerge => {
                 assert_eq!(
-                    merge_task.splits.len(),
+                    merge_operation.splits.len(),
                     1,
                     "Delete tasks can be applied only on one split."
                 );
-                assert_eq!(merge_scratch.tantivy_dirs.len(), 1);
-                let split_with_docs_to_delete = merge_task.splits[0].clone();
+                assert_eq!(tantivy_dirs.len(), 1);
+                let split_with_docs_to_delete = merge_operation.splits[0].clone();
                 self.process_delete_and_merge(
-                    merge_task.merge_split_id.clone(),
+                    merge_operation.merge_split_id.clone(),
                     split_with_docs_to_delete,
-                    merge_scratch.tantivy_dirs,
-                    merge_scratch.merge_scratch_directory,
+                    tantivy_dirs,
+                    merge_scratch_directory,
                     ctx,
                 )
                 .await?
@@ -141,7 +161,7 @@ impl Handler<MergeScratch> for MergeExecutor {
             info!(
                 merged_num_docs = %indexed_split.split_attrs.num_docs,
                 elapsed_secs = %start.elapsed().as_secs_f32(),
-                operation_type = %merge_task.operation_type,
+                operation_type = %merge_operation.operation_type,
                 "merge-operation-success"
             );
             ctx.send_message(
@@ -151,8 +171,8 @@ impl Handler<MergeScratch> for MergeExecutor {
                     checkpoint_delta_opt: Default::default(),
                     publish_lock: PublishLock::default(),
                     publish_token_opt: None,
-                    batch_parent_span: merge_task.merge_parent_span.clone(),
-                    merge_task_opt: Some(merge_task),
+                    batch_parent_span: merge_operation.merge_parent_span.clone(),
+                    merge_task_opt: merge_task,
                 },
             )
             .await?;
@@ -305,6 +325,7 @@ impl MergeExecutor {
         doc_mapper: Arc<DocMapper>,
         io_controls: IoControls,
         merge_packager_mailbox: Mailbox<Packager>,
+        merge_execution_semaphore: Option<Arc<Semaphore>>,
     ) -> Self {
         MergeExecutor {
             pipeline_id,
@@ -312,6 +333,7 @@ impl MergeExecutor {
             doc_mapper,
             io_controls,
             merge_packager_mailbox,
+            merge_execution_semaphore,
         }
     }
 
@@ -642,9 +664,10 @@ mod tests {
             tantivy_dirs.push(get_tantivy_directory_from_split_bundle(&dest_filepath).unwrap())
         }
         let merge_operation = MergeOperation::new_merge_operation(split_metas);
-        let merge_task = MergeTask::from_merge_operation_for_test(merge_operation);
+        let merge_task = MergeTask::from_merge_operation_for_test(merge_operation.clone());
         let merge_scratch = MergeScratch {
-            merge_task,
+            merge_operation,
+            merge_task: Some(merge_task),
             tantivy_dirs,
             merge_scratch_directory,
             downloaded_splits_directory,
@@ -662,6 +685,7 @@ mod tests {
             test_sandbox.doc_mapper(),
             IoControls::default(),
             merge_packager_mailbox,
+            None,
         );
         let (merge_executor_mailbox, merge_executor_handle) = test_sandbox
             .universe()
@@ -786,9 +810,10 @@ mod tests {
             .await?;
         let tantivy_dir = get_tantivy_directory_from_split_bundle(&dest_filepath).unwrap();
         let merge_operation = MergeOperation::new_delete_and_merge_operation(new_split_metadata);
-        let merge_task = MergeTask::from_merge_operation_for_test(merge_operation);
+        let merge_task = MergeTask::from_merge_operation_for_test(merge_operation.clone());
         let merge_scratch = MergeScratch {
-            merge_task,
+            merge_operation,
+            merge_task: Some(merge_task),
             tantivy_dirs: vec![tantivy_dir],
             merge_scratch_directory,
             downloaded_splits_directory,
@@ -806,6 +831,7 @@ mod tests {
             test_sandbox.doc_mapper(),
             IoControls::default(),
             merge_packager_mailbox,
+            None,
         );
         let (delete_task_executor_mailbox, delete_task_executor_handle) =
             universe.spawn_builder().spawn(delete_task_executor);
diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs
index 656200717c2..0788474bb21 100644
--- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs
+++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs
@@ -316,6 +316,7 @@ impl MergePipeline {
             self.params.doc_mapper.clone(),
             merge_executor_io_controls,
             merge_packager_mailbox,
+            None,
         );
         let (merge_executor_mailbox, merge_executor_handle) = ctx
             .spawn_actor()
diff --git a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs
index 1e79ed58ea3..dfe832f0c61 100644
--- a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs
+++ b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs
@@ -29,7 +29,7 @@ use tracing::error;
 use super::MergeSplitDownloader;
 #[cfg(feature = "metrics")]
 use super::parquet_pipeline::{ParquetMergeSplitDownloader, ParquetMergeTask};
-use crate::merge_policy::{MergeOperation, MergeTask};
+use crate::merge_policy::{MergeOperation, MergeTask, compute_merge_score};
 use crate::metrics::{ONGOING_MERGE_OPERATIONS, PENDING_MERGE_BYTES, PENDING_MERGE_OPERATIONS};
 
 pub struct MergePermit {
@@ -309,36 +309,17 @@ struct ScheduleMerge {
     split_downloader_mailbox: Mailbox<MergeSplitDownloader>,
 }
 
-/// Scores a merge operation for priority scheduling.
-///
-/// Higher score = scheduled sooner. Prefers merges that strongly reduce
-/// split count relative to their total byte cost. Used by both Tantivy
-/// and Parquet merge scheduling.
-fn score_merge(num_splits: usize, total_num_bytes: u64) -> u64 {
-    if total_num_bytes == 0 {
-        return u64::MAX;
-    }
-    // We will remove num_splits and add 1 merged split.
-    let delta_num_splits = (num_splits - 1) as u64;
-    // Integer arithmetic to avoid `f64 are not ordered` silliness.
-    (delta_num_splits << 48)
-        .checked_div(total_num_bytes)
-        .unwrap_or(1u64)
-}
-
-fn score_merge_operation(merge_operation: &MergeOperation) -> u64 {
-    score_merge(
-        merge_operation.splits.len(),
-        merge_operation.total_num_bytes(),
-    )
-}
-
 impl ScheduleMerge {
     pub fn new(
         merge_operation: TrackedObject<MergeOperation>,
         split_downloader_mailbox: Mailbox<MergeSplitDownloader>,
     ) -> ScheduleMerge {
-        let score = score_merge_operation(&merge_operation);
+        let total_num_bytes: u64 = merge_operation
+            .splits
+            .iter()
+            .map(|split| split.footer_offsets.end)
+            .sum();
+        let score = compute_merge_score(merge_operation.splits.len(), total_num_bytes);
         ScheduleMerge {
             score,
             merge_operation,
@@ -399,7 +380,7 @@ impl Handler<PermitReleased> for MergeSchedulerService {
 
 #[cfg(feature = "metrics")]
 fn score_parquet_merge_operation(merge_operation: &ParquetMergeOperation) -> u64 {
-    score_merge(
+    compute_merge_score(
         merge_operation.splits.len(),
         merge_operation.total_size_bytes(),
     )
@@ -483,24 +464,6 @@ mod tests {
         MergeOperation::new_merge_operation(splits)
     }
 
-    #[test]
-    fn test_score_merge_operation() {
-        let score_merge_operation_aux = |num_splits, num_bytes_per_split| {
-            let merge_operation = build_merge_operation(num_splits, num_bytes_per_split);
-            score_merge_operation(&merge_operation)
-        };
-        assert!(score_merge_operation_aux(10, 10_000_000) < score_merge_operation_aux(10, 999_999));
-        assert!(
-            score_merge_operation_aux(10, 10_000_000) > score_merge_operation_aux(9, 10_000_000)
-        );
-        assert_eq!(
-            // 9 - 1 = 8 splits removed.
-            score_merge_operation_aux(9, 10_000_000),
-            // 5 - 1  = 4 splits removed.
-            score_merge_operation_aux(5, 10_000_000 * 9 / 10)
-        );
-    }
-
     #[tokio::test]
     async fn test_merge_schedule_service_prioritize() {
         let universe = Universe::new();
diff --git a/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs b/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs
index 5d68bb59285..1911ecc0363 100644
--- a/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs
+++ b/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs
@@ -23,7 +23,7 @@ use tantivy::Directory;
 use tracing::{debug, info, instrument};
 
 use super::MergeExecutor;
-use crate::merge_policy::MergeTask;
+use crate::merge_policy::{MergeOperation, MergeTask};
 use crate::models::MergeScratch;
 use crate::split_store::IndexingSplitStore;
 
@@ -62,6 +62,35 @@ impl Handler<MergeTask> for MergeSplitDownloader {
         merge_task: MergeTask,
         ctx: &ActorContext<Self>,
     ) -> Result<(), quickwit_actors::ActorExitStatus> {
+        let merge_operation = merge_task.merge_operation.as_ref().clone();
+        self.download_and_send(merge_operation, ctx).await
+    }
+}
+
+#[async_trait]
+impl Handler<MergeOperation> for MergeSplitDownloader {
+    type Reply = ();
+
+    #[instrument(
+        name = "merge_split_downloader",
+        parent = merge_operation.merge_parent_span.id(),
+        skip_all,
+    )]
+    async fn handle(
+        &mut self,
+        merge_operation: MergeOperation,
+        ctx: &ActorContext<Self>,
+    ) -> Result<(), quickwit_actors::ActorExitStatus> {
+        self.download_and_send(merge_operation, ctx).await
+    }
+}
+
+impl MergeSplitDownloader {
+    async fn download_and_send(
+        &mut self,
+        merge_operation: MergeOperation,
+        ctx: &ActorContext<Self>,
+    ) -> Result<(), ActorExitStatus> {
         let merge_scratch_directory = temp_dir::Builder::default()
             .join("merge")
             .tempdir_in(self.scratch_directory.path())
@@ -73,13 +102,14 @@ impl Handler<MergeTask> for MergeSplitDownloader {
             .map_err(|error| anyhow::anyhow!(error))?;
         let tantivy_dirs = self
             .download_splits(
-                merge_task.splits_as_slice(),
+                merge_operation.splits_as_slice(),
                 downloaded_splits_directory.path(),
                 ctx,
             )
             .await?;
         let msg = MergeScratch {
-            merge_task,
+            merge_operation,
+            merge_task: None,
             merge_scratch_directory,
             downloaded_splits_directory,
             tantivy_dirs,
@@ -96,32 +126,30 @@ impl MergeSplitDownloader {
         download_directory: &Path,
         ctx: &ActorContext<Self>,
     ) -> Result<Vec<Box<dyn Directory>>, quickwit_actors::ActorExitStatus> {
-        // we download all of the split files in the scratch directory.
-        let mut tantivy_dirs = Vec::new();
-        for split in splits {
-            if ctx.kill_switch().is_dead() {
-                debug!(
-                    split_id = split.split_id(),
-                    "Kill switch was activated. Cancelling download."
-                );
-                return Err(ActorExitStatus::Killed);
-            }
-            let io_controls = self
-                .io_controls
-                .clone()
-                .set_progress(ctx.progress().clone())
-                .set_kill_switch(ctx.kill_switch().clone());
-            let _protect_guard = ctx.protect_zone();
-            let tantivy_dir = self
-                .split_store
-                .fetch_and_open_split(split.split_id(), download_directory, &io_controls)
-                .await
-                .map_err(|error| {
-                    let split_id = split.split_id();
-                    anyhow::anyhow!(error).context(format!("failed to download split `{split_id}`"))
-                })?;
-            tantivy_dirs.push(tantivy_dir);
+        if ctx.kill_switch().is_dead() {
+            debug!("kill switch was activated, cancelling download");
+            return Err(ActorExitStatus::Killed);
         }
+        let io_controls = self
+            .io_controls
+            .clone()
+            .set_progress(ctx.progress().clone())
+            .set_kill_switch(ctx.kill_switch().clone());
+        let _protect_guard = ctx.protect_zone();
+        let download_futures = splits.iter().map(|split| {
+            let io_controls = io_controls.clone();
+            async move {
+                self.split_store
+                    .fetch_and_open_split(split.split_id(), download_directory, &io_controls)
+                    .await
+                    .map_err(|error| {
+                        let split_id = split.split_id();
+                        anyhow::anyhow!(error)
+                            .context(format!("failed to download split `{split_id}`"))
+                    })
+            }
+        });
+        let tantivy_dirs = futures::future::try_join_all(download_futures).await?;
         Ok(tantivy_dirs)
     }
 }
@@ -190,8 +218,8 @@ mod tests {
             .unwrap()
             .downcast::<MergeScratch>()
             .unwrap();
-        assert_eq!(merge_scratch.merge_task.splits_as_slice().len(), 10);
-        for split in merge_scratch.merge_task.splits_as_slice() {
+        assert_eq!(merge_scratch.merge_operation.splits_as_slice().len(), 10);
+        for split in merge_scratch.merge_operation.splits_as_slice() {
             let split_filename = split_file(split.split_id());
             let split_filepath = merge_scratch
                 .downloaded_splits_directory
diff --git a/quickwit/quickwit-indexing/src/actors/mod.rs b/quickwit/quickwit-indexing/src/actors/mod.rs
index f7a69265662..fdab5df4bcc 100644
--- a/quickwit/quickwit-indexing/src/actors/mod.rs
+++ b/quickwit/quickwit-indexing/src/actors/mod.rs
@@ -41,7 +41,8 @@ pub use indexing_pipeline::{IndexingPipeline, IndexingPipelineParams};
 pub use indexing_service::{
     BoxedPipelineHandle, INDEXING_DIR_NAME, IndexingService, IndexingServiceCounters,
 };
-pub(crate) use log_publisher_impl::{MERGE_PUBLISHER_NAME, PUBLISHER_NAME};
+pub use log_publisher_impl::MERGE_PUBLISHER_NAME;
+pub(crate) use log_publisher_impl::PUBLISHER_NAME;
 pub use merge_executor::{MergeExecutor, combine_partition_ids, merge_split_attrs};
 pub use merge_pipeline::{
     FinishPendingMergesAndShutdownPipeline, MergePipeline, MergePipelineParams,
diff --git a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/indexing_service_impl.rs b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/indexing_service_impl.rs
index e39a7e4989f..9d5b82b9a44 100644
--- a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/indexing_service_impl.rs
+++ b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/indexing_service_impl.rs
@@ -70,7 +70,9 @@ impl IndexingService {
         // Spawn the Parquet merge pipeline (or reuse an existing one for this
         // index). The planner mailbox is wired into the MetricsPipeline's
         // Publisher so newly ingested splits are fed back for merging.
-        let merge_planner_mailbox = self.get_or_create_parquet_merge_pipeline(
+        // Returns `None` when there is no local merge scheduler — the metrics
+        // pipeline then runs without local merging (mirrors the log path).
+        let merge_planner_mailbox_opt = self.get_or_create_parquet_merge_pipeline(
             indexing_pipeline_id.index_uid.clone(),
             &index_config,
             storage.clone(),
@@ -97,7 +99,7 @@ impl IndexingService {
             use_sketch_processors,
             partition_key,
             max_num_partitions: index_config.doc_mapping.max_num_partitions,
-            parquet_merge_planner_mailbox_opt: Some(merge_planner_mailbox),
+            parquet_merge_planner_mailbox_opt: merge_planner_mailbox_opt,
         };
         let pipeline = MetricsPipeline::new(pipeline_params);
         let (mailbox, handle) = ctx.spawn_actor().spawn(pipeline);
diff --git a/quickwit/quickwit-indexing/src/lib.rs b/quickwit/quickwit-indexing/src/lib.rs
index f0e79499b8a..1888bfaffa7 100644
--- a/quickwit/quickwit-indexing/src/lib.rs
+++ b/quickwit/quickwit-indexing/src/lib.rs
@@ -14,6 +14,8 @@
 
 #![deny(clippy::disallowed_methods)]
 
+use std::sync::Arc;
+
 use quickwit_actors::{Mailbox, Universe};
 use quickwit_cluster::Cluster;
 use quickwit_common::pubsub::EventBroker;
@@ -27,11 +29,14 @@ use tracing::info;
 use crate::actors::MergeSchedulerService;
 pub use crate::actors::{
     BoxedPipelineHandle, FinishPendingMergesAndShutdownPipeline, IndexingError, IndexingPipeline,
-    IndexingPipelineParams, IndexingService, Sequencer, SplitsUpdateMailbox,
+    IndexingPipelineParams, IndexingService, MERGE_PUBLISHER_NAME, Sequencer, SplitsUpdateMailbox,
 };
 pub use crate::controlled_directory::ControlledDirectory;
 use crate::models::IndexingStatistics;
-pub use crate::split_store::{IndexingSplitStore, get_tantivy_directory_from_split_bundle};
+pub use crate::split_store::{
+    IndexingSplitCache, IndexingSplitStore, SplitStoreQuota,
+    get_tantivy_directory_from_split_bundle,
+};
 
 pub mod actors;
 mod controlled_directory;
@@ -69,13 +74,11 @@ pub async fn start_indexing_service(
     ingester_pool: IngesterPool,
     storage_resolver: StorageResolver,
     event_broker: EventBroker,
+    merge_scheduler_mailbox_opt: Option<Mailbox<MergeSchedulerService>>,
+    indexing_split_cache: Arc<IndexingSplitCache>,
 ) -> anyhow::Result<Mailbox<IndexingService>> {
     info!("starting indexer service");
     let ingest_api_service_mailbox = universe.get_one::<IngestApiService>();
-    let (merge_scheduler_mailbox, _) = universe.spawn_builder().spawn(MergeSchedulerService::new(
-        config.indexer_config.merge_concurrency.get(),
-    ));
-    // Spawn indexing service.
     let indexing_service = IndexingService::new(
         config.node_id.clone(),
         config.data_dir_path.to_path_buf(),
@@ -84,10 +87,11 @@ pub async fn start_indexing_service(
         cluster,
         metastore.clone(),
         ingest_api_service_mailbox,
-        merge_scheduler_mailbox,
+        merge_scheduler_mailbox_opt,
         ingester_pool,
         storage_resolver,
         event_broker,
+        indexing_split_cache,
     )
     .await?;
     let (indexing_service, _) = universe.spawn_builder().spawn(indexing_service);
diff --git a/quickwit/quickwit-indexing/src/merge_policy/mod.rs b/quickwit/quickwit-indexing/src/merge_policy/mod.rs
index e94e70d3370..408dc7cd798 100644
--- a/quickwit/quickwit-indexing/src/merge_policy/mod.rs
+++ b/quickwit/quickwit-indexing/src/merge_policy/mod.rs
@@ -121,6 +121,31 @@ impl MergeOperation {
     pub fn splits_as_slice(&self) -> &[SplitMetadata] {
         self.splits.as_slice()
     }
+
+    pub fn merge_level(&self) -> usize {
+        self.splits
+            .iter()
+            .map(|s| s.num_merge_ops)
+            .max()
+            .unwrap_or(0)
+    }
+}
+
+// The higher, the sooner we will execute the merge operation.
+// A good merge operation:
+// - strongly reduces the number of splits
+// - is light.
+pub fn compute_merge_score(num_splits: usize, total_num_bytes: u64) -> u64 {
+    if total_num_bytes == 0 {
+        // Silly corner case that should never happen.
+        return u64::MAX;
+    }
+    // We will remove num_splits and add 1 merge split.
+    let delta_num_splits = num_splits.saturating_sub(1) as u64;
+    // Integer arithmetic to avoid `f64 are not ordered` silliness.
+    (delta_num_splits << 48)
+        .checked_div(total_num_bytes)
+        .unwrap_or(1u64)
 }
 
 impl fmt::Debug for MergeOperation {
@@ -254,6 +279,21 @@ pub mod tests {
     };
     use crate::models::{NewSplits, create_split_metadata};
 
+    #[test]
+    fn test_score() {
+        // Lighter merge at the same split count scores higher.
+        assert!(compute_merge_score(10, 100_000_000) < compute_merge_score(10, 9_999_990));
+        // More splits removed at the same total bytes scores higher.
+        assert!(compute_merge_score(10, 100_000_000) > compute_merge_score(9, 100_000_000));
+        // Equal `(delta_splits / total_bytes)` ratios yield equal scores.
+        assert_eq!(
+            // delta=8, 90M bytes.
+            compute_merge_score(9, 90_000_000),
+            // delta=4, 45M bytes (same 8/90M ratio).
+            compute_merge_score(5, 45_000_000),
+        );
+    }
+
     fn pow_of_10(n: usize) -> usize {
         10usize.pow(n as u32)
     }
diff --git a/quickwit/quickwit-indexing/src/models/merge_scratch.rs b/quickwit/quickwit-indexing/src/models/merge_scratch.rs
index 392ca60b42c..a0296f69d2d 100644
--- a/quickwit/quickwit-indexing/src/models/merge_scratch.rs
+++ b/quickwit/quickwit-indexing/src/models/merge_scratch.rs
@@ -15,14 +15,15 @@
 use quickwit_common::temp_dir::TempDirectory;
 use tantivy::Directory;
 
-use crate::merge_policy::MergeTask;
+use crate::merge_policy::{MergeOperation, MergeTask};
 
 #[derive(Debug)]
 pub struct MergeScratch {
-    /// A [`MergeTask`] tracked by either the `MergePlanner` or the `DeleteTaskPlanner`
-    /// See planners docs to understand the usage.
-    pub merge_task: MergeTask,
-    /// Scratch directory for computing the merge.
+    /// The merge operation data (splits, merge_split_id, operation type).
+    pub merge_operation: MergeOperation,
+    // TODO: remove once the old MergePipeline is deleted and the
+    // DeleteTaskPipeline no longer routes through MergeSchedulerService.
+    pub merge_task: Option<MergeTask>,
     pub merge_scratch_directory: TempDirectory,
     pub downloaded_splits_directory: TempDirectory,
     pub tantivy_dirs: Vec<Box<dyn Directory>>,
diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs
index 70f19410ecf..47676669487 100644
--- a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs
+++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs
@@ -21,11 +21,13 @@ use std::time::{Duration, SystemTime};
 
 use anyhow::Context;
 use bytesize::ByteSize;
+use quickwit_common::fs::get_cache_directory_path;
 use quickwit_common::split_file;
+use quickwit_config::IndexerConfig;
 use quickwit_directories::BundleDirectory;
 use quickwit_storage::StorageResult;
 use tantivy::Directory;
-use tantivy::directory::MmapDirectory;
+use tantivy::directory::{Advice, MmapDirectory};
 use tokio::sync::Mutex;
 use tracing::{debug, error, warn};
 use ulid::Ulid;
@@ -38,12 +40,15 @@ const SPLIT_MAX_AGE: Duration = Duration::from_secs(2 * 24 * 3_600); // 2 days
 pub fn get_tantivy_directory_from_split_bundle(
     split_file: &Path,
 ) -> StorageResult<Box<dyn Directory>> {
-    let mmap_directory = MmapDirectory::open(split_file.parent().ok_or_else(|| {
-        io::Error::new(
-            io::ErrorKind::NotFound,
-            format!("couldn't find parent for {}", split_file.display()),
-        )
-    })?)?;
+    let mmap_directory = MmapDirectory::open_with_madvice(
+        split_file.parent().ok_or_else(|| {
+            io::Error::new(
+                io::ErrorKind::NotFound,
+                format!("couldn't find parent for {}", split_file.display()),
+            )
+        })?,
+        Advice::Sequential,
+    )?;
     let split_fileslice = mmap_directory.open_read(Path::new(&split_file))?;
     Ok(Box::new(BundleDirectory::open_split(split_fileslice)?))
 }
@@ -361,6 +366,22 @@ impl IndexingSplitCache {
         IndexingSplitCache { inner }
     }
 
+    /// Builds an [`IndexingSplitCache`] from an [`IndexerConfig`], rooted at
+    /// `<data_dir>/indexer-split-cache/splits`.
+    pub async fn from_config(
+        indexer_config: &IndexerConfig,
+        data_dir_path: &Path,
+    ) -> anyhow::Result<IndexingSplitCache> {
+        let cache_path = get_cache_directory_path(data_dir_path);
+        let quota = SplitStoreQuota::try_new(
+            indexer_config.split_store_max_num_splits,
+            indexer_config.split_store_max_num_bytes,
+        )?;
+        IndexingSplitCache::open(cache_path, quota)
+            .await
+            .context("failed to open indexing split cache")
+    }
+
     /// Try to open an existing local split store directory.
     ///
     /// If the directory does not exists, it will be created.
@@ -508,6 +529,7 @@ mod tests {
     use std::time::Duration;
 
     use bytesize::ByteSize;
+    use quickwit_config::IndexerConfig;
     use quickwit_directories::BundleDirectory;
     use quickwit_storage::{PutPayload, SplitPayloadBuilder};
     use tantivy::Directory;
@@ -530,6 +552,20 @@ mod tests {
         Ok(())
     }
 
+    #[tokio::test]
+    async fn test_from_config() {
+        let data_dir = tempdir().unwrap();
+        let config = IndexerConfig::for_test().unwrap();
+        let _cache = IndexingSplitCache::from_config(&config, data_dir.path())
+            .await
+            .unwrap();
+        let cache_dir = data_dir.path().join("indexer-split-cache").join("splits");
+        assert!(
+            cache_dir.is_dir(),
+            "from_config must open (and create) the cache directory",
+        );
+    }
+
     #[tokio::test]
     async fn test_local_split_store_load_existing_splits() -> anyhow::Result<()> {
         let temp_dir = tempfile::tempdir()?;
diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs
index 82198c820f4..e6c824ce788 100644
--- a/quickwit/quickwit-indexing/src/test_utils.rs
+++ b/quickwit/quickwit-indexing/src/test_utils.rs
@@ -40,6 +40,7 @@ use serde_json::Value as JsonValue;
 
 use crate::actors::IndexingService;
 use crate::models::{DetachIndexingPipeline, IndexingStatistics, SpawnPipeline};
+use crate::split_store::IndexingSplitCache;
 
 /// Creates a Test environment.
 ///
@@ -123,10 +124,11 @@ impl TestSandbox {
             cluster,
             metastore.clone(),
             Some(ingest_api_service),
-            merge_scheduler_mailbox,
+            Some(merge_scheduler_mailbox),
             IngesterPool::default(),
             storage_resolver.clone(),
             EventBroker::default(),
+            Arc::new(IndexingSplitCache::no_caching()),
         )
         .await?;
         let (indexing_service, _indexing_service_handle) =
diff --git a/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs b/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs
index a7385ca0946..9c2fe3effde 100644
--- a/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs
+++ b/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs
@@ -76,6 +76,11 @@ impl TestNodeConfig {
         tcp_listener_resolver.add_listener(grpc_tcp_listener).await;
         config.indexer_config.enable_otlp_endpoint = self.enable_otlp;
         config.enabled_services.clone_from(&self.services);
+        if config.enabled_services.contains(&QuickwitService::Indexer)
+            && !config.indexer_config.enable_standalone_compactors
+        {
+            config.enabled_services.insert(QuickwitService::Compactor);
+        }
         config.jaeger_config.enable_endpoint = true;
         config.cluster_id.clone_from(&cluster_id);
         config.node_id = NodeId::new(format!("test-node-{node_idx}"));
@@ -678,7 +683,11 @@ impl ClusterSandbox {
 #[tokio::test]
 async fn test_sandbox_happy_path() {
     let sandbox = ClusterSandboxBuilder::default()
-        .add_node([QuickwitService::ControlPlane, QuickwitService::Metastore])
+        .add_node([
+            QuickwitService::ControlPlane,
+            QuickwitService::Metastore,
+            QuickwitService::Janitor,
+        ])
         .add_node([QuickwitService::Searcher])
         .add_node([QuickwitService::Indexer])
         .build_and_start()
@@ -691,7 +700,11 @@ async fn test_sandbox_happy_path() {
 #[tokio::test]
 async fn test_sandbox_add_node_dynamically() {
     let mut sandbox = ClusterSandboxBuilder::default()
-        .add_node([QuickwitService::ControlPlane, QuickwitService::Metastore])
+        .add_node([
+            QuickwitService::ControlPlane,
+            QuickwitService::Metastore,
+            QuickwitService::Janitor,
+        ])
         .add_node([QuickwitService::Searcher])
         .build_and_start()
         .await;
diff --git a/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs b/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs
index 91c6b0ebeb1..96bf311cc01 100644
--- a/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs
+++ b/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs
@@ -162,3 +162,47 @@ async fn test_multi_nodes_cluster() {
 
     sandbox.shutdown().await.unwrap();
 }
+
+#[tokio::test]
+async fn test_indexer_implicitly_runs_compactor() {
+    quickwit_common::setup_logging_for_tests();
+    let sandbox = ClusterSandboxBuilder::default()
+        .add_node([QuickwitService::Searcher])
+        .add_node([QuickwitService::Metastore])
+        .add_node([QuickwitService::ControlPlane])
+        .add_node([QuickwitService::Janitor])
+        .add_node([QuickwitService::Indexer])
+        .build_and_start()
+        .await;
+
+    let cluster_snapshot = sandbox
+        .rest_client(QuickwitService::Searcher)
+        .cluster()
+        .snapshot()
+        .await
+        .unwrap();
+    assert_eq!(cluster_snapshot.ready_nodes.len(), 5);
+
+    // The indexer node should also advertise the compactor service.
+    let indexer_node = cluster_snapshot
+        .chitchat_state_snapshot
+        .node_states
+        .iter()
+        .find(|node_state| {
+            node_state
+                .get("enabled_services")
+                .map(|s| s.contains("indexer"))
+                .unwrap_or(false)
+        })
+        .expect("indexer node not found in cluster state");
+    let services = indexer_node.get("enabled_services").unwrap();
+    assert!(
+        services.contains("compactor"),
+        "indexer node should implicitly run the compactor, got: {services}"
+    );
+
+    sandbox.shutdown().await.unwrap();
+}
+
+// TODO: add test_standalone_compactor_node once the test config builder
+// resolves QW_ENABLE_STANDALONE_COMPACTORS from the environment.
diff --git a/quickwit/quickwit-janitor/Cargo.toml b/quickwit/quickwit-janitor/Cargo.toml
index e8063895f24..c633c8e4aac 100644
--- a/quickwit/quickwit-janitor/Cargo.toml
+++ b/quickwit/quickwit-janitor/Cargo.toml
@@ -27,6 +27,7 @@ utoipa = { workspace = true }
 quickwit-actors = { workspace = true }
 quickwit-common = { workspace = true }
 quickwit-metrics = { workspace = true }
+quickwit-compaction = { workspace = true }
 quickwit-config = { workspace = true }
 quickwit-doc-mapper = { workspace = true }
 quickwit-index-management = { workspace = true }
diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs b/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs
index be11509c3d7..37884040715 100644
--- a/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs
+++ b/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs
@@ -207,6 +207,7 @@ impl DeleteTaskPipeline {
             doc_mapper.clone(),
             delete_executor_io_controls,
             packager_mailbox,
+            None,
         );
         let (delete_executor_mailbox, task_executor_supervisor_handler) =
             ctx.spawn_actor().supervise(delete_executor);
diff --git a/quickwit/quickwit-janitor/src/janitor_service.rs b/quickwit/quickwit-janitor/src/janitor_service.rs
index b8bfe5e54b0..1e843f57b9b 100644
--- a/quickwit/quickwit-janitor/src/janitor_service.rs
+++ b/quickwit/quickwit-janitor/src/janitor_service.rs
@@ -16,6 +16,7 @@ use async_trait::async_trait;
 use quickwit_actors::{
     Actor, ActorContext, ActorExitStatus, ActorHandle, ActorState, Handler, Healthz,
 };
+use quickwit_compaction::planner::CompactionPlanner;
 use serde_json::{Value as JsonValue, json};
 
 use crate::actors::{DeleteTaskService, GarbageCollector, RetentionPolicyExecutor};
@@ -24,6 +25,7 @@ pub struct JanitorService {
     delete_task_service_handle: Option<ActorHandle<DeleteTaskService>>,
     garbage_collector_handle: ActorHandle<GarbageCollector>,
     retention_policy_executor_handle: ActorHandle<RetentionPolicyExecutor>,
+    compaction_planner_handle: Option<ActorHandle<CompactionPlanner>>,
 }
 
 impl JanitorService {
@@ -31,11 +33,13 @@ impl JanitorService {
         delete_task_service_handle: Option<ActorHandle<DeleteTaskService>>,
         garbage_collector_handle: ActorHandle<GarbageCollector>,
         retention_policy_executor_handle: ActorHandle<RetentionPolicyExecutor>,
+        compaction_planner_handle: Option<ActorHandle<CompactionPlanner>>,
     ) -> Self {
         Self {
             delete_task_service_handle,
             garbage_collector_handle,
             retention_policy_executor_handle,
+            compaction_planner_handle,
         }
     }
 
@@ -46,9 +50,16 @@ impl JanitorService {
             } else {
                 true
             };
+        let compaction_planner_is_not_failure: bool =
+            if let Some(compaction_planner_handle) = &self.compaction_planner_handle {
+                compaction_planner_handle.state() != ActorState::Failure
+            } else {
+                true
+            };
         delete_task_is_not_failure
             && self.garbage_collector_handle.state() != ActorState::Failure
             && self.retention_policy_executor_handle.state() != ActorState::Failure
+            && compaction_planner_is_not_failure
     }
 }
 
diff --git a/quickwit/quickwit-janitor/src/lib.rs b/quickwit/quickwit-janitor/src/lib.rs
index bef73160377..7e6734d9016 100644
--- a/quickwit/quickwit-janitor/src/lib.rs
+++ b/quickwit/quickwit-janitor/src/lib.rs
@@ -14,8 +14,9 @@
 
 #![deny(clippy::disallowed_methods)]
 
-use quickwit_actors::{Mailbox, Universe};
+use quickwit_actors::{ActorHandle, Mailbox, Universe};
 use quickwit_common::pubsub::EventBroker;
+use quickwit_compaction::planner::CompactionPlanner;
 use quickwit_config::NodeConfig;
 use quickwit_indexing::actors::MergeSchedulerService;
 use quickwit_metastore::SplitInfo;
@@ -39,6 +40,7 @@ use crate::actors::{DeleteTaskService, GarbageCollector, RetentionPolicyExecutor
 /// Schema used for the OpenAPI generation which are apart of this crate.
 pub struct JanitorApiSchemas;
 
+#[allow(clippy::too_many_arguments)]
 pub async fn start_janitor_service(
     universe: &Universe,
     config: &NodeConfig,
@@ -47,6 +49,7 @@ pub async fn start_janitor_service(
     storage_resolver: StorageResolver,
     event_broker: EventBroker,
     run_delete_task_service: bool,
+    compaction_planner_handle: Option<ActorHandle<CompactionPlanner>>,
 ) -> anyhow::Result<Mailbox<JanitorService>> {
     info!("starting janitor service");
     let garbage_collector = GarbageCollector::new(metastore.clone(), storage_resolver.clone());
@@ -77,6 +80,7 @@ pub async fn start_janitor_service(
         delete_task_service_handle,
         garbage_collector_handle,
         retention_policy_executor_handle,
+        compaction_planner_handle,
     );
     let (janitor_service_mailbox, _janitor_service_handle) =
         universe.spawn_builder().spawn(janitor_service);
diff --git a/quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.down.sql b/quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.down.sql
new file mode 100644
index 00000000000..c8bd22af8c3
--- /dev/null
+++ b/quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.down.sql
@@ -0,0 +1,4 @@
+-- no-transaction
+-- CONCURRENTLY matches the up migration: avoids blocking writes on `splits`
+-- while the index is being dropped. Cannot run inside a transaction.
+DROP INDEX CONCURRENTLY IF EXISTS splits_published_maturity_timestamp_idx;
diff --git a/quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.up.sql b/quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.up.sql
new file mode 100644
index 00000000000..f6a4c7a53d0
--- /dev/null
+++ b/quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.up.sql
@@ -0,0 +1,24 @@
+-- no-transaction
+-- Index for the compaction planner's scan, which on every tick reads up to
+-- LIMIT splits matching:
+--   split_state = 'Published'
+--   maturity_timestamp > now()
+-- ordered by maturity_timestamp ascending. The planner keeps a local set of
+-- already-tracked splits and dedups against it, so re-reading the immature
+-- set every tick is intentional -- it's how the planner recovers splits
+-- whose merge timed out or failed.
+--
+-- The btree on (maturity_timestamp, split_id) lets postgres seek to the live
+-- "still immature" range in index order, satisfying both the filter and the
+-- ORDER BY without an extra sort. The split_id column is included as a
+-- tiebreaker so postgres returns deterministic pages under LIMIT.
+--
+-- The partial predicate is restricted to split_state = 'Published' because
+-- partial-index predicates must be IMMUTABLE; "now()" cannot appear here.
+--
+-- CONCURRENTLY avoids taking a SHARE lock on splits, which would block all
+-- writes for the duration of the build. CONCURRENTLY cannot run inside a
+-- transaction block, hence the `-- no-transaction` directive above.
+CREATE INDEX CONCURRENTLY IF NOT EXISTS splits_published_maturity_timestamp_idx
+    ON splits (maturity_timestamp, split_id)
+    WHERE split_state = 'Published';
diff --git a/quickwit/quickwit-metastore/src/lib.rs b/quickwit/quickwit-metastore/src/lib.rs
index b437a1c1bef..7e02fcd0ec9 100644
--- a/quickwit/quickwit-metastore/src/lib.rs
+++ b/quickwit/quickwit-metastore/src/lib.rs
@@ -48,7 +48,7 @@ pub use metastore::{
     ListParquetSplitsQuery, ListParquetSplitsRequestExt, ListParquetSplitsResponseExt,
     ListSplitsQuery, ListSplitsRequestExt, ListSplitsResponseExt, MetastoreServiceExt,
     MetastoreServiceStreamSplitsExt, ParquetSplitRecord, PublishParquetSplitsRequestExt,
-    PublishSplitsRequestExt, StageParquetSplitsRequestExt, StageSplitsRequestExt,
+    PublishSplitsRequestExt, SortBy, StageParquetSplitsRequestExt, StageSplitsRequestExt,
     UpdateIndexRequestExt, UpdateSourceRequestExt, file_backed,
 };
 pub use metastore_factory::{MetastoreFactory, UnsupportedMetastore};
diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs
index 88b95d003ee..444032b8ae2 100644
--- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs
+++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs
@@ -1290,6 +1290,14 @@ fn split_query_predicate(split: &&Split, query: &ListSplitsQuery) -> bool {
         }
     }
 
+    if query
+        .excluded_split_ids
+        .iter()
+        .any(|excluded| excluded == &split.split_metadata.split_id)
+    {
+        return false;
+    }
+
     true
 }
 
diff --git a/quickwit/quickwit-metastore/src/metastore/mod.rs b/quickwit/quickwit-metastore/src/metastore/mod.rs
index 8752d348e02..6ebef78e583 100644
--- a/quickwit/quickwit-metastore/src/metastore/mod.rs
+++ b/quickwit/quickwit-metastore/src/metastore/mod.rs
@@ -992,13 +992,28 @@ pub struct ListSplitsQuery {
 
     /// Only return splits whose (index_uid, split_id) are lexicographically after this split
     pub after_split: Option<(IndexUid, SplitId)>,
+
+    /// Exclude any split whose `split_id` appears in this list. Empty means no
+    /// exclusion.
+    pub excluded_split_ids: Vec<SplitId>,
 }
 
 #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
+/// Ordering applied to the result of a [`ListSplitsQuery`].
 pub enum SortBy {
+    /// No ordering — the metastore may return splits in any order.
     None,
+    /// Order by `(delete_opstamp ASC, publish_timestamp ASC)`. Used by the
+    /// delete pipeline to process the splits with the most pending delete
+    /// work first.
     Staleness,
+    /// Order by `(index_uid ASC, split_id ASC)`, matching the splits-table
+    /// primary key. Used for stable pagination across all indexes.
     IndexUid,
+    /// Order by `(maturity_timestamp ASC, split_id ASC)`. Used by the
+    /// compaction planner so that under a backlog the splits closest to
+    /// becoming mature are processed first.
+    MaturityTimestamp,
 }
 
 impl SortBy {
@@ -1024,6 +1039,16 @@ impl SortBy {
                         .split_id
                         .cmp(&right_split.split_metadata.split_id)
                 }),
+            SortBy::MaturityTimestamp => left_split
+                .split_metadata
+                .maturity_unix_timestamp()
+                .cmp(&right_split.split_metadata.maturity_unix_timestamp())
+                .then_with(|| {
+                    left_split
+                        .split_metadata
+                        .split_id
+                        .cmp(&right_split.split_metadata.split_id)
+                }),
         }
     }
 }
@@ -1047,6 +1072,7 @@ impl ListSplitsQuery {
             mature: Bound::Unbounded,
             sort_by: SortBy::None,
             after_split: None,
+            excluded_split_ids: Vec::new(),
         }
     }
 
@@ -1071,6 +1097,7 @@ impl ListSplitsQuery {
             mature: Bound::Unbounded,
             sort_by: SortBy::None,
             after_split: None,
+            excluded_split_ids: Vec::new(),
         })
     }
 
@@ -1091,6 +1118,7 @@ impl ListSplitsQuery {
             mature: Bound::Unbounded,
             sort_by: SortBy::None,
             after_split: None,
+            excluded_split_ids: Vec::new(),
         }
     }
 
@@ -1274,12 +1302,25 @@ impl ListSplitsQuery {
         self
     }
 
+    /// Sorts the splits by maturity_timestamp ascending, with split_id as a tiebreaker.
+    pub fn sort_by_maturity_timestamp(mut self) -> Self {
+        self.sort_by = SortBy::MaturityTimestamp;
+        self
+    }
+
     /// Only return splits whose (index_uid, split_id) are lexicographically after this split.
     /// This is only useful if results are sorted by index_uid and split_id.
     pub fn after_split(mut self, split_meta: &SplitMetadata) -> Self {
         self.after_split = Some((split_meta.index_uid.clone(), split_meta.split_id.clone()));
         self
     }
+
+    /// Excludes splits whose `split_id` is in the provided list. Used by the
+    /// compaction planner to skip splits it is already tracking locally.
+    pub fn with_excluded_split_ids(mut self, excluded_split_ids: Vec<SplitId>) -> Self {
+        self.excluded_split_ids = excluded_split_ids;
+        self
+    }
 }
 
 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs b/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs
index e9d0b1ed8fa..9675c372102 100644
--- a/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs
+++ b/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs
@@ -873,7 +873,7 @@ impl MetastoreService for PostgresqlMetastore {
         let list_splits_query = request.deserialize_list_splits_query()?;
         let mut sql_query_builder = Query::select();
         sql_query_builder.column(Asterisk).from(Splits::Table);
-        append_query_filters_and_order_by(&mut sql_query_builder, &list_splits_query);
+        append_query_filters_and_order_by(&mut sql_query_builder, list_splits_query);
 
         let (sql_query, values) = sql_query_builder.build_sqlx(PostgresQueryBuilder);
         let pg_split_stream = SplitStream::new(
@@ -3194,7 +3194,7 @@ mod tests {
         let index_uid = IndexUid::new_with_random_ulid("test-index");
         let query =
             ListSplitsQuery::for_index(index_uid.clone()).with_split_state(SplitState::Staged);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
 
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
@@ -3208,7 +3208,7 @@ mod tests {
 
         let query =
             ListSplitsQuery::for_index(index_uid.clone()).with_split_state(SplitState::Published);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
 
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
@@ -3222,7 +3222,7 @@ mod tests {
 
         let query = ListSplitsQuery::for_index(index_uid.clone())
             .with_split_states([SplitState::Published, SplitState::MarkedForDeletion]);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3234,7 +3234,7 @@ mod tests {
         let sql = select_statement.column(Asterisk).from(Splits::Table);
 
         let query = ListSplitsQuery::for_index(index_uid.clone()).with_update_timestamp_lt(51);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3246,7 +3246,7 @@ mod tests {
         let sql = select_statement.column(Asterisk).from(Splits::Table);
 
         let query = ListSplitsQuery::for_index(index_uid.clone()).with_create_timestamp_lte(55);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3260,7 +3260,7 @@ mod tests {
         let maturity_evaluation_datetime = OffsetDateTime::from_unix_timestamp(55).unwrap();
         let query = ListSplitsQuery::for_index(index_uid.clone())
             .retain_mature(maturity_evaluation_datetime);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
 
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
@@ -3274,7 +3274,7 @@ mod tests {
 
         let query = ListSplitsQuery::for_index(index_uid.clone())
             .retain_immature(maturity_evaluation_datetime);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3286,7 +3286,7 @@ mod tests {
         let sql = select_statement.column(Asterisk).from(Splits::Table);
 
         let query = ListSplitsQuery::for_index(index_uid.clone()).with_delete_opstamp_gte(4);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3298,7 +3298,7 @@ mod tests {
         let sql = select_statement.column(Asterisk).from(Splits::Table);
 
         let query = ListSplitsQuery::for_index(index_uid.clone()).with_time_range_start_gt(45);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3310,7 +3310,7 @@ mod tests {
         let sql = select_statement.column(Asterisk).from(Splits::Table);
 
         let query = ListSplitsQuery::for_index(index_uid.clone()).with_time_range_end_lt(45);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3326,7 +3326,7 @@ mod tests {
                 is_present: false,
                 tag: "tag-2".to_string(),
             });
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
 
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
@@ -3339,7 +3339,7 @@ mod tests {
         let sql = select_statement.column(Asterisk).from(Splits::Table);
 
         let query = ListSplitsQuery::for_index(index_uid.clone()).with_offset(4);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
 
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
@@ -3352,7 +3352,7 @@ mod tests {
         let sql = select_statement.column(Asterisk).from(Splits::Table);
 
         let query = ListSplitsQuery::for_index(index_uid.clone()).sort_by_index_uid();
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
 
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
@@ -3370,7 +3370,7 @@ mod tests {
                 split_id: "my_split".to_string(),
                 ..Default::default()
             });
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
 
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
@@ -3383,7 +3383,7 @@ mod tests {
         let sql = select_statement.column(Asterisk).from(Splits::Table);
 
         let query = ListSplitsQuery::for_all_indexes().with_split_state(SplitState::Staged);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
 
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
@@ -3394,7 +3394,7 @@ mod tests {
         let sql = select_statement.column(Asterisk).from(Splits::Table);
 
         let query = ListSplitsQuery::for_all_indexes().with_max_time_range_end(42);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
 
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
@@ -3411,7 +3411,8 @@ mod tests {
         let query = ListSplitsQuery::for_index(index_uid.clone())
             .with_time_range_start_gt(0)
             .with_time_range_end_lt(40);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
+
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3425,7 +3426,7 @@ mod tests {
         let query = ListSplitsQuery::for_index(index_uid.clone())
             .with_time_range_start_gt(45)
             .with_delete_opstamp_gt(0);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3439,7 +3440,7 @@ mod tests {
         let query = ListSplitsQuery::for_index(index_uid.clone())
             .with_update_timestamp_lt(51)
             .with_create_timestamp_lte(63);
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3456,7 +3457,7 @@ mod tests {
                 is_present: true,
                 tag: "tag-1".to_string(),
             });
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3471,7 +3472,7 @@ mod tests {
         let query =
             ListSplitsQuery::try_from_index_uids(vec![index_uid.clone(), index_uid_2.clone()])
                 .unwrap();
-        append_query_filters_and_order_by(sql, &query);
+        append_query_filters_and_order_by(sql, query);
         assert_eq!(
             sql.to_string(PostgresQueryBuilder),
             format!(
@@ -3480,6 +3481,20 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_list_splits_query_excluded_split_ids() {
+        let mut select_statement = Query::select();
+        let sql = select_statement.column(Asterisk).from(Splits::Table);
+
+        let query = ListSplitsQuery::for_all_indexes()
+            .with_excluded_split_ids(vec!["s1".to_string(), "s2".to_string()]);
+        append_query_filters_and_order_by(sql, query);
+        assert_eq!(
+            sql.to_string(PostgresQueryBuilder),
+            r#"SELECT * FROM "splits" WHERE split_id <> ALL(ARRAY ['s1','s2']::text[])"#
+        );
+    }
+
     #[test]
     fn test_index_id_pattern_like_query() {
         assert_eq!(
diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/migrator.rs b/quickwit/quickwit-metastore/src/metastore/postgres/migrator.rs
index 03c40eec666..aa51a42c43e 100644
--- a/quickwit/quickwit-metastore/src/metastore/postgres/migrator.rs
+++ b/quickwit/quickwit-metastore/src/metastore/postgres/migrator.rs
@@ -27,40 +27,45 @@ fn get_migrations() -> Migrator {
 
 /// Initializes the database and runs the SQL migrations stored in the
 /// `quickwit-metastore/migrations` directory.
+///
+/// Runs on a raw pooled connection -- not wrapped in an outer transaction.
+/// sqlx's `Migrator::run_direct` handles per-migration transactionality
+/// itself, honoring each migration's `no_tx` flag (set by a
+/// `-- no-transaction` directive as the first line of the migration file).
+/// Wrapping the run in our own transaction would defeat that for migrations
+/// that must execute outside a transaction block (e.g. `CREATE INDEX
+/// CONCURRENTLY`).
+///
+/// Atomicity is per-migration, not per-run: a failure on migration N leaves
+/// migrations 1..N-1 applied and committed in `_sqlx_migrations`. The
+/// operator fixes the failing migration and re-runs.
 #[instrument(skip_all)]
 pub(super) async fn run_migrations(
     pool: &TrackedPool<Postgres>,
     skip_migrations: bool,
     skip_locking: bool,
 ) -> MetastoreResult<()> {
-    let mut tx = pool.begin().await?;
-    let conn = tx.acquire().await?;
-
+    let mut conn = pool.acquire().await?;
     let mut migrator = get_migrations();
 
     if skip_locking {
         migrator.set_locking(false);
     }
 
-    if !skip_migrations {
-        // this is an hidden function, made to get "around the annoying "implementation of `Acquire`
-        // is not general enough" error", which is the error we get otherwise.
-        let migrate_result = migrator.run_direct(conn).await;
+    if skip_migrations {
+        return check_migrations(migrator, &mut conn).await;
+    }
 
-        let Err(migrate_error) = migrate_result else {
-            tx.commit().await?;
-            return Ok(());
-        };
-        tx.rollback().await?;
+    // this is an hidden function, made to get "around the annoying "implementation of `Acquire`
+    // is not general enough" error", which is the error we get otherwise.
+    if let Err(migrate_error) = migrator.run_direct(&mut *conn).await {
         error!(error=%migrate_error, "failed to run PostgreSQL migrations");
-
-        Err(MetastoreError::Internal {
+        return Err(MetastoreError::Internal {
             message: "failed to run PostgreSQL migrations".to_string(),
             cause: migrate_error.to_string(),
-        })
-    } else {
-        check_migrations(migrator, conn).await
+        });
     }
+    Ok(())
 }
 
 async fn check_migrations(migrator: Migrator, conn: &mut PgConnection) -> MetastoreResult<()> {
diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs b/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs
index 28519b8a294..d63755a06f6 100644
--- a/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs
+++ b/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs
@@ -19,7 +19,7 @@ use std::time::Duration;
 
 use quickwit_common::uri::Uri;
 use quickwit_proto::metastore::{MetastoreError, MetastoreResult};
-use sea_query::{Expr, Func, Order, SelectStatement, any};
+use sea_query::{ArrayType, Expr, Func, Order, SelectStatement, Value, any};
 use sqlx::postgres::{PgConnectOptions, PgPoolOptions};
 use sqlx::{ConnectOptions, Postgres};
 use tracing::error;
@@ -96,10 +96,7 @@ pub(super) fn append_range_filters<V: Display>(
     };
 }
 
-pub(super) fn append_query_filters_and_order_by(
-    sql: &mut SelectStatement,
-    query: &ListSplitsQuery,
-) {
+pub(super) fn append_query_filters_and_order_by(sql: &mut SelectStatement, query: ListSplitsQuery) {
     if let Some(index_uids) = &query.index_uids {
         // Note: `ListSplitsQuery` builder enforces a non empty `index_uids` list.
         // TODO we should explore IN VALUES, = ANY and similar constructs in case they perform
@@ -202,6 +199,23 @@ pub(super) fn append_query_filters_and_order_by(
         );
     }
 
+    if !query.excluded_split_ids.is_empty() {
+        // One bind regardless of list length: avoids postgres' 65535 param
+        // ceiling and keeps parse-time O(1) in the exclude size. The `$1` is
+        // postgres' placeholder; sea-query substitutes it with the next bind
+        // slot at build time.
+        let excluded_values: Vec<Value> = query
+            .excluded_split_ids
+            .into_iter()
+            .map(|split_id| Value::String(Some(Box::new(split_id))))
+            .collect();
+        let excluded_array = Value::Array(ArrayType::String, Some(Box::new(excluded_values)));
+        sql.cond_where(Expr::cust_with_values(
+            "split_id <> ALL($1::text[])",
+            [excluded_array],
+        ));
+    }
+
     match query.sort_by {
         SortBy::Staleness => {
             sql.order_by(Splits::DeleteOpstamp, Order::Asc)
@@ -211,6 +225,10 @@ pub(super) fn append_query_filters_and_order_by(
             sql.order_by(Splits::IndexUid, Order::Asc)
                 .order_by(Splits::SplitId, Order::Asc);
         }
+        SortBy::MaturityTimestamp => {
+            sql.order_by(Splits::MaturityTimestamp, Order::Asc)
+                .order_by(Splits::SplitId, Order::Asc);
+        }
         SortBy::None => (),
     }
 
diff --git a/quickwit/quickwit-metastore/src/split_metadata.rs b/quickwit/quickwit-metastore/src/split_metadata.rs
index fe88fe379d3..95066e87838 100644
--- a/quickwit/quickwit-metastore/src/split_metadata.rs
+++ b/quickwit/quickwit-metastore/src/split_metadata.rs
@@ -218,6 +218,18 @@ impl SplitMetadata {
         }
     }
 
+    /// Returns the unix timestamp at which the split becomes mature, or 0 if
+    /// the split is already mature (matching the metastore's stored
+    /// `maturity_timestamp` column).
+    pub fn maturity_unix_timestamp(&self) -> i64 {
+        match self.maturity {
+            SplitMaturity::Mature => 0,
+            SplitMaturity::Immature { maturation_period } => {
+                self.create_timestamp + maturation_period.as_secs() as i64
+            }
+        }
+    }
+
     #[cfg(any(test, feature = "testsuite"))]
     /// Returns an instance of `SplitMetadata` for testing.
     pub fn for_test(split_id: SplitId) -> SplitMetadata {
diff --git a/quickwit/quickwit-proto/build.rs b/quickwit/quickwit-proto/build.rs
index a5f2e1f2b53..20724113bb2 100644
--- a/quickwit/quickwit-proto/build.rs
+++ b/quickwit/quickwit-proto/build.rs
@@ -35,6 +35,22 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .run()
         .unwrap();
 
+    // Compaction service.
+    let mut prost_config = prost_build::Config::default();
+    prost_config.file_descriptor_set_path("src/codegen/quickwit/compaction_descriptor.bin");
+    prost_config.extern_path(".quickwit.common.IndexUid", "crate::types::IndexUid");
+
+    Codegen::builder()
+        .with_prost_config(prost_config)
+        .with_protos(&["protos/quickwit/compaction.proto"])
+        .with_includes(&["protos"])
+        .with_output_dir("src/codegen/quickwit")
+        .with_result_type_path("crate::compaction::CompactionResult")
+        .with_error_type_path("crate::compaction::CompactionError")
+        .generate_rpc_name_impls()
+        .run()
+        .unwrap();
+
     // Control plane.
     let mut prost_config = prost_build::Config::default();
     prost_config.file_descriptor_set_path("src/codegen/quickwit/control_plane_descriptor.bin");
diff --git a/quickwit/quickwit-proto/protos/quickwit/compaction.proto b/quickwit/quickwit-proto/protos/quickwit/compaction.proto
new file mode 100644
index 00000000000..3c82d6b34c7
--- /dev/null
+++ b/quickwit/quickwit-proto/protos/quickwit/compaction.proto
@@ -0,0 +1,64 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package quickwit.compaction;
+
+import "quickwit/common.proto";
+
+service CompactionPlannerService {
+  rpc ReportStatus(ReportStatusRequest) returns (ReportStatusResponse);
+}
+
+message ReportStatusRequest {
+  string node_id = 1;
+  uint32 available_slots = 2;
+  repeated CompactionInProgress in_progress = 3;
+  repeated CompactionSuccess successes = 4;
+  repeated CompactionFailure failures = 5;
+}
+
+message CompactionInProgress {
+  string task_id = 1;
+  quickwit.common.IndexUid index_uid = 2;
+  string source_id = 3;
+  repeated string split_ids = 4;
+}
+
+message CompactionSuccess {
+  string task_id = 1;
+}
+
+message CompactionFailure {
+  string task_id = 1;
+  string error_message = 2;
+}
+
+message ReportStatusResponse {
+  repeated MergeTaskAssignment new_tasks = 1;
+}
+
+message MergeTaskAssignment {
+  string task_id = 1;
+  repeated string splits_metadata_json = 2;
+  string doc_mapping_json = 3;
+  string search_settings_json = 4;
+  string indexing_settings_json = 5;
+  string retention_policy_json = 6;
+  quickwit.common.IndexUid index_uid = 7;
+  string source_id = 8;
+  string index_storage_uri = 9;
+  uint64 merge_level = 10;
+}
\ No newline at end of file
diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs
new file mode 100644
index 00000000000..708c35fa5af
--- /dev/null
+++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs
@@ -0,0 +1,901 @@
+// This file is @generated by prost-build.
+#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct ReportStatusRequest {
+    #[prost(string, tag = "1")]
+    pub node_id: ::prost::alloc::string::String,
+    #[prost(uint32, tag = "2")]
+    pub available_slots: u32,
+    #[prost(message, repeated, tag = "3")]
+    pub in_progress: ::prost::alloc::vec::Vec<CompactionInProgress>,
+    #[prost(message, repeated, tag = "4")]
+    pub successes: ::prost::alloc::vec::Vec<CompactionSuccess>,
+    #[prost(message, repeated, tag = "5")]
+    pub failures: ::prost::alloc::vec::Vec<CompactionFailure>,
+}
+#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
+pub struct CompactionInProgress {
+    #[prost(string, tag = "1")]
+    pub task_id: ::prost::alloc::string::String,
+    #[prost(message, optional, tag = "2")]
+    pub index_uid: ::core::option::Option<crate::types::IndexUid>,
+    #[prost(string, tag = "3")]
+    pub source_id: ::prost::alloc::string::String,
+    #[prost(string, repeated, tag = "4")]
+    pub split_ids: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
+}
+#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
+pub struct CompactionSuccess {
+    #[prost(string, tag = "1")]
+    pub task_id: ::prost::alloc::string::String,
+}
+#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
+pub struct CompactionFailure {
+    #[prost(string, tag = "1")]
+    pub task_id: ::prost::alloc::string::String,
+    #[prost(string, tag = "2")]
+    pub error_message: ::prost::alloc::string::String,
+}
+#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct ReportStatusResponse {
+    #[prost(message, repeated, tag = "1")]
+    pub new_tasks: ::prost::alloc::vec::Vec<MergeTaskAssignment>,
+}
+#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
+pub struct MergeTaskAssignment {
+    #[prost(string, tag = "1")]
+    pub task_id: ::prost::alloc::string::String,
+    #[prost(string, repeated, tag = "2")]
+    pub splits_metadata_json: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
+    #[prost(string, tag = "3")]
+    pub doc_mapping_json: ::prost::alloc::string::String,
+    #[prost(string, tag = "4")]
+    pub search_settings_json: ::prost::alloc::string::String,
+    #[prost(string, tag = "5")]
+    pub indexing_settings_json: ::prost::alloc::string::String,
+    #[prost(string, tag = "6")]
+    pub retention_policy_json: ::prost::alloc::string::String,
+    #[prost(message, optional, tag = "7")]
+    pub index_uid: ::core::option::Option<crate::types::IndexUid>,
+    #[prost(string, tag = "8")]
+    pub source_id: ::prost::alloc::string::String,
+    #[prost(string, tag = "9")]
+    pub index_storage_uri: ::prost::alloc::string::String,
+    #[prost(uint64, tag = "10")]
+    pub merge_level: u64,
+}
+/// BEGIN quickwit-codegen
+#[allow(unused_imports)]
+use std::str::FromStr;
+use tower::{Layer, Service, ServiceExt};
+use quickwit_common::tower::RpcName;
+impl RpcName for ReportStatusRequest {
+    fn rpc_name() -> &'static str {
+        "report_status"
+    }
+}
+#[cfg_attr(any(test, feature = "testsuite"), mockall::automock)]
+#[async_trait::async_trait]
+pub trait CompactionPlannerService: std::fmt::Debug + Send + Sync + 'static {
+    async fn report_status(
+        &self,
+        request: ReportStatusRequest,
+    ) -> crate::compaction::CompactionResult<ReportStatusResponse>;
+}
+#[derive(Debug, Clone)]
+pub struct CompactionPlannerServiceClient {
+    inner: InnerCompactionPlannerServiceClient,
+}
+#[derive(Debug, Clone)]
+struct InnerCompactionPlannerServiceClient(std::sync::Arc<dyn CompactionPlannerService>);
+impl CompactionPlannerServiceClient {
+    pub fn new<T>(instance: T) -> Self
+    where
+        T: CompactionPlannerService,
+    {
+        #[cfg(any(test, feature = "testsuite"))]
+        assert!(
+            std::any::TypeId::of:: < T > () != std::any::TypeId::of:: <
+            MockCompactionPlannerService > (),
+            "`MockCompactionPlannerService` must be wrapped in a `MockCompactionPlannerServiceWrapper`: use `CompactionPlannerServiceClient::from_mock(mock)` to instantiate the client"
+        );
+        Self {
+            inner: InnerCompactionPlannerServiceClient(std::sync::Arc::new(instance)),
+        }
+    }
+    pub fn as_grpc_service(
+        &self,
+        max_message_size: bytesize::ByteSize,
+    ) -> compaction_planner_service_grpc_server::CompactionPlannerServiceGrpcServer<
+        CompactionPlannerServiceGrpcServerAdapter,
+    > {
+        let adapter = CompactionPlannerServiceGrpcServerAdapter::new(self.clone());
+        compaction_planner_service_grpc_server::CompactionPlannerServiceGrpcServer::new(
+                adapter,
+            )
+            .accept_compressed(tonic::codec::CompressionEncoding::Gzip)
+            .accept_compressed(tonic::codec::CompressionEncoding::Zstd)
+            .send_compressed(tonic::codec::CompressionEncoding::Gzip)
+            .send_compressed(tonic::codec::CompressionEncoding::Zstd)
+            .max_decoding_message_size(max_message_size.0 as usize)
+            .max_encoding_message_size(max_message_size.0 as usize)
+    }
+    pub fn from_channel(
+        addr: std::net::SocketAddr,
+        channel: tonic::transport::Channel,
+        max_message_size: bytesize::ByteSize,
+        compression_encoding_opt: Option<tonic::codec::CompressionEncoding>,
+    ) -> Self {
+        let (_, connection_keys_watcher) = tokio::sync::watch::channel(
+            std::collections::HashSet::from_iter([addr]),
+        );
+        let mut client = compaction_planner_service_grpc_client::CompactionPlannerServiceGrpcClient::new(
+                channel,
+            )
+            .max_decoding_message_size(max_message_size.0 as usize)
+            .max_encoding_message_size(max_message_size.0 as usize);
+        if let Some(compression_encoding) = compression_encoding_opt {
+            client = client
+                .accept_compressed(compression_encoding)
+                .send_compressed(compression_encoding);
+        }
+        let adapter = CompactionPlannerServiceGrpcClientAdapter::new(
+            client,
+            connection_keys_watcher,
+        );
+        Self::new(adapter)
+    }
+    pub fn from_balance_channel(
+        balance_channel: quickwit_common::tower::BalanceChannel<std::net::SocketAddr>,
+        max_message_size: bytesize::ByteSize,
+        compression_encoding_opt: Option<tonic::codec::CompressionEncoding>,
+    ) -> CompactionPlannerServiceClient {
+        let connection_keys_watcher = balance_channel.connection_keys_watcher();
+        let mut client = compaction_planner_service_grpc_client::CompactionPlannerServiceGrpcClient::new(
+                balance_channel,
+            )
+            .max_decoding_message_size(max_message_size.0 as usize)
+            .max_encoding_message_size(max_message_size.0 as usize);
+        if let Some(compression_encoding) = compression_encoding_opt {
+            client = client
+                .accept_compressed(compression_encoding)
+                .send_compressed(compression_encoding);
+        }
+        let adapter = CompactionPlannerServiceGrpcClientAdapter::new(
+            client,
+            connection_keys_watcher,
+        );
+        Self::new(adapter)
+    }
+    pub fn from_mailbox<A>(mailbox: quickwit_actors::Mailbox<A>) -> Self
+    where
+        A: quickwit_actors::Actor + std::fmt::Debug + Send + 'static,
+        CompactionPlannerServiceMailbox<A>: CompactionPlannerService,
+    {
+        CompactionPlannerServiceClient::new(
+            CompactionPlannerServiceMailbox::new(mailbox),
+        )
+    }
+    pub fn tower() -> CompactionPlannerServiceTowerLayerStack {
+        CompactionPlannerServiceTowerLayerStack::default()
+    }
+    #[cfg(any(test, feature = "testsuite"))]
+    pub fn from_mock(mock: MockCompactionPlannerService) -> Self {
+        let mock_wrapper = mock_compaction_planner_service::MockCompactionPlannerServiceWrapper {
+            inner: tokio::sync::Mutex::new(mock),
+        };
+        Self::new(mock_wrapper)
+    }
+    #[cfg(any(test, feature = "testsuite"))]
+    pub fn mocked() -> Self {
+        Self::from_mock(MockCompactionPlannerService::new())
+    }
+}
+#[async_trait::async_trait]
+impl CompactionPlannerService for CompactionPlannerServiceClient {
+    #[tracing::instrument(skip_all, name = "compaction.report_status")]
+    async fn report_status(
+        &self,
+        request: ReportStatusRequest,
+    ) -> crate::compaction::CompactionResult<ReportStatusResponse> {
+        self.inner.0.report_status(request).await
+    }
+}
+#[cfg(any(test, feature = "testsuite"))]
+pub mod mock_compaction_planner_service {
+    use super::*;
+    #[derive(Debug)]
+    pub struct MockCompactionPlannerServiceWrapper {
+        pub(super) inner: tokio::sync::Mutex<MockCompactionPlannerService>,
+    }
+    #[async_trait::async_trait]
+    impl CompactionPlannerService for MockCompactionPlannerServiceWrapper {
+        async fn report_status(
+            &self,
+            request: super::ReportStatusRequest,
+        ) -> crate::compaction::CompactionResult<super::ReportStatusResponse> {
+            self.inner.lock().await.report_status(request).await
+        }
+    }
+}
+pub type BoxFuture<T, E> = std::pin::Pin<
+    Box<dyn std::future::Future<Output = Result<T, E>> + Send + 'static>,
+>;
+impl tower::Service<ReportStatusRequest> for InnerCompactionPlannerServiceClient {
+    type Response = ReportStatusResponse;
+    type Error = crate::compaction::CompactionError;
+    type Future = BoxFuture<Self::Response, Self::Error>;
+    fn poll_ready(
+        &mut self,
+        _cx: &mut std::task::Context<'_>,
+    ) -> std::task::Poll<Result<(), Self::Error>> {
+        std::task::Poll::Ready(Ok(()))
+    }
+    fn call(&mut self, request: ReportStatusRequest) -> Self::Future {
+        let svc = self.clone();
+        let fut = async move { svc.0.report_status(request).await };
+        Box::pin(fut)
+    }
+}
+/// A tower service stack is a set of tower services.
+#[derive(Debug)]
+struct CompactionPlannerServiceTowerServiceStack {
+    #[allow(dead_code)]
+    inner: InnerCompactionPlannerServiceClient,
+    report_status_svc: quickwit_common::tower::BoxService<
+        ReportStatusRequest,
+        ReportStatusResponse,
+        crate::compaction::CompactionError,
+    >,
+}
+#[async_trait::async_trait]
+impl CompactionPlannerService for CompactionPlannerServiceTowerServiceStack {
+    async fn report_status(
+        &self,
+        request: ReportStatusRequest,
+    ) -> crate::compaction::CompactionResult<ReportStatusResponse> {
+        self.report_status_svc.clone().ready().await?.call(request).await
+    }
+}
+type ReportStatusLayer = quickwit_common::tower::BoxLayer<
+    quickwit_common::tower::BoxService<
+        ReportStatusRequest,
+        ReportStatusResponse,
+        crate::compaction::CompactionError,
+    >,
+    ReportStatusRequest,
+    ReportStatusResponse,
+    crate::compaction::CompactionError,
+>;
+#[derive(Debug, Default)]
+pub struct CompactionPlannerServiceTowerLayerStack {
+    report_status_layers: Vec<ReportStatusLayer>,
+}
+impl CompactionPlannerServiceTowerLayerStack {
+    pub fn stack_layer<L>(mut self, layer: L) -> Self
+    where
+        L: tower::Layer<
+                quickwit_common::tower::BoxService<
+                    ReportStatusRequest,
+                    ReportStatusResponse,
+                    crate::compaction::CompactionError,
+                >,
+            > + Clone + Send + Sync + 'static,
+        <L as tower::Layer<
+            quickwit_common::tower::BoxService<
+                ReportStatusRequest,
+                ReportStatusResponse,
+                crate::compaction::CompactionError,
+            >,
+        >>::Service: tower::Service<
+                ReportStatusRequest,
+                Response = ReportStatusResponse,
+                Error = crate::compaction::CompactionError,
+            > + Clone + Send + Sync + 'static,
+        <<L as tower::Layer<
+            quickwit_common::tower::BoxService<
+                ReportStatusRequest,
+                ReportStatusResponse,
+                crate::compaction::CompactionError,
+            >,
+        >>::Service as tower::Service<ReportStatusRequest>>::Future: Send + 'static,
+    {
+        self.report_status_layers
+            .push(quickwit_common::tower::BoxLayer::new(layer.clone()));
+        self
+    }
+    pub fn stack_report_status_layer<L>(mut self, layer: L) -> Self
+    where
+        L: tower::Layer<
+                quickwit_common::tower::BoxService<
+                    ReportStatusRequest,
+                    ReportStatusResponse,
+                    crate::compaction::CompactionError,
+                >,
+            > + Send + Sync + 'static,
+        L::Service: tower::Service<
+                ReportStatusRequest,
+                Response = ReportStatusResponse,
+                Error = crate::compaction::CompactionError,
+            > + Clone + Send + Sync + 'static,
+        <L::Service as tower::Service<ReportStatusRequest>>::Future: Send + 'static,
+    {
+        self.report_status_layers.push(quickwit_common::tower::BoxLayer::new(layer));
+        self
+    }
+    pub fn build<T>(self, instance: T) -> CompactionPlannerServiceClient
+    where
+        T: CompactionPlannerService,
+    {
+        let inner_client = InnerCompactionPlannerServiceClient(
+            std::sync::Arc::new(instance),
+        );
+        self.build_from_inner_client(inner_client)
+    }
+    pub fn build_from_channel(
+        self,
+        addr: std::net::SocketAddr,
+        channel: tonic::transport::Channel,
+        max_message_size: bytesize::ByteSize,
+        compression_encoding_opt: Option<tonic::codec::CompressionEncoding>,
+    ) -> CompactionPlannerServiceClient {
+        let client = CompactionPlannerServiceClient::from_channel(
+            addr,
+            channel,
+            max_message_size,
+            compression_encoding_opt,
+        );
+        let inner_client = client.inner;
+        self.build_from_inner_client(inner_client)
+    }
+    pub fn build_from_balance_channel(
+        self,
+        balance_channel: quickwit_common::tower::BalanceChannel<std::net::SocketAddr>,
+        max_message_size: bytesize::ByteSize,
+        compression_encoding_opt: Option<tonic::codec::CompressionEncoding>,
+    ) -> CompactionPlannerServiceClient {
+        let client = CompactionPlannerServiceClient::from_balance_channel(
+            balance_channel,
+            max_message_size,
+            compression_encoding_opt,
+        );
+        let inner_client = client.inner;
+        self.build_from_inner_client(inner_client)
+    }
+    pub fn build_from_mailbox<A>(
+        self,
+        mailbox: quickwit_actors::Mailbox<A>,
+    ) -> CompactionPlannerServiceClient
+    where
+        A: quickwit_actors::Actor + std::fmt::Debug + Send + 'static,
+        CompactionPlannerServiceMailbox<A>: CompactionPlannerService,
+    {
+        let inner_client = InnerCompactionPlannerServiceClient(
+            std::sync::Arc::new(CompactionPlannerServiceMailbox::new(mailbox)),
+        );
+        self.build_from_inner_client(inner_client)
+    }
+    #[cfg(any(test, feature = "testsuite"))]
+    pub fn build_from_mock(
+        self,
+        mock: MockCompactionPlannerService,
+    ) -> CompactionPlannerServiceClient {
+        let client = CompactionPlannerServiceClient::from_mock(mock);
+        let inner_client = client.inner;
+        self.build_from_inner_client(inner_client)
+    }
+    fn build_from_inner_client(
+        self,
+        inner_client: InnerCompactionPlannerServiceClient,
+    ) -> CompactionPlannerServiceClient {
+        let report_status_svc = self
+            .report_status_layers
+            .into_iter()
+            .rev()
+            .fold(
+                quickwit_common::tower::BoxService::new(inner_client.clone()),
+                |svc, layer| layer.layer(svc),
+            );
+        let tower_svc_stack = CompactionPlannerServiceTowerServiceStack {
+            inner: inner_client,
+            report_status_svc,
+        };
+        CompactionPlannerServiceClient::new(tower_svc_stack)
+    }
+}
+#[derive(Debug, Clone)]
+struct MailboxAdapter<A: quickwit_actors::Actor, E> {
+    inner: quickwit_actors::Mailbox<A>,
+    phantom: std::marker::PhantomData<E>,
+}
+impl<A, E> std::ops::Deref for MailboxAdapter<A, E>
+where
+    A: quickwit_actors::Actor,
+{
+    type Target = quickwit_actors::Mailbox<A>;
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+#[derive(Debug)]
+pub struct CompactionPlannerServiceMailbox<A: quickwit_actors::Actor> {
+    inner: MailboxAdapter<A, crate::compaction::CompactionError>,
+}
+impl<A: quickwit_actors::Actor> CompactionPlannerServiceMailbox<A> {
+    pub fn new(instance: quickwit_actors::Mailbox<A>) -> Self {
+        let inner = MailboxAdapter {
+            inner: instance,
+            phantom: std::marker::PhantomData,
+        };
+        Self { inner }
+    }
+}
+impl<A: quickwit_actors::Actor> Clone for CompactionPlannerServiceMailbox<A> {
+    fn clone(&self) -> Self {
+        let inner = MailboxAdapter {
+            inner: self.inner.clone(),
+            phantom: std::marker::PhantomData,
+        };
+        Self { inner }
+    }
+}
+impl<A, M, T, E> tower::Service<M> for CompactionPlannerServiceMailbox<A>
+where
+    A: quickwit_actors::Actor
+        + quickwit_actors::DeferableReplyHandler<M, Reply = Result<T, E>> + Send
+        + 'static,
+    M: std::fmt::Debug + Send + 'static,
+    T: Send + 'static,
+    E: std::fmt::Debug + Send + 'static,
+    crate::compaction::CompactionError: From<quickwit_actors::AskError<E>>,
+{
+    type Response = T;
+    type Error = crate::compaction::CompactionError;
+    type Future = BoxFuture<Self::Response, Self::Error>;
+    fn poll_ready(
+        &mut self,
+        _cx: &mut std::task::Context<'_>,
+    ) -> std::task::Poll<Result<(), Self::Error>> {
+        //! This does not work with balance middlewares such as `tower::balance::pool::Pool` because
+        //! this always returns `Poll::Ready`. The fix is to acquire a permit from the
+        //! mailbox in `poll_ready` and consume it in `call`.
+        std::task::Poll::Ready(Ok(()))
+    }
+    fn call(&mut self, message: M) -> Self::Future {
+        let mailbox = self.inner.clone();
+        let fut = async move {
+            mailbox.ask_for_res(message).await.map_err(|error| error.into())
+        };
+        Box::pin(fut)
+    }
+}
+#[async_trait::async_trait]
+impl<A> CompactionPlannerService for CompactionPlannerServiceMailbox<A>
+where
+    A: quickwit_actors::Actor + std::fmt::Debug,
+    CompactionPlannerServiceMailbox<
+        A,
+    >: tower::Service<
+        ReportStatusRequest,
+        Response = ReportStatusResponse,
+        Error = crate::compaction::CompactionError,
+        Future = BoxFuture<ReportStatusResponse, crate::compaction::CompactionError>,
+    >,
+{
+    async fn report_status(
+        &self,
+        request: ReportStatusRequest,
+    ) -> crate::compaction::CompactionResult<ReportStatusResponse> {
+        self.clone().call(request).await
+    }
+}
+#[derive(Debug, Clone)]
+pub struct CompactionPlannerServiceGrpcClientAdapter<T> {
+    inner: T,
+    #[allow(dead_code)]
+    connection_addrs_rx: tokio::sync::watch::Receiver<
+        std::collections::HashSet<std::net::SocketAddr>,
+    >,
+}
+impl<T> CompactionPlannerServiceGrpcClientAdapter<T> {
+    pub fn new(
+        instance: T,
+        connection_addrs_rx: tokio::sync::watch::Receiver<
+            std::collections::HashSet<std::net::SocketAddr>,
+        >,
+    ) -> Self {
+        Self {
+            inner: instance,
+            connection_addrs_rx,
+        }
+    }
+}
+#[async_trait::async_trait]
+impl<T> CompactionPlannerService
+for CompactionPlannerServiceGrpcClientAdapter<
+    compaction_planner_service_grpc_client::CompactionPlannerServiceGrpcClient<T>,
+>
+where
+    T: tonic::client::GrpcService<tonic::body::Body> + std::fmt::Debug + Clone + Send
+        + Sync + 'static,
+    T::ResponseBody: tonic::codegen::Body<Data = tonic::codegen::Bytes> + Send + 'static,
+    <T::ResponseBody as tonic::codegen::Body>::Error: Into<tonic::codegen::StdError>
+        + Send,
+    T::Future: Send,
+{
+    async fn report_status(
+        &self,
+        request: ReportStatusRequest,
+    ) -> crate::compaction::CompactionResult<ReportStatusResponse> {
+        let mut tonic_request = tonic::Request::new(request);
+        quickwit_common::tracing_utils::inject_current_context(
+            tonic_request.metadata_mut(),
+        );
+        self.inner
+            .clone()
+            .report_status(tonic_request)
+            .await
+            .map(|response| response.into_inner())
+            .map_err(|status| crate::error::grpc_status_to_service_error(
+                status,
+                ReportStatusRequest::rpc_name(),
+            ))
+    }
+}
+#[derive(Debug)]
+pub struct CompactionPlannerServiceGrpcServerAdapter {
+    inner: InnerCompactionPlannerServiceClient,
+}
+impl CompactionPlannerServiceGrpcServerAdapter {
+    pub fn new<T>(instance: T) -> Self
+    where
+        T: CompactionPlannerService,
+    {
+        Self {
+            inner: InnerCompactionPlannerServiceClient(std::sync::Arc::new(instance)),
+        }
+    }
+}
+#[async_trait::async_trait]
+impl compaction_planner_service_grpc_server::CompactionPlannerServiceGrpc
+for CompactionPlannerServiceGrpcServerAdapter {
+    async fn report_status(
+        &self,
+        tonic_request: tonic::Request<ReportStatusRequest>,
+    ) -> Result<tonic::Response<ReportStatusResponse>, tonic::Status> {
+        let parent_context = quickwit_common::tracing_utils::extract_context(
+            tonic_request.metadata(),
+        );
+        let request = tonic_request.into_inner();
+        let span = tracing::info_span!("compaction.report_status");
+        let _ = <tracing::Span as tracing_opentelemetry::OpenTelemetrySpanExt>::set_parent(
+            &span,
+            parent_context,
+        );
+        let fut = async move {
+            self.inner
+                .0
+                .report_status(request)
+                .await
+                .map(tonic::Response::new)
+                .map_err(crate::error::grpc_error_to_grpc_status)
+        };
+        <_ as tracing::Instrument>::instrument(fut, span).await
+    }
+}
+/// Generated client implementations.
+pub mod compaction_planner_service_grpc_client {
+    #![allow(
+        unused_variables,
+        dead_code,
+        missing_docs,
+        clippy::wildcard_imports,
+        clippy::let_unit_value,
+    )]
+    use tonic::codegen::*;
+    use tonic::codegen::http::Uri;
+    #[derive(Debug, Clone)]
+    pub struct CompactionPlannerServiceGrpcClient<T> {
+        inner: tonic::client::Grpc<T>,
+    }
+    impl CompactionPlannerServiceGrpcClient<tonic::transport::Channel> {
+        /// Attempt to create a new client by connecting to a given endpoint.
+        pub async fn connect<D>(dst: D) -> Result<Self, tonic::transport::Error>
+        where
+            D: TryInto<tonic::transport::Endpoint>,
+            D::Error: Into<StdError>,
+        {
+            let conn = tonic::transport::Endpoint::new(dst)?.connect().await?;
+            Ok(Self::new(conn))
+        }
+    }
+    impl<T> CompactionPlannerServiceGrpcClient<T>
+    where
+        T: tonic::client::GrpcService<tonic::body::Body>,
+        T::Error: Into<StdError>,
+        T::ResponseBody: Body<Data = Bytes> + std::marker::Send + 'static,
+        <T::ResponseBody as Body>::Error: Into<StdError> + std::marker::Send,
+    {
+        pub fn new(inner: T) -> Self {
+            let inner = tonic::client::Grpc::new(inner);
+            Self { inner }
+        }
+        pub fn with_origin(inner: T, origin: Uri) -> Self {
+            let inner = tonic::client::Grpc::with_origin(inner, origin);
+            Self { inner }
+        }
+        pub fn with_interceptor<F>(
+            inner: T,
+            interceptor: F,
+        ) -> CompactionPlannerServiceGrpcClient<InterceptedService<T, F>>
+        where
+            F: tonic::service::Interceptor,
+            T::ResponseBody: Default,
+            T: tonic::codegen::Service<
+                http::Request<tonic::body::Body>,
+                Response = http::Response<
+                    <T as tonic::client::GrpcService<tonic::body::Body>>::ResponseBody,
+                >,
+            >,
+            <T as tonic::codegen::Service<
+                http::Request<tonic::body::Body>,
+            >>::Error: Into<StdError> + std::marker::Send + std::marker::Sync,
+        {
+            CompactionPlannerServiceGrpcClient::new(
+                InterceptedService::new(inner, interceptor),
+            )
+        }
+        /// Compress requests with the given encoding.
+        ///
+        /// This requires the server to support it otherwise it might respond with an
+        /// error.
+        #[must_use]
+        pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self {
+            self.inner = self.inner.send_compressed(encoding);
+            self
+        }
+        /// Enable decompressing responses.
+        #[must_use]
+        pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self {
+            self.inner = self.inner.accept_compressed(encoding);
+            self
+        }
+        /// Limits the maximum size of a decoded message.
+        ///
+        /// Default: `4MB`
+        #[must_use]
+        pub fn max_decoding_message_size(mut self, limit: usize) -> Self {
+            self.inner = self.inner.max_decoding_message_size(limit);
+            self
+        }
+        /// Limits the maximum size of an encoded message.
+        ///
+        /// Default: `usize::MAX`
+        #[must_use]
+        pub fn max_encoding_message_size(mut self, limit: usize) -> Self {
+            self.inner = self.inner.max_encoding_message_size(limit);
+            self
+        }
+        pub async fn report_status(
+            &mut self,
+            request: impl tonic::IntoRequest<super::ReportStatusRequest>,
+        ) -> std::result::Result<
+            tonic::Response<super::ReportStatusResponse>,
+            tonic::Status,
+        > {
+            self.inner
+                .ready()
+                .await
+                .map_err(|e| {
+                    tonic::Status::unknown(
+                        format!("Service was not ready: {}", e.into()),
+                    )
+                })?;
+            let codec = tonic_prost::ProstCodec::default();
+            let path = http::uri::PathAndQuery::from_static(
+                "/quickwit.compaction.CompactionPlannerService/ReportStatus",
+            );
+            let mut req = request.into_request();
+            req.extensions_mut()
+                .insert(
+                    GrpcMethod::new(
+                        "quickwit.compaction.CompactionPlannerService",
+                        "ReportStatus",
+                    ),
+                );
+            self.inner.unary(req, path, codec).await
+        }
+    }
+}
+/// Generated server implementations.
+pub mod compaction_planner_service_grpc_server {
+    #![allow(
+        unused_variables,
+        dead_code,
+        missing_docs,
+        clippy::wildcard_imports,
+        clippy::let_unit_value,
+    )]
+    use tonic::codegen::*;
+    /// Generated trait containing gRPC methods that should be implemented for use with CompactionPlannerServiceGrpcServer.
+    #[async_trait]
+    pub trait CompactionPlannerServiceGrpc: std::marker::Send + std::marker::Sync + 'static {
+        async fn report_status(
+            &self,
+            request: tonic::Request<super::ReportStatusRequest>,
+        ) -> std::result::Result<
+            tonic::Response<super::ReportStatusResponse>,
+            tonic::Status,
+        >;
+    }
+    #[derive(Debug)]
+    pub struct CompactionPlannerServiceGrpcServer<T> {
+        inner: Arc<T>,
+        accept_compression_encodings: EnabledCompressionEncodings,
+        send_compression_encodings: EnabledCompressionEncodings,
+        max_decoding_message_size: Option<usize>,
+        max_encoding_message_size: Option<usize>,
+    }
+    impl<T> CompactionPlannerServiceGrpcServer<T> {
+        pub fn new(inner: T) -> Self {
+            Self::from_arc(Arc::new(inner))
+        }
+        pub fn from_arc(inner: Arc<T>) -> Self {
+            Self {
+                inner,
+                accept_compression_encodings: Default::default(),
+                send_compression_encodings: Default::default(),
+                max_decoding_message_size: None,
+                max_encoding_message_size: None,
+            }
+        }
+        pub fn with_interceptor<F>(
+            inner: T,
+            interceptor: F,
+        ) -> InterceptedService<Self, F>
+        where
+            F: tonic::service::Interceptor,
+        {
+            InterceptedService::new(Self::new(inner), interceptor)
+        }
+        /// Enable decompressing requests with the given encoding.
+        #[must_use]
+        pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self {
+            self.accept_compression_encodings.enable(encoding);
+            self
+        }
+        /// Compress responses with the given encoding, if the client supports it.
+        #[must_use]
+        pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self {
+            self.send_compression_encodings.enable(encoding);
+            self
+        }
+        /// Limits the maximum size of a decoded message.
+        ///
+        /// Default: `4MB`
+        #[must_use]
+        pub fn max_decoding_message_size(mut self, limit: usize) -> Self {
+            self.max_decoding_message_size = Some(limit);
+            self
+        }
+        /// Limits the maximum size of an encoded message.
+        ///
+        /// Default: `usize::MAX`
+        #[must_use]
+        pub fn max_encoding_message_size(mut self, limit: usize) -> Self {
+            self.max_encoding_message_size = Some(limit);
+            self
+        }
+    }
+    impl<T, B> tonic::codegen::Service<http::Request<B>>
+    for CompactionPlannerServiceGrpcServer<T>
+    where
+        T: CompactionPlannerServiceGrpc,
+        B: Body + std::marker::Send + 'static,
+        B::Error: Into<StdError> + std::marker::Send + 'static,
+    {
+        type Response = http::Response<tonic::body::Body>;
+        type Error = std::convert::Infallible;
+        type Future = BoxFuture<Self::Response, Self::Error>;
+        fn poll_ready(
+            &mut self,
+            _cx: &mut Context<'_>,
+        ) -> Poll<std::result::Result<(), Self::Error>> {
+            Poll::Ready(Ok(()))
+        }
+        fn call(&mut self, req: http::Request<B>) -> Self::Future {
+            match req.uri().path() {
+                "/quickwit.compaction.CompactionPlannerService/ReportStatus" => {
+                    #[allow(non_camel_case_types)]
+                    struct ReportStatusSvc<T: CompactionPlannerServiceGrpc>(pub Arc<T>);
+                    impl<
+                        T: CompactionPlannerServiceGrpc,
+                    > tonic::server::UnaryService<super::ReportStatusRequest>
+                    for ReportStatusSvc<T> {
+                        type Response = super::ReportStatusResponse;
+                        type Future = BoxFuture<
+                            tonic::Response<Self::Response>,
+                            tonic::Status,
+                        >;
+                        fn call(
+                            &mut self,
+                            request: tonic::Request<super::ReportStatusRequest>,
+                        ) -> Self::Future {
+                            let inner = Arc::clone(&self.0);
+                            let fut = async move {
+                                <T as CompactionPlannerServiceGrpc>::report_status(
+                                        &inner,
+                                        request,
+                                    )
+                                    .await
+                            };
+                            Box::pin(fut)
+                        }
+                    }
+                    let accept_compression_encodings = self.accept_compression_encodings;
+                    let send_compression_encodings = self.send_compression_encodings;
+                    let max_decoding_message_size = self.max_decoding_message_size;
+                    let max_encoding_message_size = self.max_encoding_message_size;
+                    let inner = self.inner.clone();
+                    let fut = async move {
+                        let method = ReportStatusSvc(inner);
+                        let codec = tonic_prost::ProstCodec::default();
+                        let mut grpc = tonic::server::Grpc::new(codec)
+                            .apply_compression_config(
+                                accept_compression_encodings,
+                                send_compression_encodings,
+                            )
+                            .apply_max_message_size_config(
+                                max_decoding_message_size,
+                                max_encoding_message_size,
+                            );
+                        let res = grpc.unary(method, req).await;
+                        Ok(res)
+                    };
+                    Box::pin(fut)
+                }
+                _ => {
+                    Box::pin(async move {
+                        let mut response = http::Response::new(
+                            tonic::body::Body::default(),
+                        );
+                        let headers = response.headers_mut();
+                        headers
+                            .insert(
+                                tonic::Status::GRPC_STATUS,
+                                (tonic::Code::Unimplemented as i32).into(),
+                            );
+                        headers
+                            .insert(
+                                http::header::CONTENT_TYPE,
+                                tonic::metadata::GRPC_CONTENT_TYPE,
+                            );
+                        Ok(response)
+                    })
+                }
+            }
+        }
+    }
+    impl<T> Clone for CompactionPlannerServiceGrpcServer<T> {
+        fn clone(&self) -> Self {
+            let inner = self.inner.clone();
+            Self {
+                inner,
+                accept_compression_encodings: self.accept_compression_encodings,
+                send_compression_encodings: self.send_compression_encodings,
+                max_decoding_message_size: self.max_decoding_message_size,
+                max_encoding_message_size: self.max_encoding_message_size,
+            }
+        }
+    }
+    /// Generated gRPC service name
+    pub const SERVICE_NAME: &str = "quickwit.compaction.CompactionPlannerService";
+    impl<T> tonic::server::NamedService for CompactionPlannerServiceGrpcServer<T> {
+        const NAME: &'static str = SERVICE_NAME;
+    }
+}
diff --git a/quickwit/quickwit-proto/src/compaction/mod.rs b/quickwit/quickwit-proto/src/compaction/mod.rs
new file mode 100644
index 00000000000..0eee7adb6b0
--- /dev/null
+++ b/quickwit/quickwit-proto/src/compaction/mod.rs
@@ -0,0 +1,91 @@
+// Copyright 2021-Present Datadog, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use quickwit_actors::AskError;
+use quickwit_common::rate_limited_error;
+use quickwit_common::tower::MakeLoadShedError;
+use serde::{Deserialize, Serialize};
+
+use crate::GrpcServiceError;
+use crate::error::{ServiceError, ServiceErrorCode};
+
+include!("../codegen/quickwit/quickwit.compaction.rs");
+
+pub const COMPACTION_FILE_DESCRIPTOR_SET: &[u8] =
+    include_bytes!("../codegen/quickwit/compaction_descriptor.bin");
+
+pub type CompactionResult<T> = std::result::Result<T, CompactionError>;
+
+#[derive(Debug, thiserror::Error, Eq, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum CompactionError {
+    #[error("internal error: {0}")]
+    Internal(String),
+    #[error("request timed out: {0}")]
+    Timeout(String),
+    #[error("too many requests")]
+    TooManyRequests,
+    #[error("service unavailable: {0}")]
+    Unavailable(String),
+}
+
+impl ServiceError for CompactionError {
+    fn error_code(&self) -> ServiceErrorCode {
+        match self {
+            Self::Internal(err_msg) => {
+                rate_limited_error!(limit_per_min = 6, "compaction error: {err_msg}");
+                ServiceErrorCode::Internal
+            }
+            Self::Timeout(_) => ServiceErrorCode::Timeout,
+            Self::TooManyRequests => ServiceErrorCode::TooManyRequests,
+            Self::Unavailable(_) => ServiceErrorCode::Unavailable,
+        }
+    }
+}
+
+// Required by the codegen tower layers. All four constructors are mandatory.
+impl GrpcServiceError for CompactionError {
+    fn new_internal(message: String) -> Self {
+        Self::Internal(message)
+    }
+    fn new_timeout(message: String) -> Self {
+        Self::Timeout(message)
+    }
+    fn new_too_many_requests() -> Self {
+        Self::TooManyRequests
+    }
+    fn new_unavailable(message: String) -> Self {
+        Self::Unavailable(message)
+    }
+}
+
+impl MakeLoadShedError for CompactionError {
+    fn make_load_shed_error() -> Self {
+        CompactionError::TooManyRequests
+    }
+}
+
+impl From<AskError<CompactionError>> for CompactionError {
+    fn from(error: AskError<CompactionError>) -> Self {
+        match error {
+            AskError::ErrorReply(error) => error,
+            AskError::MessageNotDelivered => {
+                Self::new_unavailable("request could not be delivered to actor".to_string())
+            }
+            AskError::ProcessMessageError => {
+                Self::new_internal("an error occurred while processing the request".to_string())
+            }
+        }
+    }
+}
diff --git a/quickwit/quickwit-proto/src/indexing/mod.rs b/quickwit/quickwit-proto/src/indexing/mod.rs
index bf0d147b685..281bc73e7df 100644
--- a/quickwit/quickwit-proto/src/indexing/mod.rs
+++ b/quickwit/quickwit-proto/src/indexing/mod.rs
@@ -138,6 +138,7 @@ impl Display for IndexingPipelineId {
 /// Uniquely identifies a merge pipeline. There exists at most one merge pipeline per
 /// `(index_uid, source_id)` running on indexer at any given time fed by one or more indexing
 /// pipelines.
+/// TODO: Rework/remove this as part of splitting up merges.
 #[derive(Clone, Debug, Hash, Eq, PartialEq)]
 pub struct MergePipelineId {
     pub node_id: NodeId,
diff --git a/quickwit/quickwit-proto/src/lib.rs b/quickwit/quickwit-proto/src/lib.rs
index 428387c6353..fb21c378a18 100644
--- a/quickwit/quickwit-proto/src/lib.rs
+++ b/quickwit/quickwit-proto/src/lib.rs
@@ -21,6 +21,7 @@
 use std::cmp::Ordering;
 
 pub mod cluster;
+pub mod compaction;
 pub mod control_plane;
 pub use bytes;
 pub use tonic;
diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs
index 8bf45a20aba..6e78cf48c2b 100644
--- a/quickwit/quickwit-search/src/leaf.rs
+++ b/quickwit/quickwit-search/src/leaf.rs
@@ -41,7 +41,7 @@ use quickwit_query::query_ast::{
 };
 use quickwit_query::tokenizers::TokenizerManager;
 use quickwit_storage::{
-    BundleStorage, ByteRangeCache, CountingStorage, MemorySizedCache, OwnedBytes, SplitCache,
+    BundleStorage, ByteRangeCache, CountingStorage, MemorySizedCache, OwnedBytes, SearchSplitCache,
     Storage, StorageResolver, TimeoutAndRetryStorage, wrap_storage_with_cache,
 };
 use tantivy::aggregation::AggContextParams;
@@ -174,7 +174,7 @@ pub(crate) async fn open_split_bundle(
     // This is before the bundle storage: at this point, this storage is reading `.split` files.
     let index_storage_with_split_cache =
         if let Some(split_cache) = searcher_context.split_cache_opt.as_ref() {
-            SplitCache::wrap_storage(split_cache.clone(), index_storage.clone())
+            SearchSplitCache::wrap_storage(split_cache.clone(), index_storage.clone())
         } else {
             index_storage.clone()
         };
diff --git a/quickwit/quickwit-search/src/service.rs b/quickwit/quickwit-search/src/service.rs
index 930ae5096fb..2d340a0dd16 100644
--- a/quickwit/quickwit-search/src/service.rs
+++ b/quickwit/quickwit-search/src/service.rs
@@ -29,7 +29,7 @@ use quickwit_proto::search::{
     SearchPlanResponse, SearchRequest, SearchResponse, SnippetRequest,
 };
 use quickwit_storage::{
-    MemorySizedCache, QuickwitCache, SplitCache, StorageCache, StorageResolver,
+    MemorySizedCache, QuickwitCache, SearchSplitCache, StorageCache, StorageResolver,
 };
 use tantivy::aggregation::AggregationLimitsGuard;
 
@@ -417,7 +417,7 @@ pub struct SearcherContext {
     /// Per-split and per-predicate cache.
     pub predicate_cache: Arc<PredicateCacheImpl>,
     /// Search split cache. `None` if no split cache is configured.
-    pub split_cache_opt: Option<Arc<SplitCache>>,
+    pub split_cache_opt: Option<Arc<SearchSplitCache>>,
     /// List fields cache. Caches the raw fields-metadata blob for a given split.
     pub list_fields_cache: ListFieldsCache,
     /// The aggregation limits are passed to limit the memory usage.
@@ -446,7 +446,7 @@ impl SearcherContext {
     /// Creates a new searcher context without a lambda invoker.
     pub fn new_without_invoker(
         searcher_config: SearcherConfig,
-        split_cache_opt: Option<Arc<SplitCache>>,
+        split_cache_opt: Option<Arc<SearchSplitCache>>,
     ) -> Self {
         Self::new(
             searcher_config,
@@ -458,7 +458,7 @@ impl SearcherContext {
     /// Creates a new searcher context, given a searcher config, and an optional `SplitCache`.
     pub fn new(
         searcher_config: SearcherConfig,
-        split_cache_opt: Option<Arc<SplitCache>>,
+        split_cache_opt: Option<Arc<SearchSplitCache>>,
         lambda_invoker: Option<impl LambdaLeafSearchInvoker + 'static>,
     ) -> Self {
         let global_split_footer_cache = MemorySizedCache::from_config(
diff --git a/quickwit/quickwit-serve/Cargo.toml b/quickwit/quickwit-serve/Cargo.toml
index bd3ee2006a0..0d892796fef 100644
--- a/quickwit/quickwit-serve/Cargo.toml
+++ b/quickwit/quickwit-serve/Cargo.toml
@@ -68,6 +68,7 @@ quickwit-datafusion = { workspace = true, optional = true }
 quickwit-doc-mapper = { workspace = true }
 quickwit-index-management = { workspace = true }
 quickwit-indexing = { workspace = true }
+quickwit-compaction = { workspace = true }
 quickwit-ingest = { workspace = true }
 quickwit-jaeger = { workspace = true }
 quickwit-janitor = { workspace = true }
diff --git a/quickwit/quickwit-serve/src/grpc.rs b/quickwit/quickwit-serve/src/grpc.rs
index e7bf1245792..5107ad8ea79 100644
--- a/quickwit/quickwit-serve/src/grpc.rs
+++ b/quickwit/quickwit-serve/src/grpc.rs
@@ -141,6 +141,21 @@ pub(crate) async fn start_grpc_server(
         None
     };
 
+    // Mount gRPC compaction service if this node is a janitor with the compaction service enabled.
+    let compaction_grpc_service = if services
+        .node_config
+        .is_service_enabled(QuickwitService::Janitor)
+    {
+        if let Some(compaction_service) = &services.compaction_service_client_opt {
+            enabled_grpc_services.insert("compaction");
+            file_descriptor_sets.push(quickwit_proto::compaction::COMPACTION_FILE_DESCRIPTOR_SET);
+            Some(compaction_service.as_grpc_service(grpc_config.max_message_size))
+        } else {
+            None
+        }
+    } else {
+        None
+    };
     // Mount gRPC control plane service if `QuickwitService::ControlPlane` is enabled on node.
     let control_plane_grpc_service = if services
         .node_config
@@ -250,6 +265,7 @@ pub(crate) async fn start_grpc_server(
         .add_service(developer_grpc_service)
         .add_service(health_service)
         .add_service(reflection_service)
+        .add_optional_service(compaction_grpc_service)
         .add_optional_service(control_plane_grpc_service)
         .add_optional_service(indexing_grpc_service)
         .add_optional_service(ingest_api_grpc_service)
diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs
index 04ab93cf2e7..f2c9c5659b3 100644
--- a/quickwit/quickwit-serve/src/lib.rs
+++ b/quickwit/quickwit-serve/src/lib.rs
@@ -59,7 +59,7 @@ pub(crate) use decompression::Body;
 pub use format::BodyFormat;
 use futures::StreamExt;
 use itertools::Itertools;
-use quickwit_actors::{ActorExitStatus, Mailbox, SpawnContext, Universe};
+use quickwit_actors::{ActorExitStatus, ActorHandle, Mailbox, SpawnContext, Universe};
 use quickwit_cluster::{
     Cluster, ClusterChange, ClusterChangeStream, ClusterNode, ListenerHandle, start_cluster_service,
 };
@@ -74,14 +74,16 @@ use quickwit_common::tower::{
 };
 use quickwit_common::uri::Uri;
 use quickwit_common::{get_bool_from_env, spawn_named_task};
+use quickwit_compaction::planner::CompactionPlanner;
+use quickwit_compaction::{CompactorSupervisor, start_compactor_service};
 use quickwit_config::service::QuickwitService;
 use quickwit_config::{ClusterConfig, IngestApiConfig, NodeConfig};
 use quickwit_control_plane::control_plane::{ControlPlane, ControlPlaneEventSubscriber};
 use quickwit_control_plane::{IndexerNodeInfo, IndexerPool};
 use quickwit_index_management::{IndexService as IndexManager, IndexServiceError};
-use quickwit_indexing::actors::IndexingService;
+use quickwit_indexing::actors::{IndexingService, MergeSchedulerService};
 use quickwit_indexing::models::ShardPositionsService;
-use quickwit_indexing::start_indexing_service;
+use quickwit_indexing::{IndexingSplitCache, start_indexing_service};
 use quickwit_ingest::{
     GetMemoryCapacity, IngestRequest, IngestRouter, IngestServiceClient, Ingester, IngesterPool,
     IngesterPoolEntry, LocalShardsUpdate, get_idle_shard_timeout,
@@ -95,6 +97,7 @@ use quickwit_metastore::{
     ControlPlaneMetastore, ListIndexesMetadataResponseExt, MetastoreResolver,
 };
 use quickwit_opentelemetry::otlp::{OtlpGrpcLogsService, OtlpGrpcTracesService};
+use quickwit_proto::compaction::CompactionPlannerServiceClient;
 use quickwit_proto::control_plane::ControlPlaneServiceClient;
 use quickwit_proto::indexing::{IndexingServiceClient, ShardPositionsUpdate};
 use quickwit_proto::ingest::ingester::{
@@ -113,7 +116,7 @@ use quickwit_search::{
     SearchJobPlacer, SearchService, SearchServiceClient, SearcherContext, SearcherPool,
     create_search_client_from_channel, start_searcher_service,
 };
-use quickwit_storage::{SplitCache, StorageResolver};
+use quickwit_storage::{SearchSplitCache, StorageResolver};
 pub use quickwit_telemetry_exporters::{EnvFilterReloadFn, do_nothing_env_filter_reload_fn};
 use tcp_listener::TcpListenerResolver;
 use tokio::sync::oneshot;
@@ -140,6 +143,12 @@ const READINESS_REPORTING_INTERVAL: Duration = if cfg!(any(test, feature = "test
     Duration::from_secs(10)
 };
 
+const COMPACTION_SERVICE_DISCOVERY_TIMEOUT: Duration = if cfg!(any(test, feature = "testsuite")) {
+    Duration::from_millis(100)
+} else {
+    Duration::from_secs(300)
+};
+
 const METASTORE_CLIENT_MAX_CONCURRENCY_ENV_KEY: &str = "QW_METASTORE_CLIENT_MAX_CONCURRENCY";
 const DEFAULT_METASTORE_CLIENT_MAX_CONCURRENCY: usize = 6;
 const DISABLE_DELETE_TASK_SERVICE_ENV_KEY: &str = "QW_DISABLE_DELETE_TASK_SERVICE";
@@ -192,6 +201,8 @@ struct QuickwitServices {
     pub ingest_router_service: IngestRouterServiceClient,
     ingester_opt: Option<Ingester>,
 
+    pub compaction_service_client_opt: Option<CompactionPlannerServiceClient>,
+    pub _compactor_supervisor_opt: Option<Mailbox<CompactorSupervisor>>,
     pub janitor_service_opt: Option<Mailbox<JanitorService>>,
     pub jaeger_service_opt: Option<JaegerService>,
     pub otlp_logs_service_opt: Option<OtlpGrpcLogsService>,
@@ -265,6 +276,71 @@ async fn balance_channel_for_service(
     BalanceChannel::from_stream(service_change_stream)
 }
 
+/// Builds a `CompactionPlannerServiceClient` if standalone compactors are enabled
+/// and the node runs the janitor or compactor.
+///
+/// On janitor nodes, spawns a `CompactionPlanner` actor and builds the client from
+/// its mailbox. On compactor-only nodes, connects to a remote janitor via gRPC.
+///
+/// The second tuple element is the local planner's `ActorHandle`, returned only
+/// on janitor nodes so the caller can attach it to the janitor liveness probe.
+async fn get_compaction_planner_client_if_needed(
+    node_config: &NodeConfig,
+    cluster: &Cluster,
+    universe: &Universe,
+    metastore_client: &MetastoreServiceClient,
+) -> anyhow::Result<(
+    Option<CompactionPlannerServiceClient>,
+    Option<ActorHandle<CompactionPlanner>>,
+)> {
+    if !node_config.indexer_config.enable_standalone_compactors {
+        return Ok((None, None));
+    }
+    let is_janitor = node_config.is_service_enabled(QuickwitService::Janitor);
+    let is_compactor = node_config.is_service_enabled(QuickwitService::Compactor);
+    if !is_janitor && !is_compactor {
+        return Ok((None, None));
+    }
+    if is_janitor {
+        let planner = CompactionPlanner::new(metastore_client.clone());
+        let (mailbox, handle) = universe.spawn_builder().spawn(planner);
+        info!("compaction planner actor started on janitor node");
+        return Ok((
+            Some(CompactionPlannerServiceClient::from_mailbox(mailbox)),
+            Some(handle),
+        ));
+    }
+    // Compactor-only node: connect to the planner on a remote janitor.
+    let balance_channel = balance_channel_for_service(cluster, QuickwitService::Janitor).await;
+    let found = balance_channel
+        .wait_for(COMPACTION_SERVICE_DISCOVERY_TIMEOUT, |connections| {
+            !connections.is_empty()
+        })
+        .await;
+    if !found {
+        bail!("compactor is enabled but no janitor node was found in the cluster")
+    }
+    info!("remote compaction planner detected on janitor node");
+    Ok((
+        Some(CompactionPlannerServiceClient::from_balance_channel(
+            balance_channel,
+            node_config.grpc_config.max_message_size,
+            None,
+        )),
+        None,
+    ))
+}
+
+fn spawn_merge_scheduler_service(
+    universe: &Universe,
+    node_config: &NodeConfig,
+) -> Mailbox<MergeSchedulerService> {
+    let (mailbox, _) = universe.spawn_builder().spawn(MergeSchedulerService::new(
+        node_config.indexer_config.merge_concurrency.get(),
+    ));
+    mailbox
+}
+
 async fn start_ingest_client_if_needed(
     node_config: &NodeConfig,
     universe: &Universe,
@@ -535,7 +611,7 @@ pub async fn serve_quickwit(
     // Set up the "control plane proxy" for the metastore.
     let metastore_through_control_plane = MetastoreServiceClient::new(ControlPlaneMetastore::new(
         control_plane_client.clone(),
-        metastore_client,
+        metastore_client.clone(),
     ));
 
     // Setup ingest service v1.
@@ -543,7 +619,44 @@ pub async fn serve_quickwit(
         .await
         .context("failed to start ingest v1 service")?;
 
+    let (compaction_service_client_opt, compaction_planner_handle_opt) =
+        get_compaction_planner_client_if_needed(
+            &node_config,
+            &cluster,
+            &universe,
+            &metastore_client,
+        )
+        .await
+        .context("failed to initialize compaction service client")?;
+
+    // In the case where a compactor and indexer run on the same node (in a single node deployment,
+    // for example), they should share the split cache so that downloads/new splits end up in
+    // predictable locations. If there's only one service enabled, no harm, no foul.
+    let indexing_split_cache: Arc<IndexingSplitCache> = if node_config
+        .is_service_enabled(QuickwitService::Indexer)
+        && node_config.is_service_enabled(QuickwitService::Compactor)
+    {
+        let cache = IndexingSplitCache::from_config(
+            &node_config.indexer_config,
+            &node_config.data_dir_path,
+        )
+        .await?;
+        Arc::new(cache)
+    } else {
+        Arc::new(IndexingSplitCache::no_caching())
+    };
+
     let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) {
+        // if standalone compactors is enabled, indexing pipelines don't perform any merges.
+        // if standalone compactors is disabled, indexing pipelines perform all merges as before.
+        let merge_scheduler_mailbox_opt =
+            if !node_config.indexer_config.enable_standalone_compactors {
+                Some(spawn_merge_scheduler_service(&universe, &node_config))
+            } else {
+                None
+            };
+
+        let split_cache = indexing_split_cache.clone();
         let indexing_service = start_indexing_service(
             &universe,
             &node_config,
@@ -553,6 +666,8 @@ pub async fn serve_quickwit(
             ingester_pool.clone(),
             storage_resolver.clone(),
             event_broker.clone(),
+            merge_scheduler_mailbox_opt,
+            split_cache,
         )
         .await
         .context("failed to start indexing service")?;
@@ -623,15 +738,15 @@ pub async fn serve_quickwit(
         }
     }
 
-    let split_cache_opt: Option<Arc<SplitCache>> =
+    let search_split_cache_opt: Option<Arc<SearchSplitCache>> =
         if let Some(split_cache_limits) = node_config.searcher_config.split_cache {
-            let split_cache = SplitCache::with_root_path(
+            let search_split_cache = SearchSplitCache::with_root_path(
                 node_config.data_dir_path.join("searcher-split-cache"),
                 storage_resolver.clone(),
                 split_cache_limits,
             )
             .context("failed to load searcher split cache")?;
-            Some(split_cache)
+            Some(search_split_cache)
         } else {
             None
         };
@@ -647,7 +762,7 @@ pub async fn serve_quickwit(
                     quickwit_lambda_client::try_get_or_deploy_invoker(lambda_config).await?;
                 Arc::new(SearcherContext::new(
                     node_config.searcher_config.clone(),
-                    split_cache_opt,
+                    search_split_cache_opt,
                     Some(invoker),
                 ))
             }
@@ -659,13 +774,13 @@ pub async fn serve_quickwit(
         } else {
             Arc::new(SearcherContext::new_without_invoker(
                 node_config.searcher_config.clone(),
-                split_cache_opt,
+                search_split_cache_opt,
             ))
         }
     } else {
         Arc::new(SearcherContext::new_without_invoker(
             node_config.searcher_config.clone(),
-            split_cache_opt,
+            search_split_cache_opt,
         ))
     };
 
@@ -727,6 +842,7 @@ pub async fn serve_quickwit(
             storage_resolver.clone(),
             event_broker.clone(),
             !get_bool_from_env(DISABLE_DELETE_TASK_SERVICE_ENV_KEY, false),
+            compaction_planner_handle_opt,
         )
         .await
         .context("failed to start janitor service")?;
@@ -735,6 +851,36 @@ pub async fn serve_quickwit(
         None
     };
 
+    let compactor_supervisor_opt = if node_config.is_service_enabled(QuickwitService::Compactor)
+        && node_config.indexer_config.enable_standalone_compactors
+    {
+        let compaction_dir = node_config.data_dir_path.join("compaction");
+        fs::create_dir_all(&compaction_dir)?;
+        let compaction_root_directory = quickwit_common::temp_dir::Builder::default()
+            .tempdir_in(&compaction_dir)
+            .context("failed to create compaction temp directory")?;
+        let compaction_client = compaction_service_client_opt
+            .clone()
+            .expect("compactor service enabled but no compaction client available");
+        let split_cache = indexing_split_cache.clone();
+        let compactor_mailbox = start_compactor_service(
+            &universe,
+            cluster.self_node_id().into(),
+            compaction_client,
+            &node_config.compactor_config,
+            metastore_client.clone(),
+            storage_resolver.clone(),
+            split_cache,
+            event_broker.clone(),
+            compaction_root_directory,
+        )
+        .await
+        .context("failed to start compactor service")?;
+        Some(compactor_mailbox)
+    } else {
+        None
+    };
+
     let jaeger_service_opt = if node_config.jaeger_config.enable_endpoint
         && node_config.is_service_enabled(QuickwitService::Searcher)
     {
@@ -783,6 +929,8 @@ pub async fn serve_quickwit(
         ingest_router_service,
         ingest_service,
         ingester_opt: ingester_opt.clone(),
+        compaction_service_client_opt,
+        _compactor_supervisor_opt: compactor_supervisor_opt,
         janitor_service_opt,
         jaeger_service_opt,
         otlp_logs_service_opt,
@@ -1833,4 +1981,84 @@ mod tests {
 
         assert!(ingester_pool.is_empty());
     }
+
+    #[tokio::test]
+    async fn test_compaction_service_on_janitor_node() {
+        let transport = ChannelTransport::default();
+        let cluster =
+            create_cluster_for_test(Vec::new(), &["janitor", "indexer"], &transport, true)
+                .await
+                .unwrap();
+        let universe = Universe::new();
+        let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new());
+
+        // Janitor + indexer with standalone compactors enabled: planner client is returned.
+        let mut node_config = NodeConfig::for_test();
+        node_config.indexer_config.enable_standalone_compactors = true;
+        node_config.enabled_services =
+            HashSet::from([QuickwitService::Janitor, QuickwitService::Indexer]);
+        let (client_opt, handle_opt) =
+            get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore)
+                .await
+                .unwrap();
+        assert!(client_opt.is_some());
+        assert!(handle_opt.is_some());
+
+        // With compactor + janitor enabled, planner client is also returned.
+        node_config.enabled_services = HashSet::from([
+            QuickwitService::Janitor,
+            QuickwitService::Indexer,
+            QuickwitService::Compactor,
+        ]);
+        let (client_opt, handle_opt) =
+            get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore)
+                .await
+                .unwrap();
+        assert!(client_opt.is_some());
+        assert!(handle_opt.is_some());
+
+        // Neither janitor nor compactor: no client, no handle.
+        node_config.enabled_services = HashSet::from([QuickwitService::Indexer]);
+        let (client_opt, handle_opt) =
+            get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore)
+                .await
+                .unwrap();
+        assert!(client_opt.is_none());
+        assert!(handle_opt.is_none());
+
+        // Standalone compactors disabled: short-circuit returns (None, None) regardless of
+        // which services are enabled.
+        node_config.indexer_config.enable_standalone_compactors = false;
+        node_config.enabled_services =
+            HashSet::from([QuickwitService::Janitor, QuickwitService::Indexer]);
+        let (client_opt, handle_opt) =
+            get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore)
+                .await
+                .unwrap();
+        assert!(client_opt.is_none());
+        assert!(handle_opt.is_none());
+
+        universe.assert_quit().await;
+    }
+
+    #[tokio::test]
+    async fn test_compaction_service_returns_error_when_no_janitor() {
+        let transport = ChannelTransport::default();
+        let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, false)
+            .await
+            .unwrap();
+        let universe = Universe::new();
+        let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new());
+
+        let mut node_config = NodeConfig::for_test();
+        node_config.indexer_config.enable_standalone_compactors = true;
+        node_config.enabled_services =
+            HashSet::from([QuickwitService::Indexer, QuickwitService::Compactor]);
+        let result =
+            get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore)
+                .await;
+        assert!(result.is_err());
+
+        universe.assert_quit().await;
+    }
 }
diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs
index 082bdf6d3ad..38c16b38a85 100644
--- a/quickwit/quickwit-serve/src/rest.rs
+++ b/quickwit/quickwit-serve/src/rest.rs
@@ -859,6 +859,7 @@ mod tests {
             _report_splits_subscription_handle_opt: None,
             _local_shards_update_listener_handle_opt: None,
             cluster,
+            compaction_service_client_opt: None,
             control_plane_server_opt: None,
             control_plane_client,
             indexing_service_opt: None,
@@ -875,6 +876,7 @@ mod tests {
             node_config: Arc::new(node_config.clone()),
             search_service: Arc::new(MockSearchService::new()),
             jaeger_service_opt: None,
+            _compactor_supervisor_opt: None,
             env_filter_reload_fn: crate::do_nothing_env_filter_reload_fn(),
             #[cfg(feature = "datafusion")]
             datafusion_session_builder: None,
diff --git a/quickwit/quickwit-storage/src/lib.rs b/quickwit/quickwit-storage/src/lib.rs
index 5f2c6057b5c..2a6338fc33c 100644
--- a/quickwit/quickwit-storage/src/lib.rs
+++ b/quickwit/quickwit-storage/src/lib.rs
@@ -56,7 +56,7 @@ mod storage_resolver;
 mod versioned_component;
 
 use quickwit_common::uri::Uri;
-pub use split_cache::SplitCache;
+pub use split_cache::SearchSplitCache;
 pub use tantivy::directory::OwnedBytes;
 pub use versioned_component::VersionedComponent;
 
diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs
index 520d5bd2bcf..34d199b470c 100644
--- a/quickwit/quickwit-storage/src/metrics.rs
+++ b/quickwit/quickwit-storage/src/metrics.rs
@@ -25,6 +25,10 @@ use quickwit_metrics::{
 };
 
 const ACTION_DELETE_OBJECT: Labels<1> = labels!("action" => "delete_object");
+const ACTION_DELETE_OBJECTS: Labels<1> = labels!("action" => "delete_objects");
+const ACTION_GET_OBJECT: Labels<1> = labels!("action" => "get_object");
+const ACTION_PUT_OBJECT: Labels<1> = labels!("action" => "put_object");
+const ACTION_UPLOAD_PART: Labels<1> = labels!("action" => "upload_part");
 const OUTCOME_NAME: LabelNames<1> = label_names!("outcome");
 const COMPONENT_NAME: LabelNames<1> = label_names!("component_name");
 const COMPONENT_CAPACITY_POLICY: LabelNames<3> =
@@ -73,13 +77,22 @@ pub(crate) static OBJECT_STORAGE_DELETE_REQUESTS_TOTAL: LazyCounter =
     lazy_counter!(parent: OBJECT_STORAGE_REQUESTS_TOTAL, labels: [ACTION_DELETE_OBJECT]);
 
 pub(crate) static OBJECT_STORAGE_BULK_DELETE_REQUESTS_TOTAL: LazyCounter =
-    lazy_counter!(parent: OBJECT_STORAGE_REQUESTS_TOTAL, labels: [ACTION_DELETE_OBJECT]);
+    lazy_counter!(parent: OBJECT_STORAGE_REQUESTS_TOTAL, labels: [ACTION_DELETE_OBJECTS]);
 
 pub(crate) static OBJECT_STORAGE_DELETE_REQUEST_DURATION: LazyHistogram =
     lazy_histogram!(parent: OBJECT_STORAGE_REQUEST_DURATION, labels: [ACTION_DELETE_OBJECT]);
 
 pub(crate) static OBJECT_STORAGE_BULK_DELETE_REQUEST_DURATION: LazyHistogram =
-    lazy_histogram!(parent: OBJECT_STORAGE_REQUEST_DURATION, labels: [ACTION_DELETE_OBJECT]);
+    lazy_histogram!(parent: OBJECT_STORAGE_REQUEST_DURATION, labels: [ACTION_DELETE_OBJECTS]);
+
+pub(crate) static OBJECT_STORAGE_GET_OBJECT_DURATION: LazyHistogram =
+    lazy_histogram!(parent: OBJECT_STORAGE_REQUEST_DURATION, labels: [ACTION_GET_OBJECT]);
+
+pub(crate) static OBJECT_STORAGE_PUT_OBJECT_DURATION: LazyHistogram =
+    lazy_histogram!(parent: OBJECT_STORAGE_REQUEST_DURATION, labels: [ACTION_PUT_OBJECT]);
+
+pub(crate) static OBJECT_STORAGE_UPLOAD_PART_DURATION: LazyHistogram =
+    lazy_histogram!(parent: OBJECT_STORAGE_REQUEST_DURATION, labels: [ACTION_UPLOAD_PART]);
 
 pub(crate) static OBJECT_STORAGE_GET_TOTAL: LazyCounter = lazy_counter!(
         name: "object_storage_gets_total",
diff --git a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs
index 0a7cb969639..d8c0d0346f4 100644
--- a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs
+++ b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs
@@ -27,6 +27,7 @@ use azure_storage::prelude::*;
 use azure_storage_blobs::blob::operations::GetBlobResponse;
 use azure_storage_blobs::prelude::*;
 use bytes::{Bytes, BytesMut};
+use bytesize::ByteSize;
 use futures::io::Error as FutureError;
 use futures::stream::{StreamExt, TryStreamExt};
 use md5::Digest;
@@ -34,6 +35,7 @@ use quickwit_common::retry::{RetryParams, Retryable, retry};
 use quickwit_common::uri::Uri;
 use quickwit_common::{chunk_range, ignore_error_kind, into_u64_range};
 use quickwit_config::{AzureStorageConfig, StorageBackend};
+use quickwit_metrics::HistogramTimer;
 use regex::Regex;
 use tantivy::directory::OwnedBytes;
 use thiserror::Error;
@@ -110,10 +112,11 @@ impl AzureBlobStorage {
             multipart_policy: MultiPartPolicy {
                 // Azure max part size is 100MB
                 // https://azure.microsoft.com/en-us/blog/general-availability-larger-block-blobs-in-azure-storage/
-                target_part_num_bytes: 100_000_000,
-                multipart_threshold_num_bytes: 100_000_000,
+                target_part_num_bytes: ByteSize::mb(100),
+                multipart_threshold_num_bytes: ByteSize::mb(100),
                 max_num_parts: 50_000, // Azure allows up to 50,000 blocks
-                max_object_num_bytes: 4_770_000_000_000u64, // Azure allows up to 4.77TB objects
+                max_object_num_bytes: ByteSize::b(4_770_000_000_000), /* Azure allows up to
+                                        * 4.77TB objects */
                 max_concurrent_uploads: 100,
             },
             retry_params: RetryParams::aggressive(),
@@ -213,6 +216,7 @@ impl AzureBlobStorage {
         let name = self.blob_name(path);
         let capacity = range_opt.as_ref().map(Range::len).unwrap_or(0);
         retry(&self.retry_params, || async {
+            let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_GET_OBJECT_DURATION);
             let (mut response_stream, _in_flight_guards) = if let Some(range) = range_opt.as_ref() {
                 let stream = self
                     .container_client
@@ -242,6 +246,7 @@ impl AzureBlobStorage {
     ) -> StorageResult<()> {
         crate::metrics::OBJECT_STORAGE_PUT_PARTS.inc();
         crate::metrics::OBJECT_STORAGE_UPLOAD_NUM_BYTES.inc_by(payload.len());
+        let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_PUT_OBJECT_DURATION);
         retry(&self.retry_params, || async {
             let data = Bytes::from(payload.read_all().await?.to_vec());
             let hash = azure_storage_blobs::prelude::Hash::from(md5::compute(&data[..]).0);
@@ -277,6 +282,8 @@ impl AzureBlobStorage {
                 crate::metrics::OBJECT_STORAGE_PUT_PARTS.inc();
                 crate::metrics::OBJECT_STORAGE_UPLOAD_NUM_BYTES.inc_by(range.end - range.start);
                 async move {
+                    let _timer =
+                        HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_UPLOAD_PART_DURATION);
                     retry(&self.retry_params, || async {
                         // zero pad block ids to make them sortable as strings
                         let block_id = format!("block:{:05}", num);
diff --git a/quickwit/quickwit-storage/src/object_storage/policy.rs b/quickwit/quickwit-storage/src/object_storage/policy.rs
index 6ce48ab7a94..bfb6baf3f12 100644
--- a/quickwit/quickwit-storage/src/object_storage/policy.rs
+++ b/quickwit/quickwit-storage/src/object_storage/policy.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use bytesize::ByteSize;
+
 /// The multipart policy defines when and how multipart upload / download should happen.
 ///
 /// The right settings might be vendor specific, but if not available the default values
@@ -20,13 +22,13 @@ pub struct MultiPartPolicy {
     /// Ideal part size.
     /// Since S3 has a constraint on the number of parts, it cannot always be
     /// respected.
-    pub target_part_num_bytes: usize,
+    pub target_part_num_bytes: ByteSize,
     /// Maximum number of parts allowed.
     pub max_num_parts: usize,
     /// Threshold above which multipart is triggered.
-    pub multipart_threshold_num_bytes: u64,
+    pub multipart_threshold_num_bytes: ByteSize,
     /// Maximum size allowed for an object.
-    pub max_object_num_bytes: u64,
+    pub max_object_num_bytes: ByteSize,
     /// Maximum number of parts to be upload concurrently.
     pub max_concurrent_uploads: usize,
 }
@@ -39,22 +41,22 @@ impl MultiPartPolicy {
     /// will not be used.
     pub fn part_num_bytes(&self, len: u64) -> u64 {
         assert!(
-            len < self.max_object_num_bytes,
+            len < self.max_object_num_bytes.as_u64(),
             "This object storage does not support object of that size {}",
-            self.max_object_num_bytes
+            self.max_object_num_bytes.as_u64()
         );
         assert!(
             self.max_num_parts > 0,
             "Misconfiguration: max_num_parts == 0 makes no sense."
         );
-        if len < self.multipart_threshold_num_bytes || self.max_num_parts == 1 {
+        if len < self.multipart_threshold_num_bytes.as_u64() || self.max_num_parts == 1 {
             return len;
         }
         let max_num_parts = self.max_num_parts as u64;
         // complete part is the smallest integer such that
         // <max_num_parts> * <min_part_len> >= len.
         let min_part_len = 1u64 + (len - 1u64) / max_num_parts;
-        (min_part_len).max(self.target_part_num_bytes as u64)
+        min_part_len.max(self.target_part_num_bytes.as_u64())
     }
 
     /// Limits the number of parts that can be concurrently uploaded.
@@ -69,10 +71,10 @@ impl Default for MultiPartPolicy {
         MultiPartPolicy {
             // S3 limits part size from 5M to 5GB, we want to end up with as few parts as possible
             // since each part is charged as a put request.
-            target_part_num_bytes: 5_000_000_000, // 5GB
-            multipart_threshold_num_bytes: 128 * 1_024 * 1_024, // 128 MiB
+            target_part_num_bytes: ByteSize::gib(5),
+            multipart_threshold_num_bytes: ByteSize::mib(128),
             max_num_parts: 10_000,
-            max_object_num_bytes: 5_000_000_000_000u64, // S3 allows up to 5TB objects
+            max_object_num_bytes: ByteSize::tb(5), // S3 allows up to 5TB objects
             max_concurrent_uploads: 100,
         }
     }
diff --git a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs
index a407ccdaeca..c2c8f0a68ff 100644
--- a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs
+++ b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs
@@ -303,6 +303,7 @@ impl S3CompatibleObjectStorage {
 
         crate::metrics::OBJECT_STORAGE_PUT_PARTS.inc();
         crate::metrics::OBJECT_STORAGE_UPLOAD_NUM_BYTES.inc_by(len);
+        let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_PUT_OBJECT_DURATION);
 
         self.s3_client
             .put_object()
@@ -420,6 +421,7 @@ impl S3CompatibleObjectStorage {
         Ok(delete_requests)
     }
 
+    #[tracing::instrument(skip_all, fields(part_number = part.part_number, num_bytes=part.len()))]
     async fn upload_part<'a>(
         &'a self,
         upload_id: MultipartUploadId,
@@ -436,6 +438,7 @@ impl S3CompatibleObjectStorage {
 
         crate::metrics::OBJECT_STORAGE_PUT_PARTS.inc();
         crate::metrics::OBJECT_STORAGE_UPLOAD_NUM_BYTES.inc_by(part.len());
+        let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_UPLOAD_PART_DURATION);
 
         let upload_part_output = self
             .s3_client
@@ -556,6 +559,7 @@ impl S3CompatibleObjectStorage {
         let range_str = range_opt.map(|range| format!("bytes={}-{}", range.start, range.end - 1));
 
         crate::metrics::OBJECT_STORAGE_GET_TOTAL.inc();
+        let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_GET_OBJECT_DURATION);
 
         let get_object_output = self
             .s3_client
diff --git a/quickwit/quickwit-storage/src/opendal_storage/base.rs b/quickwit/quickwit-storage/src/opendal_storage/base.rs
index a6933db9178..3fe53881e7f 100644
--- a/quickwit/quickwit-storage/src/opendal_storage/base.rs
+++ b/quickwit/quickwit-storage/src/opendal_storage/base.rs
@@ -84,6 +84,7 @@ impl Storage for OpendalStorage {
     #[instrument(name = "storage.gcs.put", level = "debug", skip(self, payload), fields(payload_len = payload.len()))]
     async fn put(&self, path: &Path, payload: Box<dyn PutPayload>) -> StorageResult<()> {
         crate::metrics::OBJECT_STORAGE_PUT_TOTAL.inc();
+        let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_PUT_OBJECT_DURATION);
         let path = path.as_os_str().to_string_lossy();
         let mut payload_reader = payload.byte_stream().await?.into_async_read();
 
@@ -125,6 +126,7 @@ impl Storage for OpendalStorage {
         // recorded before issuing the query to the object store.
         let _inflight_guards = object_storage_get_slice_in_flight_guards(size);
         crate::metrics::OBJECT_STORAGE_GET_TOTAL.inc();
+        let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_GET_OBJECT_DURATION);
         // `Buffer::to_bytes` is zero-copy when the underlying buffer is contiguous, and coalesces
         // into a single `Bytes` otherwise — avoiding the extra `Vec<u8>` round-trip `to_vec` would
         // perform.
diff --git a/quickwit/quickwit-storage/src/split_cache/download_task.rs b/quickwit/quickwit-storage/src/split_cache/download_task.rs
index 9af0390bfb5..50631836203 100644
--- a/quickwit/quickwit-storage/src/split_cache/download_task.rs
+++ b/quickwit/quickwit-storage/src/split_cache/download_task.rs
@@ -21,7 +21,7 @@ use quickwit_common::split_file;
 use tokio::sync::{OwnedSemaphorePermit, Semaphore};
 
 use crate::split_cache::split_table::{CandidateSplit, DownloadOpportunity};
-use crate::{SplitCache, StorageResolver};
+use crate::{SearchSplitCache, StorageResolver};
 
 async fn download_split(
     root_path: &Path,
@@ -44,7 +44,7 @@ async fn download_split(
 
 async fn perform_eviction_and_download(
     download_opportunity: DownloadOpportunity,
-    split_cache: Arc<SplitCache>,
+    split_cache: Arc<SearchSplitCache>,
     storage_resolver: StorageResolver,
     _download_permit: OwnedSemaphorePermit,
 ) -> anyhow::Result<()> {
@@ -67,7 +67,7 @@ async fn perform_eviction_and_download(
 }
 
 pub(crate) fn spawn_download_task(
-    split_cache: Arc<SplitCache>,
+    split_cache: Arc<SearchSplitCache>,
     storage_resolver: StorageResolver,
     num_concurrent_downloads: NonZeroU32,
 ) {
diff --git a/quickwit/quickwit-storage/src/split_cache/mod.rs b/quickwit/quickwit-storage/src/split_cache/mod.rs
index 1c0de124d73..ca6b914d8d1 100644
--- a/quickwit/quickwit-storage/src/split_cache/mod.rs
+++ b/quickwit/quickwit-storage/src/split_cache/mod.rs
@@ -40,7 +40,7 @@ use crate::{Storage, StorageCache, wrap_storage_with_cache};
 /// On disk Cache of splits for searchers.
 ///
 /// The search acts receives reports of splits.
-pub struct SplitCache {
+pub struct SearchSplitCache {
     // Directory containing the cached split files.
     // Split ids are universally unique, so we all put them in the same directory.
     root_path: PathBuf,
@@ -50,14 +50,14 @@ pub struct SplitCache {
     fd_cache: FileDescriptorCache,
 }
 
-impl SplitCache {
+impl SearchSplitCache {
     /// Creates a new SplitCache and spawns the task that will continuously search for
     /// download opportunities.
     pub fn with_root_path(
         root_path: PathBuf,
         storage_resolver: crate::StorageResolver,
         limits: SplitCacheLimits,
-    ) -> io::Result<Arc<SplitCache>> {
+    ) -> io::Result<Arc<SearchSplitCache>> {
         std::fs::create_dir_all(&root_path)?;
         let mut existing_splits: BTreeMap<Ulid, u64> = Default::default();
         for dir_entry_res in std::fs::read_dir(&root_path)? {
@@ -103,7 +103,7 @@ impl SplitCache {
             delete_evicted_splits(&root_path, &splits_to_remove[..]);
         }
         let fd_cache = FileDescriptorCache::with_fd_cache_capacity(limits.max_file_descriptors);
-        let split_cache = Arc::new(SplitCache {
+        let split_cache = Arc::new(SearchSplitCache {
             root_path,
             split_table: Mutex::new(split_table),
             fd_cache,
@@ -193,7 +193,7 @@ fn split_id_from_path(split_path: &Path) -> Option<Ulid> {
 }
 
 struct SplitCacheBackingStorage {
-    split_cache: Arc<SplitCache>,
+    split_cache: Arc<SearchSplitCache>,
     storage_root_uri: Uri,
 }
 
diff --git a/quickwit/quickwit-storage/tests/azure_storage.rs b/quickwit/quickwit-storage/tests/azure_storage.rs
index c5d4937220a..1619596a822 100644
--- a/quickwit/quickwit-storage/tests/azure_storage.rs
+++ b/quickwit/quickwit-storage/tests/azure_storage.rs
@@ -45,10 +45,10 @@ async fn azure_storage_test_suite() -> anyhow::Result<()> {
 
     object_storage.set_policy(MultiPartPolicy {
         // On azure, block size is limited between 64KB and 100MB.
-        target_part_num_bytes: 5 * 1_024 * 1_024, // 5MiB
+        target_part_num_bytes: bytesize::ByteSize::mib(5),
         max_num_parts: 10_000,
-        multipart_threshold_num_bytes: 10_000_000,
-        max_object_num_bytes: 5_000_000_000_000,
+        multipart_threshold_num_bytes: bytesize::ByteSize::mb(10),
+        max_object_num_bytes: bytesize::ByteSize::tb(5),
         max_concurrent_uploads: 100,
     });
     quickwit_storage::storage_test_multi_part_upload(&mut object_storage)
diff --git a/quickwit/quickwit-storage/tests/s3_storage.rs b/quickwit/quickwit-storage/tests/s3_storage.rs
index 73e0454af77..428093fb093 100644
--- a/quickwit/quickwit-storage/tests/s3_storage.rs
+++ b/quickwit/quickwit-storage/tests/s3_storage.rs
@@ -72,10 +72,10 @@ pub mod s3_storage_test_suite {
             .unwrap();
 
         object_storage.set_policy(MultiPartPolicy {
-            target_part_num_bytes: 5 * 1_024 * 1_024, //< the minimum on S3 is 5MB.
+            target_part_num_bytes: bytesize::ByteSize::mib(5), //< the minimum on S3 is 5MB.
             max_num_parts: 10_000,
-            multipart_threshold_num_bytes: 10_000_000,
-            max_object_num_bytes: 5_000_000_000_000,
+            multipart_threshold_num_bytes: bytesize::ByteSize::mb(10),
+            max_object_num_bytes: bytesize::ByteSize::tb(5),
             max_concurrent_uploads: 100,
         });