From 7a7207b6640918ffb65f01c585d297e349dbd6a0 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Mon, 6 Apr 2026 13:38:15 -0400 Subject: [PATCH 01/21] Make MergeSchedulerService optional (#6266) Make MergeSchedulerService optional --- quickwit/quickwit-cli/src/tool.rs | 4 +- .../src/actors/indexing_pipeline.rs | 12 +- .../src/actors/indexing_service.rs | 148 ++++++++++++++---- .../quickwit-indexing/src/actors/publisher.rs | 21 ++- quickwit/quickwit-indexing/src/lib.rs | 5 +- quickwit/quickwit-indexing/src/test_utils.rs | 2 +- quickwit/quickwit-serve/src/lib.rs | 6 +- 7 files changed, 148 insertions(+), 50 deletions(-) diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs index 0f34a3017e6..04843d7e801 100644 --- a/quickwit/quickwit-cli/src/tool.rs +++ b/quickwit/quickwit-cli/src/tool.rs @@ -456,7 +456,7 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< cluster, metastore, None, - merge_scheduler_service_mailbox, + Some(merge_scheduler_service_mailbox), IngesterPool::default(), storage_resolver, EventBroker::default(), @@ -593,7 +593,7 @@ pub async fn merge_cli(args: MergeArgs) -> anyhow::Result<()> { cluster, metastore, None, - merge_scheduler_service, + Some(merge_scheduler_service), IngesterPool::default(), storage_resolver, EventBroker::default(), diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 16daf18102f..04a9ab004b6 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -431,7 +431,7 @@ impl IndexingPipeline { let publisher = Publisher::new( PublisherType::MainPublisher, self.params.metastore.clone(), - Some(self.params.merge_planner_mailbox.clone()), + self.params.merge_planner_mailbox_opt.clone(), Some(source_mailbox.clone()), ); let (publisher_mailbox, publisher_handle) = ctx @@ -854,7 +854,7 @@ pub struct 
IndexingPipelineParams { // Merge-related parameters pub merge_policy: Arc, pub retention_policy: Option, - pub merge_planner_mailbox: Mailbox, + pub merge_planner_mailbox_opt: Option>, pub max_concurrent_split_uploads_merge: usize, // Source-related parameters @@ -991,7 +991,7 @@ mod tests { max_concurrent_split_uploads_index: 4, max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, - merge_planner_mailbox, + merge_planner_mailbox_opt: Some(merge_planner_mailbox), event_broker: EventBroker::default(), params_fingerprint: 42u64, }; @@ -1115,7 +1115,7 @@ mod tests { max_concurrent_split_uploads_index: 4, max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, - merge_planner_mailbox, + merge_planner_mailbox_opt: Some(merge_planner_mailbox), event_broker: Default::default(), params_fingerprint: 42u64, }; @@ -1216,7 +1216,7 @@ mod tests { max_concurrent_split_uploads_index: 4, max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, - merge_planner_mailbox: merge_planner_mailbox.clone(), + merge_planner_mailbox_opt: Some(merge_planner_mailbox.clone()), event_broker: Default::default(), params_fingerprint: 42u64, }; @@ -1344,7 +1344,7 @@ mod tests { max_concurrent_split_uploads_index: 4, max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, - merge_planner_mailbox, + merge_planner_mailbox_opt: Some(merge_planner_mailbox), params_fingerprint: 42u64, event_broker: Default::default(), }; diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 5d307fb200b..247dc53c41a 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -104,7 +104,7 @@ pub struct IndexingService { cluster: Cluster, metastore: MetastoreServiceClient, ingest_api_service_opt: Option>, - merge_scheduler_service: Mailbox, + merge_scheduler_service_opt: Option>, 
ingester_pool: IngesterPool, storage_resolver: StorageResolver, indexing_pipelines: HashMap, @@ -138,7 +138,7 @@ impl IndexingService { cluster: Cluster, metastore: MetastoreServiceClient, ingest_api_service_opt: Option>, - merge_scheduler_service: Mailbox, + merge_scheduler_service_opt: Option>, ingester_pool: IngesterPool, storage_resolver: StorageResolver, event_broker: EventBroker, @@ -167,7 +167,7 @@ impl IndexingService { cluster, metastore, ingest_api_service_opt, - merge_scheduler_service, + merge_scheduler_service_opt, ingester_pool, storage_resolver, local_split_store: Arc::new(local_split_store), @@ -297,27 +297,37 @@ impl IndexingService { let doc_mapper = build_doc_mapper(&index_config.doc_mapping, &index_config.search_settings) .map_err(|error| IndexingError::Internal(error.to_string()))?; - let merge_pipeline_id = indexing_pipeline_id.merge_pipeline_id(); - let merge_pipeline_params = MergePipelineParams { - pipeline_id: merge_pipeline_id.clone(), - doc_mapper: doc_mapper.clone(), - indexing_directory: indexing_directory.clone(), - metastore: self.metastore.clone(), - split_store: split_store.clone(), - merge_scheduler_service: self.merge_scheduler_service.clone(), - merge_policy: merge_policy.clone(), - retention_policy: retention_policy.clone(), - merge_io_throughput_limiter_opt: self.merge_io_throughput_limiter_opt.clone(), - max_concurrent_split_uploads: self.max_concurrent_split_uploads, - event_broker: self.event_broker.clone(), + let merge_planner_mailbox_opt = if let Some(merge_scheduler_service) = + self.merge_scheduler_service_opt.clone() + { + let merge_pipeline_id = indexing_pipeline_id.merge_pipeline_id(); + let merge_pipeline_params = MergePipelineParams { + pipeline_id: merge_pipeline_id, + doc_mapper: doc_mapper.clone(), + indexing_directory: indexing_directory.clone(), + metastore: self.metastore.clone(), + split_store: split_store.clone(), + merge_scheduler_service, + merge_policy: merge_policy.clone(), + retention_policy: 
retention_policy.clone(), + merge_io_throughput_limiter_opt: self.merge_io_throughput_limiter_opt.clone(), + max_concurrent_split_uploads: self.max_concurrent_split_uploads, + event_broker: self.event_broker.clone(), + }; + Some(self.get_or_create_merge_pipeline(merge_pipeline_params, immature_splits_opt, ctx)) + } else { + None }; - let merge_planner_mailbox = - self.get_or_create_merge_pipeline(merge_pipeline_params, immature_splits_opt, ctx)?; // The concurrent uploads budget is split in 2: 1/2 for the indexing pipeline, 1/2 for the - // merge pipeline. - let max_concurrent_split_uploads_index = (self.max_concurrent_split_uploads / 2).max(1); + // merge pipeline. When there is no local merge pipeline, the indexing pipeline gets the + // full budget. + let max_concurrent_split_uploads_index = if self.merge_scheduler_service_opt.is_some() { + (self.max_concurrent_split_uploads / 2).max(1) + } else { + self.max_concurrent_split_uploads + }; let max_concurrent_split_uploads_merge = - (self.max_concurrent_split_uploads - max_concurrent_split_uploads_index).max(1); + self.max_concurrent_split_uploads - max_concurrent_split_uploads_index; let params_fingerprint = indexing_pipeline_params_fingerprint(&index_config, &source_config); @@ -354,7 +364,7 @@ impl IndexingService { merge_policy, retention_policy, max_concurrent_split_uploads_merge, - merge_planner_mailbox, + merge_planner_mailbox_opt, // Source-related parameters source_config, @@ -428,6 +438,9 @@ impl IndexingService { indexing_pipeline_ids: &[IndexingPipelineId], ctx: &ActorContext, ) -> MetastoreResult>> { + if self.merge_scheduler_service_opt.is_none() { + return Ok(Default::default()); + } let mut index_uids = Vec::new(); for indexing_pipeline_id in indexing_pipeline_ids { @@ -573,12 +586,12 @@ impl IndexingService { merge_pipeline_params: MergePipelineParams, immature_splits_opt: Option>, ctx: &ActorContext, - ) -> Result, IndexingError> { + ) -> Mailbox { if let Some(merge_pipeline_handle) = self 
.merge_pipeline_handles .get(&merge_pipeline_params.pipeline_id) { - return Ok(merge_pipeline_handle.mailbox.clone()); + return merge_pipeline_handle.mailbox.clone(); } let merge_pipeline_id = merge_pipeline_params.pipeline_id.clone(); let merge_pipeline = @@ -592,7 +605,7 @@ impl IndexingService { self.merge_pipeline_handles .insert(merge_pipeline_id, merge_pipeline_handle); self.counters.num_running_merge_pipelines += 1; - Ok(merge_planner_mailbox) + merge_planner_mailbox } /// For all Ingest V2 pipelines, assigns the set of shards they should be working on. @@ -1053,7 +1066,7 @@ mod tests { cluster, metastore, Some(ingest_api_service), - merge_scheduler_mailbox, + Some(merge_scheduler_mailbox), IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), @@ -1566,7 +1579,7 @@ mod tests { cluster.clone(), metastore.clone(), Some(ingest_api_service), - merge_scheduler_service, + Some(merge_scheduler_service), IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), @@ -1610,6 +1623,85 @@ mod tests { universe.quit().await; } + #[tokio::test] + async fn test_indexing_service_no_merge_pipeline_when_no_merge_scheduler() { + quickwit_common::setup_logging_for_tests(); + let transport = ChannelTransport::default(); + let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true) + .await + .unwrap(); + let metastore = metastore_for_test(); + + let index_id = append_random_suffix("test-indexing-service-no-merge"); + let index_uri = format!("ram:///indexes/{index_id}"); + let index_config = IndexConfig::for_test(&index_id, &index_uri); + + let source_config = SourceConfig { + source_id: "test-source".to_string(), + num_pipelines: NonZeroUsize::MIN, + enabled: true, + source_params: SourceParams::void(), + transform_config: None, + input_format: SourceInputFormat::Json, + }; + let create_index_request = + CreateIndexRequest::try_from_index_config(&index_config).unwrap(); + let index_uid: IndexUid = metastore + 
.create_index(create_index_request) + .await + .unwrap() + .index_uid() + .clone(); + let add_source_request = + AddSourceRequest::try_from_source_config(index_uid.clone(), &source_config).unwrap(); + metastore.add_source(add_source_request).await.unwrap(); + + let temp_dir = tempfile::tempdir().unwrap(); + let data_dir_path = temp_dir.path().to_path_buf(); + let indexer_config = IndexerConfig::for_test().unwrap(); + let num_blocking_threads = 1; + let storage_resolver = StorageResolver::unconfigured(); + let universe = Universe::with_accelerated_time(); + let queues_dir_path = data_dir_path.join(QUEUES_DIR_NAME); + let ingest_api_service = + init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default()) + .await + .unwrap(); + let indexing_server = IndexingService::new( + NodeId::from("test-node"), + data_dir_path, + indexer_config, + num_blocking_threads, + cluster.clone(), + metastore.clone(), + Some(ingest_api_service), + None, // No merge scheduler — external merge service handles compaction. 
+ IngesterPool::default(), + storage_resolver.clone(), + EventBroker::default(), + ) + .await + .unwrap(); + let (indexing_server_mailbox, indexing_server_handle) = + universe.spawn_builder().spawn(indexing_server); + + indexing_server_mailbox + .ask_for_res(SpawnPipeline { + index_id: index_id.clone(), + source_config, + pipeline_uid: PipelineUid::default(), + }) + .await + .unwrap(); + + let observation = indexing_server_handle.observe().await; + assert_eq!(observation.num_running_pipelines, 1); + assert_eq!(observation.num_running_merge_pipelines, 0); + assert!(universe.get_one::().is_none()); + + universe.quit().await; + } + #[derive(Debug)] struct FreezePipeline; #[async_trait] @@ -1768,7 +1860,7 @@ mod tests { cluster.clone(), metastore.clone(), Some(ingest_api_service.clone()), - merge_scheduler_service, + Some(merge_scheduler_service), IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), diff --git a/quickwit/quickwit-indexing/src/actors/publisher.rs b/quickwit/quickwit-indexing/src/actors/publisher.rs index c4ec75b1a8a..4ed7baec043 100644 --- a/quickwit/quickwit-indexing/src/actors/publisher.rs +++ b/quickwit/quickwit-indexing/src/actors/publisher.rs @@ -21,7 +21,7 @@ use quickwit_proto::metastore::{ MetastoreService, MetastoreServiceClient, PublishMetricsSplitsRequest, PublishSplitsRequest, }; use serde::Serialize; -use tracing::{info, instrument}; +use tracing::{error, info, instrument}; use crate::actors::{DocProcessor, MergePlanner, ParquetDocProcessor, Processor}; use crate::models::{NewSplits, ParquetSplitsUpdate, SplitsUpdate}; @@ -210,14 +210,19 @@ impl Handler for Publisher { suggest_truncate(ctx, &self.source_mailbox_opt, checkpoint_delta_opt).await; if !new_splits.is_empty() { - // The merge planner is not necessarily awake and this is not an error. 
- // For instance, when a source reaches its end, and the last "new" split - // has been packaged, the packager finalizer sends a message to the merge - // planner in order to stop it. - if let Some(merge_planner_mailbox) = self.merge_planner_mailbox_opt.as_ref() { - let _ = ctx + // When merging is handled locally, notify the merge planner about new + // splits. The mailbox is None when an external merge service is active, + // or when the planner has already shut down (e.g. source reached its end). + if let Some(merge_planner_mailbox) = &self.merge_planner_mailbox_opt { + match ctx .send_message(merge_planner_mailbox, NewSplits { new_splits }) - .await; + .await + { + Ok(_) => {} + Err(error) => { + error!(error=?error, "failed to send new splits to merge planner"); + } + } } if replaced_split_ids.is_empty() { diff --git a/quickwit/quickwit-indexing/src/lib.rs b/quickwit/quickwit-indexing/src/lib.rs index 2c2b28a09d7..8adde285bf3 100644 --- a/quickwit/quickwit-indexing/src/lib.rs +++ b/quickwit/quickwit-indexing/src/lib.rs @@ -69,13 +69,10 @@ pub async fn start_indexing_service( ingester_pool: IngesterPool, storage_resolver: StorageResolver, event_broker: EventBroker, + merge_scheduler_mailbox: Option>, ) -> anyhow::Result> { info!("starting indexer service"); let ingest_api_service_mailbox = universe.get_one::(); - let (merge_scheduler_mailbox, _) = universe.spawn_builder().spawn(MergeSchedulerService::new( - config.indexer_config.merge_concurrency.get(), - )); - // Spawn indexing service. 
let indexing_service = IndexingService::new( config.node_id.clone(), config.data_dir_path.to_path_buf(), diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs index 82198c820f4..550e40aa16f 100644 --- a/quickwit/quickwit-indexing/src/test_utils.rs +++ b/quickwit/quickwit-indexing/src/test_utils.rs @@ -123,7 +123,7 @@ impl TestSandbox { cluster, metastore.clone(), Some(ingest_api_service), - merge_scheduler_mailbox, + Some(merge_scheduler_mailbox), IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index cc261cec7a2..4d674d9cb7b 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -77,7 +77,7 @@ use quickwit_config::{ClusterConfig, IngestApiConfig, NodeConfig}; use quickwit_control_plane::control_plane::{ControlPlane, ControlPlaneEventSubscriber}; use quickwit_control_plane::{IndexerNodeInfo, IndexerPool}; use quickwit_index_management::{IndexService as IndexManager, IndexServiceError}; -use quickwit_indexing::actors::IndexingService; +use quickwit_indexing::actors::{IndexingService, MergeSchedulerService}; use quickwit_indexing::models::ShardPositionsService; use quickwit_indexing::start_indexing_service; use quickwit_ingest::{ @@ -540,6 +540,9 @@ pub async fn serve_quickwit( .context("failed to start ingest v1 service")?; let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) { + let (merge_scheduler_mailbox, _) = universe.spawn_builder().spawn( + MergeSchedulerService::new(node_config.indexer_config.merge_concurrency.get()), + ); let indexing_service = start_indexing_service( &universe, &node_config, @@ -549,6 +552,7 @@ pub async fn serve_quickwit( ingester_pool.clone(), storage_resolver.clone(), event_broker.clone(), + Some(merge_scheduler_mailbox), ) .await .context("failed to start indexing service")?; From 
2862e8740141c45b0fce5fabb7c3910f7b9df17f Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Thu, 9 Apr 2026 14:43:07 -0400 Subject: [PATCH 02/21] Create compaction planner service stub; wire up serving layer (#6269) --- quickwit/Cargo.lock | 9 + quickwit/Cargo.toml | 3 + quickwit/quickwit-compaction/Cargo.toml | 15 + quickwit/quickwit-compaction/src/lib.rs | 17 + .../src/planner/compaction_service.rs | 26 + .../quickwit-compaction/src/planner/mod.rs | 17 + quickwit/quickwit-config/src/service.rs | 3 + .../src/actors/indexing_service.rs | 80 ++ .../quickwit-indexing/src/actors/publisher.rs | 12 +- .../src/tests/basic_tests.rs | 96 +++ quickwit/quickwit-proto/build.rs | 14 + .../protos/quickwit/compaction.proto | 24 + .../codegen/quickwit/quickwit.compaction.rs | 797 ++++++++++++++++++ quickwit/quickwit-proto/src/compaction/mod.rs | 76 ++ quickwit/quickwit-proto/src/lib.rs | 1 + quickwit/quickwit-serve/Cargo.toml | 1 + quickwit/quickwit-serve/src/grpc.rs | 16 + quickwit/quickwit-serve/src/lib.rs | 120 ++- quickwit/quickwit-serve/src/rest.rs | 1 + 19 files changed, 1316 insertions(+), 12 deletions(-) create mode 100644 quickwit/quickwit-compaction/Cargo.toml create mode 100644 quickwit/quickwit-compaction/src/lib.rs create mode 100644 quickwit/quickwit-compaction/src/planner/compaction_service.rs create mode 100644 quickwit/quickwit-compaction/src/planner/mod.rs create mode 100644 quickwit/quickwit-proto/protos/quickwit/compaction.proto create mode 100644 quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs create mode 100644 quickwit/quickwit-proto/src/compaction/mod.rs diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 89eefe2bed7..544b882d332 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -7144,6 +7144,14 @@ dependencies = [ "tracing", ] +[[package]] +name = "quickwit-compaction" +version = "0.8.0" +dependencies = [ + "async-trait", + "quickwit-proto", +] + [[package]] name = "quickwit-config" version = "0.8.0" @@ -7808,6 
+7816,7 @@ dependencies = [ "quickwit-actors", "quickwit-cluster", "quickwit-common", + "quickwit-compaction", "quickwit-config", "quickwit-control-plane", "quickwit-doc-mapper", diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 9242390d898..9d8cf833dc3 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -12,6 +12,7 @@ members = [ "quickwit-control-plane", "quickwit-datetime", "quickwit-directories", + "quickwit-compaction", "quickwit-doc-mapper", "quickwit-index-management", "quickwit-indexing", @@ -52,6 +53,7 @@ default-members = [ "quickwit-control-plane", "quickwit-datetime", "quickwit-directories", + "quickwit-compaction", "quickwit-doc-mapper", "quickwit-index-management", "quickwit-indexing", @@ -352,6 +354,7 @@ quickwit-control-plane = { path = "quickwit-control-plane" } quickwit-datetime = { path = "quickwit-datetime" } quickwit-directories = { path = "quickwit-directories" } quickwit-doc-mapper = { path = "quickwit-doc-mapper" } +quickwit-compaction = { path = "quickwit-compaction" } quickwit-index-management = { path = "quickwit-index-management" } quickwit-indexing = { path = "quickwit-indexing" } quickwit-ingest = { path = "quickwit-ingest" } diff --git a/quickwit/quickwit-compaction/Cargo.toml b/quickwit/quickwit-compaction/Cargo.toml new file mode 100644 index 00000000000..07925537dce --- /dev/null +++ b/quickwit/quickwit-compaction/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "quickwit-compaction" +description = "Merge planner and merge worker services for split compaction" + +version.workspace = true +edition.workspace = true +homepage.workspace = true +documentation.workspace = true +repository.workspace = true +authors.workspace = true +license.workspace = true + +[dependencies] +async-trait = { workspace = true } +quickwit-proto = { workspace = true } \ No newline at end of file diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs new file mode 100644 index 00000000000..d59933e227e 
--- /dev/null +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -0,0 +1,17 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![deny(clippy::disallowed_methods)] + +pub mod planner; diff --git a/quickwit/quickwit-compaction/src/planner/compaction_service.rs b/quickwit/quickwit-compaction/src/planner/compaction_service.rs new file mode 100644 index 00000000000..0e3104a2129 --- /dev/null +++ b/quickwit/quickwit-compaction/src/planner/compaction_service.rs @@ -0,0 +1,26 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use async_trait::async_trait; +use quickwit_proto::compaction::{CompactionResult, CompactionService, PingRequest, PingResponse}; + +#[derive(Debug, Clone)] +pub struct StubCompactionService; + +#[async_trait] +impl CompactionService for StubCompactionService { + async fn ping(&self, _request: PingRequest) -> CompactionResult { + Ok(PingResponse {}) + } +} diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs new file mode 100644 index 00000000000..3b531a439f2 --- /dev/null +++ b/quickwit/quickwit-compaction/src/planner/mod.rs @@ -0,0 +1,17 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +mod compaction_service; + +pub use compaction_service::StubCompactionService; diff --git a/quickwit/quickwit-config/src/service.rs b/quickwit/quickwit-config/src/service.rs index d323331b510..0382231330d 100644 --- a/quickwit/quickwit-config/src/service.rs +++ b/quickwit/quickwit-config/src/service.rs @@ -29,6 +29,7 @@ pub enum QuickwitService { Searcher, Janitor, Metastore, + Compactor, } #[allow(clippy::from_over_into)] @@ -46,6 +47,7 @@ impl QuickwitService { QuickwitService::Searcher => "searcher", QuickwitService::Janitor => "janitor", QuickwitService::Metastore => "metastore", + QuickwitService::Compactor => "compactor", } } @@ -70,6 +72,7 @@ impl FromStr for QuickwitService { "searcher" => Ok(QuickwitService::Searcher), "janitor" => Ok(QuickwitService::Janitor), "metastore" => Ok(QuickwitService::Metastore), + "compactor" => Ok(QuickwitService::Compactor), _ => { bail!( "failed to parse service `{service_str}`. supported services are: `{}`", diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 247dc53c41a..5442a98589e 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -1702,6 +1702,86 @@ mod tests { universe.quit().await; } + #[tokio::test] + async fn test_indexing_service_spawns_merge_pipeline_with_merge_scheduler() { + quickwit_common::setup_logging_for_tests(); + let transport = ChannelTransport::default(); + let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true) + .await + .unwrap(); + let metastore = metastore_for_test(); + + let index_id = append_random_suffix("test-indexing-service-with-merge"); + let index_uri = format!("ram:///indexes/{index_id}"); + let index_config = IndexConfig::for_test(&index_id, &index_uri); + + let source_config = SourceConfig { + source_id: "test-source".to_string(), + num_pipelines: NonZeroUsize::MIN, + enabled: true, + 
source_params: SourceParams::void(), + transform_config: None, + input_format: SourceInputFormat::Json, + }; + let create_index_request = + CreateIndexRequest::try_from_index_config(&index_config).unwrap(); + let index_uid: IndexUid = metastore + .create_index(create_index_request) + .await + .unwrap() + .index_uid() + .clone(); + let add_source_request = + AddSourceRequest::try_from_source_config(index_uid.clone(), &source_config).unwrap(); + metastore.add_source(add_source_request).await.unwrap(); + + let temp_dir = tempfile::tempdir().unwrap(); + let data_dir_path = temp_dir.path().to_path_buf(); + let indexer_config = IndexerConfig::for_test().unwrap(); + let num_blocking_threads = 1; + let storage_resolver = StorageResolver::unconfigured(); + let universe = Universe::with_accelerated_time(); + let queues_dir_path = data_dir_path.join(QUEUES_DIR_NAME); + let ingest_api_service = + init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default()) + .await + .unwrap(); + let merge_scheduler_mailbox: Mailbox = universe.get_or_spawn_one(); + let indexing_server = IndexingService::new( + NodeId::from("test-node"), + data_dir_path, + indexer_config, + num_blocking_threads, + cluster.clone(), + metastore.clone(), + Some(ingest_api_service), + Some(merge_scheduler_mailbox), + IngesterPool::default(), + storage_resolver.clone(), + EventBroker::default(), + ) + .await + .unwrap(); + let (indexing_server_mailbox, indexing_server_handle) = + universe.spawn_builder().spawn(indexing_server); + + indexing_server_mailbox + .ask_for_res(SpawnPipeline { + index_id: index_id.clone(), + source_config, + pipeline_uid: PipelineUid::default(), + }) + .await + .unwrap(); + + let observation = indexing_server_handle.observe().await; + assert_eq!(observation.num_running_pipelines, 1); + assert_eq!(observation.num_running_merge_pipelines, 1); + assert!(universe.get_one::().is_some()); + + universe.quit().await; + } + #[derive(Debug)] struct FreezePipeline; #[async_trait] diff 
--git a/quickwit/quickwit-indexing/src/actors/publisher.rs b/quickwit/quickwit-indexing/src/actors/publisher.rs index 4ed7baec043..e0b4b103a4b 100644 --- a/quickwit/quickwit-indexing/src/actors/publisher.rs +++ b/quickwit/quickwit-indexing/src/actors/publisher.rs @@ -213,16 +213,12 @@ impl Handler for Publisher { // When merging is handled locally, notify the merge planner about new // splits. The mailbox is None when an external merge service is active, // or when the planner has already shut down (e.g. source reached its end). - if let Some(merge_planner_mailbox) = &self.merge_planner_mailbox_opt { - match ctx + if let Some(merge_planner_mailbox) = &self.merge_planner_mailbox_opt + && let Err(error) = ctx .send_message(merge_planner_mailbox, NewSplits { new_splits }) .await - { - Ok(_) => {} - Err(error) => { - error!(error=?error, "failed to send new splits to merge planner"); - } - } + { + error!(error=?error, "failed to send new splits to merge planner"); } if replaced_split_ids.is_empty() { diff --git a/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs b/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs index 91c6b0ebeb1..2715e310a97 100644 --- a/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs +++ b/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs @@ -162,3 +162,99 @@ async fn test_multi_nodes_cluster() { sandbox.shutdown().await.unwrap(); } + +#[tokio::test] +async fn test_no_merge_pipelines_when_compaction_service_enabled() { + quickwit_common::setup_logging_for_tests(); + unsafe { std::env::set_var("QW_ENABLE_COMPACTION_SERVICE", "true") }; + + let sandbox = ClusterSandboxBuilder::default() + .add_node([QuickwitService::Searcher]) + .add_node([QuickwitService::Metastore]) + .add_node([QuickwitService::Indexer]) + .add_node([QuickwitService::ControlPlane]) + .add_node([QuickwitService::Janitor]) + .build_and_start() + .await; + + sandbox + .rest_client(QuickwitService::Indexer) + .indexes() + .create( + r#" + 
version: 0.8 + index_id: test-no-merge-pipelines + doc_mapping: + field_mappings: + - name: body + type: text + indexing_settings: + commit_timeout_secs: 1 + "#, + quickwit_config::ConfigFormat::Yaml, + false, + ) + .await + .unwrap(); + + sandbox.wait_for_indexing_pipelines(1).await.unwrap(); + + let stats = sandbox + .rest_client(QuickwitService::Indexer) + .node_stats() + .indexing() + .await + .unwrap(); + assert_eq!(stats.num_running_merge_pipelines, 0); + + unsafe { std::env::remove_var("QW_ENABLE_COMPACTION_SERVICE") }; + sandbox.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_merge_pipelines_present_without_compaction_service() { + quickwit_common::setup_logging_for_tests(); + unsafe { std::env::set_var("QW_ENABLE_COMPACTION_SERVICE", "false") }; + + let sandbox = ClusterSandboxBuilder::default() + .add_node([QuickwitService::Searcher]) + .add_node([QuickwitService::Metastore]) + .add_node([QuickwitService::Indexer]) + .add_node([QuickwitService::ControlPlane]) + .add_node([QuickwitService::Janitor]) + .build_and_start() + .await; + + sandbox + .rest_client(QuickwitService::Indexer) + .indexes() + .create( + r#" + version: 0.8 + index_id: test-with-merge-pipelines + doc_mapping: + field_mappings: + - name: body + type: text + indexing_settings: + commit_timeout_secs: 1 + "#, + quickwit_config::ConfigFormat::Yaml, + false, + ) + .await + .unwrap(); + + sandbox.wait_for_indexing_pipelines(1).await.unwrap(); + + let stats = sandbox + .rest_client(QuickwitService::Indexer) + .node_stats() + .indexing() + .await + .unwrap(); + assert!(stats.num_running_merge_pipelines > 0); + + unsafe { std::env::remove_var("QW_ENABLE_COMPACTION_SERVICE") }; + sandbox.shutdown().await.unwrap(); +} diff --git a/quickwit/quickwit-proto/build.rs b/quickwit/quickwit-proto/build.rs index 569d9b5315b..a83e8723bd2 100644 --- a/quickwit/quickwit-proto/build.rs +++ b/quickwit/quickwit-proto/build.rs @@ -35,6 +35,20 @@ fn main() -> Result<(), Box> { .run() .unwrap(); + // 
Compaction service. + let mut prost_config = prost_build::Config::default(); + prost_config.file_descriptor_set_path("src/codegen/quickwit/compaction_descriptor.bin"); + + Codegen::builder() + .with_prost_config(prost_config) + .with_protos(&["protos/quickwit/compaction.proto"]) + .with_output_dir("src/codegen/quickwit") + .with_result_type_path("crate::compaction::CompactionResult") + .with_error_type_path("crate::compaction::CompactionError") + .generate_rpc_name_impls() + .run() + .unwrap(); + // Control plane. let mut prost_config = prost_build::Config::default(); prost_config.file_descriptor_set_path("src/codegen/quickwit/control_plane_descriptor.bin"); diff --git a/quickwit/quickwit-proto/protos/quickwit/compaction.proto b/quickwit/quickwit-proto/protos/quickwit/compaction.proto new file mode 100644 index 00000000000..82979c3b4ee --- /dev/null +++ b/quickwit/quickwit-proto/protos/quickwit/compaction.proto @@ -0,0 +1,24 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; + +package quickwit.compaction; + +service CompactionService { + rpc Ping(PingRequest) returns (PingResponse); +} + +message PingRequest {} +message PingResponse {} \ No newline at end of file diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs new file mode 100644 index 00000000000..eec6be161ca --- /dev/null +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs @@ -0,0 +1,797 @@ +// This file is @generated by prost-build. +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct PingRequest {} +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct PingResponse {} +/// BEGIN quickwit-codegen +#[allow(unused_imports)] +use std::str::FromStr; +use tower::{Layer, Service, ServiceExt}; +use quickwit_common::tower::RpcName; +impl RpcName for PingRequest { + fn rpc_name() -> &'static str { + "ping" + } +} +#[cfg_attr(any(test, feature = "testsuite"), mockall::automock)] +#[async_trait::async_trait] +pub trait CompactionService: std::fmt::Debug + Send + Sync + 'static { + async fn ping( + &self, + request: PingRequest, + ) -> crate::compaction::CompactionResult; +} +#[derive(Debug, Clone)] +pub struct CompactionServiceClient { + inner: InnerCompactionServiceClient, +} +#[derive(Debug, Clone)] +struct InnerCompactionServiceClient(std::sync::Arc); +impl CompactionServiceClient { + pub fn new(instance: T) -> Self + where + T: CompactionService, + { + #[cfg(any(test, feature = "testsuite"))] + assert!( + std::any::TypeId::of:: < T > () != std::any::TypeId::of:: < + MockCompactionService > (), + "`MockCompactionService` must be wrapped in a `MockCompactionServiceWrapper`: use `CompactionServiceClient::from_mock(mock)` to instantiate the client" + ); + Self { + inner: 
InnerCompactionServiceClient(std::sync::Arc::new(instance)), + } + } + pub fn as_grpc_service( + &self, + max_message_size: bytesize::ByteSize, + ) -> compaction_service_grpc_server::CompactionServiceGrpcServer< + CompactionServiceGrpcServerAdapter, + > { + let adapter = CompactionServiceGrpcServerAdapter::new(self.clone()); + compaction_service_grpc_server::CompactionServiceGrpcServer::new(adapter) + .accept_compressed(tonic::codec::CompressionEncoding::Gzip) + .accept_compressed(tonic::codec::CompressionEncoding::Zstd) + .send_compressed(tonic::codec::CompressionEncoding::Gzip) + .send_compressed(tonic::codec::CompressionEncoding::Zstd) + .max_decoding_message_size(max_message_size.0 as usize) + .max_encoding_message_size(max_message_size.0 as usize) + } + pub fn from_channel( + addr: std::net::SocketAddr, + channel: tonic::transport::Channel, + max_message_size: bytesize::ByteSize, + compression_encoding_opt: Option, + ) -> Self { + let (_, connection_keys_watcher) = tokio::sync::watch::channel( + std::collections::HashSet::from_iter([addr]), + ); + let mut client = compaction_service_grpc_client::CompactionServiceGrpcClient::new( + channel, + ) + .max_decoding_message_size(max_message_size.0 as usize) + .max_encoding_message_size(max_message_size.0 as usize); + if let Some(compression_encoding) = compression_encoding_opt { + client = client + .accept_compressed(compression_encoding) + .send_compressed(compression_encoding); + } + let adapter = CompactionServiceGrpcClientAdapter::new( + client, + connection_keys_watcher, + ); + Self::new(adapter) + } + pub fn from_balance_channel( + balance_channel: quickwit_common::tower::BalanceChannel, + max_message_size: bytesize::ByteSize, + compression_encoding_opt: Option, + ) -> CompactionServiceClient { + let connection_keys_watcher = balance_channel.connection_keys_watcher(); + let mut client = compaction_service_grpc_client::CompactionServiceGrpcClient::new( + balance_channel, + ) + 
.max_decoding_message_size(max_message_size.0 as usize) + .max_encoding_message_size(max_message_size.0 as usize); + if let Some(compression_encoding) = compression_encoding_opt { + client = client + .accept_compressed(compression_encoding) + .send_compressed(compression_encoding); + } + let adapter = CompactionServiceGrpcClientAdapter::new( + client, + connection_keys_watcher, + ); + Self::new(adapter) + } + pub fn from_mailbox(mailbox: quickwit_actors::Mailbox) -> Self + where + A: quickwit_actors::Actor + std::fmt::Debug + Send + 'static, + CompactionServiceMailbox: CompactionService, + { + CompactionServiceClient::new(CompactionServiceMailbox::new(mailbox)) + } + pub fn tower() -> CompactionServiceTowerLayerStack { + CompactionServiceTowerLayerStack::default() + } + #[cfg(any(test, feature = "testsuite"))] + pub fn from_mock(mock: MockCompactionService) -> Self { + let mock_wrapper = mock_compaction_service::MockCompactionServiceWrapper { + inner: tokio::sync::Mutex::new(mock), + }; + Self::new(mock_wrapper) + } + #[cfg(any(test, feature = "testsuite"))] + pub fn mocked() -> Self { + Self::from_mock(MockCompactionService::new()) + } +} +#[async_trait::async_trait] +impl CompactionService for CompactionServiceClient { + async fn ping( + &self, + request: PingRequest, + ) -> crate::compaction::CompactionResult { + self.inner.0.ping(request).await + } +} +#[cfg(any(test, feature = "testsuite"))] +pub mod mock_compaction_service { + use super::*; + #[derive(Debug)] + pub struct MockCompactionServiceWrapper { + pub(super) inner: tokio::sync::Mutex, + } + #[async_trait::async_trait] + impl CompactionService for MockCompactionServiceWrapper { + async fn ping( + &self, + request: super::PingRequest, + ) -> crate::compaction::CompactionResult { + self.inner.lock().await.ping(request).await + } + } +} +pub type BoxFuture = std::pin::Pin< + Box> + Send + 'static>, +>; +impl tower::Service for InnerCompactionServiceClient { + type Response = PingResponse; + type Error = 
crate::compaction::CompactionError; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + std::task::Poll::Ready(Ok(())) + } + fn call(&mut self, request: PingRequest) -> Self::Future { + let svc = self.clone(); + let fut = async move { svc.0.ping(request).await }; + Box::pin(fut) + } +} +/// A tower service stack is a set of tower services. +#[derive(Debug)] +struct CompactionServiceTowerServiceStack { + #[allow(dead_code)] + inner: InnerCompactionServiceClient, + ping_svc: quickwit_common::tower::BoxService< + PingRequest, + PingResponse, + crate::compaction::CompactionError, + >, +} +#[async_trait::async_trait] +impl CompactionService for CompactionServiceTowerServiceStack { + async fn ping( + &self, + request: PingRequest, + ) -> crate::compaction::CompactionResult { + self.ping_svc.clone().ready().await?.call(request).await + } +} +type PingLayer = quickwit_common::tower::BoxLayer< + quickwit_common::tower::BoxService< + PingRequest, + PingResponse, + crate::compaction::CompactionError, + >, + PingRequest, + PingResponse, + crate::compaction::CompactionError, +>; +#[derive(Debug, Default)] +pub struct CompactionServiceTowerLayerStack { + ping_layers: Vec, +} +impl CompactionServiceTowerLayerStack { + pub fn stack_layer(mut self, layer: L) -> Self + where + L: tower::Layer< + quickwit_common::tower::BoxService< + PingRequest, + PingResponse, + crate::compaction::CompactionError, + >, + > + Clone + Send + Sync + 'static, + , + >>::Service: tower::Service< + PingRequest, + Response = PingResponse, + Error = crate::compaction::CompactionError, + > + Clone + Send + Sync + 'static, + <, + >>::Service as tower::Service>::Future: Send + 'static, + { + self.ping_layers.push(quickwit_common::tower::BoxLayer::new(layer.clone())); + self + } + pub fn stack_ping_layer(mut self, layer: L) -> Self + where + L: tower::Layer< + quickwit_common::tower::BoxService< + PingRequest, + PingResponse, + 
crate::compaction::CompactionError, + >, + > + Send + Sync + 'static, + L::Service: tower::Service< + PingRequest, + Response = PingResponse, + Error = crate::compaction::CompactionError, + > + Clone + Send + Sync + 'static, + >::Future: Send + 'static, + { + self.ping_layers.push(quickwit_common::tower::BoxLayer::new(layer)); + self + } + pub fn build(self, instance: T) -> CompactionServiceClient + where + T: CompactionService, + { + let inner_client = InnerCompactionServiceClient(std::sync::Arc::new(instance)); + self.build_from_inner_client(inner_client) + } + pub fn build_from_channel( + self, + addr: std::net::SocketAddr, + channel: tonic::transport::Channel, + max_message_size: bytesize::ByteSize, + compression_encoding_opt: Option, + ) -> CompactionServiceClient { + let client = CompactionServiceClient::from_channel( + addr, + channel, + max_message_size, + compression_encoding_opt, + ); + let inner_client = client.inner; + self.build_from_inner_client(inner_client) + } + pub fn build_from_balance_channel( + self, + balance_channel: quickwit_common::tower::BalanceChannel, + max_message_size: bytesize::ByteSize, + compression_encoding_opt: Option, + ) -> CompactionServiceClient { + let client = CompactionServiceClient::from_balance_channel( + balance_channel, + max_message_size, + compression_encoding_opt, + ); + let inner_client = client.inner; + self.build_from_inner_client(inner_client) + } + pub fn build_from_mailbox( + self, + mailbox: quickwit_actors::Mailbox, + ) -> CompactionServiceClient + where + A: quickwit_actors::Actor + std::fmt::Debug + Send + 'static, + CompactionServiceMailbox: CompactionService, + { + let inner_client = InnerCompactionServiceClient( + std::sync::Arc::new(CompactionServiceMailbox::new(mailbox)), + ); + self.build_from_inner_client(inner_client) + } + #[cfg(any(test, feature = "testsuite"))] + pub fn build_from_mock( + self, + mock: MockCompactionService, + ) -> CompactionServiceClient { + let client = 
CompactionServiceClient::from_mock(mock); + let inner_client = client.inner; + self.build_from_inner_client(inner_client) + } + fn build_from_inner_client( + self, + inner_client: InnerCompactionServiceClient, + ) -> CompactionServiceClient { + let ping_svc = self + .ping_layers + .into_iter() + .rev() + .fold( + quickwit_common::tower::BoxService::new(inner_client.clone()), + |svc, layer| layer.layer(svc), + ); + let tower_svc_stack = CompactionServiceTowerServiceStack { + inner: inner_client, + ping_svc, + }; + CompactionServiceClient::new(tower_svc_stack) + } +} +#[derive(Debug, Clone)] +struct MailboxAdapter { + inner: quickwit_actors::Mailbox, + phantom: std::marker::PhantomData, +} +impl std::ops::Deref for MailboxAdapter +where + A: quickwit_actors::Actor, +{ + type Target = quickwit_actors::Mailbox; + fn deref(&self) -> &Self::Target { + &self.inner + } +} +#[derive(Debug)] +pub struct CompactionServiceMailbox { + inner: MailboxAdapter, +} +impl CompactionServiceMailbox { + pub fn new(instance: quickwit_actors::Mailbox) -> Self { + let inner = MailboxAdapter { + inner: instance, + phantom: std::marker::PhantomData, + }; + Self { inner } + } +} +impl Clone for CompactionServiceMailbox { + fn clone(&self) -> Self { + let inner = MailboxAdapter { + inner: self.inner.clone(), + phantom: std::marker::PhantomData, + }; + Self { inner } + } +} +impl tower::Service for CompactionServiceMailbox +where + A: quickwit_actors::Actor + + quickwit_actors::DeferableReplyHandler> + Send + + 'static, + M: std::fmt::Debug + Send + 'static, + T: Send + 'static, + E: std::fmt::Debug + Send + 'static, + crate::compaction::CompactionError: From>, +{ + type Response = T; + type Error = crate::compaction::CompactionError; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + //! This does not work with balance middlewares such as `tower::balance::pool::Pool` because + //! this always returns `Poll::Ready`. 
The fix is to acquire a permit from the + //! mailbox in `poll_ready` and consume it in `call`. + std::task::Poll::Ready(Ok(())) + } + fn call(&mut self, message: M) -> Self::Future { + let mailbox = self.inner.clone(); + let fut = async move { + mailbox.ask_for_res(message).await.map_err(|error| error.into()) + }; + Box::pin(fut) + } +} +#[async_trait::async_trait] +impl CompactionService for CompactionServiceMailbox +where + A: quickwit_actors::Actor + std::fmt::Debug, + CompactionServiceMailbox< + A, + >: tower::Service< + PingRequest, + Response = PingResponse, + Error = crate::compaction::CompactionError, + Future = BoxFuture, + >, +{ + async fn ping( + &self, + request: PingRequest, + ) -> crate::compaction::CompactionResult { + self.clone().call(request).await + } +} +#[derive(Debug, Clone)] +pub struct CompactionServiceGrpcClientAdapter { + inner: T, + #[allow(dead_code)] + connection_addrs_rx: tokio::sync::watch::Receiver< + std::collections::HashSet, + >, +} +impl CompactionServiceGrpcClientAdapter { + pub fn new( + instance: T, + connection_addrs_rx: tokio::sync::watch::Receiver< + std::collections::HashSet, + >, + ) -> Self { + Self { + inner: instance, + connection_addrs_rx, + } + } +} +#[async_trait::async_trait] +impl CompactionService +for CompactionServiceGrpcClientAdapter< + compaction_service_grpc_client::CompactionServiceGrpcClient, +> +where + T: tonic::client::GrpcService + std::fmt::Debug + Clone + Send + + Sync + 'static, + T::ResponseBody: tonic::codegen::Body + Send + 'static, + ::Error: Into + + Send, + T::Future: Send, +{ + async fn ping( + &self, + request: PingRequest, + ) -> crate::compaction::CompactionResult { + self.inner + .clone() + .ping(request) + .await + .map(|response| response.into_inner()) + .map_err(|status| crate::error::grpc_status_to_service_error( + status, + PingRequest::rpc_name(), + )) + } +} +#[derive(Debug)] +pub struct CompactionServiceGrpcServerAdapter { + inner: InnerCompactionServiceClient, +} +impl 
CompactionServiceGrpcServerAdapter { + pub fn new(instance: T) -> Self + where + T: CompactionService, + { + Self { + inner: InnerCompactionServiceClient(std::sync::Arc::new(instance)), + } + } +} +#[async_trait::async_trait] +impl compaction_service_grpc_server::CompactionServiceGrpc +for CompactionServiceGrpcServerAdapter { + async fn ping( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + self.inner + .0 + .ping(request.into_inner()) + .await + .map(tonic::Response::new) + .map_err(crate::error::grpc_error_to_grpc_status) + } +} +/// Generated client implementations. +pub mod compaction_service_grpc_client { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + use tonic::codegen::http::Uri; + #[derive(Debug, Clone)] + pub struct CompactionServiceGrpcClient { + inner: tonic::client::Grpc, + } + impl CompactionServiceGrpcClient { + /// Attempt to create a new client by connecting to a given endpoint. 
+ pub async fn connect(dst: D) -> Result + where + D: TryInto, + D::Error: Into, + { + let conn = tonic::transport::Endpoint::new(dst)?.connect().await?; + Ok(Self::new(conn)) + } + } + impl CompactionServiceGrpcClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + std::marker::Send + 'static, + ::Error: Into + std::marker::Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> CompactionServiceGrpcClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + , + >>::Error: Into + std::marker::Send + std::marker::Sync, + { + CompactionServiceGrpcClient::new(InterceptedService::new(inner, interceptor)) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_decoding_message_size(limit); + self + } + /// Limits the maximum size of an encoded message. 
+ /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_encoding_message_size(limit); + self + } + pub async fn ping( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/quickwit.compaction.CompactionService/Ping", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert( + GrpcMethod::new("quickwit.compaction.CompactionService", "Ping"), + ); + self.inner.unary(req, path, codec).await + } + } +} +/// Generated server implementations. +pub mod compaction_service_grpc_server { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value, + )] + use tonic::codegen::*; + /// Generated trait containing gRPC methods that should be implemented for use with CompactionServiceGrpcServer. 
+ #[async_trait] + pub trait CompactionServiceGrpc: std::marker::Send + std::marker::Sync + 'static { + async fn ping( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + } + #[derive(Debug)] + pub struct CompactionServiceGrpcServer { + inner: Arc, + accept_compression_encodings: EnabledCompressionEncodings, + send_compression_encodings: EnabledCompressionEncodings, + max_decoding_message_size: Option, + max_encoding_message_size: Option, + } + impl CompactionServiceGrpcServer { + pub fn new(inner: T) -> Self { + Self::from_arc(Arc::new(inner)) + } + pub fn from_arc(inner: Arc) -> Self { + Self { + inner, + accept_compression_encodings: Default::default(), + send_compression_encodings: Default::default(), + max_decoding_message_size: None, + max_encoding_message_size: None, + } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> InterceptedService + where + F: tonic::service::Interceptor, + { + InterceptedService::new(Self::new(inner), interceptor) + } + /// Enable decompressing requests with the given encoding. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.accept_compression_encodings.enable(encoding); + self + } + /// Compress responses with the given encoding, if the client supports it. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.send_compression_encodings.enable(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.max_decoding_message_size = Some(limit); + self + } + /// Limits the maximum size of an encoded message. 
+ /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.max_encoding_message_size = Some(limit); + self + } + } + impl tonic::codegen::Service> + for CompactionServiceGrpcServer + where + T: CompactionServiceGrpc, + B: Body + std::marker::Send + 'static, + B::Error: Into + std::marker::Send + 'static, + { + type Response = http::Response; + type Error = std::convert::Infallible; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + fn call(&mut self, req: http::Request) -> Self::Future { + match req.uri().path() { + "/quickwit.compaction.CompactionService/Ping" => { + #[allow(non_camel_case_types)] + struct PingSvc(pub Arc); + impl< + T: CompactionServiceGrpc, + > tonic::server::UnaryService for PingSvc { + type Response = super::PingResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::ping(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = PingSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } + _ => { + Box::pin(async move { + let mut response = http::Response::new( + tonic::body::Body::default(), + ); + 
let headers = response.headers_mut(); + headers + .insert( + tonic::Status::GRPC_STATUS, + (tonic::Code::Unimplemented as i32).into(), + ); + headers + .insert( + http::header::CONTENT_TYPE, + tonic::metadata::GRPC_CONTENT_TYPE, + ); + Ok(response) + }) + } + } + } + } + impl Clone for CompactionServiceGrpcServer { + fn clone(&self) -> Self { + let inner = self.inner.clone(); + Self { + inner, + accept_compression_encodings: self.accept_compression_encodings, + send_compression_encodings: self.send_compression_encodings, + max_decoding_message_size: self.max_decoding_message_size, + max_encoding_message_size: self.max_encoding_message_size, + } + } + } + /// Generated gRPC service name + pub const SERVICE_NAME: &str = "quickwit.compaction.CompactionService"; + impl tonic::server::NamedService for CompactionServiceGrpcServer { + const NAME: &'static str = SERVICE_NAME; + } +} diff --git a/quickwit/quickwit-proto/src/compaction/mod.rs b/quickwit/quickwit-proto/src/compaction/mod.rs new file mode 100644 index 00000000000..1e27855add3 --- /dev/null +++ b/quickwit/quickwit-proto/src/compaction/mod.rs @@ -0,0 +1,76 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use quickwit_common::rate_limited_error; +use quickwit_common::tower::MakeLoadShedError; +use serde::{Deserialize, Serialize}; + +use crate::GrpcServiceError; +use crate::error::{ServiceError, ServiceErrorCode}; + +include!("../codegen/quickwit/quickwit.compaction.rs"); + +pub const COMPACTION_FILE_DESCRIPTOR_SET: &[u8] = + include_bytes!("../codegen/quickwit/compaction_descriptor.bin"); + +pub type CompactionResult = std::result::Result; + +#[derive(Debug, thiserror::Error, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum CompactionError { + #[error("internal error: {0}")] + Internal(String), + #[error("request timed out: {0}")] + Timeout(String), + #[error("too many requests")] + TooManyRequests, + #[error("service unavailable: {0}")] + Unavailable(String), +} + +impl ServiceError for CompactionError { + fn error_code(&self) -> ServiceErrorCode { + match self { + Self::Internal(err_msg) => { + rate_limited_error!(limit_per_min = 6, "compaction error: {err_msg}"); + ServiceErrorCode::Internal + } + Self::Timeout(_) => ServiceErrorCode::Timeout, + Self::TooManyRequests => ServiceErrorCode::TooManyRequests, + Self::Unavailable(_) => ServiceErrorCode::Unavailable, + } + } +} + +// Required by the codegen tower layers. All four constructors are mandatory. 
+impl GrpcServiceError for CompactionError { + fn new_internal(message: String) -> Self { + Self::Internal(message) + } + fn new_timeout(message: String) -> Self { + Self::Timeout(message) + } + fn new_too_many_requests() -> Self { + Self::TooManyRequests + } + fn new_unavailable(message: String) -> Self { + Self::Unavailable(message) + } +} + +impl MakeLoadShedError for CompactionError { + fn make_load_shed_error() -> Self { + CompactionError::TooManyRequests + } +} diff --git a/quickwit/quickwit-proto/src/lib.rs b/quickwit/quickwit-proto/src/lib.rs index dbe850b55b7..da39dc11711 100644 --- a/quickwit/quickwit-proto/src/lib.rs +++ b/quickwit/quickwit-proto/src/lib.rs @@ -27,6 +27,7 @@ use tracing::Span; use tracing_opentelemetry::OpenTelemetrySpanExt; pub mod cluster; +pub mod compaction; pub mod control_plane; pub use bytes; pub use tonic; diff --git a/quickwit/quickwit-serve/Cargo.toml b/quickwit/quickwit-serve/Cargo.toml index 2721aa719f3..7b5aa303f26 100644 --- a/quickwit/quickwit-serve/Cargo.toml +++ b/quickwit/quickwit-serve/Cargo.toml @@ -65,6 +65,7 @@ quickwit-control-plane = { workspace = true } quickwit-doc-mapper = { workspace = true } quickwit-index-management = { workspace = true } quickwit-indexing = { workspace = true } +quickwit-compaction = { workspace = true } quickwit-ingest = { workspace = true } quickwit-jaeger = { workspace = true } quickwit-janitor = { workspace = true } diff --git a/quickwit/quickwit-serve/src/grpc.rs b/quickwit/quickwit-serve/src/grpc.rs index 698c9e07d71..382b026a6a8 100644 --- a/quickwit/quickwit-serve/src/grpc.rs +++ b/quickwit/quickwit-serve/src/grpc.rs @@ -141,6 +141,21 @@ pub(crate) async fn start_grpc_server( None }; + // Mount gRPC compaction service if this node is a janitor with the compaction service enabled. 
+ let compaction_grpc_service = if services + .node_config + .is_service_enabled(QuickwitService::Janitor) + { + if let Some(compaction_service) = &services.compaction_service_client_opt { + enabled_grpc_services.insert("compaction"); + file_descriptor_sets.push(quickwit_proto::compaction::COMPACTION_FILE_DESCRIPTOR_SET); + Some(compaction_service.as_grpc_service(grpc_config.max_message_size)) + } else { + None + } + } else { + None + }; // Mount gRPC control plane service if `QuickwitService::ControlPlane` is enabled on node. let control_plane_grpc_service = if services .node_config @@ -238,6 +253,7 @@ pub(crate) async fn start_grpc_server( .add_service(developer_grpc_service) .add_service(health_service) .add_service(reflection_service) + .add_optional_service(compaction_grpc_service) .add_optional_service(control_plane_grpc_service) .add_optional_service(indexing_grpc_service) .add_optional_service(ingest_api_grpc_service) diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 4d674d9cb7b..e288161393e 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -72,6 +72,7 @@ use quickwit_common::tower::{ }; use quickwit_common::uri::Uri; use quickwit_common::{get_bool_from_env, spawn_named_task}; +use quickwit_compaction::planner::StubCompactionService; use quickwit_config::service::QuickwitService; use quickwit_config::{ClusterConfig, IngestApiConfig, NodeConfig}; use quickwit_control_plane::control_plane::{ControlPlane, ControlPlaneEventSubscriber}; @@ -93,6 +94,7 @@ use quickwit_metastore::{ ControlPlaneMetastore, ListIndexesMetadataResponseExt, MetastoreResolver, }; use quickwit_opentelemetry::otlp::{OtlpGrpcLogsService, OtlpGrpcTracesService}; +use quickwit_proto::compaction::CompactionServiceClient; use quickwit_proto::control_plane::ControlPlaneServiceClient; use quickwit_proto::indexing::{IndexingServiceClient, ShardPositionsUpdate}; use quickwit_proto::ingest::ingester::{ @@ -137,9 
+139,16 @@ const READINESS_REPORTING_INTERVAL: Duration = if cfg!(any(test, feature = "test Duration::from_secs(10) }; +const COMPACTION_SERVICE_DISCOVERY_TIMEOUT: Duration = if cfg!(any(test, feature = "testsuite")) { + Duration::from_millis(100) +} else { + Duration::from_secs(300) +}; + const METASTORE_CLIENT_MAX_CONCURRENCY_ENV_KEY: &str = "QW_METASTORE_CLIENT_MAX_CONCURRENCY"; const DEFAULT_METASTORE_CLIENT_MAX_CONCURRENCY: usize = 6; const DISABLE_DELETE_TASK_SERVICE_ENV_KEY: &str = "QW_DISABLE_DELETE_TASK_SERVICE"; +const ENABLE_COMPACTION_SERVICE_ENV_KEY: &str = "QW_ENABLE_COMPACTION_SERVICE"; pub type EnvFilterReloadFn = Arc anyhow::Result<()> + Send + Sync>; @@ -195,6 +204,7 @@ struct QuickwitServices { pub ingest_router_service: IngestRouterServiceClient, ingester_opt: Option, + pub compaction_service_client_opt: Option, pub janitor_service_opt: Option>, pub jaeger_service_opt: Option, pub otlp_logs_service_opt: Option, @@ -261,6 +271,56 @@ async fn balance_channel_for_service( BalanceChannel::from_stream(service_change_stream) } +/// Builds a `CompactionServiceClient` if the compaction service is available. +/// +/// On nodes running both the indexer and janitor services with +/// `QW_ENABLE_COMPACTION_SERVICE=true`, wraps a local stub. On other indexer nodes, waits up to +/// `COMPACTION_SERVICE_DISCOVERY_TIMEOUT` for a remote janitor and returns an error otherwise. +async fn get_compaction_service_client_if_needed( + node_config: &NodeConfig, + cluster: &Cluster, +) -> anyhow::Result, anyhow::Error> { + if !get_bool_from_env(ENABLE_COMPACTION_SERVICE_ENV_KEY, false) { + return Ok(None); + } + // Only indexer nodes (which need to know whether to spawn local merge pipelines) get a + // client. NOTE(review): janitor-only nodes also get `None` here; confirm that is intended.
+ if !node_config.is_service_enabled(QuickwitService::Indexer) { + return Ok(None); + } + if node_config.is_service_enabled(QuickwitService::Janitor) + && node_config.is_service_enabled(QuickwitService::Indexer) + { + info!("compaction service enabled on this node"); + return Ok(Some(CompactionServiceClient::new(StubCompactionService))); + } + let balance_channel = balance_channel_for_service(cluster, QuickwitService::Janitor).await; + let found = balance_channel + .wait_for(COMPACTION_SERVICE_DISCOVERY_TIMEOUT, |connections| { + !connections.is_empty() + }) + .await; + if !found { + bail!("compaction service is enabled but no janitor node was found in the cluster") + } + info!("remote compaction service detected on janitor node"); + Ok(Some(CompactionServiceClient::from_balance_channel( + balance_channel, + node_config.grpc_config.max_message_size, + None, + ))) +} + +fn spawn_merge_scheduler_service( + universe: &Universe, + node_config: &NodeConfig, +) -> Mailbox { + let (mailbox, _) = universe.spawn_builder().spawn(MergeSchedulerService::new( + node_config.indexer_config.merge_concurrency.get(), + )); + mailbox +} + async fn start_ingest_client_if_needed( node_config: &NodeConfig, universe: &Universe, @@ -539,10 +599,17 @@ pub async fn serve_quickwit( .await .context("failed to start ingest v1 service")?; + let compaction_service_client_opt = + get_compaction_service_client_if_needed(&node_config, &cluster) + .await + .context("failed to initialize compaction service client")?; + let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) { - let (merge_scheduler_mailbox, _) = universe.spawn_builder().spawn( - MergeSchedulerService::new(node_config.indexer_config.merge_concurrency.get()), - ); + let merge_scheduler_mailbox_opt = if compaction_service_client_opt.is_none() { + Some(spawn_merge_scheduler_service(&universe, &node_config)) + } else { + None + }; let indexing_service = start_indexing_service( &universe, &node_config, @@ 
-552,7 +619,7 @@ pub async fn serve_quickwit( ingester_pool.clone(), storage_resolver.clone(), event_broker.clone(), - Some(merge_scheduler_mailbox), + merge_scheduler_mailbox_opt, ) .await .context("failed to start indexing service")?; @@ -766,6 +833,7 @@ pub async fn serve_quickwit( ingest_router_service, ingest_service, ingester_opt: ingester_opt.clone(), + compaction_service_client_opt, janitor_service_opt, jaeger_service_opt, otlp_logs_service_opt, @@ -1814,4 +1882,48 @@ mod tests { assert!(ingester_pool.is_empty()); } + + #[tokio::test] + async fn test_compaction_service_on_janitor_node() { + let transport = ChannelTransport::default(); + let cluster = + create_cluster_for_test(Vec::new(), &["janitor", "indexer"], &transport, true) + .await + .unwrap(); + + // Without the env var, no compaction service. + let mut node_config = NodeConfig::for_test(); + node_config.enabled_services = + HashSet::from([QuickwitService::Janitor, QuickwitService::Indexer]); + let result = get_compaction_service_client_if_needed(&node_config, &cluster) + .await + .unwrap(); + assert!(result.is_none()); + + // With the env var, compaction service client is returned. 
+ unsafe { std::env::set_var(ENABLE_COMPACTION_SERVICE_ENV_KEY, "true") }; + let result = get_compaction_service_client_if_needed(&node_config, &cluster) + .await + .unwrap(); + assert!(result.is_some()); + + unsafe { std::env::remove_var(ENABLE_COMPACTION_SERVICE_ENV_KEY) }; + } + + #[tokio::test] + async fn test_compaction_service_returns_error_when_no_janitor() { + let transport = ChannelTransport::default(); + let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, false) + .await + .unwrap(); + + unsafe { std::env::set_var(ENABLE_COMPACTION_SERVICE_ENV_KEY, "true") }; + + let mut node_config = NodeConfig::for_test(); + node_config.enabled_services = HashSet::from([QuickwitService::Indexer]); + let result = get_compaction_service_client_if_needed(&node_config, &cluster).await; + assert!(result.is_err()); + + unsafe { std::env::remove_var(ENABLE_COMPACTION_SERVICE_ENV_KEY) }; + } } diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index 3f193783b04..c52d790167d 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -843,6 +843,7 @@ mod tests { _report_splits_subscription_handle_opt: None, _local_shards_update_listener_handle_opt: None, cluster, + compaction_service_client_opt: None, control_plane_server_opt: None, control_plane_client, indexing_service_opt: None, From 19300bc11cbb06aa40cf48cc65edd3ad25459bce Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Mon, 13 Apr 2026 13:37:22 -0400 Subject: [PATCH 03/21] Scaffold Compactor supervisor and pipeline (#6282) --- quickwit/Cargo.lock | 9 + quickwit/quickwit-compaction/Cargo.toml | 11 +- .../src/compaction_pipeline.rs | 186 +++++++++++++++ .../src/compactor_supervisor.rs | 224 ++++++++++++++++++ quickwit/quickwit-compaction/src/lib.rs | 6 + 5 files changed, 434 insertions(+), 2 deletions(-) create mode 100644 quickwit/quickwit-compaction/src/compaction_pipeline.rs create mode 100644 
quickwit/quickwit-compaction/src/compactor_supervisor.rs diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 544b882d332..f97d39a633b 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -7148,8 +7148,17 @@ dependencies = [ name = "quickwit-compaction" version = "0.8.0" dependencies = [ + "anyhow", "async-trait", + "quickwit-actors", + "quickwit-common", + "quickwit-indexing", + "quickwit-metastore", "quickwit-proto", + "quickwit-storage", + "serde", + "tokio", + "tracing", ] [[package]] diff --git a/quickwit/quickwit-compaction/Cargo.toml b/quickwit/quickwit-compaction/Cargo.toml index 07925537dce..9efa0e2ce94 100644 --- a/quickwit/quickwit-compaction/Cargo.toml +++ b/quickwit/quickwit-compaction/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "quickwit-compaction" -description = "Merge planner and merge worker services for split compaction" +description = "Compactor implementation and CompactionService" version.workspace = true edition.workspace = true @@ -12,4 +12,11 @@ license.workspace = true [dependencies] async-trait = { workspace = true } -quickwit-proto = { workspace = true } \ No newline at end of file +quickwit-actors = { workspace = true } +quickwit-common = { workspace = true } +quickwit-indexing = { workspace = true } +quickwit-proto = { workspace = true } +quickwit-storage = { workspace = true } +serde = { workspace = true } +tracing = { workspace = true } +tokio = { workspace = true } \ No newline at end of file diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs new file mode 100644 index 00000000000..926a5a5eb93 --- /dev/null +++ b/quickwit/quickwit-compaction/src/compaction_pipeline.rs @@ -0,0 +1,186 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use quickwit_actors::{ActorHandle, Health, Supervisable}; +use quickwit_common::KillSwitch; +use quickwit_common::temp_dir::TempDirectory; +use quickwit_indexing::actors::{ + MergeExecutor, MergeSplitDownloader, Packager, Publisher, Uploader, +}; +use tracing::{debug, error}; + +pub struct CompactionPipelineHandles { + pub merge_split_downloader: ActorHandle, + pub merge_executor: ActorHandle, + pub merge_packager: ActorHandle, + pub merge_uploader: ActorHandle, + pub merge_publisher: ActorHandle, +} + +/// A single-use merge execution pipeline. Processes one merge task and +/// terminates. +/// +/// Owned by the `CompactorSupervisor`, which periodically calls +/// `check_actor_health()` and acts on the result (retry, reap, etc.). +pub struct CompactionPipeline { + pub task_id: String, + pub split_ids: Vec, + pub retry_count: usize, + pub kill_switch: KillSwitch, + pub scratch_directory: TempDirectory, + pub handles: Option, +} + +impl CompactionPipeline { + pub fn new(task_id: String, split_ids: Vec, scratch_directory: TempDirectory) -> Self { + CompactionPipeline { + task_id, + split_ids, + retry_count: 0, + kill_switch: KillSwitch::default(), + scratch_directory, + handles: None, + } + } + + fn supervisables(&self) -> Vec<&dyn Supervisable> { + let Some(handles) = &self.handles else { + return Vec::new(); + }; + vec![ + &handles.merge_split_downloader, + &handles.merge_executor, + &handles.merge_packager, + &handles.merge_uploader, + &handles.merge_publisher, + ] + } + + /// Checks child actor health. 
+ /// + /// Takes no `check_for_progress` argument: every child actor is polled with + /// `check_health(true)`, which presumably enables stall detection (actors that + /// are alive but idle since the last check). The supervisor sets the cadence. + /// + /// Returns: + /// - `Success` when all actors have completed (merge published). + /// - `FailureOrUnhealthy` when any actor has died or stalled. + /// - `Healthy` when actors are running and making progress. + pub fn check_actor_health(&self) -> Health { + if self.handles.is_none() { + return Health::Healthy; + } + + let mut healthy_actors: Vec<&str> = Vec::new(); + let mut failure_or_unhealthy_actors: Vec<&str> = Vec::new(); + let mut success_actors: Vec<&str> = Vec::new(); + + for supervisable in self.supervisables() { + match supervisable.check_health(true) { + Health::Healthy => { + healthy_actors.push(supervisable.name()); + } + Health::FailureOrUnhealthy => { + failure_or_unhealthy_actors.push(supervisable.name()); + } + Health::Success => { + success_actors.push(supervisable.name()); + } + } + } + + if !failure_or_unhealthy_actors.is_empty() { + error!( + task_id=%self.task_id, + healthy_actors=?healthy_actors, + failed_or_unhealthy_actors=?failure_or_unhealthy_actors, + success_actors=?success_actors, + "compaction pipeline actor failure detected" + ); + return Health::FailureOrUnhealthy; + } + if healthy_actors.is_empty() { + debug!(task_id=%self.task_id, "all compaction pipeline actors completed"); + return Health::Success; + } + Health::Healthy + } + + pub async fn terminate(&mut self) { + self.kill_switch.kill(); + if let Some(handles) = self.handles.take() { + tokio::join!( + handles.merge_split_downloader.kill(), + handles.merge_executor.kill(), + handles.merge_packager.kill(), + handles.merge_uploader.kill(), + handles.merge_publisher.kill(), + ); + } + } + + /// Terminates the current actor chain, increments retry count, and + /// re-spawns. 
Downloaded splits remain on disk in the scratch directory. 
+ pub async fn restart(&mut self) { + self.terminate().await; + self.retry_count += 1; + self.spawn_pipeline(); + } + + /// Spawns the actor chain. Currently a no-op stub — actor chain + /// construction will be implemented in a later PR. + fn spawn_pipeline(&mut self) { + // TODO: construct MergeSplitDownloader → MergeExecutor → Packager → + // Uploader → Publisher actor chain and set self.handles. + } +} + +#[cfg(test)] +mod tests { + use quickwit_actors::Health; + use quickwit_common::temp_dir::TempDirectory; + + use super::CompactionPipeline; + + fn test_pipeline() -> CompactionPipeline { + CompactionPipeline::new( + "test-task".to_string(), + vec!["split-1".to_string(), "split-2".to_string()], + TempDirectory::for_test(), + ) + } + + #[test] + fn test_pipeline_no_handles_is_healthy() { + let pipeline = test_pipeline(); + assert!(pipeline.handles.is_none()); + assert_eq!(pipeline.check_actor_health(), Health::Healthy); + } + + #[tokio::test] + async fn test_pipeline_terminate_without_handles() { + let mut pipeline = test_pipeline(); + // Should not panic when there are no handles. + pipeline.terminate().await; + assert!(pipeline.handles.is_none()); + } + + #[tokio::test] + async fn test_pipeline_restart_increments_retry_count() { + let mut pipeline = test_pipeline(); + assert_eq!(pipeline.retry_count, 0); + pipeline.restart().await; + assert_eq!(pipeline.retry_count, 1); + } +} diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs new file mode 100644 index 00000000000..78e87c79df8 --- /dev/null +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -0,0 +1,224 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::time::Duration; + +use async_trait::async_trait; +use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Health}; +use quickwit_common::io::Limiter; +use quickwit_common::pubsub::EventBroker; +use quickwit_common::temp_dir::TempDirectory; +use quickwit_indexing::IndexingSplitStore; +use quickwit_proto::metastore::MetastoreServiceClient; +use quickwit_storage::StorageResolver; +use serde::Serialize; +use tracing::{error, info}; + +use crate::compaction_pipeline::CompactionPipeline; + +const SUPERVISE_LOOP_INTERVAL: Duration = Duration::from_secs(1); + +#[derive(Debug)] +struct SuperviseLoop; + +#[derive(Clone, Debug, Default, Serialize)] +pub struct CompactorSupervisorState { + pub num_pipeline_slots: usize, + pub num_occupied_slots: usize, + pub num_completed_tasks: usize, + pub num_failed_tasks: usize, +} + +/// Manages a pool of `CompactionPipeline`s, each executing a single merge task. +/// +/// Periodically checks pipeline health, handles retries on failure, and reaps +/// completed/failed pipelines. +pub struct CompactorSupervisor { + pipelines: Vec>, + + // Shared resources distributed to pipelines when spawning actor chains. + io_throughput_limiter: Option, + split_store: IndexingSplitStore, + metastore: MetastoreServiceClient, + storage_resolver: StorageResolver, + max_concurrent_split_uploads: usize, + event_broker: EventBroker, + + // Scratch directory root (/compaction/). 
+ compaction_root_directory: TempDirectory, + + max_local_retries: usize, + + // dummy counters until we have real state + num_completed_tasks: usize, + num_failed_tasks: usize, +} + +impl CompactorSupervisor { + #[allow(clippy::too_many_arguments)] + pub fn new( + num_pipeline_slots: usize, + io_throughput_limiter: Option, + split_store: IndexingSplitStore, + metastore: MetastoreServiceClient, + storage_resolver: StorageResolver, + max_concurrent_split_uploads: usize, + event_broker: EventBroker, + compaction_root_directory: TempDirectory, + max_local_retries: usize, + ) -> Self { + let pipelines = (0..num_pipeline_slots).map(|_| None).collect(); + CompactorSupervisor { + pipelines, + io_throughput_limiter, + split_store, + metastore, + storage_resolver, + max_concurrent_split_uploads, + event_broker, + compaction_root_directory, + max_local_retries, + num_completed_tasks: 0, + num_failed_tasks: 0, + } + } + + async fn supervise(&mut self) { + for slot in &mut self.pipelines { + let Some(pipeline) = slot else { + continue; + }; + + match pipeline.check_actor_health() { + Health::Healthy => {} + Health::Success => { + info!(task_id=%pipeline.task_id, "compaction task completed"); + self.num_completed_tasks += 1; + *slot = None; + } + Health::FailureOrUnhealthy => { + if pipeline.retry_count < self.max_local_retries { + info!( + task_id=%pipeline.task_id, + retry_count=%pipeline.retry_count, + "retrying compaction pipeline" + ); + pipeline.restart().await; + } else { + error!( + task_id=%pipeline.task_id, + retry_count=%pipeline.retry_count, + "compaction pipeline exhausted retries" + ); + pipeline.terminate().await; + self.num_failed_tasks += 1; + *slot = None; + } + } + } + } + } +} + +#[async_trait] +impl Actor for CompactorSupervisor { + type ObservableState = (); + + fn name(&self) -> String { + "CompactorSupervisor".to_string() + } + + fn observable_state(&self) -> Self::ObservableState {} + + async fn initialize(&mut self, ctx: &ActorContext) -> Result<(), 
ActorExitStatus> { + info!( + num_pipeline_slots=%self.pipelines.len(), + "compactor supervisor started" + ); + ctx.schedule_self_msg(SUPERVISE_LOOP_INTERVAL, SuperviseLoop); + Ok(()) + } +} + +#[async_trait] +impl Handler for CompactorSupervisor { + type Reply = (); + + async fn handle( + &mut self, + _msg: SuperviseLoop, + ctx: &ActorContext, + ) -> Result<(), ActorExitStatus> { + self.supervise().await; + ctx.schedule_self_msg(SUPERVISE_LOOP_INTERVAL, SuperviseLoop); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use quickwit_actors::Universe; + use quickwit_common::temp_dir::TempDirectory; + use quickwit_proto::metastore::{MetastoreServiceClient, MockMetastoreService}; + use quickwit_storage::{RamStorage, StorageResolver}; + + use super::*; + use crate::compaction_pipeline::CompactionPipeline; + + fn test_supervisor(num_slots: usize) -> CompactorSupervisor { + let storage = Arc::new(RamStorage::default()); + let split_store = IndexingSplitStore::create_without_local_store_for_test(storage); + let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); + CompactorSupervisor::new( + num_slots, + None, + split_store, + metastore, + StorageResolver::for_test(), + 2, + EventBroker::default(), + TempDirectory::for_test(), + 2, + ) + } + + #[tokio::test] + async fn test_supervisor_starts_with_empty_slots() { + let universe = Universe::with_accelerated_time(); + let supervisor = test_supervisor(4); + let (_mailbox, handle) = universe.spawn_builder().spawn(supervisor); + let obs = handle.process_pending_and_observe().await; + assert_eq!(obs.obs_type, quickwit_actors::ObservationType::Alive); + universe.assert_quit().await; + } + + #[tokio::test] + async fn test_supervisor_supervise_reaps_no_handle_pipelines() { + // A pipeline with no handles returns Healthy, so it stays in its slot. 
+ let mut supervisor = test_supervisor(2); + let pipeline = CompactionPipeline::new( + "task-1".to_string(), + vec!["split-1".to_string()], + TempDirectory::for_test(), + ); + supervisor.pipelines[0] = Some(pipeline); + supervisor.supervise().await; + // Pipeline has no handles → Healthy → not reaped. + assert!(supervisor.pipelines[0].is_some()); + assert_eq!(supervisor.num_completed_tasks, 0); + assert_eq!(supervisor.num_failed_tasks, 0); + } +} diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs index d59933e227e..b867eed1b86 100644 --- a/quickwit/quickwit-compaction/src/lib.rs +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -14,4 +14,10 @@ #![deny(clippy::disallowed_methods)] +#[allow(dead_code)] +mod compaction_pipeline; +#[allow(dead_code)] +mod compactor_supervisor; pub mod planner; + +pub use compactor_supervisor::CompactorSupervisor; From 40b307c9e3c94ff5a2bf12e030d3c7e301ab561c Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Mon, 13 Apr 2026 13:43:51 -0400 Subject: [PATCH 04/21] Spawn merge pipeline from new compactor (#6284) * Spawn merge pipeline from new compactor * new line because adrien is annoyed about it * comment --- quickwit/Cargo.lock | 2 + quickwit/quickwit-compaction/Cargo.toml | 11 +- .../src/compaction_pipeline.rs | 206 ++++++++++++++++-- .../src/compactor_supervisor.rs | 27 +-- quickwit/quickwit-indexing/failpoints/mod.rs | 5 +- .../src/actors/merge_executor.rs | 50 +++-- .../src/actors/merge_split_downloader.rs | 40 +++- .../src/models/merge_scratch.rs | 11 +- 8 files changed, 279 insertions(+), 73 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index f97d39a633b..39cef7929cb 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -7152,6 +7152,8 @@ dependencies = [ "async-trait", "quickwit-actors", "quickwit-common", + "quickwit-config", + "quickwit-doc-mapper", "quickwit-indexing", "quickwit-metastore", "quickwit-proto", diff --git 
a/quickwit/quickwit-compaction/Cargo.toml b/quickwit/quickwit-compaction/Cargo.toml index 9efa0e2ce94..878ca2384ba 100644 --- a/quickwit/quickwit-compaction/Cargo.toml +++ b/quickwit/quickwit-compaction/Cargo.toml @@ -11,12 +11,21 @@ authors.workspace = true license.workspace = true [dependencies] +anyhow = { workspace = true } async-trait = { workspace = true } quickwit-actors = { workspace = true } quickwit-common = { workspace = true } +quickwit-config = { workspace = true } +quickwit-doc-mapper = { workspace = true } quickwit-indexing = { workspace = true } quickwit-proto = { workspace = true } quickwit-storage = { workspace = true } serde = { workspace = true } tracing = { workspace = true } -tokio = { workspace = true } \ No newline at end of file +tokio = { workspace = true } + +[dev-dependencies] +quickwit-doc-mapper = { workspace = true, features = ["testsuite"] } +quickwit-metastore = { workspace = true, features = ["testsuite"] } +quickwit-proto = { workspace = true, features = ["testsuite"] } +quickwit-storage = { workspace = true, features = ["testsuite"] } diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs index 926a5a5eb93..309a5865a34 100644 --- a/quickwit/quickwit-compaction/src/compaction_pipeline.rs +++ b/quickwit/quickwit-compaction/src/compaction_pipeline.rs @@ -12,13 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use quickwit_actors::{ActorHandle, Health, Supervisable}; +use std::sync::Arc; + +use quickwit_actors::{ActorContext, ActorHandle, Health, Supervisable}; use quickwit_common::KillSwitch; +use quickwit_common::io::{IoControls, Limiter}; +use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; +use quickwit_config::RetentionPolicy; +use quickwit_doc_mapper::DocMapper; use quickwit_indexing::actors::{ - MergeExecutor, MergeSplitDownloader, Packager, Publisher, Uploader, + MergeExecutor, MergeSplitDownloader, Packager, Publisher, Uploader, UploaderType, }; -use tracing::{debug, error}; +use quickwit_indexing::merge_policy::MergeOperation; +use quickwit_indexing::{IndexingSplitStore, PublisherType, SplitsUpdateMailbox}; +use quickwit_proto::indexing::MergePipelineId; +use quickwit_proto::metastore::MetastoreServiceClient; +use tracing::{debug, error, info}; + +use crate::CompactorSupervisor; pub struct CompactionPipelineHandles { pub merge_split_downloader: ActorHandle, @@ -35,22 +47,58 @@ pub struct CompactionPipelineHandles { /// `check_actor_health()` and acts on the result (retry, reap, etc.). pub struct CompactionPipeline { pub task_id: String, - pub split_ids: Vec, pub retry_count: usize, pub kill_switch: KillSwitch, pub scratch_directory: TempDirectory, pub handles: Option, + + // Per-task parameters. + pub merge_operation: MergeOperation, + pub pipeline_id: MergePipelineId, + pub doc_mapper: Arc, + pub merge_policy: Arc, + pub retention_policy: Option, + + // Shared resources (cloned from CompactorSupervisor). 
+ pub metastore: MetastoreServiceClient, + pub split_store: IndexingSplitStore, + pub io_throughput_limiter: Option, + pub max_concurrent_split_uploads: usize, + pub event_broker: EventBroker, } impl CompactionPipeline { - pub fn new(task_id: String, split_ids: Vec, scratch_directory: TempDirectory) -> Self { + #[allow(clippy::too_many_arguments)] + pub fn new( + task_id: String, + scratch_directory: TempDirectory, + merge_operation: MergeOperation, + pipeline_id: MergePipelineId, + doc_mapper: Arc, + merge_policy: Arc, + retention_policy: Option, + metastore: MetastoreServiceClient, + split_store: IndexingSplitStore, + io_throughput_limiter: Option, + max_concurrent_split_uploads: usize, + event_broker: EventBroker, + ) -> Self { CompactionPipeline { task_id, - split_ids, retry_count: 0, kill_switch: KillSwitch::default(), scratch_directory, handles: None, + merge_operation, + pipeline_id, + doc_mapper, + merge_policy, + retention_policy, + metastore, + split_store, + io_throughput_limiter, + max_concurrent_split_uploads, + event_broker, } } @@ -132,32 +180,153 @@ impl CompactionPipeline { /// Terminates the current actor chain, increments retry count, and /// re-spawns. Downloaded splits remain on disk in the scratch directory. - pub async fn restart(&mut self) { + pub async fn restart(&mut self, ctx: &ActorContext) { self.terminate().await; self.retry_count += 1; - self.spawn_pipeline(); + if let Err(err) = self.spawn_pipeline(ctx) { + error!(task_id=%self.task_id, error=?err, "failed to respawn compaction pipeline"); + } } - /// Spawns the actor chain. Currently a no-op stub — actor chain - /// construction will be implemented in a later PR. - fn spawn_pipeline(&mut self) { - // TODO: construct MergeSplitDownloader → MergeExecutor → Packager → - // Uploader → Publisher actor chain and set self.handles. + /// Spawns the 5-actor merge execution chain and sends the `MergeOperation` + /// to the downloader to kick off execution. 
+ fn spawn_pipeline(&mut self, ctx: &ActorContext) -> anyhow::Result<()> { + self.kill_switch = ctx.kill_switch().child(); + + info!( + task_id=%self.task_id, + pipeline_id=%self.pipeline_id, + "spawning compaction pipeline" + ); + + // Publisher (no merge planner feedback, no source) + let merge_publisher = Publisher::new( + PublisherType::MergePublisher, + self.metastore.clone(), + None, + None, + ); + let (merge_publisher_mailbox, merge_publisher_handle) = ctx + .spawn_actor() + .set_kill_switch(self.kill_switch.clone()) + .spawn(merge_publisher); + + // Uploader + let merge_uploader = Uploader::new( + UploaderType::MergeUploader, + self.metastore.clone(), + self.merge_policy.clone(), + self.retention_policy.clone(), + self.split_store.clone(), + SplitsUpdateMailbox::from(merge_publisher_mailbox), + self.max_concurrent_split_uploads, + self.event_broker.clone(), + ); + let (merge_uploader_mailbox, merge_uploader_handle) = ctx + .spawn_actor() + .set_kill_switch(self.kill_switch.clone()) + .spawn(merge_uploader); + + // Packager + let tag_fields = self.doc_mapper.tag_named_fields()?; + let merge_packager = Packager::new("MergePackager", tag_fields, merge_uploader_mailbox); + let (merge_packager_mailbox, merge_packager_handle) = ctx + .spawn_actor() + .set_kill_switch(self.kill_switch.clone()) + .spawn(merge_packager); + + // MergeExecutor + let split_downloader_io_controls = IoControls::default() + .set_throughput_limiter_opt(self.io_throughput_limiter.clone()) + .set_component("split_downloader_merge"); + let merge_executor_io_controls = + split_downloader_io_controls.clone().set_component("merger"); + + let merge_executor = MergeExecutor::new( + self.pipeline_id.clone(), + self.metastore.clone(), + self.doc_mapper.clone(), + merge_executor_io_controls, + merge_packager_mailbox, + ); + let (merge_executor_mailbox, merge_executor_handle) = ctx + .spawn_actor() + .set_kill_switch(self.kill_switch.clone()) + .spawn(merge_executor); + + // MergeSplitDownloader + let 
merge_split_downloader = MergeSplitDownloader { + scratch_directory: self.scratch_directory.clone(), + split_store: self.split_store.clone(), + executor_mailbox: merge_executor_mailbox, + io_controls: split_downloader_io_controls, + }; + let (merge_split_downloader_mailbox, merge_split_downloader_handle) = ctx + .spawn_actor() + .set_kill_switch(self.kill_switch.clone()) + .spawn(merge_split_downloader); + + // Kick off the pipeline. + merge_split_downloader_mailbox + .try_send_message(self.merge_operation.clone()) + .map_err(|err| { + anyhow::anyhow!("failed to send merge operation to downloader: {err:?}") + })?; + + self.handles = Some(CompactionPipelineHandles { + merge_split_downloader: merge_split_downloader_handle, + merge_executor: merge_executor_handle, + merge_packager: merge_packager_handle, + merge_uploader: merge_uploader_handle, + merge_publisher: merge_publisher_handle, + }); + + Ok(()) } } #[cfg(test)] mod tests { + use std::sync::Arc; + use quickwit_actors::Health; + use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; + use quickwit_doc_mapper::default_doc_mapper_for_test; + use quickwit_indexing::IndexingSplitStore; + use quickwit_indexing::merge_policy::{MergeOperation, default_merge_policy}; + use quickwit_metastore::SplitMetadata; + use quickwit_proto::indexing::MergePipelineId; + use quickwit_proto::metastore::{MetastoreServiceClient, MockMetastoreService}; + use quickwit_proto::types::{IndexUid, NodeId}; + use quickwit_storage::RamStorage; use super::CompactionPipeline; fn test_pipeline() -> CompactionPipeline { + let storage = Arc::new(RamStorage::default()); + let split_store = IndexingSplitStore::create_without_local_store_for_test(storage); + let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); + let splits = vec![SplitMetadata::for_test("split-1".to_string())]; + let merge_operation = MergeOperation::new_merge_operation(splits); + let pipeline_id = MergePipelineId { + node_id: 
NodeId::from("test-node"), + index_uid: IndexUid::for_test("test-index", 0), + source_id: "test-source".to_string(), + }; CompactionPipeline::new( "test-task".to_string(), - vec!["split-1".to_string(), "split-2".to_string()], TempDirectory::for_test(), + merge_operation, + pipeline_id, + Arc::new(default_doc_mapper_for_test()), + default_merge_policy(), + None, + metastore, + split_store, + None, + 2, + EventBroker::default(), ) } @@ -171,16 +340,7 @@ mod tests { #[tokio::test] async fn test_pipeline_terminate_without_handles() { let mut pipeline = test_pipeline(); - // Should not panic when there are no handles. pipeline.terminate().await; assert!(pipeline.handles.is_none()); } - - #[tokio::test] - async fn test_pipeline_restart_increments_retry_count() { - let mut pipeline = test_pipeline(); - assert_eq!(pipeline.retry_count, 0); - pipeline.restart().await; - assert_eq!(pipeline.retry_count, 1); - } } diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index 78e87c79df8..4ba270c270b 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -94,7 +94,7 @@ impl CompactorSupervisor { } } - async fn supervise(&mut self) { + async fn supervise(&mut self, ctx: &ActorContext) { for slot in &mut self.pipelines { let Some(pipeline) = slot else { continue; @@ -114,7 +114,7 @@ impl CompactorSupervisor { retry_count=%pipeline.retry_count, "retrying compaction pipeline" ); - pipeline.restart().await; + pipeline.restart(ctx).await; } else { error!( task_id=%pipeline.task_id, @@ -160,7 +160,7 @@ impl Handler for CompactorSupervisor { _msg: SuperviseLoop, ctx: &ActorContext, ) -> Result<(), ActorExitStatus> { - self.supervise().await; + self.supervise(ctx).await; ctx.schedule_self_msg(SUPERVISE_LOOP_INTERVAL, SuperviseLoop); Ok(()) } @@ -176,7 +176,6 @@ mod tests { use quickwit_storage::{RamStorage, StorageResolver}; use 
super::*; - use crate::compaction_pipeline::CompactionPipeline; fn test_supervisor(num_slots: usize) -> CompactorSupervisor { let storage = Arc::new(RamStorage::default()); @@ -208,17 +207,13 @@ mod tests { #[tokio::test] async fn test_supervisor_supervise_reaps_no_handle_pipelines() { // A pipeline with no handles returns Healthy, so it stays in its slot. - let mut supervisor = test_supervisor(2); - let pipeline = CompactionPipeline::new( - "task-1".to_string(), - vec!["split-1".to_string()], - TempDirectory::for_test(), - ); - supervisor.pipelines[0] = Some(pipeline); - supervisor.supervise().await; - // Pipeline has no handles → Healthy → not reaped. - assert!(supervisor.pipelines[0].is_some()); - assert_eq!(supervisor.num_completed_tasks, 0); - assert_eq!(supervisor.num_failed_tasks, 0); + // We spawn the supervisor as an actor so we can get a context for supervise(). + let universe = Universe::with_accelerated_time(); + let supervisor = test_supervisor(2); + let (_mailbox, handle) = universe.spawn_builder().spawn(supervisor); + // Let the supervisor run one supervision loop (it schedules SuperviseLoop on init). 
+ let obs = handle.process_pending_and_observe().await; + assert_eq!(obs.obs_type, quickwit_actors::ObservationType::Alive); + universe.assert_quit().await; } } diff --git a/quickwit/quickwit-indexing/failpoints/mod.rs b/quickwit/quickwit-indexing/failpoints/mod.rs index d8c5ab0e418..b68245ff718 100644 --- a/quickwit/quickwit-indexing/failpoints/mod.rs +++ b/quickwit/quickwit-indexing/failpoints/mod.rs @@ -285,9 +285,10 @@ async fn test_merge_executor_controlled_directory_kill_switch() -> anyhow::Resul tantivy_dirs.push(get_tantivy_directory_from_split_bundle(&dest_filepath).unwrap()); } let merge_operation = MergeOperation::new_merge_operation(split_metadatas); - let merge_task = MergeTask::from_merge_operation_for_test(merge_operation); + let merge_task = MergeTask::from_merge_operation_for_test(merge_operation.clone()); let merge_scratch = MergeScratch { - merge_task, + merge_operation, + merge_task: Some(merge_task), merge_scratch_directory, downloaded_splits_directory, tantivy_dirs, diff --git a/quickwit/quickwit-indexing/src/actors/merge_executor.rs b/quickwit/quickwit-indexing/src/actors/merge_executor.rs index d6ada9dde0f..5141227bee9 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_executor.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_executor.rs @@ -81,22 +81,28 @@ impl Actor for MergeExecutor { impl Handler for MergeExecutor { type Reply = (); - #[instrument(level = "info", name = "merge_executor", parent = merge_scratch.merge_task.merge_parent_span.id(), skip_all)] + #[instrument(level = "info", name = "merge_executor", parent = merge_scratch.merge_operation.merge_parent_span.id(), skip_all)] async fn handle( &mut self, merge_scratch: MergeScratch, ctx: &ActorContext, ) -> Result<(), ActorExitStatus> { let start = Instant::now(); - let merge_task = merge_scratch.merge_task; - let indexed_split_opt: Option = match merge_task.operation_type { + let MergeScratch { + merge_operation, + merge_task, + tantivy_dirs, + merge_scratch_directory, + 
.. + } = merge_scratch; + let indexed_split_opt: Option = match merge_operation.operation_type { MergeOperationType::Merge => { let merge_res = self .process_merge( - merge_task.merge_split_id.clone(), - merge_task.splits.clone(), - merge_scratch.tantivy_dirs, - merge_scratch.merge_scratch_directory, + merge_operation.merge_split_id.clone(), + merge_operation.splits.clone(), + tantivy_dirs, + merge_scratch_directory, ctx, ) .await; @@ -114,24 +120,24 @@ impl Handler for MergeExecutor { // With a merge policy that marks splits as mature after a day or so, this // limits the noise associated to those failed // merges. - error!(task=?merge_task, err=?err, "failed to merge splits"); + error!(task=?merge_operation, err=?err, "failed to merge splits"); return Ok(()); } } } MergeOperationType::DeleteAndMerge => { assert_eq!( - merge_task.splits.len(), + merge_operation.splits.len(), 1, "Delete tasks can be applied only on one split." ); - assert_eq!(merge_scratch.tantivy_dirs.len(), 1); - let split_with_docs_to_delete = merge_task.splits[0].clone(); + assert_eq!(tantivy_dirs.len(), 1); + let split_with_docs_to_delete = merge_operation.splits[0].clone(); self.process_delete_and_merge( - merge_task.merge_split_id.clone(), + merge_operation.merge_split_id.clone(), split_with_docs_to_delete, - merge_scratch.tantivy_dirs, - merge_scratch.merge_scratch_directory, + tantivy_dirs, + merge_scratch_directory, ctx, ) .await? 
@@ -141,7 +147,7 @@ impl Handler for MergeExecutor { info!( merged_num_docs = %indexed_split.split_attrs.num_docs, elapsed_secs = %start.elapsed().as_secs_f32(), - operation_type = %merge_task.operation_type, + operation_type = %merge_operation.operation_type, "merge-operation-success" ); ctx.send_message( @@ -151,8 +157,8 @@ impl Handler for MergeExecutor { checkpoint_delta_opt: Default::default(), publish_lock: PublishLock::default(), publish_token_opt: None, - batch_parent_span: merge_task.merge_parent_span.clone(), - merge_task_opt: Some(merge_task), + batch_parent_span: merge_operation.merge_parent_span.clone(), + merge_task_opt: merge_task, }, ) .await?; @@ -642,9 +648,10 @@ mod tests { tantivy_dirs.push(get_tantivy_directory_from_split_bundle(&dest_filepath).unwrap()) } let merge_operation = MergeOperation::new_merge_operation(split_metas); - let merge_task = MergeTask::from_merge_operation_for_test(merge_operation); + let merge_task = MergeTask::from_merge_operation_for_test(merge_operation.clone()); let merge_scratch = MergeScratch { - merge_task, + merge_operation, + merge_task: Some(merge_task), tantivy_dirs, merge_scratch_directory, downloaded_splits_directory, @@ -786,9 +793,10 @@ mod tests { .await?; let tantivy_dir = get_tantivy_directory_from_split_bundle(&dest_filepath).unwrap(); let merge_operation = MergeOperation::new_delete_and_merge_operation(new_split_metadata); - let merge_task = MergeTask::from_merge_operation_for_test(merge_operation); + let merge_task = MergeTask::from_merge_operation_for_test(merge_operation.clone()); let merge_scratch = MergeScratch { - merge_task, + merge_operation, + merge_task: Some(merge_task), tantivy_dirs: vec![tantivy_dir], merge_scratch_directory, downloaded_splits_directory, diff --git a/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs b/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs index 5d68bb59285..6b1bcc0ddb9 100644 --- 
a/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs @@ -23,7 +23,7 @@ use tantivy::Directory; use tracing::{debug, info, instrument}; use super::MergeExecutor; -use crate::merge_policy::MergeTask; +use crate::merge_policy::{MergeOperation, MergeTask}; use crate::models::MergeScratch; use crate::split_store::IndexingSplitStore; @@ -62,6 +62,35 @@ impl Handler for MergeSplitDownloader { merge_task: MergeTask, ctx: &ActorContext, ) -> Result<(), quickwit_actors::ActorExitStatus> { + let merge_operation = merge_task.merge_operation.as_ref().clone(); + self.download_and_send(merge_operation, ctx).await + } +} + +#[async_trait] +impl Handler for MergeSplitDownloader { + type Reply = (); + + #[instrument( + name = "merge_split_downloader", + parent = merge_operation.merge_parent_span.id(), + skip_all, + )] + async fn handle( + &mut self, + merge_operation: MergeOperation, + ctx: &ActorContext, + ) -> Result<(), quickwit_actors::ActorExitStatus> { + self.download_and_send(merge_operation, ctx).await + } +} + +impl MergeSplitDownloader { + async fn download_and_send( + &mut self, + merge_operation: MergeOperation, + ctx: &ActorContext, + ) -> Result<(), ActorExitStatus> { let merge_scratch_directory = temp_dir::Builder::default() .join("merge") .tempdir_in(self.scratch_directory.path()) @@ -73,13 +102,14 @@ impl Handler for MergeSplitDownloader { .map_err(|error| anyhow::anyhow!(error))?; let tantivy_dirs = self .download_splits( - merge_task.splits_as_slice(), + merge_operation.splits_as_slice(), downloaded_splits_directory.path(), ctx, ) .await?; let msg = MergeScratch { - merge_task, + merge_operation, + merge_task: None, merge_scratch_directory, downloaded_splits_directory, tantivy_dirs, @@ -190,8 +220,8 @@ mod tests { .unwrap() .downcast::() .unwrap(); - assert_eq!(merge_scratch.merge_task.splits_as_slice().len(), 10); - for split in merge_scratch.merge_task.splits_as_slice() { + 
assert_eq!(merge_scratch.merge_operation.splits_as_slice().len(), 10); + for split in merge_scratch.merge_operation.splits_as_slice() { let split_filename = split_file(split.split_id()); let split_filepath = merge_scratch .downloaded_splits_directory diff --git a/quickwit/quickwit-indexing/src/models/merge_scratch.rs b/quickwit/quickwit-indexing/src/models/merge_scratch.rs index 392ca60b42c..a0296f69d2d 100644 --- a/quickwit/quickwit-indexing/src/models/merge_scratch.rs +++ b/quickwit/quickwit-indexing/src/models/merge_scratch.rs @@ -15,14 +15,15 @@ use quickwit_common::temp_dir::TempDirectory; use tantivy::Directory; -use crate::merge_policy::MergeTask; +use crate::merge_policy::{MergeOperation, MergeTask}; #[derive(Debug)] pub struct MergeScratch { - /// A [`MergeTask`] tracked by either the `MergePlanner` or the `DeleteTaskPlanner` - /// See planners docs to understand the usage. - pub merge_task: MergeTask, - /// Scratch directory for computing the merge. + /// The merge operation data (splits, merge_split_id, operation type). + pub merge_operation: MergeOperation, + // TODO: remove once the old MergePipeline is deleted and the + // DeleteTaskPipeline no longer routes through MergeSchedulerService. 
+ pub merge_task: Option, pub merge_scratch_directory: TempDirectory, pub downloaded_splits_directory: TempDirectory, pub tantivy_dirs: Vec>, From ecab99b7bb514b5020910048a56705ce879bc737 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Mon, 13 Apr 2026 13:44:50 -0400 Subject: [PATCH 05/21] Wire up compactor service (#6291) * Wire up compactor service * lints --- quickwit/quickwit-compaction/src/lib.rs | 35 ++++++ quickwit/quickwit-config/src/lib.rs | 6 +- .../quickwit-config/src/node_config/mod.rs | 82 ++++++++++++++ .../src/node_config/serialize.rs | 95 +++++++++++++++- quickwit/quickwit-config/src/qw_env_vars.rs | 3 +- .../src/test_utils/cluster_sandbox.rs | 5 + .../src/tests/basic_tests.rs | 106 +++++------------- quickwit/quickwit-serve/src/lib.rs | 49 +++++--- quickwit/quickwit-serve/src/rest.rs | 1 + 9 files changed, 281 insertions(+), 101 deletions(-) diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs index b867eed1b86..fd2c5b05885 100644 --- a/quickwit/quickwit-compaction/src/lib.rs +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -21,3 +21,38 @@ mod compactor_supervisor; pub mod planner; pub use compactor_supervisor::CompactorSupervisor; +use quickwit_actors::{Mailbox, Universe}; +use quickwit_common::io; +use quickwit_common::pubsub::EventBroker; +use quickwit_common::temp_dir::TempDirectory; +use quickwit_config::CompactorConfig; +use quickwit_indexing::IndexingSplitStore; +use quickwit_proto::metastore::MetastoreServiceClient; +use quickwit_storage::StorageResolver; +use tracing::info; + +pub async fn start_compactor_service( + universe: &Universe, + compactor_config: &CompactorConfig, + split_store: IndexingSplitStore, + metastore: MetastoreServiceClient, + storage_resolver: StorageResolver, + event_broker: EventBroker, + compaction_root_directory: TempDirectory, +) -> anyhow::Result> { + info!("starting compactor service"); + let io_throughput_limiter = 
compactor_config.max_merge_write_throughput.map(io::limiter); + let supervisor = CompactorSupervisor::new( + compactor_config.max_concurrent_pipelines.get(), + io_throughput_limiter, + split_store, + metastore, + storage_resolver, + compactor_config.max_concurrent_split_uploads, + event_broker, + compaction_root_directory, + compactor_config.max_local_retries, + ); + let (mailbox, _handle) = universe.spawn_builder().spawn(supervisor); + Ok(mailbox) +} diff --git a/quickwit/quickwit-config/src/lib.rs b/quickwit/quickwit-config/src/lib.rs index 22cdb2538b4..79b285eee39 100644 --- a/quickwit/quickwit-config/src/lib.rs +++ b/quickwit/quickwit-config/src/lib.rs @@ -73,9 +73,9 @@ pub use crate::metastore_config::{ MetastoreBackend, MetastoreConfig, MetastoreConfigs, PostgresMetastoreConfig, }; pub use crate::node_config::{ - CacheConfig, CachePolicy, DEFAULT_QW_CONFIG_PATH, GrpcConfig, IndexerConfig, IngestApiConfig, - JaegerConfig, KeepAliveConfig, LambdaConfig, LambdaDeployConfig, NodeConfig, RestConfig, - SearcherConfig, SplitCacheLimits, StorageTimeoutPolicy, TlsConfig, + CacheConfig, CachePolicy, CompactorConfig, DEFAULT_QW_CONFIG_PATH, GrpcConfig, IndexerConfig, + IngestApiConfig, JaegerConfig, KeepAliveConfig, LambdaConfig, LambdaDeployConfig, NodeConfig, + RestConfig, SearcherConfig, SplitCacheLimits, StorageTimeoutPolicy, TlsConfig, }; use crate::source_config::serialize::{SourceConfigV0_7, SourceConfigV0_8, VersionedSourceConfig}; pub use crate::storage_config::{ diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index cf67768966d..47c3b6a77cb 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -163,6 +163,11 @@ pub struct IndexerConfig { pub enable_cooperative_indexing: bool, #[serde(default = "IndexerConfig::default_cpu_capacity")] pub cpu_capacity: CpuCapacity, + /// When true, the compactor service is not implicitly started on 
indexer + /// nodes. Dedicated compactor nodes must be deployed separately. + /// When false (default), every indexer node also runs the compactor. + #[serde(default = "IndexerConfig::default_enable_standalone_compactors")] + pub enable_standalone_compactors: bool, } impl IndexerConfig { @@ -170,6 +175,10 @@ impl IndexerConfig { false } + fn default_enable_standalone_compactors() -> bool { + false + } + fn default_enable_otlp_endpoint() -> bool { #[cfg(any(test, feature = "testsuite"))] { @@ -213,6 +222,7 @@ impl IndexerConfig { cpu_capacity: PIPELINE_FULL_CAPACITY * 4u32, max_merge_write_throughput: None, merge_concurrency: NonZeroUsize::new(3).unwrap(), + enable_standalone_compactors: false, }; Ok(indexer_config) } @@ -229,6 +239,77 @@ impl Default for IndexerConfig { cpu_capacity: Self::default_cpu_capacity(), merge_concurrency: Self::default_merge_concurrency(), max_merge_write_throughput: None, + enable_standalone_compactors: Self::default_enable_standalone_compactors(), + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct CompactorConfig { + /// Maximum number of concurrent merge pipelines. Defaults to 2/3 of CPU count. + #[serde(default = "CompactorConfig::default_max_concurrent_pipelines")] + pub max_concurrent_pipelines: NonZeroUsize, + /// Maximum number of concurrent split uploads across all pipelines. + #[serde(default = "CompactorConfig::default_max_concurrent_split_uploads")] + pub max_concurrent_split_uploads: usize, + /// Limits the IO throughput of the split downloader and the merge executor. + #[serde(default)] + pub max_merge_write_throughput: Option, + /// Maximum local retries before reporting a merge as failed to the planner. + #[serde(default = "CompactorConfig::default_max_local_retries")] + pub max_local_retries: usize, + /// Maximum size of the local split store cache in bytes. 
+ #[serde(default = "CompactorConfig::default_split_store_max_num_bytes")] + pub split_store_max_num_bytes: ByteSize, + /// Maximum number of splits in the local split store cache. + #[serde(default = "CompactorConfig::default_split_store_max_num_splits")] + pub split_store_max_num_splits: usize, +} + +impl CompactorConfig { + fn default_max_concurrent_pipelines() -> NonZeroUsize { + NonZeroUsize::new(quickwit_common::num_cpus() * 2 / 3).unwrap_or(NonZeroUsize::MIN) + } + + fn default_max_concurrent_split_uploads() -> usize { + 12 + } + + fn default_max_local_retries() -> usize { + 2 + } + + pub fn default_split_store_max_num_bytes() -> ByteSize { + ByteSize::gib(100) + } + + pub fn default_split_store_max_num_splits() -> usize { + 1_000 + } + + #[cfg(any(test, feature = "testsuite"))] + pub fn for_test() -> Self { + CompactorConfig { + max_concurrent_pipelines: NonZeroUsize::new(2).unwrap(), + max_concurrent_split_uploads: 4, + max_merge_write_throughput: None, + max_local_retries: 2, + split_store_max_num_bytes: ByteSize::mb(1), + split_store_max_num_splits: 3, + } + } +} + +impl Default for CompactorConfig { + fn default() -> Self { + Self { + max_concurrent_pipelines: Self::default_max_concurrent_pipelines(), + max_concurrent_split_uploads: Self::default_max_concurrent_split_uploads(), + max_merge_write_throughput: None, + max_local_retries: Self::default_max_local_retries(), + split_store_max_num_bytes: Self::default_split_store_max_num_bytes(), + split_store_max_num_splits: Self::default_split_store_max_num_splits(), } } } @@ -778,6 +859,7 @@ pub struct NodeConfig { pub searcher_config: SearcherConfig, pub ingest_api_config: IngestApiConfig, pub jaeger_config: JaegerConfig, + pub compactor_config: CompactorConfig, } impl NodeConfig { diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs index ae19a92c45f..0bf765df123 100644 --- a/quickwit/quickwit-config/src/node_config/serialize.rs +++ 
b/quickwit/quickwit-config/src/node_config/serialize.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::net::{IpAddr, SocketAddr}; use std::str::FromStr; use std::time::Duration; @@ -35,8 +35,8 @@ use crate::service::QuickwitService; use crate::storage_config::StorageConfigs; use crate::templating::render_config; use crate::{ - ConfigFormat, IndexerConfig, IngestApiConfig, JaegerConfig, MetastoreConfigs, NodeConfig, - SearcherConfig, TlsConfig, validate_identifier, validate_node_id, + CompactorConfig, ConfigFormat, IndexerConfig, IngestApiConfig, JaegerConfig, MetastoreConfigs, + NodeConfig, SearcherConfig, TlsConfig, validate_identifier, validate_node_id, }; pub const DEFAULT_CLUSTER_ID: &str = "quickwit-default-cluster"; @@ -192,6 +192,8 @@ struct NodeConfigBuilder { data_dir_uri: ConfigValue, metastore_uri: ConfigValue, default_index_root_uri: ConfigValue, + #[serde(default)] + enable_standalone_compactors: ConfigValue, #[serde(rename = "rest")] #[serde(default)] rest_config_builder: RestConfigBuilder, @@ -216,6 +218,9 @@ struct NodeConfigBuilder { #[serde(rename = "jaeger")] #[serde(default)] jaeger_config: JaegerConfig, + #[serde(rename = "compactor")] + #[serde(default)] + compactor_config: CompactorConfig, } impl NodeConfigBuilder { @@ -226,7 +231,10 @@ impl NodeConfigBuilder { let node_id = self.node_id.resolve(env_vars).map(NodeId::new)?; let availability_zone = self.availability_zone.resolve_optional(env_vars)?; - let enabled_services = self + self.indexer_config.enable_standalone_compactors = + self.enable_standalone_compactors.resolve(env_vars)?; + + let mut enabled_services: HashSet = self .enabled_services .resolve(env_vars)? 
.0 @@ -234,6 +242,14 @@ impl NodeConfigBuilder { .map(|service| service.parse()) .collect::>()?; + // Indexers implicitly run the compactor unless standalone compactors + // are enabled. + if enabled_services.contains(&QuickwitService::Indexer) + && !self.indexer_config.enable_standalone_compactors + { + enabled_services.insert(QuickwitService::Compactor); + } + let listen_address = self.listen_address.resolve(env_vars)?; let listen_host = listen_address.parse::()?; let listen_ip = listen_host.resolve().await?; @@ -331,6 +347,7 @@ impl NodeConfigBuilder { searcher_config: self.searcher_config, ingest_api_config: self.ingest_api_config, jaeger_config: self.jaeger_config, + compactor_config: self.compactor_config, }; validate(&node_config)?; @@ -421,6 +438,7 @@ impl Default for NodeConfigBuilder { data_dir_uri: default_data_dir_uri(), metastore_uri: ConfigValue::none(), default_index_root_uri: ConfigValue::none(), + enable_standalone_compactors: Default::default(), rest_config_builder: RestConfigBuilder::default(), grpc_config: GrpcConfig::default(), storage_configs: StorageConfigs::default(), @@ -429,6 +447,7 @@ impl Default for NodeConfigBuilder { searcher_config: SearcherConfig::default(), ingest_api_config: IngestApiConfig::default(), jaeger_config: JaegerConfig::default(), + compactor_config: CompactorConfig::default(), } } } @@ -528,6 +547,7 @@ pub fn node_config_for_tests_from_ports( searcher_config: SearcherConfig::default(), ingest_api_config: IngestApiConfig::default(), jaeger_config: JaegerConfig::default(), + compactor_config: CompactorConfig::default(), } } @@ -657,6 +677,7 @@ mod tests { cpu_capacity: IndexerConfig::default_cpu_capacity(), enable_cooperative_indexing: false, max_merge_write_throughput: Some(ByteSize::mb(100)), + enable_standalone_compactors: false, } ); assert_eq!( @@ -821,6 +842,10 @@ mod tests { "test-peer-seed-0,test-peer-seed-1".to_string(), ); env_vars.insert("QW_DATA_DIR".to_string(), "test-data-dir".to_string()); + 
env_vars.insert( + "QW_ENABLE_STANDALONE_COMPACTORS".to_string(), + "true".to_string(), + ); env_vars.insert( "QW_METASTORE_URI".to_string(), "postgresql://test-user:test-password@test-host:4321/test-db".to_string(), @@ -1354,4 +1379,66 @@ mod tests { .to_string(); assert!(error_message.contains("replication factor")); } + + #[tokio::test] + async fn test_indexer_implicitly_enables_compactor() { + let config_yaml = r#" + version: 0.8 + enabled_services: [indexer] + "#; + let config = + load_node_config_with_env(ConfigFormat::Yaml, config_yaml.as_bytes(), &HashMap::new()) + .await + .unwrap(); + assert!(config.enabled_services.contains(&QuickwitService::Indexer)); + assert!( + config + .enabled_services + .contains(&QuickwitService::Compactor) + ); + } + + #[tokio::test] + async fn test_standalone_compactors_prevents_implicit_compactor() { + let config_yaml = r#" + version: 0.8 + enabled_services: [indexer] + "#; + let env_vars = HashMap::from([( + "QW_ENABLE_STANDALONE_COMPACTORS".to_string(), + "true".to_string(), + )]); + let config = + load_node_config_with_env(ConfigFormat::Yaml, config_yaml.as_bytes(), &env_vars) + .await + .unwrap(); + assert!(config.enabled_services.contains(&QuickwitService::Indexer)); + assert!( + !config + .enabled_services + .contains(&QuickwitService::Compactor) + ); + } + + #[tokio::test] + async fn test_standalone_compactors_env_var_override() { + let env_vars = HashMap::from([( + "QW_ENABLE_STANDALONE_COMPACTORS".to_string(), + "true".to_string(), + )]); + let config_yaml = r#" + version: 0.8 + enabled_services: [indexer] + "#; + let config = + load_node_config_with_env(ConfigFormat::Yaml, config_yaml.as_bytes(), &env_vars) + .await + .unwrap(); + assert!(config.enabled_services.contains(&QuickwitService::Indexer)); + assert!( + !config + .enabled_services + .contains(&QuickwitService::Compactor) + ); + } } diff --git a/quickwit/quickwit-config/src/qw_env_vars.rs b/quickwit/quickwit-config/src/qw_env_vars.rs index 
7f36fdc2911..ec6d61e9498 100644 --- a/quickwit/quickwit-config/src/qw_env_vars.rs +++ b/quickwit/quickwit-config/src/qw_env_vars.rs @@ -56,7 +56,8 @@ qw_env_vars!( QW_PEER_SEEDS, QW_DATA_DIR, QW_METASTORE_URI, - QW_DEFAULT_INDEX_ROOT_URI + QW_DEFAULT_INDEX_ROOT_URI, + QW_ENABLE_STANDALONE_COMPACTORS ); #[cfg(test)] diff --git a/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs b/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs index a7385ca0946..7b425061134 100644 --- a/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs +++ b/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs @@ -76,6 +76,11 @@ impl TestNodeConfig { tcp_listener_resolver.add_listener(grpc_tcp_listener).await; config.indexer_config.enable_otlp_endpoint = self.enable_otlp; config.enabled_services.clone_from(&self.services); + if config.enabled_services.contains(&QuickwitService::Indexer) + && !config.indexer_config.enable_standalone_compactors + { + config.enabled_services.insert(QuickwitService::Compactor); + } config.jaeger_config.enable_endpoint = true; config.cluster_id.clone_from(&cluster_id); config.node_id = NodeId::new(format!("test-node-{node_idx}")); diff --git a/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs b/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs index 2715e310a97..96bf311cc01 100644 --- a/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs +++ b/quickwit/quickwit-integration-tests/src/tests/basic_tests.rs @@ -164,97 +164,45 @@ async fn test_multi_nodes_cluster() { } #[tokio::test] -async fn test_no_merge_pipelines_when_compaction_service_enabled() { +async fn test_indexer_implicitly_runs_compactor() { quickwit_common::setup_logging_for_tests(); - unsafe { std::env::set_var("QW_ENABLE_COMPACTION_SERVICE", "true") }; - let sandbox = ClusterSandboxBuilder::default() .add_node([QuickwitService::Searcher]) .add_node([QuickwitService::Metastore]) - 
.add_node([QuickwitService::Indexer]) .add_node([QuickwitService::ControlPlane]) .add_node([QuickwitService::Janitor]) - .build_and_start() - .await; - - sandbox - .rest_client(QuickwitService::Indexer) - .indexes() - .create( - r#" - version: 0.8 - index_id: test-no-merge-pipelines - doc_mapping: - field_mappings: - - name: body - type: text - indexing_settings: - commit_timeout_secs: 1 - "#, - quickwit_config::ConfigFormat::Yaml, - false, - ) - .await - .unwrap(); - - sandbox.wait_for_indexing_pipelines(1).await.unwrap(); - - let stats = sandbox - .rest_client(QuickwitService::Indexer) - .node_stats() - .indexing() - .await - .unwrap(); - assert_eq!(stats.num_running_merge_pipelines, 0); - - unsafe { std::env::remove_var("QW_ENABLE_COMPACTION_SERVICE") }; - sandbox.shutdown().await.unwrap(); -} - -#[tokio::test] -async fn test_merge_pipelines_present_without_compaction_service() { - quickwit_common::setup_logging_for_tests(); - unsafe { std::env::set_var("QW_ENABLE_COMPACTION_SERVICE", "false") }; - - let sandbox = ClusterSandboxBuilder::default() - .add_node([QuickwitService::Searcher]) - .add_node([QuickwitService::Metastore]) .add_node([QuickwitService::Indexer]) - .add_node([QuickwitService::ControlPlane]) - .add_node([QuickwitService::Janitor]) .build_and_start() .await; - sandbox - .rest_client(QuickwitService::Indexer) - .indexes() - .create( - r#" - version: 0.8 - index_id: test-with-merge-pipelines - doc_mapping: - field_mappings: - - name: body - type: text - indexing_settings: - commit_timeout_secs: 1 - "#, - quickwit_config::ConfigFormat::Yaml, - false, - ) - .await - .unwrap(); - - sandbox.wait_for_indexing_pipelines(1).await.unwrap(); - - let stats = sandbox - .rest_client(QuickwitService::Indexer) - .node_stats() - .indexing() + let cluster_snapshot = sandbox + .rest_client(QuickwitService::Searcher) + .cluster() + .snapshot() .await .unwrap(); - assert!(stats.num_running_merge_pipelines > 0); + assert_eq!(cluster_snapshot.ready_nodes.len(), 5); + 
+ // The indexer node should also advertise the compactor service. + let indexer_node = cluster_snapshot + .chitchat_state_snapshot + .node_states + .iter() + .find(|node_state| { + node_state + .get("enabled_services") + .map(|s| s.contains("indexer")) + .unwrap_or(false) + }) + .expect("indexer node not found in cluster state"); + let services = indexer_node.get("enabled_services").unwrap(); + assert!( + services.contains("compactor"), + "indexer node should implicitly run the compactor, got: {services}" + ); - unsafe { std::env::remove_var("QW_ENABLE_COMPACTION_SERVICE") }; sandbox.shutdown().await.unwrap(); } + +// TODO: add test_standalone_compactor_node once the test config builder +// resolves QW_ENABLE_STANDALONE_COMPACTORS from the environment. diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index e288161393e..8c974a4c990 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -73,6 +73,7 @@ use quickwit_common::tower::{ use quickwit_common::uri::Uri; use quickwit_common::{get_bool_from_env, spawn_named_task}; use quickwit_compaction::planner::StubCompactionService; +use quickwit_compaction::{CompactorSupervisor, start_compactor_service}; use quickwit_config::service::QuickwitService; use quickwit_config::{ClusterConfig, IngestApiConfig, NodeConfig}; use quickwit_control_plane::control_plane::{ControlPlane, ControlPlaneEventSubscriber}; @@ -80,7 +81,7 @@ use quickwit_control_plane::{IndexerNodeInfo, IndexerPool}; use quickwit_index_management::{IndexService as IndexManager, IndexServiceError}; use quickwit_indexing::actors::{IndexingService, MergeSchedulerService}; use quickwit_indexing::models::ShardPositionsService; -use quickwit_indexing::start_indexing_service; +use quickwit_indexing::{IndexingSplitStore, start_indexing_service}; use quickwit_ingest::{ GetMemoryCapacity, IngestRequest, IngestRouter, IngestServiceClient, Ingester, IngesterPool, IngesterPoolEntry, LocalShardsUpdate, 
get_idle_shard_timeout, @@ -113,7 +114,7 @@ use quickwit_search::{ SearchJobPlacer, SearchService, SearchServiceClient, SearcherContext, SearcherPool, create_search_client_from_channel, start_searcher_service, }; -use quickwit_storage::{SplitCache, StorageResolver}; +use quickwit_storage::{RamStorage, SplitCache, StorageResolver}; use tcp_listener::TcpListenerResolver; use tokio::sync::oneshot; use tonic::codec::CompressionEncoding; @@ -148,8 +149,6 @@ const COMPACTION_SERVICE_DISCOVERY_TIMEOUT: Duration = if cfg!(any(test, feature const METASTORE_CLIENT_MAX_CONCURRENCY_ENV_KEY: &str = "QW_METASTORE_CLIENT_MAX_CONCURRENCY"; const DEFAULT_METASTORE_CLIENT_MAX_CONCURRENCY: usize = 6; const DISABLE_DELETE_TASK_SERVICE_ENV_KEY: &str = "QW_DISABLE_DELETE_TASK_SERVICE"; -const ENABLE_COMPACTION_SERVICE_ENV_KEY: &str = "QW_ENABLE_COMPACTION_SERVICE"; - pub type EnvFilterReloadFn = Arc anyhow::Result<()> + Send + Sync>; pub fn do_nothing_env_filter_reload_fn() -> EnvFilterReloadFn { @@ -205,6 +204,7 @@ struct QuickwitServices { ingester_opt: Option, pub compaction_service_client_opt: Option, + pub _compactor_supervisor_opt: Option>, pub janitor_service_opt: Option>, pub jaeger_service_opt: Option, pub otlp_logs_service_opt: Option, @@ -280,7 +280,7 @@ async fn get_compaction_service_client_if_needed( node_config: &NodeConfig, cluster: &Cluster, ) -> anyhow::Result, anyhow::Error> { - if !get_bool_from_env(ENABLE_COMPACTION_SERVICE_ENV_KEY, false) { + if !node_config.indexer_config.enable_standalone_compactors { return Ok(None); } // Only janitor nodes (which host the planner) and indexer nodes (which need @@ -785,6 +785,31 @@ pub async fn serve_quickwit( None }; + let compactor_supervisor_opt = if node_config.is_service_enabled(QuickwitService::Compactor) { + let compaction_dir = node_config.data_dir_path.join("compaction"); + fs::create_dir_all(&compaction_dir)?; + let compaction_root_directory = quickwit_common::temp_dir::Builder::default() + .tempdir_in(&compaction_dir) 
+ .context("failed to create compaction temp directory")?; + let split_store = IndexingSplitStore::create_without_local_store_for_test(Arc::new( + RamStorage::default(), + )); + let compactor_mailbox = start_compactor_service( + &universe, + &node_config.compactor_config, + split_store, + metastore_through_control_plane.clone(), + storage_resolver.clone(), + event_broker.clone(), + compaction_root_directory, + ) + .await + .context("failed to start compactor service")?; + Some(compactor_mailbox) + } else { + None + }; + let jaeger_service_opt = if node_config.jaeger_config.enable_endpoint && node_config.is_service_enabled(QuickwitService::Searcher) { @@ -834,6 +859,7 @@ pub async fn serve_quickwit( ingest_service, ingester_opt: ingester_opt.clone(), compaction_service_client_opt, + _compactor_supervisor_opt: compactor_supervisor_opt, janitor_service_opt, jaeger_service_opt, otlp_logs_service_opt, @@ -1891,7 +1917,7 @@ mod tests { .await .unwrap(); - // Without the env var, no compaction service. + // Without standalone compactors, no compaction service. let mut node_config = NodeConfig::for_test(); node_config.enabled_services = HashSet::from([QuickwitService::Janitor, QuickwitService::Indexer]); @@ -1900,14 +1926,12 @@ mod tests { .unwrap(); assert!(result.is_none()); - // With the env var, compaction service client is returned. - unsafe { std::env::set_var(ENABLE_COMPACTION_SERVICE_ENV_KEY, "true") }; + // With standalone compactors enabled, compaction service client is returned. 
+ node_config.indexer_config.enable_standalone_compactors = true; let result = get_compaction_service_client_if_needed(&node_config, &cluster) .await .unwrap(); assert!(result.is_some()); - - unsafe { std::env::remove_var(ENABLE_COMPACTION_SERVICE_ENV_KEY) }; } #[tokio::test] @@ -1917,13 +1941,10 @@ mod tests { .await .unwrap(); - unsafe { std::env::set_var(ENABLE_COMPACTION_SERVICE_ENV_KEY, "true") }; - let mut node_config = NodeConfig::for_test(); node_config.enabled_services = HashSet::from([QuickwitService::Indexer]); + node_config.indexer_config.enable_standalone_compactors = true; let result = get_compaction_service_client_if_needed(&node_config, &cluster).await; assert!(result.is_err()); - - unsafe { std::env::remove_var(ENABLE_COMPACTION_SERVICE_ENV_KEY) }; } } diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index c52d790167d..7a30086ca5f 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -860,6 +860,7 @@ mod tests { node_config: Arc::new(node_config.clone()), search_service: Arc::new(MockSearchService::new()), jaeger_service_opt: None, + _compactor_supervisor_opt: None, env_filter_reload_fn: crate::do_nothing_env_filter_reload_fn(), }; From f35053e1b4c89baed0a1b48897abf10e6d9be250 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Mon, 13 Apr 2026 14:56:55 -0400 Subject: [PATCH 06/21] Implement compactor pipeline update logic (#6297) * Implement compactor pipeline update logic * lints and fixes * comments, lints, test fixes, other things --- quickwit/Cargo.lock | 1 - quickwit/quickwit-compaction/Cargo.toml | 2 +- .../src/compaction_pipeline.rs | 268 ++++++---- .../src/compactor_supervisor.rs | 253 ++++++--- quickwit/quickwit-compaction/src/lib.rs | 9 +- .../src/planner/compaction_service.rs | 16 +- .../quickwit-compaction/src/planner/mod.rs | 2 +- .../quickwit-config/src/node_config/mod.rs | 9 - .../src/test_utils/cluster_sandbox.rs | 12 +- quickwit/quickwit-proto/build.rs | 2 
+ .../protos/quickwit/compaction.proto | 34 +- .../codegen/quickwit/quickwit.compaction.rs | 491 ++++++++++++++---- quickwit/quickwit-proto/src/indexing/mod.rs | 1 + quickwit/quickwit-serve/src/lib.rs | 74 +-- 14 files changed, 834 insertions(+), 340 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 39cef7929cb..93206bf5d46 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -7158,7 +7158,6 @@ dependencies = [ "quickwit-metastore", "quickwit-proto", "quickwit-storage", - "serde", "tokio", "tracing", ] diff --git a/quickwit/quickwit-compaction/Cargo.toml b/quickwit/quickwit-compaction/Cargo.toml index 878ca2384ba..88735bb1606 100644 --- a/quickwit/quickwit-compaction/Cargo.toml +++ b/quickwit/quickwit-compaction/Cargo.toml @@ -20,11 +20,11 @@ quickwit-doc-mapper = { workspace = true } quickwit-indexing = { workspace = true } quickwit-proto = { workspace = true } quickwit-storage = { workspace = true } -serde = { workspace = true } tracing = { workspace = true } tokio = { workspace = true } [dev-dependencies] +quickwit-actors = { workspace = true, features = ["testsuite"] } quickwit-doc-mapper = { workspace = true, features = ["testsuite"] } quickwit-metastore = { workspace = true, features = ["testsuite"] } quickwit-proto = { workspace = true, features = ["testsuite"] } diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs index 309a5865a34..62bb040e0ba 100644 --- a/quickwit/quickwit-compaction/src/compaction_pipeline.rs +++ b/quickwit/quickwit-compaction/src/compaction_pipeline.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use quickwit_actors::{ActorContext, ActorHandle, Health, Supervisable}; +use quickwit_actors::{ActorHandle, Health, SpawnContext, Supervisable}; use quickwit_common::KillSwitch; use quickwit_common::io::{IoControls, Limiter}; use quickwit_common::pubsub::EventBroker; @@ -28,43 +28,54 @@ use quickwit_indexing::merge_policy::MergeOperation; use 
quickwit_indexing::{IndexingSplitStore, PublisherType, SplitsUpdateMailbox}; use quickwit_proto::indexing::MergePipelineId; use quickwit_proto::metastore::MetastoreServiceClient; +use quickwit_proto::types::{IndexUid, SourceId, SplitId}; use tracing::{debug, error, info}; -use crate::CompactorSupervisor; +#[derive(Clone, Debug, PartialEq)] +pub enum PipelineStatus { + InProgress, + Completed, + Failed { error: String }, +} + +pub struct PipelineStatusUpdate { + pub task_id: String, + pub index_uid: IndexUid, + pub source_id: SourceId, + pub split_ids: Vec, + pub merged_split_id: SplitId, + pub status: PipelineStatus, +} -pub struct CompactionPipelineHandles { - pub merge_split_downloader: ActorHandle, - pub merge_executor: ActorHandle, - pub merge_packager: ActorHandle, - pub merge_uploader: ActorHandle, - pub merge_publisher: ActorHandle, +struct CompactionPipelineHandles { + merge_split_downloader: ActorHandle, + merge_executor: ActorHandle, + merge_packager: ActorHandle, + merge_uploader: ActorHandle, + merge_publisher: ActorHandle, } -/// A single-use merge execution pipeline. Processes one merge task and -/// terminates. +/// A single-use compaction pipeline. Processes one merge task and terminates. /// /// Owned by the `CompactorSupervisor`, which periodically calls -/// `check_actor_health()` and acts on the result (retry, reap, etc.). +/// `check_actor_health()` to collect status updates. The pipeline manages +/// its own retry logic internally. pub struct CompactionPipeline { - pub task_id: String, - pub retry_count: usize, - pub kill_switch: KillSwitch, - pub scratch_directory: TempDirectory, - pub handles: Option, - - // Per-task parameters. - pub merge_operation: MergeOperation, - pub pipeline_id: MergePipelineId, - pub doc_mapper: Arc, - pub merge_policy: Arc, - pub retention_policy: Option, - - // Shared resources (cloned from CompactorSupervisor). 
- pub metastore: MetastoreServiceClient, - pub split_store: IndexingSplitStore, - pub io_throughput_limiter: Option, - pub max_concurrent_split_uploads: usize, - pub event_broker: EventBroker, + task_id: String, + merge_operation: MergeOperation, + pipeline_id: MergePipelineId, + status: PipelineStatus, + kill_switch: KillSwitch, + scratch_directory: TempDirectory, + handles: Option, + doc_mapper: Arc, + merge_policy: Arc, + retention_policy: Option, + metastore: MetastoreServiceClient, + split_store: IndexingSplitStore, + io_throughput_limiter: Option, + max_concurrent_split_uploads: usize, + event_broker: EventBroker, } impl CompactionPipeline { @@ -85,7 +96,7 @@ impl CompactionPipeline { ) -> Self { CompactionPipeline { task_id, - retry_count: 0, + status: PipelineStatus::InProgress, kill_switch: KillSwitch::default(), scratch_directory, handles: None, @@ -115,84 +126,76 @@ impl CompactionPipeline { ] } - /// Checks child actor health. - /// - /// `check_for_progress` controls whether stall detection is performed - /// (actors that are alive but haven't recorded progress since last check). - /// The supervisor controls the cadence of progress checks. + /// Returns pipeline status update by checking the health of individual pipeline actors. /// - /// Returns: - /// - `Success` when all actors have completed (merge published). - /// - `FailureOrUnhealthy` when any actor has died or stalled. - /// - `Healthy` when actors are running and making progress. - pub fn check_actor_health(&self) -> Health { + /// If the pipeline is already completed or failed (terminal status), returns the status + /// without re-checking actors. The pipeline sits in this "completed" state (finished or failed) + /// until the supervisor cleans up the spot in favor of a new pipeline. This is done to + /// ensure that the planner acks the success/failure. 
+ pub fn pipeline_status_update(&mut self) -> PipelineStatusUpdate { + self.update_status(); + self.build_status_update() + } + + fn update_status(&mut self) { + // Pipeline is finished but yet to be cleaned up. + if matches!( + self.status, + PipelineStatus::Completed | PipelineStatus::Failed { .. } + ) { + return; + } + // Pipeline is not initialized yet. if self.handles.is_none() { - return Health::Healthy; + return; } - let mut healthy_actors: Vec<&str> = Vec::new(); - let mut failure_or_unhealthy_actors: Vec<&str> = Vec::new(); - let mut success_actors: Vec<&str> = Vec::new(); + let mut has_healthy = false; + let mut failure_actor_names: Vec = Vec::new(); for supervisable in self.supervisables() { match supervisable.check_health(true) { Health::Healthy => { - healthy_actors.push(supervisable.name()); + has_healthy = true; } Health::FailureOrUnhealthy => { - failure_or_unhealthy_actors.push(supervisable.name()); - } - Health::Success => { - success_actors.push(supervisable.name()); + failure_actor_names.push(supervisable.name().to_string()); } + Health::Success => {} } } - if !failure_or_unhealthy_actors.is_empty() { - error!( - task_id=%self.task_id, - healthy_actors=?healthy_actors, - failed_or_unhealthy_actors=?failure_or_unhealthy_actors, - success_actors=?success_actors, - "compaction pipeline actor failure detected" - ); - return Health::FailureOrUnhealthy; + if !failure_actor_names.is_empty() { + let error_msg = format!("failed actors: {:?}", failure_actor_names); + error!(task_id=%self.task_id, "{error_msg}"); + self.status = PipelineStatus::Failed { error: error_msg }; + return; } - if healthy_actors.is_empty() { + if !has_healthy { debug!(task_id=%self.task_id, "all compaction pipeline actors completed"); - return Health::Success; - } - Health::Healthy - } - - pub async fn terminate(&mut self) { - self.kill_switch.kill(); - if let Some(handles) = self.handles.take() { - tokio::join!( - handles.merge_split_downloader.kill(), - 
handles.merge_executor.kill(), - handles.merge_packager.kill(), - handles.merge_uploader.kill(), - handles.merge_publisher.kill(), - ); + self.status = PipelineStatus::Completed; } } - /// Terminates the current actor chain, increments retry count, and - /// re-spawns. Downloaded splits remain on disk in the scratch directory. - pub async fn restart(&mut self, ctx: &ActorContext) { - self.terminate().await; - self.retry_count += 1; - if let Err(err) = self.spawn_pipeline(ctx) { - error!(task_id=%self.task_id, error=?err, "failed to respawn compaction pipeline"); + fn build_status_update(&self) -> PipelineStatusUpdate { + PipelineStatusUpdate { + task_id: self.task_id.clone(), + index_uid: self.pipeline_id.index_uid.clone(), + source_id: self.pipeline_id.source_id.clone(), + split_ids: self + .merge_operation + .splits_as_slice() + .iter() + .map(|split| split.split_id().to_string()) + .collect(), + merged_split_id: self.merge_operation.merge_split_id.clone(), + status: self.status.clone(), } } /// Spawns the 5-actor merge execution chain and sends the `MergeOperation` /// to the downloader to kick off execution. 
- fn spawn_pipeline(&mut self, ctx: &ActorContext) -> anyhow::Result<()> { - self.kill_switch = ctx.kill_switch().child(); - + pub(crate) fn spawn_pipeline(&mut self, spawn_ctx: &SpawnContext) -> anyhow::Result<()> { info!( task_id=%self.task_id, pipeline_id=%self.pipeline_id, @@ -206,9 +209,9 @@ impl CompactionPipeline { None, None, ); - let (merge_publisher_mailbox, merge_publisher_handle) = ctx - .spawn_actor() - .set_kill_switch(self.kill_switch.clone()) + let (merge_publisher_mailbox, merge_publisher_handle) = spawn_ctx + .spawn_builder() + .set_kill_switch(self.kill_switch.child()) .spawn(merge_publisher); // Uploader @@ -222,17 +225,17 @@ impl CompactionPipeline { self.max_concurrent_split_uploads, self.event_broker.clone(), ); - let (merge_uploader_mailbox, merge_uploader_handle) = ctx - .spawn_actor() - .set_kill_switch(self.kill_switch.clone()) + let (merge_uploader_mailbox, merge_uploader_handle) = spawn_ctx + .spawn_builder() + .set_kill_switch(self.kill_switch.child()) .spawn(merge_uploader); // Packager let tag_fields = self.doc_mapper.tag_named_fields()?; let merge_packager = Packager::new("MergePackager", tag_fields, merge_uploader_mailbox); - let (merge_packager_mailbox, merge_packager_handle) = ctx - .spawn_actor() - .set_kill_switch(self.kill_switch.clone()) + let (merge_packager_mailbox, merge_packager_handle) = spawn_ctx + .spawn_builder() + .set_kill_switch(self.kill_switch.child()) .spawn(merge_packager); // MergeExecutor @@ -249,9 +252,9 @@ impl CompactionPipeline { merge_executor_io_controls, merge_packager_mailbox, ); - let (merge_executor_mailbox, merge_executor_handle) = ctx - .spawn_actor() - .set_kill_switch(self.kill_switch.clone()) + let (merge_executor_mailbox, merge_executor_handle) = spawn_ctx + .spawn_builder() + .set_kill_switch(self.kill_switch.child()) .spawn(merge_executor); // MergeSplitDownloader @@ -261,9 +264,9 @@ impl CompactionPipeline { executor_mailbox: merge_executor_mailbox, io_controls: 
split_downloader_io_controls, }; - let (merge_split_downloader_mailbox, merge_split_downloader_handle) = ctx - .spawn_actor() - .set_kill_switch(self.kill_switch.clone()) + let (merge_split_downloader_mailbox, merge_split_downloader_handle) = spawn_ctx + .spawn_builder() + .set_kill_switch(self.kill_switch.child()) .spawn(merge_split_downloader); // Kick off the pipeline. @@ -286,10 +289,10 @@ impl CompactionPipeline { } #[cfg(test)] -mod tests { +pub(crate) mod tests { use std::sync::Arc; - use quickwit_actors::Health; + use quickwit_actors::Universe; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_doc_mapper::default_doc_mapper_for_test; @@ -301,13 +304,16 @@ mod tests { use quickwit_proto::types::{IndexUid, NodeId}; use quickwit_storage::RamStorage; - use super::CompactionPipeline; + use super::{CompactionPipeline, PipelineStatus}; - fn test_pipeline() -> CompactionPipeline { + pub fn test_pipeline(task_id: &str, split_ids: &[&str]) -> CompactionPipeline { let storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::create_without_local_store_for_test(storage); let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); - let splits = vec![SplitMetadata::for_test("split-1".to_string())]; + let splits: Vec = split_ids + .iter() + .map(|id| SplitMetadata::for_test(id.to_string())) + .collect(); let merge_operation = MergeOperation::new_merge_operation(splits); let pipeline_id = MergePipelineId { node_id: NodeId::from("test-node"), @@ -315,7 +321,7 @@ mod tests { source_id: "test-source".to_string(), }; CompactionPipeline::new( - "test-task".to_string(), + task_id.to_string(), TempDirectory::for_test(), merge_operation, pipeline_id, @@ -330,17 +336,51 @@ mod tests { ) } - #[test] - fn test_pipeline_no_handles_is_healthy() { - let pipeline = test_pipeline(); + #[tokio::test] + async fn test_spawn_pipeline_creates_handles() { + let universe = Universe::new(); + let mut 
pipeline = test_pipeline("task-1", &["split-1", "split-2"]); assert!(pipeline.handles.is_none()); - assert_eq!(pipeline.check_actor_health(), Health::Healthy); + pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap(); + assert!(pipeline.handles.is_some()); + universe.assert_quit().await; } #[tokio::test] - async fn test_pipeline_terminate_without_handles() { - let mut pipeline = test_pipeline(); - pipeline.terminate().await; - assert!(pipeline.handles.is_none()); + async fn test_status_update_unspawned_pipeline() { + let mut pipeline = test_pipeline("task-1", &["split-1"]); + let update = pipeline.pipeline_status_update(); + assert_eq!(update.status, PipelineStatus::InProgress); + assert_eq!(update.task_id, "task-1"); + assert_eq!(update.split_ids, vec!["split-1"]); + assert_eq!(update.source_id, "test-source"); + assert_eq!(update.index_uid, IndexUid::for_test("test-index", 0)); + } + + #[tokio::test] + async fn test_status_update_healthy_pipeline() { + let universe = Universe::new(); + let mut pipeline = test_pipeline("task-1", &["split-1"]); + pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap(); + let update = pipeline.pipeline_status_update(); + assert_eq!(update.status, PipelineStatus::InProgress); + universe.assert_quit().await; + } + + #[tokio::test] + async fn test_killed_pipeline_fails() { + let universe = Universe::new(); + let mut pipeline = test_pipeline("task-1", &["split-1"]); + pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap(); + + pipeline.kill_switch.kill(); + tokio::task::yield_now().await; + let update = pipeline.pipeline_status_update(); + assert!(matches!(update.status, PipelineStatus::Failed { .. })); + + // Calling again still returns Failed (sticky). + let update = pipeline.pipeline_status_update(); + assert!(matches!(update.status, PipelineStatus::Failed { .. 
})); + universe.assert_quit().await; } } diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index 4ba270c270b..e2809c4e8d3 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -15,36 +15,34 @@ use std::time::Duration; use async_trait::async_trait; -use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Health}; +use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; use quickwit_common::io::Limiter; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_indexing::IndexingSplitStore; +use quickwit_proto::compaction::{ + CompactionFailure, CompactionInProgress, CompactionPlannerServiceClient, CompactionSuccess, + ReportStatusRequest, +}; use quickwit_proto::metastore::MetastoreServiceClient; +use quickwit_proto::types::NodeId; use quickwit_storage::StorageResolver; -use serde::Serialize; -use tracing::{error, info}; +use tracing::info; -use crate::compaction_pipeline::CompactionPipeline; +use crate::compaction_pipeline::{CompactionPipeline, PipelineStatus, PipelineStatusUpdate}; -const SUPERVISE_LOOP_INTERVAL: Duration = Duration::from_secs(1); +const CHECK_PIPELINE_STATUSES_INTERVAL: Duration = Duration::from_secs(1); #[derive(Debug)] -struct SuperviseLoop; - -#[derive(Clone, Debug, Default, Serialize)] -pub struct CompactorSupervisorState { - pub num_pipeline_slots: usize, - pub num_occupied_slots: usize, - pub num_completed_tasks: usize, - pub num_failed_tasks: usize, -} +struct CheckPipelineStatuses; /// Manages a pool of `CompactionPipeline`s, each executing a single merge task. /// -/// Periodically checks pipeline health, handles retries on failure, and reaps -/// completed/failed pipelines. +/// Periodically collects pipeline status updates and forwards them to the +/// compaction planner. 
Pipelines manage their own retry logic internally. pub struct CompactorSupervisor { + node_id: NodeId, + planner_client: CompactionPlannerServiceClient, pipelines: Vec>, // Shared resources distributed to pipelines when spawning actor chains. @@ -57,17 +55,13 @@ pub struct CompactorSupervisor { // Scratch directory root (/compaction/). compaction_root_directory: TempDirectory, - - max_local_retries: usize, - - // dummy counters until we have real state - num_completed_tasks: usize, - num_failed_tasks: usize, } impl CompactorSupervisor { #[allow(clippy::too_many_arguments)] pub fn new( + node_id: NodeId, + planner_client: CompactionPlannerServiceClient, num_pipeline_slots: usize, io_throughput_limiter: Option, split_store: IndexingSplitStore, @@ -76,10 +70,11 @@ impl CompactorSupervisor { max_concurrent_split_uploads: usize, event_broker: EventBroker, compaction_root_directory: TempDirectory, - max_local_retries: usize, ) -> Self { let pipelines = (0..num_pipeline_slots).map(|_| None).collect(); CompactorSupervisor { + node_id, + planner_client, pipelines, io_throughput_limiter, split_store, @@ -88,46 +83,66 @@ impl CompactorSupervisor { max_concurrent_split_uploads, event_broker, compaction_root_directory, - max_local_retries, - num_completed_tasks: 0, - num_failed_tasks: 0, } } - async fn supervise(&mut self, ctx: &ActorContext) { + fn check_pipeline_statuses(&mut self) -> Vec { + let mut statuses = Vec::new(); for slot in &mut self.pipelines { let Some(pipeline) = slot else { continue; }; + statuses.push(pipeline.pipeline_status_update()); + } + statuses + } + + fn build_report_status_request( + &self, + statuses: &[PipelineStatusUpdate], + ) -> ReportStatusRequest { + let in_progress_count = statuses + .iter() + .filter(|s| matches!(s.status, PipelineStatus::InProgress)) + .count(); + let available_slots = (self.pipelines.len() - in_progress_count) as u32; + + let mut in_progress = Vec::new(); + let mut successes = Vec::new(); + let mut failures = Vec::new(); - 
match pipeline.check_actor_health() { - Health::Healthy => {} - Health::Success => { - info!(task_id=%pipeline.task_id, "compaction task completed"); - self.num_completed_tasks += 1; - *slot = None; + for update in statuses { + match &update.status { + PipelineStatus::InProgress => { + in_progress.push(CompactionInProgress { + task_id: update.task_id.clone(), + index_uid: Some(update.index_uid.clone()), + source_id: update.source_id.clone(), + split_ids: update.split_ids.clone(), + }); } - Health::FailureOrUnhealthy => { - if pipeline.retry_count < self.max_local_retries { - info!( - task_id=%pipeline.task_id, - retry_count=%pipeline.retry_count, - "retrying compaction pipeline" - ); - pipeline.restart(ctx).await; - } else { - error!( - task_id=%pipeline.task_id, - retry_count=%pipeline.retry_count, - "compaction pipeline exhausted retries" - ); - pipeline.terminate().await; - self.num_failed_tasks += 1; - *slot = None; - } + PipelineStatus::Completed => { + successes.push(CompactionSuccess { + task_id: update.task_id.clone(), + merged_split_id: update.merged_split_id.clone(), + }); + } + PipelineStatus::Failed { error } => { + failures.push(CompactionFailure { + task_id: update.task_id.clone(), + error_message: error.clone(), + }); } } } + + ReportStatusRequest { + node_id: self.node_id.to_string(), + available_slots, + in_progress, + successes, + failures, + } } } @@ -146,22 +161,24 @@ impl Actor for CompactorSupervisor { num_pipeline_slots=%self.pipelines.len(), "compactor supervisor started" ); - ctx.schedule_self_msg(SUPERVISE_LOOP_INTERVAL, SuperviseLoop); + ctx.schedule_self_msg(CHECK_PIPELINE_STATUSES_INTERVAL, CheckPipelineStatuses); Ok(()) } } #[async_trait] -impl Handler for CompactorSupervisor { +impl Handler for CompactorSupervisor { type Reply = (); async fn handle( &mut self, - _msg: SuperviseLoop, + _msg: CheckPipelineStatuses, ctx: &ActorContext, ) -> Result<(), ActorExitStatus> { - self.supervise(ctx).await; - 
ctx.schedule_self_msg(SUPERVISE_LOOP_INTERVAL, SuperviseLoop); + let statuses = self.check_pipeline_statuses(); + let _request = self.build_report_status_request(&statuses); + // TODO: send request to planner via gRPC, clear completed/failed slots on success. + ctx.schedule_self_msg(CHECK_PIPELINE_STATUSES_INTERVAL, CheckPipelineStatuses); Ok(()) } } @@ -172,16 +189,23 @@ mod tests { use quickwit_actors::Universe; use quickwit_common::temp_dir::TempDirectory; + use quickwit_proto::compaction::CompactionPlannerServiceClient; use quickwit_proto::metastore::{MetastoreServiceClient, MockMetastoreService}; + use quickwit_proto::types::NodeId; use quickwit_storage::{RamStorage, StorageResolver}; use super::*; + use crate::compaction_pipeline::tests::test_pipeline; + use crate::planner::StubCompactionPlannerService; fn test_supervisor(num_slots: usize) -> CompactorSupervisor { let storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::create_without_local_store_for_test(storage); let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); + let compaction_client = CompactionPlannerServiceClient::new(StubCompactionPlannerService); CompactorSupervisor::new( + NodeId::from("test-node"), + compaction_client, num_slots, None, split_store, @@ -190,30 +214,115 @@ mod tests { 2, EventBroker::default(), TempDirectory::for_test(), - 2, ) } + #[test] + fn test_check_pipeline_statuses_empty_slots() { + let mut supervisor = test_supervisor(4); + let statuses = supervisor.check_pipeline_statuses(); + assert!(statuses.is_empty()); + } + #[tokio::test] - async fn test_supervisor_starts_with_empty_slots() { - let universe = Universe::with_accelerated_time(); - let supervisor = test_supervisor(4); - let (_mailbox, handle) = universe.spawn_builder().spawn(supervisor); - let obs = handle.process_pending_and_observe().await; - assert_eq!(obs.obs_type, quickwit_actors::ObservationType::Alive); + async fn 
test_check_pipeline_statuses_with_pipelines() { + let universe = Universe::new(); + let mut supervisor = test_supervisor(4); + + let mut pipeline = test_pipeline("task-1", &["split-a", "split-b"]); + pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap(); + supervisor.pipelines[0] = Some(pipeline); + + let mut pipeline = test_pipeline("task-2", &["split-c"]); + pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap(); + supervisor.pipelines[2] = Some(pipeline); + + let statuses = supervisor.check_pipeline_statuses(); + assert_eq!(statuses.len(), 2); + assert_eq!(statuses[0].task_id, "task-1"); + assert_eq!(statuses[0].split_ids, vec!["split-a", "split-b"]); + assert_eq!(statuses[1].task_id, "task-2"); + assert_eq!(statuses[1].split_ids, vec!["split-c"]); universe.assert_quit().await; } #[tokio::test] - async fn test_supervisor_supervise_reaps_no_handle_pipelines() { - // A pipeline with no handles returns Healthy, so it stays in its slot. - // We spawn the supervisor as an actor so we can get a context for supervise(). - let universe = Universe::with_accelerated_time(); - let supervisor = test_supervisor(2); - let (_mailbox, handle) = universe.spawn_builder().spawn(supervisor); - // Let the supervisor run one supervision loop (it schedules SuperviseLoop on init). 
- let obs = handle.process_pending_and_observe().await; - assert_eq!(obs.obs_type, quickwit_actors::ObservationType::Alive); + async fn test_end_to_end_statuses_to_proto() { + let universe = Universe::new(); + let mut supervisor = test_supervisor(3); + + let mut pipeline = test_pipeline("task-1", &["s1", "s2"]); + pipeline.spawn_pipeline(universe.spawn_ctx()).unwrap(); + supervisor.pipelines[0] = Some(pipeline); + + let statuses = supervisor.check_pipeline_statuses(); + let request = supervisor.build_report_status_request(&statuses); + + assert_eq!(request.node_id, "test-node"); + // 3 slots, 1 in-progress = 2 available + assert_eq!(request.available_slots, 2); + assert_eq!(request.in_progress.len(), 1); + assert_eq!(request.in_progress[0].task_id, "task-1"); + assert_eq!(request.in_progress[0].split_ids, vec!["s1", "s2"]); + assert!(request.successes.is_empty()); + assert!(request.failures.is_empty()); universe.assert_quit().await; } + + #[test] + fn test_build_report_status_request_empty() { + let supervisor = test_supervisor(4); + let request = supervisor.build_report_status_request(&[]); + assert_eq!(request.node_id, "test-node"); + assert_eq!(request.available_slots, 4); + assert!(request.in_progress.is_empty()); + assert!(request.successes.is_empty()); + assert!(request.failures.is_empty()); + } + + #[test] + fn test_build_report_status_request_mixed_statuses() { + let supervisor = test_supervisor(4); + let statuses = vec![ + PipelineStatusUpdate { + task_id: "task-1".to_string(), + index_uid: quickwit_proto::types::IndexUid::for_test("test-index", 0), + source_id: "src".to_string(), + split_ids: vec!["s1".to_string(), "s2".to_string()], + merged_split_id: "merged-1".to_string(), + status: PipelineStatus::InProgress, + }, + PipelineStatusUpdate { + task_id: "task-2".to_string(), + index_uid: quickwit_proto::types::IndexUid::for_test("test-index", 0), + source_id: "src".to_string(), + split_ids: vec!["s3".to_string()], + merged_split_id: 
"merged-2".to_string(), + status: PipelineStatus::Completed, + }, + PipelineStatusUpdate { + task_id: "task-3".to_string(), + index_uid: quickwit_proto::types::IndexUid::for_test("test-index", 0), + source_id: "src".to_string(), + split_ids: vec!["s4".to_string()], + merged_split_id: "merged-3".to_string(), + status: PipelineStatus::Failed { + error: "boom".to_string(), + }, + }, + ]; + + let request = supervisor.build_report_status_request(&statuses); + + assert_eq!(request.available_slots, 3); + assert_eq!(request.in_progress.len(), 1); + assert_eq!(request.in_progress[0].task_id, "task-1"); + assert_eq!(request.in_progress[0].split_ids, vec!["s1", "s2"]); + assert_eq!(request.successes.len(), 1); + assert_eq!(request.successes[0].task_id, "task-2"); + assert_eq!(request.successes[0].merged_split_id, "merged-2"); + assert_eq!(request.failures.len(), 1); + assert_eq!(request.failures[0].task_id, "task-3"); + assert_eq!(request.failures[0].error_message, "boom"); + } } diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs index fd2c5b05885..05f79d83f73 100644 --- a/quickwit/quickwit-compaction/src/lib.rs +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -27,12 +27,17 @@ use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::CompactorConfig; use quickwit_indexing::IndexingSplitStore; +use quickwit_proto::compaction::CompactionPlannerServiceClient; use quickwit_proto::metastore::MetastoreServiceClient; +use quickwit_proto::types::NodeId; use quickwit_storage::StorageResolver; use tracing::info; +#[allow(clippy::too_many_arguments)] pub async fn start_compactor_service( universe: &Universe, + node_id: NodeId, + compaction_client: CompactionPlannerServiceClient, compactor_config: &CompactorConfig, split_store: IndexingSplitStore, metastore: MetastoreServiceClient, @@ -41,8 +46,11 @@ pub async fn start_compactor_service( compaction_root_directory: TempDirectory, ) -> 
anyhow::Result> { info!("starting compactor service"); + // TODO: configure this for real let io_throughput_limiter = compactor_config.max_merge_write_throughput.map(io::limiter); let supervisor = CompactorSupervisor::new( + node_id, + compaction_client, compactor_config.max_concurrent_pipelines.get(), io_throughput_limiter, split_store, @@ -51,7 +59,6 @@ pub async fn start_compactor_service( compactor_config.max_concurrent_split_uploads, event_broker, compaction_root_directory, - compactor_config.max_local_retries, ); let (mailbox, _handle) = universe.spawn_builder().spawn(supervisor); Ok(mailbox) diff --git a/quickwit/quickwit-compaction/src/planner/compaction_service.rs b/quickwit/quickwit-compaction/src/planner/compaction_service.rs index 0e3104a2129..340ff4e0d5c 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_service.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_service.rs @@ -13,14 +13,24 @@ // limitations under the License. use async_trait::async_trait; -use quickwit_proto::compaction::{CompactionResult, CompactionService, PingRequest, PingResponse}; +use quickwit_proto::compaction::{ + CompactionPlannerService, CompactionResult, PingRequest, PingResponse, ReportStatusRequest, + ReportStatusResponse, +}; #[derive(Debug, Clone)] -pub struct StubCompactionService; +pub struct StubCompactionPlannerService; #[async_trait] -impl CompactionService for StubCompactionService { +impl CompactionPlannerService for StubCompactionPlannerService { async fn ping(&self, _request: PingRequest) -> CompactionResult { Ok(PingResponse {}) } + + async fn report_status( + &self, + _request: ReportStatusRequest, + ) -> CompactionResult { + Ok(ReportStatusResponse {}) + } } diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs index 3b531a439f2..5ccd750a20c 100644 --- a/quickwit/quickwit-compaction/src/planner/mod.rs +++ b/quickwit/quickwit-compaction/src/planner/mod.rs @@ -14,4 +14,4 @@ mod 
compaction_service; -pub use compaction_service::StubCompactionService; +pub use compaction_service::StubCompactionPlannerService; diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index 47c3b6a77cb..ff981fcf31c 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -256,9 +256,6 @@ pub struct CompactorConfig { /// Limits the IO throughput of the split downloader and the merge executor. #[serde(default)] pub max_merge_write_throughput: Option, - /// Maximum local retries before reporting a merge as failed to the planner. - #[serde(default = "CompactorConfig::default_max_local_retries")] - pub max_local_retries: usize, /// Maximum size of the local split store cache in bytes. #[serde(default = "CompactorConfig::default_split_store_max_num_bytes")] pub split_store_max_num_bytes: ByteSize, @@ -276,10 +273,6 @@ impl CompactorConfig { 12 } - fn default_max_local_retries() -> usize { - 2 - } - pub fn default_split_store_max_num_bytes() -> ByteSize { ByteSize::gib(100) } @@ -294,7 +287,6 @@ impl CompactorConfig { max_concurrent_pipelines: NonZeroUsize::new(2).unwrap(), max_concurrent_split_uploads: 4, max_merge_write_throughput: None, - max_local_retries: 2, split_store_max_num_bytes: ByteSize::mb(1), split_store_max_num_splits: 3, } @@ -307,7 +299,6 @@ impl Default for CompactorConfig { max_concurrent_pipelines: Self::default_max_concurrent_pipelines(), max_concurrent_split_uploads: Self::default_max_concurrent_split_uploads(), max_merge_write_throughput: None, - max_local_retries: Self::default_max_local_retries(), split_store_max_num_bytes: Self::default_split_store_max_num_bytes(), split_store_max_num_splits: Self::default_split_store_max_num_splits(), } diff --git a/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs b/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs index 7b425061134..9c2fe3effde 100644 
--- a/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs +++ b/quickwit/quickwit-integration-tests/src/test_utils/cluster_sandbox.rs @@ -683,7 +683,11 @@ impl ClusterSandbox { #[tokio::test] async fn test_sandbox_happy_path() { let sandbox = ClusterSandboxBuilder::default() - .add_node([QuickwitService::ControlPlane, QuickwitService::Metastore]) + .add_node([ + QuickwitService::ControlPlane, + QuickwitService::Metastore, + QuickwitService::Janitor, + ]) .add_node([QuickwitService::Searcher]) .add_node([QuickwitService::Indexer]) .build_and_start() @@ -696,7 +700,11 @@ async fn test_sandbox_happy_path() { #[tokio::test] async fn test_sandbox_add_node_dynamically() { let mut sandbox = ClusterSandboxBuilder::default() - .add_node([QuickwitService::ControlPlane, QuickwitService::Metastore]) + .add_node([ + QuickwitService::ControlPlane, + QuickwitService::Metastore, + QuickwitService::Janitor, + ]) .add_node([QuickwitService::Searcher]) .build_and_start() .await; diff --git a/quickwit/quickwit-proto/build.rs b/quickwit/quickwit-proto/build.rs index a83e8723bd2..7671fd9ea19 100644 --- a/quickwit/quickwit-proto/build.rs +++ b/quickwit/quickwit-proto/build.rs @@ -38,10 +38,12 @@ fn main() -> Result<(), Box> { // Compaction service. 
let mut prost_config = prost_build::Config::default(); prost_config.file_descriptor_set_path("src/codegen/quickwit/compaction_descriptor.bin"); + prost_config.extern_path(".quickwit.common.IndexUid", "crate::types::IndexUid"); Codegen::builder() .with_prost_config(prost_config) .with_protos(&["protos/quickwit/compaction.proto"]) + .with_includes(&["protos"]) .with_output_dir("src/codegen/quickwit") .with_result_type_path("crate::compaction::CompactionResult") .with_error_type_path("crate::compaction::CompactionError") diff --git a/quickwit/quickwit-proto/protos/quickwit/compaction.proto b/quickwit/quickwit-proto/protos/quickwit/compaction.proto index 82979c3b4ee..f071cc3bc01 100644 --- a/quickwit/quickwit-proto/protos/quickwit/compaction.proto +++ b/quickwit/quickwit-proto/protos/quickwit/compaction.proto @@ -16,9 +16,39 @@ syntax = "proto3"; package quickwit.compaction; -service CompactionService { +import "quickwit/common.proto"; + +service CompactionPlannerService { rpc Ping(PingRequest) returns (PingResponse); + rpc ReportStatus(ReportStatusRequest) returns (ReportStatusResponse); } message PingRequest {} -message PingResponse {} \ No newline at end of file +message PingResponse {} + +message ReportStatusRequest { + string node_id = 1; + uint32 available_slots = 2; + repeated CompactionInProgress in_progress = 3; + repeated CompactionSuccess successes = 4; + repeated CompactionFailure failures = 5; +} + +message CompactionInProgress { + string task_id = 1; + quickwit.common.IndexUid index_uid = 2; + string source_id = 3; + repeated string split_ids = 4; +} + +message CompactionSuccess { + string task_id = 1; + string merged_split_id = 2; +} + +message CompactionFailure { + string task_id = 1; + string error_message = 2; +} + +message ReportStatusResponse {} \ No newline at end of file diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs index 
eec6be161ca..df33d068f9a 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs @@ -5,6 +5,51 @@ pub struct PingRequest {} #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] pub struct PingResponse {} +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ReportStatusRequest { + #[prost(string, tag = "1")] + pub node_id: ::prost::alloc::string::String, + #[prost(uint32, tag = "2")] + pub available_slots: u32, + #[prost(message, repeated, tag = "3")] + pub in_progress: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "4")] + pub successes: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "5")] + pub failures: ::prost::alloc::vec::Vec, +} +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct CompactionInProgress { + #[prost(string, tag = "1")] + pub task_id: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub index_uid: ::core::option::Option, + #[prost(string, tag = "3")] + pub source_id: ::prost::alloc::string::String, + #[prost(string, repeated, tag = "4")] + pub split_ids: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct CompactionSuccess { + #[prost(string, tag = "1")] + pub task_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub merged_split_id: ::prost::alloc::string::String, +} +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct CompactionFailure { + #[prost(string, tag = "1")] + pub task_id: ::prost::alloc::string::String, + 
#[prost(string, tag = "2")] + pub error_message: ::prost::alloc::string::String, +} +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct ReportStatusResponse {} /// BEGIN quickwit-codegen #[allow(unused_imports)] use std::str::FromStr; @@ -15,43 +60,54 @@ impl RpcName for PingRequest { "ping" } } +impl RpcName for ReportStatusRequest { + fn rpc_name() -> &'static str { + "report_status" + } +} #[cfg_attr(any(test, feature = "testsuite"), mockall::automock)] #[async_trait::async_trait] -pub trait CompactionService: std::fmt::Debug + Send + Sync + 'static { +pub trait CompactionPlannerService: std::fmt::Debug + Send + Sync + 'static { async fn ping( &self, request: PingRequest, ) -> crate::compaction::CompactionResult; + async fn report_status( + &self, + request: ReportStatusRequest, + ) -> crate::compaction::CompactionResult; } #[derive(Debug, Clone)] -pub struct CompactionServiceClient { - inner: InnerCompactionServiceClient, +pub struct CompactionPlannerServiceClient { + inner: InnerCompactionPlannerServiceClient, } #[derive(Debug, Clone)] -struct InnerCompactionServiceClient(std::sync::Arc); -impl CompactionServiceClient { +struct InnerCompactionPlannerServiceClient(std::sync::Arc); +impl CompactionPlannerServiceClient { pub fn new(instance: T) -> Self where - T: CompactionService, + T: CompactionPlannerService, { #[cfg(any(test, feature = "testsuite"))] assert!( std::any::TypeId::of:: < T > () != std::any::TypeId::of:: < - MockCompactionService > (), - "`MockCompactionService` must be wrapped in a `MockCompactionServiceWrapper`: use `CompactionServiceClient::from_mock(mock)` to instantiate the client" + MockCompactionPlannerService > (), + "`MockCompactionPlannerService` must be wrapped in a `MockCompactionPlannerServiceWrapper`: use `CompactionPlannerServiceClient::from_mock(mock)` to instantiate the client" ); Self { - inner: 
InnerCompactionServiceClient(std::sync::Arc::new(instance)), + inner: InnerCompactionPlannerServiceClient(std::sync::Arc::new(instance)), } } pub fn as_grpc_service( &self, max_message_size: bytesize::ByteSize, - ) -> compaction_service_grpc_server::CompactionServiceGrpcServer< - CompactionServiceGrpcServerAdapter, + ) -> compaction_planner_service_grpc_server::CompactionPlannerServiceGrpcServer< + CompactionPlannerServiceGrpcServerAdapter, > { - let adapter = CompactionServiceGrpcServerAdapter::new(self.clone()); - compaction_service_grpc_server::CompactionServiceGrpcServer::new(adapter) + let adapter = CompactionPlannerServiceGrpcServerAdapter::new(self.clone()); + compaction_planner_service_grpc_server::CompactionPlannerServiceGrpcServer::new( + adapter, + ) .accept_compressed(tonic::codec::CompressionEncoding::Gzip) .accept_compressed(tonic::codec::CompressionEncoding::Zstd) .send_compressed(tonic::codec::CompressionEncoding::Gzip) @@ -68,7 +124,7 @@ impl CompactionServiceClient { let (_, connection_keys_watcher) = tokio::sync::watch::channel( std::collections::HashSet::from_iter([addr]), ); - let mut client = compaction_service_grpc_client::CompactionServiceGrpcClient::new( + let mut client = compaction_planner_service_grpc_client::CompactionPlannerServiceGrpcClient::new( channel, ) .max_decoding_message_size(max_message_size.0 as usize) @@ -78,7 +134,7 @@ impl CompactionServiceClient { .accept_compressed(compression_encoding) .send_compressed(compression_encoding); } - let adapter = CompactionServiceGrpcClientAdapter::new( + let adapter = CompactionPlannerServiceGrpcClientAdapter::new( client, connection_keys_watcher, ); @@ -88,9 +144,9 @@ impl CompactionServiceClient { balance_channel: quickwit_common::tower::BalanceChannel, max_message_size: bytesize::ByteSize, compression_encoding_opt: Option, - ) -> CompactionServiceClient { + ) -> CompactionPlannerServiceClient { let connection_keys_watcher = balance_channel.connection_keys_watcher(); - let mut client = 
compaction_service_grpc_client::CompactionServiceGrpcClient::new( + let mut client = compaction_planner_service_grpc_client::CompactionPlannerServiceGrpcClient::new( balance_channel, ) .max_decoding_message_size(max_message_size.0 as usize) @@ -100,7 +156,7 @@ impl CompactionServiceClient { .accept_compressed(compression_encoding) .send_compressed(compression_encoding); } - let adapter = CompactionServiceGrpcClientAdapter::new( + let adapter = CompactionPlannerServiceGrpcClientAdapter::new( client, connection_keys_watcher, ); @@ -109,55 +165,69 @@ impl CompactionServiceClient { pub fn from_mailbox(mailbox: quickwit_actors::Mailbox) -> Self where A: quickwit_actors::Actor + std::fmt::Debug + Send + 'static, - CompactionServiceMailbox: CompactionService, + CompactionPlannerServiceMailbox: CompactionPlannerService, { - CompactionServiceClient::new(CompactionServiceMailbox::new(mailbox)) + CompactionPlannerServiceClient::new( + CompactionPlannerServiceMailbox::new(mailbox), + ) } - pub fn tower() -> CompactionServiceTowerLayerStack { - CompactionServiceTowerLayerStack::default() + pub fn tower() -> CompactionPlannerServiceTowerLayerStack { + CompactionPlannerServiceTowerLayerStack::default() } #[cfg(any(test, feature = "testsuite"))] - pub fn from_mock(mock: MockCompactionService) -> Self { - let mock_wrapper = mock_compaction_service::MockCompactionServiceWrapper { + pub fn from_mock(mock: MockCompactionPlannerService) -> Self { + let mock_wrapper = mock_compaction_planner_service::MockCompactionPlannerServiceWrapper { inner: tokio::sync::Mutex::new(mock), }; Self::new(mock_wrapper) } #[cfg(any(test, feature = "testsuite"))] pub fn mocked() -> Self { - Self::from_mock(MockCompactionService::new()) + Self::from_mock(MockCompactionPlannerService::new()) } } #[async_trait::async_trait] -impl CompactionService for CompactionServiceClient { +impl CompactionPlannerService for CompactionPlannerServiceClient { async fn ping( &self, request: PingRequest, ) -> 
crate::compaction::CompactionResult { self.inner.0.ping(request).await } + async fn report_status( + &self, + request: ReportStatusRequest, + ) -> crate::compaction::CompactionResult { + self.inner.0.report_status(request).await + } } #[cfg(any(test, feature = "testsuite"))] -pub mod mock_compaction_service { +pub mod mock_compaction_planner_service { use super::*; #[derive(Debug)] - pub struct MockCompactionServiceWrapper { - pub(super) inner: tokio::sync::Mutex, + pub struct MockCompactionPlannerServiceWrapper { + pub(super) inner: tokio::sync::Mutex, } #[async_trait::async_trait] - impl CompactionService for MockCompactionServiceWrapper { + impl CompactionPlannerService for MockCompactionPlannerServiceWrapper { async fn ping( &self, request: super::PingRequest, ) -> crate::compaction::CompactionResult { self.inner.lock().await.ping(request).await } + async fn report_status( + &self, + request: super::ReportStatusRequest, + ) -> crate::compaction::CompactionResult { + self.inner.lock().await.report_status(request).await + } } } pub type BoxFuture = std::pin::Pin< Box> + Send + 'static>, >; -impl tower::Service for InnerCompactionServiceClient { +impl tower::Service for InnerCompactionPlannerServiceClient { type Response = PingResponse; type Error = crate::compaction::CompactionError; type Future = BoxFuture; @@ -173,25 +243,52 @@ impl tower::Service for InnerCompactionServiceClient { Box::pin(fut) } } +impl tower::Service for InnerCompactionPlannerServiceClient { + type Response = ReportStatusResponse; + type Error = crate::compaction::CompactionError; + type Future = BoxFuture; + fn poll_ready( + &mut self, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + std::task::Poll::Ready(Ok(())) + } + fn call(&mut self, request: ReportStatusRequest) -> Self::Future { + let svc = self.clone(); + let fut = async move { svc.0.report_status(request).await }; + Box::pin(fut) + } +} /// A tower service stack is a set of tower services. 
#[derive(Debug)] -struct CompactionServiceTowerServiceStack { +struct CompactionPlannerServiceTowerServiceStack { #[allow(dead_code)] - inner: InnerCompactionServiceClient, + inner: InnerCompactionPlannerServiceClient, ping_svc: quickwit_common::tower::BoxService< PingRequest, PingResponse, crate::compaction::CompactionError, >, + report_status_svc: quickwit_common::tower::BoxService< + ReportStatusRequest, + ReportStatusResponse, + crate::compaction::CompactionError, + >, } #[async_trait::async_trait] -impl CompactionService for CompactionServiceTowerServiceStack { +impl CompactionPlannerService for CompactionPlannerServiceTowerServiceStack { async fn ping( &self, request: PingRequest, ) -> crate::compaction::CompactionResult { self.ping_svc.clone().ready().await?.call(request).await } + async fn report_status( + &self, + request: ReportStatusRequest, + ) -> crate::compaction::CompactionResult { + self.report_status_svc.clone().ready().await?.call(request).await + } } type PingLayer = quickwit_common::tower::BoxLayer< quickwit_common::tower::BoxService< @@ -203,11 +300,22 @@ type PingLayer = quickwit_common::tower::BoxLayer< PingResponse, crate::compaction::CompactionError, >; +type ReportStatusLayer = quickwit_common::tower::BoxLayer< + quickwit_common::tower::BoxService< + ReportStatusRequest, + ReportStatusResponse, + crate::compaction::CompactionError, + >, + ReportStatusRequest, + ReportStatusResponse, + crate::compaction::CompactionError, +>; #[derive(Debug, Default)] -pub struct CompactionServiceTowerLayerStack { +pub struct CompactionPlannerServiceTowerLayerStack { ping_layers: Vec, + report_status_layers: Vec, } -impl CompactionServiceTowerLayerStack { +impl CompactionPlannerServiceTowerLayerStack { pub fn stack_layer(mut self, layer: L) -> Self where L: tower::Layer< @@ -235,8 +343,35 @@ impl CompactionServiceTowerLayerStack { crate::compaction::CompactionError, >, >>::Service as tower::Service>::Future: Send + 'static, + L: tower::Layer< + 
quickwit_common::tower::BoxService< + ReportStatusRequest, + ReportStatusResponse, + crate::compaction::CompactionError, + >, + > + Clone + Send + Sync + 'static, + , + >>::Service: tower::Service< + ReportStatusRequest, + Response = ReportStatusResponse, + Error = crate::compaction::CompactionError, + > + Clone + Send + Sync + 'static, + <, + >>::Service as tower::Service>::Future: Send + 'static, { self.ping_layers.push(quickwit_common::tower::BoxLayer::new(layer.clone())); + self.report_status_layers + .push(quickwit_common::tower::BoxLayer::new(layer.clone())); self } pub fn stack_ping_layer(mut self, layer: L) -> Self @@ -258,11 +393,32 @@ impl CompactionServiceTowerLayerStack { self.ping_layers.push(quickwit_common::tower::BoxLayer::new(layer)); self } - pub fn build(self, instance: T) -> CompactionServiceClient + pub fn stack_report_status_layer(mut self, layer: L) -> Self + where + L: tower::Layer< + quickwit_common::tower::BoxService< + ReportStatusRequest, + ReportStatusResponse, + crate::compaction::CompactionError, + >, + > + Send + Sync + 'static, + L::Service: tower::Service< + ReportStatusRequest, + Response = ReportStatusResponse, + Error = crate::compaction::CompactionError, + > + Clone + Send + Sync + 'static, + >::Future: Send + 'static, + { + self.report_status_layers.push(quickwit_common::tower::BoxLayer::new(layer)); + self + } + pub fn build(self, instance: T) -> CompactionPlannerServiceClient where - T: CompactionService, + T: CompactionPlannerService, { - let inner_client = InnerCompactionServiceClient(std::sync::Arc::new(instance)); + let inner_client = InnerCompactionPlannerServiceClient( + std::sync::Arc::new(instance), + ); self.build_from_inner_client(inner_client) } pub fn build_from_channel( @@ -271,8 +427,8 @@ impl CompactionServiceTowerLayerStack { channel: tonic::transport::Channel, max_message_size: bytesize::ByteSize, compression_encoding_opt: Option, - ) -> CompactionServiceClient { - let client = 
CompactionServiceClient::from_channel( + ) -> CompactionPlannerServiceClient { + let client = CompactionPlannerServiceClient::from_channel( addr, channel, max_message_size, @@ -286,8 +442,8 @@ impl CompactionServiceTowerLayerStack { balance_channel: quickwit_common::tower::BalanceChannel, max_message_size: bytesize::ByteSize, compression_encoding_opt: Option, - ) -> CompactionServiceClient { - let client = CompactionServiceClient::from_balance_channel( + ) -> CompactionPlannerServiceClient { + let client = CompactionPlannerServiceClient::from_balance_channel( balance_channel, max_message_size, compression_encoding_opt, @@ -298,29 +454,29 @@ impl CompactionServiceTowerLayerStack { pub fn build_from_mailbox( self, mailbox: quickwit_actors::Mailbox, - ) -> CompactionServiceClient + ) -> CompactionPlannerServiceClient where A: quickwit_actors::Actor + std::fmt::Debug + Send + 'static, - CompactionServiceMailbox: CompactionService, + CompactionPlannerServiceMailbox: CompactionPlannerService, { - let inner_client = InnerCompactionServiceClient( - std::sync::Arc::new(CompactionServiceMailbox::new(mailbox)), + let inner_client = InnerCompactionPlannerServiceClient( + std::sync::Arc::new(CompactionPlannerServiceMailbox::new(mailbox)), ); self.build_from_inner_client(inner_client) } #[cfg(any(test, feature = "testsuite"))] pub fn build_from_mock( self, - mock: MockCompactionService, - ) -> CompactionServiceClient { - let client = CompactionServiceClient::from_mock(mock); + mock: MockCompactionPlannerService, + ) -> CompactionPlannerServiceClient { + let client = CompactionPlannerServiceClient::from_mock(mock); let inner_client = client.inner; self.build_from_inner_client(inner_client) } fn build_from_inner_client( self, - inner_client: InnerCompactionServiceClient, - ) -> CompactionServiceClient { + inner_client: InnerCompactionPlannerServiceClient, + ) -> CompactionPlannerServiceClient { let ping_svc = self .ping_layers .into_iter() @@ -329,11 +485,20 @@ impl 
CompactionServiceTowerLayerStack { quickwit_common::tower::BoxService::new(inner_client.clone()), |svc, layer| layer.layer(svc), ); - let tower_svc_stack = CompactionServiceTowerServiceStack { + let report_status_svc = self + .report_status_layers + .into_iter() + .rev() + .fold( + quickwit_common::tower::BoxService::new(inner_client.clone()), + |svc, layer| layer.layer(svc), + ); + let tower_svc_stack = CompactionPlannerServiceTowerServiceStack { inner: inner_client, ping_svc, + report_status_svc, }; - CompactionServiceClient::new(tower_svc_stack) + CompactionPlannerServiceClient::new(tower_svc_stack) } } #[derive(Debug, Clone)] @@ -351,10 +516,10 @@ where } } #[derive(Debug)] -pub struct CompactionServiceMailbox { +pub struct CompactionPlannerServiceMailbox { inner: MailboxAdapter, } -impl CompactionServiceMailbox { +impl CompactionPlannerServiceMailbox { pub fn new(instance: quickwit_actors::Mailbox) -> Self { let inner = MailboxAdapter { inner: instance, @@ -363,7 +528,7 @@ impl CompactionServiceMailbox { Self { inner } } } -impl Clone for CompactionServiceMailbox { +impl Clone for CompactionPlannerServiceMailbox { fn clone(&self) -> Self { let inner = MailboxAdapter { inner: self.inner.clone(), @@ -372,7 +537,7 @@ impl Clone for CompactionServiceMailbox { Self { inner } } } -impl tower::Service for CompactionServiceMailbox +impl tower::Service for CompactionPlannerServiceMailbox where A: quickwit_actors::Actor + quickwit_actors::DeferableReplyHandler> + Send @@ -403,17 +568,23 @@ where } } #[async_trait::async_trait] -impl CompactionService for CompactionServiceMailbox +impl CompactionPlannerService for CompactionPlannerServiceMailbox where A: quickwit_actors::Actor + std::fmt::Debug, - CompactionServiceMailbox< + CompactionPlannerServiceMailbox< A, >: tower::Service< - PingRequest, - Response = PingResponse, - Error = crate::compaction::CompactionError, - Future = BoxFuture, - >, + PingRequest, + Response = PingResponse, + Error = 
crate::compaction::CompactionError, + Future = BoxFuture, + > + + tower::Service< + ReportStatusRequest, + Response = ReportStatusResponse, + Error = crate::compaction::CompactionError, + Future = BoxFuture, + >, { async fn ping( &self, @@ -421,16 +592,22 @@ where ) -> crate::compaction::CompactionResult { self.clone().call(request).await } + async fn report_status( + &self, + request: ReportStatusRequest, + ) -> crate::compaction::CompactionResult { + self.clone().call(request).await + } } #[derive(Debug, Clone)] -pub struct CompactionServiceGrpcClientAdapter { +pub struct CompactionPlannerServiceGrpcClientAdapter { inner: T, #[allow(dead_code)] connection_addrs_rx: tokio::sync::watch::Receiver< std::collections::HashSet, >, } -impl CompactionServiceGrpcClientAdapter { +impl CompactionPlannerServiceGrpcClientAdapter { pub fn new( instance: T, connection_addrs_rx: tokio::sync::watch::Receiver< @@ -444,9 +621,9 @@ impl CompactionServiceGrpcClientAdapter { } } #[async_trait::async_trait] -impl CompactionService -for CompactionServiceGrpcClientAdapter< - compaction_service_grpc_client::CompactionServiceGrpcClient, +impl CompactionPlannerService +for CompactionPlannerServiceGrpcClientAdapter< + compaction_planner_service_grpc_client::CompactionPlannerServiceGrpcClient, > where T: tonic::client::GrpcService + std::fmt::Debug + Clone + Send @@ -470,24 +647,38 @@ where PingRequest::rpc_name(), )) } + async fn report_status( + &self, + request: ReportStatusRequest, + ) -> crate::compaction::CompactionResult { + self.inner + .clone() + .report_status(request) + .await + .map(|response| response.into_inner()) + .map_err(|status| crate::error::grpc_status_to_service_error( + status, + ReportStatusRequest::rpc_name(), + )) + } } #[derive(Debug)] -pub struct CompactionServiceGrpcServerAdapter { - inner: InnerCompactionServiceClient, +pub struct CompactionPlannerServiceGrpcServerAdapter { + inner: InnerCompactionPlannerServiceClient, } -impl CompactionServiceGrpcServerAdapter { 
+impl CompactionPlannerServiceGrpcServerAdapter { pub fn new(instance: T) -> Self where - T: CompactionService, + T: CompactionPlannerService, { Self { - inner: InnerCompactionServiceClient(std::sync::Arc::new(instance)), + inner: InnerCompactionPlannerServiceClient(std::sync::Arc::new(instance)), } } } #[async_trait::async_trait] -impl compaction_service_grpc_server::CompactionServiceGrpc -for CompactionServiceGrpcServerAdapter { +impl compaction_planner_service_grpc_server::CompactionPlannerServiceGrpc +for CompactionPlannerServiceGrpcServerAdapter { async fn ping( &self, request: tonic::Request, @@ -499,9 +690,20 @@ for CompactionServiceGrpcServerAdapter { .map(tonic::Response::new) .map_err(crate::error::grpc_error_to_grpc_status) } + async fn report_status( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + self.inner + .0 + .report_status(request.into_inner()) + .await + .map(tonic::Response::new) + .map_err(crate::error::grpc_error_to_grpc_status) + } } /// Generated client implementations. -pub mod compaction_service_grpc_client { +pub mod compaction_planner_service_grpc_client { #![allow( unused_variables, dead_code, @@ -512,10 +714,10 @@ pub mod compaction_service_grpc_client { use tonic::codegen::*; use tonic::codegen::http::Uri; #[derive(Debug, Clone)] - pub struct CompactionServiceGrpcClient { + pub struct CompactionPlannerServiceGrpcClient { inner: tonic::client::Grpc, } - impl CompactionServiceGrpcClient { + impl CompactionPlannerServiceGrpcClient { /// Attempt to create a new client by connecting to a given endpoint. 
pub async fn connect(dst: D) -> Result where @@ -526,7 +728,7 @@ pub mod compaction_service_grpc_client { Ok(Self::new(conn)) } } - impl CompactionServiceGrpcClient + impl CompactionPlannerServiceGrpcClient where T: tonic::client::GrpcService, T::Error: Into, @@ -544,7 +746,7 @@ pub mod compaction_service_grpc_client { pub fn with_interceptor( inner: T, interceptor: F, - ) -> CompactionServiceGrpcClient> + ) -> CompactionPlannerServiceGrpcClient> where F: tonic::service::Interceptor, T::ResponseBody: Default, @@ -558,7 +760,9 @@ pub mod compaction_service_grpc_client { http::Request, >>::Error: Into + std::marker::Send + std::marker::Sync, { - CompactionServiceGrpcClient::new(InterceptedService::new(inner, interceptor)) + CompactionPlannerServiceGrpcClient::new( + InterceptedService::new(inner, interceptor), + ) } /// Compress requests with the given encoding. /// @@ -605,19 +809,51 @@ pub mod compaction_service_grpc_client { })?; let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( - "/quickwit.compaction.CompactionService/Ping", + "/quickwit.compaction.CompactionPlannerService/Ping", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert( + GrpcMethod::new( + "quickwit.compaction.CompactionPlannerService", + "Ping", + ), + ); + self.inner.unary(req, path, codec).await + } + pub async fn report_status( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/quickwit.compaction.CompactionPlannerService/ReportStatus", ); let mut req = request.into_request(); req.extensions_mut() .insert( - GrpcMethod::new("quickwit.compaction.CompactionService", "Ping"), + GrpcMethod::new( + 
"quickwit.compaction.CompactionPlannerService", + "ReportStatus", + ), ); self.inner.unary(req, path, codec).await } } } /// Generated server implementations. -pub mod compaction_service_grpc_server { +pub mod compaction_planner_service_grpc_server { #![allow( unused_variables, dead_code, @@ -626,23 +862,30 @@ pub mod compaction_service_grpc_server { clippy::let_unit_value, )] use tonic::codegen::*; - /// Generated trait containing gRPC methods that should be implemented for use with CompactionServiceGrpcServer. + /// Generated trait containing gRPC methods that should be implemented for use with CompactionPlannerServiceGrpcServer. #[async_trait] - pub trait CompactionServiceGrpc: std::marker::Send + std::marker::Sync + 'static { + pub trait CompactionPlannerServiceGrpc: std::marker::Send + std::marker::Sync + 'static { async fn ping( &self, request: tonic::Request, ) -> std::result::Result, tonic::Status>; + async fn report_status( + &self, + request: tonic::Request, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; } #[derive(Debug)] - pub struct CompactionServiceGrpcServer { + pub struct CompactionPlannerServiceGrpcServer { inner: Arc, accept_compression_encodings: EnabledCompressionEncodings, send_compression_encodings: EnabledCompressionEncodings, max_decoding_message_size: Option, max_encoding_message_size: Option, } - impl CompactionServiceGrpcServer { + impl CompactionPlannerServiceGrpcServer { pub fn new(inner: T) -> Self { Self::from_arc(Arc::new(inner)) } @@ -694,9 +937,9 @@ pub mod compaction_service_grpc_server { } } impl tonic::codegen::Service> - for CompactionServiceGrpcServer + for CompactionPlannerServiceGrpcServer where - T: CompactionServiceGrpc, + T: CompactionPlannerServiceGrpc, B: Body + std::marker::Send + 'static, B::Error: Into + std::marker::Send + 'static, { @@ -711,11 +954,11 @@ pub mod compaction_service_grpc_server { } fn call(&mut self, req: http::Request) -> Self::Future { match req.uri().path() { - 
"/quickwit.compaction.CompactionService/Ping" => { + "/quickwit.compaction.CompactionPlannerService/Ping" => { #[allow(non_camel_case_types)] - struct PingSvc(pub Arc); + struct PingSvc(pub Arc); impl< - T: CompactionServiceGrpc, + T: CompactionPlannerServiceGrpc, > tonic::server::UnaryService for PingSvc { type Response = super::PingResponse; type Future = BoxFuture< @@ -728,7 +971,8 @@ pub mod compaction_service_grpc_server { ) -> Self::Future { let inner = Arc::clone(&self.0); let fut = async move { - ::ping(&inner, request).await + ::ping(&inner, request) + .await }; Box::pin(fut) } @@ -755,6 +999,55 @@ pub mod compaction_service_grpc_server { }; Box::pin(fut) } + "/quickwit.compaction.CompactionPlannerService/ReportStatus" => { + #[allow(non_camel_case_types)] + struct ReportStatusSvc(pub Arc); + impl< + T: CompactionPlannerServiceGrpc, + > tonic::server::UnaryService + for ReportStatusSvc { + type Response = super::ReportStatusResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call( + &mut self, + request: tonic::Request, + ) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::report_status( + &inner, + request, + ) + .await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = ReportStatusSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } _ => { 
Box::pin(async move { let mut response = http::Response::new( @@ -777,7 +1070,7 @@ pub mod compaction_service_grpc_server { } } } - impl Clone for CompactionServiceGrpcServer { + impl Clone for CompactionPlannerServiceGrpcServer { fn clone(&self) -> Self { let inner = self.inner.clone(); Self { @@ -790,8 +1083,8 @@ pub mod compaction_service_grpc_server { } } /// Generated gRPC service name - pub const SERVICE_NAME: &str = "quickwit.compaction.CompactionService"; - impl tonic::server::NamedService for CompactionServiceGrpcServer { + pub const SERVICE_NAME: &str = "quickwit.compaction.CompactionPlannerService"; + impl tonic::server::NamedService for CompactionPlannerServiceGrpcServer { const NAME: &'static str = SERVICE_NAME; } } diff --git a/quickwit/quickwit-proto/src/indexing/mod.rs b/quickwit/quickwit-proto/src/indexing/mod.rs index bf0d147b685..281bc73e7df 100644 --- a/quickwit/quickwit-proto/src/indexing/mod.rs +++ b/quickwit/quickwit-proto/src/indexing/mod.rs @@ -138,6 +138,7 @@ impl Display for IndexingPipelineId { /// Uniquely identifies a merge pipeline. There exists at most one merge pipeline per /// `(index_uid, source_id)` running on indexer at any given time fed by one or more indexing /// pipelines. +/// TODO: Rework/remove this as part of splitting up merges. 
#[derive(Clone, Debug, Hash, Eq, PartialEq)] pub struct MergePipelineId { pub node_id: NodeId, diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 8c974a4c990..3eda56d5e5e 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -72,7 +72,7 @@ use quickwit_common::tower::{ }; use quickwit_common::uri::Uri; use quickwit_common::{get_bool_from_env, spawn_named_task}; -use quickwit_compaction::planner::StubCompactionService; +use quickwit_compaction::planner::StubCompactionPlannerService; use quickwit_compaction::{CompactorSupervisor, start_compactor_service}; use quickwit_config::service::QuickwitService; use quickwit_config::{ClusterConfig, IngestApiConfig, NodeConfig}; @@ -95,7 +95,7 @@ use quickwit_metastore::{ ControlPlaneMetastore, ListIndexesMetadataResponseExt, MetastoreResolver, }; use quickwit_opentelemetry::otlp::{OtlpGrpcLogsService, OtlpGrpcTracesService}; -use quickwit_proto::compaction::CompactionServiceClient; +use quickwit_proto::compaction::CompactionPlannerServiceClient; use quickwit_proto::control_plane::ControlPlaneServiceClient; use quickwit_proto::indexing::{IndexingServiceClient, ShardPositionsUpdate}; use quickwit_proto::ingest::ingester::{ @@ -203,7 +203,7 @@ struct QuickwitServices { pub ingest_router_service: IngestRouterServiceClient, ingester_opt: Option, - pub compaction_service_client_opt: Option, + pub compaction_service_client_opt: Option, pub _compactor_supervisor_opt: Option>, pub janitor_service_opt: Option>, pub jaeger_service_opt: Option, @@ -271,28 +271,22 @@ async fn balance_channel_for_service( BalanceChannel::from_stream(service_change_stream) } -/// Builds a `CompactionServiceClient` if the compaction service is available. +/// Builds a `CompactionPlannerServiceClient` if the node runs the compactor. /// -/// On janitor nodes with `QW_ENABLE_COMPACTION_SERVICE=true`, wraps a local stub. 
-/// On non-janitor nodes with the flag set, waits up to 10s for a remote janitor -/// exposing the gRPC endpoint and logs an error if none is found. -async fn get_compaction_service_client_if_needed( +/// On janitor+compactor nodes, wraps a local `StubCompactionPlannerService`. +/// On compactor-only nodes, connects to a remote janitor via gRPC. +async fn get_compaction_planner_client_if_needed( node_config: &NodeConfig, cluster: &Cluster, -) -> anyhow::Result, anyhow::Error> { - if !node_config.indexer_config.enable_standalone_compactors { +) -> anyhow::Result, anyhow::Error> { + if !node_config.is_service_enabled(QuickwitService::Compactor) { return Ok(None); } - // Only janitor nodes (which host the planner) and indexer nodes (which need - // to know whether to spawn local merge pipelines) care about this service. - if !node_config.is_service_enabled(QuickwitService::Indexer) { - return Ok(None); - } - if node_config.is_service_enabled(QuickwitService::Janitor) - && node_config.is_service_enabled(QuickwitService::Indexer) - { - info!("compaction service enabled on this node"); - return Ok(Some(CompactionServiceClient::new(StubCompactionService))); + if node_config.is_service_enabled(QuickwitService::Janitor) { + info!("compaction planner service enabled on this node"); + return Ok(Some(CompactionPlannerServiceClient::new( + StubCompactionPlannerService, + ))); } let balance_channel = balance_channel_for_service(cluster, QuickwitService::Janitor).await; let found = balance_channel @@ -301,10 +295,10 @@ async fn get_compaction_service_client_if_needed( }) .await; if !found { - bail!("compaction service is enabled but no janitor node was found in the cluster") + bail!("compactor is enabled but no janitor node was found in the cluster") } - info!("remote compaction service detected on janitor node"); - Ok(Some(CompactionServiceClient::from_balance_channel( + info!("remote compaction planner detected on janitor node"); + 
Ok(Some(CompactionPlannerServiceClient::from_balance_channel( balance_channel, node_config.grpc_config.max_message_size, None, @@ -591,7 +585,7 @@ pub async fn serve_quickwit( // Set up the "control plane proxy" for the metastore. let metastore_through_control_plane = MetastoreServiceClient::new(ControlPlaneMetastore::new( control_plane_client.clone(), - metastore_client, + metastore_client.clone(), )); // Setup ingest service v1. @@ -600,12 +594,12 @@ pub async fn serve_quickwit( .context("failed to start ingest v1 service")?; let compaction_service_client_opt = - get_compaction_service_client_if_needed(&node_config, &cluster) + get_compaction_planner_client_if_needed(&node_config, &cluster) .await .context("failed to initialize compaction service client")?; let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) { - let merge_scheduler_mailbox_opt = if compaction_service_client_opt.is_none() { + let merge_scheduler_mailbox_opt = if !node_config.indexer_config.enable_standalone_compactors { Some(spawn_merge_scheduler_service(&universe, &node_config)) } else { None @@ -791,14 +785,20 @@ pub async fn serve_quickwit( let compaction_root_directory = quickwit_common::temp_dir::Builder::default() .tempdir_in(&compaction_dir) .context("failed to create compaction temp directory")?; + // TODO: Real split store let split_store = IndexingSplitStore::create_without_local_store_for_test(Arc::new( RamStorage::default(), )); + let compaction_client = compaction_service_client_opt + .clone() + .expect("compactor service enabled but no compaction client available"); let compactor_mailbox = start_compactor_service( &universe, + cluster.self_node_id().into(), + compaction_client, &node_config.compactor_config, split_store, - metastore_through_control_plane.clone(), + metastore_client.clone(), storage_resolver.clone(), event_broker.clone(), compaction_root_directory, @@ -1917,18 +1917,22 @@ mod tests { .await .unwrap(); - // Without standalone 
compactors, no compaction service. + // Without compactor service enabled, no planner client. let mut node_config = NodeConfig::for_test(); node_config.enabled_services = HashSet::from([QuickwitService::Janitor, QuickwitService::Indexer]); - let result = get_compaction_service_client_if_needed(&node_config, &cluster) + let result = get_compaction_planner_client_if_needed(&node_config, &cluster) .await .unwrap(); assert!(result.is_none()); - // With standalone compactors enabled, compaction service client is returned. - node_config.indexer_config.enable_standalone_compactors = true; - let result = get_compaction_service_client_if_needed(&node_config, &cluster) + // With compactor + janitor enabled, planner client is returned (local stub). + node_config.enabled_services = HashSet::from([ + QuickwitService::Janitor, + QuickwitService::Indexer, + QuickwitService::Compactor, + ]); + let result = get_compaction_planner_client_if_needed(&node_config, &cluster) .await .unwrap(); assert!(result.is_some()); @@ -1942,9 +1946,9 @@ mod tests { .unwrap(); let mut node_config = NodeConfig::for_test(); - node_config.enabled_services = HashSet::from([QuickwitService::Indexer]); - node_config.indexer_config.enable_standalone_compactors = true; - let result = get_compaction_service_client_if_needed(&node_config, &cluster).await; + node_config.enabled_services = + HashSet::from([QuickwitService::Indexer, QuickwitService::Compactor]); + let result = get_compaction_planner_client_if_needed(&node_config, &cluster).await; assert!(result.is_err()); } } From 597a81e667608a99c4768a3c9cee183ac5174378 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Thu, 16 Apr 2026 14:49:00 -0400 Subject: [PATCH 07/21] Merge planner part 1: Metastore (#6305) --- quickwit/Cargo.lock | 2 + quickwit/quickwit-compaction/Cargo.toml | 3 + .../src/planner/compaction_planner.rs | 310 +++++++++++++ .../src/planner/compaction_state.rs | 415 ++++++++++++++++++ .../src/planner/index_config_store.rs | 307 +++++++++++++ 
.../quickwit-compaction/src/planner/mod.rs | 7 + quickwit/quickwit-serve/src/lib.rs | 11 +- 7 files changed, 1050 insertions(+), 5 deletions(-) create mode 100644 quickwit/quickwit-compaction/src/planner/compaction_planner.rs create mode 100644 quickwit/quickwit-compaction/src/planner/compaction_state.rs create mode 100644 quickwit/quickwit-compaction/src/planner/index_config_store.rs diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 93206bf5d46..73a205987a1 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -7158,6 +7158,8 @@ dependencies = [ "quickwit-metastore", "quickwit-proto", "quickwit-storage", + "serde_json", + "time", "tokio", "tracing", ] diff --git a/quickwit/quickwit-compaction/Cargo.toml b/quickwit/quickwit-compaction/Cargo.toml index 88735bb1606..dd817fe6614 100644 --- a/quickwit/quickwit-compaction/Cargo.toml +++ b/quickwit/quickwit-compaction/Cargo.toml @@ -18,8 +18,11 @@ quickwit-common = { workspace = true } quickwit-config = { workspace = true } quickwit-doc-mapper = { workspace = true } quickwit-indexing = { workspace = true } +quickwit-metastore = { workspace = true } quickwit-proto = { workspace = true } quickwit-storage = { workspace = true } +serde_json = { workspace = true } +time = { workspace = true } tracing = { workspace = true } tokio = { workspace = true } diff --git a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs new file mode 100644 index 00000000000..45d6fe5973c --- /dev/null +++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs @@ -0,0 +1,310 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::time::Duration; + +use anyhow::Result; +use async_trait::async_trait; +use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; +use quickwit_metastore::{ + ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, Split, SplitState, +}; +use quickwit_proto::metastore::{ListSplitsRequest, MetastoreService, MetastoreServiceClient}; +use time::OffsetDateTime; +use tracing::error; + +use super::compaction_state::CompactionState; +use super::index_config_store::IndexConfigStore; + +pub struct CompactionPlanner { + state: CompactionState, + index_config_store: IndexConfigStore, + cursor: i64, + metastore: MetastoreServiceClient, +} + +const STARTUP_LOOKBACK: Duration = Duration::from_secs(24 * 60 * 60); + +impl CompactionPlanner { + pub fn new(metastore: MetastoreServiceClient) -> Self { + let cursor = OffsetDateTime::now_utc().unix_timestamp() - STARTUP_LOOKBACK.as_secs() as i64; + CompactionPlanner { + state: CompactionState::new(), + index_config_store: IndexConfigStore::new(metastore.clone()), + cursor, + metastore, + } + } + + async fn ingest_splits(&mut self, splits: Vec) { + for split in splits { + if self.state.is_split_known(&split.split_metadata.split_id) { + continue; + } + let Ok(index_entry) = self + .index_config_store + .get_for_split(&split.split_metadata) + .await + else { + error!(split_id=%split.split_metadata.split_id, "failed to load index config, skipping split"); + continue; + }; + if index_entry.is_split_mature(&split.split_metadata) { + continue; + } + self.cursor = 
self.cursor.max(split.update_timestamp); + self.state.track_split(split.split_metadata); + } + } + + async fn scan_metastore(&self) -> Result> { + let query = ListSplitsQuery::for_all_indexes() + .with_split_state(SplitState::Published) + .retain_immature(OffsetDateTime::now_utc()) + .with_update_timestamp_gte(self.cursor); + let request = ListSplitsRequest::try_from_list_splits_query(&query)?; + let splits = self + .metastore + .list_splits(request) + .await? + .collect_splits() + .await?; + Ok(splits) + } + + async fn scan_and_plan(&mut self) -> Result<()> { + let splits = self.scan_metastore().await?; + self.ingest_splits(splits).await; + self.run_merge_policies(); + Ok(()) + } + + fn run_merge_policies(&mut self) { + for partition_key in self.state.partition_keys() { + if let Some(index_entry) = self.index_config_store.get(&partition_key.index_uid) { + self.state + .plan_partition(&partition_key, index_entry.merge_policy()); + } + } + } +} + +const SCAN_AND_PLAN_INTERVAL: Duration = Duration::from_secs(5); + +#[derive(Debug)] +struct ScanAndPlan; + +#[async_trait] +impl Actor for CompactionPlanner { + type ObservableState = (); + + fn name(&self) -> String { + "CompactionPlanner".to_string() + } + + fn observable_state(&self) -> Self::ObservableState {} + + async fn initialize(&mut self, ctx: &ActorContext) -> Result<(), ActorExitStatus> { + tracing::info!("compaction planner starting, scanning metastore for immature splits"); + if let Err(err) = self.scan_and_plan().await { + error!(error=%err, "error scanning metastore and planning merges"); + } + ctx.schedule_self_msg(SCAN_AND_PLAN_INTERVAL, ScanAndPlan); + Ok(()) + } +} + +#[async_trait] +impl Handler for CompactionPlanner { + type Reply = (); + + async fn handle( + &mut self, + _msg: ScanAndPlan, + ctx: &ActorContext, + ) -> Result<(), ActorExitStatus> { + if let Err(err) = self.scan_and_plan().await { + error!(error=%err, "error scanning metastore and planning merges"); + } + 
self.state.check_heartbeat_timeouts(); + ctx.schedule_self_msg(SCAN_AND_PLAN_INTERVAL, ScanAndPlan); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use quickwit_common::ServiceStream; + use quickwit_metastore::{ + IndexMetadata, IndexMetadataResponseExt, ListSplitsResponseExt, Split, SplitMetadata, + SplitState, + }; + use quickwit_proto::metastore::{ + IndexMetadataResponse, ListSplitsResponse, MetastoreError, MockMetastoreService, + }; + use quickwit_proto::types::IndexUid; + + use super::*; + + fn test_split(split_id: &str, index_uid: &IndexUid, update_timestamp: i64) -> Split { + Split { + split_state: SplitState::Published, + update_timestamp, + publish_timestamp: Some(update_timestamp), + split_metadata: SplitMetadata { + split_id: split_id.to_string(), + index_uid: index_uid.clone(), + source_id: "test-source".to_string(), + node_id: "test-node".to_string(), + num_docs: 100, + ..Default::default() + }, + } + } + + fn test_index_metadata() -> IndexMetadata { + IndexMetadata::for_test("test-index", "ram:///test-index") + } + + fn test_index_metadata_response(index_metadata: &IndexMetadata) -> IndexMetadataResponse { + IndexMetadataResponse::try_from_index_metadata(index_metadata).unwrap() + } + + #[tokio::test] + async fn test_scan_metastore() { + let index_uid = IndexUid::for_test("test-index", 0); + let splits = vec![ + test_split("split-1", &index_uid, 1000), + test_split("split-2", &index_uid, 2000), + ]; + let splits_clone = splits.clone(); + + let mut mock = MockMetastoreService::new(); + mock.expect_list_splits().returning(move |_| { + let response = ListSplitsResponse::try_from_splits(splits_clone.clone()).unwrap(); + Ok(ServiceStream::from(vec![Ok(response)])) + }); + + let planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); + let result = planner.scan_metastore().await.unwrap(); + + assert_eq!(result.len(), 2); + assert_eq!(result[0].split_metadata.split_id, "split-1"); + assert_eq!(result[1].split_metadata.split_id, 
"split-2"); + } + + #[tokio::test] + async fn test_ingest_splits_dedup_maturity_and_cursor() { + let index_metadata = test_index_metadata(); + let response = test_index_metadata_response(&index_metadata); + let index_uid = index_metadata.index_uid.clone(); + + let mut mock = MockMetastoreService::new(); + mock.expect_index_metadata() + .returning(move |_| Ok(response.clone())); + + let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); + planner.cursor = 0; + + // Pre-populate: "in-flight" is already being compacted. + planner.state.track_split(SplitMetadata { + split_id: "in-flight".to_string(), + index_uid: index_uid.clone(), + ..Default::default() + }); + + let mut mature_split = test_split("mature", &index_uid, 4000); + mature_split.split_metadata.num_docs = 20_000_000; + + let splits = vec![ + test_split("in-flight", &index_uid, 1000), + test_split("fresh", &index_uid, 3000), + mature_split, + ]; + + planner.ingest_splits(splits).await; + + assert!(planner.state.is_split_known("fresh")); + assert!(planner.state.is_split_known("in-flight")); + assert!(!planner.state.is_split_known("mature")); + assert_eq!(planner.cursor, 3000); + } + + #[tokio::test] + async fn test_scan_and_plan_metastore_error() { + let mut mock = MockMetastoreService::new(); + mock.expect_list_splits().returning(|_| { + Err(MetastoreError::Internal { + message: "test error".to_string(), + cause: String::new(), + }) + }); + + let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); + let original_cursor = planner.cursor; + + let result = planner.scan_and_plan().await; + assert!(result.is_err()); + assert_eq!(planner.cursor, original_cursor); + } + + #[tokio::test] + async fn test_ingest_splits_skips_on_config_error() { + let index_uid = IndexUid::for_test("missing-index", 0); + let splits = vec![test_split("orphan", &index_uid, 1000)]; + + let mut mock = MockMetastoreService::new(); + mock.expect_index_metadata().returning(|_| { + 
Err(MetastoreError::Internal { + message: "test error".to_string(), + cause: String::new(), + }) + }); + + let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); + planner.cursor = 0; + planner.ingest_splits(splits).await; + + assert!(!planner.state.is_split_known("orphan")); + } + + #[tokio::test] + async fn test_scan_and_plan_happy_path() { + let index_metadata = test_index_metadata(); + let index_metadata_response = test_index_metadata_response(&index_metadata); + let index_uid = index_metadata.index_uid.clone(); + + let splits = vec![ + test_split("s1", &index_uid, 5000), + test_split("s2", &index_uid, 6000), + ]; + let splits_clone = splits.clone(); + + let mut mock = MockMetastoreService::new(); + mock.expect_list_splits().returning(move |_| { + let response = ListSplitsResponse::try_from_splits(splits_clone.clone()).unwrap(); + Ok(ServiceStream::from(vec![Ok(response)])) + }); + mock.expect_index_metadata() + .returning(move |_| Ok(index_metadata_response.clone())); + + let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); + planner.cursor = 0; + planner.scan_and_plan().await.unwrap(); + + assert!(planner.state.is_split_known("s1")); + assert!(planner.state.is_split_known("s2")); + assert_eq!(planner.cursor, 6000); + } +} diff --git a/quickwit/quickwit-compaction/src/planner/compaction_state.rs b/quickwit/quickwit-compaction/src/planner/compaction_state.rs new file mode 100644 index 00000000000..1d1ea07e91d --- /dev/null +++ b/quickwit/quickwit-compaction/src/planner/compaction_state.rs @@ -0,0 +1,415 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::{HashMap, HashSet, VecDeque}; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use quickwit_indexing::merge_policy::{MergeOperation, MergePolicy}; +use quickwit_metastore::SplitMetadata; +use quickwit_proto::compaction::{CompactionFailure, CompactionInProgress, CompactionSuccess}; +use quickwit_proto::types::{DocMappingUid, IndexUid, NodeId, SourceId, SplitId}; +use tracing::{error, info, warn}; + +const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(60); + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct CompactionPartitionKey { + pub index_uid: IndexUid, + pub source_id: SourceId, + pub partition_id: u64, + pub doc_mapping_uid: DocMappingUid, +} + +impl CompactionPartitionKey { + pub fn from_split(split: &SplitMetadata) -> Self { + CompactionPartitionKey { + index_uid: split.index_uid.clone(), + source_id: split.source_id.clone(), + partition_id: split.partition_id, + doc_mapping_uid: split.doc_mapping_uid, + } + } +} + +struct InFlightCompaction { + task_id: String, + split_ids: Vec, + node_id: NodeId, + last_heartbeat: Instant, +} + +/// Tracks all split-level state for the compaction planner: +/// which splits need compaction, which are in-flight, and which +/// operations are pending assignment to workers. +pub struct CompactionState { + needs_compaction: HashMap>, + needs_compaction_split_ids: HashSet, + in_flight: HashMap, + in_flight_split_ids: HashSet, + /// TODO: add index_uid and source_id to MergeOperation so we don't need the partition key + /// here. 
+ pending_operations: VecDeque<(CompactionPartitionKey, MergeOperation)>, +} + +impl CompactionState { + pub fn new() -> Self { + CompactionState { + needs_compaction: HashMap::new(), + needs_compaction_split_ids: HashSet::new(), + in_flight: HashMap::new(), + in_flight_split_ids: HashSet::new(), + pending_operations: VecDeque::new(), + } + } + + /// Returns true if the split is already tracked (either awaiting compaction or in-flight). + pub fn is_split_known(&self, split_id: &str) -> bool { + self.needs_compaction_split_ids.contains(split_id) + || self.in_flight_split_ids.contains(split_id) + } + + /// Adds a split to the needs_compaction set. + pub fn track_split(&mut self, split: SplitMetadata) { + let split_id = split.split_id().to_string(); + let key = CompactionPartitionKey::from_split(&split); + self.needs_compaction_split_ids.insert(split_id); + self.needs_compaction.entry(key).or_default().push(split); + } + + /// Returns the partition keys for iteration. The caller drives the loop. + pub fn partition_keys(&self) -> Vec { + self.needs_compaction.keys().cloned().collect() + } + + /// Runs a merge policy on a single partition and queues the resulting operations. 
+ pub fn plan_partition( + &mut self, + partition_key: &CompactionPartitionKey, + merge_policy: &Arc, + ) { + let Some(splits) = self.needs_compaction.get_mut(partition_key) else { + return; + }; + for operation in merge_policy.operations(splits) { + for split in operation.splits_as_slice() { + self.needs_compaction_split_ids.remove(split.split_id()); + self.in_flight_split_ids + .insert(split.split_id().to_string()); + } + self.pending_operations + .push_back((partition_key.clone(), operation)); + } + if splits.is_empty() { + self.needs_compaction.remove(partition_key); + } + } + + pub fn process_successes(&mut self, successes: &[CompactionSuccess]) { + for success in successes { + if let Some(inflight) = self.in_flight.remove(&success.task_id) { + info!(task_id=%success.task_id, "compaction task completed"); + for split_id in &inflight.split_ids { + self.in_flight_split_ids.remove(split_id.as_str()); + } + } + } + } + + pub fn process_failures(&mut self, failures: &[CompactionFailure]) { + for failure in failures { + if let Some(inflight) = self.in_flight.remove(&failure.task_id) { + warn!(task_id=%failure.task_id, error=%failure.error_message, "compaction task failed"); + for split_id in &inflight.split_ids { + self.in_flight_split_ids.remove(split_id.as_str()); + } + } + } + } + + pub fn update_heartbeats(&mut self, node_id: &NodeId, in_progress: &[CompactionInProgress]) { + for task in in_progress { + if let Some(inflight) = self.in_flight.get_mut(&task.task_id) { + inflight.last_heartbeat = Instant::now(); + } else { + // Task not tracked — start tracking it. Happens when + // workers report tasks the planner doesn't know about yet + // (e.g. after planner start). 
+ for split_id in &task.split_ids { + self.in_flight_split_ids.insert(split_id.clone()); + self.needs_compaction_split_ids.remove(split_id.as_str()); + } + self.in_flight.insert( + task.task_id.clone(), + InFlightCompaction { + task_id: task.task_id.clone(), + split_ids: task.split_ids.clone(), + node_id: node_id.clone(), + last_heartbeat: Instant::now(), + }, + ); + } + } + } + + pub fn check_heartbeat_timeouts(&mut self) { + let now = Instant::now(); + let timed_out_task_ids: Vec = self + .in_flight + .iter() + .filter(|(_, inflight)| now.duration_since(inflight.last_heartbeat) > HEARTBEAT_TIMEOUT) + .map(|(task_id, _)| task_id.clone()) + .collect(); + + for task_id in timed_out_task_ids { + if let Some(inflight) = self.in_flight.remove(&task_id) { + error!(task_id=%task_id, node_id=%inflight.node_id, "compaction task timed out"); + for split_id in &inflight.split_ids { + self.in_flight_split_ids.remove(split_id.as_str()); + } + } + } + } + + /// Pops up to `count` pending operations for assignment. + pub fn pop_pending(&mut self, count: usize) -> Vec<(CompactionPartitionKey, MergeOperation)> { + let count = count.min(self.pending_operations.len()); + let mut operations = Vec::with_capacity(count); + for _ in 0..count { + operations.push(self.pending_operations.pop_front().unwrap()); + } + operations + } + + /// Records that an operation has been assigned to a worker. 
+ pub fn record_assignment(&mut self, task_id: String, split_ids: Vec, node_id: NodeId) { + self.in_flight.insert( + task_id.clone(), + InFlightCompaction { + task_id, + split_ids, + node_id, + last_heartbeat: Instant::now(), + }, + ); + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use quickwit_config::IndexingSettings; + use quickwit_config::merge_policy_config::{ + ConstWriteAmplificationMergePolicyConfig, MergePolicyConfig, + }; + use quickwit_indexing::merge_policy::merge_policy_from_settings; + use quickwit_proto::types::IndexUid; + + use super::*; + + fn test_merge_policy() -> Arc { + let settings = IndexingSettings { + merge_policy: MergePolicyConfig::ConstWriteAmplification( + ConstWriteAmplificationMergePolicyConfig { + merge_factor: 2, + max_merge_factor: 2, + ..Default::default() + }, + ), + ..Default::default() + }; + merge_policy_from_settings(&settings) + } + + fn test_split(split_id: &str, index_uid: &IndexUid) -> SplitMetadata { + SplitMetadata { + split_id: split_id.to_string(), + index_uid: index_uid.clone(), + source_id: "test-source".to_string(), + node_id: "test-node".to_string(), + num_docs: 1000, + create_timestamp: time::OffsetDateTime::now_utc().unix_timestamp(), + maturity: quickwit_metastore::SplitMaturity::Immature { + maturation_period: Duration::from_secs(3600), + }, + ..Default::default() + } + } + + #[test] + fn test_track_and_is_known() { + let index_uid = IndexUid::for_test("test-index", 0); + let mut state = CompactionState::new(); + + assert!(!state.is_split_known("s1")); + + state.track_split(test_split("s1", &index_uid)); + assert!(state.is_split_known("s1")); + assert!(!state.is_split_known("s2")); + } + + #[test] + fn test_track_split_partitions_correctly() { + let index_uid = IndexUid::for_test("test-index", 0); + let mut state = CompactionState::new(); + + state.track_split(test_split("s1", &index_uid)); + state.track_split(test_split("s2", &index_uid)); + + assert_eq!(state.partition_keys().len(), 1); + 
} + + #[test] + fn test_plan_partition_moves_splits_to_in_flight() { + let index_uid = IndexUid::for_test("test-index", 0); + let merge_policy = test_merge_policy(); + let mut state = CompactionState::new(); + + state.track_split(test_split("s0", &index_uid)); + state.track_split(test_split("s1", &index_uid)); + + let keys = state.partition_keys(); + assert_eq!(keys.len(), 1); + + state.plan_partition(&keys[0], &merge_policy); + + // Splits moved from needs_compaction to in_flight. + assert!(!state.pending_operations.is_empty()); + for (_, op) in &state.pending_operations { + for split in op.splits_as_slice() { + assert!(!state.needs_compaction_split_ids.contains(split.split_id())); + assert!(state.in_flight_split_ids.contains(split.split_id())); + } + } + } + + #[test] + fn test_process_successes_and_failures_clear_in_flight() { + let node_id = NodeId::from("worker-1"); + let mut state = CompactionState::new(); + + // Simulate what plan_partition + record_assignment does: + // split IDs go into in_flight_split_ids, then into InFlightCompaction. 
+ state.in_flight_split_ids.insert("s1".to_string()); + state.in_flight_split_ids.insert("s2".to_string()); + state.record_assignment( + "task-1".to_string(), + vec!["s1".to_string(), "s2".to_string()], + node_id.clone(), + ); + + state.in_flight_split_ids.insert("s3".to_string()); + state.record_assignment("task-2".to_string(), vec!["s3".to_string()], node_id); + + assert!(state.is_split_known("s1")); + assert!(state.is_split_known("s3")); + + state.process_successes(&[CompactionSuccess { + task_id: "task-1".to_string(), + merged_split_id: "merged-1".to_string(), + }]); + assert!(!state.is_split_known("s1")); + assert!(!state.is_split_known("s2")); + + state.process_failures(&[CompactionFailure { + task_id: "task-2".to_string(), + error_message: "boom".to_string(), + }]); + assert!(!state.is_split_known("s3")); + } + + #[test] + fn test_update_heartbeats_adopts_unknown_tasks() { + let node_id = NodeId::from("worker-1"); + let mut state = CompactionState::new(); + + // Simulate a split that was tracked as needing compaction. + let index_uid = IndexUid::for_test("test-index", 0); + state.track_split(test_split("s1", &index_uid)); + assert!(state.needs_compaction_split_ids.contains("s1")); + + // Worker reports an in-progress task the planner doesn't know about. + state.update_heartbeats( + &node_id, + &[CompactionInProgress { + task_id: "task-1".to_string(), + index_uid: Some(index_uid), + source_id: "test-source".to_string(), + split_ids: vec!["s1".to_string()], + }], + ); + + // Split moved from needs_compaction to in_flight. 
+ assert!(state.in_flight_split_ids.contains("s1")); + assert!(!state.needs_compaction_split_ids.contains("s1")); + } + + #[test] + fn test_pop_pending_and_record_assignment() { + let index_uid = IndexUid::for_test("test-index", 0); + let merge_policy = test_merge_policy(); + let mut state = CompactionState::new(); + + state.track_split(test_split("s0", &index_uid)); + state.track_split(test_split("s1", &index_uid)); + + let keys = state.partition_keys(); + state.plan_partition(&keys[0], &merge_policy); + + let pending = state.pop_pending(1); + assert_eq!(pending.len(), 1); + + let (_, operation) = &pending[0]; + let split_ids: Vec = operation + .splits_as_slice() + .iter() + .map(|s| s.split_id().to_string()) + .collect(); + + // Splits are already in in_flight_split_ids from plan_partition. + state.record_assignment("task-1".to_string(), split_ids.clone(), NodeId::from("w1")); + + for split_id in &split_ids { + assert!(state.is_split_known(split_id)); + } + } + + #[test] + fn test_pop_pending_respects_count() { + let index_uid = IndexUid::for_test("test-index", 0); + let merge_policy = test_merge_policy(); + let mut state = CompactionState::new(); + + // With merge_factor=2, 4 splits produces 2 operations. + for i in 0..4 { + state.track_split(test_split(&format!("s{i}"), &index_uid)); + } + + let keys = state.partition_keys(); + state.plan_partition(&keys[0], &merge_policy); + + let total_pending = state.pending_operations.len(); + assert!(total_pending >= 2); + + let popped = state.pop_pending(1); + assert_eq!(popped.len(), 1); + assert_eq!(state.pending_operations.len(), total_pending - 1); + + // Asking for more than available returns what's there. 
+ let rest = state.pop_pending(100); + assert_eq!(rest.len(), total_pending - 1); + assert!(state.pending_operations.is_empty()); + } +} diff --git a/quickwit/quickwit-compaction/src/planner/index_config_store.rs b/quickwit/quickwit-compaction/src/planner/index_config_store.rs new file mode 100644 index 00000000000..e47044d21e3 --- /dev/null +++ b/quickwit/quickwit-compaction/src/planner/index_config_store.rs @@ -0,0 +1,307 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::sync::Arc; + +use quickwit_config::{IndexConfig, build_doc_mapper}; +use quickwit_doc_mapper::DocMapper; +use quickwit_indexing::merge_policy::{MergePolicy, merge_policy_from_settings}; +use quickwit_metastore::{IndexMetadataResponseExt, SplitMaturity, SplitMetadata}; +use quickwit_proto::metastore::{IndexMetadataRequest, MetastoreService, MetastoreServiceClient}; +use quickwit_proto::types::{DocMappingUid, IndexUid}; + +/// Everything the planner needs to know about a single index. 
+#[derive(Clone)] +pub struct IndexEntry { + config: IndexConfig, + merge_policy: Arc, + doc_mappers: HashMap>, +} + +impl IndexEntry { + pub fn is_split_mature(&self, split: &SplitMetadata) -> bool { + matches!( + self.merge_policy + .split_maturity(split.num_docs, split.num_merge_ops), + SplitMaturity::Mature + ) + } + + pub fn merge_policy(&self) -> &Arc { + &self.merge_policy + } + + pub fn doc_mapping_json(&self) -> String { + serde_json::to_string(&self.config.doc_mapping) + .expect("doc mapping serialization should not fail") + } + + pub fn search_settings_json(&self) -> String { + serde_json::to_string(&self.config.search_settings) + .expect("search settings serialization should not fail") + } + + pub fn indexing_settings_json(&self) -> String { + serde_json::to_string(&self.config.indexing_settings) + .expect("indexing settings serialization should not fail") + } + + pub fn retention_policy_json(&self) -> String { + match &self.config.retention_policy_opt { + Some(policy) => serde_json::to_string(policy) + .expect("retention policy serialization should not fail"), + None => String::new(), + } + } + + pub fn index_storage_uri(&self) -> String { + self.config.index_uri.to_string() + } +} + +/// Caches per-index configuration, merge policies, and doc mappers. +/// Fetches from the metastore on demand. All accessors panic if called +/// for an index that hasn't been loaded. +pub struct IndexConfigStore { + indexes: HashMap, + metastore: MetastoreServiceClient, +} + +impl IndexConfigStore { + pub fn new(metastore: MetastoreServiceClient) -> Self { + IndexConfigStore { + indexes: HashMap::new(), + metastore, + } + } + + /// Fetches an index config from the metastore and builds the derived + /// artifacts. 
+ async fn fetch_index_config( + &mut self, + index_uid: &IndexUid, + doc_mapping_uid: &DocMappingUid, + ) -> anyhow::Result<()> { + let response = self + .metastore + .index_metadata(IndexMetadataRequest { + index_uid: Some(index_uid.clone()), + index_id: None, + }) + .await?; + let index_metadata = response.deserialize_index_metadata()?; + + let doc_mapper = build_doc_mapper( + &index_metadata.index_config.doc_mapping, + &index_metadata.index_config.search_settings, + )?; + let merge_policy = + merge_policy_from_settings(&index_metadata.index_config.indexing_settings); + + let mut doc_mappers = HashMap::new(); + doc_mappers.insert(*doc_mapping_uid, doc_mapper); + let entry = IndexEntry { + config: index_metadata.index_config, + merge_policy, + doc_mappers, + }; + self.indexes.insert(index_uid.clone(), entry); + Ok(()) + } + + /// Gets the index entry, fetching from the metastore if not cached. + pub async fn get_or_fetch( + &mut self, + index_uid: &IndexUid, + doc_mapping_uid: &DocMappingUid, + ) -> anyhow::Result<&IndexEntry> { + match self.indexes.get(index_uid) { + Some(entry) if entry.doc_mappers.contains_key(doc_mapping_uid) => {} + _ => self.fetch_index_config(index_uid, doc_mapping_uid).await?, + } + Ok(&self.indexes[index_uid]) + } + + pub async fn get_for_split(&mut self, split: &SplitMetadata) -> anyhow::Result<&IndexEntry> { + self.get_or_fetch(&split.index_uid, &split.doc_mapping_uid) + .await + } + + pub fn get(&self, index_uid: &IndexUid) -> Option<&IndexEntry> { + self.indexes.get(index_uid) + } +} + +#[cfg(test)] +mod tests { + use quickwit_metastore::{IndexMetadata, IndexMetadataResponseExt}; + use quickwit_proto::metastore::{IndexMetadataResponse, MetastoreError, MockMetastoreService}; + + use super::*; + + fn test_index_metadata() -> IndexMetadata { + IndexMetadata::for_test("test-index", "ram:///test-index") + } + + fn test_index_metadata_response(index_metadata: &IndexMetadata) -> IndexMetadataResponse { + 
IndexMetadataResponse::try_from_index_metadata(index_metadata).unwrap() + } + + #[tokio::test] + async fn test_get_or_fetch_loads_and_caches() { + let index_metadata = test_index_metadata(); + let response = test_index_metadata_response(&index_metadata); + let index_uid = index_metadata.index_uid.clone(); + let doc_mapping_uid = DocMappingUid::default(); + + let mut mock = MockMetastoreService::new(); + mock.expect_index_metadata() + .times(1) + .returning(move |_| Ok(response.clone())); + + let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + + // First call fetches from metastore. + let entry = store + .get_or_fetch(&index_uid, &doc_mapping_uid) + .await + .unwrap(); + assert!(!entry.doc_mapping_json().is_empty()); + assert!(!entry.search_settings_json().is_empty()); + assert!(!entry.indexing_settings_json().is_empty()); + assert!(!entry.index_storage_uri().is_empty()); + + // Second call hits cache (times(1) would panic otherwise). + store + .get_or_fetch(&index_uid, &doc_mapping_uid) + .await + .unwrap(); + } + + #[tokio::test] + async fn test_get_or_fetch_metastore_error() { + let mut mock = MockMetastoreService::new(); + mock.expect_index_metadata().returning(|_| { + Err(MetastoreError::Internal { + message: "test error".to_string(), + cause: String::new(), + }) + }); + + let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let result = store + .get_or_fetch(&IndexUid::for_test("missing", 0), &DocMappingUid::default()) + .await; + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_get_returns_none_before_fetch() { + let mock = MockMetastoreService::new(); + let store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + assert!(store.get(&IndexUid::for_test("test-index", 0)).is_none()); + } + + #[tokio::test] + async fn test_get_returns_some_after_fetch() { + let index_metadata = test_index_metadata(); + let response = test_index_metadata_response(&index_metadata); + let 
index_uid = index_metadata.index_uid.clone(); + + let mut mock = MockMetastoreService::new(); + mock.expect_index_metadata() + .returning(move |_| Ok(response.clone())); + + let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + store + .get_or_fetch(&index_uid, &DocMappingUid::default()) + .await + .unwrap(); + + assert!(store.get(&index_uid).is_some()); + } + + #[tokio::test] + async fn test_get_for_split() { + let index_metadata = test_index_metadata(); + let response = test_index_metadata_response(&index_metadata); + let index_uid = index_metadata.index_uid.clone(); + + let mut mock = MockMetastoreService::new(); + mock.expect_index_metadata() + .returning(move |_| Ok(response.clone())); + + let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let split = SplitMetadata { + split_id: "split-1".to_string(), + index_uid: index_uid.clone(), + ..Default::default() + }; + + let entry = store.get_for_split(&split).await.unwrap(); + assert!(!entry.index_storage_uri().is_empty()); + } + + #[tokio::test] + async fn test_is_split_mature() { + let index_metadata = test_index_metadata(); + let response = test_index_metadata_response(&index_metadata); + let index_uid = index_metadata.index_uid.clone(); + + let mut mock = MockMetastoreService::new(); + mock.expect_index_metadata() + .returning(move |_| Ok(response.clone())); + + let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let entry = store + .get_or_fetch(&index_uid, &DocMappingUid::default()) + .await + .unwrap(); + + let small_split = SplitMetadata { + num_docs: 100, + num_merge_ops: 0, + ..Default::default() + }; + assert!(!entry.is_split_mature(&small_split)); + + let large_split = SplitMetadata { + num_docs: 20_000_000, + num_merge_ops: 0, + ..Default::default() + }; + assert!(entry.is_split_mature(&large_split)); + } + + #[tokio::test] + async fn test_retention_policy_json_empty_when_none() { + let index_metadata = 
test_index_metadata(); + let response = test_index_metadata_response(&index_metadata); + let index_uid = index_metadata.index_uid.clone(); + + let mut mock = MockMetastoreService::new(); + mock.expect_index_metadata() + .returning(move |_| Ok(response.clone())); + + let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let entry = store + .get_or_fetch(&index_uid, &DocMappingUid::default()) + .await + .unwrap(); + + // Default test index has no retention policy. + assert!(entry.retention_policy_json().is_empty()); + } +} diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs index 5ccd750a20c..e780b30e0ef 100644 --- a/quickwit/quickwit-compaction/src/planner/mod.rs +++ b/quickwit/quickwit-compaction/src/planner/mod.rs @@ -12,6 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#[allow(dead_code)] +mod compaction_planner; mod compaction_service; +#[allow(dead_code)] +mod compaction_state; +#[allow(dead_code)] +mod index_config_store; +pub use compaction_planner::CompactionPlanner; pub use compaction_service::StubCompactionPlannerService; diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 3eda56d5e5e..9dafbfe9d4a 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -599,11 +599,12 @@ pub async fn serve_quickwit( .context("failed to initialize compaction service client")?; let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) { - let merge_scheduler_mailbox_opt = if !node_config.indexer_config.enable_standalone_compactors { - Some(spawn_merge_scheduler_service(&universe, &node_config)) - } else { - None - }; + let merge_scheduler_mailbox_opt = + if !node_config.indexer_config.enable_standalone_compactors { + Some(spawn_merge_scheduler_service(&universe, &node_config)) + } else { + None + }; let indexing_service 
= start_indexing_service( &universe, &node_config, From 7fcb4cefdd6283efe4a9b91559859ff9ee374dcf Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Thu, 16 Apr 2026 14:51:00 -0400 Subject: [PATCH 08/21] Merge planner part 2: gRPCs (#6310) --- quickwit/Cargo.lock | 1 + quickwit/quickwit-compaction/Cargo.toml | 1 + .../src/compactor_supervisor.rs | 8 +- .../src/planner/compaction_planner.rs | 290 +++++++++++++++--- .../src/planner/compaction_service.rs | 36 --- .../quickwit-compaction/src/planner/mod.rs | 3 - .../protos/quickwit/compaction.proto | 20 +- .../codegen/quickwit/quickwit.compaction.rs | 270 ++-------------- quickwit/quickwit-proto/src/compaction/mod.rs | 15 + quickwit/quickwit-serve/src/lib.rs | 79 +++-- 10 files changed, 374 insertions(+), 349 deletions(-) delete mode 100644 quickwit/quickwit-compaction/src/planner/compaction_service.rs diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 73a205987a1..68d9ad60f96 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -7162,6 +7162,7 @@ dependencies = [ "time", "tokio", "tracing", + "ulid", ] [[package]] diff --git a/quickwit/quickwit-compaction/Cargo.toml b/quickwit/quickwit-compaction/Cargo.toml index dd817fe6614..c018327c8c4 100644 --- a/quickwit/quickwit-compaction/Cargo.toml +++ b/quickwit/quickwit-compaction/Cargo.toml @@ -25,6 +25,7 @@ serde_json = { workspace = true } time = { workspace = true } tracing = { workspace = true } tokio = { workspace = true } +ulid = { workspace = true } [dev-dependencies] quickwit-actors = { workspace = true, features = ["testsuite"] } diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index e2809c4e8d3..e1b27eb4a30 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -189,20 +189,22 @@ mod tests { use quickwit_actors::Universe; use quickwit_common::temp_dir::TempDirectory; - use 
quickwit_proto::compaction::CompactionPlannerServiceClient; + use quickwit_proto::compaction::{ + CompactionPlannerServiceClient, MockCompactionPlannerService, + }; use quickwit_proto::metastore::{MetastoreServiceClient, MockMetastoreService}; use quickwit_proto::types::NodeId; use quickwit_storage::{RamStorage, StorageResolver}; use super::*; use crate::compaction_pipeline::tests::test_pipeline; - use crate::planner::StubCompactionPlannerService; fn test_supervisor(num_slots: usize) -> CompactorSupervisor { let storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::create_without_local_store_for_test(storage); let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); - let compaction_client = CompactionPlannerServiceClient::new(StubCompactionPlannerService); + let compaction_client = + CompactionPlannerServiceClient::from_mock(MockCompactionPlannerService::new()); CompactorSupervisor::new( NodeId::from("test-node"), compaction_client, diff --git a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs index 45d6fe5973c..0fb6ccc6d50 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs @@ -12,20 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::fmt::Debug; use std::time::Duration; use anyhow::Result; use async_trait::async_trait; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; +use quickwit_indexing::merge_policy::MergeOperation; use quickwit_metastore::{ ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, Split, SplitState, }; +use quickwit_proto::compaction::{ + CompactionResult, MergeTaskAssignment, ReportStatusRequest, ReportStatusResponse, +}; use quickwit_proto::metastore::{ListSplitsRequest, MetastoreService, MetastoreServiceClient}; +use quickwit_proto::types::{IndexUid, NodeId, SourceId}; use time::OffsetDateTime; use tracing::error; +use ulid::Ulid; use super::compaction_state::CompactionState; -use super::index_config_store::IndexConfigStore; +use super::index_config_store::{IndexConfigStore, IndexEntry}; pub struct CompactionPlanner { state: CompactionState, @@ -34,6 +41,75 @@ pub struct CompactionPlanner { metastore: MetastoreServiceClient, } +impl Debug for CompactionPlanner { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CompactionPlanner") + .field("cursor", &self.cursor) + .finish() + } +} + +const SCAN_AND_PLAN_INTERVAL: Duration = Duration::from_secs(5); +/// On initialization, we want to wait for two intervals to allow any in-progress workers to report +/// their progress, preventing us from frivolously rescheduling work. 
+const INITIAL_SCAN_AND_PLAN_INTERVAL: Duration = SCAN_AND_PLAN_INTERVAL.saturating_mul(2); + +#[derive(Debug)] +struct ScanAndPlan; + +#[async_trait] +impl Actor for CompactionPlanner { + type ObservableState = (); + + fn name(&self) -> String { + "CompactionPlanner".to_string() + } + + fn observable_state(&self) -> Self::ObservableState {} + + async fn initialize(&mut self, ctx: &ActorContext) -> Result<(), ActorExitStatus> { + tracing::info!("compaction planner starting, scanning metastore for immature splits"); + ctx.schedule_self_msg(INITIAL_SCAN_AND_PLAN_INTERVAL, ScanAndPlan); + Ok(()) + } +} + +#[async_trait] +impl Handler for CompactionPlanner { + type Reply = (); + + async fn handle( + &mut self, + _msg: ScanAndPlan, + ctx: &ActorContext, + ) -> Result<(), ActorExitStatus> { + if let Err(err) = self.scan_and_plan().await { + error!(error=%err, "error scanning metastore and planning merges"); + } + self.state.check_heartbeat_timeouts(); + ctx.schedule_self_msg(SCAN_AND_PLAN_INTERVAL, ScanAndPlan); + Ok(()) + } +} + +#[async_trait] +impl Handler for CompactionPlanner { + type Reply = CompactionResult; + + async fn handle( + &mut self, + msg: ReportStatusRequest, + _ctx: &ActorContext, + ) -> Result, ActorExitStatus> { + let node_id = NodeId::from(msg.node_id); + self.state.process_successes(&msg.successes); + self.state.process_failures(&msg.failures); + self.state.update_heartbeats(&node_id, &msg.in_progress); + let new_tasks = self.assign_tasks(&node_id, msg.available_slots); + Ok(Ok(ReportStatusResponse { new_tasks })) + } +} + const STARTUP_LOOKBACK: Duration = Duration::from_secs(24 * 60 * 60); impl CompactionPlanner { @@ -98,58 +174,79 @@ impl CompactionPlanner { } } } -} - -const SCAN_AND_PLAN_INTERVAL: Duration = Duration::from_secs(5); - -#[derive(Debug)] -struct ScanAndPlan; - -#[async_trait] -impl Actor for CompactionPlanner { - type ObservableState = (); - - fn name(&self) -> String { - "CompactionPlanner".to_string() - } - fn 
observable_state(&self) -> Self::ObservableState {} + fn assign_tasks(&mut self, node_id: &NodeId, available_slots: u32) -> Vec { + let pending = self.state.pop_pending(available_slots as usize); + let mut assignments = Vec::with_capacity(pending.len()); - async fn initialize(&mut self, ctx: &ActorContext) -> Result<(), ActorExitStatus> { - tracing::info!("compaction planner starting, scanning metastore for immature splits"); - if let Err(err) = self.scan_and_plan().await { - error!(error=%err, "error scanning metastore and planning merges"); + for (partition_key, operation) in pending { + let task_id = Ulid::new().to_string(); + let Some(index_entry) = self.index_config_store.get(&partition_key.index_uid) else { + error!(index_uid=%partition_key.index_uid, "index config not found for pending operation, skipping"); + continue; + }; + let assignment = build_task_assignment( + &task_id, + index_entry, + &operation, + &partition_key.index_uid, + &partition_key.source_id, + ); + + let split_ids = operation + .splits_as_slice() + .iter() + .map(|s| s.split_id().to_string()) + .collect(); + self.state + .record_assignment(task_id, split_ids, node_id.clone()); + + assignments.push(assignment); } - ctx.schedule_self_msg(SCAN_AND_PLAN_INTERVAL, ScanAndPlan); - Ok(()) + assignments } } -#[async_trait] -impl Handler for CompactionPlanner { - type Reply = (); - - async fn handle( - &mut self, - _msg: ScanAndPlan, - ctx: &ActorContext, - ) -> Result<(), ActorExitStatus> { - if let Err(err) = self.scan_and_plan().await { - error!(error=%err, "error scanning metastore and planning merges"); - } - self.state.check_heartbeat_timeouts(); - ctx.schedule_self_msg(SCAN_AND_PLAN_INTERVAL, ScanAndPlan); - Ok(()) +fn build_task_assignment( + task_id: &str, + index_entry: &IndexEntry, + operation: &MergeOperation, + index_uid: &IndexUid, + source_id: &SourceId, +) -> MergeTaskAssignment { + MergeTaskAssignment { + task_id: task_id.to_string(), + splits_metadata_json: operation + 
.splits_as_slice() + .iter() + .map(|s| { + serde_json::to_string(s).expect("split metadata serialization should not fail") + }) + .collect(), + doc_mapping_json: index_entry.doc_mapping_json(), + search_settings_json: index_entry.search_settings_json(), + indexing_settings_json: index_entry.indexing_settings_json(), + retention_policy_json: index_entry.retention_policy_json(), + index_uid: Some(index_uid.clone()), + source_id: source_id.to_string(), + index_storage_uri: index_entry.index_storage_uri(), } } #[cfg(test)] mod tests { + use std::time::Duration; + use quickwit_common::ServiceStream; + use quickwit_config::IndexingSettings; + use quickwit_config::merge_policy_config::{ + ConstWriteAmplificationMergePolicyConfig, MergePolicyConfig, + }; use quickwit_metastore::{ - IndexMetadata, IndexMetadataResponseExt, ListSplitsResponseExt, Split, SplitMetadata, - SplitState, + IndexMetadata, IndexMetadataResponseExt, ListSplitsResponseExt, Split, SplitMaturity, + SplitMetadata, SplitState, }; + use quickwit_proto::compaction::CompactionSuccess; use quickwit_proto::metastore::{ IndexMetadataResponse, ListSplitsResponse, MetastoreError, MockMetastoreService, }; @@ -168,6 +265,10 @@ mod tests { source_id: "test-source".to_string(), node_id: "test-node".to_string(), num_docs: 100, + create_timestamp: OffsetDateTime::now_utc().unix_timestamp(), + maturity: SplitMaturity::Immature { + maturation_period: Duration::from_secs(3600), + }, ..Default::default() }, } @@ -177,6 +278,22 @@ mod tests { IndexMetadata::for_test("test-index", "ram:///test-index") } + /// Returns an IndexMetadata with merge_factor=2 so two splits trigger a merge. 
+ fn test_index_metadata_with_merge_factor_2() -> IndexMetadata { + let mut metadata = test_index_metadata(); + metadata.index_config.indexing_settings = IndexingSettings { + merge_policy: MergePolicyConfig::ConstWriteAmplification( + ConstWriteAmplificationMergePolicyConfig { + merge_factor: 2, + max_merge_factor: 2, + ..Default::default() + }, + ), + ..Default::default() + }; + metadata + } + fn test_index_metadata_response(index_metadata: &IndexMetadata) -> IndexMetadataResponse { IndexMetadataResponse::try_from_index_metadata(index_metadata).unwrap() } @@ -307,4 +424,99 @@ mod tests { assert!(planner.state.is_split_known("s2")); assert_eq!(planner.cursor, 6000); } + + /// Helper: creates a planner with merge_factor=2, ingests the given splits, + /// and runs merge policies. Returns the planner ready for `assign_tasks`. + async fn planner_with_pending_merges(split_ids: &[&str]) -> (CompactionPlanner, IndexUid) { + let index_metadata = test_index_metadata_with_merge_factor_2(); + let index_uid = index_metadata.index_uid.clone(); + let response = test_index_metadata_response(&index_metadata); + + let mut mock = MockMetastoreService::new(); + mock.expect_index_metadata() + .returning(move |_| Ok(response.clone())); + mock.expect_list_splits().returning(|_| { + Ok(ServiceStream::from(vec![Ok( + ListSplitsResponse::try_from_splits(Vec::new()).unwrap(), + )])) + }); + + let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); + planner.cursor = 0; + + let splits: Vec = split_ids + .iter() + .enumerate() + .map(|(i, id)| test_split(id, &index_uid, (i + 1) as i64 * 1000)) + .collect(); + planner.ingest_splits(splits).await; + planner.run_merge_policies(); + (planner, index_uid) + } + + #[tokio::test] + async fn test_assign_tasks_returns_assignments_and_drains_queue() { + let (mut planner, index_uid) = planner_with_pending_merges(&["s1", "s2"]).await; + let node_id = NodeId::from("worker-1"); + + // First call: get the assignment. 
+ let assignments = planner.assign_tasks(&node_id, 10); + assert_eq!(assignments.len(), 1); + + let assignment = &assignments[0]; + assert!(!assignment.task_id.is_empty()); + assert_eq!(assignment.splits_metadata_json.len(), 2); + assert_eq!(assignment.index_uid, Some(index_uid)); + assert_eq!(assignment.source_id, "test-source"); + assert!(!assignment.doc_mapping_json.is_empty()); + assert!(!assignment.index_storage_uri.is_empty()); + + // Second call: queue is drained, no more assignments. + let assignments = planner.assign_tasks(&node_id, 10); + assert!(assignments.is_empty()); + } + + #[tokio::test] + async fn test_assign_tasks_respects_available_slots() { + // 4 splits with merge_factor=2 produces 2 merge operations. + let (mut planner, _) = planner_with_pending_merges(&["s1", "s2", "s3", "s4"]).await; + let node_id = NodeId::from("worker-1"); + + // Request only 1 slot. + let assignments = planner.assign_tasks(&node_id, 1); + assert_eq!(assignments.len(), 1); + + // The remaining operation is still pending. + let assignments = planner.assign_tasks(&node_id, 10); + assert_eq!(assignments.len(), 1); + } + + #[tokio::test] + async fn test_report_status_success_frees_splits_for_future_merges() { + let (mut planner, index_uid) = planner_with_pending_merges(&["s1", "s2"]).await; + let node_id = NodeId::from("worker-1"); + + let assignments = planner.assign_tasks(&node_id, 10); + assert_eq!(assignments.len(), 1); + let task_id = assignments[0].task_id.clone(); + + // Report success for the task. + planner.state.process_successes(&[CompactionSuccess { + task_id, + merged_split_id: "merged-1".to_string(), + }]); + + // The original splits are no longer tracked. Re-ingesting them + // (simulating the merged output being immature) creates new work. 
+ let new_splits = vec![ + test_split("s5", &index_uid, 5000), + test_split("s6", &index_uid, 6000), + ]; + planner.ingest_splits(new_splits).await; + planner.run_merge_policies(); + + let assignments = planner.assign_tasks(&node_id, 10); + assert_eq!(assignments.len(), 1); + assert_eq!(assignments[0].splits_metadata_json.len(), 2); + } } diff --git a/quickwit/quickwit-compaction/src/planner/compaction_service.rs b/quickwit/quickwit-compaction/src/planner/compaction_service.rs deleted file mode 100644 index 340ff4e0d5c..00000000000 --- a/quickwit/quickwit-compaction/src/planner/compaction_service.rs +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2021-Present Datadog, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use async_trait::async_trait; -use quickwit_proto::compaction::{ - CompactionPlannerService, CompactionResult, PingRequest, PingResponse, ReportStatusRequest, - ReportStatusResponse, -}; - -#[derive(Debug, Clone)] -pub struct StubCompactionPlannerService; - -#[async_trait] -impl CompactionPlannerService for StubCompactionPlannerService { - async fn ping(&self, _request: PingRequest) -> CompactionResult { - Ok(PingResponse {}) - } - - async fn report_status( - &self, - _request: ReportStatusRequest, - ) -> CompactionResult { - Ok(ReportStatusResponse {}) - } -} diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs index e780b30e0ef..b89d67ee8a2 100644 --- a/quickwit/quickwit-compaction/src/planner/mod.rs +++ b/quickwit/quickwit-compaction/src/planner/mod.rs @@ -12,13 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#[allow(dead_code)] mod compaction_planner; -mod compaction_service; #[allow(dead_code)] mod compaction_state; #[allow(dead_code)] mod index_config_store; pub use compaction_planner::CompactionPlanner; -pub use compaction_service::StubCompactionPlannerService; diff --git a/quickwit/quickwit-proto/protos/quickwit/compaction.proto b/quickwit/quickwit-proto/protos/quickwit/compaction.proto index f071cc3bc01..7313d85db91 100644 --- a/quickwit/quickwit-proto/protos/quickwit/compaction.proto +++ b/quickwit/quickwit-proto/protos/quickwit/compaction.proto @@ -19,13 +19,9 @@ package quickwit.compaction; import "quickwit/common.proto"; service CompactionPlannerService { - rpc Ping(PingRequest) returns (PingResponse); rpc ReportStatus(ReportStatusRequest) returns (ReportStatusResponse); } -message PingRequest {} -message PingResponse {} - message ReportStatusRequest { string node_id = 1; uint32 available_slots = 2; @@ -51,4 +47,18 @@ message CompactionFailure { string error_message = 2; } -message ReportStatusResponse {} \ No newline at end 
of file +message ReportStatusResponse { + repeated MergeTaskAssignment new_tasks = 1; +} + +message MergeTaskAssignment { + string task_id = 1; + repeated string splits_metadata_json = 2; + string doc_mapping_json = 3; + string search_settings_json = 4; + string indexing_settings_json = 5; + string retention_policy_json = 6; + quickwit.common.IndexUid index_uid = 7; + string source_id = 8; + string index_storage_uri = 9; +} \ No newline at end of file diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs index df33d068f9a..f96bb33ed20 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs @@ -1,11 +1,5 @@ // This file is @generated by prost-build. #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] -pub struct PingRequest {} -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] -pub struct PingResponse {} -#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ReportStatusRequest { #[prost(string, tag = "1")] @@ -48,18 +42,38 @@ pub struct CompactionFailure { pub error_message: ::prost::alloc::string::String, } #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] -#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] -pub struct ReportStatusResponse {} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ReportStatusResponse { + #[prost(message, repeated, tag = "1")] + pub new_tasks: ::prost::alloc::vec::Vec, +} +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct MergeTaskAssignment { + #[prost(string, tag = "1")] + pub task_id: 
::prost::alloc::string::String, + #[prost(string, repeated, tag = "2")] + pub splits_metadata_json: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(string, tag = "3")] + pub doc_mapping_json: ::prost::alloc::string::String, + #[prost(string, tag = "4")] + pub search_settings_json: ::prost::alloc::string::String, + #[prost(string, tag = "5")] + pub indexing_settings_json: ::prost::alloc::string::String, + #[prost(string, tag = "6")] + pub retention_policy_json: ::prost::alloc::string::String, + #[prost(message, optional, tag = "7")] + pub index_uid: ::core::option::Option, + #[prost(string, tag = "8")] + pub source_id: ::prost::alloc::string::String, + #[prost(string, tag = "9")] + pub index_storage_uri: ::prost::alloc::string::String, +} /// BEGIN quickwit-codegen #[allow(unused_imports)] use std::str::FromStr; use tower::{Layer, Service, ServiceExt}; use quickwit_common::tower::RpcName; -impl RpcName for PingRequest { - fn rpc_name() -> &'static str { - "ping" - } -} impl RpcName for ReportStatusRequest { fn rpc_name() -> &'static str { "report_status" @@ -68,10 +82,6 @@ impl RpcName for ReportStatusRequest { #[cfg_attr(any(test, feature = "testsuite"), mockall::automock)] #[async_trait::async_trait] pub trait CompactionPlannerService: std::fmt::Debug + Send + Sync + 'static { - async fn ping( - &self, - request: PingRequest, - ) -> crate::compaction::CompactionResult; async fn report_status( &self, request: ReportStatusRequest, @@ -188,12 +198,6 @@ impl CompactionPlannerServiceClient { } #[async_trait::async_trait] impl CompactionPlannerService for CompactionPlannerServiceClient { - async fn ping( - &self, - request: PingRequest, - ) -> crate::compaction::CompactionResult { - self.inner.0.ping(request).await - } async fn report_status( &self, request: ReportStatusRequest, @@ -210,12 +214,6 @@ pub mod mock_compaction_planner_service { } #[async_trait::async_trait] impl CompactionPlannerService for MockCompactionPlannerServiceWrapper { - async 
fn ping( - &self, - request: super::PingRequest, - ) -> crate::compaction::CompactionResult { - self.inner.lock().await.ping(request).await - } async fn report_status( &self, request: super::ReportStatusRequest, @@ -227,22 +225,6 @@ pub mod mock_compaction_planner_service { pub type BoxFuture = std::pin::Pin< Box> + Send + 'static>, >; -impl tower::Service for InnerCompactionPlannerServiceClient { - type Response = PingResponse; - type Error = crate::compaction::CompactionError; - type Future = BoxFuture; - fn poll_ready( - &mut self, - _cx: &mut std::task::Context<'_>, - ) -> std::task::Poll> { - std::task::Poll::Ready(Ok(())) - } - fn call(&mut self, request: PingRequest) -> Self::Future { - let svc = self.clone(); - let fut = async move { svc.0.ping(request).await }; - Box::pin(fut) - } -} impl tower::Service for InnerCompactionPlannerServiceClient { type Response = ReportStatusResponse; type Error = crate::compaction::CompactionError; @@ -264,11 +246,6 @@ impl tower::Service for InnerCompactionPlannerServiceClient struct CompactionPlannerServiceTowerServiceStack { #[allow(dead_code)] inner: InnerCompactionPlannerServiceClient, - ping_svc: quickwit_common::tower::BoxService< - PingRequest, - PingResponse, - crate::compaction::CompactionError, - >, report_status_svc: quickwit_common::tower::BoxService< ReportStatusRequest, ReportStatusResponse, @@ -277,12 +254,6 @@ struct CompactionPlannerServiceTowerServiceStack { } #[async_trait::async_trait] impl CompactionPlannerService for CompactionPlannerServiceTowerServiceStack { - async fn ping( - &self, - request: PingRequest, - ) -> crate::compaction::CompactionResult { - self.ping_svc.clone().ready().await?.call(request).await - } async fn report_status( &self, request: ReportStatusRequest, @@ -290,16 +261,6 @@ impl CompactionPlannerService for CompactionPlannerServiceTowerServiceStack { self.report_status_svc.clone().ready().await?.call(request).await } } -type PingLayer = quickwit_common::tower::BoxLayer< - 
quickwit_common::tower::BoxService< - PingRequest, - PingResponse, - crate::compaction::CompactionError, - >, - PingRequest, - PingResponse, - crate::compaction::CompactionError, ->; type ReportStatusLayer = quickwit_common::tower::BoxLayer< quickwit_common::tower::BoxService< ReportStatusRequest, @@ -312,37 +273,11 @@ type ReportStatusLayer = quickwit_common::tower::BoxLayer< >; #[derive(Debug, Default)] pub struct CompactionPlannerServiceTowerLayerStack { - ping_layers: Vec, report_status_layers: Vec, } impl CompactionPlannerServiceTowerLayerStack { pub fn stack_layer(mut self, layer: L) -> Self where - L: tower::Layer< - quickwit_common::tower::BoxService< - PingRequest, - PingResponse, - crate::compaction::CompactionError, - >, - > + Clone + Send + Sync + 'static, - , - >>::Service: tower::Service< - PingRequest, - Response = PingResponse, - Error = crate::compaction::CompactionError, - > + Clone + Send + Sync + 'static, - <, - >>::Service as tower::Service>::Future: Send + 'static, L: tower::Layer< quickwit_common::tower::BoxService< ReportStatusRequest, @@ -369,30 +304,10 @@ impl CompactionPlannerServiceTowerLayerStack { >, >>::Service as tower::Service>::Future: Send + 'static, { - self.ping_layers.push(quickwit_common::tower::BoxLayer::new(layer.clone())); self.report_status_layers .push(quickwit_common::tower::BoxLayer::new(layer.clone())); self } - pub fn stack_ping_layer(mut self, layer: L) -> Self - where - L: tower::Layer< - quickwit_common::tower::BoxService< - PingRequest, - PingResponse, - crate::compaction::CompactionError, - >, - > + Send + Sync + 'static, - L::Service: tower::Service< - PingRequest, - Response = PingResponse, - Error = crate::compaction::CompactionError, - > + Clone + Send + Sync + 'static, - >::Future: Send + 'static, - { - self.ping_layers.push(quickwit_common::tower::BoxLayer::new(layer)); - self - } pub fn stack_report_status_layer(mut self, layer: L) -> Self where L: tower::Layer< @@ -477,14 +392,6 @@ impl 
CompactionPlannerServiceTowerLayerStack { self, inner_client: InnerCompactionPlannerServiceClient, ) -> CompactionPlannerServiceClient { - let ping_svc = self - .ping_layers - .into_iter() - .rev() - .fold( - quickwit_common::tower::BoxService::new(inner_client.clone()), - |svc, layer| layer.layer(svc), - ); let report_status_svc = self .report_status_layers .into_iter() @@ -495,7 +402,6 @@ impl CompactionPlannerServiceTowerLayerStack { ); let tower_svc_stack = CompactionPlannerServiceTowerServiceStack { inner: inner_client, - ping_svc, report_status_svc, }; CompactionPlannerServiceClient::new(tower_svc_stack) @@ -574,24 +480,12 @@ where CompactionPlannerServiceMailbox< A, >: tower::Service< - PingRequest, - Response = PingResponse, - Error = crate::compaction::CompactionError, - Future = BoxFuture, - > - + tower::Service< - ReportStatusRequest, - Response = ReportStatusResponse, - Error = crate::compaction::CompactionError, - Future = BoxFuture, - >, + ReportStatusRequest, + Response = ReportStatusResponse, + Error = crate::compaction::CompactionError, + Future = BoxFuture, + >, { - async fn ping( - &self, - request: PingRequest, - ) -> crate::compaction::CompactionResult { - self.clone().call(request).await - } async fn report_status( &self, request: ReportStatusRequest, @@ -633,20 +527,6 @@ where + Send, T::Future: Send, { - async fn ping( - &self, - request: PingRequest, - ) -> crate::compaction::CompactionResult { - self.inner - .clone() - .ping(request) - .await - .map(|response| response.into_inner()) - .map_err(|status| crate::error::grpc_status_to_service_error( - status, - PingRequest::rpc_name(), - )) - } async fn report_status( &self, request: ReportStatusRequest, @@ -679,17 +559,6 @@ impl CompactionPlannerServiceGrpcServerAdapter { #[async_trait::async_trait] impl compaction_planner_service_grpc_server::CompactionPlannerServiceGrpc for CompactionPlannerServiceGrpcServerAdapter { - async fn ping( - &self, - request: tonic::Request, - ) -> Result, 
tonic::Status> { - self.inner - .0 - .ping(request.into_inner()) - .await - .map(tonic::Response::new) - .map_err(crate::error::grpc_error_to_grpc_status) - } async fn report_status( &self, request: tonic::Request, @@ -795,32 +664,6 @@ pub mod compaction_planner_service_grpc_client { self.inner = self.inner.max_encoding_message_size(limit); self } - pub async fn ping( - &mut self, - request: impl tonic::IntoRequest, - ) -> std::result::Result, tonic::Status> { - self.inner - .ready() - .await - .map_err(|e| { - tonic::Status::unknown( - format!("Service was not ready: {}", e.into()), - ) - })?; - let codec = tonic_prost::ProstCodec::default(); - let path = http::uri::PathAndQuery::from_static( - "/quickwit.compaction.CompactionPlannerService/Ping", - ); - let mut req = request.into_request(); - req.extensions_mut() - .insert( - GrpcMethod::new( - "quickwit.compaction.CompactionPlannerService", - "Ping", - ), - ); - self.inner.unary(req, path, codec).await - } pub async fn report_status( &mut self, request: impl tonic::IntoRequest, @@ -865,10 +708,6 @@ pub mod compaction_planner_service_grpc_server { /// Generated trait containing gRPC methods that should be implemented for use with CompactionPlannerServiceGrpcServer. 
#[async_trait] pub trait CompactionPlannerServiceGrpc: std::marker::Send + std::marker::Sync + 'static { - async fn ping( - &self, - request: tonic::Request, - ) -> std::result::Result, tonic::Status>; async fn report_status( &self, request: tonic::Request, @@ -954,51 +793,6 @@ pub mod compaction_planner_service_grpc_server { } fn call(&mut self, req: http::Request) -> Self::Future { match req.uri().path() { - "/quickwit.compaction.CompactionPlannerService/Ping" => { - #[allow(non_camel_case_types)] - struct PingSvc(pub Arc); - impl< - T: CompactionPlannerServiceGrpc, - > tonic::server::UnaryService for PingSvc { - type Response = super::PingResponse; - type Future = BoxFuture< - tonic::Response, - tonic::Status, - >; - fn call( - &mut self, - request: tonic::Request, - ) -> Self::Future { - let inner = Arc::clone(&self.0); - let fut = async move { - ::ping(&inner, request) - .await - }; - Box::pin(fut) - } - } - let accept_compression_encodings = self.accept_compression_encodings; - let send_compression_encodings = self.send_compression_encodings; - let max_decoding_message_size = self.max_decoding_message_size; - let max_encoding_message_size = self.max_encoding_message_size; - let inner = self.inner.clone(); - let fut = async move { - let method = PingSvc(inner); - let codec = tonic_prost::ProstCodec::default(); - let mut grpc = tonic::server::Grpc::new(codec) - .apply_compression_config( - accept_compression_encodings, - send_compression_encodings, - ) - .apply_max_message_size_config( - max_decoding_message_size, - max_encoding_message_size, - ); - let res = grpc.unary(method, req).await; - Ok(res) - }; - Box::pin(fut) - } "/quickwit.compaction.CompactionPlannerService/ReportStatus" => { #[allow(non_camel_case_types)] struct ReportStatusSvc(pub Arc); diff --git a/quickwit/quickwit-proto/src/compaction/mod.rs b/quickwit/quickwit-proto/src/compaction/mod.rs index 1e27855add3..0eee7adb6b0 100644 --- a/quickwit/quickwit-proto/src/compaction/mod.rs +++ 
b/quickwit/quickwit-proto/src/compaction/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use quickwit_actors::AskError; use quickwit_common::rate_limited_error; use quickwit_common::tower::MakeLoadShedError; use serde::{Deserialize, Serialize}; @@ -74,3 +75,17 @@ impl MakeLoadShedError for CompactionError { CompactionError::TooManyRequests } } + +impl From> for CompactionError { + fn from(error: AskError) -> Self { + match error { + AskError::ErrorReply(error) => error, + AskError::MessageNotDelivered => { + Self::new_unavailable("request could not be delivered to actor".to_string()) + } + AskError::ProcessMessageError => { + Self::new_internal("an error occurred while processing the request".to_string()) + } + } + } +} diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 9dafbfe9d4a..bc6db8835e0 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -72,7 +72,7 @@ use quickwit_common::tower::{ }; use quickwit_common::uri::Uri; use quickwit_common::{get_bool_from_env, spawn_named_task}; -use quickwit_compaction::planner::StubCompactionPlannerService; +use quickwit_compaction::planner::CompactionPlanner; use quickwit_compaction::{CompactorSupervisor, start_compactor_service}; use quickwit_config::service::QuickwitService; use quickwit_config::{ClusterConfig, IngestApiConfig, NodeConfig}; @@ -271,23 +271,28 @@ async fn balance_channel_for_service( BalanceChannel::from_stream(service_change_stream) } -/// Builds a `CompactionPlannerServiceClient` if the node runs the compactor. +/// Builds a `CompactionPlannerServiceClient` if the node runs the janitor or compactor. /// -/// On janitor+compactor nodes, wraps a local `StubCompactionPlannerService`. -/// On compactor-only nodes, connects to a remote janitor via gRPC. 
+/// On janitor nodes, spawns a `CompactionPlanner` actor and builds the client from +/// its mailbox. On compactor-only nodes, connects to a remote janitor via gRPC. async fn get_compaction_planner_client_if_needed( node_config: &NodeConfig, cluster: &Cluster, -) -> anyhow::Result, anyhow::Error> { - if !node_config.is_service_enabled(QuickwitService::Compactor) { + universe: &Universe, + metastore_client: &MetastoreServiceClient, +) -> anyhow::Result> { + let is_janitor = node_config.is_service_enabled(QuickwitService::Janitor); + let is_compactor = node_config.is_service_enabled(QuickwitService::Compactor); + if !is_janitor && !is_compactor { return Ok(None); } - if node_config.is_service_enabled(QuickwitService::Janitor) { - info!("compaction planner service enabled on this node"); - return Ok(Some(CompactionPlannerServiceClient::new( - StubCompactionPlannerService, - ))); + if is_janitor { + let planner = CompactionPlanner::new(metastore_client.clone()); + let (mailbox, _handle) = universe.spawn_builder().spawn(planner); + info!("compaction planner actor started on janitor node"); + return Ok(Some(CompactionPlannerServiceClient::from_mailbox(mailbox))); } + // Compactor-only node: connect to the planner on a remote janitor. 
let balance_channel = balance_channel_for_service(cluster, QuickwitService::Janitor).await; let found = balance_channel .wait_for(COMPACTION_SERVICE_DISCOVERY_TIMEOUT, |connections| { @@ -593,10 +598,14 @@ pub async fn serve_quickwit( .await .context("failed to start ingest v1 service")?; - let compaction_service_client_opt = - get_compaction_planner_client_if_needed(&node_config, &cluster) - .await - .context("failed to initialize compaction service client")?; + let compaction_service_client_opt = get_compaction_planner_client_if_needed( + &node_config, + &cluster, + &universe, + &metastore_client, + ) + .await + .context("failed to initialize compaction service client")?; let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) { let merge_scheduler_mailbox_opt = @@ -1917,26 +1926,40 @@ mod tests { create_cluster_for_test(Vec::new(), &["janitor", "indexer"], &transport, true) .await .unwrap(); + let universe = Universe::new(); + let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); - // Without compactor service enabled, no planner client. + // Janitor without compactor: planner client is returned (for gRPC registration). let mut node_config = NodeConfig::for_test(); node_config.enabled_services = HashSet::from([QuickwitService::Janitor, QuickwitService::Indexer]); - let result = get_compaction_planner_client_if_needed(&node_config, &cluster) - .await - .unwrap(); - assert!(result.is_none()); + let result = + get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore) + .await + .unwrap(); + assert!(result.is_some()); - // With compactor + janitor enabled, planner client is returned (local stub). + // With compactor + janitor enabled, planner client is also returned. 
node_config.enabled_services = HashSet::from([ QuickwitService::Janitor, QuickwitService::Indexer, QuickwitService::Compactor, ]); - let result = get_compaction_planner_client_if_needed(&node_config, &cluster) - .await - .unwrap(); + let result = + get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore) + .await + .unwrap(); assert!(result.is_some()); + + // Neither janitor nor compactor: no client. + node_config.enabled_services = HashSet::from([QuickwitService::Indexer]); + let result = + get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore) + .await + .unwrap(); + assert!(result.is_none()); + + universe.assert_quit().await; } #[tokio::test] @@ -1945,11 +1968,17 @@ mod tests { let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, false) .await .unwrap(); + let universe = Universe::new(); + let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); let mut node_config = NodeConfig::for_test(); node_config.enabled_services = HashSet::from([QuickwitService::Indexer, QuickwitService::Compactor]); - let result = get_compaction_planner_client_if_needed(&node_config, &cluster).await; + let result = + get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore) + .await; assert!(result.is_err()); + + universe.assert_quit().await; } } From 48d499c4fb88bc8ab752d5335812528c2fb56c83 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Thu, 16 Apr 2026 15:09:15 -0400 Subject: [PATCH 09/21] Make ReportStatus gRPC from compactors (#6311) --- .../src/compaction_pipeline.rs | 4 + .../src/compactor_supervisor.rs | 306 +++++++++++++++++- quickwit/quickwit-compaction/src/lib.rs | 8 +- quickwit/quickwit-indexing/src/lib.rs | 4 +- quickwit/quickwit-serve/src/lib.rs | 11 +- 5 files changed, 308 insertions(+), 25 deletions(-) diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs index 
62bb040e0ba..021b3fa27c5 100644 --- a/quickwit/quickwit-compaction/src/compaction_pipeline.rs +++ b/quickwit/quickwit-compaction/src/compaction_pipeline.rs @@ -113,6 +113,10 @@ impl CompactionPipeline { } } + pub fn status(&self) -> &PipelineStatus { + &self.status + } + fn supervisables(&self) -> Vec<&dyn Supervisable> { let Some(handles) = &self.handles else { return Vec::new(); diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index e1b27eb4a30..292c861972c 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -12,22 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::str::FromStr; +use std::sync::Arc; use std::time::Duration; use async_trait::async_trait; -use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; +use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, SpawnContext}; use quickwit_common::io::Limiter; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; -use quickwit_indexing::IndexingSplitStore; +use quickwit_common::uri::Uri; +use quickwit_config::{IndexingSettings, RetentionPolicy, SearchSettings, build_doc_mapper}; +use quickwit_doc_mapper::DocMapping; +use quickwit_indexing::merge_policy::{MergeOperation, merge_policy_from_settings}; +use quickwit_indexing::{IndexingSplitCache, IndexingSplitStore}; +use quickwit_metastore::SplitMetadata; use quickwit_proto::compaction::{ - CompactionFailure, CompactionInProgress, CompactionPlannerServiceClient, CompactionSuccess, - ReportStatusRequest, + CompactionFailure, CompactionInProgress, CompactionPlannerService, + CompactionPlannerServiceClient, CompactionSuccess, MergeTaskAssignment, ReportStatusRequest, }; +use quickwit_proto::indexing::MergePipelineId; use quickwit_proto::metastore::MetastoreServiceClient; 
use quickwit_proto::types::NodeId; use quickwit_storage::StorageResolver; -use tracing::info; +use tracing::{error, info, warn}; use crate::compaction_pipeline::{CompactionPipeline, PipelineStatus, PipelineStatusUpdate}; @@ -47,7 +55,7 @@ pub struct CompactorSupervisor { // Shared resources distributed to pipelines when spawning actor chains. io_throughput_limiter: Option, - split_store: IndexingSplitStore, + split_cache: Arc, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, max_concurrent_split_uploads: usize, @@ -64,7 +72,7 @@ impl CompactorSupervisor { planner_client: CompactionPlannerServiceClient, num_pipeline_slots: usize, io_throughput_limiter: Option, - split_store: IndexingSplitStore, + split_cache: Arc, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, max_concurrent_split_uploads: usize, @@ -77,7 +85,7 @@ impl CompactorSupervisor { planner_client, pipelines, io_throughput_limiter, - split_store, + split_cache, metastore, storage_resolver, max_concurrent_split_uploads, @@ -97,6 +105,100 @@ impl CompactorSupervisor { statuses } + async fn process_new_tasks( + &mut self, + assignments: Vec, + spawn_ctx: &SpawnContext, + ) { + for assignment in assignments { + let task_id = assignment.task_id.clone(); + if let Err(err) = self.spawn_task(assignment, spawn_ctx).await { + error!(task_id=%task_id, error=%err, "failed to spawn compaction task"); + } + } + } + + async fn spawn_task( + &mut self, + assignment: MergeTaskAssignment, + spawn_ctx: &SpawnContext, + ) -> anyhow::Result<()> { + let slot_idx = self + .pipelines + .iter() + .position(|slot| match slot { + None => true, + Some(p) => matches!( + p.status(), + PipelineStatus::Completed | PipelineStatus::Failed { .. 
} + ), + }) + .ok_or_else(|| anyhow::anyhow!("no free pipeline slot"))?; + let scratch_directory = self + .compaction_root_directory + .named_temp_child(&assignment.task_id)?; + let mut pipeline = self + .build_compaction_pipeline(assignment, scratch_directory) + .await?; + pipeline.spawn_pipeline(spawn_ctx)?; + self.pipelines[slot_idx] = Some(pipeline); + Ok(()) + } + + async fn build_compaction_pipeline( + &self, + assignment: MergeTaskAssignment, + scratch_directory: TempDirectory, + ) -> anyhow::Result { + let splits: Vec = assignment + .splits_metadata_json + .iter() + .map(|json| serde_json::from_str(json)) + .collect::, serde_json::Error>>()?; + let doc_mapping: DocMapping = serde_json::from_str(&assignment.doc_mapping_json)?; + let search_settings: SearchSettings = + serde_json::from_str(&assignment.search_settings_json)?; + let indexing_settings: IndexingSettings = + serde_json::from_str(&assignment.indexing_settings_json)?; + let retention_policy: Option = + if assignment.retention_policy_json.is_empty() { + None + } else { + Some(serde_json::from_str(&assignment.retention_policy_json)?) 
+ }; + let index_uid = assignment + .index_uid + .ok_or_else(|| anyhow::anyhow!("missing index_uid in MergeTaskAssignment"))?; + + let index_storage_uri = Uri::from_str(&assignment.index_storage_uri)?; + let index_storage = self.storage_resolver.resolve(&index_storage_uri).await?; + let split_store = IndexingSplitStore::new(index_storage, self.split_cache.clone()); + + let doc_mapper = build_doc_mapper(&doc_mapping, &search_settings)?; + let merge_policy = merge_policy_from_settings(&indexing_settings); + let merge_operation = MergeOperation::new_merge_operation(splits); + let pipeline_id = MergePipelineId { + node_id: self.node_id.clone(), + index_uid, + source_id: assignment.source_id, + }; + + Ok(CompactionPipeline::new( + assignment.task_id, + scratch_directory, + merge_operation, + pipeline_id, + doc_mapper, + merge_policy, + retention_policy, + self.metastore.clone(), + split_store, + self.io_throughput_limiter.clone(), + self.max_concurrent_split_uploads, + self.event_broker.clone(), + )) + } + fn build_report_status_request( &self, statuses: &[PipelineStatusUpdate], @@ -176,8 +278,16 @@ impl Handler for CompactorSupervisor { ctx: &ActorContext, ) -> Result<(), ActorExitStatus> { let statuses = self.check_pipeline_statuses(); - let _request = self.build_report_status_request(&statuses); - // TODO: send request to planner via gRPC, clear completed/failed slots on success. 
+ let request = self.build_report_status_request(&statuses); + match self.planner_client.report_status(request).await { + Ok(response) => { + self.process_new_tasks(response.new_tasks, ctx.spawn_ctx()) + .await; + } + Err(err) => { + warn!(error=%err, "failed to report status to compaction planner"); + } + } ctx.schedule_self_msg(CHECK_PIPELINE_STATUSES_INTERVAL, CheckPipelineStatuses); Ok(()) } @@ -194,14 +304,12 @@ mod tests { }; use quickwit_proto::metastore::{MetastoreServiceClient, MockMetastoreService}; use quickwit_proto::types::NodeId; - use quickwit_storage::{RamStorage, StorageResolver}; + use quickwit_storage::StorageResolver; use super::*; use crate::compaction_pipeline::tests::test_pipeline; fn test_supervisor(num_slots: usize) -> CompactorSupervisor { - let storage = Arc::new(RamStorage::default()); - let split_store = IndexingSplitStore::create_without_local_store_for_test(storage); let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); let compaction_client = CompactionPlannerServiceClient::from_mock(MockCompactionPlannerService::new()); @@ -210,7 +318,7 @@ mod tests { compaction_client, num_slots, None, - split_store, + Arc::new(IndexingSplitCache::no_caching()), metastore, StorageResolver::for_test(), 2, @@ -282,6 +390,176 @@ mod tests { assert!(request.failures.is_empty()); } + fn test_assignment(task_id: &str, split_ids: &[&str]) -> MergeTaskAssignment { + let index_metadata = + quickwit_metastore::IndexMetadata::for_test("test-index", "ram:///test-index"); + let config = &index_metadata.index_config; + let splits: Vec = split_ids + .iter() + .map(|id| quickwit_metastore::SplitMetadata::for_test(id.to_string())) + .collect(); + MergeTaskAssignment { + task_id: task_id.to_string(), + splits_metadata_json: splits + .iter() + .map(|s| serde_json::to_string(s).unwrap()) + .collect(), + doc_mapping_json: serde_json::to_string(&config.doc_mapping).unwrap(), + search_settings_json: 
serde_json::to_string(&config.search_settings).unwrap(), + indexing_settings_json: serde_json::to_string(&config.indexing_settings).unwrap(), + retention_policy_json: String::new(), + index_uid: Some(index_metadata.index_uid.clone()), + source_id: "test-source".to_string(), + index_storage_uri: config.index_uri.to_string(), + } + } + + #[tokio::test] + async fn test_build_compaction_pipeline_deserialization_errors() { + let supervisor = test_supervisor(4); + let scratch = TempDirectory::for_test; + + // Bad splits JSON. + let mut assignment = test_assignment("t", &["s1"]); + assignment.splits_metadata_json = vec!["not json".to_string()]; + assert!( + supervisor + .build_compaction_pipeline(assignment, scratch()) + .await + .is_err() + ); + + // Bad doc mapping JSON. + let mut assignment = test_assignment("t", &["s1"]); + assignment.doc_mapping_json = "not json".to_string(); + assert!( + supervisor + .build_compaction_pipeline(assignment, scratch()) + .await + .is_err() + ); + + // Bad search settings JSON. + let mut assignment = test_assignment("t", &["s1"]); + assignment.search_settings_json = "not json".to_string(); + assert!( + supervisor + .build_compaction_pipeline(assignment, scratch()) + .await + .is_err() + ); + + // Bad indexing settings JSON. + let mut assignment = test_assignment("t", &["s1"]); + assignment.indexing_settings_json = "not json".to_string(); + assert!( + supervisor + .build_compaction_pipeline(assignment, scratch()) + .await + .is_err() + ); + + // Bad retention policy JSON (non-empty but invalid). + let mut assignment = test_assignment("t", &["s1"]); + assignment.retention_policy_json = "not json".to_string(); + assert!( + supervisor + .build_compaction_pipeline(assignment, scratch()) + .await + .is_err() + ); + + // Missing index_uid. 
+ let mut assignment = test_assignment("t", &["s1"]); + assignment.index_uid = None; + assert!( + supervisor + .build_compaction_pipeline(assignment, scratch()) + .await + .is_err() + ); + } + + #[tokio::test] + async fn test_spawn_task_fails_when_all_slots_occupied() { + let universe = Universe::new(); + let mut supervisor = test_supervisor(2); + + supervisor + .spawn_task(test_assignment("task-1", &["s1"]), universe.spawn_ctx()) + .await + .unwrap(); + supervisor + .spawn_task(test_assignment("task-2", &["s2"]), universe.spawn_ctx()) + .await + .unwrap(); + + // Both slots InProgress — no room. + assert!( + supervisor + .spawn_task(test_assignment("task-3", &["s3"]), universe.spawn_ctx()) + .await + .is_err() + ); + + universe.assert_quit().await; + } + + #[tokio::test] + async fn test_end_to_end_report_status_and_spawn() { + let universe = Universe::new(); + + // Mock planner that returns one assignment on the first call. + let assignment = test_assignment("planner-task-1", &["s1", "s2"]); + let assignments = vec![assignment]; + let assignments_clone = assignments.clone(); + let mut mock = quickwit_proto::compaction::MockCompactionPlannerService::new(); + mock.expect_report_status().times(1).returning(move |_req| { + Ok(quickwit_proto::compaction::ReportStatusResponse { + new_tasks: assignments_clone.clone(), + }) + }); + + let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); + let client = CompactionPlannerServiceClient::from_mock(mock); + let mut supervisor = CompactorSupervisor::new( + NodeId::from("test-node"), + client, + 3, + None, + Arc::new(IndexingSplitCache::no_caching()), + metastore, + StorageResolver::for_test(), + 2, + EventBroker::default(), + TempDirectory::for_test(), + ); + + // Simulate what the handler does: collect statuses, report, process response. 
+ let statuses = supervisor.check_pipeline_statuses(); + let request = supervisor.build_report_status_request(&statuses); + assert_eq!(request.available_slots, 3); + + let response = supervisor + .planner_client + .report_status(request) + .await + .unwrap(); + supervisor + .process_new_tasks(response.new_tasks, universe.spawn_ctx()) + .await; + + // Verify the pipeline was spawned. + let statuses = supervisor.check_pipeline_statuses(); + let request = supervisor.build_report_status_request(&statuses); + assert_eq!(request.in_progress.len(), 1); + assert_eq!(request.in_progress[0].task_id, "planner-task-1"); + assert_eq!(request.in_progress[0].split_ids.len(), 2); + assert_eq!(request.available_slots, 2); + + universe.assert_quit().await; + } + #[test] fn test_build_report_status_request_mixed_statuses() { let supervisor = test_supervisor(4); diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs index 05f79d83f73..98e5fc9a3d1 100644 --- a/quickwit/quickwit-compaction/src/lib.rs +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -20,13 +20,15 @@ mod compaction_pipeline; mod compactor_supervisor; pub mod planner; +use std::sync::Arc; + pub use compactor_supervisor::CompactorSupervisor; use quickwit_actors::{Mailbox, Universe}; use quickwit_common::io; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::CompactorConfig; -use quickwit_indexing::IndexingSplitStore; +use quickwit_indexing::IndexingSplitCache; use quickwit_proto::compaction::CompactionPlannerServiceClient; use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_proto::types::NodeId; @@ -39,7 +41,7 @@ pub async fn start_compactor_service( node_id: NodeId, compaction_client: CompactionPlannerServiceClient, compactor_config: &CompactorConfig, - split_store: IndexingSplitStore, + split_cache: Arc, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, event_broker: EventBroker, @@ -53,7 
+55,7 @@ pub async fn start_compactor_service( compaction_client, compactor_config.max_concurrent_pipelines.get(), io_throughput_limiter, - split_store, + split_cache, metastore, storage_resolver, compactor_config.max_concurrent_split_uploads, diff --git a/quickwit/quickwit-indexing/src/lib.rs b/quickwit/quickwit-indexing/src/lib.rs index 8adde285bf3..66c50d4d150 100644 --- a/quickwit/quickwit-indexing/src/lib.rs +++ b/quickwit/quickwit-indexing/src/lib.rs @@ -31,7 +31,9 @@ pub use crate::actors::{ }; pub use crate::controlled_directory::ControlledDirectory; use crate::models::IndexingStatistics; -pub use crate::split_store::{IndexingSplitStore, get_tantivy_directory_from_split_bundle}; +pub use crate::split_store::{ + IndexingSplitCache, IndexingSplitStore, get_tantivy_directory_from_split_bundle, +}; pub mod actors; mod controlled_directory; diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index bc6db8835e0..4c2e8d8499c 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -81,7 +81,7 @@ use quickwit_control_plane::{IndexerNodeInfo, IndexerPool}; use quickwit_index_management::{IndexService as IndexManager, IndexServiceError}; use quickwit_indexing::actors::{IndexingService, MergeSchedulerService}; use quickwit_indexing::models::ShardPositionsService; -use quickwit_indexing::{IndexingSplitStore, start_indexing_service}; +use quickwit_indexing::start_indexing_service; use quickwit_ingest::{ GetMemoryCapacity, IngestRequest, IngestRouter, IngestServiceClient, Ingester, IngesterPool, IngesterPoolEntry, LocalShardsUpdate, get_idle_shard_timeout, @@ -114,7 +114,7 @@ use quickwit_search::{ SearchJobPlacer, SearchService, SearchServiceClient, SearcherContext, SearcherPool, create_search_client_from_channel, start_searcher_service, }; -use quickwit_storage::{RamStorage, SplitCache, StorageResolver}; +use quickwit_storage::{SplitCache, StorageResolver}; use tcp_listener::TcpListenerResolver; use 
tokio::sync::oneshot; use tonic::codec::CompressionEncoding; @@ -795,10 +795,7 @@ pub async fn serve_quickwit( let compaction_root_directory = quickwit_common::temp_dir::Builder::default() .tempdir_in(&compaction_dir) .context("failed to create compaction temp directory")?; - // TODO: Real split store - let split_store = IndexingSplitStore::create_without_local_store_for_test(Arc::new( - RamStorage::default(), - )); + let split_cache = Arc::new(quickwit_indexing::IndexingSplitCache::no_caching()); let compaction_client = compaction_service_client_opt .clone() .expect("compactor service enabled but no compaction client available"); @@ -807,7 +804,7 @@ pub async fn serve_quickwit( cluster.self_node_id().into(), compaction_client, &node_config.compactor_config, - split_store, + split_cache, metastore_client.clone(), storage_resolver.clone(), event_broker.clone(), From 1e44a30c7e651382f7c47baed668beb863feef52 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Tue, 12 May 2026 10:48:00 -0400 Subject: [PATCH 10/21] Remove merge code from indexers; Renaming and cleanup (#6346) --- quickwit/quickwit-cli/src/main.rs | 27 +- quickwit/quickwit-cli/src/tool.rs | 140 +---- .../src/compaction_pipeline.rs | 43 +- .../src/compactor_supervisor.rs | 26 +- quickwit/quickwit-compaction/src/lib.rs | 2 + .../src/planner/compaction_planner.rs | 38 +- .../src/planner/compaction_state.rs | 33 +- ...fig_store.rs => index_config_metastore.rs} | 29 +- .../quickwit-compaction/src/planner/mod.rs | 2 +- .../src/actors/indexing_pipeline.rs | 132 +---- .../src/actors/indexing_service.rs | 550 +----------------- quickwit/quickwit-indexing/src/lib.rs | 3 - .../src/models/indexing_service_message.rs | 10 +- quickwit/quickwit-indexing/src/models/mod.rs | 4 +- quickwit/quickwit-indexing/src/test_utils.rs | 2 - .../protos/quickwit/compaction.proto | 1 - .../codegen/quickwit/quickwit.compaction.rs | 2 - quickwit/quickwit-serve/src/lib.rs | 19 +- 18 files changed, 123 insertions(+), 940 deletions(-) rename 
quickwit/quickwit-compaction/src/planner/{index_config_store.rs => index_config_metastore.rs} (91%) diff --git a/quickwit/quickwit-cli/src/main.rs b/quickwit/quickwit-cli/src/main.rs index 4a1f9ce036e..0d7d5f7c91a 100644 --- a/quickwit/quickwit-cli/src/main.rs +++ b/quickwit/quickwit-cli/src/main.rs @@ -150,7 +150,7 @@ mod tests { }; use quickwit_cli::split::{DescribeSplitArgs, SplitCliCommand}; use quickwit_cli::tool::{ - ExtractSplitArgs, GarbageCollectIndexArgs, LocalIngestDocsArgs, LocalSearchArgs, MergeArgs, + ExtractSplitArgs, GarbageCollectIndexArgs, LocalIngestDocsArgs, LocalSearchArgs, ToolCliCommand, }; use quickwit_common::uri::Uri; @@ -740,31 +740,6 @@ mod tests { Ok(()) } - #[test] - fn test_parse_merge_args() -> anyhow::Result<()> { - let app = build_cli().no_binary_name(true); - let matches = app.try_get_matches_from([ - "tool", - "merge", - "--index", - "wikipedia", - "--source", - "ingest-source", - "--config", - "/config.yaml", - ])?; - let command = CliCommand::parse_cli_args(matches)?; - assert!(matches!( - command, - CliCommand::Tool(ToolCliCommand::Merge(MergeArgs { - index_id, - source_id, - .. 
- })) if &index_id == "wikipedia" && source_id == "ingest-source" - )); - Ok(()) - } - #[test] fn test_parse_no_color() { // SAFETY: this test may not be entirely sound if not run with nextest or --test-threads=1 diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs index 04843d7e801..12d174fc572 100644 --- a/quickwit/quickwit-cli/src/tool.rs +++ b/quickwit/quickwit-cli/src/tool.rs @@ -24,7 +24,7 @@ use anyhow::{Context, bail}; use clap::{ArgMatches, Command, arg}; use colored::{ColoredString, Colorize}; use humantime::format_duration; -use quickwit_actors::{ActorExitStatus, ActorHandle, Mailbox, Universe}; +use quickwit_actors::{ActorHandle, Universe}; use quickwit_cluster::{ ChannelTransport, Cluster, ClusterMember, FailureDetectorConfig, make_client_grpc_config, }; @@ -34,28 +34,26 @@ use quickwit_common::uri::Uri; use quickwit_config::service::QuickwitService; use quickwit_config::{ CLI_SOURCE_ID, IndexerConfig, NodeConfig, SourceConfig, SourceInputFormat, SourceParams, - TransformConfig, VecSourceParams, + TransformConfig, }; use quickwit_index_management::{IndexService, clear_cache_directory}; use quickwit_indexing::IndexingPipeline; -use quickwit_indexing::actors::{IndexingService, MergePipeline, MergeSchedulerService}; -use quickwit_indexing::models::{ - DetachIndexingPipeline, DetachMergePipeline, IndexingStatistics, SpawnPipeline, -}; +use quickwit_indexing::actors::IndexingService; +use quickwit_indexing::models::{DetachIndexingPipeline, IndexingStatistics, SpawnPipeline}; use quickwit_ingest::IngesterPool; use quickwit_metastore::IndexMetadataResponseExt; use quickwit_proto::indexing::CpuCapacity; use quickwit_proto::ingest::ingester::IngesterStatus; use quickwit_proto::metastore::{IndexMetadataRequest, MetastoreService, MetastoreServiceClient}; use quickwit_proto::search::{CountHits, SearchResponse}; -use quickwit_proto::types::{IndexId, PipelineUid, SourceId, SplitId}; +use quickwit_proto::types::{IndexId, PipelineUid, 
SplitId}; use quickwit_search::{SearchResponseRest, single_node_search}; use quickwit_serve::{ BodyFormat, SearchRequestQueryString, SortBy, search_request_from_api_request, }; use quickwit_storage::{BundleStorage, Storage}; use thousands::Separable; -use tracing::{debug, info}; +use tracing::debug; use crate::checklist::{GREEN_COLOR, RED_COLOR}; use crate::{ @@ -201,13 +199,6 @@ pub struct GarbageCollectIndexArgs { pub dry_run: bool, } -#[derive(Debug, Eq, PartialEq)] -pub struct MergeArgs { - pub config_uri: Uri, - pub index_id: IndexId, - pub source_id: SourceId, -} - #[derive(Debug, Eq, PartialEq)] pub struct ExtractSplitArgs { pub config_uri: Uri, @@ -221,7 +212,6 @@ pub enum ToolCliCommand { GarbageCollect(GarbageCollectIndexArgs), LocalIngest(LocalIngestDocsArgs), LocalSearch(LocalSearchArgs), - Merge(MergeArgs), ExtractSplit(ExtractSplitArgs), } @@ -234,7 +224,6 @@ impl ToolCliCommand { "gc" => Self::parse_garbage_collect_args(submatches), "local-ingest" => Self::parse_local_ingest_args(submatches), "local-search" => Self::parse_local_search_args(submatches), - "merge" => Self::parse_merge_args(submatches), "extract-split" => Self::parse_extract_split_args(submatches), _ => bail!("unknown tool subcommand `{subcommand}`"), } @@ -324,24 +313,6 @@ impl ToolCliCommand { })) } - fn parse_merge_args(mut matches: ArgMatches) -> anyhow::Result { - let config_uri = matches - .remove_one::("config") - .map(|uri_str| Uri::from_str(&uri_str)) - .expect("`config` should be a required arg.")?; - let index_id = matches - .remove_one::("index") - .expect("'index-id' should be a required arg."); - let source_id = matches - .remove_one::("source") - .expect("'source-id' should be a required arg."); - Ok(Self::Merge(MergeArgs { - index_id, - source_id, - config_uri, - })) - } - fn parse_garbage_collect_args(mut matches: ArgMatches) -> anyhow::Result { let config_uri = matches .get_one("config") @@ -391,7 +362,6 @@ impl ToolCliCommand { Self::GarbageCollect(args) => 
garbage_collect_index_cli(args).await, Self::LocalIngest(args) => local_ingest_docs_cli(args).await, Self::LocalSearch(args) => local_search_cli(args).await, - Self::Merge(args) => merge_cli(args).await, Self::ExtractSplit(args) => extract_split_cli(args).await, } } @@ -447,7 +417,6 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< &HashSet::from_iter([QuickwitService::Indexer]), )?; let universe = Universe::new(); - let merge_scheduler_service_mailbox = universe.get_or_spawn_one(); let indexing_server = IndexingService::new( config.node_id.clone(), config.data_dir_path.clone(), @@ -456,7 +425,6 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< cluster, metastore, None, - Some(merge_scheduler_service_mailbox), IngesterPool::default(), storage_resolver, EventBroker::default(), @@ -471,11 +439,6 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< pipeline_uid: PipelineUid::random(), }) .await?; - let merge_pipeline_handle = indexing_server_mailbox - .ask_for_res(DetachMergePipeline { - pipeline_id: pipeline_id.merge_pipeline_id(), - }) - .await?; let indexing_pipeline_handle = indexing_server_mailbox .ask_for_res(DetachIndexingPipeline { pipeline_id }) .await?; @@ -493,11 +456,6 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< let statistics = start_statistics_reporting_loop(indexing_pipeline_handle, args.input_path_opt.is_none()) .await?; - merge_pipeline_handle - .mailbox() - .ask(quickwit_indexing::FinishPendingMergesAndShutdownPipeline) - .await?; - merge_pipeline_handle.join().await; // Shutdown the indexing server. 
universe .send_exit_with_success(&indexing_server_mailbox) @@ -565,92 +523,6 @@ pub async fn local_search_cli(args: LocalSearchArgs) -> anyhow::Result<()> { Ok(()) } -pub async fn merge_cli(args: MergeArgs) -> anyhow::Result<()> { - debug!(args=?args, "run-merge-operations"); - println!("❯ Merging splits locally..."); - let config = load_node_config(&args.config_uri).await?; - let (storage_resolver, metastore_resolver) = - get_resolvers(&config.storage_configs, &config.metastore_configs); - let mut metastore = metastore_resolver.resolve(&config.metastore_uri).await?; - run_index_checklist(&mut metastore, &storage_resolver, &args.index_id, None).await?; - // The indexing service needs to update its cluster chitchat state so that the control plane is - // aware of the running tasks. We thus create a fake cluster to instantiate the indexing service - // and avoid impacting potential control plane running on the cluster. - let cluster = create_empty_cluster(&config).await?; - let runtimes_config = RuntimesConfig::default(); - start_actor_runtimes( - runtimes_config, - &HashSet::from_iter([QuickwitService::Indexer]), - )?; - let indexer_config = IndexerConfig::default(); - let universe = Universe::new(); - let merge_scheduler_service: Mailbox = universe.get_or_spawn_one(); - let indexing_server = IndexingService::new( - config.node_id, - config.data_dir_path, - indexer_config, - runtimes_config.num_threads_blocking, - cluster, - metastore, - None, - Some(merge_scheduler_service), - IngesterPool::default(), - storage_resolver, - EventBroker::default(), - ) - .await?; - let (indexing_service_mailbox, indexing_service_handle) = - universe.spawn_builder().spawn(indexing_server); - let pipeline_id = indexing_service_mailbox - .ask_for_res(SpawnPipeline { - index_id: args.index_id, - source_config: SourceConfig { - source_id: args.source_id, - num_pipelines: NonZeroUsize::MIN, - enabled: true, - source_params: SourceParams::Vec(VecSourceParams::default()), - transform_config: 
None, - input_format: SourceInputFormat::Json, - }, - pipeline_uid: PipelineUid::random(), - }) - .await?; - let pipeline_handle: ActorHandle = indexing_service_mailbox - .ask_for_res(DetachMergePipeline { - pipeline_id: pipeline_id.merge_pipeline_id(), - }) - .await?; - - let mut check_interval = tokio::time::interval(Duration::from_secs(1)); - loop { - check_interval.tick().await; - - pipeline_handle.refresh_observe(); - let observation = pipeline_handle.last_observation(); - - if observation.num_ongoing_merges == 0 { - info!("merge pipeline has no more ongoing merges, exiting"); - break; - } - - if pipeline_handle.state().is_exit() { - info!("merge pipeline has exited, exiting"); - break; - } - } - - let (pipeline_exit_status, _pipeline_statistics) = pipeline_handle.quit().await; - indexing_service_handle.quit().await; - if !matches!( - pipeline_exit_status, - ActorExitStatus::Success | ActorExitStatus::Quit - ) { - bail!(pipeline_exit_status); - } - println!("{} Merge successful.", "✔".color(GREEN_COLOR)); - Ok(()) -} - pub async fn garbage_collect_index_cli(args: GarbageCollectIndexArgs) -> anyhow::Result<()> { debug!(args=?args, "garbage-collect-index"); println!("❯ Garbage collecting index..."); diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs index 021b3fa27c5..270cb5312d9 100644 --- a/quickwit/quickwit-compaction/src/compaction_pipeline.rs +++ b/quickwit/quickwit-compaction/src/compaction_pipeline.rs @@ -13,8 +13,9 @@ // limitations under the License. 
use std::sync::Arc; +use std::time::Instant; -use quickwit_actors::{ActorHandle, Health, SpawnContext, Supervisable}; +use quickwit_actors::{ActorHandle, HEARTBEAT, Health, SpawnContext, Supervisable}; use quickwit_common::KillSwitch; use quickwit_common::io::{IoControls, Limiter}; use quickwit_common::pubsub::EventBroker; @@ -31,6 +32,8 @@ use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_proto::types::{IndexUid, SourceId, SplitId}; use tracing::{debug, error, info}; +use crate::TaskId; + #[derive(Clone, Debug, PartialEq)] pub enum PipelineStatus { InProgress, @@ -39,11 +42,10 @@ pub enum PipelineStatus { } pub struct PipelineStatusUpdate { - pub task_id: String, + pub task_id: TaskId, pub index_uid: IndexUid, pub source_id: SourceId, pub split_ids: Vec, - pub merged_split_id: SplitId, pub status: PipelineStatus, } @@ -53,6 +55,22 @@ struct CompactionPipelineHandles { merge_packager: ActorHandle, merge_uploader: ActorHandle, merge_publisher: ActorHandle, + next_check_for_progress: Instant, +} + +impl CompactionPipelineHandles { + /// Returns true once per HEARTBEAT interval. Between heartbeats we only + /// check whether actors are alive, not whether they are making progress. + /// This gives CPU-intensive actors (like the packager) up to 30s to + /// complete before being declared unhealthy. + fn should_check_for_progress(&mut self) -> bool { + let now = Instant::now(); + let check_for_progress = now > self.next_check_for_progress; + if check_for_progress { + self.next_check_for_progress = now + *HEARTBEAT; + } + check_for_progress + } } /// A single-use compaction pipeline. Processes one merge task and terminates. @@ -61,7 +79,7 @@ struct CompactionPipelineHandles { /// `check_actor_health()` to collect status updates. The pipeline manages /// its own retry logic internally. 
pub struct CompactionPipeline { - task_id: String, + task_id: TaskId, merge_operation: MergeOperation, pipeline_id: MergePipelineId, status: PipelineStatus, @@ -81,7 +99,7 @@ pub struct CompactionPipeline { impl CompactionPipeline { #[allow(clippy::too_many_arguments)] pub fn new( - task_id: String, + task_id: TaskId, scratch_directory: TempDirectory, merge_operation: MergeOperation, pipeline_id: MergePipelineId, @@ -149,16 +167,21 @@ impl CompactionPipeline { ) { return; } - // Pipeline is not initialized yet. - if self.handles.is_none() { + let Some(handles) = self.handles.as_mut() else { return; - } + }; + + // We check whether actors are alive on every tick (1s), but only + // check whether they are making progress once per HEARTBEAT (30s). + // This gives CPU-intensive actors like the packager time to finish + // without being falsely declared unhealthy. + let check_for_progress = handles.should_check_for_progress(); let mut has_healthy = false; let mut failure_actor_names: Vec = Vec::new(); for supervisable in self.supervisables() { - match supervisable.check_health(true) { + match supervisable.check_health(check_for_progress) { Health::Healthy => { has_healthy = true; } @@ -192,7 +215,6 @@ impl CompactionPipeline { .iter() .map(|split| split.split_id().to_string()) .collect(), - merged_split_id: self.merge_operation.merge_split_id.clone(), status: self.status.clone(), } } @@ -286,6 +308,7 @@ impl CompactionPipeline { merge_packager: merge_packager_handle, merge_uploader: merge_uploader_handle, merge_publisher: merge_publisher_handle, + next_check_for_progress: Instant::now() + *HEARTBEAT, }); Ok(()) diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index 292c861972c..188125689aa 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -112,8 +112,8 @@ impl CompactorSupervisor { ) { for assignment in 
assignments { let task_id = assignment.task_id.clone(); - if let Err(err) = self.spawn_task(assignment, spawn_ctx).await { - error!(task_id=%task_id, error=%err, "failed to spawn compaction task"); + if let Err(error) = self.spawn_task(assignment, spawn_ctx).await { + error!(%task_id, %error, "failed to spawn compaction task"); } } } @@ -226,7 +226,6 @@ impl CompactorSupervisor { PipelineStatus::Completed => { successes.push(CompactionSuccess { task_id: update.task_id.clone(), - merged_split_id: update.merged_split_id.clone(), }); } PipelineStatus::Failed { error } => { @@ -279,13 +278,16 @@ impl Handler for CompactorSupervisor { ) -> Result<(), ActorExitStatus> { let statuses = self.check_pipeline_statuses(); let request = self.build_report_status_request(&statuses); - match self.planner_client.report_status(request).await { + match ctx + .protect_future(self.planner_client.report_status(request)) + .await + { Ok(response) => { - self.process_new_tasks(response.new_tasks, ctx.spawn_ctx()) + ctx.protect_future(self.process_new_tasks(response.new_tasks, ctx.spawn_ctx())) .await; } - Err(err) => { - warn!(error=%err, "failed to report status to compaction planner"); + Err(error) => { + warn!(%error, "failed to report status to compaction planner"); } } ctx.schedule_self_msg(CHECK_PIPELINE_STATUSES_INTERVAL, CheckPipelineStatuses); @@ -394,9 +396,9 @@ mod tests { let index_metadata = quickwit_metastore::IndexMetadata::for_test("test-index", "ram:///test-index"); let config = &index_metadata.index_config; - let splits: Vec = split_ids + let splits: Vec = split_ids .iter() - .map(|id| quickwit_metastore::SplitMetadata::for_test(id.to_string())) + .map(|id| SplitMetadata::for_test(id.to_string())) .collect(); MergeTaskAssignment { task_id: task_id.to_string(), @@ -513,7 +515,7 @@ mod tests { let assignment = test_assignment("planner-task-1", &["s1", "s2"]); let assignments = vec![assignment]; let assignments_clone = assignments.clone(); - let mut mock = 
quickwit_proto::compaction::MockCompactionPlannerService::new(); + let mut mock = MockCompactionPlannerService::new(); mock.expect_report_status().times(1).returning(move |_req| { Ok(quickwit_proto::compaction::ReportStatusResponse { new_tasks: assignments_clone.clone(), @@ -569,7 +571,6 @@ mod tests { index_uid: quickwit_proto::types::IndexUid::for_test("test-index", 0), source_id: "src".to_string(), split_ids: vec!["s1".to_string(), "s2".to_string()], - merged_split_id: "merged-1".to_string(), status: PipelineStatus::InProgress, }, PipelineStatusUpdate { @@ -577,7 +578,6 @@ mod tests { index_uid: quickwit_proto::types::IndexUid::for_test("test-index", 0), source_id: "src".to_string(), split_ids: vec!["s3".to_string()], - merged_split_id: "merged-2".to_string(), status: PipelineStatus::Completed, }, PipelineStatusUpdate { @@ -585,7 +585,6 @@ mod tests { index_uid: quickwit_proto::types::IndexUid::for_test("test-index", 0), source_id: "src".to_string(), split_ids: vec!["s4".to_string()], - merged_split_id: "merged-3".to_string(), status: PipelineStatus::Failed { error: "boom".to_string(), }, @@ -600,7 +599,6 @@ mod tests { assert_eq!(request.in_progress[0].split_ids, vec!["s1", "s2"]); assert_eq!(request.successes.len(), 1); assert_eq!(request.successes[0].task_id, "task-2"); - assert_eq!(request.successes[0].merged_split_id, "merged-2"); assert_eq!(request.failures.len(), 1); assert_eq!(request.failures[0].task_id, "task-3"); assert_eq!(request.failures[0].error_message, "boom"); diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs index 98e5fc9a3d1..352f153b4fe 100644 --- a/quickwit/quickwit-compaction/src/lib.rs +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -20,6 +20,8 @@ mod compaction_pipeline; mod compactor_supervisor; pub mod planner; +pub type TaskId = String; + use std::sync::Arc; pub use compactor_supervisor::CompactorSupervisor; diff --git a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs 
b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs index 0fb6ccc6d50..57144df0088 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs @@ -32,11 +32,11 @@ use tracing::error; use ulid::Ulid; use super::compaction_state::CompactionState; -use super::index_config_store::{IndexConfigStore, IndexEntry}; +use super::index_config_metastore::{IndexConfigMetastore, IndexEntry}; pub struct CompactionPlanner { state: CompactionState, - index_config_store: IndexConfigStore, + index_config_metastore: IndexConfigMetastore, cursor: i64, metastore: MetastoreServiceClient, } @@ -83,8 +83,8 @@ impl Handler for CompactionPlanner { _msg: ScanAndPlan, ctx: &ActorContext, ) -> Result<(), ActorExitStatus> { - if let Err(err) = self.scan_and_plan().await { - error!(error=%err, "error scanning metastore and planning merges"); + if let Err(error) = ctx.protect_future(self.scan_and_plan()).await { + error!(%error, "error scanning metastore and planning merges"); } self.state.check_heartbeat_timeouts(); ctx.schedule_self_msg(SCAN_AND_PLAN_INTERVAL, ScanAndPlan); @@ -117,7 +117,7 @@ impl CompactionPlanner { let cursor = OffsetDateTime::now_utc().unix_timestamp() - STARTUP_LOOKBACK.as_secs() as i64; CompactionPlanner { state: CompactionState::new(), - index_config_store: IndexConfigStore::new(metastore.clone()), + index_config_metastore: IndexConfigMetastore::new(metastore.clone()), cursor, metastore, } @@ -125,11 +125,11 @@ impl CompactionPlanner { async fn ingest_splits(&mut self, splits: Vec) { for split in splits { - if self.state.is_split_known(&split.split_metadata.split_id) { + if self.state.is_split_tracked(&split.split_metadata.split_id) { continue; } let Ok(index_entry) = self - .index_config_store + .index_config_metastore .get_for_split(&split.split_metadata) .await else { @@ -168,7 +168,7 @@ impl CompactionPlanner { fn run_merge_policies(&mut self) { for partition_key in 
self.state.partition_keys() { - if let Some(index_entry) = self.index_config_store.get(&partition_key.index_uid) { + if let Some(index_entry) = self.index_config_metastore.get(&partition_key.index_uid) { self.state .plan_partition(&partition_key, index_entry.merge_policy()); } @@ -181,7 +181,8 @@ impl CompactionPlanner { for (partition_key, operation) in pending { let task_id = Ulid::new().to_string(); - let Some(index_entry) = self.index_config_store.get(&partition_key.index_uid) else { + let Some(index_entry) = self.index_config_metastore.get(&partition_key.index_uid) + else { error!(index_uid=%partition_key.index_uid, "index config not found for pending operation, skipping"); continue; }; @@ -352,9 +353,9 @@ mod tests { planner.ingest_splits(splits).await; - assert!(planner.state.is_split_known("fresh")); - assert!(planner.state.is_split_known("in-flight")); - assert!(!planner.state.is_split_known("mature")); + assert!(planner.state.is_split_tracked("fresh")); + assert!(planner.state.is_split_tracked("in-flight")); + assert!(!planner.state.is_split_tracked("mature")); assert_eq!(planner.cursor, 3000); } @@ -393,7 +394,7 @@ mod tests { planner.cursor = 0; planner.ingest_splits(splits).await; - assert!(!planner.state.is_split_known("orphan")); + assert!(!planner.state.is_split_tracked("orphan")); } #[tokio::test] @@ -420,8 +421,8 @@ mod tests { planner.cursor = 0; planner.scan_and_plan().await.unwrap(); - assert!(planner.state.is_split_known("s1")); - assert!(planner.state.is_split_known("s2")); + assert!(planner.state.is_split_tracked("s1")); + assert!(planner.state.is_split_tracked("s2")); assert_eq!(planner.cursor, 6000); } @@ -501,10 +502,9 @@ mod tests { let task_id = assignments[0].task_id.clone(); // Report success for the task. 
- planner.state.process_successes(&[CompactionSuccess { - task_id, - merged_split_id: "merged-1".to_string(), - }]); + planner + .state + .process_successes(&[CompactionSuccess { task_id }]); // The original splits are no longer tracked. Re-ingesting them // (simulating the merged output being immature) creates new work. diff --git a/quickwit/quickwit-compaction/src/planner/compaction_state.rs b/quickwit/quickwit-compaction/src/planner/compaction_state.rs index 1d1ea07e91d..1bc1fb91cc8 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_state.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_state.rs @@ -22,6 +22,8 @@ use quickwit_proto::compaction::{CompactionFailure, CompactionInProgress, Compac use quickwit_proto::types::{DocMappingUid, IndexUid, NodeId, SourceId, SplitId}; use tracing::{error, info, warn}; +use crate::TaskId; + const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(60); #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -44,7 +46,7 @@ impl CompactionPartitionKey { } struct InFlightCompaction { - task_id: String, + task_id: TaskId, split_ids: Vec, node_id: NodeId, last_heartbeat: Instant, @@ -56,7 +58,7 @@ struct InFlightCompaction { pub struct CompactionState { needs_compaction: HashMap>, needs_compaction_split_ids: HashSet, - in_flight: HashMap, + in_flight: HashMap, in_flight_split_ids: HashSet, /// TODO: add index_uid and source_id to MergeOperation so we don't need the partition key /// here. @@ -75,7 +77,7 @@ impl CompactionState { } /// Returns true if the split is already tracked (either awaiting compaction or in-flight). 
- pub fn is_split_known(&self, split_id: &str) -> bool { + pub fn is_split_tracked(&self, split_id: &str) -> bool { self.needs_compaction_split_ids.contains(split_id) || self.in_flight_split_ids.contains(split_id) } @@ -165,7 +167,7 @@ impl CompactionState { pub fn check_heartbeat_timeouts(&mut self) { let now = Instant::now(); - let timed_out_task_ids: Vec = self + let timed_out_task_ids: Vec = self .in_flight .iter() .filter(|(_, inflight)| now.duration_since(inflight.last_heartbeat) > HEARTBEAT_TIMEOUT) @@ -174,7 +176,7 @@ impl CompactionState { for task_id in timed_out_task_ids { if let Some(inflight) = self.in_flight.remove(&task_id) { - error!(task_id=%task_id, node_id=%inflight.node_id, "compaction task timed out"); + error!(%task_id, node_id=%inflight.node_id, "compaction task timed out"); for split_id in &inflight.split_ids { self.in_flight_split_ids.remove(split_id.as_str()); } @@ -193,7 +195,7 @@ impl CompactionState { } /// Records that an operation has been assigned to a worker. 
- pub fn record_assignment(&mut self, task_id: String, split_ids: Vec, node_id: NodeId) { + pub fn record_assignment(&mut self, task_id: TaskId, split_ids: Vec, node_id: NodeId) { self.in_flight.insert( task_id.clone(), InFlightCompaction { @@ -253,11 +255,11 @@ mod tests { let index_uid = IndexUid::for_test("test-index", 0); let mut state = CompactionState::new(); - assert!(!state.is_split_known("s1")); + assert!(!state.is_split_tracked("s1")); state.track_split(test_split("s1", &index_uid)); - assert!(state.is_split_known("s1")); - assert!(!state.is_split_known("s2")); + assert!(state.is_split_tracked("s1")); + assert!(!state.is_split_tracked("s2")); } #[test] @@ -313,21 +315,20 @@ mod tests { state.in_flight_split_ids.insert("s3".to_string()); state.record_assignment("task-2".to_string(), vec!["s3".to_string()], node_id); - assert!(state.is_split_known("s1")); - assert!(state.is_split_known("s3")); + assert!(state.is_split_tracked("s1")); + assert!(state.is_split_tracked("s3")); state.process_successes(&[CompactionSuccess { task_id: "task-1".to_string(), - merged_split_id: "merged-1".to_string(), }]); - assert!(!state.is_split_known("s1")); - assert!(!state.is_split_known("s2")); + assert!(!state.is_split_tracked("s1")); + assert!(!state.is_split_tracked("s2")); state.process_failures(&[CompactionFailure { task_id: "task-2".to_string(), error_message: "boom".to_string(), }]); - assert!(!state.is_split_known("s3")); + assert!(!state.is_split_tracked("s3")); } #[test] @@ -382,7 +383,7 @@ mod tests { state.record_assignment("task-1".to_string(), split_ids.clone(), NodeId::from("w1")); for split_id in &split_ids { - assert!(state.is_split_known(split_id)); + assert!(state.is_split_tracked(split_id)); } } diff --git a/quickwit/quickwit-compaction/src/planner/index_config_store.rs b/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs similarity index 91% rename from quickwit/quickwit-compaction/src/planner/index_config_store.rs rename to 
quickwit/quickwit-compaction/src/planner/index_config_metastore.rs index e47044d21e3..5fe2a9a6bdc 100644 --- a/quickwit/quickwit-compaction/src/planner/index_config_store.rs +++ b/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs @@ -23,7 +23,6 @@ use quickwit_proto::metastore::{IndexMetadataRequest, MetastoreService, Metastor use quickwit_proto::types::{DocMappingUid, IndexUid}; /// Everything the planner needs to know about a single index. -#[derive(Clone)] pub struct IndexEntry { config: IndexConfig, merge_policy: Arc, @@ -74,16 +73,16 @@ impl IndexEntry { /// Caches per-index configuration, merge policies, and doc mappers. /// Fetches from the metastore on demand. All accessors panic if called /// for an index that hasn't been loaded. -pub struct IndexConfigStore { +pub struct IndexConfigMetastore { indexes: HashMap, - metastore: MetastoreServiceClient, + metastore_client: MetastoreServiceClient, } -impl IndexConfigStore { - pub fn new(metastore: MetastoreServiceClient) -> Self { - IndexConfigStore { +impl IndexConfigMetastore { + pub fn new(metastore_client: MetastoreServiceClient) -> Self { + IndexConfigMetastore { indexes: HashMap::new(), - metastore, + metastore_client, } } @@ -95,7 +94,7 @@ impl IndexConfigStore { doc_mapping_uid: &DocMappingUid, ) -> anyhow::Result<()> { let response = self - .metastore + .metastore_client .index_metadata(IndexMetadataRequest { index_uid: Some(index_uid.clone()), index_id: None, @@ -171,7 +170,7 @@ mod tests { .times(1) .returning(move |_| Ok(response.clone())); - let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock)); // First call fetches from metastore. 
let entry = store @@ -200,7 +199,7 @@ mod tests { }) }); - let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock)); let result = store .get_or_fetch(&IndexUid::for_test("missing", 0), &DocMappingUid::default()) .await; @@ -210,7 +209,7 @@ mod tests { #[tokio::test] async fn test_get_returns_none_before_fetch() { let mock = MockMetastoreService::new(); - let store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock)); assert!(store.get(&IndexUid::for_test("test-index", 0)).is_none()); } @@ -224,7 +223,7 @@ mod tests { mock.expect_index_metadata() .returning(move |_| Ok(response.clone())); - let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock)); store .get_or_fetch(&index_uid, &DocMappingUid::default()) .await @@ -243,7 +242,7 @@ mod tests { mock.expect_index_metadata() .returning(move |_| Ok(response.clone())); - let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock)); let split = SplitMetadata { split_id: "split-1".to_string(), index_uid: index_uid.clone(), @@ -264,7 +263,7 @@ mod tests { mock.expect_index_metadata() .returning(move |_| Ok(response.clone())); - let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let mut store = IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock)); let entry = store .get_or_fetch(&index_uid, &DocMappingUid::default()) .await @@ -295,7 +294,7 @@ mod tests { mock.expect_index_metadata() .returning(move |_| Ok(response.clone())); - let mut store = IndexConfigStore::new(MetastoreServiceClient::from_mock(mock)); + let mut store = 
IndexConfigMetastore::new(MetastoreServiceClient::from_mock(mock)); let entry = store .get_or_fetch(&index_uid, &DocMappingUid::default()) .await diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs index b89d67ee8a2..bd075ab900d 100644 --- a/quickwit/quickwit-compaction/src/planner/mod.rs +++ b/quickwit/quickwit-compaction/src/planner/mod.rs @@ -16,6 +16,6 @@ mod compaction_planner; #[allow(dead_code)] mod compaction_state; #[allow(dead_code)] -mod index_config_store; +mod index_config_metastore; pub use compaction_planner::CompactionPlanner; diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 04a9ab004b6..6cfe1d6ea5a 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -27,7 +27,7 @@ use quickwit_common::metrics::OwnedGaugeGuard; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_common::{KillSwitch, is_metrics_index}; -use quickwit_config::{IndexingSettings, RetentionPolicy, SourceConfig}; +use quickwit_config::{IndexingSettings, SourceConfig}; use quickwit_doc_mapper::DocMapper; use quickwit_ingest::IngesterPool; use quickwit_proto::indexing::IndexingPipelineId; @@ -37,7 +37,6 @@ use quickwit_storage::{Storage, StorageResolver}; use tokio::sync::Semaphore; use tracing::{debug, error, info, instrument}; -use super::MergePlanner; use crate::SplitsUpdateMailbox; use crate::actors::doc_processor::DocProcessor; use crate::actors::index_serializer::IndexSerializer; @@ -431,7 +430,7 @@ impl IndexingPipeline { let publisher = Publisher::new( PublisherType::MainPublisher, self.params.metastore.clone(), - self.params.merge_planner_mailbox_opt.clone(), + None, Some(source_mailbox.clone()), ); let (publisher_mailbox, publisher_handle) = ctx @@ -460,7 +459,7 @@ impl IndexingPipeline { 
UploaderType::IndexUploader, self.params.metastore.clone(), self.params.merge_policy.clone(), - self.params.retention_policy.clone(), + None, self.params.split_store.clone(), SplitsUpdateMailbox::Sequencer(sequencer_mailbox), self.params.max_concurrent_split_uploads_index, @@ -853,9 +852,6 @@ pub struct IndexingPipelineParams { // Merge-related parameters pub merge_policy: Arc, - pub retention_policy: Option, - pub merge_planner_mailbox_opt: Option>, - pub max_concurrent_split_uploads_merge: usize, // Source-related parameters pub source_config: SourceConfig, @@ -873,8 +869,7 @@ mod tests { use std::path::PathBuf; use std::sync::Arc; - use quickwit_actors::{Command, Universe}; - use quickwit_common::ServiceStream; + use quickwit_actors::Universe; use quickwit_config::{IndexingSettings, SourceInputFormat, SourceParams}; use quickwit_doc_mapper::{DocMapper, default_doc_mapper_for_test}; use quickwit_metastore::checkpoint::IndexCheckpointDelta; @@ -887,7 +882,6 @@ mod tests { use quickwit_storage::RamStorage; use super::{IndexingPipeline, *}; - use crate::actors::merge_pipeline::{MergePipeline, MergePipelineParams}; use crate::merge_policy::default_merge_policy; #[test] @@ -971,7 +965,6 @@ mod tests { .returning(|_| Ok(EmptyResponse {})); let universe = Universe::new(); - let (merge_planner_mailbox, _) = universe.create_test_mailbox(); let storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::create_without_local_store_for_test(storage.clone()); let pipeline_params = IndexingPipelineParams { @@ -986,12 +979,9 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), - retention_policy: None, queues_dir_path: PathBuf::from("./queues"), max_concurrent_split_uploads_index: 4, - max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, - merge_planner_mailbox_opt: Some(merge_planner_mailbox), event_broker: EventBroker::default(), params_fingerprint: 42u64, }; @@ -1097,7 +1087,6 @@ mod tests { let universe = 
Universe::new(); let storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::create_without_local_store_for_test(storage.clone()); - let (merge_planner_mailbox, _) = universe.create_test_mailbox(); let pipeline_params = IndexingPipelineParams { pipeline_id, doc_mapper: Arc::new(default_doc_mapper_for_test()), @@ -1111,11 +1100,8 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), - retention_policy: None, max_concurrent_split_uploads_index: 4, - max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, - merge_planner_mailbox_opt: Some(merge_planner_mailbox), event_broker: Default::default(), params_fingerprint: 42u64, }; @@ -1140,112 +1126,6 @@ mod tests { indexing_pipeline_simple("data/test_corpus.json.gz").await } - #[tokio::test] - async fn test_merge_pipeline_does_not_stop_on_indexing_pipeline_failure() { - let node_id = NodeId::from("test-node"); - let pipeline_id = IndexingPipelineId { - node_id, - index_uid: IndexUid::new_with_random_ulid("test-index"), - source_id: "test-source".to_string(), - pipeline_uid: PipelineUid::for_test(0u128), - }; - let source_config = SourceConfig { - source_id: "test-source".to_string(), - num_pipelines: NonZeroUsize::MIN, - enabled: true, - source_params: SourceParams::void(), - transform_config: None, - input_format: SourceInputFormat::Json, - }; - let source_config_clone = source_config.clone(); - - let mut mock_metastore = MockMetastoreService::new(); - mock_metastore - .expect_index_metadata() - .withf(|index_metadata_request| { - index_metadata_request.index_uid.as_ref().unwrap() == &("test-index", 2) - }) - .returning(move |_| { - let mut index_metadata = - IndexMetadata::for_test("test-index", "ram:///indexes/test-index"); - index_metadata - .add_source(source_config_clone.clone()) - .unwrap(); - Ok(IndexMetadataResponse::try_from_index_metadata(&index_metadata).unwrap()) - }); - mock_metastore - .expect_list_splits() - .returning(|_| 
Ok(ServiceStream::empty())); - let metastore = MetastoreServiceClient::from_mock(mock_metastore); - - let universe = Universe::with_accelerated_time(); - let doc_mapper = Arc::new(default_doc_mapper_for_test()); - let storage = Arc::new(RamStorage::default()); - let split_store = IndexingSplitStore::create_without_local_store_for_test(storage.clone()); - let merge_pipeline_params = MergePipelineParams { - pipeline_id: pipeline_id.merge_pipeline_id(), - doc_mapper: doc_mapper.clone(), - indexing_directory: TempDirectory::for_test(), - metastore: metastore.clone(), - split_store: split_store.clone(), - merge_policy: default_merge_policy(), - retention_policy: None, - max_concurrent_split_uploads: 2, - merge_io_throughput_limiter_opt: None, - merge_scheduler_service: universe.get_or_spawn_one(), - event_broker: Default::default(), - }; - let merge_pipeline = MergePipeline::new(merge_pipeline_params, None, universe.spawn_ctx()); - let merge_planner_mailbox = merge_pipeline.merge_planner_mailbox().clone(); - let (_merge_pipeline_mailbox, merge_pipeline_handler) = - universe.spawn_builder().spawn(merge_pipeline); - let indexing_pipeline_params = IndexingPipelineParams { - pipeline_id, - doc_mapper, - source_config, - source_storage_resolver: StorageResolver::for_test(), - indexing_directory: TempDirectory::for_test(), - indexing_settings: IndexingSettings::for_test(), - ingester_pool: IngesterPool::default(), - metastore, - queues_dir_path: PathBuf::from("./queues"), - storage, - split_store, - merge_policy: default_merge_policy(), - retention_policy: None, - max_concurrent_split_uploads_index: 4, - max_concurrent_split_uploads_merge: 5, - cooperative_indexing_permits: None, - merge_planner_mailbox_opt: Some(merge_planner_mailbox.clone()), - event_broker: Default::default(), - params_fingerprint: 42u64, - }; - let indexing_pipeline = IndexingPipeline::new(indexing_pipeline_params); - let (_indexing_pipeline_mailbox, indexing_pipeline_handler) = - 
universe.spawn_builder().spawn(indexing_pipeline); - let obs = indexing_pipeline_handler - .process_pending_and_observe() - .await; - assert_eq!(obs.generation, 1); - // Let's shutdown the indexer, this will trigger the indexing pipeline failure and the - // restart. - let indexer = universe.get::().into_iter().next().unwrap(); - let _ = indexer.ask(Command::Quit).await; - for _ in 0..10 { - universe.sleep(*quickwit_actors::HEARTBEAT).await; - // Check indexing pipeline has restarted. - let obs = indexing_pipeline_handler - .process_pending_and_observe() - .await; - if obs.generation == 2 { - assert_eq!(merge_pipeline_handler.check_health(true), Health::Healthy); - universe.quit().await; - return; - } - } - panic!("Pipeline was apparently not restarted."); - } - async fn indexing_pipeline_all_failures_handling(test_file: &str) -> anyhow::Result<()> { let node_id = NodeId::from("test-node"); let index_uid: IndexUid = IndexUid::for_test("test-index", 2); @@ -1308,7 +1188,6 @@ mod tests { let universe = Universe::new(); let storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::create_without_local_store_for_test(storage.clone()); - let (merge_planner_mailbox, _) = universe.create_test_mailbox(); // Create a minimal mapper with wrong date format to ensure that all documents will fail let broken_mapper = serde_json::from_str::( r#" @@ -1340,11 +1219,8 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), - retention_policy: None, max_concurrent_split_uploads_index: 4, - max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, - merge_planner_mailbox_opt: Some(merge_planner_mailbox), params_fingerprint: 42u64, event_broker: Default::default(), }; diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 5442a98589e..1aafbc01dec 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ 
b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -19,7 +19,6 @@ use std::sync::Arc; use anyhow::Context; use async_trait::async_trait; -use futures::TryStreamExt; use itertools::Itertools; use quickwit_actors::{ Actor, ActorContext, ActorExitStatus, ActorHandle, ActorState, Handler, Healthz, Mailbox, @@ -27,9 +26,8 @@ use quickwit_actors::{ }; use quickwit_cluster::Cluster; use quickwit_common::fs::get_cache_directory_path; -use quickwit_common::io::Limiter; use quickwit_common::pubsub::EventBroker; -use quickwit_common::{io, temp_dir}; +use quickwit_common::temp_dir; use quickwit_config::{ INGEST_API_SOURCE_ID, IndexConfig, IndexerConfig, SourceConfig, build_doc_mapper, indexing_pipeline_params_fingerprint, @@ -40,29 +38,23 @@ use quickwit_ingest::{ }; use quickwit_metastore::{ IndexMetadata, IndexMetadataResponseExt, IndexesMetadataResponseExt, - ListIndexesMetadataResponseExt, ListSplitsQuery, ListSplitsRequestExt, ListSplitsResponseExt, - SplitMetadata, SplitState, + ListIndexesMetadataResponseExt, }; use quickwit_proto::indexing::{ ApplyIndexingPlanRequest, ApplyIndexingPlanResponse, IndexingError, IndexingPipelineId, - IndexingTask, MergePipelineId, PipelineMetrics, + IndexingTask, PipelineMetrics, }; use quickwit_proto::metastore::{ IndexMetadataRequest, IndexMetadataSubrequest, IndexesMetadataRequest, - ListIndexesMetadataRequest, ListSplitsRequest, MetastoreResult, MetastoreService, - MetastoreServiceClient, + ListIndexesMetadataRequest, MetastoreService, MetastoreServiceClient, }; use quickwit_proto::types::{IndexId, IndexUid, NodeId, PipelineUid, ShardId}; use quickwit_storage::StorageResolver; use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; use tokio::sync::Semaphore; use tracing::{debug, error, info, warn}; -use super::merge_pipeline::{MergePipeline, MergePipelineParams}; -use super::{MergePlanner, MergeSchedulerService}; -use crate::actors::merge_pipeline::FinishPendingMergesAndShutdownPipeline; -use 
crate::models::{DetachIndexingPipeline, DetachMergePipeline, ObservePipeline, SpawnPipeline}; +use crate::models::{DetachIndexingPipeline, ObservePipeline, SpawnPipeline}; use crate::source::{AssignShards, Assignment}; use crate::split_store::{IndexingSplitCache, SplitStoreQuota}; use crate::{IndexingPipeline, IndexingPipelineParams, IndexingSplitStore, IndexingStatistics}; @@ -75,16 +67,10 @@ pub struct IndexingServiceCounters { pub num_running_pipelines: usize, pub num_successful_pipelines: usize, pub num_failed_pipelines: usize, - pub num_running_merge_pipelines: usize, pub num_deleted_queues: usize, pub num_delete_queue_failures: usize, } -struct MergePipelineHandle { - mailbox: Mailbox, - handle: ActorHandle, -} - struct PipelineHandle { mailbox: Mailbox, handle: ActorHandle, @@ -104,16 +90,13 @@ pub struct IndexingService { cluster: Cluster, metastore: MetastoreServiceClient, ingest_api_service_opt: Option>, - merge_scheduler_service_opt: Option>, ingester_pool: IngesterPool, storage_resolver: StorageResolver, indexing_pipelines: HashMap, counters: IndexingServiceCounters, local_split_store: Arc, max_concurrent_split_uploads: usize, - merge_pipeline_handles: HashMap, cooperative_indexing_permits: Option>, - merge_io_throughput_limiter_opt: Option, event_broker: EventBroker, } @@ -138,7 +121,6 @@ impl IndexingService { cluster: Cluster, metastore: MetastoreServiceClient, ingest_api_service_opt: Option>, - merge_scheduler_service_opt: Option>, ingester_pool: IngesterPool, storage_resolver: StorageResolver, event_broker: EventBroker, @@ -147,8 +129,6 @@ impl IndexingService { indexer_config.split_store_max_num_splits, indexer_config.split_store_max_num_bytes, )?; - let merge_io_throughput_limiter_opt = - indexer_config.max_merge_write_throughput.map(io::limiter); let split_cache_dir_path = get_cache_directory_path(&data_dir_path); let local_split_store = IndexingSplitCache::open(split_cache_dir_path, split_store_space_quota).await?; @@ -167,15 +147,12 @@ impl 
IndexingService { cluster, metastore, ingest_api_service_opt, - merge_scheduler_service_opt, ingester_pool, storage_resolver, local_split_store: Arc::new(local_split_store), indexing_pipelines: Default::default(), counters: Default::default(), max_concurrent_split_uploads: indexer_config.max_concurrent_split_uploads, - merge_pipeline_handles: HashMap::new(), - merge_io_throughput_limiter_opt, cooperative_indexing_permits, event_broker, }) @@ -196,21 +173,6 @@ impl IndexingService { Ok(pipeline_handle.handle) } - async fn detach_merge_pipeline( - &mut self, - pipeline_id: &MergePipelineId, - ) -> Result, IndexingError> { - let pipeline_handle = self - .merge_pipeline_handles - .remove(pipeline_id) - .ok_or_else(|| { - let message = format!("could not find merge pipeline `{pipeline_id}`"); - IndexingError::Internal(message) - })?; - self.counters.num_running_merge_pipelines -= 1; - Ok(pipeline_handle.handle) - } - async fn observe_pipeline( &mut self, pipeline_uid: &PipelineUid, @@ -242,15 +204,8 @@ impl IndexingService { pipeline_uid, }; let index_config = index_metadata.into_index_config(); - self.spawn_pipeline_inner( - ctx, - pipeline_id.clone(), - index_config, - source_config, - None, - None, - ) - .await?; + self.spawn_pipeline_inner(ctx, pipeline_id.clone(), index_config, source_config, None) + .await?; Ok(pipeline_id) } @@ -260,7 +215,6 @@ impl IndexingService { indexing_pipeline_id: IndexingPipelineId, index_config: IndexConfig, source_config: SourceConfig, - immature_splits_opt: Option>, expected_params_fingerprint: Option, ) -> Result<(), IndexingError> { if self @@ -291,43 +245,15 @@ impl IndexingService { })?; let merge_policy = crate::merge_policy::merge_policy_from_settings(&index_config.indexing_settings); - let retention_policy = index_config.retention_policy_opt.clone(); let split_store = IndexingSplitStore::new(storage.clone(), self.local_split_store.clone()); let doc_mapper = build_doc_mapper(&index_config.doc_mapping, 
&index_config.search_settings) .map_err(|error| IndexingError::Internal(error.to_string()))?; - let merge_planner_mailbox_opt = if let Some(merge_scheduler_service) = - self.merge_scheduler_service_opt.clone() - { - let merge_pipeline_id = indexing_pipeline_id.merge_pipeline_id(); - let merge_pipeline_params = MergePipelineParams { - pipeline_id: merge_pipeline_id, - doc_mapper: doc_mapper.clone(), - indexing_directory: indexing_directory.clone(), - metastore: self.metastore.clone(), - split_store: split_store.clone(), - merge_scheduler_service, - merge_policy: merge_policy.clone(), - retention_policy: retention_policy.clone(), - merge_io_throughput_limiter_opt: self.merge_io_throughput_limiter_opt.clone(), - max_concurrent_split_uploads: self.max_concurrent_split_uploads, - event_broker: self.event_broker.clone(), - }; - Some(self.get_or_create_merge_pipeline(merge_pipeline_params, immature_splits_opt, ctx)) - } else { - None - }; // The concurrent uploads budget is split in 2: 1/2 for the indexing pipeline, 1/2 for the // merge pipeline. When there is no local merge pipeline, the indexing pipeline gets the // full budget. 
- let max_concurrent_split_uploads_index = if self.merge_scheduler_service_opt.is_some() { - (self.max_concurrent_split_uploads / 2).max(1) - } else { - self.max_concurrent_split_uploads - }; - let max_concurrent_split_uploads_merge = - self.max_concurrent_split_uploads - max_concurrent_split_uploads_index; + let max_concurrent_split_uploads_index = self.max_concurrent_split_uploads; let params_fingerprint = indexing_pipeline_params_fingerprint(&index_config, &source_config); @@ -359,12 +285,8 @@ impl IndexingService { split_store, max_concurrent_split_uploads_index, cooperative_indexing_permits: self.cooperative_indexing_permits.clone(), - - // Merge-related parameters + // The merge policy is needed in the uploader for determining split maturity merge_policy, - retention_policy, - max_concurrent_split_uploads_merge, - merge_planner_mailbox_opt, // Source-related parameters source_config, @@ -431,70 +353,6 @@ impl IndexingService { Ok(indexes_metadata) } - /// Fetches the immature splits candidates for merge for all the indexing pipelines for which a - /// merge pipeline is not running. 
- async fn fetch_immature_splits_for_new_merge_pipelines( - &mut self, - indexing_pipeline_ids: &[IndexingPipelineId], - ctx: &ActorContext, - ) -> MetastoreResult>> { - if self.merge_scheduler_service_opt.is_none() { - return Ok(Default::default()); - } - let mut index_uids = Vec::new(); - - for indexing_pipeline_id in indexing_pipeline_ids { - let merge_pipeline_id = indexing_pipeline_id.merge_pipeline_id(); - - if !self.merge_pipeline_handles.contains_key(&merge_pipeline_id) { - index_uids.push(merge_pipeline_id.index_uid); - } - } - if index_uids.is_empty() { - return Ok(Default::default()); - } - index_uids.sort_unstable(); - index_uids.dedup(); - - let list_splits_query = ListSplitsQuery::try_from_index_uids(index_uids) - .expect("`index_uids` should not be empty") - .with_node_id(self.node_id.clone()) - .with_split_state(SplitState::Published) - .retain_immature(OffsetDateTime::now_utc()); - let list_splits_request = - ListSplitsRequest::try_from_list_splits_query(&list_splits_query)?; - - let mut immature_splits_stream = ctx - .protect_future(self.metastore.list_splits(list_splits_request)) - .await?; - - let mut per_merge_pipeline_immature_splits: HashMap> = - indexing_pipeline_ids - .iter() - .map(|indexing_pipeline_id| (indexing_pipeline_id.merge_pipeline_id(), Vec::new())) - .collect(); - - let mut num_immature_splits = 0usize; - - while let Some(list_splits_response) = immature_splits_stream.try_next().await? { - for split_metadata in list_splits_response.deserialize_splits_metadata().await? 
{ - num_immature_splits += 1; - - let merge_pipeline_id = MergePipelineId { - node_id: self.node_id.clone(), - index_uid: split_metadata.index_uid.clone(), - source_id: split_metadata.source_id.clone(), - }; - per_merge_pipeline_immature_splits - .entry(merge_pipeline_id) - .or_default() - .push(split_metadata); - } - } - info!("fetched {num_immature_splits} splits candidates for merge"); - Ok(per_merge_pipeline_immature_splits) - } - async fn handle_supervise(&mut self) -> Result<(), ActorExitStatus> { self.indexing_pipelines .retain(|pipeline_uid, pipeline_handle| { @@ -522,48 +380,6 @@ impl IndexingService { } } }); - let merge_pipelines_to_retain: HashSet = self - .indexing_pipelines - .values() - .map(|pipeline_handle| pipeline_handle.indexing_pipeline_id.merge_pipeline_id()) - .collect(); - - let merge_pipelines_to_shutdown: Vec = self - .merge_pipeline_handles - .keys() - .filter(|running_merge_pipeline_id| { - !merge_pipelines_to_retain.contains(running_merge_pipeline_id) - }) - .cloned() - .collect(); - - for merge_pipeline_to_shutdown in merge_pipelines_to_shutdown { - if let Some((_, merge_pipeline_handle)) = self - .merge_pipeline_handles - .remove_entry(&merge_pipeline_to_shutdown) - { - // We gracefully shutdown the merge pipeline, so we can complete the in-flight - // merges. - info!( - index_uid=%merge_pipeline_to_shutdown.index_uid, - source_id=%merge_pipeline_to_shutdown.source_id, - "shutting down orphan merge pipeline" - ); - // The queue capacity of the merge pipeline is unbounded, so `.send_message(...)` - // should not block. - // We avoid using `.quit()` here because it waits for the actor to exit. - merge_pipeline_handle - .handle - .mailbox() - .send_message(FinishPendingMergesAndShutdownPipeline) - .await - .expect("merge pipeline mailbox should not be full"); - } - } - // Finally, we remove the completed or failed merge pipelines. 
- self.merge_pipeline_handles - .retain(|_, merge_pipeline_handle| merge_pipeline_handle.handle.state().is_running()); - self.counters.num_running_merge_pipelines = self.merge_pipeline_handles.len(); self.update_chitchat_running_plan().await; let pipeline_metrics: HashMap<&IndexingPipelineId, PipelineMetrics> = self @@ -581,33 +397,6 @@ impl IndexingService { Ok(()) } - fn get_or_create_merge_pipeline( - &mut self, - merge_pipeline_params: MergePipelineParams, - immature_splits_opt: Option>, - ctx: &ActorContext, - ) -> Mailbox { - if let Some(merge_pipeline_handle) = self - .merge_pipeline_handles - .get(&merge_pipeline_params.pipeline_id) - { - return merge_pipeline_handle.mailbox.clone(); - } - let merge_pipeline_id = merge_pipeline_params.pipeline_id.clone(); - let merge_pipeline = - MergePipeline::new(merge_pipeline_params, immature_splits_opt, ctx.spawn_ctx()); - let merge_planner_mailbox = merge_pipeline.merge_planner_mailbox().clone(); - let (_pipeline_mailbox, pipeline_handle) = ctx.spawn_actor().spawn(merge_pipeline); - let merge_pipeline_handle = MergePipelineHandle { - mailbox: merge_planner_mailbox.clone(), - handle: pipeline_handle, - }; - self.merge_pipeline_handles - .insert(merge_pipeline_id, merge_pipeline_handle); - self.counters.num_running_merge_pipelines += 1; - merge_planner_mailbox - } - /// For all Ingest V2 pipelines, assigns the set of shards they should be working on. /// This is done regardless of whether there has been a change in their shard list /// or not. 
@@ -714,10 +503,6 @@ impl IndexingService { .map(|index_metadata| (index_metadata.index_uid.clone(), index_metadata)) .collect(); - let mut per_merge_pipeline_immature_splits: HashMap> = - self.fetch_immature_splits_for_new_merge_pipelines(&pipelines_to_spawn_ids, ctx) - .await?; - let mut spawn_pipeline_failures: Vec = Vec::new(); for (task_to_spawn, id_to_spawn) in pipelines_to_spawn.iter().zip(pipelines_to_spawn_ids) { @@ -725,17 +510,12 @@ impl IndexingService { per_index_uid_indexes_metadata.get(task_to_spawn.index_uid()) { if let Some(source_config) = index_metadata.sources.get(&task_to_spawn.source_id) { - let merge_pipeline_id = id_to_spawn.merge_pipeline_id(); - let immature_splits_opt = - per_merge_pipeline_immature_splits.remove(&merge_pipeline_id); - if let Err(error) = self .spawn_pipeline_inner( ctx, id_to_spawn.clone(), index_metadata.index_config.clone(), source_config.clone(), - immature_splits_opt, Some(task_to_spawn.params_fingerprint), ) .await @@ -914,19 +694,6 @@ impl Handler for IndexingService { } } -#[async_trait] -impl Handler for IndexingService { - type Reply = Result, IndexingError>; - - async fn handle( - &mut self, - msg: DetachMergePipeline, - _ctx: &ActorContext, - ) -> Result { - Ok(self.detach_merge_pipeline(&msg.pipeline_id).await) - } -} - #[derive(Debug)] struct SuperviseLoop; @@ -1030,18 +797,16 @@ mod tests { }; use quickwit_ingest::{CreateQueueIfNotExistsRequest, init_ingest_api}; use quickwit_metastore::{ - AddSourceRequestExt, CreateIndexRequestExt, ListIndexesMetadataResponseExt, Split, + AddSourceRequestExt, CreateIndexRequestExt, ListIndexesMetadataResponseExt, metastore_for_test, }; use quickwit_proto::indexing::IndexingTask; use quickwit_proto::metastore::{ AddSourceRequest, CreateIndexRequest, DeleteIndexRequest, IndexMetadataResponse, - IndexesMetadataResponse, ListIndexesMetadataResponse, ListSplitsResponse, - MockMetastoreService, + IndexesMetadataResponse, ListIndexesMetadataResponse, MockMetastoreService, }; use 
super::*; - use crate::actors::merge_pipeline::SUPERVISE_LOOP_INTERVAL; async fn spawn_indexing_service_for_test( data_dir_path: &Path, @@ -1057,7 +822,6 @@ mod tests { init_ingest_api(universe, &queues_dir_path, &IngestApiConfig::default()) .await .unwrap(); - let merge_scheduler_mailbox: Mailbox = universe.get_or_spawn_one(); let indexing_server = IndexingService::new( NodeId::from("test-node"), data_dir_path.to_path_buf(), @@ -1066,7 +830,6 @@ mod tests { cluster, metastore, Some(ingest_api_service), - Some(merge_scheduler_mailbox), IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), @@ -1166,15 +929,8 @@ mod tests { .await .unwrap(); pipeline_handle.kill().await; - let _merge_pipeline = indexing_service - .ask_for_res(DetachMergePipeline { - pipeline_id: pipeline_id.merge_pipeline_id(), - }) - .await - .unwrap(); let observation = indexing_service_handle.process_pending_and_observe().await; assert_eq!(observation.num_running_pipelines, 0); - assert_eq!(observation.num_running_merge_pipelines, 0); universe.assert_quit().await; } @@ -1525,263 +1281,6 @@ mod tests { universe.assert_quit().await; } - #[tokio::test] - async fn test_indexing_service_shutdown_merge_pipeline_when_no_indexing_pipeline() { - quickwit_common::setup_logging_for_tests(); - let transport = ChannelTransport::default(); - let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true) - .await - .unwrap(); - let metastore = metastore_for_test(); - - let index_id = append_random_suffix("test-indexing-service"); - let index_uri = format!("ram:///indexes/{index_id}"); - let index_config = IndexConfig::for_test(&index_id, &index_uri); - - let source_config = SourceConfig { - source_id: "test-indexing-service--source".to_string(), - num_pipelines: NonZeroUsize::MIN, - enabled: true, - source_params: SourceParams::void(), - transform_config: None, - input_format: SourceInputFormat::Json, - }; - let create_index_request = - 
CreateIndexRequest::try_from_index_config(&index_config).unwrap(); - let index_uid: IndexUid = metastore - .create_index(create_index_request) - .await - .unwrap() - .index_uid() - .clone(); - let add_source_request = - AddSourceRequest::try_from_source_config(index_uid.clone(), &source_config).unwrap(); - metastore.add_source(add_source_request).await.unwrap(); - - // Test `IndexingService::new`. - let temp_dir = tempfile::tempdir().unwrap(); - let data_dir_path = temp_dir.path().to_path_buf(); - let indexer_config = IndexerConfig::for_test().unwrap(); - let num_blocking_threads = 1; - let storage_resolver = StorageResolver::unconfigured(); - let universe = Universe::with_accelerated_time(); - let queues_dir_path = data_dir_path.join(QUEUES_DIR_NAME); - let ingest_api_service = - init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default()) - .await - .unwrap(); - let merge_scheduler_service = universe.get_or_spawn_one(); - let indexing_server = IndexingService::new( - NodeId::from("test-node"), - data_dir_path, - indexer_config, - num_blocking_threads, - cluster.clone(), - metastore.clone(), - Some(ingest_api_service), - Some(merge_scheduler_service), - IngesterPool::default(), - storage_resolver.clone(), - EventBroker::default(), - ) - .await - .unwrap(); - let (indexing_server_mailbox, indexing_server_handle) = - universe.spawn_builder().spawn(indexing_server); - let pipeline_id = indexing_server_mailbox - .ask_for_res(SpawnPipeline { - index_id: index_id.clone(), - source_config, - pipeline_uid: PipelineUid::default(), - }) - .await - .unwrap(); - let observation = indexing_server_handle.observe().await; - assert_eq!(observation.num_running_pipelines, 1); - assert_eq!(observation.num_failed_pipelines, 0); - assert_eq!(observation.num_successful_pipelines, 0); - assert_eq!(observation.num_running_merge_pipelines, 1); - - // Test `shutdown_pipeline` - let pipeline = indexing_server_mailbox - .ask_for_res(DetachIndexingPipeline { pipeline_id }) - 
.await - .unwrap(); - pipeline.quit().await; - - // Let the service cleanup the merge pipelines. - universe.sleep(*HEARTBEAT).await; - - let observation = indexing_server_handle.process_pending_and_observe().await; - assert_eq!(observation.num_running_pipelines, 0); - assert_eq!(observation.num_running_merge_pipelines, 0); - universe.sleep(SUPERVISE_LOOP_INTERVAL).await; - // Check that the merge pipeline is also shut down as they are no more indexing pipeilne on - // the index. - assert!(universe.get_one::().is_none()); - // It may or may not panic - universe.quit().await; - } - - #[tokio::test] - async fn test_indexing_service_no_merge_pipeline_when_no_merge_scheduler() { - quickwit_common::setup_logging_for_tests(); - let transport = ChannelTransport::default(); - let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true) - .await - .unwrap(); - let metastore = metastore_for_test(); - - let index_id = append_random_suffix("test-indexing-service-no-merge"); - let index_uri = format!("ram:///indexes/{index_id}"); - let index_config = IndexConfig::for_test(&index_id, &index_uri); - - let source_config = SourceConfig { - source_id: "test-source".to_string(), - num_pipelines: NonZeroUsize::MIN, - enabled: true, - source_params: SourceParams::void(), - transform_config: None, - input_format: SourceInputFormat::Json, - }; - let create_index_request = - CreateIndexRequest::try_from_index_config(&index_config).unwrap(); - let index_uid: IndexUid = metastore - .create_index(create_index_request) - .await - .unwrap() - .index_uid() - .clone(); - let add_source_request = - AddSourceRequest::try_from_source_config(index_uid.clone(), &source_config).unwrap(); - metastore.add_source(add_source_request).await.unwrap(); - - let temp_dir = tempfile::tempdir().unwrap(); - let data_dir_path = temp_dir.path().to_path_buf(); - let indexer_config = IndexerConfig::for_test().unwrap(); - let num_blocking_threads = 1; - let storage_resolver = 
StorageResolver::unconfigured(); - let universe = Universe::with_accelerated_time(); - let queues_dir_path = data_dir_path.join(QUEUES_DIR_NAME); - let ingest_api_service = - init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default()) - .await - .unwrap(); - let indexing_server = IndexingService::new( - NodeId::from("test-node"), - data_dir_path, - indexer_config, - num_blocking_threads, - cluster.clone(), - metastore.clone(), - Some(ingest_api_service), - None, // No merge scheduler — external merge service handles compaction. - IngesterPool::default(), - storage_resolver.clone(), - EventBroker::default(), - ) - .await - .unwrap(); - let (indexing_server_mailbox, indexing_server_handle) = - universe.spawn_builder().spawn(indexing_server); - - indexing_server_mailbox - .ask_for_res(SpawnPipeline { - index_id: index_id.clone(), - source_config, - pipeline_uid: PipelineUid::default(), - }) - .await - .unwrap(); - - let observation = indexing_server_handle.observe().await; - assert_eq!(observation.num_running_pipelines, 1); - assert_eq!(observation.num_running_merge_pipelines, 0); - assert!(universe.get_one::().is_none()); - - universe.quit().await; - } - - #[tokio::test] - async fn test_indexing_service_spawns_merge_pipeline_with_merge_scheduler() { - quickwit_common::setup_logging_for_tests(); - let transport = ChannelTransport::default(); - let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true) - .await - .unwrap(); - let metastore = metastore_for_test(); - - let index_id = append_random_suffix("test-indexing-service-with-merge"); - let index_uri = format!("ram:///indexes/{index_id}"); - let index_config = IndexConfig::for_test(&index_id, &index_uri); - - let source_config = SourceConfig { - source_id: "test-source".to_string(), - num_pipelines: NonZeroUsize::MIN, - enabled: true, - source_params: SourceParams::void(), - transform_config: None, - input_format: SourceInputFormat::Json, - }; - let create_index_request = - 
CreateIndexRequest::try_from_index_config(&index_config).unwrap(); - let index_uid: IndexUid = metastore - .create_index(create_index_request) - .await - .unwrap() - .index_uid() - .clone(); - let add_source_request = - AddSourceRequest::try_from_source_config(index_uid.clone(), &source_config).unwrap(); - metastore.add_source(add_source_request).await.unwrap(); - - let temp_dir = tempfile::tempdir().unwrap(); - let data_dir_path = temp_dir.path().to_path_buf(); - let indexer_config = IndexerConfig::for_test().unwrap(); - let num_blocking_threads = 1; - let storage_resolver = StorageResolver::unconfigured(); - let universe = Universe::with_accelerated_time(); - let queues_dir_path = data_dir_path.join(QUEUES_DIR_NAME); - let ingest_api_service = - init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default()) - .await - .unwrap(); - let merge_scheduler_mailbox: Mailbox = universe.get_or_spawn_one(); - let indexing_server = IndexingService::new( - NodeId::from("test-node"), - data_dir_path, - indexer_config, - num_blocking_threads, - cluster.clone(), - metastore.clone(), - Some(ingest_api_service), - Some(merge_scheduler_mailbox), - IngesterPool::default(), - storage_resolver.clone(), - EventBroker::default(), - ) - .await - .unwrap(); - let (indexing_server_mailbox, indexing_server_handle) = - universe.spawn_builder().spawn(indexing_server); - - indexing_server_mailbox - .ask_for_res(SpawnPipeline { - index_id: index_id.clone(), - source_config, - pipeline_uid: PipelineUid::default(), - }) - .await - .unwrap(); - - let observation = indexing_server_handle.observe().await; - assert_eq!(observation.num_running_pipelines, 1); - assert_eq!(observation.num_running_merge_pipelines, 1); - assert!(universe.get_one::().is_some()); - - universe.quit().await; - } - #[derive(Debug)] struct FreezePipeline; #[async_trait] @@ -1886,7 +1385,6 @@ mod tests { let observation = indexing_service_handle.observe().await; assert_eq!(observation.num_running_pipelines, 1); 
assert_eq!(observation.num_failed_pipelines, 0); - assert_eq!(observation.num_running_merge_pipelines, 1); // Might generate panics universe.quit().await; } @@ -1931,7 +1429,6 @@ mod tests { let indexer_config = IndexerConfig::for_test().unwrap(); let num_blocking_threads = 1; let storage_resolver = StorageResolver::unconfigured(); - let merge_scheduler_service: Mailbox = universe.get_or_spawn_one(); let mut indexing_server = IndexingService::new( NodeId::from("test-ingest-api-gc-node"), data_dir_path, @@ -1940,7 +1437,6 @@ mod tests { cluster.clone(), metastore.clone(), Some(ingest_api_service.clone()), - Some(merge_scheduler_service), IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), @@ -2008,30 +1504,6 @@ mod tests { let response = IndexesMetadataResponse::for_test(indexes_metadata, failures); Ok(response) }); - mock_metastore - .expect_list_splits() - .withf(|request| { - let list_splits_query = request.deserialize_list_splits_query().unwrap(); - list_splits_query.index_uids.unwrap() == [("test-index-0", 0)] - }) - .return_once(|_request| Ok(ServiceStream::empty())); - mock_metastore - .expect_list_splits() - .withf(|request| { - let list_splits_query = request.deserialize_list_splits_query().unwrap(); - list_splits_query.index_uids.unwrap() == [("test-index-1", 0), ("test-index-2", 0)] - }) - .return_once(|_request| { - let splits = vec![Split { - split_metadata: SplitMetadata::for_test("test-split".to_string()), - split_state: SplitState::Published, - update_timestamp: 0, - publish_timestamp: Some(0), - }]; - let list_splits_response = ListSplitsResponse::try_from_splits(splits).unwrap(); - let response = ServiceStream::from(vec![Ok(list_splits_response)]); - Ok(response) - }); let transport = ChannelTransport::default(); let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true) diff --git a/quickwit/quickwit-indexing/src/lib.rs b/quickwit/quickwit-indexing/src/lib.rs index 66c50d4d150..df0b8339c02 100644 --- 
a/quickwit/quickwit-indexing/src/lib.rs +++ b/quickwit/quickwit-indexing/src/lib.rs @@ -24,7 +24,6 @@ use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_storage::StorageResolver; use tracing::info; -use crate::actors::MergeSchedulerService; pub use crate::actors::{ FinishPendingMergesAndShutdownPipeline, IndexingError, IndexingPipeline, IndexingPipelineParams, IndexingService, PublisherType, Sequencer, SplitsUpdateMailbox, @@ -71,7 +70,6 @@ pub async fn start_indexing_service( ingester_pool: IngesterPool, storage_resolver: StorageResolver, event_broker: EventBroker, - merge_scheduler_mailbox: Option>, ) -> anyhow::Result> { info!("starting indexer service"); let ingest_api_service_mailbox = universe.get_one::(); @@ -83,7 +81,6 @@ pub async fn start_indexing_service( cluster, metastore.clone(), ingest_api_service_mailbox, - merge_scheduler_mailbox, ingester_pool, storage_resolver, event_broker, diff --git a/quickwit/quickwit-indexing/src/models/indexing_service_message.rs b/quickwit/quickwit-indexing/src/models/indexing_service_message.rs index 67548b9c09d..868f8029be7 100644 --- a/quickwit/quickwit-indexing/src/models/indexing_service_message.rs +++ b/quickwit/quickwit-indexing/src/models/indexing_service_message.rs @@ -13,7 +13,7 @@ // limitations under the License. use quickwit_config::SourceConfig; -use quickwit_proto::indexing::{IndexingPipelineId, MergePipelineId}; +use quickwit_proto::indexing::IndexingPipelineId; use quickwit_proto::types::{IndexId, PipelineUid}; #[derive(Clone, Debug)] @@ -31,14 +31,6 @@ pub struct DetachIndexingPipeline { pub pipeline_id: IndexingPipelineId, } -/// Detaches a merge pipeline from the indexing service. The pipeline is no longer managed by the -/// server. This is mostly useful for preventing the server killing an existing merge pipeline -/// if a indexing pipeline is detached. 
-#[derive(Debug)] -pub struct DetachMergePipeline { - pub pipeline_id: MergePipelineId, -} - #[derive(Debug)] pub struct ObservePipeline { pub pipeline_id: IndexingPipelineId, diff --git a/quickwit/quickwit-indexing/src/models/mod.rs b/quickwit/quickwit-indexing/src/models/mod.rs index fd3188e2104..7f84bbab0af 100644 --- a/quickwit/quickwit-indexing/src/models/mod.rs +++ b/quickwit/quickwit-indexing/src/models/mod.rs @@ -34,9 +34,7 @@ pub use indexed_split::{ CommitTrigger, EmptySplit, IndexedSplit, IndexedSplitBatch, IndexedSplitBatchBuilder, IndexedSplitBuilder, }; -pub use indexing_service_message::{ - DetachIndexingPipeline, DetachMergePipeline, ObservePipeline, SpawnPipeline, -}; +pub use indexing_service_message::{DetachIndexingPipeline, ObservePipeline, SpawnPipeline}; pub use indexing_statistics::IndexingStatistics; pub use merge_planner_message::NewSplits; pub use merge_scratch::MergeScratch; diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs index 550e40aa16f..18c3f34dc1a 100644 --- a/quickwit/quickwit-indexing/src/test_utils.rs +++ b/quickwit/quickwit-indexing/src/test_utils.rs @@ -111,7 +111,6 @@ impl TestSandbox { let num_blocking_threads = 1; let storage = storage_resolver.resolve(&index_uri).await?; let universe = Universe::with_accelerated_time(); - let merge_scheduler_mailbox = universe.get_or_spawn_one(); let queues_dir_path = temp_dir.path().join(QUEUES_DIR_NAME); let ingest_api_service = init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default()).await?; @@ -123,7 +122,6 @@ impl TestSandbox { cluster, metastore.clone(), Some(ingest_api_service), - Some(merge_scheduler_mailbox), IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), diff --git a/quickwit/quickwit-proto/protos/quickwit/compaction.proto b/quickwit/quickwit-proto/protos/quickwit/compaction.proto index 7313d85db91..6f6e4bc6f75 100644 --- a/quickwit/quickwit-proto/protos/quickwit/compaction.proto 
+++ b/quickwit/quickwit-proto/protos/quickwit/compaction.proto @@ -39,7 +39,6 @@ message CompactionInProgress { message CompactionSuccess { string task_id = 1; - string merged_split_id = 2; } message CompactionFailure { diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs index f96bb33ed20..67b2f24fe15 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs @@ -30,8 +30,6 @@ pub struct CompactionInProgress { pub struct CompactionSuccess { #[prost(string, tag = "1")] pub task_id: ::prost::alloc::string::String, - #[prost(string, tag = "2")] - pub merged_split_id: ::prost::alloc::string::String, } #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 4c2e8d8499c..3d5a8cd2428 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -79,7 +79,7 @@ use quickwit_config::{ClusterConfig, IngestApiConfig, NodeConfig}; use quickwit_control_plane::control_plane::{ControlPlane, ControlPlaneEventSubscriber}; use quickwit_control_plane::{IndexerNodeInfo, IndexerPool}; use quickwit_index_management::{IndexService as IndexManager, IndexServiceError}; -use quickwit_indexing::actors::{IndexingService, MergeSchedulerService}; +use quickwit_indexing::actors::IndexingService; use quickwit_indexing::models::ShardPositionsService; use quickwit_indexing::start_indexing_service; use quickwit_ingest::{ @@ -310,16 +310,6 @@ async fn get_compaction_planner_client_if_needed( ))) } -fn spawn_merge_scheduler_service( - universe: &Universe, - node_config: &NodeConfig, -) -> Mailbox { - let (mailbox, _) = universe.spawn_builder().spawn(MergeSchedulerService::new( - 
node_config.indexer_config.merge_concurrency.get(), - )); - mailbox -} - async fn start_ingest_client_if_needed( node_config: &NodeConfig, universe: &Universe, @@ -608,12 +598,6 @@ pub async fn serve_quickwit( .context("failed to initialize compaction service client")?; let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) { - let merge_scheduler_mailbox_opt = - if !node_config.indexer_config.enable_standalone_compactors { - Some(spawn_merge_scheduler_service(&universe, &node_config)) - } else { - None - }; let indexing_service = start_indexing_service( &universe, &node_config, @@ -623,7 +607,6 @@ pub async fn serve_quickwit( ingester_pool.clone(), storage_resolver.clone(), event_broker.clone(), - merge_scheduler_mailbox_opt, ) .await .context("failed to start indexing service")?; From d3a780c4ddc60ed0483d870b2273eebcb6c832f5 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Tue, 12 May 2026 10:51:04 -0400 Subject: [PATCH 11/21] Add metrics (#6350) --- quickwit/Cargo.lock | 2 + quickwit/quickwit-compaction/Cargo.toml | 2 + .../src/compaction_pipeline.rs | 63 +++++++++++++++- .../src/compactor_supervisor.rs | 40 +++++++--- quickwit/quickwit-compaction/src/lib.rs | 7 +- quickwit/quickwit-compaction/src/metrics.rs | 75 +++++++++++++++++++ .../src/planner/compaction_planner.rs | 42 ++++++++++- .../src/planner/compaction_state.rs | 43 +++++++++-- .../src/planner/index_config_metastore.rs | 13 +++- .../src/planner/metrics.rs | 68 +++++++++++++++++ .../quickwit-compaction/src/planner/mod.rs | 62 +++++++++++++++ .../resources/tests/node_config/quickwit.json | 4 +- .../resources/tests/node_config/quickwit.toml | 2 - .../resources/tests/node_config/quickwit.yaml | 2 - .../quickwit-config/src/node_config/mod.rs | 34 --------- .../src/node_config/serialize.rs | 2 - .../src/actors/indexing_pipeline.rs | 12 +-- .../src/actors/indexing_service.rs | 7 +- quickwit/quickwit-indexing/src/lib.rs | 3 +- .../quickwit-indexing/src/merge_policy/mod.rs | 8 
++ .../protos/quickwit/compaction.proto | 1 + .../codegen/quickwit/quickwit.compaction.rs | 2 + quickwit/quickwit-serve/src/lib.rs | 14 +++- 23 files changed, 424 insertions(+), 84 deletions(-) create mode 100644 quickwit/quickwit-compaction/src/metrics.rs create mode 100644 quickwit/quickwit-compaction/src/planner/metrics.rs diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 68d9ad60f96..5922b140e68 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -7150,6 +7150,8 @@ version = "0.8.0" dependencies = [ "anyhow", "async-trait", + "itertools 0.14.0", + "once_cell", "quickwit-actors", "quickwit-common", "quickwit-config", diff --git a/quickwit/quickwit-compaction/Cargo.toml b/quickwit/quickwit-compaction/Cargo.toml index c018327c8c4..556fe491a5b 100644 --- a/quickwit/quickwit-compaction/Cargo.toml +++ b/quickwit/quickwit-compaction/Cargo.toml @@ -26,6 +26,8 @@ time = { workspace = true } tracing = { workspace = true } tokio = { workspace = true } ulid = { workspace = true } +itertools = "0.14.0" +once_cell = "1.21.4" [dev-dependencies] quickwit-actors = { workspace = true, features = ["testsuite"] } diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs index 270cb5312d9..91a23e6bb96 100644 --- a/quickwit/quickwit-compaction/src/compaction_pipeline.rs +++ b/quickwit/quickwit-compaction/src/compaction_pipeline.rs @@ -30,9 +30,10 @@ use quickwit_indexing::{IndexingSplitStore, PublisherType, SplitsUpdateMailbox}; use quickwit_proto::indexing::MergePipelineId; use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_proto::types::{IndexUid, SourceId, SplitId}; -use tracing::{debug, error, info}; +use tracing::{error, info}; -use crate::TaskId; +use crate::metrics::COMPACTOR_METRICS; +use crate::{TaskId, source_uid_metrics_label}; #[derive(Clone, Debug, PartialEq)] pub enum PipelineStatus { @@ -41,11 +42,21 @@ pub enum PipelineStatus { Failed { error: String }, } +impl 
PipelineStatus { + pub fn is_terminal(&self) -> bool { + matches!( + self, + PipelineStatus::Completed | PipelineStatus::Failed { .. } + ) + } +} + pub struct PipelineStatusUpdate { pub task_id: TaskId, pub index_uid: IndexUid, pub source_id: SourceId, pub split_ids: Vec, + pub merge_level: u64, pub status: PipelineStatus, } @@ -94,6 +105,7 @@ pub struct CompactionPipeline { io_throughput_limiter: Option, max_concurrent_split_uploads: usize, event_broker: EventBroker, + pipeline_start: Option, } impl CompactionPipeline { @@ -128,6 +140,7 @@ impl CompactionPipeline { io_throughput_limiter, max_concurrent_split_uploads, event_broker, + pipeline_start: None, } } @@ -193,14 +206,55 @@ impl CompactionPipeline { } if !failure_actor_names.is_empty() { + self.record_pipeline_duration(); let error_msg = format!("failed actors: {:?}", failure_actor_names); error!(task_id=%self.task_id, "{error_msg}"); self.status = PipelineStatus::Failed { error: error_msg }; + self.record_terminal_metrics(false); return; } if !has_healthy { - debug!(task_id=%self.task_id, "all compaction pipeline actors completed"); + self.record_pipeline_duration(); + info!(task_id=%self.task_id, "all compaction pipeline actors completed"); self.status = PipelineStatus::Completed; + self.record_terminal_metrics(true); + } + } + + /// Fires once when the pipeline transitions from InProgress to a terminal state. + /// Pairs with the `compactions_in_progress.inc()` in `CompactorSupervisor::spawn_task`. 
+ fn record_terminal_metrics(&self, succeeded: bool) { + let merge_level = self.merge_operation.merge_level(); + let index_label = + source_uid_metrics_label(&self.pipeline_id.index_uid, &self.pipeline_id.source_id); + let label_values = [index_label.as_str(), &merge_level.to_string()]; + COMPACTOR_METRICS + .compactions_in_progress + .with_label_values(label_values) + .dec(); + if succeeded { + COMPACTOR_METRICS + .compactions_succeeded + .with_label_values(label_values) + .inc(); + } else { + COMPACTOR_METRICS + .compactions_failed + .with_label_values(label_values) + .inc(); + } + } + + fn record_pipeline_duration(&self) { + if let Some(pipeline_start_time) = self.pipeline_start { + let elapsed = pipeline_start_time.elapsed().as_secs_f64(); + let merge_level = self.merge_operation.merge_level(); + let index_label = + source_uid_metrics_label(&self.pipeline_id.index_uid, &self.pipeline_id.source_id); + COMPACTOR_METRICS + .compaction_duration + .with_label_values([index_label.as_str(), &merge_level.to_string()]) + .observe(elapsed); } } @@ -216,6 +270,7 @@ impl CompactionPipeline { .map(|split| split.split_id().to_string()) .collect(), status: self.status.clone(), + merge_level: self.merge_operation.merge_level() as u64, } } @@ -295,6 +350,8 @@ impl CompactionPipeline { .set_kill_switch(self.kill_switch.child()) .spawn(merge_split_downloader); + let now = Instant::now(); + self.pipeline_start = Some(now); // Kick off the pipeline. 
merge_split_downloader_mailbox .try_send_message(self.merge_operation.clone()) diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index 188125689aa..e07940c9706 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -35,9 +35,11 @@ use quickwit_proto::indexing::MergePipelineId; use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_proto::types::NodeId; use quickwit_storage::StorageResolver; -use tracing::{error, info, warn}; +use tracing::{error, info}; use crate::compaction_pipeline::{CompactionPipeline, PipelineStatus, PipelineStatusUpdate}; +use crate::metrics::COMPACTOR_METRICS; +use crate::source_uid_metrics_label; const CHECK_PIPELINE_STATUSES_INTERVAL: Duration = Duration::from_secs(1); @@ -110,6 +112,12 @@ impl CompactorSupervisor { assignments: Vec, spawn_ctx: &SpawnContext, ) { + // The planner has acknowledged the statuses we just sent — drop any + // Completed/Failed pipelines so their scratch directories and actor + // handles are released. + for slot in &mut self.pipelines { + slot.take_if(|p| p.status().is_terminal()); + } for assignment in assignments { let task_id = assignment.task_id.clone(); if let Err(error) = self.spawn_task(assignment, spawn_ctx).await { @@ -123,16 +131,17 @@ impl CompactorSupervisor { assignment: MergeTaskAssignment, spawn_ctx: &SpawnContext, ) -> anyhow::Result<()> { + let source_uid_label = source_uid_metrics_label( + assignment.index_uid.as_ref().unwrap(), + &assignment.source_id, + ); + let merge_level = assignment.merge_level.to_string(); + let label_values = [source_uid_label.as_str(), merge_level.as_str()]; + let slot_idx = self .pipelines .iter() - .position(|slot| match slot { - None => true, - Some(p) => matches!( - p.status(), - PipelineStatus::Completed | PipelineStatus::Failed { .. 
} - ), - }) + .position(Option::is_none) .ok_or_else(|| anyhow::anyhow!("no free pipeline slot"))?; let scratch_directory = self .compaction_root_directory @@ -142,6 +151,10 @@ impl CompactorSupervisor { .await?; pipeline.spawn_pipeline(spawn_ctx)?; self.pipelines[slot_idx] = Some(pipeline); + COMPACTOR_METRICS + .compactions_in_progress + .with_label_values(label_values) + .inc(); Ok(()) } @@ -207,7 +220,8 @@ impl CompactorSupervisor { .iter() .filter(|s| matches!(s.status, PipelineStatus::InProgress)) .count(); - let available_slots = (self.pipelines.len() - in_progress_count) as u32; + let available_slots = (self.pipelines.len() - in_progress_count) as i64; + COMPACTOR_METRICS.available_slots.set(available_slots); let mut in_progress = Vec::new(); let mut successes = Vec::new(); @@ -239,7 +253,7 @@ impl CompactorSupervisor { ReportStatusRequest { node_id: self.node_id.to_string(), - available_slots, + available_slots: available_slots as u32, in_progress, successes, failures, @@ -287,7 +301,7 @@ impl Handler for CompactorSupervisor { .await; } Err(error) => { - warn!(%error, "failed to report status to compaction planner"); + error!(%error, "failed to report status to compaction planner"); } } ctx.schedule_self_msg(CHECK_PIPELINE_STATUSES_INTERVAL, CheckPipelineStatuses); @@ -413,6 +427,7 @@ mod tests { index_uid: Some(index_metadata.index_uid.clone()), source_id: "test-source".to_string(), index_storage_uri: config.index_uri.to_string(), + merge_level: 1, } } @@ -572,6 +587,7 @@ mod tests { source_id: "src".to_string(), split_ids: vec!["s1".to_string(), "s2".to_string()], status: PipelineStatus::InProgress, + merge_level: 1, }, PipelineStatusUpdate { task_id: "task-2".to_string(), @@ -579,6 +595,7 @@ mod tests { source_id: "src".to_string(), split_ids: vec!["s3".to_string()], status: PipelineStatus::Completed, + merge_level: 1, }, PipelineStatusUpdate { task_id: "task-3".to_string(), @@ -588,6 +605,7 @@ mod tests { status: PipelineStatus::Failed { error: 
"boom".to_string(), }, + merge_level: 1, }, ]; diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs index 352f153b4fe..44948bc70ea 100644 --- a/quickwit/quickwit-compaction/src/lib.rs +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -18,6 +18,7 @@ mod compaction_pipeline; #[allow(dead_code)] mod compactor_supervisor; +mod metrics; pub mod planner; pub type TaskId = String; @@ -33,10 +34,14 @@ use quickwit_config::CompactorConfig; use quickwit_indexing::IndexingSplitCache; use quickwit_proto::compaction::CompactionPlannerServiceClient; use quickwit_proto::metastore::MetastoreServiceClient; -use quickwit_proto::types::NodeId; +use quickwit_proto::types::{IndexUid, NodeId, SourceId}; use quickwit_storage::StorageResolver; use tracing::info; +pub fn source_uid_metrics_label(index_uid: &IndexUid, source_id: &SourceId) -> String { + format!("{}-{}", index_uid, source_id) +} + #[allow(clippy::too_many_arguments)] pub async fn start_compactor_service( universe: &Universe, diff --git a/quickwit/quickwit-compaction/src/metrics.rs b/quickwit/quickwit-compaction/src/metrics.rs new file mode 100644 index 00000000000..1d6bb234065 --- /dev/null +++ b/quickwit/quickwit-compaction/src/metrics.rs @@ -0,0 +1,75 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use once_cell::sync::Lazy; +use quickwit_common::metrics::{ + HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, exponential_buckets, new_counter_vec, + new_gauge, new_gauge_vec, new_histogram_vec, +}; + +pub struct CompactorMetrics { + pub compactions_in_progress: IntGaugeVec<2>, + pub compactions_failed: IntCounterVec<2>, + pub compactions_succeeded: IntCounterVec<2>, + pub available_slots: IntGauge, + pub compaction_duration: HistogramVec<2>, +} + +fn compaction_duration_buckets() -> Vec { + exponential_buckets(0.5, 2.0, 14).expect("compaction duration buckets should be valid") +} + +impl Default for CompactorMetrics { + fn default() -> Self { + CompactorMetrics { + compactions_in_progress: new_gauge_vec( + "compactions_in_progress", + "number of compaction merge operations currently running on this compactor", + "compactor", + &[], + ["source_uid", "merge_level"], + ), + compactions_failed: new_counter_vec( + "compactions_failed", + "total number of compaction merge operations that have failed", + "compactor", + &[], + ["source_uid", "merge_level"], + ), + compactions_succeeded: new_counter_vec( + "compactions_succeeded", + "total number of compaction merge operations that have completed successfully", + "compactor", + &[], + ["source_uid", "merge_level"], + ), + available_slots: new_gauge( + "available_slots", + "number of compaction slots currently available on this compactor", + "compactor", + &[], + ), + compaction_duration: new_histogram_vec( + "compaction_duration_seconds", + "duration of compaction merge operations in seconds", + "compactor", + &[], + ["source_uid", "merge_level"], + compaction_duration_buckets(), + ), + } + } +} + +pub static COMPACTOR_METRICS: Lazy = Lazy::new(CompactorMetrics::default); diff --git a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs index 57144df0088..94bfebacb54 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs 
+++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs @@ -17,6 +17,7 @@ use std::time::Duration; use anyhow::Result; use async_trait::async_trait; +use itertools::Itertools; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; use quickwit_indexing::merge_policy::MergeOperation; use quickwit_metastore::{ @@ -28,11 +29,12 @@ use quickwit_proto::compaction::{ use quickwit_proto::metastore::{ListSplitsRequest, MetastoreService, MetastoreServiceClient}; use quickwit_proto::types::{IndexUid, NodeId, SourceId}; use time::OffsetDateTime; -use tracing::error; +use tracing::{error, info}; use ulid::Ulid; use super::compaction_state::CompactionState; use super::index_config_metastore::{IndexConfigMetastore, IndexEntry}; +use crate::planner::metrics::COMPACTION_PLANNER_METRICS; pub struct CompactionPlanner { state: CompactionState, @@ -68,7 +70,7 @@ impl Actor for CompactionPlanner { fn observable_state(&self) -> Self::ObservableState {} async fn initialize(&mut self, ctx: &ActorContext) -> Result<(), ActorExitStatus> { - tracing::info!("compaction planner starting, scanning metastore for immature splits"); + info!("compaction planner starting, scanning metastore for immature splits"); ctx.schedule_self_msg(INITIAL_SCAN_AND_PLAN_INTERVAL, ScanAndPlan); Ok(()) } @@ -140,6 +142,7 @@ impl CompactionPlanner { continue; } self.cursor = self.cursor.max(split.update_timestamp); + info!(max_timestamp=%self.cursor, "[compaction planner] update metastore cursor min_timestamp cursor"); self.state.track_split(split.split_metadata); } } @@ -153,9 +156,24 @@ impl CompactionPlanner { let splits = self .metastore .list_splits(request) - .await? + .await + .inspect_err(|error| { + error!(%error, "[compaction-planner] error calling metastore list_splits"); + COMPACTION_PLANNER_METRICS + .metastore_errors + .with_label_values(["scan"]) + .inc(); + })? 
.collect_splits() - .await?; + .await + .inspect_err(|error| { + error!(%error, "[compaction-planner] error collecting metastore splits"); + COMPACTION_PLANNER_METRICS + .metastore_errors + .with_label_values(["collect_splits"]) + .inc(); + })?; + emit_metastore_scan_metrics(&splits); Ok(splits) } @@ -163,6 +181,7 @@ impl CompactionPlanner { let splits = self.scan_metastore().await?; self.ingest_splits(splits).await; self.run_merge_policies(); + self.state.emit_metrics(); Ok(()) } @@ -208,6 +227,20 @@ impl CompactionPlanner { } } +fn emit_metastore_scan_metrics(new_splits: &[Split]) { + let size = new_splits.len(); + info!(%size, "[compaction planner] new splits scanned from metastore"); + let counts = new_splits + .iter() + .counts_by(|split| &split.split_metadata.index_uid); + for (&index_uid, &count) in counts.iter() { + COMPACTION_PLANNER_METRICS + .new_splits_scanned + .with_label_values([&index_uid.to_string()]) + .set(count as i64); + } +} + fn build_task_assignment( task_id: &str, index_entry: &IndexEntry, @@ -231,6 +264,7 @@ fn build_task_assignment( index_uid: Some(index_uid.clone()), source_id: source_id.to_string(), index_storage_uri: index_entry.index_storage_uri(), + merge_level: operation.merge_level() as u64, } } diff --git a/quickwit/quickwit-compaction/src/planner/compaction_state.rs b/quickwit/quickwit-compaction/src/planner/compaction_state.rs index 1bc1fb91cc8..9658e958b7f 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_state.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_state.rs @@ -12,17 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::{HashMap, HashSet}; +use std::fmt::Debug; use std::sync::Arc; use std::time::{Duration, Instant}; +use itertools::Itertools; use quickwit_indexing::merge_policy::{MergeOperation, MergePolicy}; use quickwit_metastore::SplitMetadata; use quickwit_proto::compaction::{CompactionFailure, CompactionInProgress, CompactionSuccess}; use quickwit_proto::types::{DocMappingUid, IndexUid, NodeId, SourceId, SplitId}; use tracing::{error, info, warn}; -use crate::TaskId; +use crate::planner::PendingOperations; +use crate::planner::metrics::COMPACTION_PLANNER_METRICS; +use crate::{TaskId, source_uid_metrics_label}; const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(60); @@ -62,7 +66,7 @@ pub struct CompactionState { in_flight_split_ids: HashSet, /// TODO: add index_uid and source_id to MergeOperation so we don't need the partition key /// here. - pending_operations: VecDeque<(CompactionPartitionKey, MergeOperation)>, + pending_operations: PendingOperations, } impl CompactionState { @@ -72,7 +76,7 @@ impl CompactionState { needs_compaction_split_ids: HashSet::new(), in_flight: HashMap::new(), in_flight_split_ids: HashSet::new(), - pending_operations: VecDeque::new(), + pending_operations: PendingOperations::new(), } } @@ -111,7 +115,7 @@ impl CompactionState { .insert(split.split_id().to_string()); } self.pending_operations - .push_back((partition_key.clone(), operation)); + .push(partition_key.clone(), operation); } if splits.is_empty() { self.needs_compaction.remove(partition_key); @@ -177,6 +181,7 @@ impl CompactionState { for task_id in timed_out_task_ids { if let Some(inflight) = self.in_flight.remove(&task_id) { error!(%task_id, node_id=%inflight.node_id, "compaction task timed out"); + COMPACTION_PLANNER_METRICS.timed_out_operations.inc(); for split_id in &inflight.split_ids { self.in_flight_split_ids.remove(split_id.as_str()); } @@ -189,7 +194,7 @@ impl CompactionState { let count = 
count.min(self.pending_operations.len()); let mut operations = Vec::with_capacity(count); for _ in 0..count { - operations.push(self.pending_operations.pop_front().unwrap()); + operations.push(self.pending_operations.pop().unwrap()); } operations } @@ -206,6 +211,30 @@ impl CompactionState { }, ); } + + pub fn emit_metrics(&self) { + // total number of splits that need to be merged by compaction partition key + self.needs_compaction + .iter() + .map(|(compaction_partition_key, splits)| { + ( + source_uid_metrics_label( + &compaction_partition_key.index_uid, + &compaction_partition_key.source_id, + ), + splits.len() as i64, + ) + }) + .into_grouping_map() + .sum() + .iter() + .for_each(|(partition_key, &total_splits)| { + COMPACTION_PLANNER_METRICS + .splits_needing_compaction + .with_label_values([partition_key.as_str()]) + .set(total_splits) + }); + } } #[cfg(test)] @@ -289,7 +318,7 @@ mod tests { // Splits moved from needs_compaction to in_flight. assert!(!state.pending_operations.is_empty()); - for (_, op) in &state.pending_operations { + for (_, op) in state.pending_operations.iter() { for split in op.splits_as_slice() { assert!(!state.needs_compaction_split_ids.contains(split.split_id())); assert!(state.in_flight_split_ids.contains(split.split_id())); diff --git a/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs b/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs index 5fe2a9a6bdc..e2d42d99ba2 100644 --- a/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs +++ b/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs @@ -21,6 +21,9 @@ use quickwit_indexing::merge_policy::{MergePolicy, merge_policy_from_settings}; use quickwit_metastore::{IndexMetadataResponseExt, SplitMaturity, SplitMetadata}; use quickwit_proto::metastore::{IndexMetadataRequest, MetastoreService, MetastoreServiceClient}; use quickwit_proto::types::{DocMappingUid, IndexUid}; +use tracing::error; + +use 
crate::planner::metrics::COMPACTION_PLANNER_METRICS; /// Everything the planner needs to know about a single index. pub struct IndexEntry { @@ -99,7 +102,15 @@ impl IndexConfigMetastore { index_uid: Some(index_uid.clone()), index_id: None, }) - .await?; + .await + .inspect_err(|error| { + error!(%error, "[compaction-planner] error getting index metadata from metastore"); + COMPACTION_PLANNER_METRICS + .metastore_errors + .with_label_values(["index_metadata"]) + .inc(); + })?; + let index_metadata = response.deserialize_index_metadata()?; let doc_mapper = build_doc_mapper( diff --git a/quickwit/quickwit-compaction/src/planner/metrics.rs b/quickwit/quickwit-compaction/src/planner/metrics.rs new file mode 100644 index 00000000000..2a4b2d9d4c7 --- /dev/null +++ b/quickwit/quickwit-compaction/src/planner/metrics.rs @@ -0,0 +1,68 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use once_cell::sync::Lazy; +use quickwit_common::metrics::{IntGauge, IntGaugeVec, new_gauge, new_gauge_vec}; + +pub struct CompactionPlannerMetrics { + pub new_splits_scanned: IntGaugeVec<1>, + pub splits_needing_compaction: IntGaugeVec<1>, + pub pending_merge_operations: IntGaugeVec<2>, + pub timed_out_operations: IntGauge, + pub metastore_errors: IntGaugeVec<1>, +} + +impl Default for CompactionPlannerMetrics { + fn default() -> Self { + CompactionPlannerMetrics { + new_splits_scanned: new_gauge_vec( + "new_splits_scanned", + "number of new immature splits scanned from the metastore on the last tick", + "compaction_planner", + &[], + ["source_uid"], + ), + splits_needing_compaction: new_gauge_vec( + "splits_needing_compaction", + "number of splits currently tracked as needing compaction", + "compaction_planner", + &[], + ["source_uid"], + ), + pending_merge_operations: new_gauge_vec( + "pending_merge_operations", + "number of pending merge operations awaiting assignment", + "compaction_planner", + &[], + ["source_uid", "merge_level"], + ), + timed_out_operations: new_gauge( + "timed_out_operations", + "number of merge operations that timed out waiting for a worker heartbeat", + "compaction_planner", + &[], + ), + metastore_errors: new_gauge_vec( + "metastore_errors", + "number of metastore errors encountered by the compaction planner", + "compaction_planner", + &[], + ["operation"], + ), + } + } +} + +pub static COMPACTION_PLANNER_METRICS: Lazy = + Lazy::new(CompactionPlannerMetrics::default); diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs index bd075ab900d..1dc50d6ca46 100644 --- a/quickwit/quickwit-compaction/src/planner/mod.rs +++ b/quickwit/quickwit-compaction/src/planner/mod.rs @@ -17,5 +17,67 @@ mod compaction_planner; mod compaction_state; #[allow(dead_code)] mod index_config_metastore; +pub(crate) mod metrics; + +use std::collections::VecDeque; pub use 
compaction_planner::CompactionPlanner; +use quickwit_indexing::merge_policy::MergeOperation; + +use crate::planner::compaction_state::CompactionPartitionKey; +use crate::planner::metrics::COMPACTION_PLANNER_METRICS; +use crate::source_uid_metrics_label; + +/// Queue of merge operations awaiting assignment, with the +/// `pending_merge_operations` gauge maintained inline. Push/pop are the only +/// mutation paths so the metric stays consistent with `len()`. +struct PendingOperations { + inner: VecDeque<(CompactionPartitionKey, MergeOperation)>, +} + +impl PendingOperations { + fn new() -> Self { + Self { + inner: VecDeque::new(), + } + } + + fn push(&mut self, partition_key: CompactionPartitionKey, operation: MergeOperation) { + Self::adjust_gauge(&partition_key, &operation, 1); + self.inner.push_back((partition_key, operation)); + } + + fn pop(&mut self) -> Option<(CompactionPartitionKey, MergeOperation)> { + let (partition_key, operation) = self.inner.pop_front()?; + Self::adjust_gauge(&partition_key, &operation, -1); + Some((partition_key, operation)) + } + + fn len(&self) -> usize { + self.inner.len() + } + + #[cfg(test)] + fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + #[cfg(test)] + fn iter(&self) -> impl Iterator { + self.inner.iter() + } + + fn adjust_gauge( + partition_key: &CompactionPartitionKey, + operation: &MergeOperation, + delta: i64, + ) { + let source_uid_label = + source_uid_metrics_label(&partition_key.index_uid, &partition_key.source_id); + let merge_level = operation.merge_level().to_string(); + COMPACTION_PLANNER_METRICS + .pending_merge_operations + .with_label_values([source_uid_label.as_str(), merge_level.as_str()]) + .add(delta); + } +} diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.json b/quickwit/quickwit-config/resources/tests/node_config/quickwit.json index 7269b37ae22..996d682b6e8 100644 --- a/quickwit/quickwit-config/resources/tests/node_config/quickwit.json +++ 
b/quickwit/quickwit-config/resources/tests/node_config/quickwit.json @@ -52,9 +52,7 @@ "enable_otlp_endpoint": true, "split_store_max_num_bytes": "1T", "split_store_max_num_splits": 10000, - "max_concurrent_split_uploads": 8, - "max_merge_write_throughput": "100mb", - "merge_concurrency": 2 + "max_concurrent_split_uploads": 8 }, "ingest_api": { "replication_factor": 2 diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml b/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml index ea715dcffe0..6a63ff31883 100644 --- a/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml +++ b/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml @@ -43,8 +43,6 @@ enable_otlp_endpoint = true split_store_max_num_bytes = "1T" split_store_max_num_splits = 10_000 max_concurrent_split_uploads = 8 -max_merge_write_throughput = "100mb" -merge_concurrency = 2 [ingest_api] replication_factor = 2 diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml b/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml index face0852972..49e418ed924 100644 --- a/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml +++ b/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml @@ -47,8 +47,6 @@ indexer: split_store_max_num_bytes: 1T split_store_max_num_splits: 10000 max_concurrent_split_uploads: 8 - max_merge_write_throughput: 100mb - merge_concurrency: 2 ingest_api: replication_factor: 2 diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index ff981fcf31c..4df0a4e102d 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -146,15 +146,6 @@ pub struct IndexerConfig { pub split_store_max_num_splits: usize, #[serde(default = "IndexerConfig::default_max_concurrent_split_uploads")] pub max_concurrent_split_uploads: usize, - /// Limits the IO throughput of the 
`SplitDownloader` and the `MergeExecutor`. - /// On hardware where IO is constrained, it makes sure that Merges (a batch operation) - /// does not starve indexing itself (as it is a latency sensitive operation). - #[serde(default)] - pub max_merge_write_throughput: Option, - /// Maximum number of merge or delete operation that can be executed concurrently. - /// (defaults to num_cpu / 2). - #[serde(default = "IndexerConfig::default_merge_concurrency")] - pub merge_concurrency: NonZeroUsize, /// Enables the OpenTelemetry exporter endpoint to ingest logs and traces via the OpenTelemetry /// Protocol (OTLP). #[serde(default = "IndexerConfig::default_enable_otlp_endpoint")] @@ -202,10 +193,6 @@ impl IndexerConfig { 1_000 } - pub fn default_merge_concurrency() -> NonZeroUsize { - NonZeroUsize::new(quickwit_common::num_cpus() * 2 / 3).unwrap_or(NonZeroUsize::MIN) - } - fn default_cpu_capacity() -> CpuCapacity { CpuCapacity::one_cpu_thread() * (quickwit_common::num_cpus() as u32) } @@ -220,8 +207,6 @@ impl IndexerConfig { split_store_max_num_splits: 3, max_concurrent_split_uploads: 4, cpu_capacity: PIPELINE_FULL_CAPACITY * 4u32, - max_merge_write_throughput: None, - merge_concurrency: NonZeroUsize::new(3).unwrap(), enable_standalone_compactors: false, }; Ok(indexer_config) @@ -237,8 +222,6 @@ impl Default for IndexerConfig { split_store_max_num_splits: Self::default_split_store_max_num_splits(), max_concurrent_split_uploads: Self::default_max_concurrent_split_uploads(), cpu_capacity: Self::default_cpu_capacity(), - merge_concurrency: Self::default_merge_concurrency(), - max_merge_write_throughput: None, enable_standalone_compactors: Self::default_enable_standalone_compactors(), } } @@ -949,23 +932,6 @@ mod tests { "1500m" ); } - { - let indexer_config: IndexerConfig = - serde_yaml::from_str(r#"merge_concurrency: 5"#).unwrap(); - assert_eq!( - indexer_config.merge_concurrency, - NonZeroUsize::new(5).unwrap() - ); - let indexer_config_json = 
serde_json::to_value(&indexer_config).unwrap(); - assert_eq!( - indexer_config_json - .get("merge_concurrency") - .unwrap() - .as_u64() - .unwrap(), - 5 - ); - } { let indexer_config: IndexerConfig = serde_yaml::from_str(r#"cpu_capacity: 1500m"#).unwrap(); diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs index 0bf765df123..1f9dc30d6d3 100644 --- a/quickwit/quickwit-config/src/node_config/serialize.rs +++ b/quickwit/quickwit-config/src/node_config/serialize.rs @@ -673,10 +673,8 @@ mod tests { split_store_max_num_bytes: ByteSize::tb(1), split_store_max_num_splits: 10_000, max_concurrent_split_uploads: 8, - merge_concurrency: NonZeroUsize::new(2).unwrap(), cpu_capacity: IndexerConfig::default_cpu_capacity(), enable_cooperative_indexing: false, - max_merge_write_throughput: Some(ByteSize::mb(100)), enable_standalone_compactors: false, } ); diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 6cfe1d6ea5a..0acd20706cb 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -462,7 +462,7 @@ impl IndexingPipeline { None, self.params.split_store.clone(), SplitsUpdateMailbox::Sequencer(sequencer_mailbox), - self.params.max_concurrent_split_uploads_index, + self.params.max_concurrent_split_uploads, self.params.event_broker.clone(), ); let (uploader_mailbox, uploader_handle) = ctx @@ -628,7 +628,7 @@ impl IndexingPipeline { self.params.metastore.clone(), self.params.storage.clone(), SplitsUpdateMailbox::Sequencer(parquet_sequencer_mailbox), - self.params.max_concurrent_split_uploads_index, + self.params.max_concurrent_split_uploads, ); let (parquet_uploader_mailbox, parquet_uploader_handle) = ctx .spawn_actor() @@ -847,7 +847,7 @@ pub struct IndexingPipelineParams { pub indexing_directory: TempDirectory, pub indexing_settings: 
IndexingSettings, pub split_store: IndexingSplitStore, - pub max_concurrent_split_uploads_index: usize, + pub max_concurrent_split_uploads: usize, pub cooperative_indexing_permits: Option>, // Merge-related parameters @@ -980,7 +980,7 @@ mod tests { split_store, merge_policy: default_merge_policy(), queues_dir_path: PathBuf::from("./queues"), - max_concurrent_split_uploads_index: 4, + max_concurrent_split_uploads: 4, cooperative_indexing_permits: None, event_broker: EventBroker::default(), params_fingerprint: 42u64, @@ -1100,7 +1100,7 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), - max_concurrent_split_uploads_index: 4, + max_concurrent_split_uploads: 4, cooperative_indexing_permits: None, event_broker: Default::default(), params_fingerprint: 42u64, @@ -1219,7 +1219,7 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), - max_concurrent_split_uploads_index: 4, + max_concurrent_split_uploads: 4, cooperative_indexing_permits: None, params_fingerprint: 42u64, event_broker: Default::default(), diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 1aafbc01dec..9c27a06f450 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -250,11 +250,6 @@ impl IndexingService { let doc_mapper = build_doc_mapper(&index_config.doc_mapping, &index_config.search_settings) .map_err(|error| IndexingError::Internal(error.to_string()))?; - // The concurrent uploads budget is split in 2: 1/2 for the indexing pipeline, 1/2 for the - // merge pipeline. When there is no local merge pipeline, the indexing pipeline gets the - // full budget. 
- let max_concurrent_split_uploads_index = self.max_concurrent_split_uploads; - let params_fingerprint = indexing_pipeline_params_fingerprint(&index_config, &source_config); if let Some(expected_params_fingerprint) = expected_params_fingerprint { @@ -283,7 +278,7 @@ impl IndexingService { indexing_directory, indexing_settings: index_config.indexing_settings.clone(), split_store, - max_concurrent_split_uploads_index, + max_concurrent_split_uploads: self.max_concurrent_split_uploads, cooperative_indexing_permits: self.cooperative_indexing_permits.clone(), // The merge policy is needed in the uploader for determining split maturity merge_policy, diff --git a/quickwit/quickwit-indexing/src/lib.rs b/quickwit/quickwit-indexing/src/lib.rs index df0b8339c02..1c78783089e 100644 --- a/quickwit/quickwit-indexing/src/lib.rs +++ b/quickwit/quickwit-indexing/src/lib.rs @@ -31,7 +31,8 @@ pub use crate::actors::{ pub use crate::controlled_directory::ControlledDirectory; use crate::models::IndexingStatistics; pub use crate::split_store::{ - IndexingSplitCache, IndexingSplitStore, get_tantivy_directory_from_split_bundle, + IndexingSplitCache, IndexingSplitStore, SplitStoreQuota, + get_tantivy_directory_from_split_bundle, }; pub mod actors; diff --git a/quickwit/quickwit-indexing/src/merge_policy/mod.rs b/quickwit/quickwit-indexing/src/merge_policy/mod.rs index 9319f8d8498..37bbf15edbc 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/mod.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/mod.rs @@ -121,6 +121,14 @@ impl MergeOperation { pub fn splits_as_slice(&self) -> &[SplitMetadata] { self.splits.as_slice() } + + pub fn merge_level(&self) -> usize { + self.splits + .iter() + .map(|s| s.num_merge_ops) + .max() + .unwrap_or(0) + } } impl fmt::Debug for MergeOperation { diff --git a/quickwit/quickwit-proto/protos/quickwit/compaction.proto b/quickwit/quickwit-proto/protos/quickwit/compaction.proto index 6f6e4bc6f75..3c82d6b34c7 100644 --- 
a/quickwit/quickwit-proto/protos/quickwit/compaction.proto +++ b/quickwit/quickwit-proto/protos/quickwit/compaction.proto @@ -60,4 +60,5 @@ message MergeTaskAssignment { quickwit.common.IndexUid index_uid = 7; string source_id = 8; string index_storage_uri = 9; + uint64 merge_level = 10; } \ No newline at end of file diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs index 67b2f24fe15..1404816f276 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.compaction.rs @@ -66,6 +66,8 @@ pub struct MergeTaskAssignment { pub source_id: ::prost::alloc::string::String, #[prost(string, tag = "9")] pub index_storage_uri: ::prost::alloc::string::String, + #[prost(uint64, tag = "10")] + pub merge_level: u64, } /// BEGIN quickwit-codegen #[allow(unused_imports)] diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 3d5a8cd2428..9be9b60971a 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -778,7 +778,19 @@ pub async fn serve_quickwit( let compaction_root_directory = quickwit_common::temp_dir::Builder::default() .tempdir_in(&compaction_dir) .context("failed to create compaction temp directory")?; - let split_cache = Arc::new(quickwit_indexing::IndexingSplitCache::no_caching()); + let split_store_quota = quickwit_indexing::SplitStoreQuota::try_new( + node_config.compactor_config.split_store_max_num_splits, + node_config.compactor_config.split_store_max_num_bytes, + )?; + let split_cache_dir = node_config + .data_dir_path + .join("compactor-split-cache") + .join("splits"); + let split_cache = Arc::new( + quickwit_indexing::IndexingSplitCache::open(split_cache_dir, split_store_quota) + .await + .context("failed to open compactor split cache")?, + ); let compaction_client = compaction_service_client_opt .clone() 
.expect("compactor service enabled but no compaction client available"); From a289fa9df89bec139347fc1f8cb237c3152998ce Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Tue, 12 May 2026 11:06:05 -0400 Subject: [PATCH 12/21] Improvements from live testing (#6404) Refactor migrations --- quickwit/Cargo.lock | 1 + .../src/compactor_supervisor.rs | 10 +- quickwit/quickwit-compaction/src/lib.rs | 5 - .../src/planner/compaction_planner.rs | 133 ++++++++++++------ .../src/planner/compaction_state.rs | 26 +++- .../src/planner/index_config_metastore.rs | 2 + .../src/planner/metrics.rs | 23 +-- .../quickwit-compaction/src/planner/mod.rs | 1 + .../quickwit-config/src/node_config/mod.rs | 20 +-- .../src/split_store/indexing_split_cache.rs | 17 ++- quickwit/quickwit-janitor/Cargo.toml | 1 + .../quickwit-janitor/src/janitor_service.rs | 5 + quickwit/quickwit-janitor/src/lib.rs | 6 +- ...lits-published-maturity-timestamp.down.sql | 4 + ...splits-published-maturity-timestamp.up.sql | 24 ++++ quickwit/quickwit-metastore/src/lib.rs | 2 +- .../quickwit-metastore/src/metastore/mod.rs | 27 ++++ .../src/metastore/postgres/migrator.rs | 39 ++--- .../src/metastore/postgres/utils.rs | 4 + .../quickwit-metastore/src/split_metadata.rs | 12 ++ quickwit/quickwit-serve/src/lib.rs | 81 ++++++----- 21 files changed, 289 insertions(+), 154 deletions(-) create mode 100644 quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.down.sql create mode 100644 quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.up.sql diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 5922b140e68..61aa6959f3b 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -7496,6 +7496,7 @@ dependencies = [ "once_cell", "quickwit-actors", "quickwit-common", + "quickwit-compaction", "quickwit-config", "quickwit-doc-mapper", "quickwit-index-management", diff --git 
a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index e07940c9706..8508a82ccaf 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -57,7 +57,6 @@ pub struct CompactorSupervisor { // Shared resources distributed to pipelines when spawning actor chains. io_throughput_limiter: Option, - split_cache: Arc, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, max_concurrent_split_uploads: usize, @@ -74,7 +73,6 @@ impl CompactorSupervisor { planner_client: CompactionPlannerServiceClient, num_pipeline_slots: usize, io_throughput_limiter: Option, - split_cache: Arc, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, max_concurrent_split_uploads: usize, @@ -87,7 +85,6 @@ impl CompactorSupervisor { planner_client, pipelines, io_throughput_limiter, - split_cache, metastore, storage_resolver, max_concurrent_split_uploads, @@ -185,7 +182,8 @@ impl CompactorSupervisor { let index_storage_uri = Uri::from_str(&assignment.index_storage_uri)?; let index_storage = self.storage_resolver.resolve(&index_storage_uri).await?; - let split_store = IndexingSplitStore::new(index_storage, self.split_cache.clone()); + let split_cache = Arc::new(IndexingSplitCache::no_caching()); + let split_store = IndexingSplitStore::new(index_storage, split_cache); let doc_mapper = build_doc_mapper(&doc_mapping, &search_settings)?; let merge_policy = merge_policy_from_settings(&indexing_settings); @@ -311,8 +309,6 @@ impl Handler for CompactorSupervisor { #[cfg(test)] mod tests { - use std::sync::Arc; - use quickwit_actors::Universe; use quickwit_common::temp_dir::TempDirectory; use quickwit_proto::compaction::{ @@ -334,7 +330,6 @@ mod tests { compaction_client, num_slots, None, - Arc::new(IndexingSplitCache::no_caching()), metastore, StorageResolver::for_test(), 2, @@ -544,7 +539,6 @@ mod tests { client, 3, None, - 
Arc::new(IndexingSplitCache::no_caching()), metastore, StorageResolver::for_test(), 2, diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs index 44948bc70ea..30c61587c41 100644 --- a/quickwit/quickwit-compaction/src/lib.rs +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -23,15 +23,12 @@ pub mod planner; pub type TaskId = String; -use std::sync::Arc; - pub use compactor_supervisor::CompactorSupervisor; use quickwit_actors::{Mailbox, Universe}; use quickwit_common::io; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::CompactorConfig; -use quickwit_indexing::IndexingSplitCache; use quickwit_proto::compaction::CompactionPlannerServiceClient; use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_proto::types::{IndexUid, NodeId, SourceId}; @@ -48,7 +45,6 @@ pub async fn start_compactor_service( node_id: NodeId, compaction_client: CompactionPlannerServiceClient, compactor_config: &CompactorConfig, - split_cache: Arc, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, event_broker: EventBroker, @@ -62,7 +58,6 @@ pub async fn start_compactor_service( compaction_client, compactor_config.max_concurrent_pipelines.get(), io_throughput_limiter, - split_cache, metastore, storage_resolver, compactor_config.max_concurrent_split_uploads, diff --git a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs index 94bfebacb54..637fe72017b 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs @@ -36,21 +36,18 @@ use super::compaction_state::CompactionState; use super::index_config_metastore::{IndexConfigMetastore, IndexEntry}; use crate::planner::metrics::COMPACTION_PLANNER_METRICS; +/// Cap on splits fetched per tick. 
Every tick, the planner re-scans the immature published set, +/// sorted by `maturity_timestamp` ASC so the most-urgent splits are processed first when a backlog +/// exists. Splits beyond this cap aren't lost -- they bubble into range as the front of the queue +/// is merged off. +const SCAN_PAGE_SIZE: usize = 5_000; +#[derive(Debug)] pub struct CompactionPlanner { state: CompactionState, index_config_metastore: IndexConfigMetastore, - cursor: i64, metastore: MetastoreServiceClient, } -impl Debug for CompactionPlanner { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("CompactionPlanner") - .field("cursor", &self.cursor) - .finish() - } -} - const SCAN_AND_PLAN_INTERVAL: Duration = Duration::from_secs(5); /// On initialization, we want to wait for two intervals to allow any in-progress workers to report /// their progress, preventing us from frivolously rescheduling work. @@ -112,15 +109,11 @@ impl Handler for CompactionPlanner { } } -const STARTUP_LOOKBACK: Duration = Duration::from_secs(24 * 60 * 60); - impl CompactionPlanner { pub fn new(metastore: MetastoreServiceClient) -> Self { - let cursor = OffsetDateTime::now_utc().unix_timestamp() - STARTUP_LOOKBACK.as_secs() as i64; CompactionPlanner { state: CompactionState::new(), index_config_metastore: IndexConfigMetastore::new(metastore.clone()), - cursor, metastore, } } @@ -141,8 +134,6 @@ impl CompactionPlanner { if index_entry.is_split_mature(&split.split_metadata) { continue; } - self.cursor = self.cursor.max(split.update_timestamp); - info!(max_timestamp=%self.cursor, "[compaction planner] update metastore cursor min_timestamp cursor"); self.state.track_split(split.split_metadata); } } @@ -151,7 +142,8 @@ impl CompactionPlanner { let query = ListSplitsQuery::for_all_indexes() .with_split_state(SplitState::Published) .retain_immature(OffsetDateTime::now_utc()) - .with_update_timestamp_gte(self.cursor); + .sort_by_maturity_timestamp() + .with_limit(SCAN_PAGE_SIZE); let 
request = ListSplitsRequest::try_from_list_splits_query(&query)?; let splits = self .metastore @@ -237,7 +229,7 @@ fn emit_metastore_scan_metrics(new_splits: &[Split]) { COMPACTION_PLANNER_METRICS .new_splits_scanned .with_label_values([&index_uid.to_string()]) - .set(count as i64); + .inc_by(count as u64); } } @@ -270,6 +262,7 @@ fn build_task_assignment( #[cfg(test)] mod tests { + use std::ops::Bound; use std::time::Duration; use quickwit_common::ServiceStream; @@ -278,14 +271,15 @@ mod tests { ConstWriteAmplificationMergePolicyConfig, MergePolicyConfig, }; use quickwit_metastore::{ - IndexMetadata, IndexMetadataResponseExt, ListSplitsResponseExt, Split, SplitMaturity, - SplitMetadata, SplitState, + IndexMetadata, IndexMetadataResponseExt, ListSplitsRequestExt, ListSplitsResponseExt, + SortBy, Split, SplitMaturity, SplitMetadata, SplitState, }; use quickwit_proto::compaction::CompactionSuccess; use quickwit_proto::metastore::{ IndexMetadataResponse, ListSplitsResponse, MetastoreError, MockMetastoreService, }; use quickwit_proto::types::IndexUid; + use time::OffsetDateTime; use super::*; @@ -334,17 +328,34 @@ mod tests { } #[tokio::test] - async fn test_scan_metastore() { + async fn test_scan_metastore_query_shape_and_passthrough() { let index_uid = IndexUid::for_test("test-index", 0); - let splits = vec![ - test_split("split-1", &index_uid, 1000), - test_split("split-2", &index_uid, 2000), + let returned_splits = vec![ + test_split("a", &index_uid, 1000), + test_split("b", &index_uid, 2000), ]; - let splits_clone = splits.clone(); + let returned_clone = returned_splits.clone(); + let scan_started_at = OffsetDateTime::now_utc().unix_timestamp(); let mut mock = MockMetastoreService::new(); - mock.expect_list_splits().returning(move |_| { - let response = ListSplitsResponse::try_from_splits(splits_clone.clone()).unwrap(); + mock.expect_list_splits().returning(move |req| { + let query = req.deserialize_list_splits_query().unwrap(); + + assert_eq!(query.split_states, 
vec![SplitState::Published]); + assert_eq!(query.limit, Some(SCAN_PAGE_SIZE)); + assert_eq!(query.sort_by, SortBy::MaturityTimestamp); + + let Bound::Excluded(mature_at) = query.mature else { + panic!("expected Excluded mature bound, got {:?}", query.mature); + }; + let now_secs = OffsetDateTime::now_utc().unix_timestamp(); + assert!(mature_at.unix_timestamp() >= scan_started_at); + assert!(mature_at.unix_timestamp() <= now_secs); + + assert_eq!(query.update_timestamp.start, Bound::Unbounded); + assert_eq!(query.update_timestamp.end, Bound::Unbounded); + + let response = ListSplitsResponse::try_from_splits(returned_clone.clone()).unwrap(); Ok(ServiceStream::from(vec![Ok(response)])) }); @@ -352,12 +363,12 @@ mod tests { let result = planner.scan_metastore().await.unwrap(); assert_eq!(result.len(), 2); - assert_eq!(result[0].split_metadata.split_id, "split-1"); - assert_eq!(result[1].split_metadata.split_id, "split-2"); + assert_eq!(result[0].split_metadata.split_id, "a"); + assert_eq!(result[1].split_metadata.split_id, "b"); } #[tokio::test] - async fn test_ingest_splits_dedup_maturity_and_cursor() { + async fn test_ingest_splits_dedups_and_skips_mature() { let index_metadata = test_index_metadata(); let response = test_index_metadata_response(&index_metadata); let index_uid = index_metadata.index_uid.clone(); @@ -367,7 +378,6 @@ mod tests { .returning(move |_| Ok(response.clone())); let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); - planner.cursor = 0; // Pre-populate: "in-flight" is already being compacted. 
planner.state.track_split(SplitMetadata { @@ -390,11 +400,10 @@ mod tests { assert!(planner.state.is_split_tracked("fresh")); assert!(planner.state.is_split_tracked("in-flight")); assert!(!planner.state.is_split_tracked("mature")); - assert_eq!(planner.cursor, 3000); } #[tokio::test] - async fn test_scan_and_plan_metastore_error() { + async fn test_scan_and_plan_propagates_metastore_error() { let mut mock = MockMetastoreService::new(); mock.expect_list_splits().returning(|_| { Err(MetastoreError::Internal { @@ -404,11 +413,7 @@ mod tests { }); let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); - let original_cursor = planner.cursor; - - let result = planner.scan_and_plan().await; - assert!(result.is_err()); - assert_eq!(planner.cursor, original_cursor); + assert!(planner.scan_and_plan().await.is_err()); } #[tokio::test] @@ -425,7 +430,6 @@ mod tests { }); let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); - planner.cursor = 0; planner.ingest_splits(splits).await; assert!(!planner.state.is_split_tracked("orphan")); @@ -452,12 +456,60 @@ mod tests { .returning(move |_| Ok(index_metadata_response.clone())); let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); - planner.cursor = 0; planner.scan_and_plan().await.unwrap(); assert!(planner.state.is_split_tracked("s1")); assert!(planner.state.is_split_tracked("s2")); - assert_eq!(planner.cursor, 6000); + } + + #[tokio::test] + async fn test_failed_task_is_retracked_on_next_scan() { + // After a worker reports failure (or times out), planner-local + // tracking is cleared. Because there is no cursor, the next scan + // rediscovers the still-Published, still-immature splits and + // re-tracks them. 
+ let index_metadata = test_index_metadata_with_merge_factor_2(); + let index_metadata_response = test_index_metadata_response(&index_metadata); + let index_uid = index_metadata.index_uid.clone(); + + let splits = vec![ + test_split("s1", &index_uid, 1000), + test_split("s2", &index_uid, 2000), + ]; + let splits_clone = splits.clone(); + + let mut mock = MockMetastoreService::new(); + mock.expect_list_splits().returning(move |_| { + let response = ListSplitsResponse::try_from_splits(splits_clone.clone()).unwrap(); + Ok(ServiceStream::from(vec![Ok(response)])) + }); + mock.expect_index_metadata() + .returning(move |_| Ok(index_metadata_response.clone())); + + let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); + let node_id = NodeId::from("worker-1"); + + planner.scan_and_plan().await.unwrap(); + let assignments = planner.assign_tasks(&node_id, 10); + assert_eq!(assignments.len(), 1); + let task_id = assignments[0].task_id.clone(); + assert!(planner.state.is_split_tracked("s1")); + assert!(planner.state.is_split_tracked("s2")); + + // Worker reports failure; planner forgets the splits. + planner + .state + .process_failures(&[quickwit_proto::compaction::CompactionFailure { + task_id, + error_message: "boom".to_string(), + }]); + assert!(!planner.state.is_split_tracked("s1")); + assert!(!planner.state.is_split_tracked("s2")); + + // Next scan rediscovers them and re-tracks them. 
+ planner.scan_and_plan().await.unwrap(); + assert!(planner.state.is_split_tracked("s1")); + assert!(planner.state.is_split_tracked("s2")); } /// Helper: creates a planner with merge_factor=2, ingests the given splits, @@ -477,7 +529,6 @@ mod tests { }); let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); - planner.cursor = 0; let splits: Vec = split_ids .iter() diff --git a/quickwit/quickwit-compaction/src/planner/compaction_state.rs b/quickwit/quickwit-compaction/src/planner/compaction_state.rs index 9658e958b7f..58cf85a5bb3 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_state.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_state.rs @@ -49,6 +49,7 @@ impl CompactionPartitionKey { } } +#[derive(Debug)] struct InFlightCompaction { task_id: TaskId, split_ids: Vec, @@ -59,6 +60,7 @@ struct InFlightCompaction { /// Tracks all split-level state for the compaction planner: /// which splits need compaction, which are in-flight, and which /// operations are pending assignment to workers. +#[derive(Debug)] pub struct CompactionState { needs_compaction: HashMap>, needs_compaction_split_ids: HashSet, @@ -108,14 +110,22 @@ impl CompactionState { let Some(splits) = self.needs_compaction.get_mut(partition_key) else { return; }; - for operation in merge_policy.operations(splits) { - for split in operation.splits_as_slice() { - self.needs_compaction_split_ids.remove(split.split_id()); - self.in_flight_split_ids - .insert(split.split_id().to_string()); + // `MergePolicy::operations` emits at most one op per level per call, which under a backlog + // leaves the bulk of `splits` untouched per tick. Loop until no new operations are created. 
+ loop { + let operations = merge_policy.operations(splits); + if operations.is_empty() { + break; + } + for operation in operations { + for split in operation.splits_as_slice() { + self.needs_compaction_split_ids.remove(split.split_id()); + self.in_flight_split_ids + .insert(split.split_id().to_string()); + } + self.pending_operations + .push(partition_key.clone(), operation); } - self.pending_operations - .push(partition_key.clone(), operation); } if splits.is_empty() { self.needs_compaction.remove(partition_key); @@ -138,6 +148,7 @@ impl CompactionState { if let Some(inflight) = self.in_flight.remove(&failure.task_id) { warn!(task_id=%failure.task_id, error=%failure.error_message, "compaction task failed"); for split_id in &inflight.split_ids { + // these splits will be picked up again on the next metastore scan. self.in_flight_split_ids.remove(split_id.as_str()); } } @@ -183,6 +194,7 @@ impl CompactionState { error!(%task_id, node_id=%inflight.node_id, "compaction task timed out"); COMPACTION_PLANNER_METRICS.timed_out_operations.inc(); for split_id in &inflight.split_ids { + // these splits will be picked up again on the next metastore scan. self.in_flight_split_ids.remove(split_id.as_str()); } } diff --git a/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs b/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs index e2d42d99ba2..ba37b11c758 100644 --- a/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs +++ b/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs @@ -26,6 +26,7 @@ use tracing::error; use crate::planner::metrics::COMPACTION_PLANNER_METRICS; /// Everything the planner needs to know about a single index. +#[derive(Debug)] pub struct IndexEntry { config: IndexConfig, merge_policy: Arc, @@ -76,6 +77,7 @@ impl IndexEntry { /// Caches per-index configuration, merge policies, and doc mappers. /// Fetches from the metastore on demand. 
All accessors panic if called /// for an index that hasn't been loaded. +#[derive(Debug)] pub struct IndexConfigMetastore { indexes: HashMap, metastore_client: MetastoreServiceClient, diff --git a/quickwit/quickwit-compaction/src/planner/metrics.rs b/quickwit/quickwit-compaction/src/planner/metrics.rs index 2a4b2d9d4c7..4d8312d02c7 100644 --- a/quickwit/quickwit-compaction/src/planner/metrics.rs +++ b/quickwit/quickwit-compaction/src/planner/metrics.rs @@ -13,22 +13,24 @@ // limitations under the License. use once_cell::sync::Lazy; -use quickwit_common::metrics::{IntGauge, IntGaugeVec, new_gauge, new_gauge_vec}; +use quickwit_common::metrics::{ + IntCounter, IntCounterVec, IntGaugeVec, new_counter, new_counter_vec, new_gauge_vec, +}; pub struct CompactionPlannerMetrics { - pub new_splits_scanned: IntGaugeVec<1>, + pub new_splits_scanned: IntCounterVec<1>, pub splits_needing_compaction: IntGaugeVec<1>, pub pending_merge_operations: IntGaugeVec<2>, - pub timed_out_operations: IntGauge, - pub metastore_errors: IntGaugeVec<1>, + pub timed_out_operations: IntCounter, + pub metastore_errors: IntCounterVec<1>, } impl Default for CompactionPlannerMetrics { fn default() -> Self { CompactionPlannerMetrics { - new_splits_scanned: new_gauge_vec( + new_splits_scanned: new_counter_vec( "new_splits_scanned", - "number of new immature splits scanned from the metastore on the last tick", + "cumulative number of immature splits scanned from the metastore", "compaction_planner", &[], ["source_uid"], @@ -47,15 +49,16 @@ impl Default for CompactionPlannerMetrics { &[], ["source_uid", "merge_level"], ), - timed_out_operations: new_gauge( + timed_out_operations: new_counter( "timed_out_operations", - "number of merge operations that timed out waiting for a worker heartbeat", + "cumulative number of merge operations that timed out waiting for a worker \ + heartbeat", "compaction_planner", &[], ), - metastore_errors: new_gauge_vec( + metastore_errors: new_counter_vec( "metastore_errors", - 
"number of metastore errors encountered by the compaction planner", + "cumulative number of metastore errors encountered by the compaction planner", "compaction_planner", &[], ["operation"], diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs index 1dc50d6ca46..0fe49bd190e 100644 --- a/quickwit/quickwit-compaction/src/planner/mod.rs +++ b/quickwit/quickwit-compaction/src/planner/mod.rs @@ -31,6 +31,7 @@ use crate::source_uid_metrics_label; /// Queue of merge operations awaiting assignment, with the /// `pending_merge_operations` gauge maintained inline. Push/pop are the only /// mutation paths so the metric stays consistent with `len()`. +#[derive(Debug)] struct PendingOperations { inner: VecDeque<(CompactionPartitionKey, MergeOperation)>, } diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index 4df0a4e102d..8d405d443ec 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -239,39 +239,23 @@ pub struct CompactorConfig { /// Limits the IO throughput of the split downloader and the merge executor. #[serde(default)] pub max_merge_write_throughput: Option, - /// Maximum size of the local split store cache in bytes. - #[serde(default = "CompactorConfig::default_split_store_max_num_bytes")] - pub split_store_max_num_bytes: ByteSize, - /// Maximum number of splits in the local split store cache. 
- #[serde(default = "CompactorConfig::default_split_store_max_num_splits")] - pub split_store_max_num_splits: usize, } impl CompactorConfig { fn default_max_concurrent_pipelines() -> NonZeroUsize { - NonZeroUsize::new(quickwit_common::num_cpus() * 2 / 3).unwrap_or(NonZeroUsize::MIN) + NonZeroUsize::new(quickwit_common::num_cpus()).unwrap_or(NonZeroUsize::MIN) } fn default_max_concurrent_split_uploads() -> usize { 12 } - pub fn default_split_store_max_num_bytes() -> ByteSize { - ByteSize::gib(100) - } - - pub fn default_split_store_max_num_splits() -> usize { - 1_000 - } - #[cfg(any(test, feature = "testsuite"))] pub fn for_test() -> Self { CompactorConfig { max_concurrent_pipelines: NonZeroUsize::new(2).unwrap(), max_concurrent_split_uploads: 4, max_merge_write_throughput: None, - split_store_max_num_bytes: ByteSize::mb(1), - split_store_max_num_splits: 3, } } } @@ -282,8 +266,6 @@ impl Default for CompactorConfig { max_concurrent_pipelines: Self::default_max_concurrent_pipelines(), max_concurrent_split_uploads: Self::default_max_concurrent_split_uploads(), max_merge_write_throughput: None, - split_store_max_num_bytes: Self::default_split_store_max_num_bytes(), - split_store_max_num_splits: Self::default_split_store_max_num_splits(), } } } diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs index 70f19410ecf..1a4270d48ee 100644 --- a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs +++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs @@ -25,7 +25,7 @@ use quickwit_common::split_file; use quickwit_directories::BundleDirectory; use quickwit_storage::StorageResult; use tantivy::Directory; -use tantivy::directory::MmapDirectory; +use tantivy::directory::{Advice, MmapDirectory}; use tokio::sync::Mutex; use tracing::{debug, error, warn}; use ulid::Ulid; @@ -38,12 +38,15 @@ const SPLIT_MAX_AGE: Duration = Duration::from_secs(2 * 24 * 
3_600); // 2 days pub fn get_tantivy_directory_from_split_bundle( split_file: &Path, ) -> StorageResult> { - let mmap_directory = MmapDirectory::open(split_file.parent().ok_or_else(|| { - io::Error::new( - io::ErrorKind::NotFound, - format!("couldn't find parent for {}", split_file.display()), - ) - })?)?; + let mmap_directory = MmapDirectory::open_with_madvice( + split_file.parent().ok_or_else(|| { + io::Error::new( + io::ErrorKind::NotFound, + format!("couldn't find parent for {}", split_file.display()), + ) + })?, + Advice::Sequential, + )?; let split_fileslice = mmap_directory.open_read(Path::new(&split_file))?; Ok(Box::new(BundleDirectory::open_split(split_fileslice)?)) } diff --git a/quickwit/quickwit-janitor/Cargo.toml b/quickwit/quickwit-janitor/Cargo.toml index 8d4cad3beb9..16a9a540806 100644 --- a/quickwit/quickwit-janitor/Cargo.toml +++ b/quickwit/quickwit-janitor/Cargo.toml @@ -27,6 +27,7 @@ utoipa = { workspace = true } quickwit-actors = { workspace = true } quickwit-common = { workspace = true } +quickwit-compaction = { workspace = true } quickwit-config = { workspace = true } quickwit-doc-mapper = { workspace = true } quickwit-index-management = { workspace = true } diff --git a/quickwit/quickwit-janitor/src/janitor_service.rs b/quickwit/quickwit-janitor/src/janitor_service.rs index b8bfe5e54b0..712458ad805 100644 --- a/quickwit/quickwit-janitor/src/janitor_service.rs +++ b/quickwit/quickwit-janitor/src/janitor_service.rs @@ -16,6 +16,7 @@ use async_trait::async_trait; use quickwit_actors::{ Actor, ActorContext, ActorExitStatus, ActorHandle, ActorState, Handler, Healthz, }; +use quickwit_compaction::planner::CompactionPlanner; use serde_json::{Value as JsonValue, json}; use crate::actors::{DeleteTaskService, GarbageCollector, RetentionPolicyExecutor}; @@ -24,6 +25,7 @@ pub struct JanitorService { delete_task_service_handle: Option>, garbage_collector_handle: ActorHandle, retention_policy_executor_handle: ActorHandle, + compaction_planner_handle: 
ActorHandle, } impl JanitorService { @@ -31,11 +33,13 @@ impl JanitorService { delete_task_service_handle: Option>, garbage_collector_handle: ActorHandle, retention_policy_executor_handle: ActorHandle, + compaction_planner_handle: ActorHandle, ) -> Self { Self { delete_task_service_handle, garbage_collector_handle, retention_policy_executor_handle, + compaction_planner_handle, } } @@ -49,6 +53,7 @@ impl JanitorService { delete_task_is_not_failure && self.garbage_collector_handle.state() != ActorState::Failure && self.retention_policy_executor_handle.state() != ActorState::Failure + && self.compaction_planner_handle.state() != ActorState::Failure } } diff --git a/quickwit/quickwit-janitor/src/lib.rs b/quickwit/quickwit-janitor/src/lib.rs index bef73160377..8bac8c46c21 100644 --- a/quickwit/quickwit-janitor/src/lib.rs +++ b/quickwit/quickwit-janitor/src/lib.rs @@ -14,8 +14,9 @@ #![deny(clippy::disallowed_methods)] -use quickwit_actors::{Mailbox, Universe}; +use quickwit_actors::{ActorHandle, Mailbox, Universe}; use quickwit_common::pubsub::EventBroker; +use quickwit_compaction::planner::CompactionPlanner; use quickwit_config::NodeConfig; use quickwit_indexing::actors::MergeSchedulerService; use quickwit_metastore::SplitInfo; @@ -39,6 +40,7 @@ use crate::actors::{DeleteTaskService, GarbageCollector, RetentionPolicyExecutor /// Schema used for the OpenAPI generation which are apart of this crate. 
pub struct JanitorApiSchemas; +#[allow(clippy::too_many_arguments)] pub async fn start_janitor_service( universe: &Universe, config: &NodeConfig, @@ -47,6 +49,7 @@ pub async fn start_janitor_service( storage_resolver: StorageResolver, event_broker: EventBroker, run_delete_task_service: bool, + compaction_planner_handle: ActorHandle, ) -> anyhow::Result> { info!("starting janitor service"); let garbage_collector = GarbageCollector::new(metastore.clone(), storage_resolver.clone()); @@ -77,6 +80,7 @@ pub async fn start_janitor_service( delete_task_service_handle, garbage_collector_handle, retention_policy_executor_handle, + compaction_planner_handle, ); let (janitor_service_mailbox, _janitor_service_handle) = universe.spawn_builder().spawn(janitor_service); diff --git a/quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.down.sql b/quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.down.sql new file mode 100644 index 00000000000..e4568958939 --- /dev/null +++ b/quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.down.sql @@ -0,0 +1,4 @@ +-- no-transaction +-- CONCURRENTLY matches the up migration: avoids blocking writes on `splits` +-- while the index is being dropped. Cannot run inside a transaction. 
+DROP INDEX CONCURRENTLY IF EXISTS splits_published_maturity_timestamp_idx; \ No newline at end of file diff --git a/quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.up.sql b/quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.up.sql new file mode 100644 index 00000000000..0d9fa0f5cff --- /dev/null +++ b/quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.up.sql @@ -0,0 +1,24 @@ +-- no-transaction +-- Index for the compaction planner's scan, which on every tick reads up to +-- LIMIT splits matching: +-- split_state = 'Published' +-- maturity_timestamp > now() +-- ordered by maturity_timestamp ascending. The planner keeps a local set of +-- already-tracked splits and dedups against it, so re-reading the immature +-- set every tick is intentional -- it's how the planner recovers splits +-- whose merge timed out or failed. +-- +-- The btree on (maturity_timestamp, split_id) lets postgres seek to the live +-- "still immature" range in index order, satisfying both the filter and the +-- ORDER BY without an extra sort. The split_id column is included as a +-- tiebreaker so postgres returns deterministic pages under LIMIT. +-- +-- The partial predicate is restricted to split_state = 'Published' because +-- partial-index predicates must be IMMUTABLE; "now()" cannot appear here. +-- +-- CONCURRENTLY avoids taking a SHARE lock on splits, which would block all +-- writes for the duration of the build. CONCURRENTLY cannot run inside a +-- transaction block, hence the `-- no-transaction` directive above. 
+CREATE INDEX CONCURRENTLY IF NOT EXISTS splits_published_maturity_timestamp_idx + ON splits (maturity_timestamp, split_id) + WHERE split_state = 'Published'; \ No newline at end of file diff --git a/quickwit/quickwit-metastore/src/lib.rs b/quickwit/quickwit-metastore/src/lib.rs index e8fffeb7962..83c492e1ba0 100644 --- a/quickwit/quickwit-metastore/src/lib.rs +++ b/quickwit/quickwit-metastore/src/lib.rs @@ -48,7 +48,7 @@ pub use metastore::{ ListMetricsSplitsQuery, ListMetricsSplitsRequestExt, ListMetricsSplitsResponseExt, ListSplitsQuery, ListSplitsRequestExt, ListSplitsResponseExt, MetastoreServiceExt, MetastoreServiceStreamSplitsExt, PublishMetricsSplitsRequestExt, PublishSplitsRequestExt, - StageMetricsSplitsRequestExt, StageSplitsRequestExt, UpdateIndexRequestExt, + SortBy, StageMetricsSplitsRequestExt, StageSplitsRequestExt, UpdateIndexRequestExt, UpdateSourceRequestExt, file_backed, }; pub use metastore_factory::{MetastoreFactory, UnsupportedMetastore}; diff --git a/quickwit/quickwit-metastore/src/metastore/mod.rs b/quickwit/quickwit-metastore/src/metastore/mod.rs index ddfee25afab..caaae7316a8 100644 --- a/quickwit/quickwit-metastore/src/metastore/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/mod.rs @@ -875,10 +875,21 @@ pub struct ListSplitsQuery { } #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)] +/// Ordering applied to the result of a [`ListSplitsQuery`]. pub enum SortBy { + /// No ordering — the metastore may return splits in any order. None, + /// Order by `(delete_opstamp ASC, publish_timestamp ASC)`. Used by the + /// delete pipeline to process the splits with the most pending delete + /// work first. Staleness, + /// Order by `(index_uid ASC, split_id ASC)`, matching the splits-table + /// primary key. Used for stable pagination across all indexes. IndexUid, + /// Order by `(maturity_timestamp ASC, split_id ASC)`. 
Used by the + /// compaction planner so that under a backlog the splits closest to + /// becoming mature are processed first. + MaturityTimestamp, } impl SortBy { @@ -904,6 +915,16 @@ impl SortBy { .split_id .cmp(&right_split.split_metadata.split_id) }), + SortBy::MaturityTimestamp => left_split + .split_metadata + .maturity_unix_timestamp() + .cmp(&right_split.split_metadata.maturity_unix_timestamp()) + .then_with(|| { + left_split + .split_metadata + .split_id + .cmp(&right_split.split_metadata.split_id) + }), } } } @@ -1154,6 +1175,12 @@ impl ListSplitsQuery { self } + /// Sorts the splits by maturity_timestamp ascending, with split_id as a tiebreaker. + pub fn sort_by_maturity_timestamp(mut self) -> Self { + self.sort_by = SortBy::MaturityTimestamp; + self + } + /// Only return splits whose (index_uid, split_id) are lexicographically after this split. /// This is only useful if results are sorted by index_uid and split_id. pub fn after_split(mut self, split_meta: &SplitMetadata) -> Self { diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/migrator.rs b/quickwit/quickwit-metastore/src/metastore/postgres/migrator.rs index 03c40eec666..aa51a42c43e 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/migrator.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/migrator.rs @@ -27,40 +27,45 @@ fn get_migrations() -> Migrator { /// Initializes the database and runs the SQL migrations stored in the /// `quickwit-metastore/migrations` directory. +/// +/// Runs on a raw pooled connection -- not wrapped in an outer transaction. +/// sqlx's `Migrator::run_direct` handles per-migration transactionality +/// itself, honoring each migration's `no_tx` flag (set by a +/// `-- no-transaction` directive as the first line of the migration file). +/// Wrapping the run in our own transaction would defeat that for migrations +/// that must execute outside a transaction block (e.g. `CREATE INDEX +/// CONCURRENTLY`). 
+/// +/// Atomicity is per-migration, not per-run: a failure on migration N leaves +/// migrations 1..N-1 applied and committed in `_sqlx_migrations`. The +/// operator fixes the failing migration and re-runs. #[instrument(skip_all)] pub(super) async fn run_migrations( pool: &TrackedPool, skip_migrations: bool, skip_locking: bool, ) -> MetastoreResult<()> { - let mut tx = pool.begin().await?; - let conn = tx.acquire().await?; - + let mut conn = pool.acquire().await?; let mut migrator = get_migrations(); if skip_locking { migrator.set_locking(false); } - if !skip_migrations { - // this is an hidden function, made to get "around the annoying "implementation of `Acquire` - // is not general enough" error", which is the error we get otherwise. - let migrate_result = migrator.run_direct(conn).await; + if skip_migrations { + return check_migrations(migrator, &mut conn).await; + } - let Err(migrate_error) = migrate_result else { - tx.commit().await?; - return Ok(()); - }; - tx.rollback().await?; + // this is an hidden function, made to get "around the annoying "implementation of `Acquire` + // is not general enough" error", which is the error we get otherwise. 
+ if let Err(migrate_error) = migrator.run_direct(&mut *conn).await { error!(error=%migrate_error, "failed to run PostgreSQL migrations"); - - Err(MetastoreError::Internal { + return Err(MetastoreError::Internal { message: "failed to run PostgreSQL migrations".to_string(), cause: migrate_error.to_string(), - }) - } else { - check_migrations(migrator, conn).await + }); } + Ok(()) } async fn check_migrations(migrator: Migrator, conn: &mut PgConnection) -> MetastoreResult<()> { diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs b/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs index 28519b8a294..29c26e41fb2 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs @@ -211,6 +211,10 @@ pub(super) fn append_query_filters_and_order_by( sql.order_by(Splits::IndexUid, Order::Asc) .order_by(Splits::SplitId, Order::Asc); } + SortBy::MaturityTimestamp => { + sql.order_by(Splits::MaturityTimestamp, Order::Asc) + .order_by(Splits::SplitId, Order::Asc); + } SortBy::None => (), } diff --git a/quickwit/quickwit-metastore/src/split_metadata.rs b/quickwit/quickwit-metastore/src/split_metadata.rs index fe88fe379d3..95066e87838 100644 --- a/quickwit/quickwit-metastore/src/split_metadata.rs +++ b/quickwit/quickwit-metastore/src/split_metadata.rs @@ -218,6 +218,18 @@ impl SplitMetadata { } } + /// Returns the unix timestamp at which the split becomes mature, or 0 if + /// the split is already mature (matching the metastore's stored + /// `maturity_timestamp` column). + pub fn maturity_unix_timestamp(&self) -> i64 { + match self.maturity { + SplitMaturity::Mature => 0, + SplitMaturity::Immature { maturation_period } => { + self.create_timestamp + maturation_period.as_secs() as i64 + } + } + } + #[cfg(any(test, feature = "testsuite"))] /// Returns an instance of `SplitMetadata` for testing. 
pub fn for_test(split_id: SplitId) -> SplitMetadata { diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 9be9b60971a..554820dfc80 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -57,7 +57,7 @@ pub use format::BodyFormat; use futures::StreamExt; use itertools::Itertools; use once_cell::sync::Lazy; -use quickwit_actors::{ActorExitStatus, Mailbox, SpawnContext, Universe}; +use quickwit_actors::{ActorExitStatus, ActorHandle, Mailbox, SpawnContext, Universe}; use quickwit_cluster::{ Cluster, ClusterChange, ClusterChangeStream, ClusterNode, ListenerHandle, start_cluster_service, }; @@ -275,22 +275,31 @@ async fn balance_channel_for_service( /// /// On janitor nodes, spawns a `CompactionPlanner` actor and builds the client from /// its mailbox. On compactor-only nodes, connects to a remote janitor via gRPC. +/// +/// The second tuple element is the local planner's `ActorHandle`, returned only +/// on janitor nodes so the caller can attach it to the janitor liveness probe. 
async fn get_compaction_planner_client_if_needed( node_config: &NodeConfig, cluster: &Cluster, universe: &Universe, metastore_client: &MetastoreServiceClient, -) -> anyhow::Result> { +) -> anyhow::Result<( + Option, + Option>, +)> { let is_janitor = node_config.is_service_enabled(QuickwitService::Janitor); let is_compactor = node_config.is_service_enabled(QuickwitService::Compactor); if !is_janitor && !is_compactor { - return Ok(None); + return Ok((None, None)); } if is_janitor { let planner = CompactionPlanner::new(metastore_client.clone()); - let (mailbox, _handle) = universe.spawn_builder().spawn(planner); + let (mailbox, handle) = universe.spawn_builder().spawn(planner); info!("compaction planner actor started on janitor node"); - return Ok(Some(CompactionPlannerServiceClient::from_mailbox(mailbox))); + return Ok(( + Some(CompactionPlannerServiceClient::from_mailbox(mailbox)), + Some(handle), + )); } // Compactor-only node: connect to the planner on a remote janitor. let balance_channel = balance_channel_for_service(cluster, QuickwitService::Janitor).await; @@ -303,11 +312,14 @@ async fn get_compaction_planner_client_if_needed( bail!("compactor is enabled but no janitor node was found in the cluster") } info!("remote compaction planner detected on janitor node"); - Ok(Some(CompactionPlannerServiceClient::from_balance_channel( - balance_channel, - node_config.grpc_config.max_message_size, + Ok(( + Some(CompactionPlannerServiceClient::from_balance_channel( + balance_channel, + node_config.grpc_config.max_message_size, + None, + )), None, - ))) + )) } async fn start_ingest_client_if_needed( @@ -588,14 +600,15 @@ pub async fn serve_quickwit( .await .context("failed to start ingest v1 service")?; - let compaction_service_client_opt = get_compaction_planner_client_if_needed( - &node_config, - &cluster, - &universe, - &metastore_client, - ) - .await - .context("failed to initialize compaction service client")?; + let (compaction_service_client_opt, 
compaction_planner_handle_opt) = + get_compaction_planner_client_if_needed( + &node_config, + &cluster, + &universe, + &metastore_client, + ) + .await + .context("failed to initialize compaction service client")?; let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) { let indexing_service = start_indexing_service( @@ -756,6 +769,8 @@ pub async fn serve_quickwit( }; let janitor_service_opt = if node_config.is_service_enabled(QuickwitService::Janitor) { + let compaction_planner_handle = compaction_planner_handle_opt + .expect("compaction planner handle must exist on janitor nodes"); let janitor_service = start_janitor_service( &universe, &node_config, @@ -764,6 +779,7 @@ pub async fn serve_quickwit( storage_resolver.clone(), event_broker.clone(), !get_bool_from_env(DISABLE_DELETE_TASK_SERVICE_ENV_KEY, false), + compaction_planner_handle, ) .await .context("failed to start janitor service")?; @@ -778,19 +794,6 @@ pub async fn serve_quickwit( let compaction_root_directory = quickwit_common::temp_dir::Builder::default() .tempdir_in(&compaction_dir) .context("failed to create compaction temp directory")?; - let split_store_quota = quickwit_indexing::SplitStoreQuota::try_new( - node_config.compactor_config.split_store_max_num_splits, - node_config.compactor_config.split_store_max_num_bytes, - )?; - let split_cache_dir = node_config - .data_dir_path - .join("compactor-split-cache") - .join("splits"); - let split_cache = Arc::new( - quickwit_indexing::IndexingSplitCache::open(split_cache_dir, split_store_quota) - .await - .context("failed to open compactor split cache")?, - ); let compaction_client = compaction_service_client_opt .clone() .expect("compactor service enabled but no compaction client available"); @@ -799,7 +802,6 @@ pub async fn serve_quickwit( cluster.self_node_id().into(), compaction_client, &node_config.compactor_config, - split_cache, metastore_client.clone(), storage_resolver.clone(), event_broker.clone(), @@ -1925,11 
+1927,12 @@ mod tests { let mut node_config = NodeConfig::for_test(); node_config.enabled_services = HashSet::from([QuickwitService::Janitor, QuickwitService::Indexer]); - let result = + let (client_opt, handle_opt) = get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore) .await .unwrap(); - assert!(result.is_some()); + assert!(client_opt.is_some()); + assert!(handle_opt.is_some()); // With compactor + janitor enabled, planner client is also returned. node_config.enabled_services = HashSet::from([ @@ -1937,19 +1940,21 @@ mod tests { QuickwitService::Indexer, QuickwitService::Compactor, ]); - let result = + let (client_opt, handle_opt) = get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore) .await .unwrap(); - assert!(result.is_some()); + assert!(client_opt.is_some()); + assert!(handle_opt.is_some()); - // Neither janitor nor compactor: no client. + // Neither janitor nor compactor: no client, no handle. node_config.enabled_services = HashSet::from([QuickwitService::Indexer]); - let result = + let (client_opt, handle_opt) = get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore) .await .unwrap(); - assert!(result.is_none()); + assert!(client_opt.is_none()); + assert!(handle_opt.is_none()); universe.assert_quit().await; } From 80563710ad944bbd6142cf4b3edf12ee6ce03bf3 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Wed, 20 May 2026 14:03:28 -0400 Subject: [PATCH 13/21] Wire up shared indexing split cache between indexers and compactors (#6411) --- quickwit/quickwit-cli/src/tool.rs | 6 +- .../src/compaction_pipeline.rs | 2 - .../src/compactor_supervisor.rs | 11 +-- quickwit/quickwit-compaction/src/lib.rs | 8 +- .../quickwit-config/src/node_config/mod.rs | 2 +- .../src/actors/indexing_service.rs | 19 ++--- quickwit/quickwit-indexing/src/lib.rs | 4 + .../src/split_store/indexing_split_cache.rs | 76 +++++++++++++++++++ quickwit/quickwit-indexing/src/test_utils.rs | 2 + 
quickwit/quickwit-serve/src/lib.rs | 27 ++++++- 10 files changed, 132 insertions(+), 25 deletions(-) diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs index 12d174fc572..557d61921e0 100644 --- a/quickwit/quickwit-cli/src/tool.rs +++ b/quickwit/quickwit-cli/src/tool.rs @@ -17,6 +17,7 @@ use std::io::{IsTerminal, Stdout, Write, stdout}; use std::num::NonZeroUsize; use std::path::PathBuf; use std::str::FromStr; +use std::sync::Arc; use std::time::{Duration, Instant}; use std::{env, fmt, io}; @@ -37,9 +38,9 @@ use quickwit_config::{ TransformConfig, }; use quickwit_index_management::{IndexService, clear_cache_directory}; -use quickwit_indexing::IndexingPipeline; use quickwit_indexing::actors::IndexingService; use quickwit_indexing::models::{DetachIndexingPipeline, IndexingStatistics, SpawnPipeline}; +use quickwit_indexing::{IndexingPipeline, IndexingSplitCache}; use quickwit_ingest::IngesterPool; use quickwit_metastore::IndexMetadataResponseExt; use quickwit_proto::indexing::CpuCapacity; @@ -417,6 +418,8 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< &HashSet::from_iter([QuickwitService::Indexer]), )?; let universe = Universe::new(); + let split_cache = + Arc::new(IndexingSplitCache::from_config(&indexer_config, &config.data_dir_path).await?); let indexing_server = IndexingService::new( config.node_id.clone(), config.data_dir_path.clone(), @@ -428,6 +431,7 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< IngesterPool::default(), storage_resolver, EventBroker::default(), + split_cache, ) .await?; let (indexing_server_mailbox, indexing_server_handle) = diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs index 91a23e6bb96..a35017384a0 100644 --- a/quickwit/quickwit-compaction/src/compaction_pipeline.rs +++ b/quickwit/quickwit-compaction/src/compaction_pipeline.rs @@ -56,7 +56,6 @@ pub struct 
PipelineStatusUpdate { pub index_uid: IndexUid, pub source_id: SourceId, pub split_ids: Vec, - pub merge_level: u64, pub status: PipelineStatus, } @@ -270,7 +269,6 @@ impl CompactionPipeline { .map(|split| split.split_id().to_string()) .collect(), status: self.status.clone(), - merge_level: self.merge_operation.merge_level() as u64, } } diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index 8508a82ccaf..52da22c4951 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -59,6 +59,7 @@ pub struct CompactorSupervisor { io_throughput_limiter: Option, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, + split_cache: Arc, max_concurrent_split_uploads: usize, event_broker: EventBroker, @@ -75,6 +76,7 @@ impl CompactorSupervisor { io_throughput_limiter: Option, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, + split_cache: Arc, max_concurrent_split_uploads: usize, event_broker: EventBroker, compaction_root_directory: TempDirectory, @@ -87,6 +89,7 @@ impl CompactorSupervisor { io_throughput_limiter, metastore, storage_resolver, + split_cache, max_concurrent_split_uploads, event_broker, compaction_root_directory, @@ -182,8 +185,7 @@ impl CompactorSupervisor { let index_storage_uri = Uri::from_str(&assignment.index_storage_uri)?; let index_storage = self.storage_resolver.resolve(&index_storage_uri).await?; - let split_cache = Arc::new(IndexingSplitCache::no_caching()); - let split_store = IndexingSplitStore::new(index_storage, split_cache); + let split_store = IndexingSplitStore::new(index_storage, self.split_cache.clone()); let doc_mapper = build_doc_mapper(&doc_mapping, &search_settings)?; let merge_policy = merge_policy_from_settings(&indexing_settings); @@ -332,6 +334,7 @@ mod tests { None, metastore, StorageResolver::for_test(), + 
Arc::new(IndexingSplitCache::no_caching()), 2, EventBroker::default(), TempDirectory::for_test(), @@ -541,6 +544,7 @@ mod tests { None, metastore, StorageResolver::for_test(), + Arc::new(IndexingSplitCache::no_caching()), 2, EventBroker::default(), TempDirectory::for_test(), @@ -581,7 +585,6 @@ mod tests { source_id: "src".to_string(), split_ids: vec!["s1".to_string(), "s2".to_string()], status: PipelineStatus::InProgress, - merge_level: 1, }, PipelineStatusUpdate { task_id: "task-2".to_string(), @@ -589,7 +592,6 @@ mod tests { source_id: "src".to_string(), split_ids: vec!["s3".to_string()], status: PipelineStatus::Completed, - merge_level: 1, }, PipelineStatusUpdate { task_id: "task-3".to_string(), @@ -599,7 +601,6 @@ mod tests { status: PipelineStatus::Failed { error: "boom".to_string(), }, - merge_level: 1, }, ]; diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs index 30c61587c41..5307f267932 100644 --- a/quickwit/quickwit-compaction/src/lib.rs +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -14,21 +14,22 @@ #![deny(clippy::disallowed_methods)] -#[allow(dead_code)] mod compaction_pipeline; -#[allow(dead_code)] mod compactor_supervisor; mod metrics; pub mod planner; pub type TaskId = String; +use std::sync::Arc; + pub use compactor_supervisor::CompactorSupervisor; use quickwit_actors::{Mailbox, Universe}; use quickwit_common::io; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::CompactorConfig; +use quickwit_indexing::IndexingSplitCache; use quickwit_proto::compaction::CompactionPlannerServiceClient; use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_proto::types::{IndexUid, NodeId, SourceId}; @@ -47,11 +48,11 @@ pub async fn start_compactor_service( compactor_config: &CompactorConfig, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, + split_cache: Arc, event_broker: EventBroker, compaction_root_directory: TempDirectory, 
) -> anyhow::Result> { info!("starting compactor service"); - // TODO: configure this for real let io_throughput_limiter = compactor_config.max_merge_write_throughput.map(io::limiter); let supervisor = CompactorSupervisor::new( node_id, @@ -60,6 +61,7 @@ pub async fn start_compactor_service( io_throughput_limiter, metastore, storage_resolver, + split_cache, compactor_config.max_concurrent_split_uploads, event_broker, compaction_root_directory, diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index 8d405d443ec..71eaa16c1d7 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -230,7 +230,7 @@ impl Default for IndexerConfig { #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct CompactorConfig { - /// Maximum number of concurrent merge pipelines. Defaults to 2/3 of CPU count. + /// Maximum number of concurrent merge pipelines. Defaults to CPU count. #[serde(default = "CompactorConfig::default_max_concurrent_pipelines")] pub max_concurrent_pipelines: NonZeroUsize, /// Maximum number of concurrent split uploads across all pipelines. 
diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 9c27a06f450..7ae957937aa 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -25,7 +25,6 @@ use quickwit_actors::{ Observation, }; use quickwit_cluster::Cluster; -use quickwit_common::fs::get_cache_directory_path; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir; use quickwit_config::{ @@ -56,7 +55,7 @@ use tracing::{debug, error, info, warn}; use crate::models::{DetachIndexingPipeline, ObservePipeline, SpawnPipeline}; use crate::source::{AssignShards, Assignment}; -use crate::split_store::{IndexingSplitCache, SplitStoreQuota}; +use crate::split_store::IndexingSplitCache; use crate::{IndexingPipeline, IndexingPipelineParams, IndexingSplitStore, IndexingStatistics}; /// Name of the indexing directory, usually located at `/indexing`. @@ -94,7 +93,7 @@ pub struct IndexingService { storage_resolver: StorageResolver, indexing_pipelines: HashMap, counters: IndexingServiceCounters, - local_split_store: Arc, + split_cache: Arc, max_concurrent_split_uploads: usize, cooperative_indexing_permits: Option>, event_broker: EventBroker, @@ -124,14 +123,8 @@ impl IndexingService { ingester_pool: IngesterPool, storage_resolver: StorageResolver, event_broker: EventBroker, + split_cache: Arc, ) -> anyhow::Result { - let split_store_space_quota = SplitStoreQuota::try_new( - indexer_config.split_store_max_num_splits, - indexer_config.split_store_max_num_bytes, - )?; - let split_cache_dir_path = get_cache_directory_path(&data_dir_path); - let local_split_store = - IndexingSplitCache::open(split_cache_dir_path, split_store_space_quota).await?; let indexing_root_directory = temp_dir::create_or_purge_directory(&data_dir_path.join(INDEXING_DIR_NAME)).await?; let queue_dir_path = data_dir_path.join(QUEUES_DIR_NAME); @@ -149,7 +142,7 @@ impl 
IndexingService { ingest_api_service_opt, ingester_pool, storage_resolver, - local_split_store: Arc::new(local_split_store), + split_cache, indexing_pipelines: Default::default(), counters: Default::default(), max_concurrent_split_uploads: indexer_config.max_concurrent_split_uploads, @@ -245,7 +238,7 @@ impl IndexingService { })?; let merge_policy = crate::merge_policy::merge_policy_from_settings(&index_config.indexing_settings); - let split_store = IndexingSplitStore::new(storage.clone(), self.local_split_store.clone()); + let split_store = IndexingSplitStore::new(storage.clone(), self.split_cache.clone()); let doc_mapper = build_doc_mapper(&index_config.doc_mapping, &index_config.search_settings) .map_err(|error| IndexingError::Internal(error.to_string()))?; @@ -828,6 +821,7 @@ mod tests { IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), + Arc::new(IndexingSplitCache::no_caching()), ) .await .unwrap(); @@ -1435,6 +1429,7 @@ mod tests { IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), + Arc::new(IndexingSplitCache::no_caching()), ) .await .unwrap(); diff --git a/quickwit/quickwit-indexing/src/lib.rs b/quickwit/quickwit-indexing/src/lib.rs index 1c78783089e..379f038484f 100644 --- a/quickwit/quickwit-indexing/src/lib.rs +++ b/quickwit/quickwit-indexing/src/lib.rs @@ -14,6 +14,8 @@ #![deny(clippy::disallowed_methods)] +use std::sync::Arc; + use quickwit_actors::{Mailbox, Universe}; use quickwit_cluster::Cluster; use quickwit_common::pubsub::EventBroker; @@ -71,6 +73,7 @@ pub async fn start_indexing_service( ingester_pool: IngesterPool, storage_resolver: StorageResolver, event_broker: EventBroker, + indexing_split_cache: Arc, ) -> anyhow::Result> { info!("starting indexer service"); let ingest_api_service_mailbox = universe.get_one::(); @@ -85,6 +88,7 @@ pub async fn start_indexing_service( ingester_pool, storage_resolver, event_broker, + indexing_split_cache, ) .await?; let (indexing_service, _) = 
universe.spawn_builder().spawn(indexing_service); diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs index 1a4270d48ee..66ef9dd6005 100644 --- a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs +++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs @@ -21,7 +21,9 @@ use std::time::{Duration, SystemTime}; use anyhow::Context; use bytesize::ByteSize; +use quickwit_common::fs::get_cache_directory_path; use quickwit_common::split_file; +use quickwit_config::IndexerConfig; use quickwit_directories::BundleDirectory; use quickwit_storage::StorageResult; use tantivy::Directory; @@ -364,6 +366,31 @@ impl IndexingSplitCache { IndexingSplitCache { inner } } + /// Builds an [`IndexingSplitCache`] from an [`IndexerConfig`]. + /// + /// A zero quota for either dimension produces a [`IndexingSplitCache::no_caching`] + /// instance — useful when compaction runs on dedicated nodes and indexers no + /// longer benefit from caching freshly produced splits. Otherwise, opens the + /// cache rooted at `/indexer-split-cache/splits`. + pub async fn from_config( + indexer_config: &IndexerConfig, + data_dir_path: &Path, + ) -> anyhow::Result { + if indexer_config.split_store_max_num_bytes.as_u64() == 0 + || indexer_config.split_store_max_num_splits == 0 + { + return Ok(IndexingSplitCache::no_caching()); + } + let cache_path = get_cache_directory_path(data_dir_path); + let quota = SplitStoreQuota::try_new( + indexer_config.split_store_max_num_splits, + indexer_config.split_store_max_num_bytes, + )?; + IndexingSplitCache::open(cache_path, quota) + .await + .context("failed to open indexing split cache") + } + /// Try to open an existing local split store directory. /// /// If the directory does not exists, it will be created. 
@@ -511,6 +538,7 @@ mod tests { use std::time::Duration; use bytesize::ByteSize; + use quickwit_config::IndexerConfig; use quickwit_directories::BundleDirectory; use quickwit_storage::{PutPayload, SplitPayloadBuilder}; use tantivy::Directory; @@ -533,6 +561,54 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_from_config() { + // A zero quota in either dimension yields a no-caching cache that does + // not touch the filesystem; a positive quota opens (and creates) the + // cache directory at `/indexer-split-cache/splits`. + let zero_bytes = { + let mut config = IndexerConfig::for_test().unwrap(); + config.split_store_max_num_bytes = ByteSize(0); + config + }; + let zero_splits = { + let mut config = IndexerConfig::for_test().unwrap(); + config.split_store_max_num_splits = 0; + config + }; + let both_zero = { + let mut config = IndexerConfig::for_test().unwrap(); + config.split_store_max_num_bytes = ByteSize(0); + config.split_store_max_num_splits = 0; + config + }; + for config in [zero_bytes, zero_splits, both_zero] { + let data_dir = tempdir().unwrap(); + let _cache = IndexingSplitCache::from_config(&config, data_dir.path()) + .await + .unwrap(); + assert!( + !data_dir + .path() + .join("indexer-split-cache") + .try_exists() + .unwrap(), + "no-caching variant must not create the cache directory", + ); + } + + let data_dir = tempdir().unwrap(); + let config = IndexerConfig::for_test().unwrap(); + let _cache = IndexingSplitCache::from_config(&config, data_dir.path()) + .await + .unwrap(); + let cache_dir = data_dir.path().join("indexer-split-cache").join("splits"); + assert!( + cache_dir.is_dir(), + "positive quota must open (and create) the cache directory", + ); + } + #[tokio::test] async fn test_local_split_store_load_existing_splits() -> anyhow::Result<()> { let temp_dir = tempfile::tempdir()?; diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs index 18c3f34dc1a..62634d136cc 100644 --- 
a/quickwit/quickwit-indexing/src/test_utils.rs +++ b/quickwit/quickwit-indexing/src/test_utils.rs @@ -40,6 +40,7 @@ use serde_json::Value as JsonValue; use crate::actors::IndexingService; use crate::models::{DetachIndexingPipeline, IndexingStatistics, SpawnPipeline}; +use crate::split_store::IndexingSplitCache; /// Creates a Test environment. /// @@ -125,6 +126,7 @@ impl TestSandbox { IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), + Arc::new(IndexingSplitCache::no_caching()), ) .await?; let (indexing_service, _indexing_service_handle) = diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 554820dfc80..1fe1b69069c 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -81,7 +81,7 @@ use quickwit_control_plane::{IndexerNodeInfo, IndexerPool}; use quickwit_index_management::{IndexService as IndexManager, IndexServiceError}; use quickwit_indexing::actors::IndexingService; use quickwit_indexing::models::ShardPositionsService; -use quickwit_indexing::start_indexing_service; +use quickwit_indexing::{IndexingSplitCache, start_indexing_service}; use quickwit_ingest::{ GetMemoryCapacity, IngestRequest, IngestRouter, IngestServiceClient, Ingester, IngesterPool, IngesterPoolEntry, LocalShardsUpdate, get_idle_shard_timeout, @@ -610,7 +610,27 @@ pub async fn serve_quickwit( .await .context("failed to initialize compaction service client")?; + // Build the indexing split cache once and share it between the indexing + // service and the compactor supervisor (when both run on the same node). + // A zero quota in `IndexerConfig` produces a no-op cache. 
+ let indexing_split_cache_opt: Option> = if node_config + .is_service_enabled(QuickwitService::Indexer) + || node_config.is_service_enabled(QuickwitService::Compactor) + { + let cache = IndexingSplitCache::from_config( + &node_config.indexer_config, + &node_config.data_dir_path, + ) + .await?; + Some(Arc::new(cache)) + } else { + None + }; + let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) { + let split_cache = indexing_split_cache_opt + .clone() + .expect("indexing split cache must exist on indexer nodes"); let indexing_service = start_indexing_service( &universe, &node_config, @@ -620,6 +640,7 @@ pub async fn serve_quickwit( ingester_pool.clone(), storage_resolver.clone(), event_broker.clone(), + split_cache, ) .await .context("failed to start indexing service")?; @@ -797,6 +818,9 @@ pub async fn serve_quickwit( let compaction_client = compaction_service_client_opt .clone() .expect("compactor service enabled but no compaction client available"); + let split_cache = indexing_split_cache_opt + .clone() + .expect("indexing split cache must exist on compactor nodes"); let compactor_mailbox = start_compactor_service( &universe, cluster.self_node_id().into(), @@ -804,6 +828,7 @@ pub async fn serve_quickwit( &node_config.compactor_config, metastore_client.clone(), storage_resolver.clone(), + split_cache, event_broker.clone(), compaction_root_directory, ) From d611865786579327152c20d58709e6486e62c170 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Wed, 20 May 2026 14:07:02 -0400 Subject: [PATCH 14/21] Score merge operations like before; restructure MergeOperation (#6412) --- .../src/planner/compaction_planner.rs | 23 ++--- .../src/planner/compaction_state.rs | 18 ++-- .../quickwit-compaction/src/planner/mod.rs | 41 ++++---- .../src/actors/merge_scheduler_service.rs | 38 +------- .../quickwit-indexing/src/merge_policy/mod.rs | 93 ++++++++++++++++++- 5 files changed, 122 insertions(+), 91 deletions(-) diff --git 
a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs index 637fe72017b..ff46948e429 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs @@ -27,7 +27,7 @@ use quickwit_proto::compaction::{ CompactionResult, MergeTaskAssignment, ReportStatusRequest, ReportStatusResponse, }; use quickwit_proto::metastore::{ListSplitsRequest, MetastoreService, MetastoreServiceClient}; -use quickwit_proto::types::{IndexUid, NodeId, SourceId}; +use quickwit_proto::types::NodeId; use time::OffsetDateTime; use tracing::{error, info}; use ulid::Ulid; @@ -190,20 +190,13 @@ impl CompactionPlanner { let pending = self.state.pop_pending(available_slots as usize); let mut assignments = Vec::with_capacity(pending.len()); - for (partition_key, operation) in pending { + for operation in pending { let task_id = Ulid::new().to_string(); - let Some(index_entry) = self.index_config_metastore.get(&partition_key.index_uid) - else { - error!(index_uid=%partition_key.index_uid, "index config not found for pending operation, skipping"); + let Some(index_entry) = self.index_config_metastore.get(&operation.index_uid) else { + error!(index_uid=%operation.index_uid, "index config not found for pending operation, skipping"); continue; }; - let assignment = build_task_assignment( - &task_id, - index_entry, - &operation, - &partition_key.index_uid, - &partition_key.source_id, - ); + let assignment = build_task_assignment(&task_id, index_entry, &operation); let split_ids = operation .splits_as_slice() @@ -237,8 +230,6 @@ fn build_task_assignment( task_id: &str, index_entry: &IndexEntry, operation: &MergeOperation, - index_uid: &IndexUid, - source_id: &SourceId, ) -> MergeTaskAssignment { MergeTaskAssignment { task_id: task_id.to_string(), @@ -253,8 +244,8 @@ fn build_task_assignment( search_settings_json: index_entry.search_settings_json(), 
indexing_settings_json: index_entry.indexing_settings_json(), retention_policy_json: index_entry.retention_policy_json(), - index_uid: Some(index_uid.clone()), - source_id: source_id.to_string(), + index_uid: Some(operation.index_uid.clone()), + source_id: operation.source_id.clone(), index_storage_uri: index_entry.index_storage_uri(), merge_level: operation.merge_level() as u64, } diff --git a/quickwit/quickwit-compaction/src/planner/compaction_state.rs b/quickwit/quickwit-compaction/src/planner/compaction_state.rs index 58cf85a5bb3..6f9dc2069c7 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_state.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_state.rs @@ -51,7 +51,6 @@ impl CompactionPartitionKey { #[derive(Debug)] struct InFlightCompaction { - task_id: TaskId, split_ids: Vec, node_id: NodeId, last_heartbeat: Instant, @@ -66,8 +65,6 @@ pub struct CompactionState { needs_compaction_split_ids: HashSet, in_flight: HashMap, in_flight_split_ids: HashSet, - /// TODO: add index_uid and source_id to MergeOperation so we don't need the partition key - /// here. pending_operations: PendingOperations, } @@ -123,8 +120,7 @@ impl CompactionState { self.in_flight_split_ids .insert(split.split_id().to_string()); } - self.pending_operations - .push(partition_key.clone(), operation); + self.pending_operations.push(operation); } } if splits.is_empty() { @@ -170,7 +166,6 @@ impl CompactionState { self.in_flight.insert( task.task_id.clone(), InFlightCompaction { - task_id: task.task_id.clone(), split_ids: task.split_ids.clone(), node_id: node_id.clone(), last_heartbeat: Instant::now(), @@ -201,8 +196,8 @@ impl CompactionState { } } - /// Pops up to `count` pending operations for assignment. - pub fn pop_pending(&mut self, count: usize) -> Vec<(CompactionPartitionKey, MergeOperation)> { + /// Pops up to `count` pending operations for assignment, highest-score first. 
+ pub fn pop_pending(&mut self, count: usize) -> Vec { let count = count.min(self.pending_operations.len()); let mut operations = Vec::with_capacity(count); for _ in 0..count { @@ -214,9 +209,8 @@ impl CompactionState { /// Records that an operation has been assigned to a worker. pub fn record_assignment(&mut self, task_id: TaskId, split_ids: Vec, node_id: NodeId) { self.in_flight.insert( - task_id.clone(), + task_id, InFlightCompaction { - task_id, split_ids, node_id, last_heartbeat: Instant::now(), @@ -330,7 +324,7 @@ mod tests { // Splits moved from needs_compaction to in_flight. assert!(!state.pending_operations.is_empty()); - for (_, op) in state.pending_operations.iter() { + for op in state.pending_operations.iter() { for split in op.splits_as_slice() { assert!(!state.needs_compaction_split_ids.contains(split.split_id())); assert!(state.in_flight_split_ids.contains(split.split_id())); @@ -413,7 +407,7 @@ mod tests { let pending = state.pop_pending(1); assert_eq!(pending.len(), 1); - let (_, operation) = &pending[0]; + let operation = &pending[0]; let split_ids: Vec = operation .splits_as_slice() .iter() diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs index 0fe49bd190e..18b408580d3 100644 --- a/quickwit/quickwit-compaction/src/planner/mod.rs +++ b/quickwit/quickwit-compaction/src/planner/mod.rs @@ -13,45 +13,43 @@ // limitations under the License. 
mod compaction_planner; -#[allow(dead_code)] mod compaction_state; -#[allow(dead_code)] mod index_config_metastore; pub(crate) mod metrics; -use std::collections::VecDeque; +use std::collections::BinaryHeap; pub use compaction_planner::CompactionPlanner; use quickwit_indexing::merge_policy::MergeOperation; -use crate::planner::compaction_state::CompactionPartitionKey; use crate::planner::metrics::COMPACTION_PLANNER_METRICS; use crate::source_uid_metrics_label; -/// Queue of merge operations awaiting assignment, with the -/// `pending_merge_operations` gauge maintained inline. Push/pop are the only -/// mutation paths so the metric stays consistent with `len()`. +/// Max-heap of merge operations awaiting assignment, ordered by +/// `MergeOperation`'s score-based `Ord`. The `pending_merge_operations` gauge +/// is maintained inline; push/pop are the only mutation paths so the metric +/// stays consistent with `len()`. #[derive(Debug)] struct PendingOperations { - inner: VecDeque<(CompactionPartitionKey, MergeOperation)>, + inner: BinaryHeap, } impl PendingOperations { fn new() -> Self { Self { - inner: VecDeque::new(), + inner: BinaryHeap::new(), } } - fn push(&mut self, partition_key: CompactionPartitionKey, operation: MergeOperation) { - Self::adjust_gauge(&partition_key, &operation, 1); - self.inner.push_back((partition_key, operation)); + fn push(&mut self, operation: MergeOperation) { + Self::adjust_gauge(&operation, 1); + self.inner.push(operation); } - fn pop(&mut self) -> Option<(CompactionPartitionKey, MergeOperation)> { - let (partition_key, operation) = self.inner.pop_front()?; - Self::adjust_gauge(&partition_key, &operation, -1); - Some((partition_key, operation)) + fn pop(&mut self) -> Option { + let operation = self.inner.pop()?; + Self::adjust_gauge(&operation, -1); + Some(operation) } fn len(&self) -> usize { @@ -64,17 +62,12 @@ impl PendingOperations { } #[cfg(test)] - fn iter(&self) -> impl Iterator { + fn iter(&self) -> impl Iterator { 
self.inner.iter() } - fn adjust_gauge( - partition_key: &CompactionPartitionKey, - operation: &MergeOperation, - delta: i64, - ) { - let source_uid_label = - source_uid_metrics_label(&partition_key.index_uid, &partition_key.source_id); + fn adjust_gauge(operation: &MergeOperation, delta: i64) { + let source_uid_label = source_uid_metrics_label(&operation.index_uid, &operation.source_id); let merge_level = operation.merge_level().to_string(); COMPACTION_PLANNER_METRICS .pending_merge_operations diff --git a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs index bbe5267d514..fcdbad2c815 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs @@ -209,30 +209,12 @@ struct ScheduleMerge { split_downloader_mailbox: Mailbox, } -/// The higher, the sooner we will execute the merge operation. -/// A good merge operation -/// - strongly reduces the number splits -/// - is light. -fn score_merge_operation(merge_operation: &MergeOperation) -> u64 { - let total_num_bytes: u64 = merge_operation.total_num_bytes(); - if total_num_bytes == 0 { - // Silly corner case that should never happen. - return u64::MAX; - } - // We will remove splits.len() and add 1 merge splits. - let delta_num_splits = (merge_operation.splits.len() - 1) as u64; - // We use integer arithmetic to avoid `f64 are not ordered` silliness. 
- (delta_num_splits << 48) - .checked_div(total_num_bytes) - .unwrap_or(1u64) -} - impl ScheduleMerge { pub fn new( merge_operation: TrackedObject, split_downloader_mailbox: Mailbox, ) -> ScheduleMerge { - let score = score_merge_operation(&merge_operation); + let score = merge_operation.score; ScheduleMerge { score, merge_operation, @@ -315,24 +297,6 @@ mod tests { MergeOperation::new_merge_operation(splits) } - #[test] - fn test_score_merge_operation() { - let score_merge_operation_aux = |num_splits, num_bytes_per_split| { - let merge_operation = build_merge_operation(num_splits, num_bytes_per_split); - score_merge_operation(&merge_operation) - }; - assert!(score_merge_operation_aux(10, 10_000_000) < score_merge_operation_aux(10, 999_999)); - assert!( - score_merge_operation_aux(10, 10_000_000) > score_merge_operation_aux(9, 10_000_000) - ); - assert_eq!( - // 9 - 1 = 8 splits removed. - score_merge_operation_aux(9, 10_000_000), - // 5 - 1 = 4 splits removed. - score_merge_operation_aux(5, 10_000_000 * 9 / 10) - ); - } - #[tokio::test] async fn test_merge_schedule_service_prioritize() { let universe = Universe::new(); diff --git a/quickwit/quickwit-indexing/src/merge_policy/mod.rs b/quickwit/quickwit-indexing/src/merge_policy/mod.rs index 37bbf15edbc..f950c30635c 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/mod.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/mod.rs @@ -26,7 +26,7 @@ pub use nop_merge_policy::NopMergePolicy; use quickwit_config::IndexingSettings; use quickwit_config::merge_policy_config::MergePolicyConfig; use quickwit_metastore::{SplitMaturity, SplitMetadata}; -use quickwit_proto::types::SplitId; +use quickwit_proto::types::{IndexUid, SourceId, SplitId}; use serde::Serialize; pub(crate) use stable_log_merge_policy::StableLogMergePolicy; use tantivy::TrackedObject; @@ -83,20 +83,35 @@ pub struct MergeOperation { #[serde(skip_serializing)] pub merge_parent_span: Span, pub merge_split_id: SplitId, + pub index_uid: IndexUid, + pub 
source_id: SourceId, pub splits: Vec, pub operation_type: MergeOperationType, + /// Priority score, computed once at construction. Higher = run sooner. + /// `Ord`/`Eq` for `MergeOperation` are defined purely on this field so a + /// `BinaryHeap` is a max-heap by score. + pub score: u64, } impl MergeOperation { + /// All splits must belong to the same `(index_uid, source_id)` — a precondition + /// merge policies already satisfy because they partition before merging. pub fn new_merge_operation(splits: Vec) -> Self { + let first_split = splits.first().expect("merge operation must have splits"); + let index_uid = first_split.index_uid.clone(); + let source_id = first_split.source_id.clone(); let merge_split_id = new_split_id(); let split_ids = splits.iter().map(|split| split.split_id()).collect_vec(); let merge_parent_span = info_span!("merge", merge_split_id=%merge_split_id, split_ids=?split_ids, typ=%MergeOperationType::Merge); + let score = compute_score(&splits); Self { merge_parent_span, merge_split_id, + index_uid, + source_id, splits, operation_type: MergeOperationType::Merge, + score, } } @@ -108,13 +123,20 @@ impl MergeOperation { } pub fn new_delete_and_merge_operation(split: SplitMetadata) -> Self { + let index_uid = split.index_uid.clone(); + let source_id = split.source_id.clone(); let merge_split_id = new_split_id(); let merge_parent_span = info_span!("delete", merge_split_id=%merge_split_id, split_ids=?split.split_id(), typ=%MergeOperationType::DeleteAndMerge); + let splits = vec![split]; + let score = compute_score(&splits); Self { merge_parent_span, merge_split_id, - splits: vec![split], + index_uid, + source_id, + splits, operation_type: MergeOperationType::DeleteAndMerge, + score, } } @@ -131,6 +153,49 @@ impl MergeOperation { } } +/// The higher, the sooner we will execute the merge operation. +/// A good merge operation: +/// - strongly reduces the number of splits +/// - is light. 
+fn compute_score(splits: &[SplitMetadata]) -> u64 { + let total_num_bytes: u64 = splits.iter().map(|split| split.footer_offsets.end).sum(); + if total_num_bytes == 0 { + // Silly corner case that should never happen. + return u64::MAX; + } + // We will remove splits.len() and add 1 merge split. + let delta_num_splits = (splits.len() - 1) as u64; + // We use integer arithmetic to avoid `f64 are not ordered` silliness. + (delta_num_splits << 48) + .checked_div(total_num_bytes) + .unwrap_or(1u64) +} + +impl PartialEq for MergeOperation { + fn eq(&self, other: &Self) -> bool { + self.merge_split_id == other.merge_split_id + } +} + +impl Eq for MergeOperation {} + +impl PartialOrd for MergeOperation { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for MergeOperation { + /// The way we reason about ordering merge operations is that a highest score should take + /// precedence over a lower score; by that logic, score already serves the Ord property; we + /// formalize that here. + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.score + .cmp(&other.score) + .then_with(|| self.merge_split_id.cmp(&other.merge_split_id)) + } +} + impl fmt::Debug for MergeOperation { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( @@ -241,6 +306,30 @@ pub mod tests { }; use crate::models::{NewSplits, create_split_metadata}; + #[test] + fn test_score() { + fn op(num_splits: usize, num_bytes_per_split: u64) -> MergeOperation { + let splits: Vec = (0..num_splits) + .map(|_| SplitMetadata { + footer_offsets: 0..num_bytes_per_split, + ..Default::default() + }) + .collect(); + MergeOperation::new_merge_operation(splits) + } + // Lighter merge (smaller total bytes) at the same split count scores higher. + assert!(op(10, 10_000_000).score < op(10, 999_999).score); + // More splits removed at the same total bytes scores higher. 
+ assert!(op(10, 10_000_000).score > op(9, 10_000_000).score); + // Score is `(delta_splits << 48) / total_bytes` — equal ratios yield equal scores. + assert_eq!( + // 9 splits, 90M bytes → delta=8. + op(9, 10_000_000).score, + // 5 splits, 45M bytes → delta=4 (same 8/90M ratio). + op(5, 10_000_000 * 9 / 10).score, + ); + } + fn pow_of_10(n: usize) -> usize { 10usize.pow(n as u32) } From 8ff8744b678284afe5998054011b14fa0ca0b174 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Wed, 20 May 2026 14:09:21 -0400 Subject: [PATCH 15/21] Performance improvements: multipart upload, parallel gets, spans (#6451) --- .../src/compaction_pipeline.rs | 1 + .../src/compactor_supervisor.rs | 3 +- .../src/planner/compaction_planner.rs | 1 + .../src/planner/compaction_state.rs | 2 +- .../src/actors/merge_split_downloader.rs | 48 +++++++++---------- ...its-published-maturity-timestamp.down.sql} | 2 +- ...plits-published-maturity-timestamp.up.sql} | 2 +- quickwit/quickwit-storage/src/metrics.rs | 19 ++++++-- .../src/object_storage/policy.rs | 7 +-- .../object_storage/s3_compatible_storage.rs | 1 + 10 files changed, 51 insertions(+), 35 deletions(-) rename quickwit/quickwit-metastore/migrations/postgresql/{27_create-index-splits-published-maturity-timestamp.down.sql => 29_create-index-splits-published-maturity-timestamp.down.sql} (95%) rename quickwit/quickwit-metastore/migrations/postgresql/{27_create-index-splits-published-maturity-timestamp.up.sql => 29_create-index-splits-published-maturity-timestamp.up.sql} (97%) diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs index a35017384a0..f3095564795 100644 --- a/quickwit/quickwit-compaction/src/compaction_pipeline.rs +++ b/quickwit/quickwit-compaction/src/compaction_pipeline.rs @@ -26,6 +26,7 @@ use quickwit_indexing::actors::{ MergeExecutor, MergeSplitDownloader, Packager, Publisher, Uploader, UploaderType, }; use 
quickwit_indexing::merge_policy::MergeOperation; +use quickwit_indexing::metrics::INDEXER_METRICS; use quickwit_indexing::{IndexingSplitStore, PublisherType, SplitsUpdateMailbox}; use quickwit_proto::indexing::MergePipelineId; use quickwit_proto::metastore::MetastoreServiceClient; diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index 52da22c4951..bb570505db7 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -124,6 +124,8 @@ impl CompactorSupervisor { error!(%task_id, %error, "failed to spawn compaction task"); } } + let available_slots = self.pipelines.iter().filter(|pipeline_opt| pipeline_opt.is_none()).count(); + COMPACTOR_METRICS.available_slots.set(available_slots as i64); } async fn spawn_task( @@ -221,7 +223,6 @@ impl CompactorSupervisor { .filter(|s| matches!(s.status, PipelineStatus::InProgress)) .count(); let available_slots = (self.pipelines.len() - in_progress_count) as i64; - COMPACTOR_METRICS.available_slots.set(available_slots); let mut in_progress = Vec::new(); let mut successes = Vec::new(); diff --git a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs index ff46948e429..7b30c2174df 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs @@ -41,6 +41,7 @@ use crate::planner::metrics::COMPACTION_PLANNER_METRICS; /// exists. Splits beyond this cap aren't lost -- they bubble into range as the front of the queue /// is merged off. 
const SCAN_PAGE_SIZE: usize = 5_000; + #[derive(Debug)] pub struct CompactionPlanner { state: CompactionState, diff --git a/quickwit/quickwit-compaction/src/planner/compaction_state.rs b/quickwit/quickwit-compaction/src/planner/compaction_state.rs index 6f9dc2069c7..4e0616cdfc1 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_state.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_state.rs @@ -115,7 +115,7 @@ impl CompactionState { break; } for operation in operations { - for split in operation.splits_as_slice() { + for split in &operation.splits { self.needs_compaction_split_ids.remove(split.split_id()); self.in_flight_split_ids .insert(split.split_id().to_string()); diff --git a/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs b/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs index 6b1bcc0ddb9..1911ecc0363 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs @@ -126,32 +126,30 @@ impl MergeSplitDownloader { download_directory: &Path, ctx: &ActorContext, ) -> Result>, quickwit_actors::ActorExitStatus> { - // we download all of the split files in the scratch directory. - let mut tantivy_dirs = Vec::new(); - for split in splits { - if ctx.kill_switch().is_dead() { - debug!( - split_id = split.split_id(), - "Kill switch was activated. Cancelling download." 
- ); - return Err(ActorExitStatus::Killed); - } - let io_controls = self - .io_controls - .clone() - .set_progress(ctx.progress().clone()) - .set_kill_switch(ctx.kill_switch().clone()); - let _protect_guard = ctx.protect_zone(); - let tantivy_dir = self - .split_store - .fetch_and_open_split(split.split_id(), download_directory, &io_controls) - .await - .map_err(|error| { - let split_id = split.split_id(); - anyhow::anyhow!(error).context(format!("failed to download split `{split_id}`")) - })?; - tantivy_dirs.push(tantivy_dir); + if ctx.kill_switch().is_dead() { + debug!("kill switch was activated, cancelling download"); + return Err(ActorExitStatus::Killed); } + let io_controls = self + .io_controls + .clone() + .set_progress(ctx.progress().clone()) + .set_kill_switch(ctx.kill_switch().clone()); + let _protect_guard = ctx.protect_zone(); + let download_futures = splits.iter().map(|split| { + let io_controls = io_controls.clone(); + async move { + self.split_store + .fetch_and_open_split(split.split_id(), download_directory, &io_controls) + .await + .map_err(|error| { + let split_id = split.split_id(); + anyhow::anyhow!(error) + .context(format!("failed to download split `{split_id}`")) + }) + } + }); + let tantivy_dirs = futures::future::try_join_all(download_futures).await?; Ok(tantivy_dirs) } } diff --git a/quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.down.sql b/quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.down.sql similarity index 95% rename from quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.down.sql rename to quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.down.sql index e4568958939..c8bd22af8c3 100644 --- a/quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.down.sql +++ 
b/quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.down.sql @@ -1,4 +1,4 @@ -- no-transaction -- CONCURRENTLY matches the up migration: avoids blocking writes on `splits` -- while the index is being dropped. Cannot run inside a transaction. -DROP INDEX CONCURRENTLY IF EXISTS splits_published_maturity_timestamp_idx; \ No newline at end of file +DROP INDEX CONCURRENTLY IF EXISTS splits_published_maturity_timestamp_idx; diff --git a/quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.up.sql b/quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.up.sql similarity index 97% rename from quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.up.sql rename to quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.up.sql index 0d9fa0f5cff..f6a4c7a53d0 100644 --- a/quickwit/quickwit-metastore/migrations/postgresql/27_create-index-splits-published-maturity-timestamp.up.sql +++ b/quickwit/quickwit-metastore/migrations/postgresql/29_create-index-splits-published-maturity-timestamp.up.sql @@ -21,4 +21,4 @@ -- transaction block, hence the `-- no-transaction` directive above. 
CREATE INDEX CONCURRENTLY IF NOT EXISTS splits_published_maturity_timestamp_idx ON splits (maturity_timestamp, split_id) - WHERE split_state = 'Published'; \ No newline at end of file + WHERE split_state = 'Published'; diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 8b439bfaeaa..8ce8299569c 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -19,8 +19,8 @@ use std::sync::RwLock; use once_cell::sync::Lazy; use quickwit_common::metrics::{ - GaugeGuard, Histogram, IntCounter, IntCounterVec, IntGauge, new_counter, new_counter_vec, - new_gauge, new_histogram_vec, + GaugeGuard, Histogram, IntCounter, IntCounterVec, IntGauge, exponential_buckets, new_counter, + new_counter_vec, new_gauge, new_histogram_vec, }; use quickwit_config::CacheConfig; @@ -48,6 +48,9 @@ pub struct StorageMetrics { pub object_storage_bulk_delete_requests_total: IntCounter, pub object_storage_delete_request_duration: Histogram, pub object_storage_bulk_delete_request_duration: Histogram, + pub object_storage_get_object_duration: Histogram, + pub object_storage_put_object_duration: Histogram, + pub object_storage_upload_part_duration: Histogram, } impl Default for StorageMetrics { @@ -86,12 +89,19 @@ impl Default for StorageMetrics { "storage", &[], ["action"], - vec![0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0], + exponential_buckets(0.001, 2.0, 18) + .expect("object storage duration buckets should be valid"), ); let object_storage_delete_request_duration = object_storage_request_duration.with_label_values(["delete_object"]); let object_storage_bulk_delete_request_duration = object_storage_request_duration.with_label_values(["delete_objects"]); + let object_storage_get_object_duration = + object_storage_request_duration.with_label_values(["get_object"]); + let object_storage_put_object_duration = + object_storage_request_duration.with_label_values(["put_object"]); + let 
object_storage_upload_part_duration = + object_storage_request_duration.with_label_values(["upload_part"]); StorageMetrics { fast_field_cache: CacheMetrics::for_component("fastfields"), @@ -159,6 +169,9 @@ impl Default for StorageMetrics { object_storage_bulk_delete_requests_total, object_storage_delete_request_duration, object_storage_bulk_delete_request_duration, + object_storage_get_object_duration, + object_storage_put_object_duration, + object_storage_upload_part_duration, } } } diff --git a/quickwit/quickwit-storage/src/object_storage/policy.rs b/quickwit/quickwit-storage/src/object_storage/policy.rs index 6ce48ab7a94..797cd612029 100644 --- a/quickwit/quickwit-storage/src/object_storage/policy.rs +++ b/quickwit/quickwit-storage/src/object_storage/policy.rs @@ -67,9 +67,10 @@ impl MultiPartPolicy { impl Default for MultiPartPolicy { fn default() -> Self { MultiPartPolicy { - // S3 limits part size from 5M to 5GB, we want to end up with as few parts as possible - // since each part is charged as a put request. - target_part_num_bytes: 5_000_000_000, // 5GB + /// We want to balance time spent waiting on uploads while still being careful about + /// not hammering S3 Puts, which can be expensive. + /// TODO: Dynamic multipart policy. 
+ target_part_num_bytes: 512 * 1_024 * 1_024, // 512 MiB multipart_threshold_num_bytes: 128 * 1_024 * 1_024, // 128 MiB max_num_parts: 10_000, max_object_num_bytes: 5_000_000_000_000u64, // S3 allows up to 5TB objects diff --git a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs index 5214b2bc37f..0b58858b786 100644 --- a/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/s3_compatible_storage.rs @@ -409,6 +409,7 @@ impl S3CompatibleObjectStorage { Ok(delete_requests) } + #[tracing::instrument(skip_all, fields(part_number = part.part_number, num_bytes=part.len()))] async fn upload_part<'a>( &'a self, upload_id: MultipartUploadId, From e2e5d6adb988978add3ce4d2a8c0b697a7353195 Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Wed, 20 May 2026 14:10:55 -0400 Subject: [PATCH 16/21] Exclude known split ids; double merge concurrency (#6452) --- quickwit/Cargo.toml | 1 + .../src/compaction_pipeline.rs | 8 ++- .../src/compactor_supervisor.rs | 49 ++++++++++----- quickwit/quickwit-compaction/src/lib.rs | 2 +- .../src/planner/compaction_planner.rs | 53 ++++++++++++++++- .../src/planner/compaction_state.rs | 16 +++++ .../quickwit-config/src/node_config/mod.rs | 16 ++--- quickwit/quickwit-indexing/failpoints/mod.rs | 1 + .../src/actors/merge_executor.rs | 18 ++++++ .../src/actors/merge_pipeline.rs | 1 + .../src/actors/delete_task_pipeline.rs | 1 + .../file_backed/file_backed_index/mod.rs | 8 +++ .../quickwit-metastore/src/metastore/mod.rs | 14 +++++ .../src/metastore/postgres/metastore.rs | 59 ++++++++++++------- .../src/metastore/postgres/utils.rs | 24 ++++++-- .../src/object_storage/policy.rs | 8 +-- 16 files changed, 222 insertions(+), 57 deletions(-) diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 9d8cf833dc3..362239948a9 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -224,6 +224,7 @@
sea-query = { version = "0.32" } sea-query-binder = { version = "0.7", features = [ "runtime-tokio-rustls", "sqlx-postgres", + "postgres-array", ] } # ^1.0.184 due to serde-rs/serde#2538 serde = { version = "1.0.228", features = ["derive", "rc"] } diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs index f3095564795..4206ebec935 100644 --- a/quickwit/quickwit-compaction/src/compaction_pipeline.rs +++ b/quickwit/quickwit-compaction/src/compaction_pipeline.rs @@ -26,11 +26,11 @@ use quickwit_indexing::actors::{ MergeExecutor, MergeSplitDownloader, Packager, Publisher, Uploader, UploaderType, }; use quickwit_indexing::merge_policy::MergeOperation; -use quickwit_indexing::metrics::INDEXER_METRICS; use quickwit_indexing::{IndexingSplitStore, PublisherType, SplitsUpdateMailbox}; use quickwit_proto::indexing::MergePipelineId; use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_proto::types::{IndexUid, SourceId, SplitId}; +use tokio::sync::Semaphore; use tracing::{error, info}; use crate::metrics::COMPACTOR_METRICS; @@ -105,6 +105,7 @@ pub struct CompactionPipeline { io_throughput_limiter: Option, max_concurrent_split_uploads: usize, event_broker: EventBroker, + merge_execution_semaphore: Arc, pipeline_start: Option, } @@ -123,6 +124,7 @@ impl CompactionPipeline { io_throughput_limiter: Option, max_concurrent_split_uploads: usize, event_broker: EventBroker, + merge_execution_semaphore: Arc, ) -> Self { CompactionPipeline { task_id, @@ -140,6 +142,7 @@ impl CompactionPipeline { io_throughput_limiter, max_concurrent_split_uploads, event_broker, + merge_execution_semaphore, pipeline_start: None, } } @@ -331,6 +334,7 @@ impl CompactionPipeline { self.doc_mapper.clone(), merge_executor_io_controls, merge_packager_mailbox, + Some(self.merge_execution_semaphore.clone()), ); let (merge_executor_mailbox, merge_executor_handle) = spawn_ctx .spawn_builder() @@ -386,6 +390,7 @@ pub(crate) mod 
tests { use quickwit_proto::metastore::{MetastoreServiceClient, MockMetastoreService}; use quickwit_proto::types::{IndexUid, NodeId}; use quickwit_storage::RamStorage; + use tokio::sync::Semaphore; use super::{CompactionPipeline, PipelineStatus}; @@ -416,6 +421,7 @@ pub(crate) mod tests { None, 2, EventBroker::default(), + Arc::new(Semaphore::new(2)), ) } diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index bb570505db7..af1f3bab115 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -35,6 +35,7 @@ use quickwit_proto::indexing::MergePipelineId; use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_proto::types::NodeId; use quickwit_storage::StorageResolver; +use tokio::sync::Semaphore; use tracing::{error, info}; use crate::compaction_pipeline::{CompactionPipeline, PipelineStatus, PipelineStatusUpdate}; @@ -54,7 +55,10 @@ pub struct CompactorSupervisor { node_id: NodeId, planner_client: CompactionPlannerServiceClient, pipelines: Vec>, - + // Bounds the number of pipelines running Tantivy merge step concurrently. There are 2x as many + // pipeline slots as permits, so half the pipelines can be doing IO while the other half hold a + // permit. + merge_execution_semaphore: Arc, // Shared resources distributed to pipelines when spawning actor chains. io_throughput_limiter: Option, metastore: MetastoreServiceClient, @@ -62,7 +66,6 @@ pub struct CompactorSupervisor { split_cache: Arc, max_concurrent_split_uploads: usize, event_broker: EventBroker, - // Scratch directory root (/compaction/). 
compaction_root_directory: TempDirectory, } @@ -72,7 +75,7 @@ impl CompactorSupervisor { pub fn new( node_id: NodeId, planner_client: CompactionPlannerServiceClient, - num_pipeline_slots: usize, + max_concurrent_merge_executions: usize, io_throughput_limiter: Option, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, @@ -81,11 +84,14 @@ impl CompactorSupervisor { event_broker: EventBroker, compaction_root_directory: TempDirectory, ) -> Self { + let num_pipeline_slots = max_concurrent_merge_executions * 2; let pipelines = (0..num_pipeline_slots).map(|_| None).collect(); + let merge_execution_semaphore = Arc::new(Semaphore::new(max_concurrent_merge_executions)); CompactorSupervisor { node_id, planner_client, pipelines, + merge_execution_semaphore, io_throughput_limiter, metastore, storage_resolver, @@ -124,8 +130,14 @@ impl CompactorSupervisor { error!(%task_id, %error, "failed to spawn compaction task"); } } - let available_slots = self.pipelines.iter().filter(|pipeline_opt| pipeline_opt.is_none()).count(); - COMPACTOR_METRICS.available_slots.set(available_slots as i64); + let available_slots = self + .pipelines + .iter() + .filter(|pipeline_opt| pipeline_opt.is_none()) + .count(); + COMPACTOR_METRICS + .available_slots + .set(available_slots as i64); } async fn spawn_task( @@ -211,6 +223,7 @@ impl CompactorSupervisor { self.io_throughput_limiter.clone(), self.max_concurrent_split_uploads, self.event_broker.clone(), + self.merge_execution_semaphore.clone(), )) } @@ -324,14 +337,16 @@ mod tests { use super::*; use crate::compaction_pipeline::tests::test_pipeline; - fn test_supervisor(num_slots: usize) -> CompactorSupervisor { + /// Builds a test supervisor with `max_concurrent_merge_executions` permits + /// and `2 * max_concurrent_merge_executions` pipeline slots. 
+ fn test_supervisor(max_concurrent_merge_executions: usize) -> CompactorSupervisor { let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); let compaction_client = CompactionPlannerServiceClient::from_mock(MockCompactionPlannerService::new()); CompactorSupervisor::new( NodeId::from("test-node"), compaction_client, - num_slots, + max_concurrent_merge_executions, None, metastore, StorageResolver::for_test(), @@ -374,6 +389,7 @@ mod tests { #[tokio::test] async fn test_end_to_end_statuses_to_proto() { let universe = Universe::new(); + // 3 merge-executions → 6 pipeline slots let mut supervisor = test_supervisor(3); let mut pipeline = test_pipeline("task-1", &["s1", "s2"]); @@ -384,8 +400,8 @@ mod tests { let request = supervisor.build_report_status_request(&statuses); assert_eq!(request.node_id, "test-node"); - // 3 slots, 1 in-progress = 2 available - assert_eq!(request.available_slots, 2); + // 6 slots, 1 in-progress = 5 available + assert_eq!(request.available_slots, 5); assert_eq!(request.in_progress.len(), 1); assert_eq!(request.in_progress[0].task_id, "task-1"); assert_eq!(request.in_progress[0].split_ids, vec!["s1", "s2"]); @@ -396,10 +412,11 @@ mod tests { #[test] fn test_build_report_status_request_empty() { + // 4 merge-executions → 8 pipeline slots let supervisor = test_supervisor(4); let request = supervisor.build_report_status_request(&[]); assert_eq!(request.node_id, "test-node"); - assert_eq!(request.available_slots, 4); + assert_eq!(request.available_slots, 8); assert!(request.in_progress.is_empty()); assert!(request.successes.is_empty()); assert!(request.failures.is_empty()); @@ -499,7 +516,8 @@ mod tests { #[tokio::test] async fn test_spawn_task_fails_when_all_slots_occupied() { let universe = Universe::new(); - let mut supervisor = test_supervisor(2); + // 1 merge-execution → 2 pipeline slots + let mut supervisor = test_supervisor(1); supervisor .spawn_task(test_assignment("task-1", &["s1"]), universe.spawn_ctx()) @@ -538,6 
+556,7 @@ mod tests { let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); let client = CompactionPlannerServiceClient::from_mock(mock); + // 3 merge-executions → 6 pipeline slots let mut supervisor = CompactorSupervisor::new( NodeId::from("test-node"), client, @@ -554,7 +573,7 @@ mod tests { // Simulate what the handler does: collect statuses, report, process response. let statuses = supervisor.check_pipeline_statuses(); let request = supervisor.build_report_status_request(&statuses); - assert_eq!(request.available_slots, 3); + assert_eq!(request.available_slots, 6); let response = supervisor .planner_client @@ -571,13 +590,14 @@ mod tests { assert_eq!(request.in_progress.len(), 1); assert_eq!(request.in_progress[0].task_id, "planner-task-1"); assert_eq!(request.in_progress[0].split_ids.len(), 2); - assert_eq!(request.available_slots, 2); + assert_eq!(request.available_slots, 5); universe.assert_quit().await; } #[test] fn test_build_report_status_request_mixed_statuses() { + // 4 merge-executions → 8 pipeline slots let supervisor = test_supervisor(4); let statuses = vec![ PipelineStatusUpdate { @@ -607,7 +627,8 @@ mod tests { let request = supervisor.build_report_status_request(&statuses); - assert_eq!(request.available_slots, 3); + // 8 slots, 1 in-progress = 7 available + assert_eq!(request.available_slots, 7); assert_eq!(request.in_progress.len(), 1); assert_eq!(request.in_progress[0].task_id, "task-1"); assert_eq!(request.in_progress[0].split_ids, vec!["s1", "s2"]); diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs index 5307f267932..21997b3ec5e 100644 --- a/quickwit/quickwit-compaction/src/lib.rs +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -57,7 +57,7 @@ pub async fn start_compactor_service( let supervisor = CompactorSupervisor::new( node_id, compaction_client, - compactor_config.max_concurrent_pipelines.get(), + compactor_config.max_concurrent_merge_executions.get(), 
io_throughput_limiter, metastore, storage_resolver, diff --git a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs index 7b30c2174df..1ea2d119711 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs @@ -37,11 +37,15 @@ use super::index_config_metastore::{IndexConfigMetastore, IndexEntry}; use crate::planner::metrics::COMPACTION_PLANNER_METRICS; /// Cap on splits fetched per tick. Every tick, the planner re-scans the immature published set, -/// sorted by `maturity_timestamp` ASC so the most-urgent splits are processed first when a backlog +/// sorted by `maturity_timestamp` ASC so the most-urgent splits are processed first when a backlog /// exists. Splits beyond this cap aren't lost -- they bubble into range as the front of the queue /// is merged off. const SCAN_PAGE_SIZE: usize = 5_000; +/// Cap on the size of the `excluded_split_ids` list we send to the metastore. +/// It's a sanity max rather than some invariant. 
+const MAX_EXCLUDED_SPLIT_IDS: usize = 50_000; + #[derive(Debug)] pub struct CompactionPlanner { state: CompactionState, @@ -140,11 +144,13 @@ impl CompactionPlanner { } async fn scan_metastore(&self) -> Result> { + let excluded_split_ids = self.state.tracked_split_ids(MAX_EXCLUDED_SPLIT_IDS); let query = ListSplitsQuery::for_all_indexes() .with_split_state(SplitState::Published) .retain_immature(OffsetDateTime::now_utc()) .sort_by_maturity_timestamp() - .with_limit(SCAN_PAGE_SIZE); + .with_limit(SCAN_PAGE_SIZE) + .with_excluded_split_ids(excluded_split_ids); let request = ListSplitsRequest::try_from_list_splits_query(&query)?; let splits = self .metastore @@ -266,7 +272,7 @@ mod tests { IndexMetadata, IndexMetadataResponseExt, ListSplitsRequestExt, ListSplitsResponseExt, SortBy, Split, SplitMaturity, SplitMetadata, SplitState, }; - use quickwit_proto::compaction::CompactionSuccess; + use quickwit_proto::compaction::{CompactionInProgress, CompactionSuccess}; use quickwit_proto::metastore::{ IndexMetadataResponse, ListSplitsResponse, MetastoreError, MockMetastoreService, }; @@ -347,6 +353,10 @@ mod tests { assert_eq!(query.update_timestamp.start, Bound::Unbounded); assert_eq!(query.update_timestamp.end, Bound::Unbounded); + // No tracked splits → empty exclude list. The non-empty case is + // covered by `test_scan_metastore_excludes_tracked_splits`. 
+ assert!(query.excluded_split_ids.is_empty()); + let response = ListSplitsResponse::try_from_splits(returned_clone.clone()).unwrap(); Ok(ServiceStream::from(vec![Ok(response)])) }); @@ -359,6 +369,43 @@ mod tests { assert_eq!(result[1].split_metadata.split_id, "b"); } + #[tokio::test] + async fn test_scan_metastore_excludes_tracked_splits() { + let index_uid = IndexUid::for_test("test-index", 0); + + let mut mock = MockMetastoreService::new(); + mock.expect_list_splits().returning(move |req| { + let query = req.deserialize_list_splits_query().unwrap(); + let mut excluded = query.excluded_split_ids.clone(); + excluded.sort(); + assert_eq!( + excluded, + vec!["in-flight".to_string(), "tracked".to_string()] + ); + + let response = ListSplitsResponse::try_from_splits(Vec::new()).unwrap(); + Ok(ServiceStream::from(vec![Ok(response)])) + }); + + let mut planner = CompactionPlanner::new(MetastoreServiceClient::from_mock(mock)); + planner.state.track_split(SplitMetadata { + split_id: "tracked".to_string(), + index_uid: index_uid.clone(), + ..Default::default() + }); + planner.state.update_heartbeats( + &NodeId::from("test-node"), + &[CompactionInProgress { + task_id: "t-1".to_string(), + index_uid: Some(index_uid.clone()), + source_id: "src".to_string(), + split_ids: vec!["in-flight".to_string()], + }], + ); + + planner.scan_metastore().await.unwrap(); + } + #[tokio::test] async fn test_ingest_splits_dedups_and_skips_mature() { let index_metadata = test_index_metadata(); diff --git a/quickwit/quickwit-compaction/src/planner/compaction_state.rs b/quickwit/quickwit-compaction/src/planner/compaction_state.rs index 4e0616cdfc1..bd8cecc82e9 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_state.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_state.rs @@ -85,6 +85,22 @@ impl CompactionState { || self.in_flight_split_ids.contains(split_id) } + /// All split IDs the planner is currently tracking. 
Used by the metastore + /// scan to exclude already-known splits so each tick returns fresh work. + pub fn tracked_split_ids(&self, max_size: usize) -> Vec { + let mut out = Vec::with_capacity( + (self.needs_compaction_split_ids.len() + self.in_flight_split_ids.len()).min(max_size), + ); + out.extend( + self.needs_compaction_split_ids + .iter() + .chain(self.in_flight_split_ids.iter()) + .take(max_size) + .cloned(), + ); + out + } + /// Adds a split to the needs_compaction set. pub fn track_split(&mut self, split: SplitMetadata) { let split_id = split.split_id().to_string(); diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index 71eaa16c1d7..cfc7387a5d7 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -230,9 +230,10 @@ impl Default for IndexerConfig { #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct CompactorConfig { - /// Maximum number of concurrent merge pipelines. Defaults to CPU count. - #[serde(default = "CompactorConfig::default_max_concurrent_pipelines")] - pub max_concurrent_pipelines: NonZeroUsize, + /// Maximum number of concurrent merges, which hold the CPU for + /// a long time. Defaults to `num_cpus - 1`. + #[serde(default = "CompactorConfig::default_max_concurrent_merge_executions")] + pub max_concurrent_merge_executions: NonZeroUsize, /// Maximum number of concurrent split uploads across all pipelines. 
#[serde(default = "CompactorConfig::default_max_concurrent_split_uploads")] pub max_concurrent_split_uploads: usize, @@ -242,8 +243,9 @@ pub struct CompactorConfig { } impl CompactorConfig { - fn default_max_concurrent_pipelines() -> NonZeroUsize { - NonZeroUsize::new(quickwit_common::num_cpus()).unwrap_or(NonZeroUsize::MIN) + fn default_max_concurrent_merge_executions() -> NonZeroUsize { + let cpus = quickwit_common::num_cpus().saturating_sub(1); + NonZeroUsize::new(cpus).unwrap_or(NonZeroUsize::MIN) } fn default_max_concurrent_split_uploads() -> usize { @@ -253,7 +255,7 @@ impl CompactorConfig { #[cfg(any(test, feature = "testsuite"))] pub fn for_test() -> Self { CompactorConfig { - max_concurrent_pipelines: NonZeroUsize::new(2).unwrap(), + max_concurrent_merge_executions: NonZeroUsize::new(2).unwrap(), max_concurrent_split_uploads: 4, max_merge_write_throughput: None, } @@ -263,7 +265,7 @@ impl CompactorConfig { impl Default for CompactorConfig { fn default() -> Self { Self { - max_concurrent_pipelines: Self::default_max_concurrent_pipelines(), + max_concurrent_merge_executions: Self::default_max_concurrent_merge_executions(), max_concurrent_split_uploads: Self::default_max_concurrent_split_uploads(), max_merge_write_throughput: None, } diff --git a/quickwit/quickwit-indexing/failpoints/mod.rs b/quickwit/quickwit-indexing/failpoints/mod.rs index b68245ff718..25b6522851b 100644 --- a/quickwit/quickwit-indexing/failpoints/mod.rs +++ b/quickwit/quickwit-indexing/failpoints/mod.rs @@ -308,6 +308,7 @@ async fn test_merge_executor_controlled_directory_kill_switch() -> anyhow::Resul doc_mapper, io_controls, merge_packager_mailbox, + None, ); let (merge_executor_mailbox, _merge_executor_handle) = diff --git a/quickwit/quickwit-indexing/src/actors/merge_executor.rs b/quickwit/quickwit-indexing/src/actors/merge_executor.rs index 5141227bee9..417c87e871f 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_executor.rs +++ 
b/quickwit/quickwit-indexing/src/actors/merge_executor.rs @@ -42,6 +42,7 @@ use tantivy::index::SegmentId; use tantivy::tokenizer::TokenizerManager; use tantivy::{DateTime, Directory, Index, IndexMeta, IndexWriter, SegmentReader}; use tokio::runtime::Handle; +use tokio::sync::Semaphore; use tracing::{debug, error, info, instrument, warn}; use crate::actors::Packager; @@ -56,6 +57,9 @@ pub struct MergeExecutor { doc_mapper: Arc, io_controls: IoControls, merge_packager_mailbox: Mailbox, + /// Bounds concurrent Tantivy merges across pipelines on this node. + /// `None` in the legacy indexing-side merge pipeline. + merge_execution_semaphore: Option>, } #[async_trait] @@ -95,6 +99,16 @@ impl Handler for MergeExecutor { merge_scratch_directory, .. } = merge_scratch; + // On nodes running the split compaction architecture, merge pipelines are ephemeral, and we + // need to make sure there aren't too many CPU-bound operations occurring concurrently. + let _cpu_permit = match &self.merge_execution_semaphore { + Some(semaphore) => Some( + ctx.protect_future(semaphore.clone().acquire_owned()) + .await + .expect("merge execution semaphore should never be closed"), + ), + None => None, + }; let indexed_split_opt: Option = match merge_operation.operation_type { MergeOperationType::Merge => { let merge_res = self @@ -311,6 +325,7 @@ impl MergeExecutor { doc_mapper: Arc, io_controls: IoControls, merge_packager_mailbox: Mailbox, + merge_execution_semaphore: Option>, ) -> Self { MergeExecutor { pipeline_id, @@ -318,6 +333,7 @@ impl MergeExecutor { doc_mapper, io_controls, merge_packager_mailbox, + merge_execution_semaphore, } } @@ -669,6 +685,7 @@ mod tests { test_sandbox.doc_mapper(), IoControls::default(), merge_packager_mailbox, + None, ); let (merge_executor_mailbox, merge_executor_handle) = test_sandbox .universe() @@ -814,6 +831,7 @@ mod tests { test_sandbox.doc_mapper(), IoControls::default(), merge_packager_mailbox, + None, ); let (delete_task_executor_mailbox, 
delete_task_executor_handle) = universe.spawn_builder().spawn(delete_task_executor); diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index f871fbabb5e..53cfd0da16d 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -318,6 +318,7 @@ impl MergePipeline { self.params.doc_mapper.clone(), merge_executor_io_controls, merge_packager_mailbox, + None, ); let (merge_executor_mailbox, merge_executor_handle) = ctx .spawn_actor() diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs b/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs index 9506a587bd4..9daf747c40f 100644 --- a/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs +++ b/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs @@ -206,6 +206,7 @@ impl DeleteTaskPipeline { doc_mapper.clone(), delete_executor_io_controls, packager_mailbox, + None, ); let (delete_executor_mailbox, task_executor_supervisor_handler) = ctx.spawn_actor().supervise(delete_executor); diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs index 6fd5ce244be..1bf24d0f088 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs @@ -1071,6 +1071,14 @@ fn split_query_predicate(split: &&Split, query: &ListSplitsQuery) -> bool { } } + if query + .excluded_split_ids + .iter() + .any(|excluded| excluded == &split.split_metadata.split_id) + { + return false; + } + true } diff --git a/quickwit/quickwit-metastore/src/metastore/mod.rs b/quickwit/quickwit-metastore/src/metastore/mod.rs index caaae7316a8..405dafb9ef2 100644 --- a/quickwit/quickwit-metastore/src/metastore/mod.rs +++ 
b/quickwit/quickwit-metastore/src/metastore/mod.rs @@ -872,6 +872,10 @@ pub struct ListSplitsQuery { /// Only return splits whose (index_uid, split_id) are lexicographically after this split pub after_split: Option<(IndexUid, SplitId)>, + + /// Exclude any split whose `split_id` appears in this list. Empty means no + /// exclusion. + pub excluded_split_ids: Vec, } #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)] @@ -948,6 +952,7 @@ impl ListSplitsQuery { mature: Bound::Unbounded, sort_by: SortBy::None, after_split: None, + excluded_split_ids: Vec::new(), } } @@ -972,6 +977,7 @@ impl ListSplitsQuery { mature: Bound::Unbounded, sort_by: SortBy::None, after_split: None, + excluded_split_ids: Vec::new(), }) } @@ -992,6 +998,7 @@ impl ListSplitsQuery { mature: Bound::Unbounded, sort_by: SortBy::None, after_split: None, + excluded_split_ids: Vec::new(), } } @@ -1187,6 +1194,13 @@ impl ListSplitsQuery { self.after_split = Some((split_meta.index_uid.clone(), split_meta.split_id.clone())); self } + + /// Excludes splits whose `split_id` is in the provided list. Used by the + /// compaction planner to skip splits it is already tracking locally. 
+ pub fn with_excluded_split_ids(mut self, excluded_split_ids: Vec) -> Self { + self.excluded_split_ids = excluded_split_ids; + self + } } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs b/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs index d459ceb243b..397b84d3fbf 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs @@ -868,7 +868,7 @@ impl MetastoreService for PostgresqlMetastore { let list_splits_query = request.deserialize_list_splits_query()?; let mut sql_query_builder = Query::select(); sql_query_builder.column(Asterisk).from(Splits::Table); - append_query_filters_and_order_by(&mut sql_query_builder, &list_splits_query); + append_query_filters_and_order_by(&mut sql_query_builder, list_splits_query); let (sql_query, values) = sql_query_builder.build_sqlx(PostgresQueryBuilder); let pg_split_stream = SplitStream::new( @@ -2683,7 +2683,7 @@ mod tests { let index_uid = IndexUid::new_with_random_ulid("test-index"); let query = ListSplitsQuery::for_index(index_uid.clone()).with_split_state(SplitState::Staged); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), @@ -2697,7 +2697,7 @@ mod tests { let query = ListSplitsQuery::for_index(index_uid.clone()).with_split_state(SplitState::Published); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), @@ -2711,7 +2711,7 @@ mod tests { let query = ListSplitsQuery::for_index(index_uid.clone()) .with_split_states([SplitState::Published, SplitState::MarkedForDeletion]); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2723,7 
+2723,7 @@ mod tests { let sql = select_statement.column(Asterisk).from(Splits::Table); let query = ListSplitsQuery::for_index(index_uid.clone()).with_update_timestamp_lt(51); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2735,7 +2735,7 @@ mod tests { let sql = select_statement.column(Asterisk).from(Splits::Table); let query = ListSplitsQuery::for_index(index_uid.clone()).with_create_timestamp_lte(55); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2749,7 +2749,7 @@ mod tests { let maturity_evaluation_datetime = OffsetDateTime::from_unix_timestamp(55).unwrap(); let query = ListSplitsQuery::for_index(index_uid.clone()) .retain_mature(maturity_evaluation_datetime); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), @@ -2763,7 +2763,7 @@ mod tests { let query = ListSplitsQuery::for_index(index_uid.clone()) .retain_immature(maturity_evaluation_datetime); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2775,7 +2775,7 @@ mod tests { let sql = select_statement.column(Asterisk).from(Splits::Table); let query = ListSplitsQuery::for_index(index_uid.clone()).with_delete_opstamp_gte(4); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2787,7 +2787,7 @@ mod tests { let sql = select_statement.column(Asterisk).from(Splits::Table); let query = ListSplitsQuery::for_index(index_uid.clone()).with_time_range_start_gt(45); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( 
sql.to_string(PostgresQueryBuilder), format!( @@ -2799,7 +2799,7 @@ mod tests { let sql = select_statement.column(Asterisk).from(Splits::Table); let query = ListSplitsQuery::for_index(index_uid.clone()).with_time_range_end_lt(45); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2815,7 +2815,7 @@ mod tests { is_present: false, tag: "tag-2".to_string(), }); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), @@ -2828,7 +2828,7 @@ mod tests { let sql = select_statement.column(Asterisk).from(Splits::Table); let query = ListSplitsQuery::for_index(index_uid.clone()).with_offset(4); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), @@ -2841,7 +2841,7 @@ mod tests { let sql = select_statement.column(Asterisk).from(Splits::Table); let query = ListSplitsQuery::for_index(index_uid.clone()).sort_by_index_uid(); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), @@ -2859,7 +2859,7 @@ mod tests { split_id: "my_split".to_string(), ..Default::default() }); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), @@ -2872,7 +2872,7 @@ mod tests { let sql = select_statement.column(Asterisk).from(Splits::Table); let query = ListSplitsQuery::for_all_indexes().with_split_state(SplitState::Staged); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), @@ -2883,7 +2883,7 @@ mod tests { let sql = select_statement.column(Asterisk).from(Splits::Table); let query = 
ListSplitsQuery::for_all_indexes().with_max_time_range_end(42); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), @@ -2900,7 +2900,8 @@ mod tests { let query = ListSplitsQuery::for_index(index_uid.clone()) .with_time_range_start_gt(0) .with_time_range_end_lt(40); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); + assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2914,7 +2915,7 @@ mod tests { let query = ListSplitsQuery::for_index(index_uid.clone()) .with_time_range_start_gt(45) .with_delete_opstamp_gt(0); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2928,7 +2929,7 @@ mod tests { let query = ListSplitsQuery::for_index(index_uid.clone()) .with_update_timestamp_lt(51) .with_create_timestamp_lte(63); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2945,7 +2946,7 @@ mod tests { is_present: true, tag: "tag-1".to_string(), }); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2960,7 +2961,7 @@ mod tests { let query = ListSplitsQuery::try_from_index_uids(vec![index_uid.clone(), index_uid_2.clone()]) .unwrap(); - append_query_filters_and_order_by(sql, &query); + append_query_filters_and_order_by(sql, query); assert_eq!( sql.to_string(PostgresQueryBuilder), format!( @@ -2969,6 +2970,20 @@ mod tests { ); } + #[test] + fn test_list_splits_query_excluded_split_ids() { + let mut select_statement = Query::select(); + let sql = select_statement.column(Asterisk).from(Splits::Table); + + let query = ListSplitsQuery::for_all_indexes() + .with_excluded_split_ids(vec!["s1".to_string(), 
"s2".to_string()]); + append_query_filters_and_order_by(sql, query); + assert_eq!( + sql.to_string(PostgresQueryBuilder), + r#"SELECT * FROM "splits" WHERE split_id <> ALL(ARRAY ['s1','s2']::text[])"# + ); + } + #[test] fn test_index_id_pattern_like_query() { assert_eq!( diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs b/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs index 29c26e41fb2..d63755a06f6 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs @@ -19,7 +19,7 @@ use std::time::Duration; use quickwit_common::uri::Uri; use quickwit_proto::metastore::{MetastoreError, MetastoreResult}; -use sea_query::{Expr, Func, Order, SelectStatement, any}; +use sea_query::{ArrayType, Expr, Func, Order, SelectStatement, Value, any}; use sqlx::postgres::{PgConnectOptions, PgPoolOptions}; use sqlx::{ConnectOptions, Postgres}; use tracing::error; @@ -96,10 +96,7 @@ pub(super) fn append_range_filters( }; } -pub(super) fn append_query_filters_and_order_by( - sql: &mut SelectStatement, - query: &ListSplitsQuery, -) { +pub(super) fn append_query_filters_and_order_by(sql: &mut SelectStatement, query: ListSplitsQuery) { if let Some(index_uids) = &query.index_uids { // Note: `ListSplitsQuery` builder enforces a non empty `index_uids` list. // TODO we should explore IN VALUES, = ANY and similar constructs in case they perform @@ -202,6 +199,23 @@ pub(super) fn append_query_filters_and_order_by( ); } + if !query.excluded_split_ids.is_empty() { + // One bind regardless of list length: avoids postgres' 65535 param + // ceiling and keeps parse-time O(1) in the exclude size. The `$1` is + // postgres' placeholder; sea-query substitutes it with the next bind + // slot at build time. 
+ let excluded_values: Vec = query + .excluded_split_ids + .into_iter() + .map(|split_id| Value::String(Some(Box::new(split_id)))) + .collect(); + let excluded_array = Value::Array(ArrayType::String, Some(Box::new(excluded_values))); + sql.cond_where(Expr::cust_with_values( + "split_id <> ALL($1::text[])", + [excluded_array], + )); + } + match query.sort_by { SortBy::Staleness => { sql.order_by(Splits::DeleteOpstamp, Order::Asc) diff --git a/quickwit/quickwit-storage/src/object_storage/policy.rs b/quickwit/quickwit-storage/src/object_storage/policy.rs index 797cd612029..3369f948e06 100644 --- a/quickwit/quickwit-storage/src/object_storage/policy.rs +++ b/quickwit/quickwit-storage/src/object_storage/policy.rs @@ -67,10 +67,10 @@ impl MultiPartPolicy { impl Default for MultiPartPolicy { fn default() -> Self { MultiPartPolicy { - /// We want to balance time spent waiting on uploads while still being careful about - /// not hammering S3 Puts, which can be expensive. - /// TODO: Dynamic multipart policy. - target_part_num_bytes: 512 * 1_024 * 1_024, // 5GB + // We want to balance time spent waiting on uploads while still being careful about + // not hammering S3 Puts, which can be expensive. + // TODO: Dynamic multipart policy. 
+ target_part_num_bytes: 512 * 1_024 * 1_024, // 512mib multipart_threshold_num_bytes: 128 * 1_024 * 1_024, // 128 MiB max_num_parts: 10_000, max_object_num_bytes: 5_000_000_000_000u64, // S3 allows up to 5TB objects From efaba73633c5e907442189f51beafe5d6615e6cc Mon Sep 17 00:00:00 2001 From: nadav-govari Date: Wed, 20 May 2026 14:12:21 -0400 Subject: [PATCH 17/21] Revert to using existing merge flow when standalone compactors isnt explicitly enabled (#6453) * Revert to using existing merge flow when standalone compactors isnt explicitly enabled * Pull out some configs --- quickwit/quickwit-cli/src/main.rs | 27 +- quickwit/quickwit-cli/src/tool.rs | 137 +++++ .../src/compactor_supervisor.rs | 42 +- quickwit/quickwit-compaction/src/lib.rs | 6 +- .../resources/tests/node_config/quickwit.json | 4 +- .../resources/tests/node_config/quickwit.toml | 2 + .../resources/tests/node_config/quickwit.yaml | 2 + .../quickwit-config/src/node_config/mod.rs | 35 +- .../src/node_config/serialize.rs | 45 +- .../src/actors/indexing_pipeline.rs | 143 ++++- .../src/actors/indexing_service.rs | 550 +++++++++++++++++- quickwit/quickwit-indexing/src/lib.rs | 3 + .../src/models/indexing_service_message.rs | 10 +- quickwit/quickwit-indexing/src/models/mod.rs | 4 +- quickwit/quickwit-indexing/src/test_utils.rs | 2 + .../quickwit-janitor/src/janitor_service.rs | 12 +- quickwit/quickwit-janitor/src/lib.rs | 2 +- quickwit/quickwit-serve/src/lib.rs | 59 +- 18 files changed, 999 insertions(+), 86 deletions(-) diff --git a/quickwit/quickwit-cli/src/main.rs b/quickwit/quickwit-cli/src/main.rs index 0d7d5f7c91a..23691362f9c 100644 --- a/quickwit/quickwit-cli/src/main.rs +++ b/quickwit/quickwit-cli/src/main.rs @@ -151,7 +151,7 @@ mod tests { use quickwit_cli::split::{DescribeSplitArgs, SplitCliCommand}; use quickwit_cli::tool::{ ExtractSplitArgs, GarbageCollectIndexArgs, LocalIngestDocsArgs, LocalSearchArgs, - ToolCliCommand, + ToolCliCommand, MergeArgs }; use quickwit_common::uri::Uri; use 
quickwit_config::SourceInputFormat; @@ -740,6 +740,31 @@ mod tests { Ok(()) } + #[test] + fn test_parse_merge_args() -> anyhow::Result<()> { + let app = build_cli().no_binary_name(true); + let matches = app.try_get_matches_from([ + "tool", + "merge", + "--index", + "wikipedia", + "--source", + "ingest-source", + "--config", + "/config.yaml", + ])?; + let command = CliCommand::parse_cli_args(matches)?; + assert!(matches!( + command, + CliCommand::Tool(ToolCliCommand::Merge(MergeArgs { + index_id, + source_id, + .. + })) if &index_id == "wikipedia" && source_id == "ingest-source" + )); + Ok(()) + } + #[test] fn test_parse_no_color() { // SAFETY: this test may not be entirely sound if not run with nextest or --test-threads=1 diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs index 557d61921e0..136be2812f5 100644 --- a/quickwit/quickwit-cli/src/tool.rs +++ b/quickwit/quickwit-cli/src/tool.rs @@ -12,6 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use crate::info; +use quickwit_config::VecSourceParams; +use quickwit_actors::Mailbox; +use quickwit_actors::ActorExitStatus; +use quickwit_indexing::actors::MergePipeline; +use quickwit_indexing::actors::MergeSchedulerService; +use quickwit_indexing::models::DetachMergePipeline; +use quickwit_proto::types::SourceId; use std::collections::{HashSet, VecDeque}; use std::io::{IsTerminal, Stdout, Write, stdout}; use std::num::NonZeroUsize; @@ -200,6 +208,13 @@ pub struct GarbageCollectIndexArgs { pub dry_run: bool, } +#[derive(Debug, Eq, PartialEq)] +pub struct MergeArgs { + pub config_uri: Uri, + pub index_id: IndexId, + pub source_id: SourceId, +} + #[derive(Debug, Eq, PartialEq)] pub struct ExtractSplitArgs { pub config_uri: Uri, @@ -214,6 +229,7 @@ pub enum ToolCliCommand { LocalIngest(LocalIngestDocsArgs), LocalSearch(LocalSearchArgs), ExtractSplit(ExtractSplitArgs), + Merge(MergeArgs), } impl ToolCliCommand { @@ -225,6 +241,7 @@ impl ToolCliCommand { "gc" => Self::parse_garbage_collect_args(submatches), "local-ingest" => Self::parse_local_ingest_args(submatches), "local-search" => Self::parse_local_search_args(submatches), + "merge" => Self::parse_merge_args(submatches), "extract-split" => Self::parse_extract_split_args(submatches), _ => bail!("unknown tool subcommand `{subcommand}`"), } @@ -314,6 +331,24 @@ impl ToolCliCommand { })) } + fn parse_merge_args(mut matches: ArgMatches) -> anyhow::Result { + let config_uri = matches + .remove_one::("config") + .map(|uri_str| Uri::from_str(&uri_str)) + .expect("`config` should be a required arg.")?; + let index_id = matches + .remove_one::("index") + .expect("'index-id' should be a required arg."); + let source_id = matches + .remove_one::("source") + .expect("'source-id' should be a required arg."); + Ok(Self::Merge(MergeArgs { + index_id, + source_id, + config_uri, + })) + } + fn parse_garbage_collect_args(mut matches: ArgMatches) -> anyhow::Result { let config_uri = matches .get_one("config") @@ -363,6 +398,7 @@ 
impl ToolCliCommand { Self::GarbageCollect(args) => garbage_collect_index_cli(args).await, Self::LocalIngest(args) => local_ingest_docs_cli(args).await, Self::LocalSearch(args) => local_search_cli(args).await, + Self::Merge(args) => merge_cli(args).await, Self::ExtractSplit(args) => extract_split_cli(args).await, } } @@ -418,6 +454,7 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< &HashSet::from_iter([QuickwitService::Indexer]), )?; let universe = Universe::new(); + let merge_scheduler_service_mailbox = universe.get_or_spawn_one(); let split_cache = Arc::new(IndexingSplitCache::from_config(&indexer_config, &config.data_dir_path).await?); let indexing_server = IndexingService::new( @@ -428,6 +465,7 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< cluster, metastore, None, + Some(merge_scheduler_service_mailbox), IngesterPool::default(), storage_resolver, EventBroker::default(), @@ -443,6 +481,11 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< pipeline_uid: PipelineUid::random(), }) .await?; + let merge_pipeline_handle = indexing_server_mailbox + .ask_for_res(DetachMergePipeline { + pipeline_id: pipeline_id.merge_pipeline_id(), + }) + .await?; let indexing_pipeline_handle = indexing_server_mailbox .ask_for_res(DetachIndexingPipeline { pipeline_id }) .await?; @@ -460,6 +503,13 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< let statistics = start_statistics_reporting_loop(indexing_pipeline_handle, args.input_path_opt.is_none()) .await?; + + merge_pipeline_handle + .mailbox() + .ask(quickwit_indexing::FinishPendingMergesAndShutdownPipeline) + .await?; + merge_pipeline_handle.join().await; + // Shutdown the indexing server. 
universe .send_exit_with_success(&indexing_server_mailbox) @@ -527,6 +577,93 @@ pub async fn local_search_cli(args: LocalSearchArgs) -> anyhow::Result<()> { Ok(()) } +pub async fn merge_cli(args: MergeArgs) -> anyhow::Result<()> { + debug!(args=?args, "run-merge-operations"); + println!("❯ Merging splits locally..."); + let config = load_node_config(&args.config_uri).await?; + let (storage_resolver, metastore_resolver) = + get_resolvers(&config.storage_configs, &config.metastore_configs); + let mut metastore = metastore_resolver.resolve(&config.metastore_uri).await?; + run_index_checklist(&mut metastore, &storage_resolver, &args.index_id, None).await?; + // The indexing service needs to update its cluster chitchat state so that the control plane is + // aware of the running tasks. We thus create a fake cluster to instantiate the indexing service + // and avoid impacting potential control plane running on the cluster. + let cluster = create_empty_cluster(&config).await?; + let runtimes_config = RuntimesConfig::default(); + start_actor_runtimes( + runtimes_config, + &HashSet::from_iter([QuickwitService::Indexer]), + )?; + let indexer_config = IndexerConfig::default(); + let universe = Universe::new(); + let merge_scheduler_service: Mailbox = universe.get_or_spawn_one(); + let indexing_server = IndexingService::new( + config.node_id, + config.data_dir_path, + indexer_config, + runtimes_config.num_threads_blocking, + cluster, + metastore, + None, + Some(merge_scheduler_service), + IngesterPool::default(), + storage_resolver, + EventBroker::default(), + Arc::new(IndexingSplitCache::no_caching()), + ) + .await?; + let (indexing_service_mailbox, indexing_service_handle) = + universe.spawn_builder().spawn(indexing_server); + let pipeline_id = indexing_service_mailbox + .ask_for_res(SpawnPipeline { + index_id: args.index_id, + source_config: SourceConfig { + source_id: args.source_id, + num_pipelines: NonZeroUsize::MIN, + enabled: true, + source_params: 
SourceParams::Vec(VecSourceParams::default()), + transform_config: None, + input_format: SourceInputFormat::Json, + }, + pipeline_uid: PipelineUid::random(), + }) + .await?; + let pipeline_handle: ActorHandle = indexing_service_mailbox + .ask_for_res(DetachMergePipeline { + pipeline_id: pipeline_id.merge_pipeline_id(), + }) + .await?; + + let mut check_interval = tokio::time::interval(Duration::from_secs(1)); + loop { + check_interval.tick().await; + + pipeline_handle.refresh_observe(); + let observation = pipeline_handle.last_observation(); + + if observation.num_ongoing_merges == 0 { + info!("merge pipeline has no more ongoing merges, exiting"); + break; + } + + if pipeline_handle.state().is_exit() { + info!("merge pipeline has exited, exiting"); + break; + } + } + + let (pipeline_exit_status, _pipeline_statistics) = pipeline_handle.quit().await; + indexing_service_handle.quit().await; + if !matches!( + pipeline_exit_status, + ActorExitStatus::Success | ActorExitStatus::Quit + ) { + bail!(pipeline_exit_status); + } + println!("{} Merge successful.", "✔".color(GREEN_COLOR)); + Ok(()) +} + pub async fn garbage_collect_index_cli(args: GarbageCollectIndexArgs) -> anyhow::Result<()> { debug!(args=?args, "garbage-collect-index"); println!("❯ Garbage collecting index..."); diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index af1f3bab115..f83d596758a 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -18,11 +18,13 @@ use std::time::Duration; use async_trait::async_trait; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, SpawnContext}; -use quickwit_common::io::Limiter; +use quickwit_common::io::{self, Limiter}; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_common::uri::Uri; -use quickwit_config::{IndexingSettings, RetentionPolicy, 
SearchSettings, build_doc_mapper}; +use quickwit_config::{ + CompactorConfig, IndexingSettings, RetentionPolicy, SearchSettings, build_doc_mapper, +}; use quickwit_doc_mapper::DocMapping; use quickwit_indexing::merge_policy::{MergeOperation, merge_policy_from_settings}; use quickwit_indexing::{IndexingSplitCache, IndexingSplitStore}; @@ -75,18 +77,25 @@ impl CompactorSupervisor { pub fn new( node_id: NodeId, planner_client: CompactionPlannerServiceClient, - max_concurrent_merge_executions: usize, - io_throughput_limiter: Option, + compactor_config: &CompactorConfig, metastore: MetastoreServiceClient, storage_resolver: StorageResolver, split_cache: Arc, - max_concurrent_split_uploads: usize, event_broker: EventBroker, compaction_root_directory: TempDirectory, ) -> Self { - let num_pipeline_slots = max_concurrent_merge_executions * 2; + let &CompactorConfig { + max_concurrent_merge_executions, + pipeline_slots_per_merge_execution, + max_concurrent_split_uploads, + max_merge_write_throughput, + } = compactor_config; + let num_pipeline_slots = + max_concurrent_merge_executions.get() * pipeline_slots_per_merge_execution.get(); let pipelines = (0..num_pipeline_slots).map(|_| None).collect(); - let merge_execution_semaphore = Arc::new(Semaphore::new(max_concurrent_merge_executions)); + let merge_execution_semaphore = + Arc::new(Semaphore::new(max_concurrent_merge_executions.get())); + let io_throughput_limiter = max_merge_write_throughput.map(io::limiter); CompactorSupervisor { node_id, planner_client, @@ -325,6 +334,8 @@ impl Handler for CompactorSupervisor { #[cfg(test)] mod tests { + use std::num::NonZeroUsize; + use quickwit_actors::Universe; use quickwit_common::temp_dir::TempDirectory; use quickwit_proto::compaction::{ @@ -343,15 +354,18 @@ mod tests { let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); let compaction_client = CompactionPlannerServiceClient::from_mock(MockCompactionPlannerService::new()); + let compactor_config = 
CompactorConfig { + max_concurrent_merge_executions: NonZeroUsize::new(max_concurrent_merge_executions) + .expect("max_concurrent_merge_executions must be non-zero"), + ..CompactorConfig::for_test() + }; CompactorSupervisor::new( NodeId::from("test-node"), compaction_client, - max_concurrent_merge_executions, - None, + &compactor_config, metastore, StorageResolver::for_test(), Arc::new(IndexingSplitCache::no_caching()), - 2, EventBroker::default(), TempDirectory::for_test(), ) @@ -557,15 +571,17 @@ mod tests { let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); let client = CompactionPlannerServiceClient::from_mock(mock); // 3 merge-executions → 6 pipeline slots + let compactor_config = CompactorConfig { + max_concurrent_merge_executions: NonZeroUsize::new(3).unwrap(), + ..CompactorConfig::for_test() + }; let mut supervisor = CompactorSupervisor::new( NodeId::from("test-node"), client, - 3, - None, + &compactor_config, metastore, StorageResolver::for_test(), Arc::new(IndexingSplitCache::no_caching()), - 2, EventBroker::default(), TempDirectory::for_test(), ); diff --git a/quickwit/quickwit-compaction/src/lib.rs b/quickwit/quickwit-compaction/src/lib.rs index 21997b3ec5e..ada03eb1294 100644 --- a/quickwit/quickwit-compaction/src/lib.rs +++ b/quickwit/quickwit-compaction/src/lib.rs @@ -25,7 +25,6 @@ use std::sync::Arc; pub use compactor_supervisor::CompactorSupervisor; use quickwit_actors::{Mailbox, Universe}; -use quickwit_common::io; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_config::CompactorConfig; @@ -53,16 +52,13 @@ pub async fn start_compactor_service( compaction_root_directory: TempDirectory, ) -> anyhow::Result> { info!("starting compactor service"); - let io_throughput_limiter = compactor_config.max_merge_write_throughput.map(io::limiter); let supervisor = CompactorSupervisor::new( node_id, compaction_client, - compactor_config.max_concurrent_merge_executions.get(), - 
io_throughput_limiter, + compactor_config, metastore, storage_resolver, split_cache, - compactor_config.max_concurrent_split_uploads, event_broker, compaction_root_directory, ); diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.json b/quickwit/quickwit-config/resources/tests/node_config/quickwit.json index 996d682b6e8..7269b37ae22 100644 --- a/quickwit/quickwit-config/resources/tests/node_config/quickwit.json +++ b/quickwit/quickwit-config/resources/tests/node_config/quickwit.json @@ -52,7 +52,9 @@ "enable_otlp_endpoint": true, "split_store_max_num_bytes": "1T", "split_store_max_num_splits": 10000, - "max_concurrent_split_uploads": 8 + "max_concurrent_split_uploads": 8, + "max_merge_write_throughput": "100mb", + "merge_concurrency": 2 }, "ingest_api": { "replication_factor": 2 diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml b/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml index 6a63ff31883..ea715dcffe0 100644 --- a/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml +++ b/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml @@ -43,6 +43,8 @@ enable_otlp_endpoint = true split_store_max_num_bytes = "1T" split_store_max_num_splits = 10_000 max_concurrent_split_uploads = 8 +max_merge_write_throughput = "100mb" +merge_concurrency = 2 [ingest_api] replication_factor = 2 diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml b/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml index 49e418ed924..face0852972 100644 --- a/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml +++ b/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml @@ -47,6 +47,8 @@ indexer: split_store_max_num_bytes: 1T split_store_max_num_splits: 10000 max_concurrent_split_uploads: 8 + max_merge_write_throughput: 100mb + merge_concurrency: 2 ingest_api: replication_factor: 2 diff --git 
a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index cfc7387a5d7..0d1658f1a4c 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -146,6 +146,15 @@ pub struct IndexerConfig { pub split_store_max_num_splits: usize, #[serde(default = "IndexerConfig::default_max_concurrent_split_uploads")] pub max_concurrent_split_uploads: usize, + /// Limits the IO throughput of the `SplitDownloader` and the `MergeExecutor`. + /// On hardware where IO is constrained, it makes sure that Merges (a batch operation) + /// does not starve indexing itself (as it is a latency sensitive operation). + #[serde(default)] + pub max_merge_write_throughput: Option, + /// Maximum number of merge or delete operations that can be executed concurrently. + /// (defaults to two thirds of the number of CPUs, and at least 1). + #[serde(default = "IndexerConfig::default_merge_concurrency")] + pub merge_concurrency: NonZeroUsize, /// Enables the OpenTelemetry exporter endpoint to ingest logs and traces via the OpenTelemetry /// Protocol (OTLP).
#[serde(default = "IndexerConfig::default_enable_otlp_endpoint")] @@ -166,10 +175,6 @@ impl IndexerConfig { false } - fn default_enable_standalone_compactors() -> bool { - false - } - fn default_enable_otlp_endpoint() -> bool { #[cfg(any(test, feature = "testsuite"))] { @@ -193,10 +198,18 @@ impl IndexerConfig { 1_000 } + pub fn default_merge_concurrency() -> NonZeroUsize { + NonZeroUsize::new(quickwit_common::num_cpus() * 2 / 3).unwrap_or(NonZeroUsize::MIN) + } + fn default_cpu_capacity() -> CpuCapacity { CpuCapacity::one_cpu_thread() * (quickwit_common::num_cpus() as u32) } + fn default_enable_standalone_compactors() -> bool { + false + } + #[cfg(any(test, feature = "testsuite"))] pub fn for_test() -> anyhow::Result { use quickwit_proto::indexing::PIPELINE_FULL_CAPACITY; @@ -207,6 +220,8 @@ impl IndexerConfig { split_store_max_num_splits: 3, max_concurrent_split_uploads: 4, cpu_capacity: PIPELINE_FULL_CAPACITY * 4u32, + max_merge_write_throughput: None, + merge_concurrency: NonZeroUsize::new(3).unwrap(), enable_standalone_compactors: false, }; Ok(indexer_config) @@ -222,6 +237,8 @@ impl Default for IndexerConfig { split_store_max_num_splits: Self::default_split_store_max_num_splits(), max_concurrent_split_uploads: Self::default_max_concurrent_split_uploads(), cpu_capacity: Self::default_cpu_capacity(), + merge_concurrency: Self::default_merge_concurrency(), + max_merge_write_throughput: None, enable_standalone_compactors: Self::default_enable_standalone_compactors(), } } @@ -234,6 +251,10 @@ pub struct CompactorConfig { /// a long time. Defaults to `num_cpus - 1`. #[serde(default = "CompactorConfig::default_max_concurrent_merge_executions")] pub max_concurrent_merge_executions: NonZeroUsize, + /// Multiplier: number of pipeline slots per merge execution. Since merges perform a lot + /// of IO, multiple concurrent merges can be interleaved.
+ #[serde(default = "CompactorConfig::default_pipeline_slots_per_merge_execution")] + pub pipeline_slots_per_merge_execution: NonZeroUsize, /// Maximum number of concurrent split uploads across all pipelines. #[serde(default = "CompactorConfig::default_max_concurrent_split_uploads")] pub max_concurrent_split_uploads: usize, @@ -248,6 +269,10 @@ impl CompactorConfig { NonZeroUsize::new(cpus).unwrap_or(NonZeroUsize::MIN) } + fn default_pipeline_slots_per_merge_execution() -> NonZeroUsize { + NonZeroUsize::new(2).unwrap() + } + fn default_max_concurrent_split_uploads() -> usize { 12 } @@ -256,6 +281,7 @@ impl CompactorConfig { pub fn for_test() -> Self { CompactorConfig { max_concurrent_merge_executions: NonZeroUsize::new(2).unwrap(), + pipeline_slots_per_merge_execution: Self::default_pipeline_slots_per_merge_execution(), max_concurrent_split_uploads: 4, max_merge_write_throughput: None, } @@ -266,6 +292,7 @@ impl Default for CompactorConfig { fn default() -> Self { Self { max_concurrent_merge_executions: Self::default_max_concurrent_merge_executions(), + pipeline_slots_per_merge_execution: Self::default_pipeline_slots_per_merge_execution(), max_concurrent_split_uploads: Self::default_max_concurrent_split_uploads(), max_merge_write_throughput: None, } diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs index 1f9dc30d6d3..9ce1c357f3f 100644 --- a/quickwit/quickwit-config/src/node_config/serialize.rs +++ b/quickwit/quickwit-config/src/node_config/serialize.rs @@ -87,9 +87,12 @@ impl FromStr for List { } fn default_enabled_services() -> ConfigValue { + // The compactor is excluded by default — it only runs in standalone mode, + // which an operator must explicitly opt into via `enable_standalone_compactors`. 
ConfigValue::with_default(List( QuickwitService::supported_services() .into_iter() + .filter(|service| *service != QuickwitService::Compactor) .map(|service| service.to_string()) .collect(), )) @@ -234,7 +237,7 @@ impl NodeConfigBuilder { self.indexer_config.enable_standalone_compactors = self.enable_standalone_compactors.resolve(env_vars)?; - let mut enabled_services: HashSet = self + let enabled_services: HashSet = self .enabled_services .resolve(env_vars)? .0 @@ -242,14 +245,6 @@ impl NodeConfigBuilder { .map(|service| service.parse()) .collect::>()?; - // Indexers implicitly run the compactor unless standalone compactors - // are enabled. - if enabled_services.contains(&QuickwitService::Indexer) - && !self.indexer_config.enable_standalone_compactors - { - enabled_services.insert(QuickwitService::Compactor); - } - let listen_address = self.listen_address.resolve(env_vars)?; let listen_host = listen_address.parse::()?; let listen_ip = listen_host.resolve().await?; @@ -365,6 +360,15 @@ fn validate(node_config: &NodeConfig) -> anyhow::Result<()> { if node_config.peer_seeds.is_empty() { warn!("peer seeds are empty"); } + if node_config.is_service_enabled(QuickwitService::Compactor) + && !node_config.indexer_config.enable_standalone_compactors + { + bail!( + "the `compactor` service can only be enabled when `enable_standalone_compactors` is \ + true (or `QW_ENABLE_STANDALONE_COMPACTORS=true`). With the default \ + indexer-local merge pipeline, the compactor service must not be enabled." 
+ ); + } validate_disk_usage(node_config); Ok(()) } @@ -673,8 +677,10 @@ mod tests { split_store_max_num_bytes: ByteSize::tb(1), split_store_max_num_splits: 10_000, max_concurrent_split_uploads: 8, + merge_concurrency: NonZeroUsize::new(2).unwrap(), cpu_capacity: IndexerConfig::default_cpu_capacity(), enable_cooperative_indexing: false, + max_merge_write_throughput: Some(ByteSize::mb(100)), enable_standalone_compactors: false, } ); @@ -786,6 +792,9 @@ mod tests { assert_eq!( config.enabled_services, QuickwitService::supported_services() + .into_iter() + .filter(|service| *service != QuickwitService::Compactor) + .collect::>(), ); assert_eq!( config.rest_config.listen_addr, @@ -1378,24 +1387,6 @@ mod tests { assert!(error_message.contains("replication factor")); } - #[tokio::test] - async fn test_indexer_implicitly_enables_compactor() { - let config_yaml = r#" - version: 0.8 - enabled_services: [indexer] - "#; - let config = - load_node_config_with_env(ConfigFormat::Yaml, config_yaml.as_bytes(), &HashMap::new()) - .await - .unwrap(); - assert!(config.enabled_services.contains(&QuickwitService::Indexer)); - assert!( - config - .enabled_services - .contains(&QuickwitService::Compactor) - ); - } - #[tokio::test] async fn test_standalone_compactors_prevents_implicit_compactor() { let config_yaml = r#" diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index 0acd20706cb..13560cb7e49 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -27,7 +27,7 @@ use quickwit_common::metrics::OwnedGaugeGuard; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_common::{KillSwitch, is_metrics_index}; -use quickwit_config::{IndexingSettings, SourceConfig}; +use quickwit_config::{IndexingSettings, SourceConfig, RetentionPolicy}; use quickwit_doc_mapper::DocMapper; use 
quickwit_ingest::IngesterPool; use quickwit_proto::indexing::IndexingPipelineId; @@ -37,6 +37,7 @@ use quickwit_storage::{Storage, StorageResolver}; use tokio::sync::Semaphore; use tracing::{debug, error, info, instrument}; +use super::MergePlanner; use crate::SplitsUpdateMailbox; use crate::actors::doc_processor::DocProcessor; use crate::actors::index_serializer::IndexSerializer; @@ -430,7 +431,7 @@ impl IndexingPipeline { let publisher = Publisher::new( PublisherType::MainPublisher, self.params.metastore.clone(), - None, + self.params.merge_planner_mailbox_opt.clone(), Some(source_mailbox.clone()), ); let (publisher_mailbox, publisher_handle) = ctx @@ -459,10 +460,10 @@ impl IndexingPipeline { UploaderType::IndexUploader, self.params.metastore.clone(), self.params.merge_policy.clone(), - None, + self.params.retention_policy.clone(), self.params.split_store.clone(), SplitsUpdateMailbox::Sequencer(sequencer_mailbox), - self.params.max_concurrent_split_uploads, + self.params.max_concurrent_split_uploads_index, self.params.event_broker.clone(), ); let (uploader_mailbox, uploader_handle) = ctx @@ -628,7 +629,7 @@ impl IndexingPipeline { self.params.metastore.clone(), self.params.storage.clone(), SplitsUpdateMailbox::Sequencer(parquet_sequencer_mailbox), - self.params.max_concurrent_split_uploads, + self.params.max_concurrent_split_uploads_index, ); let (parquet_uploader_mailbox, parquet_uploader_handle) = ctx .spawn_actor() @@ -847,11 +848,14 @@ pub struct IndexingPipelineParams { pub indexing_directory: TempDirectory, pub indexing_settings: IndexingSettings, pub split_store: IndexingSplitStore, - pub max_concurrent_split_uploads: usize, + pub max_concurrent_split_uploads_index: usize, pub cooperative_indexing_permits: Option>, // Merge-related parameters pub merge_policy: Arc, + pub retention_policy: Option, + pub merge_planner_mailbox_opt: Option>, + pub max_concurrent_split_uploads_merge: usize, // Source-related parameters pub source_config: SourceConfig, @@ 
-869,7 +873,8 @@ mod tests { use std::path::PathBuf; use std::sync::Arc; - use quickwit_actors::Universe; + use quickwit_actors::{Command, Universe}; + use quickwit_common::ServiceStream; use quickwit_config::{IndexingSettings, SourceInputFormat, SourceParams}; use quickwit_doc_mapper::{DocMapper, default_doc_mapper_for_test}; use quickwit_metastore::checkpoint::IndexCheckpointDelta; @@ -881,6 +886,7 @@ mod tests { use quickwit_proto::types::{IndexUid, NodeId, PipelineUid}; use quickwit_storage::RamStorage; + use crate::actors::merge_pipeline::{MergePipeline, MergePipelineParams}; use super::{IndexingPipeline, *}; use crate::merge_policy::default_merge_policy; @@ -965,6 +971,7 @@ mod tests { .returning(|_| Ok(EmptyResponse {})); let universe = Universe::new(); + let (merge_planner_mailbox, _) = universe.create_test_mailbox(); let storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::create_without_local_store_for_test(storage.clone()); let pipeline_params = IndexingPipelineParams { @@ -979,9 +986,12 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), + retention_policy: None, queues_dir_path: PathBuf::from("./queues"), - max_concurrent_split_uploads: 4, + max_concurrent_split_uploads_index: 4, + max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, + merge_planner_mailbox_opt: Some(merge_planner_mailbox), event_broker: EventBroker::default(), params_fingerprint: 42u64, }; @@ -1086,6 +1096,7 @@ mod tests { let universe = Universe::new(); let storage = Arc::new(RamStorage::default()); + let (merge_planner_mailbox, _) = universe.create_test_mailbox(); let split_store = IndexingSplitStore::create_without_local_store_for_test(storage.clone()); let pipeline_params = IndexingPipelineParams { pipeline_id, @@ -1100,8 +1111,11 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), - max_concurrent_split_uploads: 4, + retention_policy: None, + max_concurrent_split_uploads_index: 4, + 
max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, + merge_planner_mailbox_opt: Some(merge_planner_mailbox), event_broker: Default::default(), params_fingerprint: 42u64, }; @@ -1125,6 +1139,111 @@ mod tests { async fn test_indexing_pipeline_simple_gz() -> anyhow::Result<()> { indexing_pipeline_simple("data/test_corpus.json.gz").await } + #[tokio::test] + async fn test_merge_pipeline_does_not_stop_on_indexing_pipeline_failure() { + let node_id = NodeId::from("test-node"); + let pipeline_id = IndexingPipelineId { + node_id, + index_uid: IndexUid::new_with_random_ulid("test-index"), + source_id: "test-source".to_string(), + pipeline_uid: PipelineUid::for_test(0u128), + }; + let source_config = SourceConfig { + source_id: "test-source".to_string(), + num_pipelines: NonZeroUsize::MIN, + enabled: true, + source_params: SourceParams::void(), + transform_config: None, + input_format: SourceInputFormat::Json, + }; + let source_config_clone = source_config.clone(); + + let mut mock_metastore = MockMetastoreService::new(); + mock_metastore + .expect_index_metadata() + .withf(|index_metadata_request| { + index_metadata_request.index_uid.as_ref().unwrap() == &("test-index", 2) + }) + .returning(move |_| { + let mut index_metadata = + IndexMetadata::for_test("test-index", "ram:///indexes/test-index"); + index_metadata + .add_source(source_config_clone.clone()) + .unwrap(); + Ok(IndexMetadataResponse::try_from_index_metadata(&index_metadata).unwrap()) + }); + mock_metastore + .expect_list_splits() + .returning(|_| Ok(ServiceStream::empty())); + let metastore = MetastoreServiceClient::from_mock(mock_metastore); + + let universe = Universe::with_accelerated_time(); + let doc_mapper = Arc::new(default_doc_mapper_for_test()); + let storage = Arc::new(RamStorage::default()); + let split_store = IndexingSplitStore::create_without_local_store_for_test(storage.clone()); + let merge_pipeline_params = MergePipelineParams { + pipeline_id: 
pipeline_id.merge_pipeline_id(), + doc_mapper: doc_mapper.clone(), + indexing_directory: TempDirectory::for_test(), + metastore: metastore.clone(), + split_store: split_store.clone(), + merge_policy: default_merge_policy(), + retention_policy: None, + max_concurrent_split_uploads: 2, + merge_io_throughput_limiter_opt: None, + merge_scheduler_service: universe.get_or_spawn_one(), + event_broker: Default::default(), + }; + let merge_pipeline = MergePipeline::new(merge_pipeline_params, None, universe.spawn_ctx()); + let merge_planner_mailbox = merge_pipeline.merge_planner_mailbox().clone(); + let (_merge_pipeline_mailbox, merge_pipeline_handler) = + universe.spawn_builder().spawn(merge_pipeline); + let indexing_pipeline_params = IndexingPipelineParams { + pipeline_id, + doc_mapper, + source_config, + source_storage_resolver: StorageResolver::for_test(), + indexing_directory: TempDirectory::for_test(), + indexing_settings: IndexingSettings::for_test(), + ingester_pool: IngesterPool::default(), + metastore, + queues_dir_path: PathBuf::from("./queues"), + storage, + split_store, + merge_policy: default_merge_policy(), + retention_policy: None, + max_concurrent_split_uploads_index: 4, + max_concurrent_split_uploads_merge: 5, + cooperative_indexing_permits: None, + merge_planner_mailbox_opt: Some(merge_planner_mailbox.clone()), + event_broker: Default::default(), + params_fingerprint: 42u64, + }; + let indexing_pipeline = IndexingPipeline::new(indexing_pipeline_params); + let (_indexing_pipeline_mailbox, indexing_pipeline_handler) = + universe.spawn_builder().spawn(indexing_pipeline); + let obs = indexing_pipeline_handler + .process_pending_and_observe() + .await; + assert_eq!(obs.generation, 1); + // Let's shutdown the indexer, this will trigger the indexing pipeline failure and the + // restart. 
+ let indexer = universe.get::().into_iter().next().unwrap(); + let _ = indexer.ask(Command::Quit).await; + for _ in 0..10 { + universe.sleep(*quickwit_actors::HEARTBEAT).await; + // Check indexing pipeline has restarted. + let obs = indexing_pipeline_handler + .process_pending_and_observe() + .await; + if obs.generation == 2 { + assert_eq!(merge_pipeline_handler.check_health(true), Health::Healthy); + universe.quit().await; + return; + } + } + panic!("Pipeline was apparently not restarted."); + } async fn indexing_pipeline_all_failures_handling(test_file: &str) -> anyhow::Result<()> { let node_id = NodeId::from("test-node"); @@ -1188,6 +1307,7 @@ mod tests { let universe = Universe::new(); let storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::create_without_local_store_for_test(storage.clone()); + let (merge_planner_mailbox, _) = universe.create_test_mailbox(); // Create a minimal mapper with wrong date format to ensure that all documents will fail let broken_mapper = serde_json::from_str::( r#" @@ -1219,8 +1339,11 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), - max_concurrent_split_uploads: 4, + retention_policy: None, + max_concurrent_split_uploads_index: 4, + max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, + merge_planner_mailbox_opt: Some(merge_planner_mailbox), params_fingerprint: 42u64, event_broker: Default::default(), }; diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 7ae957937aa..52bd36f1e9a 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use crate::models::DetachMergePipeline; +use quickwit_common::io; +use futures::TryStreamExt; use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Formatter}; use std::path::PathBuf; @@ -37,15 +40,15 @@ use quickwit_ingest::{ }; use quickwit_metastore::{ IndexMetadata, IndexMetadataResponseExt, IndexesMetadataResponseExt, - ListIndexesMetadataResponseExt, + ListIndexesMetadataResponseExt, ListSplitsQuery, ListSplitsRequestExt, ListSplitsResponseExt, SplitMetadata, SplitState, }; use quickwit_proto::indexing::{ ApplyIndexingPlanRequest, ApplyIndexingPlanResponse, IndexingError, IndexingPipelineId, - IndexingTask, PipelineMetrics, + IndexingTask, MergePipelineId, PipelineMetrics, }; use quickwit_proto::metastore::{ IndexMetadataRequest, IndexMetadataSubrequest, IndexesMetadataRequest, - ListIndexesMetadataRequest, MetastoreService, MetastoreServiceClient, + ListIndexesMetadataRequest, MetastoreService, MetastoreServiceClient, ListSplitsRequest, MetastoreResult, }; use quickwit_proto::types::{IndexId, IndexUid, NodeId, PipelineUid, ShardId}; use quickwit_storage::StorageResolver; @@ -55,8 +58,12 @@ use tracing::{debug, error, info, warn}; use crate::models::{DetachIndexingPipeline, ObservePipeline, SpawnPipeline}; use crate::source::{AssignShards, Assignment}; +use time::OffsetDateTime; use crate::split_store::IndexingSplitCache; use crate::{IndexingPipeline, IndexingPipelineParams, IndexingSplitStore, IndexingStatistics}; +use super::merge_pipeline::{MergePipeline, MergePipelineParams}; +use super::{MergePlanner, MergeSchedulerService}; +use crate::actors::merge_pipeline::FinishPendingMergesAndShutdownPipeline; /// Name of the indexing directory, usually located at `/indexing`. 
pub const INDEXING_DIR_NAME: &str = "indexing"; @@ -66,10 +73,16 @@ pub struct IndexingServiceCounters { pub num_running_pipelines: usize, pub num_successful_pipelines: usize, pub num_failed_pipelines: usize, + pub num_running_merge_pipelines: usize, pub num_deleted_queues: usize, pub num_delete_queue_failures: usize, } +struct MergePipelineHandle { + mailbox: Mailbox, + handle: ActorHandle, +} + struct PipelineHandle { mailbox: Mailbox, handle: ActorHandle, @@ -93,9 +106,12 @@ pub struct IndexingService { storage_resolver: StorageResolver, indexing_pipelines: HashMap, counters: IndexingServiceCounters, + merge_scheduler_service_opt: Option>, split_cache: Arc, max_concurrent_split_uploads: usize, + merge_pipeline_handles: HashMap, cooperative_indexing_permits: Option>, + merge_io_throughput_limiter_opt: Option, event_broker: EventBroker, } @@ -120,11 +136,14 @@ impl IndexingService { cluster: Cluster, metastore: MetastoreServiceClient, ingest_api_service_opt: Option>, + merge_scheduler_service_opt: Option>, ingester_pool: IngesterPool, storage_resolver: StorageResolver, event_broker: EventBroker, split_cache: Arc, ) -> anyhow::Result { + let merge_io_throughput_limiter_opt = + indexer_config.max_merge_write_throughput.map(io::limiter); let indexing_root_directory = temp_dir::create_or_purge_directory(&data_dir_path.join(INDEXING_DIR_NAME)).await?; let queue_dir_path = data_dir_path.join(QUEUES_DIR_NAME); @@ -140,12 +159,15 @@ impl IndexingService { cluster, metastore, ingest_api_service_opt, + merge_scheduler_service_opt, ingester_pool, storage_resolver, split_cache, indexing_pipelines: Default::default(), counters: Default::default(), max_concurrent_split_uploads: indexer_config.max_concurrent_split_uploads, + merge_pipeline_handles: HashMap::new(), + merge_io_throughput_limiter_opt, cooperative_indexing_permits, event_broker, }) @@ -166,6 +188,21 @@ impl IndexingService { Ok(pipeline_handle.handle) } + async fn detach_merge_pipeline( + &mut self, + pipeline_id: 
&MergePipelineId, + ) -> Result, IndexingError> { + let pipeline_handle = self + .merge_pipeline_handles + .remove(pipeline_id) + .ok_or_else(|| { + let message = format!("could not find merge pipeline `{pipeline_id}`"); + IndexingError::Internal(message) + })?; + self.counters.num_running_merge_pipelines -= 1; + Ok(pipeline_handle.handle) + } + async fn observe_pipeline( &mut self, pipeline_uid: &PipelineUid, @@ -197,7 +234,14 @@ impl IndexingService { pipeline_uid, }; let index_config = index_metadata.into_index_config(); - self.spawn_pipeline_inner(ctx, pipeline_id.clone(), index_config, source_config, None) + self.spawn_pipeline_inner( + ctx, + pipeline_id.clone(), + index_config, + source_config, + None, + None, + ) .await?; Ok(pipeline_id) } @@ -208,6 +252,7 @@ impl IndexingService { indexing_pipeline_id: IndexingPipelineId, index_config: IndexConfig, source_config: SourceConfig, + immature_splits_opt: Option>, expected_params_fingerprint: Option, ) -> Result<(), IndexingError> { if self @@ -238,11 +283,42 @@ impl IndexingService { })?; let merge_policy = crate::merge_policy::merge_policy_from_settings(&index_config.indexing_settings); + let retention_policy = index_config.retention_policy_opt.clone(); let split_store = IndexingSplitStore::new(storage.clone(), self.split_cache.clone()); let doc_mapper = build_doc_mapper(&index_config.doc_mapping, &index_config.search_settings) .map_err(|error| IndexingError::Internal(error.to_string()))?; + let merge_planner_mailbox_opt = if let Some(merge_scheduler_service) = + self.merge_scheduler_service_opt.clone() + { + let merge_pipeline_id = indexing_pipeline_id.merge_pipeline_id(); + let merge_pipeline_params = MergePipelineParams { + pipeline_id: merge_pipeline_id, + doc_mapper: doc_mapper.clone(), + indexing_directory: indexing_directory.clone(), + metastore: self.metastore.clone(), + split_store: split_store.clone(), + merge_scheduler_service, + merge_policy: merge_policy.clone(), + retention_policy: 
retention_policy.clone(), + merge_io_throughput_limiter_opt: self.merge_io_throughput_limiter_opt.clone(), + max_concurrent_split_uploads: self.max_concurrent_split_uploads, + event_broker: self.event_broker.clone(), + }; + Some(self.get_or_create_merge_pipeline(merge_pipeline_params, immature_splits_opt, ctx)) + } else { + None + }; + + let max_concurrent_split_uploads_index = if self.merge_scheduler_service_opt.is_some() { + (self.max_concurrent_split_uploads / 2).max(1) + } else { + self.max_concurrent_split_uploads + }; + let max_concurrent_split_uploads_merge = + self.max_concurrent_split_uploads - max_concurrent_split_uploads_index; + let params_fingerprint = indexing_pipeline_params_fingerprint(&index_config, &source_config); if let Some(expected_params_fingerprint) = expected_params_fingerprint { @@ -271,10 +347,13 @@ impl IndexingService { indexing_directory, indexing_settings: index_config.indexing_settings.clone(), split_store, - max_concurrent_split_uploads: self.max_concurrent_split_uploads, + max_concurrent_split_uploads_index: self.max_concurrent_split_uploads, cooperative_indexing_permits: self.cooperative_indexing_permits.clone(), - // The merge policy is needed in the uploader for determining split maturity + // Merge-related parameters merge_policy, + retention_policy, + max_concurrent_split_uploads_merge, + merge_planner_mailbox_opt, // Source-related parameters source_config, @@ -341,6 +420,70 @@ impl IndexingService { Ok(indexes_metadata) } + /// Fetches the immature splits candidates for merge for all the indexing pipelines for which a + /// merge pipeline is not running. 
+ async fn fetch_immature_splits_for_new_merge_pipelines( + &mut self, + indexing_pipeline_ids: &[IndexingPipelineId], + ctx: &ActorContext, + ) -> MetastoreResult>> { + if self.merge_scheduler_service_opt.is_none() { + return Ok(Default::default()); + } + let mut index_uids = Vec::new(); + + for indexing_pipeline_id in indexing_pipeline_ids { + let merge_pipeline_id = indexing_pipeline_id.merge_pipeline_id(); + + if !self.merge_pipeline_handles.contains_key(&merge_pipeline_id) { + index_uids.push(merge_pipeline_id.index_uid); + } + } + if index_uids.is_empty() { + return Ok(Default::default()); + } + index_uids.sort_unstable(); + index_uids.dedup(); + + let list_splits_query = ListSplitsQuery::try_from_index_uids(index_uids) + .expect("`index_uids` should not be empty") + .with_node_id(self.node_id.clone()) + .with_split_state(SplitState::Published) + .retain_immature(OffsetDateTime::now_utc()); + let list_splits_request = + ListSplitsRequest::try_from_list_splits_query(&list_splits_query)?; + + let mut immature_splits_stream = ctx + .protect_future(self.metastore.list_splits(list_splits_request)) + .await?; + + let mut per_merge_pipeline_immature_splits: HashMap> = + indexing_pipeline_ids + .iter() + .map(|indexing_pipeline_id| (indexing_pipeline_id.merge_pipeline_id(), Vec::new())) + .collect(); + + let mut num_immature_splits = 0usize; + + while let Some(list_splits_response) = immature_splits_stream.try_next().await? { + for split_metadata in list_splits_response.deserialize_splits_metadata().await? 
{ + num_immature_splits += 1; + + let merge_pipeline_id = MergePipelineId { + node_id: self.node_id.clone(), + index_uid: split_metadata.index_uid.clone(), + source_id: split_metadata.source_id.clone(), + }; + per_merge_pipeline_immature_splits + .entry(merge_pipeline_id) + .or_default() + .push(split_metadata); + } + } + info!("fetched {num_immature_splits} splits candidates for merge"); + Ok(per_merge_pipeline_immature_splits) + } + async fn handle_supervise(&mut self) -> Result<(), ActorExitStatus> { self.indexing_pipelines .retain(|pipeline_uid, pipeline_handle| { @@ -368,6 +511,48 @@ impl IndexingService { } } }); + let merge_pipelines_to_retain: HashSet = self + .indexing_pipelines + .values() + .map(|pipeline_handle| pipeline_handle.indexing_pipeline_id.merge_pipeline_id()) + .collect(); + + let merge_pipelines_to_shutdown: Vec = self + .merge_pipeline_handles + .keys() + .filter(|running_merge_pipeline_id| { + !merge_pipelines_to_retain.contains(running_merge_pipeline_id) + }) + .cloned() + .collect(); + + for merge_pipeline_to_shutdown in merge_pipelines_to_shutdown { + if let Some((_, merge_pipeline_handle)) = self + .merge_pipeline_handles + .remove_entry(&merge_pipeline_to_shutdown) + { + // We gracefully shutdown the merge pipeline, so we can complete the in-flight + // merges. + info!( + index_uid=%merge_pipeline_to_shutdown.index_uid, + source_id=%merge_pipeline_to_shutdown.source_id, + "shutting down orphan merge pipeline" + ); + // The queue capacity of the merge pipeline is unbounded, so `.send_message(...)` + // should not block. + // We avoid using `.quit()` here because it waits for the actor to exit. + merge_pipeline_handle + .handle + .mailbox() + .send_message(FinishPendingMergesAndShutdownPipeline) + .await + .expect("merge pipeline mailbox should not be full"); + } + } + // Finally, we remove the completed or failed merge pipelines. 
+ self.merge_pipeline_handles + .retain(|_, merge_pipeline_handle| merge_pipeline_handle.handle.state().is_running()); + self.counters.num_running_merge_pipelines = self.merge_pipeline_handles.len(); self.update_chitchat_running_plan().await; let pipeline_metrics: HashMap<&IndexingPipelineId, PipelineMetrics> = self @@ -385,6 +570,33 @@ impl IndexingService { Ok(()) } + fn get_or_create_merge_pipeline( + &mut self, + merge_pipeline_params: MergePipelineParams, + immature_splits_opt: Option>, + ctx: &ActorContext, + ) -> Mailbox { + if let Some(merge_pipeline_handle) = self + .merge_pipeline_handles + .get(&merge_pipeline_params.pipeline_id) + { + return merge_pipeline_handle.mailbox.clone(); + } + let merge_pipeline_id = merge_pipeline_params.pipeline_id.clone(); + let merge_pipeline = + MergePipeline::new(merge_pipeline_params, immature_splits_opt, ctx.spawn_ctx()); + let merge_planner_mailbox = merge_pipeline.merge_planner_mailbox().clone(); + let (_pipeline_mailbox, pipeline_handle) = ctx.spawn_actor().spawn(merge_pipeline); + let merge_pipeline_handle = MergePipelineHandle { + mailbox: merge_planner_mailbox.clone(), + handle: pipeline_handle, + }; + self.merge_pipeline_handles + .insert(merge_pipeline_id, merge_pipeline_handle); + self.counters.num_running_merge_pipelines += 1; + merge_planner_mailbox + } + /// For all Ingest V2 pipelines, assigns the set of shards they should be working on. /// This is done regardless of whether there has been a change in their shard list /// or not. 
@@ -491,6 +703,10 @@ impl IndexingService { .map(|index_metadata| (index_metadata.index_uid.clone(), index_metadata)) .collect(); + let mut per_merge_pipeline_immature_splits: HashMap> = + self.fetch_immature_splits_for_new_merge_pipelines(&pipelines_to_spawn_ids, ctx) + .await?; + let mut spawn_pipeline_failures: Vec = Vec::new(); for (task_to_spawn, id_to_spawn) in pipelines_to_spawn.iter().zip(pipelines_to_spawn_ids) { @@ -498,12 +714,17 @@ impl IndexingService { per_index_uid_indexes_metadata.get(task_to_spawn.index_uid()) { if let Some(source_config) = index_metadata.sources.get(&task_to_spawn.source_id) { + let merge_pipeline_id = id_to_spawn.merge_pipeline_id(); + let immature_splits_opt = + per_merge_pipeline_immature_splits.remove(&merge_pipeline_id); + if let Err(error) = self .spawn_pipeline_inner( ctx, id_to_spawn.clone(), index_metadata.index_config.clone(), source_config.clone(), + immature_splits_opt, Some(task_to_spawn.params_fingerprint), ) .await @@ -682,6 +903,19 @@ impl Handler for IndexingService { } } +#[async_trait] +impl Handler for IndexingService { + type Reply = Result, IndexingError>; + + async fn handle( + &mut self, + msg: DetachMergePipeline, + _ctx: &ActorContext, + ) -> Result { + Ok(self.detach_merge_pipeline(&msg.pipeline_id).await) + } +} + #[derive(Debug)] struct SuperviseLoop; @@ -785,16 +1019,17 @@ mod tests { }; use quickwit_ingest::{CreateQueueIfNotExistsRequest, init_ingest_api}; use quickwit_metastore::{ - AddSourceRequestExt, CreateIndexRequestExt, ListIndexesMetadataResponseExt, + AddSourceRequestExt, CreateIndexRequestExt, ListIndexesMetadataResponseExt, Split, metastore_for_test, }; use quickwit_proto::indexing::IndexingTask; use quickwit_proto::metastore::{ AddSourceRequest, CreateIndexRequest, DeleteIndexRequest, IndexMetadataResponse, - IndexesMetadataResponse, ListIndexesMetadataResponse, MockMetastoreService, - }; + IndexesMetadataResponse, ListIndexesMetadataResponse, ListSplitsResponse, + MockMetastoreService, 
}; use super::*; + use crate::actors::merge_pipeline::SUPERVISE_LOOP_INTERVAL; async fn spawn_indexing_service_for_test( data_dir_path: &Path, @@ -810,6 +1045,7 @@ mod tests { init_ingest_api(universe, &queues_dir_path, &IngestApiConfig::default()) .await .unwrap(); + let merge_scheduler_mailbox: Mailbox = universe.get_or_spawn_one(); let indexing_server = IndexingService::new( NodeId::from("test-node"), data_dir_path.to_path_buf(), @@ -818,6 +1054,7 @@ mod tests { cluster, metastore, Some(ingest_api_service), + Some(merge_scheduler_mailbox), IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), @@ -918,8 +1155,15 @@ mod tests { .await .unwrap(); pipeline_handle.kill().await; + let _merge_pipeline = indexing_service + .ask_for_res(DetachMergePipeline { + pipeline_id: pipeline_id.merge_pipeline_id(), + }) + .await + .unwrap(); let observation = indexing_service_handle.process_pending_and_observe().await; assert_eq!(observation.num_running_pipelines, 0); + assert_eq!(observation.num_running_merge_pipelines, 0); universe.assert_quit().await; } @@ -1270,6 +1514,266 @@ mod tests { universe.assert_quit().await; } + #[tokio::test] + async fn test_indexing_service_shutdown_merge_pipeline_when_no_indexing_pipeline() { + quickwit_common::setup_logging_for_tests(); + let transport = ChannelTransport::default(); + let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true) + .await + .unwrap(); + let metastore = metastore_for_test(); + + let index_id = append_random_suffix("test-indexing-service"); + let index_uri = format!("ram:///indexes/{index_id}"); + let index_config = IndexConfig::for_test(&index_id, &index_uri); + + let source_config = SourceConfig { + source_id: "test-indexing-service--source".to_string(), + num_pipelines: NonZeroUsize::MIN, + enabled: true, + source_params: SourceParams::void(), + transform_config: None, + input_format: SourceInputFormat::Json, + }; + let create_index_request = + 
CreateIndexRequest::try_from_index_config(&index_config).unwrap(); + let index_uid: IndexUid = metastore + .create_index(create_index_request) + .await + .unwrap() + .index_uid() + .clone(); + let add_source_request = + AddSourceRequest::try_from_source_config(index_uid.clone(), &source_config).unwrap(); + metastore.add_source(add_source_request).await.unwrap(); + + // Test `IndexingService::new`. + let temp_dir = tempfile::tempdir().unwrap(); + let data_dir_path = temp_dir.path().to_path_buf(); + let indexer_config = IndexerConfig::for_test().unwrap(); + let num_blocking_threads = 1; + let storage_resolver = StorageResolver::unconfigured(); + let universe = Universe::with_accelerated_time(); + let queues_dir_path = data_dir_path.join(QUEUES_DIR_NAME); + let ingest_api_service = + init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default()) + .await + .unwrap(); + let merge_scheduler_service = universe.get_or_spawn_one(); + let indexing_server = IndexingService::new( + NodeId::from("test-node"), + data_dir_path, + indexer_config, + num_blocking_threads, + cluster.clone(), + metastore.clone(), + Some(ingest_api_service), + Some(merge_scheduler_service), + IngesterPool::default(), + storage_resolver.clone(), + EventBroker::default(), + Arc::new(IndexingSplitCache::no_caching()), + ) + .await + .unwrap(); + let (indexing_server_mailbox, indexing_server_handle) = + universe.spawn_builder().spawn(indexing_server); + let pipeline_id = indexing_server_mailbox + .ask_for_res(SpawnPipeline { + index_id: index_id.clone(), + source_config, + pipeline_uid: PipelineUid::default(), + }) + .await + .unwrap(); + let observation = indexing_server_handle.observe().await; + assert_eq!(observation.num_running_pipelines, 1); + assert_eq!(observation.num_failed_pipelines, 0); + assert_eq!(observation.num_successful_pipelines, 0); + assert_eq!(observation.num_running_merge_pipelines, 1); + + // Test `shutdown_pipeline` + let pipeline = indexing_server_mailbox + 
.ask_for_res(DetachIndexingPipeline { pipeline_id }) + .await + .unwrap(); + pipeline.quit().await; + + // Let the service cleanup the merge pipelines. + universe.sleep(*HEARTBEAT).await; + + let observation = indexing_server_handle.process_pending_and_observe().await; + assert_eq!(observation.num_running_pipelines, 0); + assert_eq!(observation.num_running_merge_pipelines, 0); + universe.sleep(SUPERVISE_LOOP_INTERVAL).await; + // Check that the merge pipeline is also shut down as they are no more indexing pipeilne on + // the index. + assert!(universe.get_one::().is_none()); + // It may or may not panic + universe.quit().await; + } + + #[tokio::test] + async fn test_indexing_service_no_merge_pipeline_when_no_merge_scheduler() { + quickwit_common::setup_logging_for_tests(); + let transport = ChannelTransport::default(); + let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true) + .await + .unwrap(); + let metastore = metastore_for_test(); + + let index_id = append_random_suffix("test-indexing-service-no-merge"); + let index_uri = format!("ram:///indexes/{index_id}"); + let index_config = IndexConfig::for_test(&index_id, &index_uri); + + let source_config = SourceConfig { + source_id: "test-source".to_string(), + num_pipelines: NonZeroUsize::MIN, + enabled: true, + source_params: SourceParams::void(), + transform_config: None, + input_format: SourceInputFormat::Json, + }; + let create_index_request = + CreateIndexRequest::try_from_index_config(&index_config).unwrap(); + let index_uid: IndexUid = metastore + .create_index(create_index_request) + .await + .unwrap() + .index_uid() + .clone(); + let add_source_request = + AddSourceRequest::try_from_source_config(index_uid.clone(), &source_config).unwrap(); + metastore.add_source(add_source_request).await.unwrap(); + + let temp_dir = tempfile::tempdir().unwrap(); + let data_dir_path = temp_dir.path().to_path_buf(); + let indexer_config = IndexerConfig::for_test().unwrap(); + let 
num_blocking_threads = 1; + let storage_resolver = StorageResolver::unconfigured(); + let universe = Universe::with_accelerated_time(); + let queues_dir_path = data_dir_path.join(QUEUES_DIR_NAME); + let ingest_api_service = + init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default()) + .await + .unwrap(); + let indexing_server = IndexingService::new( + NodeId::from("test-node"), + data_dir_path, + indexer_config, + num_blocking_threads, + cluster.clone(), + metastore.clone(), + Some(ingest_api_service), + None, // No merge scheduler — external merge service handles compaction. + IngesterPool::default(), + storage_resolver.clone(), + EventBroker::default(), + Arc::new(IndexingSplitCache::no_caching()), + ) + .await + .unwrap(); + let (indexing_server_mailbox, indexing_server_handle) = + universe.spawn_builder().spawn(indexing_server); + + indexing_server_mailbox + .ask_for_res(SpawnPipeline { + index_id: index_id.clone(), + source_config, + pipeline_uid: PipelineUid::default(), + }) + .await + .unwrap(); + + let observation = indexing_server_handle.observe().await; + assert_eq!(observation.num_running_pipelines, 1); + assert_eq!(observation.num_running_merge_pipelines, 0); + assert!(universe.get_one::().is_none()); + + universe.quit().await; + } + + #[tokio::test] + async fn test_indexing_service_spawns_merge_pipeline_with_merge_scheduler() { + quickwit_common::setup_logging_for_tests(); + let transport = ChannelTransport::default(); + let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true) + .await + .unwrap(); + let metastore = metastore_for_test(); + + let index_id = append_random_suffix("test-indexing-service-with-merge"); + let index_uri = format!("ram:///indexes/{index_id}"); + let index_config = IndexConfig::for_test(&index_id, &index_uri); + + let source_config = SourceConfig { + source_id: "test-source".to_string(), + num_pipelines: NonZeroUsize::MIN, + enabled: true, + source_params: SourceParams::void(), + 
transform_config: None, + input_format: SourceInputFormat::Json, + }; + let create_index_request = + CreateIndexRequest::try_from_index_config(&index_config).unwrap(); + let index_uid: IndexUid = metastore + .create_index(create_index_request) + .await + .unwrap() + .index_uid() + .clone(); + let add_source_request = + AddSourceRequest::try_from_source_config(index_uid.clone(), &source_config).unwrap(); + metastore.add_source(add_source_request).await.unwrap(); + + let temp_dir = tempfile::tempdir().unwrap(); + let data_dir_path = temp_dir.path().to_path_buf(); + let indexer_config = IndexerConfig::for_test().unwrap(); + let num_blocking_threads = 1; + let storage_resolver = StorageResolver::unconfigured(); + let universe = Universe::with_accelerated_time(); + let queues_dir_path = data_dir_path.join(QUEUES_DIR_NAME); + let ingest_api_service = + init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default()) + .await + .unwrap(); + let merge_scheduler_mailbox: Mailbox = universe.get_or_spawn_one(); + let indexing_server = IndexingService::new( + NodeId::from("test-node"), + data_dir_path, + indexer_config, + num_blocking_threads, + cluster.clone(), + metastore.clone(), + Some(ingest_api_service), + Some(merge_scheduler_mailbox), + IngesterPool::default(), + storage_resolver.clone(), + EventBroker::default(), + Arc::new(IndexingSplitCache::no_caching()), + ) + .await + .unwrap(); + let (indexing_server_mailbox, indexing_server_handle) = + universe.spawn_builder().spawn(indexing_server); + + indexing_server_mailbox + .ask_for_res(SpawnPipeline { + index_id: index_id.clone(), + source_config, + pipeline_uid: PipelineUid::default(), + }) + .await + .unwrap(); + + let observation = indexing_server_handle.observe().await; + assert_eq!(observation.num_running_pipelines, 1); + assert_eq!(observation.num_running_merge_pipelines, 1); + assert!(universe.get_one::().is_some()); + + universe.quit().await; + } + #[derive(Debug)] struct FreezePipeline; #[async_trait] 
@@ -1374,6 +1878,7 @@ mod tests { let observation = indexing_service_handle.observe().await; assert_eq!(observation.num_running_pipelines, 1); assert_eq!(observation.num_failed_pipelines, 0); + assert_eq!(observation.num_running_merge_pipelines, 1); // Might generate panics universe.quit().await; } @@ -1418,6 +1923,7 @@ mod tests { let indexer_config = IndexerConfig::for_test().unwrap(); let num_blocking_threads = 1; let storage_resolver = StorageResolver::unconfigured(); + let merge_scheduler_service: Mailbox = universe.get_or_spawn_one(); let mut indexing_server = IndexingService::new( NodeId::from("test-ingest-api-gc-node"), data_dir_path, @@ -1426,6 +1932,7 @@ mod tests { cluster.clone(), metastore.clone(), Some(ingest_api_service.clone()), + Some(merge_scheduler_service), IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), @@ -1495,6 +2002,31 @@ mod tests { Ok(response) }); + mock_metastore + .expect_list_splits() + .withf(|request| { + let list_splits_query = request.deserialize_list_splits_query().unwrap(); + list_splits_query.index_uids.unwrap() == [("test-index-0", 0)] + }) + .return_once(|_request| Ok(ServiceStream::empty())); + mock_metastore + .expect_list_splits() + .withf(|request| { + let list_splits_query = request.deserialize_list_splits_query().unwrap(); + list_splits_query.index_uids.unwrap() == [("test-index-1", 0), ("test-index-2", 0)] + }) + .return_once(|_request| { + let splits = vec![Split { + split_metadata: SplitMetadata::for_test("test-split".to_string()), + split_state: SplitState::Published, + update_timestamp: 0, + publish_timestamp: Some(0), + }]; + let list_splits_response = ListSplitsResponse::try_from_splits(splits).unwrap(); + let response = ServiceStream::from(vec![Ok(list_splits_response)]); + Ok(response) + }); + let transport = ChannelTransport::default(); let cluster = create_cluster_for_test(Vec::new(), &["indexer"], &transport, true) .await diff --git a/quickwit/quickwit-indexing/src/lib.rs 
b/quickwit/quickwit-indexing/src/lib.rs index 379f038484f..cff5452b89f 100644 --- a/quickwit/quickwit-indexing/src/lib.rs +++ b/quickwit/quickwit-indexing/src/lib.rs @@ -26,6 +26,7 @@ use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_storage::StorageResolver; use tracing::info; +use crate::actors::MergeSchedulerService; pub use crate::actors::{ FinishPendingMergesAndShutdownPipeline, IndexingError, IndexingPipeline, IndexingPipelineParams, IndexingService, PublisherType, Sequencer, SplitsUpdateMailbox, @@ -73,6 +74,7 @@ pub async fn start_indexing_service( ingester_pool: IngesterPool, storage_resolver: StorageResolver, event_broker: EventBroker, + merge_scheduler_mailbox: Option>, indexing_split_cache: Arc, ) -> anyhow::Result> { info!("starting indexer service"); @@ -85,6 +87,7 @@ pub async fn start_indexing_service( cluster, metastore.clone(), ingest_api_service_mailbox, + merge_scheduler_mailbox, ingester_pool, storage_resolver, event_broker, diff --git a/quickwit/quickwit-indexing/src/models/indexing_service_message.rs b/quickwit/quickwit-indexing/src/models/indexing_service_message.rs index 868f8029be7..67548b9c09d 100644 --- a/quickwit/quickwit-indexing/src/models/indexing_service_message.rs +++ b/quickwit/quickwit-indexing/src/models/indexing_service_message.rs @@ -13,7 +13,7 @@ // limitations under the License. use quickwit_config::SourceConfig; -use quickwit_proto::indexing::IndexingPipelineId; +use quickwit_proto::indexing::{IndexingPipelineId, MergePipelineId}; use quickwit_proto::types::{IndexId, PipelineUid}; #[derive(Clone, Debug)] @@ -31,6 +31,14 @@ pub struct DetachIndexingPipeline { pub pipeline_id: IndexingPipelineId, } +/// Detaches a merge pipeline from the indexing service. The pipeline is no longer managed by the +/// server. This is mostly useful for preventing the server killing an existing merge pipeline +/// if a indexing pipeline is detached. 
+#[derive(Debug)] +pub struct DetachMergePipeline { + pub pipeline_id: MergePipelineId, +} + #[derive(Debug)] pub struct ObservePipeline { pub pipeline_id: IndexingPipelineId, diff --git a/quickwit/quickwit-indexing/src/models/mod.rs b/quickwit/quickwit-indexing/src/models/mod.rs index 7f84bbab0af..fd3188e2104 100644 --- a/quickwit/quickwit-indexing/src/models/mod.rs +++ b/quickwit/quickwit-indexing/src/models/mod.rs @@ -34,7 +34,9 @@ pub use indexed_split::{ CommitTrigger, EmptySplit, IndexedSplit, IndexedSplitBatch, IndexedSplitBatchBuilder, IndexedSplitBuilder, }; -pub use indexing_service_message::{DetachIndexingPipeline, ObservePipeline, SpawnPipeline}; +pub use indexing_service_message::{ + DetachIndexingPipeline, DetachMergePipeline, ObservePipeline, SpawnPipeline, +}; pub use indexing_statistics::IndexingStatistics; pub use merge_planner_message::NewSplits; pub use merge_scratch::MergeScratch; diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs index 62634d136cc..e6c824ce788 100644 --- a/quickwit/quickwit-indexing/src/test_utils.rs +++ b/quickwit/quickwit-indexing/src/test_utils.rs @@ -112,6 +112,7 @@ impl TestSandbox { let num_blocking_threads = 1; let storage = storage_resolver.resolve(&index_uri).await?; let universe = Universe::with_accelerated_time(); + let merge_scheduler_mailbox = universe.get_or_spawn_one(); let queues_dir_path = temp_dir.path().join(QUEUES_DIR_NAME); let ingest_api_service = init_ingest_api(&universe, &queues_dir_path, &IngestApiConfig::default()).await?; @@ -123,6 +124,7 @@ impl TestSandbox { cluster, metastore.clone(), Some(ingest_api_service), + Some(merge_scheduler_mailbox), IngesterPool::default(), storage_resolver.clone(), EventBroker::default(), diff --git a/quickwit/quickwit-janitor/src/janitor_service.rs b/quickwit/quickwit-janitor/src/janitor_service.rs index 712458ad805..1e843f57b9b 100644 --- a/quickwit/quickwit-janitor/src/janitor_service.rs +++ 
b/quickwit/quickwit-janitor/src/janitor_service.rs @@ -25,7 +25,7 @@ pub struct JanitorService { delete_task_service_handle: Option>, garbage_collector_handle: ActorHandle, retention_policy_executor_handle: ActorHandle, - compaction_planner_handle: ActorHandle, + compaction_planner_handle: Option>, } impl JanitorService { @@ -33,7 +33,7 @@ impl JanitorService { delete_task_service_handle: Option>, garbage_collector_handle: ActorHandle, retention_policy_executor_handle: ActorHandle, - compaction_planner_handle: ActorHandle, + compaction_planner_handle: Option>, ) -> Self { Self { delete_task_service_handle, @@ -50,10 +50,16 @@ impl JanitorService { } else { true }; + let compaction_planner_is_not_failure: bool = + if let Some(compaction_planner_handle) = &self.compaction_planner_handle { + compaction_planner_handle.state() != ActorState::Failure + } else { + true + }; delete_task_is_not_failure && self.garbage_collector_handle.state() != ActorState::Failure && self.retention_policy_executor_handle.state() != ActorState::Failure - && self.compaction_planner_handle.state() != ActorState::Failure + && compaction_planner_is_not_failure } } diff --git a/quickwit/quickwit-janitor/src/lib.rs b/quickwit/quickwit-janitor/src/lib.rs index 8bac8c46c21..7e6734d9016 100644 --- a/quickwit/quickwit-janitor/src/lib.rs +++ b/quickwit/quickwit-janitor/src/lib.rs @@ -49,7 +49,7 @@ pub async fn start_janitor_service( storage_resolver: StorageResolver, event_broker: EventBroker, run_delete_task_service: bool, - compaction_planner_handle: ActorHandle, + compaction_planner_handle: Option>, ) -> anyhow::Result> { info!("starting janitor service"); let garbage_collector = GarbageCollector::new(metastore.clone(), storage_resolver.clone()); diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 1fe1b69069c..ede9d3aaf0c 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -79,7 +79,7 @@ use 
quickwit_config::{ClusterConfig, IngestApiConfig, NodeConfig}; use quickwit_control_plane::control_plane::{ControlPlane, ControlPlaneEventSubscriber}; use quickwit_control_plane::{IndexerNodeInfo, IndexerPool}; use quickwit_index_management::{IndexService as IndexManager, IndexServiceError}; -use quickwit_indexing::actors::IndexingService; +use quickwit_indexing::actors::{IndexingService, MergeSchedulerService}; use quickwit_indexing::models::ShardPositionsService; use quickwit_indexing::{IndexingSplitCache, start_indexing_service}; use quickwit_ingest::{ @@ -271,7 +271,8 @@ async fn balance_channel_for_service( BalanceChannel::from_stream(service_change_stream) } -/// Builds a `CompactionPlannerServiceClient` if the node runs the janitor or compactor. +/// Builds a `CompactionPlannerServiceClient` if standalone compactors are enabled +/// and the node runs the janitor or compactor. /// /// On janitor nodes, spawns a `CompactionPlanner` actor and builds the client from /// its mailbox. On compactor-only nodes, connects to a remote janitor via gRPC. 
@@ -287,6 +288,9 @@ async fn get_compaction_planner_client_if_needed( Option, Option>, )> { + if !node_config.indexer_config.enable_standalone_compactors { + return Ok((None, None)); + } let is_janitor = node_config.is_service_enabled(QuickwitService::Janitor); let is_compactor = node_config.is_service_enabled(QuickwitService::Compactor); if !is_janitor && !is_compactor { @@ -322,6 +326,16 @@ async fn get_compaction_planner_client_if_needed( )) } +fn spawn_merge_scheduler_service( + universe: &Universe, + node_config: &NodeConfig, +) -> Mailbox { + let (mailbox, _) = universe.spawn_builder().spawn(MergeSchedulerService::new( + node_config.indexer_config.merge_concurrency.get(), + )); + mailbox +} + async fn start_ingest_client_if_needed( node_config: &NodeConfig, universe: &Universe, @@ -610,9 +624,7 @@ pub async fn serve_quickwit( .await .context("failed to initialize compaction service client")?; - // Build the indexing split cache once and share it between the indexing - // service and the compactor supervisor (when both run on the same node). - // A zero quota in `IndexerConfig` produces a no-op cache. + // In the case where a compactor and indexer run on the same node (in a single node deployment, for example), they should share the split cache so that downloads/new splits end up in predictable locations. If there's only one service enabled, no harm, no foul. let indexing_split_cache_opt: Option> = if node_config .is_service_enabled(QuickwitService::Indexer) || node_config.is_service_enabled(QuickwitService::Compactor) @@ -628,6 +640,15 @@ pub async fn serve_quickwit( }; let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) { + // if standalone compactors is enabled, indexing pipelines don't perform any merges. + // if standalone compactors is disabled, indexing pipelines perform all merges as before. 
+ let merge_scheduler_mailbox_opt = + if !node_config.indexer_config.enable_standalone_compactors { + Some(spawn_merge_scheduler_service(&universe, &node_config)) + } else { + None + }; + let split_cache = indexing_split_cache_opt .clone() .expect("indexing split cache must exist on indexer nodes"); @@ -640,6 +661,7 @@ pub async fn serve_quickwit( ingester_pool.clone(), storage_resolver.clone(), event_broker.clone(), + merge_scheduler_mailbox_opt, split_cache, ) .await @@ -712,6 +734,7 @@ pub async fn serve_quickwit( } } + // this is the search SplitCache, not to be confused with the IndexingSplitCache. let split_cache_opt: Option> = if let Some(split_cache_limits) = node_config.searcher_config.split_cache { let split_cache = SplitCache::with_root_path( @@ -790,8 +813,6 @@ pub async fn serve_quickwit( }; let janitor_service_opt = if node_config.is_service_enabled(QuickwitService::Janitor) { - let compaction_planner_handle = compaction_planner_handle_opt - .expect("compaction planner handle must exist on janitor nodes"); let janitor_service = start_janitor_service( &universe, &node_config, @@ -800,7 +821,7 @@ pub async fn serve_quickwit( storage_resolver.clone(), event_broker.clone(), !get_bool_from_env(DISABLE_DELETE_TASK_SERVICE_ENV_KEY, false), - compaction_planner_handle, + compaction_planner_handle_opt, ) .await .context("failed to start janitor service")?; @@ -809,7 +830,9 @@ pub async fn serve_quickwit( None }; - let compactor_supervisor_opt = if node_config.is_service_enabled(QuickwitService::Compactor) { + let compactor_supervisor_opt = if node_config.is_service_enabled(QuickwitService::Compactor) + && node_config.indexer_config.enable_standalone_compactors + { let compaction_dir = node_config.data_dir_path.join("compaction"); fs::create_dir_all(&compaction_dir)?; let compaction_root_directory = quickwit_common::temp_dir::Builder::default() @@ -1948,8 +1971,9 @@ mod tests { let universe = Universe::new(); let metastore = 
MetastoreServiceClient::from_mock(MockMetastoreService::new()); - // Janitor without compactor: planner client is returned (for gRPC registration). + // Janitor + indexer with standalone compactors enabled: planner client is returned. let mut node_config = NodeConfig::for_test(); + node_config.indexer_config.enable_standalone_compactors = true; node_config.enabled_services = HashSet::from([QuickwitService::Janitor, QuickwitService::Indexer]); let (client_opt, handle_opt) = @@ -1981,6 +2005,20 @@ mod tests { assert!(client_opt.is_none()); assert!(handle_opt.is_none()); + // Standalone compactors disabled: short-circuit returns (None, None) regardless of + // which services are enabled. + node_config.indexer_config.enable_standalone_compactors = false; + node_config.enabled_services = HashSet::from([ + QuickwitService::Janitor, + QuickwitService::Indexer, + ]); + let (client_opt, handle_opt) = + get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore) + .await + .unwrap(); + assert!(client_opt.is_none()); + assert!(handle_opt.is_none()); + universe.assert_quit().await; } @@ -1994,6 +2032,7 @@ mod tests { let metastore = MetastoreServiceClient::from_mock(MockMetastoreService::new()); let mut node_config = NodeConfig::for_test(); + node_config.indexer_config.enable_standalone_compactors = true; node_config.enabled_services = HashSet::from([QuickwitService::Indexer, QuickwitService::Compactor]); let result = From 1d5baf59998d16dfd39798102d637b2de7609863 Mon Sep 17 00:00:00 2001 From: Nadav Gov-Ari Date: Wed, 20 May 2026 15:10:30 -0400 Subject: [PATCH 18/21] Comments from all PRs --- quickwit/quickwit-cli/src/tool.rs | 9 +- .../src/planner/compaction_planner.rs | 28 +++--- .../src/planner/compaction_state.rs | 22 ++--- .../quickwit-compaction/src/planner/mod.rs | 91 +++++++++++++++---- .../src/actors/merge_scheduler_service.rs | 4 +- .../quickwit-indexing/src/merge_policy/mod.rs | 65 ++----------- .../src/split_store/indexing_split_cache.rs 
| 49 +--------- quickwit/quickwit-search/src/leaf.rs | 4 +- quickwit/quickwit-search/src/service.rs | 8 +- quickwit/quickwit-serve/src/lib.rs | 31 +++---- quickwit/quickwit-storage/src/lib.rs | 2 +- .../src/object_storage/azure_blob_storage.rs | 7 +- .../src/object_storage/policy.rs | 27 +++--- .../src/split_cache/download_task.rs | 6 +- .../quickwit-storage/src/split_cache/mod.rs | 10 +- .../quickwit-storage/tests/azure_storage.rs | 6 +- quickwit/quickwit-storage/tests/s3_storage.rs | 6 +- 17 files changed, 171 insertions(+), 204 deletions(-) diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs index 136be2812f5..304d0a89561 100644 --- a/quickwit/quickwit-cli/src/tool.rs +++ b/quickwit/quickwit-cli/src/tool.rs @@ -455,8 +455,13 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< )?; let universe = Universe::new(); let merge_scheduler_service_mailbox = universe.get_or_spawn_one(); - let split_cache = - Arc::new(IndexingSplitCache::from_config(&indexer_config, &config.data_dir_path).await?); + let split_cache = if config.is_service_enabled(QuickwitService::Indexer) + && config.is_service_enabled(QuickwitService::Compactor) + { + Arc::new(IndexingSplitCache::from_config(&indexer_config, &config.data_dir_path).await?) 
+ } else { + Arc::new(IndexingSplitCache::no_caching()) + }; let indexing_server = IndexingService::new( config.node_id.clone(), config.data_dir_path.clone(), diff --git a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs index 1ea2d119711..5c9d24dbc32 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs @@ -19,7 +19,6 @@ use anyhow::Result; use async_trait::async_trait; use itertools::Itertools; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; -use quickwit_indexing::merge_policy::MergeOperation; use quickwit_metastore::{ ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, Split, SplitState, }; @@ -32,6 +31,7 @@ use time::OffsetDateTime; use tracing::{error, info}; use ulid::Ulid; +use super::PendingMerge; use super::compaction_state::CompactionState; use super::index_config_metastore::{IndexConfigMetastore, IndexEntry}; use crate::planner::metrics::COMPACTION_PLANNER_METRICS; @@ -194,18 +194,19 @@ impl CompactionPlanner { } fn assign_tasks(&mut self, node_id: &NodeId, available_slots: u32) -> Vec { - let pending = self.state.pop_pending(available_slots as usize); - let mut assignments = Vec::with_capacity(pending.len()); + let pending_merge_ops = self.state.pop_pending(available_slots as usize); + let mut assignments = Vec::with_capacity(pending_merge_ops.len()); - for operation in pending { + for merge_op in pending_merge_ops { let task_id = Ulid::new().to_string(); - let Some(index_entry) = self.index_config_metastore.get(&operation.index_uid) else { - error!(index_uid=%operation.index_uid, "index config not found for pending operation, skipping"); + let Some(index_entry) = self.index_config_metastore.get(&merge_op.index_uid) else { + error!(index_uid=%merge_op.index_uid, "index config not found for pending operation, skipping"); continue; }; - let 
assignment = build_task_assignment(&task_id, index_entry, &operation); + let assignment = build_task_assignment(&task_id, index_entry, &merge_op); - let split_ids = operation + let split_ids = merge_op + .operation .splits_as_slice() .iter() .map(|s| s.split_id().to_string()) @@ -236,11 +237,12 @@ fn emit_metastore_scan_metrics(new_splits: &[Split]) { fn build_task_assignment( task_id: &str, index_entry: &IndexEntry, - operation: &MergeOperation, + merge_op: &PendingMerge, ) -> MergeTaskAssignment { MergeTaskAssignment { task_id: task_id.to_string(), - splits_metadata_json: operation + splits_metadata_json: merge_op + .operation .splits_as_slice() .iter() .map(|s| { @@ -251,10 +253,10 @@ fn build_task_assignment( search_settings_json: index_entry.search_settings_json(), indexing_settings_json: index_entry.indexing_settings_json(), retention_policy_json: index_entry.retention_policy_json(), - index_uid: Some(operation.index_uid.clone()), - source_id: operation.source_id.clone(), + index_uid: Some(merge_op.index_uid.clone()), + source_id: merge_op.source_id.clone(), index_storage_uri: index_entry.index_storage_uri(), - merge_level: operation.merge_level() as u64, + merge_level: merge_op.operation.merge_level() as u64, } } diff --git a/quickwit/quickwit-compaction/src/planner/compaction_state.rs b/quickwit/quickwit-compaction/src/planner/compaction_state.rs index bd8cecc82e9..65abf323af1 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_state.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_state.rs @@ -18,13 +18,13 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use itertools::Itertools; -use quickwit_indexing::merge_policy::{MergeOperation, MergePolicy}; +use quickwit_indexing::merge_policy::MergePolicy; use quickwit_metastore::SplitMetadata; use quickwit_proto::compaction::{CompactionFailure, CompactionInProgress, CompactionSuccess}; use quickwit_proto::types::{DocMappingUid, IndexUid, NodeId, SourceId, SplitId}; use 
tracing::{error, info, warn}; -use crate::planner::PendingOperations; +use crate::planner::{PendingMerge, PendingOperations}; use crate::planner::metrics::COMPACTION_PLANNER_METRICS; use crate::{TaskId, source_uid_metrics_label}; @@ -136,7 +136,7 @@ impl CompactionState { self.in_flight_split_ids .insert(split.split_id().to_string()); } - self.pending_operations.push(operation); + self.pending_operations.push(PendingMerge::new(operation)); } } if splits.is_empty() { @@ -212,14 +212,14 @@ impl CompactionState { } } - /// Pops up to `count` pending operations for assignment, highest-score first. - pub fn pop_pending(&mut self, count: usize) -> Vec { + /// Pops up to `count` pending merges for assignment, highest-priority first. + pub fn pop_pending(&mut self, count: usize) -> Vec { let count = count.min(self.pending_operations.len()); - let mut operations = Vec::with_capacity(count); + let mut pending = Vec::with_capacity(count); for _ in 0..count { - operations.push(self.pending_operations.pop().unwrap()); + pending.push(self.pending_operations.pop().unwrap()); } - operations + pending } /// Records that an operation has been assigned to a worker. @@ -340,8 +340,8 @@ mod tests { // Splits moved from needs_compaction to in_flight. 
assert!(!state.pending_operations.is_empty()); - for op in state.pending_operations.iter() { - for split in op.splits_as_slice() { + for pending in state.pending_operations.iter() { + for split in pending.operation.splits_as_slice() { assert!(!state.needs_compaction_split_ids.contains(split.split_id())); assert!(state.in_flight_split_ids.contains(split.split_id())); } @@ -423,7 +423,7 @@ mod tests { let pending = state.pop_pending(1); assert_eq!(pending.len(), 1); - let operation = &pending[0]; + let operation = &pending[0].operation; let split_ids: Vec = operation .splits_as_slice() .iter() diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs index 18b408580d3..d071fdc6262 100644 --- a/quickwit/quickwit-compaction/src/planner/mod.rs +++ b/quickwit/quickwit-compaction/src/planner/mod.rs @@ -17,42 +17,95 @@ mod compaction_state; mod index_config_metastore; pub(crate) mod metrics; +use std::cmp::Ordering; use std::collections::BinaryHeap; pub use compaction_planner::CompactionPlanner; -use quickwit_indexing::merge_policy::MergeOperation; +use quickwit_indexing::merge_policy::{MergeOperation, compute_score}; +use quickwit_proto::types::{IndexUid, SourceId}; use crate::planner::metrics::COMPACTION_PLANNER_METRICS; use crate::source_uid_metrics_label; -/// Max-heap of merge operations awaiting assignment, ordered by -/// `MergeOperation`'s score-based `Ord`. The `pending_merge_operations` gauge -/// is maintained inline; push/pop are the only mutation paths so the metric +/// A `MergeOperation` waiting to be assigned, with `priority_score` used to order the heap. 
+#[derive(Debug)] +pub(super) struct PendingMerge { + pub(super) operation: MergeOperation, + pub(super) index_uid: IndexUid, + pub(super) source_id: SourceId, + pub(super) priority_score: u64, +} + +impl PendingMerge { + pub(super) fn new(operation: MergeOperation) -> Self { + let first_split = operation + .splits + .first() + .expect("merge operation must have splits"); + let index_uid = first_split.index_uid.clone(); + let source_id = first_split.source_id.clone(); + let priority_score = compute_score(&operation.splits); + Self { + operation, + index_uid, + source_id, + priority_score, + } + } +} + +impl PartialEq for PendingMerge { + fn eq(&self, other: &Self) -> bool { + self.operation.merge_split_id == other.operation.merge_split_id + } +} + +impl Eq for PendingMerge {} + +impl PartialOrd for PendingMerge { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for PendingMerge { + /// Higher `priority_score` sorts first (max-heap); ties broken by + /// `merge_split_id` so `Ord::cmp == Equal` agrees with `PartialEq`. + fn cmp(&self, other: &Self) -> Ordering { + self.priority_score + .cmp(&other.priority_score) + .then_with(|| self.operation.merge_split_id.cmp(&other.operation.merge_split_id)) + } +} + +/// Max-heap of pending merges awaiting assignment, ordered by +/// `PendingMerge`'s priority score. The `pending_merge_operations` gauge is +/// maintained inline; push/pop are the only mutation paths so the metric /// stays consistent with `len()`. 
#[derive(Debug)] -struct PendingOperations { - inner: BinaryHeap, +pub(super) struct PendingOperations { + inner: BinaryHeap, } impl PendingOperations { - fn new() -> Self { + pub(super) fn new() -> Self { Self { inner: BinaryHeap::new(), } } - fn push(&mut self, operation: MergeOperation) { - Self::adjust_gauge(&operation, 1); - self.inner.push(operation); + pub(super) fn push(&mut self, pending: PendingMerge) { + Self::adjust_gauge(&pending, 1); + self.inner.push(pending); } - fn pop(&mut self) -> Option { - let operation = self.inner.pop()?; - Self::adjust_gauge(&operation, -1); - Some(operation) + pub(super) fn pop(&mut self) -> Option { + let pending = self.inner.pop()?; + Self::adjust_gauge(&pending, -1); + Some(pending) } - fn len(&self) -> usize { + pub(super) fn len(&self) -> usize { self.inner.len() } @@ -62,13 +115,13 @@ impl PendingOperations { } #[cfg(test)] - fn iter(&self) -> impl Iterator { + fn iter(&self) -> impl Iterator { self.inner.iter() } - fn adjust_gauge(operation: &MergeOperation, delta: i64) { - let source_uid_label = source_uid_metrics_label(&operation.index_uid, &operation.source_id); - let merge_level = operation.merge_level().to_string(); + fn adjust_gauge(pending: &PendingMerge, delta: i64) { + let source_uid_label = source_uid_metrics_label(&pending.index_uid, &pending.source_id); + let merge_level = pending.operation.merge_level().to_string(); COMPACTION_PLANNER_METRICS .pending_merge_operations .with_label_values([source_uid_label.as_str(), merge_level.as_str()]) diff --git a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs index fcdbad2c815..10afc58f429 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs @@ -25,7 +25,7 @@ use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use tracing::error; use super::MergeSplitDownloader; -use 
crate::merge_policy::{MergeOperation, MergeTask}; +use crate::merge_policy::{MergeOperation, MergeTask, compute_score}; pub struct MergePermit { _semaphore_permit: Option, @@ -214,7 +214,7 @@ impl ScheduleMerge { merge_operation: TrackedObject, split_downloader_mailbox: Mailbox, ) -> ScheduleMerge { - let score = merge_operation.score; + let score = compute_score(&merge_operation.splits); ScheduleMerge { score, merge_operation, diff --git a/quickwit/quickwit-indexing/src/merge_policy/mod.rs b/quickwit/quickwit-indexing/src/merge_policy/mod.rs index f950c30635c..33fb1c08dcb 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/mod.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/mod.rs @@ -26,7 +26,7 @@ pub use nop_merge_policy::NopMergePolicy; use quickwit_config::IndexingSettings; use quickwit_config::merge_policy_config::MergePolicyConfig; use quickwit_metastore::{SplitMaturity, SplitMetadata}; -use quickwit_proto::types::{IndexUid, SourceId, SplitId}; +use quickwit_proto::types::SplitId; use serde::Serialize; pub(crate) use stable_log_merge_policy::StableLogMergePolicy; use tantivy::TrackedObject; @@ -83,35 +83,20 @@ pub struct MergeOperation { #[serde(skip_serializing)] pub merge_parent_span: Span, pub merge_split_id: SplitId, - pub index_uid: IndexUid, - pub source_id: SourceId, pub splits: Vec, pub operation_type: MergeOperationType, - /// Priority score, computed once at construction. Higher = run sooner. - /// `Ord`/`Eq` for `MergeOperation` are defined purely on this field so a - /// `BinaryHeap` is a max-heap by score. - pub score: u64, } impl MergeOperation { - /// All splits must belong to the same `(index_uid, source_id)` — a precondition - /// merge policies already satisfy because they partition before merging. 
pub fn new_merge_operation(splits: Vec) -> Self { - let first_split = splits.first().expect("merge operation must have splits"); - let index_uid = first_split.index_uid.clone(); - let source_id = first_split.source_id.clone(); let merge_split_id = new_split_id(); let split_ids = splits.iter().map(|split| split.split_id()).collect_vec(); let merge_parent_span = info_span!("merge", merge_split_id=%merge_split_id, split_ids=?split_ids, typ=%MergeOperationType::Merge); - let score = compute_score(&splits); Self { merge_parent_span, merge_split_id, - index_uid, - source_id, splits, operation_type: MergeOperationType::Merge, - score, } } @@ -123,20 +108,13 @@ impl MergeOperation { } pub fn new_delete_and_merge_operation(split: SplitMetadata) -> Self { - let index_uid = split.index_uid.clone(); - let source_id = split.source_id.clone(); let merge_split_id = new_split_id(); let merge_parent_span = info_span!("delete", merge_split_id=%merge_split_id, split_ids=?split.split_id(), typ=%MergeOperationType::DeleteAndMerge); - let splits = vec![split]; - let score = compute_score(&splits); Self { merge_parent_span, merge_split_id, - index_uid, - source_id, - splits, + splits: vec![split], operation_type: MergeOperationType::DeleteAndMerge, - score, } } @@ -157,7 +135,7 @@ impl MergeOperation { /// A good merge operation: /// - strongly reduces the number of splits /// - is light. -fn compute_score(splits: &[SplitMetadata]) -> u64 { +pub fn compute_score(splits: &[SplitMetadata]) -> u64 { let total_num_bytes: u64 = splits.iter().map(|split| split.footer_offsets.end).sum(); if total_num_bytes == 0 { // Silly corner case that should never happen. 
@@ -171,31 +149,6 @@ fn compute_score(splits: &[SplitMetadata]) -> u64 { .unwrap_or(1u64) } -impl PartialEq for MergeOperation { - fn eq(&self, other: &Self) -> bool { - self.merge_split_id == other.merge_split_id - } -} - -impl Eq for MergeOperation {} - -impl PartialOrd for MergeOperation { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for MergeOperation { - /// The way we reason about ordering merge operations is that a highest score should take - /// precedence over a lower score; by that logic, score already serves the Ord property; we - /// formalize that here. - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.score - .cmp(&other.score) - .then_with(|| self.merge_split_id.cmp(&other.merge_split_id)) - } -} - impl fmt::Debug for MergeOperation { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( @@ -308,25 +261,25 @@ pub mod tests { #[test] fn test_score() { - fn op(num_splits: usize, num_bytes_per_split: u64) -> MergeOperation { + fn score(num_splits: usize, num_bytes_per_split: u64) -> u64 { let splits: Vec = (0..num_splits) .map(|_| SplitMetadata { footer_offsets: 0..num_bytes_per_split, ..Default::default() }) .collect(); - MergeOperation::new_merge_operation(splits) + compute_score(&splits) } // Lighter merge (smaller total bytes) at the same split count scores higher. - assert!(op(10, 10_000_000).score < op(10, 999_999).score); + assert!(score(10, 10_000_000) < score(10, 999_999)); // More splits removed at the same total bytes scores higher. - assert!(op(10, 10_000_000).score > op(9, 10_000_000).score); + assert!(score(10, 10_000_000) > score(9, 10_000_000)); // Score is `(delta_splits << 48) / total_bytes` — equal ratios yield equal scores. assert_eq!( // 9 splits, 90M bytes → delta=8. - op(9, 10_000_000).score, + score(9, 10_000_000), // 5 splits, 45M bytes → delta=4 (same 8/90M ratio). 
- op(5, 10_000_000 * 9 / 10).score, + score(5, 10_000_000 * 9 / 10), ); } diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs index 66ef9dd6005..47676669487 100644 --- a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs +++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs @@ -366,21 +366,12 @@ impl IndexingSplitCache { IndexingSplitCache { inner } } - /// Builds an [`IndexingSplitCache`] from an [`IndexerConfig`]. - /// - /// A zero quota for either dimension produces a [`IndexingSplitCache::no_caching`] - /// instance — useful when compaction runs on dedicated nodes and indexers no - /// longer benefit from caching freshly produced splits. Otherwise, opens the - /// cache rooted at `/indexer-split-cache/splits`. + /// Builds an [`IndexingSplitCache`] from an [`IndexerConfig`], rooted at + /// `/indexer-split-cache/splits`. pub async fn from_config( indexer_config: &IndexerConfig, data_dir_path: &Path, ) -> anyhow::Result { - if indexer_config.split_store_max_num_bytes.as_u64() == 0 - || indexer_config.split_store_max_num_splits == 0 - { - return Ok(IndexingSplitCache::no_caching()); - } let cache_path = get_cache_directory_path(data_dir_path); let quota = SplitStoreQuota::try_new( indexer_config.split_store_max_num_splits, @@ -563,40 +554,6 @@ mod tests { #[tokio::test] async fn test_from_config() { - // A zero quota in either dimension yields a no-caching cache that does - // not touch the filesystem; a positive quota opens (and creates) the - // cache directory at `/indexer-split-cache/splits`. 
- let zero_bytes = { - let mut config = IndexerConfig::for_test().unwrap(); - config.split_store_max_num_bytes = ByteSize(0); - config - }; - let zero_splits = { - let mut config = IndexerConfig::for_test().unwrap(); - config.split_store_max_num_splits = 0; - config - }; - let both_zero = { - let mut config = IndexerConfig::for_test().unwrap(); - config.split_store_max_num_bytes = ByteSize(0); - config.split_store_max_num_splits = 0; - config - }; - for config in [zero_bytes, zero_splits, both_zero] { - let data_dir = tempdir().unwrap(); - let _cache = IndexingSplitCache::from_config(&config, data_dir.path()) - .await - .unwrap(); - assert!( - !data_dir - .path() - .join("indexer-split-cache") - .try_exists() - .unwrap(), - "no-caching variant must not create the cache directory", - ); - } - let data_dir = tempdir().unwrap(); let config = IndexerConfig::for_test().unwrap(); let _cache = IndexingSplitCache::from_config(&config, data_dir.path()) @@ -605,7 +562,7 @@ mod tests { let cache_dir = data_dir.path().join("indexer-split-cache").join("splits"); assert!( cache_dir.is_dir(), - "positive quota must open (and create) the cache directory", + "from_config must open (and create) the cache directory", ); } diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 37f99a36241..79a57ca8dbe 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -39,7 +39,7 @@ use quickwit_query::query_ast::{ }; use quickwit_query::tokenizers::TokenizerManager; use quickwit_storage::{ - BundleStorage, ByteRangeCache, MemorySizedCache, OwnedBytes, SplitCache, Storage, + BundleStorage, ByteRangeCache, MemorySizedCache, OwnedBytes, SearchSplitCache, Storage, StorageResolver, TimeoutAndRetryStorage, wrap_storage_with_cache, }; use tantivy::aggregation::AggContextParams; @@ -169,7 +169,7 @@ pub(crate) async fn open_split_bundle( // This is before the bundle storage: at this point, this storage is reading `.split` 
files. let index_storage_with_split_cache = if let Some(split_cache) = searcher_context.split_cache_opt.as_ref() { - SplitCache::wrap_storage(split_cache.clone(), index_storage.clone()) + SearchSplitCache::wrap_storage(split_cache.clone(), index_storage.clone()) } else { index_storage.clone() }; diff --git a/quickwit/quickwit-search/src/service.rs b/quickwit/quickwit-search/src/service.rs index 5e04e6a4dcf..9b43de9008e 100644 --- a/quickwit/quickwit-search/src/service.rs +++ b/quickwit/quickwit-search/src/service.rs @@ -29,7 +29,7 @@ use quickwit_proto::search::{ SearchResponse, SnippetRequest, }; use quickwit_storage::{ - MemorySizedCache, QuickwitCache, SplitCache, StorageCache, StorageResolver, + MemorySizedCache, QuickwitCache, SearchSplitCache, StorageCache, StorageResolver, }; use tantivy::aggregation::AggregationLimitsGuard; @@ -413,7 +413,7 @@ pub struct SearcherContext { /// Per-split and per-predicate cache. pub predicate_cache: Arc, /// Search split cache. `None` if no split cache is configured. - pub split_cache_opt: Option>, + pub split_cache_opt: Option>, /// List fields cache. Caches the list fields response for a given split. pub list_fields_cache: ListFieldsCache, /// The aggregation limits are passed to limit the memory usage. @@ -442,7 +442,7 @@ impl SearcherContext { /// Creates a new searcher context without a lambda invoker. pub fn new_without_invoker( searcher_config: SearcherConfig, - split_cache_opt: Option>, + split_cache_opt: Option>, ) -> Self { Self::new( searcher_config, @@ -454,7 +454,7 @@ impl SearcherContext { /// Creates a new searcher context, given a searcher config, and an optional `SplitCache`. 
pub fn new( searcher_config: SearcherConfig, - split_cache_opt: Option>, + split_cache_opt: Option>, lambda_invoker: Option, ) -> Self { let global_split_footer_cache = MemorySizedCache::from_config( diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index ede9d3aaf0c..c99778724a2 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -114,7 +114,7 @@ use quickwit_search::{ SearchJobPlacer, SearchService, SearchServiceClient, SearcherContext, SearcherPool, create_search_client_from_channel, start_searcher_service, }; -use quickwit_storage::{SplitCache, StorageResolver}; +use quickwit_storage::{SearchSplitCache, StorageResolver}; use tcp_listener::TcpListenerResolver; use tokio::sync::oneshot; use tonic::codec::CompressionEncoding; @@ -625,18 +625,18 @@ pub async fn serve_quickwit( .context("failed to initialize compaction service client")?; // In the case where a compactor and indexer run on the same node (in a single node deployment, for example), they should share the split cache so that downloads/new splits end up in predictable locations. If there's only one service enabled, no harm, no foul. 
- let indexing_split_cache_opt: Option> = if node_config + let indexing_split_cache: Arc = if node_config .is_service_enabled(QuickwitService::Indexer) - || node_config.is_service_enabled(QuickwitService::Compactor) + && node_config.is_service_enabled(QuickwitService::Compactor) { let cache = IndexingSplitCache::from_config( &node_config.indexer_config, &node_config.data_dir_path, ) .await?; - Some(Arc::new(cache)) + Arc::new(cache) } else { - None + Arc::new(IndexingSplitCache::no_caching()) }; let indexing_service_opt = if node_config.is_service_enabled(QuickwitService::Indexer) { @@ -649,9 +649,7 @@ pub async fn serve_quickwit( None }; - let split_cache = indexing_split_cache_opt - .clone() - .expect("indexing split cache must exist on indexer nodes"); + let split_cache = indexing_split_cache.clone(); let indexing_service = start_indexing_service( &universe, &node_config, @@ -734,16 +732,15 @@ pub async fn serve_quickwit( } } - // this is the search SplitCache, not to be confused with the IndexingSplitCache. 
- let split_cache_opt: Option> = + let search_split_cache_opt: Option> = if let Some(split_cache_limits) = node_config.searcher_config.split_cache { - let split_cache = SplitCache::with_root_path( + let search_split_cache = SearchSplitCache::with_root_path( node_config.data_dir_path.join("searcher-split-cache"), storage_resolver.clone(), split_cache_limits, ) .context("failed to load searcher split cache")?; - Some(split_cache) + Some(search_split_cache) } else { None }; @@ -759,7 +756,7 @@ pub async fn serve_quickwit( quickwit_lambda_client::try_get_or_deploy_invoker(lambda_config).await?; Arc::new(SearcherContext::new( node_config.searcher_config.clone(), - split_cache_opt, + search_split_cache_opt, Some(invoker), )) } @@ -771,13 +768,13 @@ pub async fn serve_quickwit( } else { Arc::new(SearcherContext::new_without_invoker( node_config.searcher_config.clone(), - split_cache_opt, + search_split_cache_opt, )) } } else { Arc::new(SearcherContext::new_without_invoker( node_config.searcher_config.clone(), - split_cache_opt, + search_split_cache_opt, )) }; @@ -841,9 +838,7 @@ pub async fn serve_quickwit( let compaction_client = compaction_service_client_opt .clone() .expect("compactor service enabled but no compaction client available"); - let split_cache = indexing_split_cache_opt - .clone() - .expect("indexing split cache must exist on compactor nodes"); + let split_cache = indexing_split_cache.clone(); let compactor_mailbox = start_compactor_service( &universe, cluster.self_node_id().into(), diff --git a/quickwit/quickwit-storage/src/lib.rs b/quickwit/quickwit-storage/src/lib.rs index f808ac83286..e23c37b05de 100644 --- a/quickwit/quickwit-storage/src/lib.rs +++ b/quickwit/quickwit-storage/src/lib.rs @@ -55,7 +55,7 @@ mod storage_resolver; mod versioned_component; use quickwit_common::uri::Uri; -pub use split_cache::SplitCache; +pub use split_cache::SearchSplitCache; pub use tantivy::directory::OwnedBytes; pub use versioned_component::VersionedComponent; diff --git 
a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs index d4c9bd67d84..e1d026769e7 100644 --- a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs @@ -27,6 +27,7 @@ use azure_storage::prelude::*; use azure_storage_blobs::blob::operations::GetBlobResponse; use azure_storage_blobs::prelude::*; use bytes::Bytes; +use bytesize::ByteSize; use futures::io::Error as FutureError; use futures::stream::{StreamExt, TryStreamExt}; use md5::Digest; @@ -110,10 +111,10 @@ impl AzureBlobStorage { multipart_policy: MultiPartPolicy { // Azure max part size is 100MB // https://azure.microsoft.com/en-us/blog/general-availability-larger-block-blobs-in-azure-storage/ - target_part_num_bytes: 100_000_000, - multipart_threshold_num_bytes: 100_000_000, + target_part_num_bytes: ByteSize::mb(100), + multipart_threshold_num_bytes: ByteSize::mb(100), max_num_parts: 50_000, // Azure allows up to 50,000 blocks - max_object_num_bytes: 4_770_000_000_000u64, // Azure allows up to 4.77TB objects + max_object_num_bytes: ByteSize::b(4_770_000_000_000), // Azure allows up to 4.77TB objects max_concurrent_uploads: 100, }, retry_params: RetryParams::aggressive(), diff --git a/quickwit/quickwit-storage/src/object_storage/policy.rs b/quickwit/quickwit-storage/src/object_storage/policy.rs index 3369f948e06..bfb6baf3f12 100644 --- a/quickwit/quickwit-storage/src/object_storage/policy.rs +++ b/quickwit/quickwit-storage/src/object_storage/policy.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use bytesize::ByteSize; + /// The multipart policy defines when and how multipart upload / download should happen. 
/// /// The right settings might be vendor specific, but if not available the default values @@ -20,13 +22,13 @@ pub struct MultiPartPolicy { /// Ideal part size. /// Since S3 has a constraint on the number of parts, it cannot always be /// respected. - pub target_part_num_bytes: usize, + pub target_part_num_bytes: ByteSize, /// Maximum number of parts allowed. pub max_num_parts: usize, /// Threshold above which multipart is triggered. - pub multipart_threshold_num_bytes: u64, + pub multipart_threshold_num_bytes: ByteSize, /// Maximum size allowed for an object. - pub max_object_num_bytes: u64, + pub max_object_num_bytes: ByteSize, /// Maximum number of parts to be upload concurrently. pub max_concurrent_uploads: usize, } @@ -39,22 +41,22 @@ impl MultiPartPolicy { /// will not be used. pub fn part_num_bytes(&self, len: u64) -> u64 { assert!( - len < self.max_object_num_bytes, + len < self.max_object_num_bytes.as_u64(), "This object storage does not support object of that size {}", - self.max_object_num_bytes + self.max_object_num_bytes.as_u64() ); assert!( self.max_num_parts > 0, "Misconfiguration: max_num_parts == 0 makes no sense." ); - if len < self.multipart_threshold_num_bytes || self.max_num_parts == 1 { + if len < self.multipart_threshold_num_bytes.as_u64() || self.max_num_parts == 1 { return len; } let max_num_parts = self.max_num_parts as u64; // complete part is the smallest integer such that // * >= len. let min_part_len = 1u64 + (len - 1u64) / max_num_parts; - (min_part_len).max(self.target_part_num_bytes as u64) + min_part_len.max(self.target_part_num_bytes.as_u64()) } /// Limits the number of parts that can be concurrently uploaded. @@ -67,13 +69,12 @@ impl MultiPartPolicy { impl Default for MultiPartPolicy { fn default() -> Self { MultiPartPolicy { - // We want to balance time spent waiting on uploads while still being careful about - // not hammering S3 Puts, which can be expensive. - // TODO: Dynamic multipart policy. 
- target_part_num_bytes: 512 * 1_024 * 1_024, // 512mib - multipart_threshold_num_bytes: 128 * 1_024 * 1_024, // 128 MiB + // S3 limits part size from 5M to 5GB, we want to end up with as few parts as possible + // since each part is charged as a put request. + target_part_num_bytes: ByteSize::gib(5), + multipart_threshold_num_bytes: ByteSize::mib(128), max_num_parts: 10_000, - max_object_num_bytes: 5_000_000_000_000u64, // S3 allows up to 5TB objects + max_object_num_bytes: ByteSize::tb(5), // S3 allows up to 5TB objects max_concurrent_uploads: 100, } } diff --git a/quickwit/quickwit-storage/src/split_cache/download_task.rs b/quickwit/quickwit-storage/src/split_cache/download_task.rs index 9af0390bfb5..50631836203 100644 --- a/quickwit/quickwit-storage/src/split_cache/download_task.rs +++ b/quickwit/quickwit-storage/src/split_cache/download_task.rs @@ -21,7 +21,7 @@ use quickwit_common::split_file; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use crate::split_cache::split_table::{CandidateSplit, DownloadOpportunity}; -use crate::{SplitCache, StorageResolver}; +use crate::{SearchSplitCache, StorageResolver}; async fn download_split( root_path: &Path, @@ -44,7 +44,7 @@ async fn download_split( async fn perform_eviction_and_download( download_opportunity: DownloadOpportunity, - split_cache: Arc, + split_cache: Arc, storage_resolver: StorageResolver, _download_permit: OwnedSemaphorePermit, ) -> anyhow::Result<()> { @@ -67,7 +67,7 @@ async fn perform_eviction_and_download( } pub(crate) fn spawn_download_task( - split_cache: Arc, + split_cache: Arc, storage_resolver: StorageResolver, num_concurrent_downloads: NonZeroU32, ) { diff --git a/quickwit/quickwit-storage/src/split_cache/mod.rs b/quickwit/quickwit-storage/src/split_cache/mod.rs index 7417471af58..c7231c83914 100644 --- a/quickwit/quickwit-storage/src/split_cache/mod.rs +++ b/quickwit/quickwit-storage/src/split_cache/mod.rs @@ -40,7 +40,7 @@ use crate::{Storage, StorageCache, wrap_storage_with_cache}; /// 
On disk Cache of splits for searchers. /// /// The search acts receives reports of splits. -pub struct SplitCache { +pub struct SearchSplitCache { // Directory containing the cached split files. // Split ids are universally unique, so we all put them in the same directory. root_path: PathBuf, @@ -50,14 +50,14 @@ pub struct SplitCache { fd_cache: FileDescriptorCache, } -impl SplitCache { +impl SearchSplitCache { /// Creates a new SplitCache and spawns the task that will continuously search for /// download opportunities. pub fn with_root_path( root_path: PathBuf, storage_resolver: crate::StorageResolver, limits: SplitCacheLimits, - ) -> io::Result> { + ) -> io::Result> { std::fs::create_dir_all(&root_path)?; let mut existing_splits: BTreeMap = Default::default(); for dir_entry_res in std::fs::read_dir(&root_path)? { @@ -103,7 +103,7 @@ impl SplitCache { delete_evicted_splits(&root_path, &splits_to_remove[..]); } let fd_cache = FileDescriptorCache::with_fd_cache_capacity(limits.max_file_descriptors); - let split_cache = Arc::new(SplitCache { + let split_cache = Arc::new(SearchSplitCache { root_path, split_table: Mutex::new(split_table), fd_cache, @@ -193,7 +193,7 @@ fn split_id_from_path(split_path: &Path) -> Option { } struct SplitCacheBackingStorage { - split_cache: Arc, + split_cache: Arc, storage_root_uri: Uri, } diff --git a/quickwit/quickwit-storage/tests/azure_storage.rs b/quickwit/quickwit-storage/tests/azure_storage.rs index c5d4937220a..1619596a822 100644 --- a/quickwit/quickwit-storage/tests/azure_storage.rs +++ b/quickwit/quickwit-storage/tests/azure_storage.rs @@ -45,10 +45,10 @@ async fn azure_storage_test_suite() -> anyhow::Result<()> { object_storage.set_policy(MultiPartPolicy { // On azure, block size is limited between 64KB and 100MB. 
- target_part_num_bytes: 5 * 1_024 * 1_024, // 5MiB + target_part_num_bytes: bytesize::ByteSize::mib(5), max_num_parts: 10_000, - multipart_threshold_num_bytes: 10_000_000, - max_object_num_bytes: 5_000_000_000_000, + multipart_threshold_num_bytes: bytesize::ByteSize::mb(10), + max_object_num_bytes: bytesize::ByteSize::tb(5), max_concurrent_uploads: 100, }); quickwit_storage::storage_test_multi_part_upload(&mut object_storage) diff --git a/quickwit/quickwit-storage/tests/s3_storage.rs b/quickwit/quickwit-storage/tests/s3_storage.rs index 6dc1bfacaab..08fe1953b7a 100644 --- a/quickwit/quickwit-storage/tests/s3_storage.rs +++ b/quickwit/quickwit-storage/tests/s3_storage.rs @@ -72,10 +72,10 @@ pub mod s3_storage_test_suite { .unwrap(); object_storage.set_policy(MultiPartPolicy { - target_part_num_bytes: 5 * 1_024 * 1_024, //< the minimum on S3 is 5MB. + target_part_num_bytes: bytesize::ByteSize::mib(5), //< the minimum on S3 is 5MB. max_num_parts: 10_000, - multipart_threshold_num_bytes: 10_000_000, - max_object_num_bytes: 5_000_000_000_000, + multipart_threshold_num_bytes: bytesize::ByteSize::mb(10), + max_object_num_bytes: bytesize::ByteSize::tb(5), max_concurrent_uploads: 100, }); From 62176e62ea5ce910167a86efde40740f616f0992 Mon Sep 17 00:00:00 2001 From: Nadav Gov-Ari Date: Thu, 21 May 2026 10:19:59 -0400 Subject: [PATCH 19/21] Optionally disable metrics merge pipelines like logs --- .../src/actors/indexing_service.rs | 15 +++++++++++---- .../parquet_pipeline/indexing_service_impl.rs | 6 ++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index e9a1fa348b3..16922b0acef 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -680,6 +680,10 @@ impl IndexingService { /// Returns the Parquet merge planner mailbox for the given index, creating /// a new 
ParquetMergePipeline if one isn't already running. /// + /// Returns `Ok(None)` when there is no `merge_scheduler_service_opt` on the + /// service — mirrors the log pipeline path, which does not spawn a merge + /// pipeline in that case. + /// /// Keyed by IndexUid (not MergePipelineId) because Parquet merge pipelines /// are shared across all sources for the same index — unlike Tantivy merge /// pipelines which are per-source. @@ -692,9 +696,12 @@ impl IndexingService { indexing_directory: quickwit_common::temp_dir::TempDirectory, immature_splits_opt: Option>, ctx: &ActorContext, - ) -> Result, IndexingError> { + ) -> Result>, IndexingError> { + let Some(merge_scheduler_service) = self.merge_scheduler_service_opt.clone() else { + return Ok(None); + }; if let Some(handle) = self.parquet_merge_pipeline_handles.get(&index_uid) { - return Ok(handle.mailbox.clone()); + return Ok(Some(handle.mailbox.clone())); } // Convert the config-crate merge policy into the engine-crate type. @@ -722,7 +729,7 @@ impl IndexingService { metastore: self.metastore.clone(), storage, merge_policy, - merge_scheduler_service: self.merge_scheduler_service.clone(), + merge_scheduler_service, max_concurrent_split_uploads: self.max_concurrent_split_uploads, event_broker: self.event_broker.clone(), writer_config, @@ -741,7 +748,7 @@ impl IndexingService { }; self.parquet_merge_pipeline_handles .insert(index_uid, handle); - Ok(merge_planner_mailbox) + Ok(Some(merge_planner_mailbox)) } /// For all Ingest V2 pipelines, assigns the set of shards they should be working on. 
diff --git a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/indexing_service_impl.rs b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/indexing_service_impl.rs index e39a7e4989f..9d5b82b9a44 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/indexing_service_impl.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/indexing_service_impl.rs @@ -70,7 +70,9 @@ impl IndexingService { // Spawn the Parquet merge pipeline (or reuse an existing one for this // index). The planner mailbox is wired into the MetricsPipeline's // Publisher so newly ingested splits are fed back for merging. - let merge_planner_mailbox = self.get_or_create_parquet_merge_pipeline( + // Returns `None` when there is no local merge scheduler — the metrics + // pipeline then runs without local merging (mirrors the log path). + let merge_planner_mailbox_opt = self.get_or_create_parquet_merge_pipeline( indexing_pipeline_id.index_uid.clone(), &index_config, storage.clone(), @@ -97,7 +99,7 @@ impl IndexingService { use_sketch_processors, partition_key, max_num_partitions: index_config.doc_mapping.max_num_partitions, - parquet_merge_planner_mailbox_opt: Some(merge_planner_mailbox), + parquet_merge_planner_mailbox_opt: merge_planner_mailbox_opt, }; let pipeline = MetricsPipeline::new(pipeline_params); let (mailbox, handle) = ctx.spawn_actor().spawn(pipeline); From 82cb6feecd81a2ae93b3111019c62332b1356e8f Mon Sep 17 00:00:00 2001 From: Nadav Gov-Ari Date: Thu, 21 May 2026 10:27:35 -0400 Subject: [PATCH 20/21] lints --- quickwit/Cargo.lock | 1 - quickwit/quickwit-cli/src/main.rs | 4 +- quickwit/quickwit-cli/src/tool.rs | 23 ++---- quickwit/quickwit-compaction/Cargo.toml | 1 - .../src/compaction_pipeline.rs | 5 +- .../src/compactor_supervisor.rs | 3 +- quickwit/quickwit-compaction/src/metrics.rs | 10 ++- .../src/planner/compaction_planner.rs | 3 +- .../src/planner/compaction_state.rs | 7 +- .../src/planner/index_config_metastore.rs | 3 +- .../src/planner/metrics.rs | 
9 +- .../quickwit-compaction/src/planner/mod.rs | 9 +- .../quickwit-config/src/node_config/mod.rs | 2 +- .../src/node_config/serialize.rs | 4 +- .../src/actors/indexing_service.rs | 82 ++++++++++--------- quickwit/quickwit-metastore/src/lib.rs | 2 +- quickwit/quickwit-serve/src/lib.rs | 12 +-- .../src/object_storage/azure_blob_storage.rs | 11 ++- 18 files changed, 91 insertions(+), 100 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 5fee77527ec..5902c1024be 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -8558,7 +8558,6 @@ dependencies = [ "anyhow", "async-trait", "itertools 0.14.0", - "once_cell", "quickwit-actors", "quickwit-common", "quickwit-config", diff --git a/quickwit/quickwit-cli/src/main.rs b/quickwit/quickwit-cli/src/main.rs index 37cb72c1486..9f8662ccf7e 100644 --- a/quickwit/quickwit-cli/src/main.rs +++ b/quickwit/quickwit-cli/src/main.rs @@ -167,8 +167,8 @@ mod tests { }; use quickwit_cli::split::{DescribeSplitArgs, SplitCliCommand}; use quickwit_cli::tool::{ - ExtractSplitArgs, GarbageCollectIndexArgs, LocalIngestDocsArgs, LocalSearchArgs, - ToolCliCommand, MergeArgs + ExtractSplitArgs, GarbageCollectIndexArgs, LocalIngestDocsArgs, LocalSearchArgs, MergeArgs, + ToolCliCommand, }; use quickwit_common::uri::Uri; use quickwit_config::SourceInputFormat; diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs index 5e8d72f1157..88bb53887bf 100644 --- a/quickwit/quickwit-cli/src/tool.rs +++ b/quickwit/quickwit-cli/src/tool.rs @@ -12,11 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::info; -use quickwit_config::VecSourceParams; -use quickwit_actors::Mailbox; -use quickwit_actors::ActorExitStatus; -use quickwit_proto::types::SourceId; use std::collections::{HashSet, VecDeque}; use std::io::{IsTerminal, Stdout, Write, stdout}; use std::num::NonZeroUsize; @@ -30,7 +25,7 @@ use anyhow::{Context, bail}; use clap::{ArgMatches, Command, arg}; use colored::{ColoredString, Colorize}; use humantime::format_duration; -use quickwit_actors::{ActorHandle, Universe}; +use quickwit_actors::{ActorExitStatus, ActorHandle, Mailbox, Universe}; use quickwit_cluster::{ ChannelTransport, Cluster, ClusterMember, FailureDetectorConfig, make_client_grpc_config, }; @@ -40,22 +35,21 @@ use quickwit_common::uri::Uri; use quickwit_config::service::QuickwitService; use quickwit_config::{ CLI_SOURCE_ID, IndexerConfig, NodeConfig, SourceConfig, SourceInputFormat, SourceParams, - TransformConfig, + TransformConfig, VecSourceParams, }; use quickwit_index_management::{IndexService, clear_cache_directory}; -use quickwit_indexing::BoxedPipelineHandle; use quickwit_indexing::actors::{IndexingService, MergePipeline, MergeSchedulerService}; use quickwit_indexing::models::{ DetachIndexingPipeline, DetachMergePipeline, IndexingStatistics, SpawnPipeline, }; -use quickwit_indexing::IndexingSplitCache; +use quickwit_indexing::{BoxedPipelineHandle, IndexingSplitCache}; use quickwit_ingest::IngesterPool; use quickwit_metastore::IndexMetadataResponseExt; use quickwit_proto::indexing::CpuCapacity; use quickwit_proto::ingest::ingester::IngesterStatus; use quickwit_proto::metastore::{IndexMetadataRequest, MetastoreService, MetastoreServiceClient}; use quickwit_proto::search::{CountHits, SearchResponse}; -use quickwit_proto::types::{IndexId, PipelineUid, SplitId}; +use quickwit_proto::types::{IndexId, PipelineUid, SourceId, SplitId}; use quickwit_search::{SearchResponseRest, single_node_search}; use quickwit_serve::{ BodyFormat, SearchRequestQueryString, SortBy, 
search_request_from_api_request, @@ -66,8 +60,8 @@ use tracing::debug; use crate::checklist::{GREEN_COLOR, RED_COLOR}; use crate::{ - THROUGHPUT_WINDOW_SIZE, config_cli_arg, get_resolvers, load_node_config, run_index_checklist, - start_actor_runtimes, + THROUGHPUT_WINDOW_SIZE, config_cli_arg, get_resolvers, info, load_node_config, + run_index_checklist, start_actor_runtimes, }; pub fn build_tool_command() -> Command { @@ -455,7 +449,8 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< )?; let universe = Universe::new(); let merge_scheduler_service_mailbox = universe.get_or_spawn_one(); - let split_cache = Arc::new(IndexingSplitCache::from_config(&indexer_config, &config.data_dir_path).await?); + let split_cache = + Arc::new(IndexingSplitCache::from_config(&indexer_config, &config.data_dir_path).await?); let indexing_server = IndexingService::new( config.node_id.clone(), config.data_dir_path.clone(), @@ -610,7 +605,7 @@ pub async fn merge_cli(args: MergeArgs) -> anyhow::Result<()> { EventBroker::default(), Arc::new(IndexingSplitCache::no_caching()), ) - .await?; + .await?; let (indexing_service_mailbox, indexing_service_handle) = universe.spawn_builder().spawn(indexing_server); let pipeline_id = indexing_service_mailbox diff --git a/quickwit/quickwit-compaction/Cargo.toml b/quickwit/quickwit-compaction/Cargo.toml index a1e6b5e348b..caf7fde7e1b 100644 --- a/quickwit/quickwit-compaction/Cargo.toml +++ b/quickwit/quickwit-compaction/Cargo.toml @@ -28,7 +28,6 @@ tracing = { workspace = true } tokio = { workspace = true } ulid = { workspace = true } itertools = "0.14.0" -once_cell = "1.21.4" [dev-dependencies] quickwit-actors = { workspace = true, features = ["testsuite"] } diff --git a/quickwit/quickwit-compaction/src/compaction_pipeline.rs b/quickwit/quickwit-compaction/src/compaction_pipeline.rs index 6c66321c6bb..9fc2b261967 100644 --- a/quickwit/quickwit-compaction/src/compaction_pipeline.rs +++ 
b/quickwit/quickwit-compaction/src/compaction_pipeline.rs @@ -15,7 +15,7 @@ use std::sync::Arc; use std::time::Instant; -use quickwit_actors::{ActorHandle, HEARTBEAT, Health, SpawnContext, Supervisable, QueueCapacity}; +use quickwit_actors::{ActorHandle, HEARTBEAT, Health, QueueCapacity, SpawnContext, Supervisable}; use quickwit_common::KillSwitch; use quickwit_common::io::{IoControls, Limiter}; use quickwit_common::pubsub::EventBroker; @@ -27,14 +27,13 @@ use quickwit_indexing::actors::{ }; use quickwit_indexing::merge_policy::MergeOperation; use quickwit_indexing::{IndexingSplitStore, SplitsUpdateMailbox}; +use quickwit_metrics::{counter, gauge, histogram, label_values}; use quickwit_proto::indexing::MergePipelineId; use quickwit_proto::metastore::MetastoreServiceClient; use quickwit_proto::types::{IndexUid, SourceId, SplitId}; use tokio::sync::Semaphore; use tracing::{error, info}; -use quickwit_metrics::{counter, gauge, histogram, label_values}; - use crate::metrics::{ COMPACTION_DURATION, COMPACTIONS_FAILED, COMPACTIONS_IN_PROGRESS, COMPACTIONS_SUCCEEDED, SOURCE_UID_MERGE_LEVEL, diff --git a/quickwit/quickwit-compaction/src/compactor_supervisor.rs b/quickwit/quickwit-compaction/src/compactor_supervisor.rs index d923d5eab5e..128770b02f5 100644 --- a/quickwit/quickwit-compaction/src/compactor_supervisor.rs +++ b/quickwit/quickwit-compaction/src/compactor_supervisor.rs @@ -29,6 +29,7 @@ use quickwit_doc_mapper::DocMapping; use quickwit_indexing::merge_policy::{MergeOperation, merge_policy_from_settings}; use quickwit_indexing::{IndexingSplitCache, IndexingSplitStore}; use quickwit_metastore::SplitMetadata; +use quickwit_metrics::{gauge, label_values}; use quickwit_proto::compaction::{ CompactionFailure, CompactionInProgress, CompactionPlannerService, CompactionPlannerServiceClient, CompactionSuccess, MergeTaskAssignment, ReportStatusRequest, @@ -40,8 +41,6 @@ use quickwit_storage::StorageResolver; use tokio::sync::Semaphore; use tracing::{error, info}; -use 
quickwit_metrics::{gauge, label_values}; - use crate::compaction_pipeline::{CompactionPipeline, PipelineStatus, PipelineStatusUpdate}; use crate::metrics::{AVAILABLE_SLOTS, COMPACTIONS_IN_PROGRESS, SOURCE_UID_MERGE_LEVEL}; use crate::source_uid_metrics_label; diff --git a/quickwit/quickwit-compaction/src/metrics.rs b/quickwit/quickwit-compaction/src/metrics.rs index 34e24240389..64243204e91 100644 --- a/quickwit/quickwit-compaction/src/metrics.rs +++ b/quickwit/quickwit-compaction/src/metrics.rs @@ -13,11 +13,13 @@ // limitations under the License. use quickwit_common::metrics::exponential_buckets; -use quickwit_metrics::{LabelNames, LazyCounter, LazyGauge, LazyHistogram, lazy_counter, lazy_gauge, lazy_histogram, label_names}; +use quickwit_metrics::{ + LabelNames, LazyCounter, LazyGauge, LazyHistogram, label_names, lazy_counter, lazy_gauge, + lazy_histogram, +}; /// Shared label template for the per-compaction metrics keyed by source and merge level. -pub(crate) const SOURCE_UID_MERGE_LEVEL: LabelNames<2> = - label_names!("source_uid", "merge_level"); +pub(crate) const SOURCE_UID_MERGE_LEVEL: LabelNames<2> = label_names!("source_uid", "merge_level"); pub(crate) static COMPACTIONS_IN_PROGRESS: LazyGauge = lazy_gauge!( name: "compactions_in_progress", @@ -48,4 +50,4 @@ pub(crate) static COMPACTION_DURATION: LazyHistogram = lazy_histogram!( description: "duration of compaction merge operations in seconds", subsystem: "compactor", buckets: exponential_buckets(0.5, 2.0, 14).expect("compaction duration buckets should be valid"), -); \ No newline at end of file +); diff --git a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs index 3ec1d201019..77546099715 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_planner.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_planner.rs @@ -22,6 +22,7 @@ use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler}; use 
quickwit_metastore::{ ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, Split, SplitState, }; +use quickwit_metrics::{counter, label_values}; use quickwit_proto::compaction::{ CompactionResult, MergeTaskAssignment, ReportStatusRequest, ReportStatusResponse, }; @@ -34,8 +35,6 @@ use ulid::Ulid; use super::PendingMerge; use super::compaction_state::CompactionState; use super::index_config_metastore::{IndexConfigMetastore, IndexEntry}; -use quickwit_metrics::{counter, label_values}; - use crate::planner::metrics::{METASTORE_ERRORS, NEW_SPLITS_SCANNED, OPERATION, SOURCE_UID}; /// Cap on splits fetched per tick. Every tick, the planner re-scans the immature published set, diff --git a/quickwit/quickwit-compaction/src/planner/compaction_state.rs b/quickwit/quickwit-compaction/src/planner/compaction_state.rs index 8f4b1b71768..59113070da8 100644 --- a/quickwit/quickwit-compaction/src/planner/compaction_state.rs +++ b/quickwit/quickwit-compaction/src/planner/compaction_state.rs @@ -20,16 +20,13 @@ use std::time::{Duration, Instant}; use itertools::Itertools; use quickwit_indexing::merge_policy::MergePolicy; use quickwit_metastore::SplitMetadata; +use quickwit_metrics::{gauge, label_values}; use quickwit_proto::compaction::{CompactionFailure, CompactionInProgress, CompactionSuccess}; use quickwit_proto::types::{DocMappingUid, IndexUid, NodeId, SourceId, SplitId}; use tracing::{error, info, warn}; +use crate::planner::metrics::{SOURCE_UID, SPLITS_NEEDING_COMPACTION, TIMED_OUT_OPERATIONS}; use crate::planner::{PendingMerge, PendingOperations}; -use quickwit_metrics::{gauge, label_values}; - -use crate::planner::metrics::{ - SOURCE_UID, SPLITS_NEEDING_COMPACTION, TIMED_OUT_OPERATIONS, -}; use crate::{TaskId, source_uid_metrics_label}; const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(60); diff --git a/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs b/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs index 
285eede71d4..2c409daea7e 100644 --- a/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs +++ b/quickwit/quickwit-compaction/src/planner/index_config_metastore.rs @@ -19,12 +19,11 @@ use quickwit_config::{IndexConfig, build_doc_mapper}; use quickwit_doc_mapper::DocMapper; use quickwit_indexing::merge_policy::{MergePolicy, merge_policy_from_settings}; use quickwit_metastore::{IndexMetadataResponseExt, SplitMaturity, SplitMetadata}; +use quickwit_metrics::{counter, label_values}; use quickwit_proto::metastore::{IndexMetadataRequest, MetastoreService, MetastoreServiceClient}; use quickwit_proto::types::{DocMappingUid, IndexUid}; use tracing::error; -use quickwit_metrics::{counter, label_values}; - use crate::planner::metrics::{METASTORE_ERRORS, OPERATION}; /// Everything the planner needs to know about a single index. diff --git a/quickwit/quickwit-compaction/src/planner/metrics.rs b/quickwit/quickwit-compaction/src/planner/metrics.rs index 351c2cdb7b9..2333434b8df 100644 --- a/quickwit/quickwit-compaction/src/planner/metrics.rs +++ b/quickwit/quickwit-compaction/src/planner/metrics.rs @@ -12,13 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use quickwit_metrics::{ - LabelNames, LazyCounter, LazyGauge, label_names, lazy_counter, lazy_gauge, -}; +use quickwit_metrics::{LabelNames, LazyCounter, LazyGauge, label_names, lazy_counter, lazy_gauge}; pub(crate) const SOURCE_UID: LabelNames<1> = label_names!("source_uid"); -pub(crate) const SOURCE_UID_MERGE_LEVEL: LabelNames<2> = - label_names!("source_uid", "merge_level"); +pub(crate) const SOURCE_UID_MERGE_LEVEL: LabelNames<2> = label_names!("source_uid", "merge_level"); pub(crate) const OPERATION: LabelNames<1> = label_names!("operation"); pub(crate) static NEW_SPLITS_SCANNED: LazyCounter = lazy_counter!( @@ -49,4 +46,4 @@ pub(crate) static METASTORE_ERRORS: LazyCounter = lazy_counter!( name: "metastore_errors", description: "cumulative number of metastore errors encountered by the compaction planner", subsystem: "compaction_planner", -); \ No newline at end of file +); diff --git a/quickwit/quickwit-compaction/src/planner/mod.rs b/quickwit/quickwit-compaction/src/planner/mod.rs index fa49c978d17..0cc7c3132b9 100644 --- a/quickwit/quickwit-compaction/src/planner/mod.rs +++ b/quickwit/quickwit-compaction/src/planner/mod.rs @@ -22,9 +22,8 @@ use std::collections::BinaryHeap; pub use compaction_planner::CompactionPlanner; use quickwit_indexing::merge_policy::{MergeOperation, compute_merge_score}; -use quickwit_proto::types::{IndexUid, SourceId}; - use quickwit_metrics::{gauge, label_values}; +use quickwit_proto::types::{IndexUid, SourceId}; use crate::planner::metrics::{PENDING_MERGE_OPERATIONS, SOURCE_UID_MERGE_LEVEL}; use crate::source_uid_metrics_label; @@ -81,7 +80,11 @@ impl Ord for PendingMerge { fn cmp(&self, other: &Self) -> Ordering { self.priority_score .cmp(&other.priority_score) - .then_with(|| self.operation.merge_split_id.cmp(&other.operation.merge_split_id)) + .then_with(|| { + self.operation + .merge_split_id + .cmp(&other.operation.merge_split_id) + }) } } diff --git a/quickwit/quickwit-config/src/node_config/mod.rs 
b/quickwit/quickwit-config/src/node_config/mod.rs index 0d1658f1a4c..1cfabf66dd0 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -252,7 +252,7 @@ pub struct CompactorConfig { #[serde(default = "CompactorConfig::default_max_concurrent_merge_executions")] pub max_concurrent_merge_executions: NonZeroUsize, /// Number of pipelines to run per merge executions. Scalar. Since merges perform a lot - /// of IO, multiple concurrent merges can be interleaved. + /// of IO, multiple concurrent merges can be interleaved. #[serde(default = "CompactorConfig::default_pipeline_slots_per_merge_execution")] pub pipeline_slots_per_merge_execution: NonZeroUsize, /// Maximum number of concurrent split uploads across all pipelines. diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs index 9ce1c357f3f..feded092e6f 100644 --- a/quickwit/quickwit-config/src/node_config/serialize.rs +++ b/quickwit/quickwit-config/src/node_config/serialize.rs @@ -365,8 +365,8 @@ fn validate(node_config: &NodeConfig) -> anyhow::Result<()> { { bail!( "the `compactor` service can only be enabled when `enable_standalone_compactors` is \ - true (or `QW_ENABLE_STANDALONE_COMPACTORS=true`). With the default \ - indexer-local merge pipeline, the compactor service must not be enabled." + true (or `QW_ENABLE_STANDALONE_COMPACTORS=true`). With the default indexer-local \ + merge pipeline, the compactor service must not be enabled." ); } validate_disk_usage(node_config); diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 16922b0acef..0807e1f111b 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -12,9 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::models::DetachMergePipeline; -use quickwit_common::io; -use futures::TryStreamExt; use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Formatter}; use std::path::PathBuf; @@ -22,6 +19,7 @@ use std::sync::Arc; use anyhow::Context; use async_trait::async_trait; +use futures::TryStreamExt; use itertools::Itertools; use quickwit_actors::{ Actor, ActorContext, ActorExitStatus, ActorHandle, ActorState, Handler, Healthz, Mailbox, @@ -29,7 +27,7 @@ use quickwit_actors::{ }; use quickwit_cluster::Cluster; use quickwit_common::pubsub::EventBroker; -use quickwit_common::temp_dir; +use quickwit_common::{io, temp_dir}; use quickwit_config::{ INGEST_API_SOURCE_ID, IndexConfig, IndexerConfig, SourceConfig, build_doc_mapper, indexing_pipeline_params_fingerprint, @@ -40,7 +38,8 @@ use quickwit_ingest::{ }; use quickwit_metastore::{ IndexMetadata, IndexMetadataResponseExt, IndexesMetadataResponseExt, - ListIndexesMetadataResponseExt, ListSplitsQuery, ListSplitsRequestExt, ListSplitsResponseExt, SplitMetadata, SplitState, + ListIndexesMetadataResponseExt, ListSplitsQuery, ListSplitsRequestExt, ListSplitsResponseExt, + SplitMetadata, SplitState, }; use quickwit_proto::indexing::{ ApplyIndexingPlanRequest, ApplyIndexingPlanResponse, IndexingError, IndexingPipelineId, @@ -48,22 +47,23 @@ use quickwit_proto::indexing::{ }; use quickwit_proto::metastore::{ IndexMetadataRequest, IndexMetadataSubrequest, IndexesMetadataRequest, - ListIndexesMetadataRequest, MetastoreService, MetastoreServiceClient, ListSplitsRequest, MetastoreResult, + ListIndexesMetadataRequest, ListSplitsRequest, MetastoreResult, MetastoreService, + MetastoreServiceClient, }; use quickwit_proto::types::{IndexId, IndexUid, NodeId, PipelineUid, ShardId}; use quickwit_storage::StorageResolver; use serde::{Deserialize, Serialize}; +use time::OffsetDateTime; use tokio::sync::Semaphore; use tracing::{debug, error, info, warn}; -use crate::models::{DetachIndexingPipeline, ObservePipeline, SpawnPipeline}; 
-use crate::source::{AssignShards, Assignment}; -use time::OffsetDateTime; -use crate::split_store::IndexingSplitCache; -use crate::{IndexingPipeline, IndexingPipelineParams, IndexingSplitStore, IndexingStatistics}; use super::merge_pipeline::{MergePipeline, MergePipelineParams}; use super::pipeline_shared::{ActorPipeline, PipelineHandle}; use super::{FinishPendingMergesAndShutdownPipeline, MergePlanner, MergeSchedulerService}; +use crate::models::{DetachIndexingPipeline, DetachMergePipeline, ObservePipeline, SpawnPipeline}; +use crate::source::{AssignShards, Assignment}; +use crate::split_store::IndexingSplitCache; +use crate::{IndexingPipeline, IndexingPipelineParams, IndexingSplitStore, IndexingStatistics}; /// Name of the indexing directory, usually located at `/indexing`. pub const INDEXING_DIR_NAME: &str = "indexing"; @@ -244,7 +244,7 @@ impl IndexingService { None, None, ) - .await?; + .await?; Ok(pipeline_id) } @@ -354,27 +354,30 @@ impl IndexingService { let doc_mapper = build_doc_mapper(&index_config.doc_mapping, &index_config.search_settings) .map_err(|error| IndexingError::Internal(error.to_string()))?; - let merge_planner_mailbox_opt = if let Some(merge_scheduler_service) = - self.merge_scheduler_service_opt.clone() - { - let merge_pipeline_id = indexing_pipeline_id.merge_pipeline_id(); - let merge_pipeline_params = MergePipelineParams { - pipeline_id: merge_pipeline_id, - doc_mapper: doc_mapper.clone(), - indexing_directory: indexing_directory.clone(), - metastore: self.metastore.clone(), - split_store: split_store.clone(), - merge_scheduler_service, - merge_policy: merge_policy.clone(), - retention_policy: retention_policy.clone(), - merge_io_throughput_limiter_opt: self.merge_io_throughput_limiter_opt.clone(), - max_concurrent_split_uploads: self.max_concurrent_split_uploads, - event_broker: self.event_broker.clone(), + let merge_planner_mailbox_opt = + if let Some(merge_scheduler_service) = self.merge_scheduler_service_opt.clone() { + let 
merge_pipeline_id = indexing_pipeline_id.merge_pipeline_id(); + let merge_pipeline_params = MergePipelineParams { + pipeline_id: merge_pipeline_id, + doc_mapper: doc_mapper.clone(), + indexing_directory: indexing_directory.clone(), + metastore: self.metastore.clone(), + split_store: split_store.clone(), + merge_scheduler_service, + merge_policy: merge_policy.clone(), + retention_policy: retention_policy.clone(), + merge_io_throughput_limiter_opt: self.merge_io_throughput_limiter_opt.clone(), + max_concurrent_split_uploads: self.max_concurrent_split_uploads, + event_broker: self.event_broker.clone(), + }; + Some(self.get_or_create_merge_pipeline( + merge_pipeline_params, + immature_splits_opt, + ctx, + )?) + } else { + None }; - Some(self.get_or_create_merge_pipeline(merge_pipeline_params, immature_splits_opt, ctx)?) - } else { - None - }; let max_concurrent_split_uploads_index = if self.merge_scheduler_service_opt.is_some() { (self.max_concurrent_split_uploads / 2).max(1) @@ -1180,7 +1183,8 @@ mod tests { use quickwit_proto::metastore::{ AddSourceRequest, CreateIndexRequest, DeleteIndexRequest, IndexMetadataResponse, IndexesMetadataResponse, ListIndexesMetadataResponse, ListSplitsResponse, - MockMetastoreService, }; + MockMetastoreService, + }; use super::*; use crate::actors::merge_pipeline::SUPERVISE_LOOP_INTERVAL; @@ -1731,8 +1735,8 @@ mod tests { EventBroker::default(), Arc::new(IndexingSplitCache::no_caching()), ) - .await - .unwrap(); + .await + .unwrap(); let (indexing_server_mailbox, indexing_server_handle) = universe.spawn_builder().spawn(indexing_server); let pipeline_id = indexing_server_mailbox @@ -1828,8 +1832,8 @@ mod tests { EventBroker::default(), Arc::new(IndexingSplitCache::no_caching()), ) - .await - .unwrap(); + .await + .unwrap(); let (indexing_server_mailbox, indexing_server_handle) = universe.spawn_builder().spawn(indexing_server); @@ -1909,8 +1913,8 @@ mod tests { EventBroker::default(), Arc::new(IndexingSplitCache::no_caching()), ) - .await 
- .unwrap(); + .await + .unwrap(); let (indexing_server_mailbox, indexing_server_handle) = universe.spawn_builder().spawn(indexing_server); diff --git a/quickwit/quickwit-metastore/src/lib.rs b/quickwit/quickwit-metastore/src/lib.rs index 4f61d59d29f..7e02fcd0ec9 100644 --- a/quickwit/quickwit-metastore/src/lib.rs +++ b/quickwit/quickwit-metastore/src/lib.rs @@ -48,7 +48,7 @@ pub use metastore::{ ListParquetSplitsQuery, ListParquetSplitsRequestExt, ListParquetSplitsResponseExt, ListSplitsQuery, ListSplitsRequestExt, ListSplitsResponseExt, MetastoreServiceExt, MetastoreServiceStreamSplitsExt, ParquetSplitRecord, PublishParquetSplitsRequestExt, - PublishSplitsRequestExt, StageParquetSplitsRequestExt, StageSplitsRequestExt, SortBy, + PublishSplitsRequestExt, SortBy, StageParquetSplitsRequestExt, StageSplitsRequestExt, UpdateIndexRequestExt, UpdateSourceRequestExt, file_backed, }; pub use metastore_factory::{MetastoreFactory, UnsupportedMetastore}; diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index bcedb87792a..f2c9c5659b3 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -116,8 +116,8 @@ use quickwit_search::{ SearchJobPlacer, SearchService, SearchServiceClient, SearcherContext, SearcherPool, create_search_client_from_channel, start_searcher_service, }; -pub use quickwit_telemetry_exporters::{EnvFilterReloadFn, do_nothing_env_filter_reload_fn}; use quickwit_storage::{SearchSplitCache, StorageResolver}; +pub use quickwit_telemetry_exporters::{EnvFilterReloadFn, do_nothing_env_filter_reload_fn}; use tcp_listener::TcpListenerResolver; use tokio::sync::oneshot; use tonic::codec::CompressionEncoding; @@ -629,7 +629,9 @@ pub async fn serve_quickwit( .await .context("failed to initialize compaction service client")?; - // In the case where a compactor and indexer run on the same node (in a single node deployment, for example), they should share the split cache so that downloads/new splits end 
up in predictable locations. If there's only one service enabled, no harm, no foul. + // In the case where a compactor and indexer run on the same node (in a single node deployment, + // for example), they should share the split cache so that downloads/new splits end up in + // predictable locations. If there's only one service enabled, no harm, no foul. let indexing_split_cache: Arc = if node_config .is_service_enabled(QuickwitService::Indexer) && node_config.is_service_enabled(QuickwitService::Compactor) @@ -2027,10 +2029,8 @@ mod tests { // Standalone compactors disabled: short-circuit returns (None, None) regardless of // which services are enabled. node_config.indexer_config.enable_standalone_compactors = false; - node_config.enabled_services = HashSet::from([ - QuickwitService::Janitor, - QuickwitService::Indexer, - ]); + node_config.enabled_services = + HashSet::from([QuickwitService::Janitor, QuickwitService::Indexer]); let (client_opt, handle_opt) = get_compaction_planner_client_if_needed(&node_config, &cluster, &universe, &metastore) .await diff --git a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs index 1c6e1c585d2..d8c0d0346f4 100644 --- a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs @@ -115,7 +115,8 @@ impl AzureBlobStorage { target_part_num_bytes: ByteSize::mb(100), multipart_threshold_num_bytes: ByteSize::mb(100), max_num_parts: 50_000, // Azure allows up to 50,000 blocks - max_object_num_bytes: ByteSize::b(4_770_000_000_000), // Azure allows up to 4.77TB objects + max_object_num_bytes: ByteSize::b(4_770_000_000_000), /* Azure allows up to + * 4.77TB objects */ max_concurrent_uploads: 100, }, retry_params: RetryParams::aggressive(), @@ -215,8 +216,7 @@ impl AzureBlobStorage { let name = self.blob_name(path); let capacity = 
range_opt.as_ref().map(Range::len).unwrap_or(0); retry(&self.retry_params, || async { - let _timer = - HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_GET_OBJECT_DURATION); + let _timer = HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_GET_OBJECT_DURATION); let (mut response_stream, _in_flight_guards) = if let Some(range) = range_opt.as_ref() { let stream = self .container_client @@ -282,9 +282,8 @@ impl AzureBlobStorage { crate::metrics::OBJECT_STORAGE_PUT_PARTS.inc(); crate::metrics::OBJECT_STORAGE_UPLOAD_NUM_BYTES.inc_by(range.end - range.start); async move { - let _timer = HistogramTimer::new( - &crate::metrics::OBJECT_STORAGE_UPLOAD_PART_DURATION, - ); + let _timer = + HistogramTimer::new(&crate::metrics::OBJECT_STORAGE_UPLOAD_PART_DURATION); retry(&self.retry_params, || async { // zero pad block ids to make them sortable as strings let block_id = format!("block:{:05}", num); From e60210f3d1200d8bb8a2d826eea2c2a08708a727 Mon Sep 17 00:00:00 2001 From: Nadav Gov-Ari Date: Thu, 21 May 2026 10:46:05 -0400 Subject: [PATCH 21/21] lint --- quickwit/quickwit-config/src/node_config/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index faa6ce33e4c..a66d9ae714a 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -216,7 +216,9 @@ impl IndexerConfig { CpuCapacity::one_cpu_thread() * (quickwit_common::num_cpus() as u32) } - fn default_enable_standalone_compactors() -> bool { false } + fn default_enable_standalone_compactors() -> bool { + false + } fn default_parquet_merge_use_streaming_engine() -> bool { false