From ab706312c2ec379a6d7f9a897aaddb29d6652890 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sun, 29 Mar 2026 12:49:12 +0000 Subject: [PATCH 1/3] feat: add insert_range to DeleteVector Expose RoaringTreemap::insert_range through DeleteVector for bulk range insertion of deleted positions, avoiding per-position insert overhead. --- crates/iceberg/src/delete_vector.rs | 89 +++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/crates/iceberg/src/delete_vector.rs b/crates/iceberg/src/delete_vector.rs index df8a10193c..e2bc6ae25d 100644 --- a/crates/iceberg/src/delete_vector.rs +++ b/crates/iceberg/src/delete_vector.rs @@ -45,6 +45,18 @@ impl DeleteVector { self.inner.insert(pos) } + /// Inserts all positions in the range [start, end) into the delete vector. + /// If start >= end, this method does nothing and returns 0. + /// + /// Returns the number of newly inserted positions. + #[allow(unused)] + pub fn insert_range(&mut self, start: u64, end: u64) -> u64 { + if start >= end { + return 0; + } + self.inner.insert_range(start..end) + } + /// Marks the given `positions` as deleted and returns the number of elements appended. /// /// The input slice must be strictly ordered in ascending order, and every value must be greater than all existing values already in the set. @@ -198,4 +210,81 @@ mod tests { let res = dv.insert_positions(&positions); assert!(res.is_err()); } + + #[test] + fn test_insert_range_single_key() { + let mut dv = DeleteVector::default(); + assert_eq!(dv.insert_range(10, 20), 10); + assert_eq!(dv.len(), 10); + for pos in 10..20 { + assert!(dv.iter().any(|p| p == pos), "missing {pos}"); + } + assert!(!dv.iter().any(|p| p == 9)); + assert!(!dv.iter().any(|p| p == 20)); + } + + #[test] + fn test_insert_range_single_position() { + let mut dv = DeleteVector::default(); + assert_eq!(dv.insert_range(42, 43), 1); + assert_eq!(dv.len(), 1); + assert!(dv.iter().any(|p| p == 42)); + assert!(!dv.iter().any(|p| p == 41)); + assert!(!dv.iter().any(|p| p == 43)); + } + + #[test] + fn test_insert_range_across_keys() { + let mut dv = DeleteVector::default(); + let start = (1u64 << 32) - 5; + let end = (1u64 << 32) + 5; + assert_eq!(dv.insert_range(start, end), 10); + assert_eq!(dv.len(), 10); + for pos in start..end { + assert!(dv.iter().any(|p| p == pos), "missing {pos}"); + } + assert!(!dv.iter().any(|p| p == start - 1)); + assert!(!dv.iter().any(|p| p == end)); + } + + #[test] + fn test_insert_range_spanning_three_keys() { + let mut dv = DeleteVector::default(); + let start = 0xFFFFFFF0u64; + let end = (2u64 << 32) | 0x10; + let inserted = dv.insert_range(start, end); + assert_eq!(inserted, end - start); + assert_eq!(dv.len(), end - start); + assert!(dv.iter().any(|p| p == start)); + assert!(dv.iter().any(|p| p == end - 1)); + assert!(dv.iter().any(|p| p == 1u64 << 32)); + assert!(dv.iter().any(|p| p == (1u64 << 32) | 0xFFFFFFF0)); + assert!(!dv.iter().any(|p| p == start - 1)); + assert!(!dv.iter().any(|p| p == end)); + } + + #[test] + fn test_insert_range_empty_when_start_equals_end() { + let mut dv = DeleteVector::default(); + assert_eq!(dv.insert_range(100, 100), 0); + assert_eq!(dv.len(), 0); + } + + #[test] + fn test_insert_range_empty_when_start_greater_than_end() { + let mut dv = DeleteVector::default(); + assert_eq!(dv.insert_range(100, 50), 0); + assert_eq!(dv.len(), 0); + } + + #[test] + fn test_insert_range_large_contiguous() { + let mut dv = DeleteVector::default(); + assert_eq!(dv.insert_range(500, 200_500), 200_000); + assert_eq!(dv.len(), 200_000); + assert!(dv.iter().any(|p| p == 500)); + assert!(dv.iter().any(|p| p == 200_499)); + assert!(!dv.iter().any(|p| p == 499)); + assert!(!dv.iter().any(|p| p == 200_500)); + } } From a7e31f996e6309d914dd23b040390b09397fd388 Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sun, 29 Mar 2026 21:16:09 +0000 Subject: [PATCH 2/3] Add contains() to DeleteVector and fix slow test --- crates/iceberg/src/delete_vector.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/crates/iceberg/src/delete_vector.rs b/crates/iceberg/src/delete_vector.rs index e2bc6ae25d..d690918ef9 100644 --- a/crates/iceberg/src/delete_vector.rs +++ b/crates/iceberg/src/delete_vector.rs @@ -76,6 +76,11 @@ impl DeleteVector { Ok(positions.len()) } + #[allow(unused)] + pub fn contains(&self, pos: u64) -> bool { + self.inner.contains(pos) + } + #[allow(unused)] pub fn len(&self) -> u64 { self.inner.len() @@ -255,12 +260,12 @@ mod tests { let inserted = dv.insert_range(start, end); assert_eq!(inserted, end - start); assert_eq!(dv.len(), end - start); - assert!(dv.iter().any(|p| p == start)); - assert!(dv.iter().any(|p| p == end - 1)); - assert!(dv.iter().any(|p| p == 1u64 << 32)); - assert!(dv.iter().any(|p| p == (1u64 << 32) | 0xFFFFFFF0)); - assert!(!dv.iter().any(|p| p == start - 1)); - assert!(!dv.iter().any(|p| p == end)); + assert!(dv.contains(start)); + assert!(dv.contains(end - 1)); + assert!(dv.contains(1u64 << 32)); + assert!(dv.contains((1u64 << 32) | 0xFFFFFFF0)); + assert!(!dv.contains(start - 1)); + assert!(!dv.contains(end)); } #[test] From dd7956e5d45eef8586ef2b02d904f9ca6467a49f Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Tue, 31 Mar 2026 10:17:43 +0000 Subject: [PATCH 3/3] Panic on reversed range in insert_range for consistency A reversed range (start > end) indicates a bug in the caller and should fail fast with a panic rather than silently return 0. An empty range (start == end) remains a valid no-op. This makes insert_range behavior consistent across the Java, C++, and Rust Iceberg implementations. --- crates/iceberg/src/delete_vector.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/crates/iceberg/src/delete_vector.rs b/crates/iceberg/src/delete_vector.rs index d690918ef9..48e1d70aa9 100644 --- a/crates/iceberg/src/delete_vector.rs +++ b/crates/iceberg/src/delete_vector.rs @@ -46,12 +46,20 @@ impl DeleteVector { } /// Inserts all positions in the range [start, end) into the delete vector. - /// If start >= end, this method does nothing and returns 0. + /// If start == end, this method does nothing and returns 0. + /// + /// # Panics + /// + /// Panics if start > end (a reversed range indicates a bug in the caller). /// /// Returns the number of newly inserted positions. #[allow(unused)] pub fn insert_range(&mut self, start: u64, end: u64) -> u64 { - if start >= end { + assert!( + start <= end, + "insert_range requires start <= end, got [{start}, {end})" + ); + if start == end { return 0; } self.inner.insert_range(start..end) @@ -76,6 +84,7 @@ impl DeleteVector { Ok(positions.len()) } + /// Returns true if the given position is present in the delete vector. #[allow(unused)] pub fn contains(&self, pos: u64) -> bool { self.inner.contains(pos) @@ -276,10 +285,10 @@ mod tests { } #[test] - fn test_insert_range_empty_when_start_greater_than_end() { + #[should_panic(expected = "insert_range requires start <= end")] + fn test_insert_range_reversed_panics() { let mut dv = DeleteVector::default(); - assert_eq!(dv.insert_range(100, 50), 0); - assert_eq!(dv.len(), 0); + dv.insert_range(100, 50); } #[test]