From faef9e36384187e167eb4425da9174f0a1b04cb3 Mon Sep 17 00:00:00 2001
From: Ruzel Ibragimov
Date: Mon, 2 Feb 2026 16:48:27 +0000
Subject: [PATCH] test

---
 kernel/src/engine/default/stats.rs | 96 +++++++++++++++++++++++++++++-
 1 file changed, 95 insertions(+), 1 deletion(-)

diff --git a/kernel/src/engine/default/stats.rs b/kernel/src/engine/default/stats.rs
index 2232ef1274..71324ac91e 100644
--- a/kernel/src/engine/default/stats.rs
+++ b/kernel/src/engine/default/stats.rs
@@ -529,7 +529,8 @@ pub(crate) fn collect_stats(
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::arrow::array::{Array, Int64Array, StringArray};
+    use crate::arrow::array::{Array, Int32Array, Int64Array, StringArray};
+    use crate::arrow::buffer::NullBuffer;
     use crate::arrow::datatypes::{Fields, Schema};
     use crate::expressions::column_name;
 
@@ -1165,4 +1166,97 @@ mod tests {
         assert!(max_values.column_by_name("id").is_some());
         assert!(max_values.column_by_name("list_col").is_none());
     }
+
+    #[test]
+    fn test_collect_stats_struct_with_nulls_at_struct_level() {
+        // Schema: { my_struct: { a: int32, b: int32 (nullable) } }
+        // Exercises struct-level nulls and field-level nulls in the same batch.
+        let child_fields = Fields::from(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Int32, true), // nullable
+        ]);
+
+        let a_values = Int32Array::from(vec![1, 2, 3, 4]);
+        // b has field-level nulls at rows 0 and 2
+        let b_values = Int32Array::from(vec![None, Some(20), None, Some(40)]);
+
+        // Struct-level validity mask: [false, true, true, false] (true = valid, false = null).
+        // Rows 0 and 3 are therefore null structs (the entire struct is null).
+        let nulls = NullBuffer::from(vec![false, true, true, false]);
+
+        let struct_array = StructArray::new(
+            child_fields.clone(),
+            vec![Arc::new(a_values), Arc::new(b_values)],
+            Some(nulls),
+        );
+
+        let schema = Schema::new(vec![Field::new(
+            "my_struct",
+            DataType::Struct(child_fields),
+            true,
+        )]);
+
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(struct_array)]).unwrap();
+
+        let stats = collect_stats(&batch, &[column_name!("my_struct")]).unwrap();
+
+        // Check null counts for child fields
+        // NOTE(review): expected? Counts are field-level only; struct-null rows 0 and 3 add nothing.
+        assert_eq!(
+            get_stat::<crate::arrow::datatypes::Int64Type>(&stats, "nullCount", "my_struct", "a"),
+            0
+        );
+        assert_eq!(
+            get_stat::<crate::arrow::datatypes::Int64Type>(&stats, "nullCount", "my_struct", "b"),
+            2
+        );
+
+        // Check minValues. NOTE(review): a's 1 sits in struct-null row 0 -- is ignoring the mask expected?
+        assert_eq!(
+            get_stat::<crate::arrow::datatypes::Int32Type>(&stats, "minValues", "my_struct", "a"),
+            1
+        );
+        // 20 (row 1) is the only `b` value that is valid at both the struct and the field level.
+        assert_eq!(
+            get_stat::<crate::arrow::datatypes::Int32Type>(&stats, "minValues", "my_struct", "b"),
+            20
+        );
+
+        // Check maxValues. NOTE(review): 4 and 40 sit in struct-null row 3 -- same open question.
+        assert_eq!(
+            get_stat::<crate::arrow::datatypes::Int32Type>(&stats, "maxValues", "my_struct", "a"),
+            4
+        );
+        assert_eq!(
+            get_stat::<crate::arrow::datatypes::Int32Type>(&stats, "maxValues", "my_struct", "b"),
+            40
+        );
+    }
+
+    // Test helper: row 0 of stats.<stat_name>.<struct_name>.<field_name> as T::Native; unwraps each level.
+    fn get_stat<T>(
+        stats: &StructArray,
+        stat_name: &str,
+        struct_name: &str,
+        field_name: &str,
+    ) -> T::Native
+    where
+        T: crate::arrow::datatypes::ArrowPrimitiveType,
+    {
+        stats
+            .column_by_name(stat_name)
+            .unwrap()
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .unwrap()
+            .column_by_name(struct_name)
+            .unwrap()
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .unwrap()
+            .column_by_name(field_name)
+            .unwrap()
+            .as_primitive::<T>()
+            .value(0)
+    }
 }