diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index b72ced1ebf3..8d0f421ade2 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -9938,12 +9938,112 @@ impl core::default::Default for vortex_array::display::DisplayOptions pub fn vortex_array::display::DisplayOptions::default() -> Self +pub struct vortex_array::display::BufferExtractor + +pub vortex_array::display::BufferExtractor::show_percent: bool + +impl vortex_array::display::TreeExtractor for vortex_array::display::BufferExtractor + +pub fn vortex_array::display::BufferExtractor::write_details(&self, array: &dyn vortex_array::DynArray, _ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::BufferExtractor::write_header(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + pub struct vortex_array::display::DisplayArrayAs<'a>(pub &'a dyn vortex_array::DynArray, pub vortex_array::display::DisplayOptions) impl core::fmt::Display for vortex_array::display::DisplayArrayAs<'_> pub fn vortex_array::display::DisplayArrayAs<'_>::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub struct vortex_array::display::EncodingSummaryExtractor + +impl vortex_array::display::TreeExtractor for vortex_array::display::EncodingSummaryExtractor + +pub fn vortex_array::display::EncodingSummaryExtractor::write_details(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::EncodingSummaryExtractor::write_header(&self, array: &dyn vortex_array::DynArray, _ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub struct vortex_array::display::MetadataExtractor + +impl vortex_array::display::TreeExtractor for vortex_array::display::MetadataExtractor + +pub fn vortex_array::display::MetadataExtractor::write_details(&self, array: &dyn vortex_array::DynArray, _ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::MetadataExtractor::write_header(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub struct vortex_array::display::NbytesExtractor + +impl vortex_array::display::TreeExtractor for vortex_array::display::NbytesExtractor + +pub fn vortex_array::display::NbytesExtractor::write_details(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::NbytesExtractor::write_header(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub struct vortex_array::display::StatsExtractor + +impl vortex_array::display::TreeExtractor for vortex_array::display::StatsExtractor + +pub fn vortex_array::display::StatsExtractor::write_details(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::StatsExtractor::write_header(&self, array: &dyn vortex_array::DynArray, _ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub struct vortex_array::display::TreeContext + +impl vortex_array::display::TreeContext + +pub fn vortex_array::display::TreeContext::parent_total_size(&self) -> core::option::Option + +pub struct vortex_array::display::TreeDisplay + +impl vortex_array::display::TreeDisplay + +pub fn vortex_array::display::TreeDisplay::default_display(array: vortex_array::ArrayRef) -> Self + +pub fn vortex_array::display::TreeDisplay::new(array: vortex_array::ArrayRef) -> Self + +pub fn vortex_array::display::TreeDisplay::with(self, extractor: E) -> Self + +pub fn vortex_array::display::TreeDisplay::with_boxed(self, extractor: alloc::boxed::Box) -> Self + +impl core::fmt::Display for vortex_array::display::TreeDisplay + +pub fn vortex_array::display::TreeDisplay::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub trait vortex_array::display::TreeExtractor: core::marker::Send + core::marker::Sync + +pub fn vortex_array::display::TreeExtractor::write_details(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::TreeExtractor::write_header(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::display::TreeExtractor for vortex_array::display::BufferExtractor + +pub fn vortex_array::display::BufferExtractor::write_details(&self, array: &dyn vortex_array::DynArray, _ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::BufferExtractor::write_header(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::display::TreeExtractor for vortex_array::display::EncodingSummaryExtractor + +pub fn vortex_array::display::EncodingSummaryExtractor::write_details(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::EncodingSummaryExtractor::write_header(&self, array: &dyn vortex_array::DynArray, _ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::display::TreeExtractor for vortex_array::display::MetadataExtractor + +pub fn vortex_array::display::MetadataExtractor::write_details(&self, array: &dyn vortex_array::DynArray, _ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::MetadataExtractor::write_header(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::display::TreeExtractor for vortex_array::display::NbytesExtractor + +pub fn vortex_array::display::NbytesExtractor::write_details(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::NbytesExtractor::write_header(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::display::TreeExtractor for vortex_array::display::StatsExtractor + +pub fn vortex_array::display::StatsExtractor::write_details(&self, array: &dyn vortex_array::DynArray, ctx: &vortex_array::display::TreeContext, f: &mut dyn core::fmt::Write) -> core::fmt::Result + +pub fn vortex_array::display::StatsExtractor::write_header(&self, array: &dyn vortex_array::DynArray, _ctx: &vortex_array::display::TreeContext, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + pub mod vortex_array::dtype pub use vortex_array::dtype::half @@ -22038,7 +22138,7 @@ pub fn vortex_array::ArrayAdapter::is_host(&self) -> bool pub fn vortex_array::ArrayAdapter::metadata(&self) -> vortex_error::VortexResult>> -pub fn vortex_array::ArrayAdapter::metadata_fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub fn vortex_array::ArrayAdapter::metadata_fmt(&self, f: &mut dyn core::fmt::Write) -> core::fmt::Result pub fn vortex_array::ArrayAdapter::named_buffers(&self) -> alloc::vec::Vec<(alloc::string::String, vortex_array::buffer::BufferHandle)> @@ -22330,7 +22430,7 @@ pub fn vortex_array::ArrayVisitor::is_host(&self) -> bool pub fn vortex_array::ArrayVisitor::metadata(&self) -> vortex_error::VortexResult>> -pub fn vortex_array::ArrayVisitor::metadata_fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub fn vortex_array::ArrayVisitor::metadata_fmt(&self, f: &mut dyn core::fmt::Write) -> core::fmt::Result pub fn vortex_array::ArrayVisitor::named_buffers(&self) -> alloc::vec::Vec<(alloc::string::String, vortex_array::buffer::BufferHandle)> @@ -22358,7 +22458,7 @@ pub fn alloc::sync::Arc::is_host(&self) -> bool pub fn alloc::sync::Arc::metadata(&self) -> vortex_error::VortexResult>> -pub fn alloc::sync::Arc::metadata_fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub fn alloc::sync::Arc::metadata_fmt(&self, f: &mut dyn core::fmt::Write) -> core::fmt::Result pub fn alloc::sync::Arc::named_buffers(&self) -> alloc::vec::Vec<(alloc::string::String, vortex_array::buffer::BufferHandle)> @@ -22386,7 +22486,7 @@ pub fn vortex_array::ArrayAdapter::is_host(&self) -> bool pub fn vortex_array::ArrayAdapter::metadata(&self) -> vortex_error::VortexResult>> -pub fn vortex_array::ArrayAdapter::metadata_fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub fn vortex_array::ArrayAdapter::metadata_fmt(&self, f: &mut dyn core::fmt::Write) -> core::fmt::Result pub fn vortex_array::ArrayAdapter::named_buffers(&self) -> alloc::vec::Vec<(alloc::string::String, vortex_array::buffer::BufferHandle)> diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 79eb980d317..cfd6e0f5b8e 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -737,10 +737,11 @@ impl ArrayVisitor for ArrayAdapter { V::serialize(V::metadata(&self.0)?) } - fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + fn metadata_fmt(&self, f: &mut dyn std::fmt::Write) -> std::fmt::Result { match V::metadata(&self.0) { Err(e) => write!(f, ""), - Ok(metadata) => Debug::fmt(&metadata, f), + #[expect(clippy::use_debug)] + Ok(metadata) => write!(f, "{metadata:?}"), } } diff --git a/vortex-array/src/array/visitor.rs b/vortex-array/src/array/visitor.rs index 24440a99ada..d96b7e38fd3 100644 --- a/vortex-array/src/array/visitor.rs +++ b/vortex-array/src/array/visitor.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::fmt::Formatter; use std::sync::Arc; use vortex_buffer::ByteBuffer; @@ -49,7 +48,7 @@ pub trait ArrayVisitor { fn metadata(&self) -> VortexResult>>; /// Formats a human-readable metadata description. - fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result; + fn metadata_fmt(&self, f: &mut dyn std::fmt::Write) -> std::fmt::Result; /// Checks if all buffers in the array tree are host-resident. /// @@ -102,7 +101,7 @@ impl ArrayVisitor for Arc { self.as_ref().metadata() } - fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + fn metadata_fmt(&self, f: &mut dyn std::fmt::Write) -> std::fmt::Result { self.as_ref().metadata_fmt(f) } diff --git a/vortex-array/src/display/extractor.rs b/vortex-array/src/display/extractor.rs new file mode 100644 index 00000000000..62e5bf7e3d1 --- /dev/null +++ b/vortex-array/src/display/extractor.rs @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt; +use std::fmt::Write; + +use crate::DynArray; + +/// Context threaded through tree traversal for percentage calculations etc. +pub struct TreeContext { + /// Stack of ancestor nbytes values. `None` entries reset the percentage root + /// (e.g. for chunked arrays where each chunk is its own root). + pub(crate) ancestor_sizes: Vec>, +} + +impl TreeContext { + pub(crate) fn new() -> Self { + Self { + ancestor_sizes: Vec::new(), + } + } + + /// The total size used as the denominator for percentage calculations. + /// Returns `None` if there is no ancestor (i.e., this node is the root or + /// a chunk boundary reset the percentage root). + pub fn parent_total_size(&self) -> Option { + self.ancestor_sizes.last().cloned().flatten() + } + + pub(crate) fn push(&mut self, size: Option) { + self.ancestor_sizes.push(size); + } + + pub(crate) fn pop(&mut self) { + self.ancestor_sizes.pop(); + } +} + +/// A formatter wrapper that automatically prepends indentation at the start of each line. +pub(crate) struct IndentedFormatter<'a, 'b> { + inner: &'a mut fmt::Formatter<'b>, + indent: &'a str, + at_line_start: bool, +} + +impl<'a, 'b> IndentedFormatter<'a, 'b> { + pub(crate) fn new(f: &'a mut fmt::Formatter<'b>, indent: &'a str) -> Self { + Self { + inner: f, + indent, + at_line_start: true, + } + } +} + +impl Write for IndentedFormatter<'_, '_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + let mut parts = s.split('\n'); + + if let Some(first) = parts.next() + && !first.is_empty() + { + if self.at_line_start { + self.inner.write_str(self.indent)?; + self.at_line_start = false; + } + self.inner.write_str(first)?; + } + + for part in parts { + self.inner.write_char('\n')?; + self.at_line_start = true; + if !part.is_empty() { + self.inner.write_str(self.indent)?; + self.at_line_start = false; + self.inner.write_str(part)?; + } + } + + Ok(()) + } +} + +/// Trait for contributing display information to tree nodes. +/// +/// Each extractor represents one "dimension" of display (e.g., nbytes, stats, metadata, buffers). +/// Extractors are composable: you can combine any number of them via [`TreeDisplay::with`]. +/// +/// [`TreeDisplay::with`]: super::TreeDisplay::with +pub trait TreeExtractor: Send + Sync { + /// Write header annotations (space-prefixed) to the formatter. + fn write_header( + &self, + array: &dyn DynArray, + ctx: &TreeContext, + f: &mut fmt::Formatter<'_>, + ) -> fmt::Result { + let _ = (array, ctx, f); + Ok(()) + } + + /// Write detail lines below the header. + /// + /// The caller handles indentation — extractors just write their content directly. + fn write_details( + &self, + array: &dyn DynArray, + ctx: &TreeContext, + f: &mut dyn Write, + ) -> fmt::Result { + let _ = (array, ctx, f); + Ok(()) + } +} diff --git a/vortex-array/src/display/extractors/buffer.rs b/vortex-array/src/display/extractors/buffer.rs new file mode 100644 index 00000000000..6bc75c57408 --- /dev/null +++ b/vortex-array/src/display/extractors/buffer.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt; + +use humansize::DECIMAL; +use humansize::format_size; + +use crate::DynArray; +use crate::display::extractor::TreeContext; +use crate::display::extractor::TreeExtractor; + +/// Extractor that adds buffer detail lines. +pub struct BufferExtractor { + /// Whether to show buffer-level percentage of parent nbytes. + pub show_percent: bool, +} + +impl TreeExtractor for BufferExtractor { + fn write_details( + &self, + array: &dyn DynArray, + _ctx: &TreeContext, + f: &mut dyn fmt::Write, + ) -> fmt::Result { + let nbytes = array.nbytes(); + for (name, buffer) in array.named_buffers() { + let loc = if buffer.is_on_device() { + "device" + } else if buffer.is_on_host() { + "host" + } else { + "location-unknown" + }; + let align = if buffer.is_on_host() { + buffer.as_host().alignment().to_string() + } else { + String::new() + }; + + if self.show_percent { + let buffer_percent = if nbytes == 0 { + 0.0 + } else { + 100_f64 * buffer.len() as f64 / nbytes as f64 + }; + writeln!( + f, + "buffer: {} {loc} {} (align={}) ({:.2}%)", + name, + format_size(buffer.len(), DECIMAL), + align, + buffer_percent, + )?; + } else { + writeln!( + f, + "buffer: {} {loc} {} (align={})", + name, + format_size(buffer.len(), DECIMAL), + align, + )?; + } + } + Ok(()) + } +} diff --git a/vortex-array/src/display/extractors/encoding_summary.rs b/vortex-array/src/display/extractors/encoding_summary.rs new file mode 100644 index 00000000000..7431e32b604 --- /dev/null +++ b/vortex-array/src/display/extractors/encoding_summary.rs @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt; + +use crate::DynArray; +use crate::display::DisplayOptions; +use crate::display::extractor::TreeContext; +use crate::display::extractor::TreeExtractor; + +/// Extractor that adds the encoding summary (e.g. `vortex.primitive(i16, len=5)`) to the header. +pub struct EncodingSummaryExtractor; + +impl TreeExtractor for EncodingSummaryExtractor { + fn write_header( + &self, + array: &dyn DynArray, + _ctx: &TreeContext, + f: &mut fmt::Formatter<'_>, + ) -> fmt::Result { + write!(f, " {}", array.display_as(DisplayOptions::MetadataOnly)) + } +} diff --git a/vortex-array/src/display/extractors/metadata.rs b/vortex-array/src/display/extractors/metadata.rs new file mode 100644 index 00000000000..edb373bb37f --- /dev/null +++ b/vortex-array/src/display/extractors/metadata.rs @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt; + +use crate::DynArray; +use crate::display::extractor::TreeContext; +use crate::display::extractor::TreeExtractor; + +/// Extractor that adds a `metadata: ...` detail line. +pub struct MetadataExtractor; + +impl TreeExtractor for MetadataExtractor { + fn write_details( + &self, + array: &dyn DynArray, + _ctx: &TreeContext, + f: &mut dyn fmt::Write, + ) -> fmt::Result { + write!(f, "metadata: ")?; + array.metadata_fmt(f)?; + writeln!(f) + } +} diff --git a/vortex-array/src/display/extractors/mod.rs b/vortex-array/src/display/extractors/mod.rs new file mode 100644 index 00000000000..754ac16a57a --- /dev/null +++ b/vortex-array/src/display/extractors/mod.rs @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod buffer; +mod encoding_summary; +mod metadata; +mod nbytes; +mod stats; + +pub use buffer::BufferExtractor; +pub use encoding_summary::EncodingSummaryExtractor; +pub use metadata::MetadataExtractor; +pub use nbytes::NbytesExtractor; +pub use stats::StatsExtractor; diff --git a/vortex-array/src/display/extractors/nbytes.rs b/vortex-array/src/display/extractors/nbytes.rs new file mode 100644 index 00000000000..5b558657df0 --- /dev/null +++ b/vortex-array/src/display/extractors/nbytes.rs @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt; + +use humansize::DECIMAL; +use humansize::format_size; + +use crate::DynArray; +use crate::display::extractor::TreeContext; +use crate::display::extractor::TreeExtractor; + +/// Extractor that adds `nbytes=X (Y%)` to the header line. +pub struct NbytesExtractor; + +impl TreeExtractor for NbytesExtractor { + fn write_header( + &self, + array: &dyn DynArray, + ctx: &TreeContext, + f: &mut fmt::Formatter<'_>, + ) -> fmt::Result { + let nbytes = array.nbytes(); + let total_size = ctx.parent_total_size().unwrap_or(nbytes); + let percent = if total_size == 0 { + 0.0 + } else { + 100_f64 * nbytes as f64 / total_size as f64 + }; + write!( + f, + " nbytes={} ({:.2}%)", + format_size(nbytes, DECIMAL), + percent + ) + } +} diff --git a/vortex-array/src/display/extractors/stats.rs b/vortex-array/src/display/extractors/stats.rs new file mode 100644 index 00000000000..b93edade32d --- /dev/null +++ b/vortex-array/src/display/extractors/stats.rs @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt::Write; +use std::fmt::{self}; + +use crate::DynArray; +use crate::display::extractor::TreeContext; +use crate::display::extractor::TreeExtractor; +use crate::expr::stats::Stat; +use crate::expr::stats::StatsProvider; + +/// Display wrapper for array statistics in compact format. +/// +/// Produces output like ` [nulls=3, min=5, max=100]` (with leading space). +pub(crate) struct StatsDisplay<'a>(pub(crate) &'a dyn DynArray); + +impl fmt::Display for StatsDisplay<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let stats = self.0.statistics(); + let mut first = true; + + let mut sep = |f: &mut fmt::Formatter<'_>| -> fmt::Result { + if first { + first = false; + f.write_str(" [") + } else { + f.write_str(", ") + } + }; + + // Null count or validity fallback + if let Some(nc) = stats.get(Stat::NullCount) { + if let Ok(n) = usize::try_from(&nc.clone().into_inner()) { + sep(f)?; + write!(f, "nulls={}", n)?; + } else { + sep(f)?; + write!(f, "nulls={}", nc)?; + } + } else if self.0.dtype().is_nullable() { + match self.0.all_valid() { + Ok(true) => { + sep(f)?; + f.write_str("all_valid")?; + } + Ok(false) => { + if self.0.all_invalid().unwrap_or(false) { + sep(f)?; + f.write_str("all_invalid")?; + } + } + Err(e) => { + tracing::warn!("Failed to check validity: {e}"); + sep(f)?; + f.write_str("validity_failed")?; + } + } + } + + // NaN count (only if > 0) + if let Some(nan) = stats.get(Stat::NaNCount) + && let Ok(n) = usize::try_from(&nan.into_inner()) + && n > 0 + { + sep(f)?; + write!(f, "nan={}", n)?; + } + + // Min/Max + if let Some(min) = stats.get(Stat::Min) { + sep(f)?; + write!(f, "min={}", min)?; + } + if let Some(max) = stats.get(Stat::Max) { + sep(f)?; + write!(f, "max={}", max)?; + } + + // Sum + if let Some(sum) = stats.get(Stat::Sum) { + sep(f)?; + write!(f, "sum={}", sum)?; + } + + // Boolean flags (compact) + if let Some(c) = stats.get(Stat::IsConstant) + && bool::try_from(&c.into_inner()).unwrap_or(false) + { + sep(f)?; + f.write_str("const")?; + } + if let Some(s) = stats.get(Stat::IsStrictSorted) { + if bool::try_from(&s.into_inner()).unwrap_or(false) { + sep(f)?; + f.write_str("strict")?; + } + } else if let Some(s) = stats.get(Stat::IsSorted) + && bool::try_from(&s.into_inner()).unwrap_or(false) + { + sep(f)?; + f.write_str("sorted")?; + } + + // Close bracket if we wrote anything + if !first { + f.write_char(']')?; + } + + Ok(()) + } +} + +/// Extractor that adds stats annotations (e.g. `[nulls=3, min=5]`) to the header line. +pub struct StatsExtractor; + +impl TreeExtractor for StatsExtractor { + fn write_header( + &self, + array: &dyn DynArray, + _ctx: &TreeContext, + f: &mut fmt::Formatter<'_>, + ) -> fmt::Result { + write!(f, "{}", StatsDisplay(array)) + } +} diff --git a/vortex-array/src/display/mod.rs b/vortex-array/src/display/mod.rs index 9c34d6642b1..d7db7a15770 100644 --- a/vortex-array/src/display/mod.rs +++ b/vortex-array/src/display/mod.rs @@ -1,12 +1,21 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -mod tree; +mod extractor; +mod extractors; +mod tree_display; use std::fmt::Display; +pub use extractor::TreeContext; +pub use extractor::TreeExtractor; +pub use extractors::BufferExtractor; +pub use extractors::EncodingSummaryExtractor; +pub use extractors::MetadataExtractor; +pub use extractors::NbytesExtractor; +pub use extractors::StatsExtractor; use itertools::Itertools as _; -use tree::TreeDisplayWrapper; +pub use tree_display::TreeDisplay; use crate::DynArray; @@ -390,15 +399,8 @@ impl dyn DynArray + '_ { /// "; /// assert_eq!(format!("{}", array.display_tree_encodings_only()), expected); /// ``` - pub fn display_tree_encodings_only(&self) -> impl Display { - DisplayArrayAs( - self, - DisplayOptions::TreeDisplay { - buffers: false, - metadata: false, - stats: false, - }, - ) + pub fn display_tree_encodings_only(&self) -> TreeDisplay { + self.tree_display_builder().with(EncodingSummaryExtractor) } /// Display the tree of encodings of this array as an indented lists. @@ -420,15 +422,68 @@ impl dyn DynArray + '_ { /// "; /// assert_eq!(format!("{}", array.display_tree()), expected); /// ``` - pub fn display_tree(&self) -> impl Display { - DisplayArrayAs( - self, - DisplayOptions::TreeDisplay { - buffers: true, - metadata: true, - stats: true, - }, - ) + pub fn display_tree(&self) -> TreeDisplay { + TreeDisplay::default_display(self.to_array()) + } + + /// Create a tree display with all built-in extractors (nbytes, stats, metadata, buffers). + /// + /// This is the default, fully-detailed tree display. Use + /// `tree_display_builder()` for a blank slate. + /// + /// # Examples + /// ``` + /// # use vortex_array::IntoArray; + /// # use vortex_buffer::buffer; + /// let array = buffer![0_i16, 1, 2, 3, 4].into_array(); + /// let expected = "root: vortex.primitive(i16, len=5) nbytes=10 B (100.00%) + /// metadata: EmptyMetadata + /// buffer: values host 10 B (align=2) (100.00%) + /// "; + /// assert_eq!(array.tree_display().to_string(), expected); + /// ``` + pub fn tree_display(&self) -> TreeDisplay { + TreeDisplay::default_display(self.to_array()) + } + + /// Create a composable tree display builder with no extractors. + /// + /// With no extractors, only the node names are shown. + /// Add extractors with [`.with()`][TreeDisplay::with] to include additional information. + /// Most builders should start with [`EncodingSummaryExtractor`] to include encoding headers. + /// + /// # Examples + /// ``` + /// # use vortex_array::IntoArray; + /// # use vortex_buffer::buffer; + /// use vortex_array::display::{EncodingSummaryExtractor, NbytesExtractor, MetadataExtractor, BufferExtractor}; + /// + /// let array = buffer![0_i16, 1, 2, 3, 4].into_array(); + /// + /// // Encodings only + /// let encodings = array.tree_display_builder() + /// .with(EncodingSummaryExtractor) + /// .to_string(); + /// assert_eq!(encodings, "root: vortex.primitive(i16, len=5)\n"); + /// + /// // With encoding + nbytes + /// let with_nbytes = array.tree_display_builder() + /// .with(EncodingSummaryExtractor) + /// .with(NbytesExtractor) + /// .to_string(); + /// assert_eq!(with_nbytes, "root: vortex.primitive(i16, len=5) nbytes=10 B (100.00%)\n"); + /// + /// // With encoding, metadata, and buffers + /// let detailed = array.tree_display_builder() + /// .with(EncodingSummaryExtractor) + /// .with(MetadataExtractor) + /// .with(BufferExtractor { show_percent: false }) + /// .to_string(); + /// let expected = "root: vortex.primitive(i16, len=5)\n metadata: EmptyMetadata\n buffer: values host 10 B (align=2)\n"; + /// assert_eq!(detailed, expected); + /// ``` + pub fn tree_display_builder(&self) -> TreeDisplay { + TreeDisplay::new(self.to_array()) } /// Display the array as a formatted table. @@ -497,16 +552,25 @@ impl dyn DynArray + '_ { metadata, stats, } => { - write!( - f, - "{}", - TreeDisplayWrapper { - array: self.to_array(), - buffers: *buffers, - metadata: *metadata, - stats: *stats + let extractors: [(bool, Box); 5] = [ + (true, Box::new(EncodingSummaryExtractor)), + (*stats, Box::new(NbytesExtractor)), + (*stats, Box::new(StatsExtractor)), + (*metadata, Box::new(MetadataExtractor)), + ( + *buffers, + Box::new(BufferExtractor { + show_percent: *stats, + }), + ), + ]; + let mut display = TreeDisplay::new(self.to_array()); + for (enabled, extractor) in extractors { + if enabled { + display = display.with_boxed(extractor); } - ) + } + write!(f, "{display}") } #[cfg(feature = "table-display")] DisplayOptions::TableDisplay => { diff --git a/vortex-array/src/display/tree.rs b/vortex-array/src/display/tree.rs deleted file mode 100644 index 7ff8b2991d9..00000000000 --- a/vortex-array/src/display/tree.rs +++ /dev/null @@ -1,285 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::fmt::Write; -use std::fmt::{self}; - -use humansize::DECIMAL; -use humansize::format_size; -use vortex_error::VortexExpect as _; - -use crate::ArrayRef; -use crate::ArrayVisitor; -use crate::DynArray; -use crate::arrays::Chunked; -use crate::display::DisplayOptions; -use crate::expr::stats::Stat; -use crate::expr::stats::StatsProvider; - -/// Display wrapper for array statistics in compact format. -struct StatsDisplay<'a>(&'a dyn DynArray); - -impl fmt::Display for StatsDisplay<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let stats = self.0.statistics(); - let mut first = true; - - // Helper to write separator - let mut sep = |f: &mut fmt::Formatter<'_>| -> fmt::Result { - if first { - first = false; - f.write_str(" [") - } else { - f.write_str(", ") - } - }; - - // Null count or validity fallback - if let Some(nc) = stats.get(Stat::NullCount) { - if let Ok(n) = usize::try_from(&nc.clone().into_inner()) { - sep(f)?; - write!(f, "nulls={}", n)?; - } else { - sep(f)?; - write!(f, "nulls={}", nc)?; - } - } else if self.0.dtype().is_nullable() { - match self.0.all_valid() { - Ok(true) => { - sep(f)?; - f.write_str("all_valid")?; - } - Ok(false) => { - if self.0.all_invalid().unwrap_or(false) { - sep(f)?; - f.write_str("all_invalid")?; - } - } - Err(e) => { - tracing::warn!("Failed to check validity: {e}"); - sep(f)?; - f.write_str("validity_failed")?; - } - } - } - - // NaN count (only if > 0) - if let Some(nan) = stats.get(Stat::NaNCount) - && let Ok(n) = usize::try_from(&nan.into_inner()) - && n > 0 - { - sep(f)?; - write!(f, "nan={}", n)?; - } - - // Min/Max - if let Some(min) = stats.get(Stat::Min) { - sep(f)?; - write!(f, "min={}", min)?; - } - if let Some(max) = stats.get(Stat::Max) { - sep(f)?; - write!(f, "max={}", max)?; - } - - // Sum - if let Some(sum) = stats.get(Stat::Sum) { - sep(f)?; - write!(f, "sum={}", sum)?; - } - - // Boolean flags (compact) - if let Some(c) = stats.get(Stat::IsConstant) - && bool::try_from(&c.into_inner()).unwrap_or(false) - { - sep(f)?; - f.write_str("const")?; - } - if let Some(s) = stats.get(Stat::IsStrictSorted) { - if bool::try_from(&s.into_inner()).unwrap_or(false) { - sep(f)?; - f.write_str("strict")?; - } - } else if let Some(s) = stats.get(Stat::IsSorted) - && bool::try_from(&s.into_inner()).unwrap_or(false) - { - sep(f)?; - f.write_str("sorted")?; - } - - // Close bracket if we wrote anything - if !first { - f.write_char(']')?; - } - - Ok(()) - } -} - -#[derive(Clone)] -pub(crate) struct TreeDisplayWrapper { - pub(crate) array: ArrayRef, - pub(crate) buffers: bool, - pub(crate) metadata: bool, - pub(crate) stats: bool, -} - -impl fmt::Display for TreeDisplayWrapper { - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - let TreeDisplayWrapper { - array, - buffers, - metadata, - stats, - } = self.clone(); - let mut array_fmt = TreeFormatter { - fmt, - indent: "".to_string(), - ancestor_sizes: Vec::new(), - buffers, - metadata, - stats, - }; - array_fmt.format("root", array) - } -} - -pub struct TreeFormatter<'a, 'b: 'a> { - fmt: &'a mut fmt::Formatter<'b>, - indent: String, - ancestor_sizes: Vec>, - buffers: bool, - metadata: bool, - stats: bool, -} - -impl<'a, 'b: 'a> TreeFormatter<'a, 'b> { - fn format(&mut self, name: &str, array: ArrayRef) -> fmt::Result { - if self.stats { - let nbytes = array.nbytes(); - let total_size = self - .ancestor_sizes - .last() - .cloned() - .flatten() - .unwrap_or(nbytes); - - self.ancestor_sizes.push(if array.is::() { - // Treat each chunk as a new root - None - } else { - // Children will present themselves as a percentage of our size. - Some(nbytes) - }); - let percent = if total_size == 0 { - 0.0 - } else { - 100_f64 * nbytes as f64 / total_size as f64 - }; - - writeln!( - self, - "{}: {} nbytes={} ({:.2}%){}", - name, - array.display_as(DisplayOptions::MetadataOnly), - format_size(nbytes, DECIMAL), - percent, - StatsDisplay(array.as_ref()), - )?; - } else { - writeln!( - self, - "{}: {}", - name, - array.display_as(DisplayOptions::MetadataOnly) - )?; - } - - self.indent(|i| { - if i.metadata { - write!(i, "metadata: ")?; - array.metadata_fmt(i.fmt)?; - writeln!(i.fmt)?; - } - - if i.buffers { - let nbytes = array.nbytes(); - for (name, buffer) in array.named_buffers() { - let loc = if buffer.is_on_device() { - "device" - } else if buffer.is_on_host() { - "host" - } else { - "location-unknown" - }; - let align = if buffer.is_on_host() { - buffer.as_host().alignment().to_string() - } else { - "".to_string() - }; - - if i.stats { - let buffer_percent = if nbytes == 0 { - 0.0 - } else { - 100_f64 * buffer.len() as f64 / nbytes as f64 - }; - writeln!( - i, - "buffer: {} {loc} {} (align={}) ({:.2}%)", - name, - format_size(buffer.len(), DECIMAL), - align, - buffer_percent - )?; - } else { - writeln!( - i, - "buffer: {} {loc} {} (align={})", - name, - format_size(buffer.len(), DECIMAL), - align, - )?; - } - } - } - - Ok(()) - })?; - - self.indent(|i| { - for (name, child) in array - .children_names() - .into_iter() - .zip(array.children().into_iter()) - { - i.format(&name, child)?; - } - Ok(()) - })?; - - if self.stats { - let _ = self - .ancestor_sizes - .pop() - .vortex_expect("pushes and pops are matched"); - } - - Ok(()) - } - - fn indent(&mut self, indented: F) -> fmt::Result - where - F: FnOnce(&mut TreeFormatter) -> fmt::Result, - { - let original_ident = self.indent.clone(); - self.indent += " "; - let res = indented(self); - self.indent = original_ident; - res - } - - fn write_fmt(&mut self, fmt: fmt::Arguments<'_>) -> fmt::Result { - write!(self.fmt, "{}{}", self.indent, fmt) - } -} diff --git a/vortex-array/src/display/tree_display.rs b/vortex-array/src/display/tree_display.rs new file mode 100644 index 00000000000..0b589c39cd2 --- /dev/null +++ b/vortex-array/src/display/tree_display.rs @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt; + +use crate::ArrayRef; +use crate::arrays::Chunked; +use crate::display::extractor::IndentedFormatter; +use crate::display::extractor::TreeContext; +use crate::display::extractor::TreeExtractor; +use crate::display::extractors::BufferExtractor; +use crate::display::extractors::EncodingSummaryExtractor; +use crate::display::extractors::MetadataExtractor; +use crate::display::extractors::NbytesExtractor; +use crate::display::extractors::StatsExtractor; + +/// Composable tree display builder. +/// +/// Use `tree_display()` for the default display with all built-in extractors, +/// or `tree_display_builder()` to start with a blank slate and compose your own: +/// +/// ``` +/// # use vortex_array::IntoArray; +/// # use vortex_buffer::buffer; +/// use vortex_array::display::{EncodingSummaryExtractor, NbytesExtractor, MetadataExtractor, BufferExtractor}; +/// +/// let array = buffer![0_i16, 1, 2, 3, 4].into_array(); +/// +/// // Default: all built-in extractors +/// let full = array.tree_display(); +/// +/// // Custom: pick only what you need +/// let custom = array.tree_display_builder() +/// .with(EncodingSummaryExtractor) +/// .with(NbytesExtractor) +/// .with(MetadataExtractor); +/// ``` +pub struct TreeDisplay { + array: ArrayRef, + extractors: Vec>, +} + +impl TreeDisplay { + /// Create a new tree display for the given array with no extractors. + /// + /// With no extractors, only node names and the tree structure are shown. + /// Use [`Self::default_display`] for the standard set of all built-in extractors. + pub fn new(array: ArrayRef) -> Self { + Self { + array, + extractors: Vec::new(), + } + } + + /// Create a tree display with all built-in extractors: encoding summary, nbytes, stats, + /// metadata, and buffers. + pub fn default_display(array: ArrayRef) -> Self { + Self::new(array) + .with(EncodingSummaryExtractor) + .with(NbytesExtractor) + .with(StatsExtractor) + .with(MetadataExtractor) + .with(BufferExtractor { show_percent: true }) + } + + /// Add an extractor to the display pipeline. + pub fn with(mut self, extractor: E) -> Self { + self.extractors.push(Box::new(extractor)); + self + } + + /// Add a pre-boxed extractor to the display pipeline. + pub fn with_boxed(mut self, extractor: Box) -> Self { + self.extractors.push(extractor); + self + } + + /// Recursively write a node and all its descendants directly to the formatter. + fn write_node( + &self, + name: &str, + array: &ArrayRef, + ctx: &mut TreeContext, + indent: &str, + f: &mut fmt::Formatter<'_>, + ) -> fmt::Result { + // Header line: "{indent}{name}:{annotations...}\n" + write!(f, "{indent}{name}:")?; + for extractor in &self.extractors { + extractor.write_header(array.as_ref(), ctx, f)?; + } + writeln!(f)?; + + // Detail lines + let child_indent = format!("{indent} "); + { + let mut indented = IndentedFormatter::new(f, &child_indent); + for extractor in &self.extractors { + extractor.write_details(array.as_ref(), ctx, &mut indented)?; + } + } + + // Push context for children: chunked arrays reset the percentage root + let child_size = if array.is::() { + None + } else { + Some(array.nbytes()) + }; + ctx.push(child_size); + + // Recurse into children + for (child_name, child) in array.children_names().into_iter().zip(array.children()) { + self.write_node(&child_name, &child, ctx, &child_indent, f)?; + } + + ctx.pop(); + + Ok(()) + } +} + +impl fmt::Display for TreeDisplay { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut ctx = TreeContext::new(); + self.write_node("root", &self.array, &mut ctx, "", f) + } +} diff --git a/vortex-python/src/object_store/registry.rs b/vortex-python/src/object_store/registry.rs index e20b6ef46ab..13a16e3c36c 100644 --- a/vortex-python/src/object_store/registry.rs +++ b/vortex-python/src/object_store/registry.rs @@ -172,7 +172,7 @@ mod tests { } #[test] - #[allow(clippy::use_debug)] + #[expect(clippy::use_debug)] fn test_resolve_url() { with_var("AWS_REGION", "us-east-3", || { let registry = Registry::default();