From fd91871c41b0a1b788b173a30a1662e87c464655 Mon Sep 17 00:00:00 2001 From: Qi Zhu <821684824@qq.com> Date: Tue, 31 Mar 2026 15:01:54 +0800 Subject: [PATCH] feat: feature-gate datafusion-substrait behind optional 'substrait' feature Makes datafusion-substrait an optional dependency in datafusion-sqllogictest, gated behind a new 'substrait' feature flag. This saves compile time when substrait is not needed (the common case for most developers). Changes: - datafusion-substrait is now optional in sqllogictest/Cargo.toml - Added 'substrait' feature flag - Gated substrait imports, module, and round-trip function behind cfg - Added no-op fallback when feature is disabled - Updated CI to pass --features substrait for round-trip test --- .github/workflows/rust.yml | 8 +++---- datafusion/sqllogictest/Cargo.toml | 3 ++- datafusion/sqllogictest/bin/sqllogictests.rs | 22 +++++++++++++++++--- datafusion/sqllogictest/src/engines/mod.rs | 2 ++ datafusion/sqllogictest/src/lib.rs | 1 + 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 0d12ddc375718..ba2a206b5fa63 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -305,7 +305,7 @@ jobs: --lib \ --tests \ --bins \ - --features serde,avro,json,backtrace,integration-tests,parquet_encryption + --features serde,avro,json,backtrace,integration-tests,parquet_encryption,substrait - name: Verify Working Directory Clean run: git diff --exit-code # Check no temporary directories created during test. @@ -473,7 +473,7 @@ jobs: export RUST_MIN_STACK=20971520 export TPCH_DATA=`realpath datafusion/sqllogictest/test_files/tpch/data` cargo test plan_q --package datafusion-benchmarks --profile ci --features=ci -- --test-threads=1 - INCLUDE_TPCH=true cargo test --features backtrace,parquet_encryption --profile ci --package datafusion-sqllogictest --test sqllogictests + INCLUDE_TPCH=true cargo test --features backtrace,parquet_encryption,substrait --profile ci --package datafusion-sqllogictest --test sqllogictests - name: Verify Working Directory Clean run: git diff --exit-code @@ -537,7 +537,7 @@ jobs: # command cannot be run for all the .slt files. Run it for just one that works (limit.slt) # until most of the tickets in https://github.com/apache/datafusion/issues/16248 are addressed # and this command can be run without filters. - run: cargo test --test sqllogictests -- --substrait-round-trip limit.slt + run: cargo test -p datafusion-sqllogictest --test sqllogictests --features substrait -- --substrait-round-trip limit.slt # Temporarily commenting out the Windows flow, the reason is enormously slow running build # Waiting for new Windows 2025 github runner @@ -570,7 +570,7 @@ jobs: uses: ./.github/actions/setup-macos-aarch64-builder - name: Run tests (excluding doctests) shell: bash - run: cargo test --profile ci --exclude datafusion-cli --workspace --lib --tests --bins --features avro,json,backtrace,integration-tests + run: cargo test --profile ci --exclude datafusion-cli --workspace --lib --tests --bins --features avro,json,backtrace,integration-tests,substrait vendor: name: Verify Vendored Code diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index 7a341988d5803..d7bb2583c9d8c 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -48,7 +48,7 @@ chrono = { workspace = true, optional = true } clap = { version = "4.5.60", features = ["derive", "env"] } datafusion = { workspace = true, default-features = true, features = ["avro"] } datafusion-spark = { workspace = true, features = ["core"] } -datafusion-substrait = { workspace = true, default-features = true } +datafusion-substrait = { workspace = true, default-features = true, optional = true } futures = { workspace = true } half = { workspace = true, default-features = true } indicatif = "0.18" @@ -79,6 +79,7 @@ postgres = [ parquet_encryption = [ "datafusion/parquet_encryption", ] +substrait = ["datafusion-substrait"] [dev-dependencies] env_logger = { workspace = true } diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index cb76cc30fd08f..8ca19025bd7b7 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -19,11 +19,13 @@ use clap::{ColorChoice, Parser}; use datafusion::common::instant::Instant; use datafusion::common::utils::get_available_parallelism; use datafusion::common::{DataFusionError, Result, exec_datafusion_err, exec_err}; +#[cfg(feature = "substrait")] +use datafusion_sqllogictest::DataFusionSubstraitRoundTrip; use datafusion_sqllogictest::TestFile; use datafusion_sqllogictest::{ - CurrentlyExecutingSqlTracker, DataFusion, DataFusionSubstraitRoundTrip, Filter, - TestContext, df_value_validator, read_dir_recursive, setup_scratch_dir, - should_skip_file, should_skip_record, value_normalizer, + CurrentlyExecutingSqlTracker, DataFusion, Filter, TestContext, df_value_validator, + read_dir_recursive, setup_scratch_dir, should_skip_file, should_skip_record, + value_normalizer, }; use futures::stream::StreamExt; use indicatif::{ @@ -426,6 +428,7 @@ fn is_env_truthy(name: &str) -> bool { }) } +#[cfg(feature = "substrait")] async fn run_test_file_substrait_round_trip( test_file: TestFile, validator: Validator, @@ -468,6 +471,19 @@ async fn run_test_file_substrait_round_trip( res } +#[cfg(not(feature = "substrait"))] +async fn run_test_file_substrait_round_trip( + _test_file: TestFile, + _validator: Validator, + _mp: MultiProgress, + _mp_style: ProgressStyle, + _filters: &[Filter], + _currently_executing_sql_tracker: CurrentlyExecutingSqlTracker, + _colored_output: bool, +) -> Result<()> { + exec_err!("Cannot run substrait round-trip: the 'substrait' feature is not enabled") +} + async fn run_test_file( test_file: TestFile, validator: Validator, diff --git a/datafusion/sqllogictest/src/engines/mod.rs b/datafusion/sqllogictest/src/engines/mod.rs index ee2987db07593..0c9643b1c73a0 100644 --- a/datafusion/sqllogictest/src/engines/mod.rs +++ b/datafusion/sqllogictest/src/engines/mod.rs @@ -19,6 +19,7 @@ mod conversion; mod currently_executed_sql; mod datafusion_engine; +#[cfg(feature = "substrait")] mod datafusion_substrait_roundtrip_engine; mod output; @@ -26,6 +27,7 @@ pub use datafusion_engine::DFSqlLogicTestError; pub use datafusion_engine::DataFusion; pub use datafusion_engine::convert_batches; pub use datafusion_engine::convert_schema_to_types; +#[cfg(feature = "substrait")] pub use datafusion_substrait_roundtrip_engine::DataFusionSubstraitRoundTrip; pub use output::DFColumnType; pub use output::DFOutput; diff --git a/datafusion/sqllogictest/src/lib.rs b/datafusion/sqllogictest/src/lib.rs index bb12c58bdcc20..6b6c40365f855 100644 --- a/datafusion/sqllogictest/src/lib.rs +++ b/datafusion/sqllogictest/src/lib.rs @@ -34,6 +34,7 @@ pub use engines::DFColumnType; pub use engines::DFOutput; pub use engines::DFSqlLogicTestError; pub use engines::DataFusion; +#[cfg(feature = "substrait")] pub use engines::DataFusionSubstraitRoundTrip; pub use engines::convert_batches; pub use engines::convert_schema_to_types;