From 17fbd7a1d5f8ab9e8e4bdaa02a990c892c53eb89 Mon Sep 17 00:00:00 2001 From: guihuawen Date: Sat, 14 Mar 2026 23:41:03 +0800 Subject: [PATCH] Add a version method to the configuration information of CometConf #3700 --- .../scala/org/apache/comet/CometConf.scala | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala index 4d2e37924a..0a8ff9e6df 100644 --- a/common/src/main/scala/org/apache/comet/CometConf.scala +++ b/common/src/main/scala/org/apache/comet/CometConf.scala @@ -86,6 +86,7 @@ object CometConf extends ShimCometConf { val COMET_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.enabled") .category(CATEGORY_EXEC) + .version("0.14.0") .doc( "Whether to enable Comet extension for Spark. When this is turned on, Spark will use " + "Comet to read Parquet data source. Note that to enable native vectorized execution, " + @@ -95,6 +96,7 @@ object CometConf extends ShimCometConf { val COMET_NATIVE_SCAN_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.scan.enabled") .category(CATEGORY_SCAN) + .version("0.14.0") .doc( "Whether to enable native scans. When this is turned on, Spark will use Comet to " + "read supported data sources (currently only Parquet is supported natively). Note " + @@ -106,6 +108,7 @@ object CometConf extends ShimCometConf { val COMET_NATIVE_PARQUET_WRITE_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.parquet.write.enabled") .category(CATEGORY_TESTING) + .version("0.14.0") .doc( "Whether to enable native Parquet write through Comet. When enabled, " + "Comet will intercept Parquet write operations and execute them natively. This " + @@ -120,6 +123,7 @@ object CometConf extends ShimCometConf { val COMET_NATIVE_SCAN_IMPL: ConfigEntry[String] = conf("spark.comet.scan.impl") .category(CATEGORY_PARQUET) + .version("0.14.0") .doc( "The implementation of Comet's Parquet scan to use. Available scans are " + s"`$SCAN_NATIVE_DATAFUSION`, and `$SCAN_NATIVE_ICEBERG_COMPAT`. " + @@ -135,6 +139,7 @@ object CometConf extends ShimCometConf { val COMET_ICEBERG_NATIVE_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.scan.icebergNative.enabled") .category(CATEGORY_SCAN) + .version("0.14.0") .doc( "Whether to enable native Iceberg table scan using iceberg-rust. When enabled, " + "Iceberg tables are read directly through native execution, bypassing Spark's " + @@ -145,6 +150,7 @@ object CometConf extends ShimCometConf { val COMET_ICEBERG_DATA_FILE_CONCURRENCY_LIMIT: ConfigEntry[Int] = conf("spark.comet.scan.icebergNative.dataFileConcurrencyLimit") .category(CATEGORY_SCAN) + .version("0.14.0") .doc( "The number of Iceberg data files to read concurrently within a single task. " + "Higher values improve throughput for tables with many small files by overlapping " + @@ -156,6 +162,7 @@ object CometConf extends ShimCometConf { val COMET_CSV_V2_NATIVE_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.scan.csv.v2.enabled") .category(CATEGORY_TESTING) + .version("0.14.0") .doc( "Whether to use the native Comet V2 CSV reader for improved performance. " + "Default: false (uses standard Spark CSV reader) " + @@ -166,6 +173,7 @@ object CometConf extends ShimCometConf { val COMET_RESPECT_PARQUET_FILTER_PUSHDOWN: ConfigEntry[Boolean] = conf("spark.comet.parquet.respectFilterPushdown") .category(CATEGORY_PARQUET) + .version("0.14.0") .doc( "Whether to respect Spark's PARQUET_FILTER_PUSHDOWN_ENABLED config. This needs to be " + "respected when running the Spark SQL test suite but the default setting " + @@ -177,6 +185,7 @@ object CometConf extends ShimCometConf { val COMET_PARQUET_PARALLEL_IO_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.parquet.read.parallel.io.enabled") .category(CATEGORY_PARQUET) + .version("0.14.0") .doc( "Whether to enable Comet's parallel reader for Parquet files. The parallel reader reads " + "ranges of consecutive data in a file in parallel. It is faster for large files and " + @@ -187,6 +196,7 @@ object CometConf extends ShimCometConf { val COMET_PARQUET_PARALLEL_IO_THREADS: ConfigEntry[Int] = conf("spark.comet.parquet.read.parallel.io.thread-pool.size") .category(CATEGORY_PARQUET) + .version("0.14.0") .doc("The maximum number of parallel threads the parallel reader will use in a single " + "executor. For executors configured with a smaller number of cores, use a smaller number.") .intConf @@ -195,6 +205,7 @@ object CometConf extends ShimCometConf { val COMET_IO_MERGE_RANGES: ConfigEntry[Boolean] = conf("spark.comet.parquet.read.io.mergeRanges") .category(CATEGORY_PARQUET) + .version("0.14.0") .doc( "When enabled the parallel reader will try to merge ranges of data that are separated " + "by less than `comet.parquet.read.io.mergeRanges.delta` bytes. Longer continuous reads " + @@ -205,6 +216,7 @@ object CometConf extends ShimCometConf { val COMET_IO_MERGE_RANGES_DELTA: ConfigEntry[Int] = conf("spark.comet.parquet.read.io.mergeRanges.delta") .category(CATEGORY_PARQUET) + .version("0.14.0") .doc("The delta in bytes between consecutive read ranges below which the parallel reader " + "will try to merge the ranges. The default is 8MB.") .intConf @@ -213,6 +225,7 @@ object CometConf extends ShimCometConf { val COMET_IO_ADJUST_READRANGE_SKEW: ConfigEntry[Boolean] = conf("spark.comet.parquet.read.io.adjust.readRange.skew") .category(CATEGORY_PARQUET) + .version("0.14.0") .doc("In the parallel reader, if the read ranges submitted are skewed in sizes, this " + "option will cause the reader to break up larger read ranges into smaller ranges to " + "reduce the skew. This will result in a slightly larger number of connections opened to " + @@ -223,6 +236,7 @@ object CometConf extends ShimCometConf { val COMET_CONVERT_FROM_PARQUET_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.convert.parquet.enabled") .category(CATEGORY_TESTING) + .version("0.14.0") .doc( "When enabled, data from Spark (non-native) Parquet v1 and v2 scans will be converted to " + "Arrow format. This is an experimental feature and has known issues with " + @@ -233,6 +247,7 @@ object CometConf extends ShimCometConf { val COMET_CONVERT_FROM_JSON_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.convert.json.enabled") .category(CATEGORY_TESTING) + .version("0.14.0") .doc( "When enabled, data from Spark (non-native) JSON v1 and v2 scans will be converted to " + "Arrow format. This is an experimental feature and has known issues with " + @@ -243,6 +258,7 @@ object CometConf extends ShimCometConf { val COMET_CONVERT_FROM_CSV_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.convert.csv.enabled") .category(CATEGORY_TESTING) + .version("0.14.0") .doc( "When enabled, data from Spark (non-native) CSV v1 and v2 scans will be converted to " + "Arrow format. This is an experimental feature and has known issues with " + @@ -252,6 +268,7 @@ object CometConf extends ShimCometConf { val COMET_EXEC_ENABLED: ConfigEntry[Boolean] = conf(s"$COMET_EXEC_CONFIG_PREFIX.enabled") .category(CATEGORY_EXEC) + .version("0.14.0") .doc( "Whether to enable Comet native vectorized execution for Spark. This controls whether " + "Spark should convert operators into their Comet counterparts and execute them in " + @@ -302,6 +319,7 @@ object CometConf extends ShimCometConf { val COMET_NATIVE_COLUMNAR_TO_ROW_ENABLED: ConfigEntry[Boolean] = conf(s"$COMET_EXEC_CONFIG_PREFIX.columnarToRow.native.enabled") .category(CATEGORY_EXEC) + .version("0.14.0") .doc( "Whether to enable native columnar to row conversion. When enabled, Comet will use " + "native Rust code to convert Arrow columnar data to Spark UnsafeRow format instead " + @@ -313,18 +331,21 @@ object CometConf extends ShimCometConf { val COMET_EXEC_SORT_MERGE_JOIN_WITH_JOIN_FILTER_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.exec.sortMergeJoinWithJoinFilter.enabled") .category(CATEGORY_ENABLE_EXEC) + .version("0.14.0") .doc("Experimental support for Sort Merge Join with filter") .booleanConf .createWithDefault(false) val COMET_TRACING_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.tracing.enabled") .category(CATEGORY_TUNING) + .version("0.14.0") .doc(s"Enable fine-grained tracing of events and memory usage. $TRACING_GUIDE.") .booleanConf .createWithDefault(false) val COMET_ONHEAP_MEMORY_OVERHEAD: ConfigEntry[Long] = conf("spark.comet.memoryOverhead") .category(CATEGORY_TESTING) + .version("0.14.0") .doc( "The amount of additional memory to be allocated per executor process for Comet, in MiB, " + "when running Spark in on-heap mode.") @@ -334,6 +355,7 @@ object CometConf extends ShimCometConf { val COMET_EXEC_SHUFFLE_ENABLED: ConfigEntry[Boolean] = conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.enabled") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc( "Whether to enable Comet native shuffle. " + "Note that this requires setting `spark.shuffle.manager` to " + @@ -345,6 +367,7 @@ object CometConf extends ShimCometConf { val COMET_SHUFFLE_MODE: ConfigEntry[String] = conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.mode") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc( "This is test config to allow tests to force a particular shuffle implementation to be " + "used. Valid values are `jvm` for Columnar Shuffle, `native` for Native Shuffle, " + @@ -358,6 +381,7 @@ object CometConf extends ShimCometConf { val COMET_EXEC_BROADCAST_FORCE_ENABLED: ConfigEntry[Boolean] = conf(s"$COMET_EXEC_CONFIG_PREFIX.broadcast.enabled") .category(CATEGORY_EXEC) + .version("0.14.0") .doc( "Whether to force enabling broadcasting for Comet native operators. " + "Comet broadcast feature will be enabled automatically by " + @@ -370,6 +394,7 @@ object CometConf extends ShimCometConf { val COMET_REPLACE_SMJ: ConfigEntry[Boolean] = conf(s"$COMET_EXEC_CONFIG_PREFIX.replaceSortMergeJoin") .category(CATEGORY_EXEC) + .version("0.14.0") .doc("Experimental feature to force Spark to replace SortMergeJoin with ShuffledHashJoin " + s"for improved performance. This feature is not stable yet. $TUNING_GUIDE.") .booleanConf @@ -378,6 +403,7 @@ object CometConf extends ShimCometConf { val COMET_EXEC_SHUFFLE_WITH_HASH_PARTITIONING_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.native.shuffle.partitioning.hash.enabled") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc("Whether to enable hash partitioning for Comet native shuffle.") .booleanConf .createWithDefault(true) @@ -385,6 +411,7 @@ object CometConf extends ShimCometConf { val COMET_EXEC_SHUFFLE_WITH_RANGE_PARTITIONING_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.native.shuffle.partitioning.range.enabled") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc("Whether to enable range partitioning for Comet native shuffle.") .booleanConf .createWithDefault(true) @@ -392,6 +419,7 @@ object CometConf extends ShimCometConf { val COMET_EXEC_SHUFFLE_WITH_ROUND_ROBIN_PARTITIONING_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.native.shuffle.partitioning.roundrobin.enabled") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc( "Whether to enable round robin partitioning for Comet native shuffle. " + "This is disabled by default because Comet's round-robin produces different " + @@ -407,6 +435,7 @@ object CometConf extends ShimCometConf { val COMET_EXEC_SHUFFLE_WITH_ROUND_ROBIN_PARTITIONING_MAX_HASH_COLUMNS: ConfigEntry[Int] = conf("spark.comet.native.shuffle.partitioning.roundrobin.maxHashColumns") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc( "The maximum number of columns to hash for round robin partitioning. " + "When set to 0 (the default), all columns are hashed. " + @@ -422,6 +451,7 @@ object CometConf extends ShimCometConf { val COMET_EXEC_SHUFFLE_COMPRESSION_CODEC: ConfigEntry[String] = conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.compression.codec") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc( "The codec of Comet native shuffle used to compress shuffle data. lz4, zstd, and " + "snappy are supported. Compression can be disabled by setting " + @@ -433,6 +463,7 @@ object CometConf extends ShimCometConf { val COMET_EXEC_SHUFFLE_COMPRESSION_ZSTD_LEVEL: ConfigEntry[Int] = conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.compression.zstd.level") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc("The compression level to use when compressing shuffle files with zstd.") .intConf .createWithDefault(1) @@ -440,6 +471,7 @@ object CometConf extends ShimCometConf { val COMET_COLUMNAR_SHUFFLE_ASYNC_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.columnar.shuffle.async.enabled") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc("Whether to enable asynchronous shuffle for Arrow-based shuffle.") .booleanConf .createWithDefault(false) @@ -447,6 +479,7 @@ object CometConf extends ShimCometConf { val COMET_COLUMNAR_SHUFFLE_ASYNC_THREAD_NUM: ConfigEntry[Int] = conf("spark.comet.columnar.shuffle.async.thread.num") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc( "Number of threads used for Comet async columnar shuffle per shuffle task. " + "Note that more threads means more memory requirement to " + @@ -458,6 +491,7 @@ object CometConf extends ShimCometConf { val COMET_COLUMNAR_SHUFFLE_ASYNC_MAX_THREAD_NUM: ConfigEntry[Int] = { conf("spark.comet.columnar.shuffle.async.max.thread.num") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc("Maximum number of threads on an executor used for Comet async columnar shuffle. " + "This is the upper bound of total number of shuffle " + "threads per executor. In other words, if the number of cores * the number of shuffle " + @@ -471,6 +505,7 @@ object CometConf extends ShimCometConf { val COMET_COLUMNAR_SHUFFLE_SPILL_THRESHOLD: ConfigEntry[Int] = conf("spark.comet.columnar.shuffle.spill.threshold") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc( "Number of rows to be spilled used for Comet columnar shuffle. " + "For every configured number of rows, a new spill file will be created. " + @@ -485,6 +520,7 @@ object CometConf extends ShimCometConf { val COMET_ONHEAP_SHUFFLE_MEMORY_FACTOR: ConfigEntry[Double] = conf("spark.comet.columnar.shuffle.memory.factor") .category(CATEGORY_TESTING) + .version("0.14.0") .doc("Fraction of Comet memory to be allocated per executor process for columnar shuffle " + s"when running in on-heap mode. $TUNING_GUIDE.") .doubleConf @@ -495,6 +531,7 @@ object CometConf extends ShimCometConf { val COMET_BATCH_SIZE: ConfigEntry[Int] = conf("spark.comet.batchSize") .category(CATEGORY_TUNING) + .version("0.14.0") .doc("The columnar batch size, i.e., the maximum number of rows that a batch can contain.") .intConf .checkValue(v => v > 0, "Batch size must be positive") @@ -503,6 +540,7 @@ object CometConf extends ShimCometConf { val COMET_COLUMNAR_SHUFFLE_BATCH_SIZE: ConfigEntry[Int] = conf("spark.comet.columnar.shuffle.batch.size") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc("Batch size when writing out sorted spill files on the native side. Note that " + "this should not be larger than batch size (i.e., `spark.comet.batchSize`). Otherwise " + "it will produce larger batches than expected in the native operator after shuffle.") @@ -515,6 +553,7 @@ object CometConf extends ShimCometConf { val COMET_SHUFFLE_WRITE_BUFFER_SIZE: ConfigEntry[Long] = conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.writeBufferSize") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc("Size of the write buffer in bytes used by the native shuffle writer when writing " + "shuffle data to disk. Larger values may improve write performance by reducing " + "the number of system calls, but will use more memory. " + @@ -526,6 +565,7 @@ object CometConf extends ShimCometConf { val COMET_SHUFFLE_PREFER_DICTIONARY_RATIO: ConfigEntry[Double] = conf( "spark.comet.shuffle.preferDictionary.ratio") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc( "The ratio of total values to distinct values in a string column to decide whether to " + "prefer dictionary encoding when shuffling the column. If the ratio is higher than " + @@ -538,6 +578,7 @@ object CometConf extends ShimCometConf { val COMET_EXCHANGE_SIZE_MULTIPLIER: ConfigEntry[Double] = conf( "spark.comet.shuffle.sizeInBytesMultiplier") .category(CATEGORY_SHUFFLE) + .version("0.14.0") .doc( "Comet reports smaller sizes for shuffle due to using Arrow's columnar memory format " + "and this can result in Spark choosing a different join strategy due to the estimated " + @@ -549,6 +590,7 @@ object CometConf extends ShimCometConf { val COMET_DPP_FALLBACK_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.dppFallback.enabled") .category(CATEGORY_EXEC) + .version("0.14.0") .doc("Whether to fall back to Spark for queries that use DPP.") .booleanConf .createWithDefault(true) @@ -556,6 +598,7 @@ object CometConf extends ShimCometConf { val COMET_DEBUG_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.debug.enabled") .category(CATEGORY_EXEC) + .version("0.14.0") .doc( "Whether to enable debug mode for Comet. " + "When enabled, Comet will do additional checks for debugging purpose. For example, " + @@ -569,6 +612,7 @@ object CometConf extends ShimCometConf { val COMET_DEBUG_MEMORY_ENABLED: ConfigEntry[Boolean] = conf(s"$COMET_PREFIX.debug.memory") .category(CATEGORY_TESTING) + .version("0.14.0") .doc(s"When enabled, log all native memory pool interactions. $DEBUGGING_GUIDE.") .booleanConf .createWithDefault(false) @@ -579,6 +623,7 @@ object CometConf extends ShimCometConf { val COMET_EXTENDED_EXPLAIN_FORMAT: ConfigEntry[String] = conf("spark.comet.explain.format") .category(CATEGORY_EXEC_EXPLAIN) + .version("0.14.0") .doc("Choose extended explain output. The default format of " + s"'$COMET_EXTENDED_EXPLAIN_FORMAT_VERBOSE' will provide the full query plan annotated " + "with fallback reasons as well as a summary of how much of the plan was accelerated " + @@ -592,6 +637,7 @@ object CometConf extends ShimCometConf { val COMET_EXPLAIN_NATIVE_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.explain.native.enabled") .category(CATEGORY_EXEC_EXPLAIN) + .version("0.14.0") .doc( "When this setting is enabled, Comet will provide a tree representation of " + "the native query plan before execution and again after execution, with " + @@ -602,6 +648,7 @@ object CometConf extends ShimCometConf { val COMET_EXPLAIN_TRANSFORMATIONS: ConfigEntry[Boolean] = conf("spark.comet.explain.rules") .category(CATEGORY_EXEC_EXPLAIN) + .version("0.14.0") .doc("When this setting is enabled, Comet will log all plan transformations performed " + "in physical optimizer rules. Default: false") .booleanConf @@ -610,6 +657,7 @@ object CometConf extends ShimCometConf { val COMET_LOG_FALLBACK_REASONS: ConfigEntry[Boolean] = conf("spark.comet.logFallbackReasons.enabled") .category(CATEGORY_EXEC_EXPLAIN) + .version("0.14.0") .doc("When this setting is enabled, Comet will log warnings for all fallback reasons.") .booleanConf .createWithEnvVarOrDefault("ENABLE_COMET_LOG_FALLBACK_REASONS", false) @@ -617,6 +665,7 @@ object CometConf extends ShimCometConf { val COMET_EXPLAIN_FALLBACK_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.explainFallback.enabled") .category(CATEGORY_EXEC_EXPLAIN) + .version("0.14.0") .doc( "When this setting is enabled, Comet will provide logging explaining the reason(s) " + "why a query stage cannot be executed natively. Set this to false to " + @@ -627,6 +676,7 @@ object CometConf extends ShimCometConf { val COMET_PARQUET_ENABLE_DIRECT_BUFFER: ConfigEntry[Boolean] = conf("spark.comet.parquet.enable.directBuffer") .category(CATEGORY_PARQUET) + .version("0.14.0") .doc("Whether to use Java direct byte buffer when reading Parquet.") .booleanConf .createWithDefault(false) @@ -634,6 +684,7 @@ object CometConf extends ShimCometConf { val COMET_ONHEAP_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.exec.onHeap.enabled") .category(CATEGORY_TESTING) + .version("0.14.0") .doc("Whether to allow Comet to run in on-heap mode. Required for running Spark SQL tests.") .booleanConf .createWithEnvVarOrDefault("ENABLE_COMET_ONHEAP", false) @@ -641,6 +692,7 @@ object CometConf extends ShimCometConf { val COMET_OFFHEAP_MEMORY_POOL_TYPE: ConfigEntry[String] = conf("spark.comet.exec.memoryPool") .category(CATEGORY_TUNING) + .version("0.14.0") .doc( "The type of memory pool to be used for Comet native execution when running Spark in " + "off-heap mode. Available pool types are `greedy_unified` and `fair_unified`. " + @@ -651,6 +703,7 @@ object CometConf extends ShimCometConf { val COMET_ONHEAP_MEMORY_POOL_TYPE: ConfigEntry[String] = conf( "spark.comet.exec.onHeap.memoryPool") .category(CATEGORY_TESTING) + .version("0.14.0") .doc( "The type of memory pool to be used for Comet native execution " + "when running Spark in on-heap mode. Available pool types are `greedy`, `fair_spill`, " + @@ -662,6 +715,7 @@ object CometConf extends ShimCometConf { val COMET_OFFHEAP_MEMORY_POOL_FRACTION: ConfigEntry[Double] = conf("spark.comet.exec.memoryPool.fraction") .category(CATEGORY_TUNING) + .version("0.14.0") .doc( "Fraction of off-heap memory pool that is available to Comet. " + "Only applies to off-heap mode. " + @@ -671,6 +725,7 @@ object CometConf extends ShimCometConf { val COMET_NATIVE_LOAD_REQUIRED: ConfigEntry[Boolean] = conf("spark.comet.nativeLoadRequired") .category(CATEGORY_EXEC) + .version("0.14.0") .doc( "Whether to require Comet native library to load successfully when Comet is enabled. " + "If not, Comet will silently fallback to Spark when it fails to load the native lib. " + @@ -681,6 +736,7 @@ object CometConf extends ShimCometConf { val COMET_EXCEPTION_ON_LEGACY_DATE_TIMESTAMP: ConfigEntry[Boolean] = conf("spark.comet.exceptionOnDatetimeRebase") .category(CATEGORY_EXEC) + .version("0.14.0") .doc("Whether to throw exception when seeing dates/timestamps from the legacy hybrid " + "(Julian + Gregorian) calendar. Since Spark 3, dates/timestamps were written according " + "to the Proleptic Gregorian calendar. When this is true, Comet will " + @@ -693,6 +749,7 @@ object CometConf extends ShimCometConf { val COMET_USE_DECIMAL_128: ConfigEntry[Boolean] = conf("spark.comet.use.decimal128") .internal() .category(CATEGORY_EXEC) + .version("0.14.0") .doc("If true, Comet will always use 128 bits to represent a decimal value, regardless of " + "its precision. If false, Comet will use 32, 64 and 128 bits respectively depending on " + "the precision. N.B. this is NOT a user-facing config but should be inferred and set by " + @@ -704,6 +761,7 @@ object CometConf extends ShimCometConf { "spark.comet.use.lazyMaterialization") .internal() .category(CATEGORY_PARQUET) + .version("0.14.0") .doc( "Whether to enable lazy materialization for Comet. When this is turned on, Comet will " + "read Parquet data source lazily for string and binary columns. For filter operations, " + @@ -715,6 +773,7 @@ object CometConf extends ShimCometConf { conf("spark.comet.schemaEvolution.enabled") .internal() .category(CATEGORY_SCAN) + .version("0.14.0") .doc("Whether to enable schema evolution in Comet. For instance, promoting a integer " + "column to a long column, a float column to a double column, etc. This is automatically" + "enabled when reading from Iceberg tables.") @@ -725,6 +784,7 @@ object CometConf extends ShimCometConf { conf("spark.comet.testing.aggregate.partialMode.enabled") .internal() .category(CATEGORY_TESTING) + .version("0.14.0") .doc("This setting is used in unit tests") .booleanConf .createWithDefault(true) @@ -733,6 +793,7 @@ object CometConf extends ShimCometConf { conf("spark.comet.testing.aggregate.finalMode.enabled") .internal() .category(CATEGORY_TESTING) + .version("0.14.0") .doc("This setting is used in unit tests") .booleanConf .createWithDefault(true) @@ -740,6 +801,7 @@ object CometConf extends ShimCometConf { val COMET_SPARK_TO_ARROW_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.sparkToColumnar.enabled") .category(CATEGORY_TESTING) + .version("0.14.0") .doc("Whether to enable Spark to Arrow columnar conversion. When this is turned on, " + "Comet will convert operators in " + "`spark.comet.sparkToColumnar.supportedOperatorList` into Arrow columnar format before " + @@ -750,6 +812,7 @@ object CometConf extends ShimCometConf { val COMET_SPARK_TO_ARROW_SUPPORTED_OPERATOR_LIST: ConfigEntry[Seq[String]] = conf("spark.comet.sparkToColumnar.supportedOperatorList") .category(CATEGORY_TESTING) + .version("0.14.0") .doc("A comma-separated list of operators that will be converted to Arrow columnar " + s"format when `${COMET_SPARK_TO_ARROW_ENABLED.key}` is true.") .stringConf @@ -759,6 +822,7 @@ object CometConf extends ShimCometConf { val COMET_CASE_CONVERSION_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.caseConversion.enabled") .category(CATEGORY_EXEC) + .version("0.14.0") .doc("Java uses locale-specific rules when converting strings to upper or lower case and " + "Rust does not, so we disable upper and lower by default.") .booleanConf @@ -767,6 +831,7 @@ object CometConf extends ShimCometConf { val COMET_PARQUET_UNSIGNED_SMALL_INT_CHECK: ConfigEntry[Boolean] = conf("spark.comet.scan.unsignedSmallIntSafetyCheck") .category(CATEGORY_SCAN) + .version("0.14.0") .doc( "Parquet files may contain unsigned 8-bit integers (UINT_8) which Spark maps to " + "ShortType. When this config is true (default), Comet falls back to Spark for " + @@ -780,6 +845,7 @@ object CometConf extends ShimCometConf { val COMET_EXEC_STRICT_FLOATING_POINT: ConfigEntry[Boolean] = conf("spark.comet.exec.strictFloatingPoint") .category(CATEGORY_EXEC) + .version("0.14.0") .doc( "When enabled, fall back to Spark for floating-point operations that may differ from " + s"Spark, such as when comparing or sorting -0.0 and 0.0. $COMPAT_GUIDE.") @@ -789,6 +855,7 @@ object CometConf extends ShimCometConf { val COMET_METRICS_UPDATE_INTERVAL: ConfigEntry[Long] = conf("spark.comet.metrics.updateInterval") .category(CATEGORY_EXEC) + .version("0.14.0") .doc("The interval in milliseconds to update metrics. If interval is negative," + " metrics will be updated upon task completion.") .longConf @@ -799,6 +866,7 @@ object CometConf extends ShimCometConf { val COMET_LIBHDFS_SCHEMES: OptionalConfigEntry[String] = conf(s"spark.hadoop.$COMET_LIBHDFS_SCHEMES_KEY") .category(CATEGORY_SCAN) + .version("0.14.0") .doc("Defines filesystem schemes (e.g., hdfs, webhdfs) that the native side accesses " + "via libhdfs, separated by commas. Valid only when built with hdfs feature enabled.") .stringConf @@ -808,6 +876,7 @@ object CometConf extends ShimCometConf { val COMET_MAX_TEMP_DIRECTORY_SIZE: ConfigEntry[Long] = conf("spark.comet.maxTempDirectorySize") .category(CATEGORY_EXEC) + .version("0.14.0") .doc("The maximum amount of data (in bytes) stored inside the temporary directories.") .bytesConf(ByteUnit.BYTE) .createWithDefault(100L * 1024 * 1024 * 1024) // 100 GB @@ -815,6 +884,7 @@ object CometConf extends ShimCometConf { val COMET_RESPECT_DATAFUSION_CONFIGS: ConfigEntry[Boolean] = conf(s"$COMET_EXEC_CONFIG_PREFIX.respectDataFusionConfigs") .category(CATEGORY_TESTING) + .version("0.14.0") .doc( "Development and testing configuration option to allow DataFusion configs set in " + "Spark configuration settings starting with `spark.comet.datafusion.` to be passed " + @@ -824,6 +894,7 @@ object CometConf extends ShimCometConf { val COMET_STRICT_TESTING: ConfigEntry[Boolean] = conf(s"$COMET_PREFIX.testing.strict") .category(CATEGORY_TESTING) + .version("0.14.0") .doc("Experimental option to enable strict testing, which will fail tests that could be " + "more comprehensive, such as checking for a specific fallback reason.") .booleanConf