From 27d9de7c4a9d720a201def7a707f9af1dc6d00bf Mon Sep 17 00:00:00 2001 From: guihuawen Date: Sun, 3 May 2026 11:00:15 +0800 Subject: [PATCH 1/4] [AURON #2221]Eliminate hardcoding in the scenario of determining Iceberg table types. --- .../spark/source/AuronIcebergSourceUtil.scala | 28 +++++++++++++++++++ .../auron/iceberg/IcebergScanSupport.scala | 16 ++++------- 2 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 thirdparty/auron-iceberg/src/main/scala/org/apache/iceberg/spark/source/AuronIcebergSourceUtil.scala diff --git a/thirdparty/auron-iceberg/src/main/scala/org/apache/iceberg/spark/source/AuronIcebergSourceUtil.scala b/thirdparty/auron-iceberg/src/main/scala/org/apache/iceberg/spark/source/AuronIcebergSourceUtil.scala new file mode 100644 index 000000000..6b04f16a3 --- /dev/null +++ b/thirdparty/auron-iceberg/src/main/scala/org/apache/iceberg/spark/source/AuronIcebergSourceUtil.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.iceberg.spark.source + +object AuronIcebergSourceUtil { + + def getClassOfSparkBatchQueryScan(): Class[SparkBatchQueryScan] = { + classOf[SparkBatchQueryScan] + } + + def getClassOfSparkInputPartition(): Class[SparkInputPartition] = { + classOf[SparkInputPartition] + } +} diff --git a/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala b/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala index 1d9efbc3b..16cc06a9a 100644 --- a/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala +++ b/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala @@ -18,18 +18,17 @@ package org.apache.spark.sql.auron.iceberg import scala.collection.JavaConverters._ import scala.util.control.NonFatal - import org.apache.iceberg.{FileFormat, FileScanTask, MetadataColumns} -import org.apache.iceberg.expressions.{And => IcebergAnd, BoundPredicate, Expression => IcebergExpression, Not => IcebergNot, Or => IcebergOr, UnboundPredicate} +import org.apache.iceberg.expressions.{BoundPredicate, UnboundPredicate, And => IcebergAnd, Expression => IcebergExpression, Not => IcebergNot, Or => IcebergOr} import org.apache.spark.internal.Logging import org.apache.spark.sql.auron.NativeConverters -import org.apache.spark.sql.catalyst.expressions.{And => SparkAnd, AttributeReference, EqualTo, Expression => SparkExpression, GreaterThan, GreaterThanOrEqual, In, IsNaN, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Not => SparkNot, Or => SparkOr} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, GreaterThan, GreaterThanOrEqual, In, IsNaN, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, And => SparkAnd, Expression => SparkExpression, Not => SparkNot, Or => SparkOr} import org.apache.spark.sql.connector.read.InputPartition import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BinaryType, DataType, DecimalType, StringType, StructField, StructType} - import org.apache.auron.{protobuf => pb} +import org.apache.iceberg.spark.source.AuronIcebergSourceUtil // fileSchema is read from the data files. partitionSchema carries supported metadata columns // (for example _file) that are materialized as per-file constant values in the native scan. @@ -47,12 +46,7 @@ object IcebergScanSupport extends Logging { val scan = exec.scan val scanClassName = scan.getClass.getName // Only handle Iceberg scans; other sources must stay on Spark's path. - if (!scanClassName.startsWith("org.apache.iceberg.spark.source.")) { - return None - } - - // Changelog scan carries row-level changes; not supported by native COW-only path. - if (scanClassName == "org.apache.iceberg.spark.source.SparkChangelogScan") { + if (!(scan.getClass == AuronIcebergSourceUtil.getClassOfSparkBatchQueryScan)) { return None } @@ -199,7 +193,7 @@ object IcebergScanSupport extends Logging { private def icebergPartition(partition: InputPartition): Option[IcebergPartitionView] = { val className = partition.getClass.getName // Only accept Iceberg SparkInputPartition to access task groups. - if (className != "org.apache.iceberg.spark.source.SparkInputPartition") { + if (className != AuronIcebergSourceUtil.getClassOfSparkInputPartition()) { return None } From ebb64be29cc35529d96bed51375dec928053b865 Mon Sep 17 00:00:00 2001 From: guihuawen Date: Sun, 3 May 2026 12:45:59 +0800 Subject: [PATCH 2/4] [AURON #2221] Remove hard-coded Iceberg scan class name detection using type-based check --- .../spark/sql/auron/iceberg/IcebergScanSupport.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala b/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala index 16cc06a9a..2bfa5fc2a 100644 --- a/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala +++ b/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala @@ -18,17 +18,19 @@ package org.apache.spark.sql.auron.iceberg import scala.collection.JavaConverters._ import scala.util.control.NonFatal + import org.apache.iceberg.{FileFormat, FileScanTask, MetadataColumns} -import org.apache.iceberg.expressions.{BoundPredicate, UnboundPredicate, And => IcebergAnd, Expression => IcebergExpression, Not => IcebergNot, Or => IcebergOr} +import org.apache.iceberg.expressions.{And => IcebergAnd, BoundPredicate, Expression => IcebergExpression, Not => IcebergNot, Or => IcebergOr, UnboundPredicate} +import org.apache.iceberg.spark.source.AuronIcebergSourceUtil import org.apache.spark.internal.Logging import org.apache.spark.sql.auron.NativeConverters -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, GreaterThan, GreaterThanOrEqual, In, IsNaN, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, And => SparkAnd, Expression => SparkExpression, Not => SparkNot, Or => SparkOr} +import org.apache.spark.sql.catalyst.expressions.{And => SparkAnd, AttributeReference, EqualTo, Expression => SparkExpression, GreaterThan, GreaterThanOrEqual, In, IsNaN, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Not => SparkNot, Or => SparkOr} import org.apache.spark.sql.connector.read.InputPartition import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BinaryType, DataType, DecimalType, StringType, StructField, StructType} + import org.apache.auron.{protobuf => pb} -import org.apache.iceberg.spark.source.AuronIcebergSourceUtil // fileSchema is read from the data files. partitionSchema carries supported metadata columns // (for example _file) that are materialized as per-file constant values in the native scan. From 199ee2d5799508cfe1c3a616343cd242ad660e32 Mon Sep 17 00:00:00 2001 From: guihuawen Date: Sun, 3 May 2026 15:04:15 +0800 Subject: [PATCH 3/4] [AURON #2221] Remove hard-coded Iceberg scan class name detection using type-based check --- .../apache/spark/sql/auron/iceberg/IcebergScanSupport.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala b/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala index 2bfa5fc2a..67cd46312 100644 --- a/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala +++ b/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala @@ -195,7 +195,8 @@ object IcebergScanSupport extends Logging { private def icebergPartition(partition: InputPartition): Option[IcebergPartitionView] = { val className = partition.getClass.getName // Only accept Iceberg SparkInputPartition to access task groups. - if (className != AuronIcebergSourceUtil.getClassOfSparkInputPartition()) { + if (partition.getClass + != AuronIcebergSourceUtil.getClassOfSparkInputPartition()) { return None } From 26a471e3e56172cd34d553a9914bd750b789dec1 Mon Sep 17 00:00:00 2001 From: guihuawen Date: Tue, 5 May 2026 14:57:03 +0800 Subject: [PATCH 4/4] [AURON #2221] Remove hard-coded Iceberg scan class name detection using type-based check --- .../apache/spark/sql/auron/iceberg/IcebergScanSupport.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala b/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala index 67cd46312..f2820d154 100644 --- a/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala +++ b/thirdparty/auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala @@ -48,7 +48,7 @@ object IcebergScanSupport extends Logging { val scan = exec.scan val scanClassName = scan.getClass.getName // Only handle Iceberg scans; other sources must stay on Spark's path. - if (!(scan.getClass == AuronIcebergSourceUtil.getClassOfSparkBatchQueryScan)) { + if (!AuronIcebergSourceUtil.getClassOfSparkBatchQueryScan.isInstance(scan)) { return None } @@ -195,8 +195,7 @@ object IcebergScanSupport extends Logging { private def icebergPartition(partition: InputPartition): Option[IcebergPartitionView] = { val className = partition.getClass.getName // Only accept Iceberg SparkInputPartition to access task groups. - if (partition.getClass - != AuronIcebergSourceUtil.getClassOfSparkInputPartition()) { + if (!AuronIcebergSourceUtil.getClassOfSparkInputPartition().isInstance(partition)) { return None }