Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions native/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions native/core/src/execution/jni_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ use datafusion_spark::function::datetime::last_day::SparkLastDay;
use datafusion_spark::function::datetime::next_day::SparkNextDay;
use datafusion_spark::function::hash::crc32::SparkCrc32;
use datafusion_spark::function::hash::sha1::SparkSha1;
use datafusion_spark::function::hash::sha2::SparkSha2;
use datafusion_spark::function::map::map_from_entries::MapFromEntries;
use datafusion_spark::function::math::expm1::SparkExpm1;
use datafusion_spark::function::math::hex::SparkHex;
Expand Down Expand Up @@ -390,7 +389,6 @@ fn prepare_datafusion_session_context(
// register UDFs from datafusion-spark crate
fn register_datafusion_spark_function(session_ctx: &SessionContext) {
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkExpm1::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSha2::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(CharFunc::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkBitGet::default()));
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkDateAdd::default()));
Expand Down
24 changes: 10 additions & 14 deletions spark/src/main/scala/org/apache/comet/serde/hash.scala
Original file line number Diff line number Diff line change
Expand Up @@ -68,24 +68,20 @@ object CometMurmur3Hash extends CometExpressionSerde[Murmur3Hash] {
}

object CometSha2 extends CometExpressionSerde[Sha2] {
override def getSupportLevel(expr: Sha2): SupportLevel = {
// Currently, sha2 is not registered in the native engine, so we fall back to Spark
// for all cases until native support is available.
Unsupported(Some("sha2 function is not yet registered in native engine"))
}

override def convert(
expr: Sha2,
inputs: Seq[Attribute],
binding: Boolean): Option[ExprOuterClass.Expr] = {
if (!HashUtils.isSupportedType(expr)) {
return None
}

// It's possible for spark to dynamically compute the number of bits from input
// expression, however DataFusion does not support that yet.
if (!expr.right.foldable) {
withInfo(expr, "For Sha2, non literal numBits is not supported")
return None
}

val leftExpr = exprToProtoInternal(expr.left, inputs, binding)
val numBitsExpr = exprToProtoInternal(expr.right, inputs, binding)
scalarFunctionExprToProtoWithReturnType("sha2", StringType, false, leftExpr, numBitsExpr)
// Currently, sha2 is not registered in the native engine registry.
// Fall back to Spark execution until native support is available.
withInfo(expr, "sha2 function is not yet registered in native engine")
None
}
}

Expand Down
9 changes: 7 additions & 2 deletions spark/src/test/resources/sql-tests/expressions/hash/hash.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@ statement
INSERT INTO test VALUES ('Spark SQL ', 10, 1.2), (NULL, NULL, NULL), ('', 0, 0.0), ('苹果手机', NULL, 3.999999), ('Spark SQL ', 10, 1.2), (NULL, NULL, NULL), ('', 0, 0.0), ('苹果手机', NULL, 3.999999)

query
SELECT md5(col), md5(cast(a as string)), md5(cast(b as string)), hash(col), hash(col, 1), hash(col, 0), hash(col, a, b), hash(b, a, col), xxhash64(col), xxhash64(col, 1), xxhash64(col, 0), xxhash64(col, a, b), xxhash64(b, a, col), sha2(col, 0), sha2(col, 256), sha2(col, 224), sha2(col, 384), sha2(col, 512), sha2(col, 128), sha2(col, -1), sha1(col), sha1(cast(a as string)), sha1(cast(b as string)) FROM test
SELECT md5(col), md5(cast(a as string)), md5(cast(b as string)), hash(col), hash(col, 1), hash(col, 0), hash(col, a, b), hash(b, a, col), xxhash64(col), xxhash64(col, 1), xxhash64(col, 0), xxhash64(col, a, b), xxhash64(b, a, col), sha1(col), sha1(cast(a as string)), sha1(cast(b as string)) FROM test

-- sha2 currently falls back to Spark as it's not yet registered in native engine
query spark_answer_only
SELECT sha2(col, 0), sha2(col, 256), sha2(col, 224), sha2(col, 384), sha2(col, 512), sha2(col, 128), sha2(col, -1) FROM test

-- literal arguments
query ignore(https://github.com/apache/datafusion-comet/issues/3340)
-- sha2 currently falls back to Spark as it's not yet registered in native engine
query expect_fallback(sha2 function is not yet registered in native engine)
SELECT md5('Spark SQL'), sha1('test'), sha2('test', 256), hash('test'), xxhash64('test')
14 changes: 12 additions & 2 deletions spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1891,10 +1891,15 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
|hash(col), hash(col, 1), hash(col, 0), hash(col, a, b), hash(b, a, col),
|xxhash64(col), xxhash64(col, 1), xxhash64(col, 0), xxhash64(col, a, b), xxhash64(b, a, col),
|crc32(col), crc32(cast(a as string)), crc32(cast(b as string)),
|sha2(col, 0), sha2(col, 256), sha2(col, 224), sha2(col, 384), sha2(col, 512), sha2(col, 128), sha2(col, -1),
|sha1(col), sha1(cast(a as string)), sha1(cast(b as string))
|from test
|""".stripMargin)
// sha2 falls back to Spark, so only verify answer correctness
checkSparkAnswer("""
|select
|sha2(col, 0), sha2(col, 256), sha2(col, 224), sha2(col, 384), sha2(col, 512), sha2(col, 128), sha2(col, -1)
|from test
|""".stripMargin)
}
}
}
Expand Down Expand Up @@ -2002,10 +2007,15 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
|hash(col), hash(col, 1), hash(col, 0), hash(col, a, b), hash(b, a, col),
|xxhash64(col), xxhash64(col, 1), xxhash64(col, 0), xxhash64(col, a, b), xxhash64(b, a, col),
|crc32(col), crc32(cast(a as string)),
|sha2(col, 0), sha2(col, 256), sha2(col, 224), sha2(col, 384), sha2(col, 512), sha2(col, 128), sha2(col, -1),
|sha1(col), sha1(cast(a as string))
|from test
|""".stripMargin)
// sha2 falls back to Spark, so only verify answer correctness
checkSparkAnswer("""
|select
|sha2(col, 0), sha2(col, 256), sha2(col, 224), sha2(col, 384), sha2(col, 512), sha2(col, 128), sha2(col, -1)
|from test
|""".stripMargin)
}
}
}
Expand Down
Loading