From ce1c7444db3c90734be69e8a85a24443718fbf7b Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 4 Jun 2026 10:22:02 +0200 Subject: [PATCH 1/6] [SPARK-57256][SQL] Cast nanosecond-precision timestamps to string ### What changes were proposed in this pull request? Implement casting of the nanosecond-precision timestamp types `TIMESTAMP_NTZ(p)` (`TimestampNTZNanosType`) and `TIMESTAMP_LTZ(p)` (`TimestampLTZNanosType`), `p` in [7, 9], to `STRING`. Casting is implemented in `ToStringBase` (mixed into `Cast`), so this also fixes `ToPrettyString` (and therefore `Dataset.show()`) for these types via the shared base. The change wires the SPARK-57162 formatter methods into the existing cast-to-string paths (interpreted and codegen): - `TimestampLTZNanosType(p)` -> `TimestampFormatter.formatNanos(v, p)` (session time zone). - `TimestampNTZNanosType(p)` -> `TimestampFormatter.formatWithoutTimeZoneNanos(v, p)` (zone-independent, UTC wall-clock grid). The fractional-second precision `p` is taken from the source type; sub-`p` digits are floored and trailing zeros are trimmed, consistent with the microsecond cast path (both use `FractionTimestampFormatter`). `Cast.needsTimeZone` is extended so that `TimestampLTZNanosType -> StringType` resolves the session time zone (mirroring `TimestampType -> StringType`); the NTZ variant does not need a time zone. ### Why are the changes needed? Today `Cast` permits these casts at analysis time (the generic `(_, StringType)` rule), but at runtime the nanosecond types have no dedicated case in `ToStringBase` and fall through to the default `String.valueOf(...)` branch, producing the internal form `TimestampNanosVal(epochMicros, nanosWithinMicro)` instead of a proper SQL timestamp string. A correct textual representation is a prerequisite for nanosecond support in expressions, SHOW/pretty output, and downstream text-based sinks. ### Does this PR introduce _any_ user-facing change? 
User-facing only when `spark.sql.timestampNanosTypes.enabled=true`; these types are not available otherwise. Casting to string never fails, so ANSI and non-ANSI modes behave identically. With `spark.sql.timestampNanosTypes.enabled=true`: ``` SELECT CAST(ts AS STRING); -- TIMESTAMP_NTZ(9) value 2020-01-01 00:00:00.123456789 -- before: TimestampNanosVal(1577836800123456, 789) -- after: 2020-01-01 00:00:00.123456789 ``` ### How was this patch tested? New cases in `CastSuiteBase` (run under both ANSI on/off; `checkEvaluation` exercises the interpreted and codegen paths): precision 7/8/9, trailing-zero trimming, `nanosWithinMicro` 0 and 999, LTZ time-zone shift under a non-UTC session zone vs. NTZ remaining unshifted, pre-epoch and year-9999 boundaries, and null input. ### Was this patch authored or co-authored using generative AI tooling? Generated-by: Cursor --- .../spark/sql/catalyst/expressions/Cast.scala | 3 +- .../catalyst/expressions/ToStringBase.scala | 20 ++++- .../catalyst/expressions/CastSuiteBase.scala | 83 +++++++++++++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index a1935c739643..4a57ac4eeeed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -342,8 +342,9 @@ object Cast extends QueryErrorsBase { case (TimestampType, TimestampNTZType) => true case (TimestampNTZType, TimestampType) => true // NTZ string is zone-independent (mirroring micro TIMESTAMP_NTZ, which is not listed); only - // the LTZ string parse depends on the session time zone. + // the LTZ string parse/render depends on the session time zone. 
case (_: StringType, _: TimestampLTZNanosType) => true + case (_: TimestampLTZNanosType, _: StringType) => true case (ArrayType(fromType, _), ArrayType(toType, _)) => needsTimeZone(fromType, toType) case (MapType(fromKey, fromValue, _), MapType(toKey, toValue, _)) => needsTimeZone(fromKey, toKey) || needsTimeZone(fromValue, toValue) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala index 1f157d6ac18a..5c1ac774c978 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle import org.apache.spark.sql.types._ import org.apache.spark.sql.types.ops.TypeApiOps import org.apache.spark.unsafe.UTF8StringBuilder -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.unsafe.types.{CalendarInterval, TimestampNanosVal, UTF8String} import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.SparkStringUtils @@ -82,6 +82,12 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression => acceptAny[Long](t => UTF8String.fromString(timestampFormatter.format(t))) case TimestampNTZType => acceptAny[Long](t => UTF8String.fromString(timestampNTZFormatter.format(t))) + case t: TimestampLTZNanosType => + acceptAny[TimestampNanosVal](v => + UTF8String.fromString(timestampFormatter.formatNanos(v, t.precision))) + case t: TimestampNTZNanosType => + acceptAny[TimestampNanosVal](v => + UTF8String.fromString(timestampNTZFormatter.formatWithoutTimeZoneNanos(v, t.precision))) case _: TimeType => acceptAny[Long](t => UTF8String.fromString(timeFormatter.format(t))) case ArrayType(et, _) => @@ -235,6 +241,18 @@ trait ToStringBase { self: UnaryExpression with 
TimeZoneAwareExpression => ctx.addReferenceObj("timestampNTZFormatter", timestampNTZFormatter), timestampNTZFormatter.getClass) (c, evPrim) => code"$evPrim = UTF8String.fromString($tf.format($c));" + case t: TimestampLTZNanosType => + val tf = JavaCode.global( + ctx.addReferenceObj("timestampFormatter", timestampFormatter), + timestampFormatter.getClass) + (c, evPrim) => + code"$evPrim = UTF8String.fromString($tf.formatNanos($c, ${t.precision}));" + case t: TimestampNTZNanosType => + val tf = JavaCode.global( + ctx.addReferenceObj("timestampNTZFormatter", timestampNTZFormatter), + timestampNTZFormatter.getClass) + (c, evPrim) => + code"$evPrim = UTF8String.fromString($tf.formatWithoutTimeZoneNanos($c, ${t.precision}));" case _: TimeType => val tf = JavaCode.global( ctx.addReferenceObj("timeFormatter", timeFormatter), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index b33045ad90a8..3724ae6d1624 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -1070,6 +1070,89 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } + test("SPARK-57256: cast timestamp_ntz with nanosecond precision to string") { + // NTZ rendering uses the UTC wall-clock grid and is independent of the session time zone. + def ntz(ldt: LocalDateTime, precision: Int, zoneId: Option[String]): Cast = + cast(Literal.create(localDateTimeToNanosVal(ldt), TimestampNTZNanosType(precision)), + StringType, zoneId) + + outstandingZoneIds.foreach { zid => + val tz = Option(zid.getId) + val ldt = LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456789) + // Sub-precision digits are floored, then trailing zeros are trimmed. 
+ checkEvaluation(ntz(ldt, 9, tz), "2020-01-01 00:00:00.123456789") + checkEvaluation(ntz(ldt, 8, tz), "2020-01-01 00:00:00.12345678") + checkEvaluation(ntz(ldt, 7, tz), "2020-01-01 00:00:00.1234567") + + // nanosWithinMicro boundaries 0 and 999. + checkEvaluation( + ntz(LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456000), 9, tz), + "2020-01-01 00:00:00.123456") + checkEvaluation( + ntz(LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456999), 9, tz), + "2020-01-01 00:00:00.123456999") + + // An all-zero fraction trims to no fractional part at all. + checkEvaluation(ntz(LocalDateTime.of(2020, 1, 1, 0, 0, 0, 0), 9, tz), + "2020-01-01 00:00:00") + + // Pre-epoch and year-9999 boundaries. + checkEvaluation( + ntz(LocalDateTime.of(1969, 12, 31, 23, 59, 59, 123456789), 9, tz), + "1969-12-31 23:59:59.123456789") + checkEvaluation( + ntz(LocalDateTime.of(9999, 12, 31, 23, 59, 59, 999999999), 9, tz), + "9999-12-31 23:59:59.999999999") + + // Null input. + checkEvaluation( + cast(Literal.create(null, TimestampNTZNanosType(9)), StringType, tz), null) + } + } + + test("SPARK-57256: cast timestamp_ltz with nanosecond precision to string") { + // The physical value is an epoch instant (built here from a UTC wall clock); the string is + // rendered in the session time zone. + def ltz(ldt: LocalDateTime, precision: Int, zoneId: String): Cast = + cast(Literal.create(localDateTimeToNanosVal(ldt), TimestampLTZNanosType(precision)), + StringType, Option(zoneId)) + + val ldt = LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456789) + // UTC session zone: the wall clock matches the UTC instant. Sub-precision digits are floored + // and trailing zeros trimmed. + checkEvaluation(ltz(ldt, 9, "UTC"), "2020-01-01 00:00:00.123456789") + checkEvaluation(ltz(ldt, 8, "UTC"), "2020-01-01 00:00:00.12345678") + checkEvaluation(ltz(ldt, 7, "UTC"), "2020-01-01 00:00:00.1234567") + + // A non-UTC session zone shifts the wall clock; the fractional second is unaffected. 
+ checkEvaluation(ltz(ldt, 9, "America/Los_Angeles"), "2019-12-31 16:00:00.123456789") + checkEvaluation(ltz(ldt, 9, "Asia/Kolkata"), "2020-01-01 05:30:00.123456789") + + // nanosWithinMicro boundaries 0 and 999 (under UTC). + checkEvaluation( + ltz(LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456000), 9, "UTC"), + "2020-01-01 00:00:00.123456") + checkEvaluation( + ltz(LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456999), 9, "UTC"), + "2020-01-01 00:00:00.123456999") + + // An all-zero fraction trims to no fractional part at all. + checkEvaluation(ltz(LocalDateTime.of(2020, 1, 1, 0, 0, 0, 0), 9, "UTC"), + "2020-01-01 00:00:00") + + // Pre-epoch and year-9999 boundaries (under UTC). + checkEvaluation( + ltz(LocalDateTime.of(1969, 12, 31, 23, 59, 59, 123456789), 9, "UTC"), + "1969-12-31 23:59:59.123456789") + checkEvaluation( + ltz(LocalDateTime.of(9999, 12, 31, 23, 59, 59, 999999999), 9, "UTC"), + "9999-12-31 23:59:59.999999999") + + // Null input. + checkEvaluation( + cast(Literal.create(null, TimestampLTZNanosType(9)), StringType, UTC_OPT), null) + } + test("SPARK-35112: Cast string to day-time interval") { checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType()), 0L) checkEvaluation(cast(Literal.create(" interval '0 0:0:0' Day TO second "), From 39a026139818cd09df979960b75f11615c3759c1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 4 Jun 2026 10:59:28 +0200 Subject: [PATCH 2/6] [SPARK-57256][SQL] Add DST transition test for nanosecond LTZ cast to string Co-authored-by: Max Gekk --- .../spark/sql/catalyst/expressions/CastSuiteBase.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index 3724ae6d1624..040a6460e345 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -1128,6 +1128,12 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(ltz(ldt, 9, "America/Los_Angeles"), "2019-12-31 16:00:00.123456789") checkEvaluation(ltz(ldt, 9, "Asia/Kolkata"), "2020-01-01 05:30:00.123456789") + // DST spring-forward boundary in America/Los_Angeles: 2020-03-08 02:00 PST -> 03:00 PDT. + // The UTC instant 10:00:00 lands at 03:00:00 PDT (UTC-7); fractional part is unaffected. + checkEvaluation( + ltz(LocalDateTime.of(2020, 3, 8, 10, 0, 0, 123456789), 9, "America/Los_Angeles"), + "2020-03-08 03:00:00.123456789") + // nanosWithinMicro boundaries 0 and 999 (under UTC). checkEvaluation( ltz(LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456000), 9, "UTC"), From 76a1be28f49f9cf453e24514fc0c604438051212 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 4 Jun 2026 11:13:04 +0200 Subject: [PATCH 3/6] [SPARK-57256][SQL] Add ToPrettyStringSuite tests for nanosecond timestamp types Co-authored-by: Max Gekk --- .../expressions/ToPrettyStringSuite.scala | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala index 6a2651edd9ab..4d9fc41a5af6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.UTC_OPT import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.{UTF8String, VariantVal} +import org.apache.spark.unsafe.types.{TimestampNanosVal, UTF8String, VariantVal} class 
ToPrettyStringSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -135,6 +135,28 @@ class ToPrettyStringSuite extends SparkFunSuite with ExpressionEvalHelper { assert(prettyString.sql === child.sql) } + test("TimestampNTZNanos as pretty strings") { + def ntzNanos(micros: Long, nanos: Short): Expression = + ToPrettyString(Literal.create(new TimestampNanosVal(micros, nanos), TimestampNTZNanosType(9))) + checkEvaluation(ntzNanos(0L, 1), "1970-01-01 00:00:00.000000001") + checkEvaluation(ntzNanos(1L, 0), "1970-01-01 00:00:00.000001") + checkEvaluation( + ToPrettyString(Literal.create(TimestampNanosVal.ZERO, TimestampNTZNanosType(9))), + "1970-01-01 00:00:00") + } + + test("TimestampLTZNanos as pretty strings") { + def ltzNanos(micros: Long, nanos: Short): Expression = + ToPrettyString(Literal.create(new TimestampNanosVal(micros, nanos), TimestampLTZNanosType(9))) + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { + checkEvaluation(ltzNanos(0L, 1), "1970-01-01 00:00:00.000000001") + checkEvaluation(ltzNanos(1L, 0), "1970-01-01 00:00:00.000001") + checkEvaluation( + ToPrettyString(Literal.create(TimestampNanosVal.ZERO, TimestampLTZNanosType(9))), + "1970-01-01 00:00:00") + } + } + test("Time as pretty strings") { checkEvaluation(ToPrettyString(Literal(1000 * 1000L, TimeType())), "00:00:00.001") checkEvaluation(ToPrettyString(Literal(1000L, TimeType())), "00:00:00.000001") From f40005f280656e6961daefde9d7ecc95c575b5ee Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 4 Jun 2026 11:19:26 +0200 Subject: [PATCH 4/6] [SPARK-57256][SQL] Include nanosecond timestamp types in null-cast-to-string sweep Co-authored-by: Max Gekk --- .../spark/sql/catalyst/expressions/CastSuiteBase.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index 
040a6460e345..b3d990974ecc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -83,7 +83,12 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } atomicTypes.foreach(dt => checkNullCast(NullType, dt)) - (atomicTypes ++ timeTypes).foreach(dt => checkNullCast(dt, StringType)) + val timestampNanosTypes = Seq( + TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION), + TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION), + TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION), + TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION)) + (atomicTypes ++ timeTypes ++ timestampNanosTypes).foreach(dt => checkNullCast(dt, StringType)) checkNullCast(StringType, BinaryType) checkNullCast(StringType, BooleanType) numericTypes.foreach(dt => checkNullCast(dt, BooleanType)) From 836d1cdc5882ed6e83c6f16a9d3337fdec077d03 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 5 Jun 2026 16:11:00 +0200 Subject: [PATCH 5/6] [SPARK-57256][SQL] Address review: prefix pretty-string tests and add complex-type cast coverage Rename the two nanosecond pretty-string tests in ToPrettyStringSuite with the `SPARK-57256:` prefix for traceability, matching the CastSuiteBase tests. Add a cast-to-string test for complex types nesting nanosecond timestamps: array of TIMESTAMP_NTZ(9) / array of TIMESTAMP_LTZ(9) (with a null element to cover the recursive nullString path) and a struct nesting both variants. 
--- .../catalyst/expressions/CastSuiteBase.scala | 31 +++++++++++++++++++ .../expressions/ToPrettyStringSuite.scala | 4 +-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index b3d990974ecc..5c346a980d58 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -1164,6 +1164,37 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { cast(Literal.create(null, TimestampLTZNanosType(9)), StringType, UTC_OPT), null) } + test("SPARK-57256: cast complex types with nanosecond timestamps to string") { + val ntzElem = Literal.create( + localDateTimeToNanosVal(LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456789)), + TimestampNTZNanosType(9)) + val ltzElem = Literal.create( + localDateTimeToNanosVal(LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456789)), + TimestampLTZNanosType(9)) + + // array of TIMESTAMP_NTZ(9) with a null element exercises the recursive element path + // (including nullString) in ToStringBase. NTZ is independent of the session time zone. + checkEvaluation( + cast(CreateArray(Seq(ntzElem, Literal.create(null, TimestampNTZNanosType(9)))), StringType), + "[2020-01-01 00:00:00.123456789, null]") + + // array of TIMESTAMP_LTZ(9) is rendered in the session time zone (here UTC). + checkEvaluation( + cast( + CreateArray(Seq(ltzElem, Literal.create(null, TimestampLTZNanosType(9)))), + StringType, + UTC_OPT), + "[2020-01-01 00:00:00.123456789, null]") + + // A struct nesting both nanosecond timestamp variants. 
+ checkEvaluation( + cast( + CreateNamedStruct(Seq(Literal("ntz"), ntzElem, Literal("ltz"), ltzElem)), + StringType, + UTC_OPT), + "{2020-01-01 00:00:00.123456789, 2020-01-01 00:00:00.123456789}") + } + test("SPARK-35112: Cast string to day-time interval") { checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType()), 0L) checkEvaluation(cast(Literal.create(" interval '0 0:0:0' Day TO second "), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala index 4d9fc41a5af6..c00a33ee48d8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala @@ -135,7 +135,7 @@ class ToPrettyStringSuite extends SparkFunSuite with ExpressionEvalHelper { assert(prettyString.sql === child.sql) } - test("TimestampNTZNanos as pretty strings") { + test("SPARK-57256: TimestampNTZNanos as pretty strings") { def ntzNanos(micros: Long, nanos: Short): Expression = ToPrettyString(Literal.create(new TimestampNanosVal(micros, nanos), TimestampNTZNanosType(9))) checkEvaluation(ntzNanos(0L, 1), "1970-01-01 00:00:00.000000001") @@ -145,7 +145,7 @@ class ToPrettyStringSuite extends SparkFunSuite with ExpressionEvalHelper { "1970-01-01 00:00:00") } - test("TimestampLTZNanos as pretty strings") { + test("SPARK-57256: TimestampLTZNanos as pretty strings") { def ltzNanos(micros: Long, nanos: Short): Expression = ToPrettyString(Literal.create(new TimestampNanosVal(micros, nanos), TimestampLTZNanosType(9))) withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { From ec68a983f263f840b38a4bad9e26e6d20ec25a21 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 5 Jun 2026 22:01:08 +0200 Subject: [PATCH 6/6] [SPARK-57256][SQL] Route nanos cast-to-string through ToStringBase and add SQL tests After 
rebasing onto the merged SPARK-57257, master routes interpreted CAST(nanos AS STRING) through the Types Framework's zone-less format(), which deliberately raised UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_TO_STRING as a placeholder. That shadowed the zone-aware formatting added here. Bypass TypeApiOps for the nanosecond timestamp types in ToStringBase.castToString so they use the zone-aware castToStringDefault cases (LTZ renders in the session time zone, mirroring the microsecond timestamp types). Remove the now-dead codegen error case and drop the obsolete TimestampNanosRowSuite test that asserted the error; positive cast coverage lives in CastSuiteBase/ToPrettyStringSuite. The framework format()/toSQLValue() still raise for the zone-less EXPLAIN / SQL-literal paths. Add end-to-end golden-file checks to cast.sql now that SPARK-57257 wired HiveResult: precision flooring, trailing-zero trimming, nanosWithinMicro boundaries, pre-1970 negative-epoch, complex types with nested NULL, top-level NULL, and string-context use. All result columns are STRING. ### Was this patch authored or co-authored using generative AI tooling? 
Generated-by: Cursor 1.7.0 --- .../types/ops/TimestampNanosTypeApiOps.scala | 18 +-- .../catalyst/expressions/ToStringBase.scala | 22 ++-- .../expressions/TimestampNanosRowSuite.scala | 17 +-- .../sql-tests/analyzer-results/cast.sql.out | 105 +++++++++++++++ .../analyzer-results/nonansi/cast.sql.out | 105 +++++++++++++++ .../test/resources/sql-tests/inputs/cast.sql | 31 ++++- .../resources/sql-tests/results/cast.sql.out | 120 ++++++++++++++++++ .../sql-tests/results/nonansi/cast.sql.out | 120 ++++++++++++++++++ 8 files changed, 498 insertions(+), 40 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimestampNanosTypeApiOps.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimestampNanosTypeApiOps.scala index aa3d52f750ef..ee6125c8f1af 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimestampNanosTypeApiOps.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/ops/TimestampNanosTypeApiOps.scala @@ -31,11 +31,10 @@ import org.apache.spark.sql.types.{TimestampLTZNanosType, TimestampNTZNanosType} * prefix; storage and formatting are identical. * * SCOPE (SPARK-57207): this issue wires physical representation, literals, row accessors, and - * codegen class selection. Dedicated fractional-second string formatting is not implemented yet: - * there is no TimestampFormatter for the nanos timestamp types. Until one lands, format() (and - * the toSQLValue() that delegates to it) raises the user-facing - * UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_TO_STRING error rather than silently truncating to - * microsecond precision. + * codegen class selection. CAST to STRING is implemented separately, zone-aware, in ToStringBase + * (SPARK-57256). The zone-less, type-level format() here (and the toSQLValue() that delegates to + * it) still raises the user-facing UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_TO_STRING error, since LTZ + * rendering needs the session time zone that this op does not have. 
* * Dataset encoders are wired here to the precision-aware leaves added by SPARK-57033 * (LocalDateTimeNanosEncoder / InstantNanosEncoder), so that turning on the Types Framework @@ -50,10 +49,11 @@ abstract class TimestampNanosTypeApiOps extends TypeApiOps with DataTypeErrorsBa // ==================== String Formatting ==================== - // Fractional-second (nanosecond) string formatting is not implemented yet: there is no - // TimestampFormatter for the nanos timestamp types. Until one lands, formatting (CAST to STRING, - // EXPLAIN / SHOW output, and SQL-literal rendering via toSQLValue) raises a user-facing - // unsupported-feature error rather than silently truncating to microsecond precision. + // CAST to STRING for the nanosecond timestamp types is handled zone-aware by ToStringBase + // (SPARK-57256), alongside the microsecond timestamp types, because LTZ rendering depends on the + // session time zone that this zone-less, type-level formatter does not have. The remaining + // zone-less callers (EXPLAIN plan output and SQL-literal rendering via toSQLValue) still raise a + // user-facing unsupported-feature error here rather than silently truncating to microseconds. 
override def format(v: Any): String = throw DataTypeErrors.cannotConvertNanosTimestampToStringError(dataType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala index 5c1ac774c978..561fd33b0a18 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils, DateFormatter, FractionTimeFormatter, IntervalStringStyles, IntervalUtils, MapData, TimestampFormatter} import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE -import org.apache.spark.sql.errors.DataTypeErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle import org.apache.spark.sql.types._ @@ -67,10 +66,16 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression => case NoConstraint => castToString(from) } - private def castToString(from: DataType): Any => UTF8String = - TypeApiOps(from) - .map(ops => acceptAny[Any](v => ops.formatUTF8(v))) - .getOrElse(castToStringDefault(from)) + private def castToString(from: DataType): Any => UTF8String = from match { + // Nanosecond timestamp string formatting is zone-aware (LTZ renders in the session time zone), + // so it lives in castToStringDefault alongside the microsecond timestamp types rather than the + // zone-less Types Framework formatter (SPARK-57256). 
+ case _: TimestampNTZNanosType | _: TimestampLTZNanosType => castToStringDefault(from) + case _ => + TypeApiOps(from) + .map(ops => acceptAny[Any](v => ops.formatUTF8(v))) + .getOrElse(castToStringDefault(from)) + } private def castToStringDefault(from: DataType): Any => UTF8String = from match { case CalendarIntervalType => @@ -324,13 +329,6 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression => (c, evPrim) => code"$evPrim = UTF8String.fromString($c.toPlainString());" case _: StringType => (c, evPrim) => code"$evPrim = $c;" - // Fractional-second (nanosecond) timestamp formatting is not implemented yet: there is no - // TimestampFormatter for the nanos timestamp types. The interpreted path raises this via the - // Types Framework (castToString -> TypeApiOps.format); the codegen path has no framework - // hook, so it raises the same user-facing error directly until a formatter lands - // (SPARK-57207). - case _: TimestampNTZNanosType | _: TimestampLTZNanosType => - throw DataTypeErrors.cannotConvertNanosTimestampToStringError(from) case _ => (c, evPrim) => code"$evPrim = UTF8String.fromString(String.valueOf($c));" } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimestampNanosRowSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimestampNanosRowSuite.scala index 02c967c0ffec..d85350504f7f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimestampNanosRowSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimestampNanosRowSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.{SparkFunSuite, SparkUnsupportedOperationException} +import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.catalyst.util.GenericArrayData @@ 
-184,21 +184,6 @@ class TimestampNanosRowSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Literal.create(null, TimestampLTZNanosType(7)), null) } - // Fractional-second formatting is not implemented yet, so CAST(nanos AS STRING) raises the - // user-facing UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_TO_STRING error. Both the interpreted - // (ToStringBase.castToString -> TypeApiOps.format) and codegen (ToStringBase.castToStringCode) - // paths must fail the same way (SPARK-57207). - test("CAST nanos timestamp to STRING raises an unsupported-feature error in both eval modes") { - Seq( - Literal.create(ntzValue, TimestampNTZNanosType(9)), - Literal.create(ltzValue, TimestampLTZNanosType(7))).foreach { lit => - checkErrorInExpression[SparkUnsupportedOperationException]( - Cast(lit, StringType), - condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_TO_STRING", - parameters = Map("dataType" -> ("\"" + lit.dataType.sql + "\""))) - } - } - testBothCodegenAndInterpreted("UnsafeRow handles extreme epoch micros for nanos") { val fieldTypes: Array[DataType] = Array(TimestampNTZNanosType(9)) val converter = UnsafeProjection.create(fieldTypes) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out index b077443a9f28..cc296a57abc5 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out @@ -669,6 +669,111 @@ Project [isnull(cast(a as timestamp_ltz(9))) AS (CAST(a AS TIMESTAMP_LTZ(9)) IS +- OneRowRelation +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)) as string) AS CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(8)) 
as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(8)) as string) AS CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(8)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(7)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(7)) as string) AS CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(7)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.000000000' as timestamp_ntz(9)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.000000000 as timestamp_ntz(9)) as string) AS CAST(CAST(2020-01-01 00:00:00.000000000 AS TIMESTAMP_NTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.000000999' as timestamp_ntz(9)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.000000999 as timestamp_ntz(9)) as string) AS CAST(CAST(2020-01-01 00:00:00.000000999 AS TIMESTAMP_NTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.000000001' as timestamp_ntz(8)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(8)) as string) AS CAST(CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(8)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(9)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9)) as string) AS CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(7)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(7)) as string) AS CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)) AS STRING)#x] ++- OneRowRelation + + +-- 
!query +select cast(cast('1960-01-01 00:00:00.000000001' as timestamp_ntz(9)) as string) +-- !query analysis +Project [cast(cast(1960-01-01 00:00:00.000000001 as timestamp_ntz(9)) as string) AS CAST(CAST(1960-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('1960-01-01 00:00:00.123456789' as timestamp_ltz(7)) as string) +-- !query analysis +Project [cast(cast(1960-01-01 00:00:00.123456789 as timestamp_ltz(7)) as string) AS CAST(CAST(1960-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(array(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)), cast(null as timestamp_ntz(9))) as string) +-- !query analysis +Project [cast(array(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)), cast(null as timestamp_ntz(9))) as string) AS CAST(array(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)), CAST(NULL AS TIMESTAMP_NTZ(9))) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(map('k', cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9))) as string) +-- !query analysis +Project [cast(map(k, cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) as string) AS CAST(map(k, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9))) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(named_struct('f', cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(9))) as string) +-- !query analysis +Project [cast(named_struct(f, cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) as string) AS CAST(named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9))) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast(null as timestamp_ntz(9)) as string) +-- !query analysis +Project [cast(cast(null as timestamp_ntz(9)) as string) AS CAST(CAST(NULL AS TIMESTAMP_NTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select concat('ts=', cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)) as string)) 
+-- !query analysis +Project [concat(ts=, cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)) as string)) AS concat(ts=, CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)) AS STRING))#x] ++- OneRowRelation + + -- !query select cast(cast('inf' as double) as timestamp) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out index 1255f2266629..bcc410b322ae 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out @@ -533,6 +533,111 @@ Project [isnull(cast(a as timestamp_ltz(9))) AS (CAST(a AS TIMESTAMP_LTZ(9)) IS +- OneRowRelation +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)) as string) AS CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(8)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(8)) as string) AS CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(8)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(7)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(7)) as string) AS CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(7)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.000000000' as timestamp_ntz(9)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.000000000 as timestamp_ntz(9)) as string) AS CAST(CAST(2020-01-01 00:00:00.000000000 AS TIMESTAMP_NTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select 
cast(cast('2020-01-01 00:00:00.000000999' as timestamp_ntz(9)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.000000999 as timestamp_ntz(9)) as string) AS CAST(CAST(2020-01-01 00:00:00.000000999 AS TIMESTAMP_NTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.000000001' as timestamp_ntz(8)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(8)) as string) AS CAST(CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(8)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(9)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9)) as string) AS CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(7)) as string) +-- !query analysis +Project [cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(7)) as string) AS CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('1960-01-01 00:00:00.000000001' as timestamp_ntz(9)) as string) +-- !query analysis +Project [cast(cast(1960-01-01 00:00:00.000000001 as timestamp_ntz(9)) as string) AS CAST(CAST(1960-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast('1960-01-01 00:00:00.123456789' as timestamp_ltz(7)) as string) +-- !query analysis +Project [cast(cast(1960-01-01 00:00:00.123456789 as timestamp_ltz(7)) as string) AS CAST(CAST(1960-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(array(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)), cast(null as timestamp_ntz(9))) as string) +-- !query analysis +Project [cast(array(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)), cast(null as 
timestamp_ntz(9))) as string) AS CAST(array(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)), CAST(NULL AS TIMESTAMP_NTZ(9))) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(map('k', cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9))) as string) +-- !query analysis +Project [cast(map(k, cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) as string) AS CAST(map(k, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9))) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(named_struct('f', cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(9))) as string) +-- !query analysis +Project [cast(named_struct(f, cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) as string) AS CAST(named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9))) AS STRING)#x] ++- OneRowRelation + + +-- !query +select cast(cast(null as timestamp_ntz(9)) as string) +-- !query analysis +Project [cast(cast(null as timestamp_ntz(9)) as string) AS CAST(CAST(NULL AS TIMESTAMP_NTZ(9)) AS STRING)#x] ++- OneRowRelation + + +-- !query +select concat('ts=', cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)) as string)) +-- !query analysis +Project [concat(ts=, cast(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)) as string)) AS concat(ts=, CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)) AS STRING))#x] ++- OneRowRelation + + -- !query select cast(cast('inf' as double) as timestamp) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/cast.sql index 5065e7c335e7..5aead54b2722 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql @@ -103,14 +103,39 @@ select cast('2022-01-01 00:00:00' as timestamp_ntz); select cast('a' as timestamp_ntz); -- SPARK-57211: cast string to nanosecond-precision timestamps TIMESTAMP_NTZ(p)/TIMESTAMP_LTZ(p). 
--- The reverse direction (nanos -> string) is not wired yet, so positive cases assert the result --- type via typeof. Negative cases exercise the ANSI parse-error path and use IS NULL so the result --- column stays non-nanos (a bare nanos result column is not yet serializable by JDBC/thrift). +-- Positive cases assert the result type via typeof. Negative cases exercise the ANSI parse-error +-- path and use IS NULL so the result column stays non-nanos (a bare nanos result column is not yet +-- serializable by JDBC/thrift). select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ntz(9))); select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ltz(7))); select cast('a' as timestamp_ntz(9)) is null; select cast('a' as timestamp_ltz(9)) is null; +-- SPARK-57256: cast nanosecond-precision timestamps to string. The result column is STRING (the +-- nanos type is only intermediate), so the value is produced by the Cast-to-string expression +-- (ToStringBase). The nanos preview flag defaults to enabled under tests, and LTZ wall-clock inputs +-- round-trip in any session time zone, so these cases stay zone-independent. +-- TIMESTAMP_NTZ(p): precision-driven fraction width and trailing-zero trimming. +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)) as string); +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(8)) as string); +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(7)) as string); +select cast(cast('2020-01-01 00:00:00.000000000' as timestamp_ntz(9)) as string); +select cast(cast('2020-01-01 00:00:00.000000999' as timestamp_ntz(9)) as string); +select cast(cast('2020-01-01 00:00:00.000000001' as timestamp_ntz(8)) as string); +-- TIMESTAMP_LTZ(p): exercises the zone-aware path; a wall-clock input round-trips. 
+select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(9)) as string); +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(7)) as string); +-- Pre-1970 (negative-epoch) values. +select cast(cast('1960-01-01 00:00:00.000000001' as timestamp_ntz(9)) as string); +select cast(cast('1960-01-01 00:00:00.123456789' as timestamp_ltz(7)) as string); +-- Complex types cast to string (recursive element path, including a nested NULL). +select cast(array(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)), cast(null as timestamp_ntz(9))) as string); +select cast(map('k', cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9))) as string); +select cast(named_struct('f', cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(9))) as string); +-- NULL and a real string context. +select cast(cast(null as timestamp_ntz(9)) as string); +select concat('ts=', cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)) as string)); + select cast(cast('inf' as double) as timestamp); select cast(cast('inf' as float) as timestamp); diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out index 10b6f4526889..2cff542eb98d 100644 --- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out @@ -1354,6 +1354,126 @@ org.apache.spark.SparkDateTimeException } +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(8)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(8)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.12345678 + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(7)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(7)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.1234567 + + +-- !query +select cast(cast('2020-01-01
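00:00:00.000000000' as timestamp_ntz(9)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.000000000 AS TIMESTAMP_NTZ(9)) AS STRING):string> +-- !query output +2020-01-01 00:00:00 + + +-- !query +select cast(cast('2020-01-01 00:00:00.000000999' as timestamp_ntz(9)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.000000999 AS TIMESTAMP_NTZ(9)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.000000999 + + +-- !query +select cast(cast('2020-01-01 00:00:00.000000001' as timestamp_ntz(8)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(8)) AS STRING):string> +-- !query output +2020-01-01 00:00:00 + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(9)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(7)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.1234567 + + +-- !query +select cast(cast('1960-01-01 00:00:00.000000001' as timestamp_ntz(9)) as string) +-- !query schema +struct<CAST(CAST(1960-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9)) AS STRING):string> +-- !query output +1960-01-01 00:00:00.000000001 + + +-- !query +select cast(cast('1960-01-01 00:00:00.123456789' as timestamp_ltz(7)) as string) +-- !query schema +struct<CAST(CAST(1960-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)) AS STRING):string> +-- !query output +1960-01-01 00:00:00.1234567 + + +-- !query +select cast(array(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)), cast(null as timestamp_ntz(9))) as string) +-- !query schema +struct<CAST(array(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)), CAST(NULL AS TIMESTAMP_NTZ(9))) AS STRING):string> +-- !query output +[2020-01-01 00:00:00.123456789, null] + + +-- !query +select cast(map('k', cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9))) as string) +-- !query schema +struct<CAST(map(k, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9))) AS STRING):string> +-- !query output +{k -> 2020-01-01 00:00:00.123456789} + + +-- !query +select cast(named_struct('f', cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(9))) as string) +-- !query schema +struct<CAST(named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9))) AS STRING):string> +-- !query output +{2020-01-01 00:00:00.123456789} + + +-- !query +select cast(cast(null as timestamp_ntz(9)) as string) +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_NTZ(9)) AS STRING):string> +-- !query output +NULL + + +-- !query +select concat('ts=', cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)) as string)) +-- !query schema +struct<concat(ts=, CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)) AS STRING)):string> +--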
!query output +ts=2020-01-01 00:00:00.123456789 + + -- !query select cast(cast('inf' as double) as timestamp) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out index 2b73fe4e63da..d215cf7cee3c 100644 --- a/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out @@ -616,6 +616,126 @@ struct<(CAST(a AS TIMESTAMP_LTZ(9)) IS NULL):boolean> true +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(8)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(8)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.12345678 + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(7)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(7)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.1234567 + + +-- !query +select cast(cast('2020-01-01 00:00:00.000000000' as timestamp_ntz(9)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.000000000 AS TIMESTAMP_NTZ(9)) AS STRING):string> +-- !query output +2020-01-01 00:00:00 + + +-- !query +select cast(cast('2020-01-01 00:00:00.000000999' as timestamp_ntz(9)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.000000999 AS TIMESTAMP_NTZ(9)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.000000999 + + +-- !query +select cast(cast('2020-01-01 00:00:00.000000001' as timestamp_ntz(8)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(8)) AS STRING):string> +-- !query output +2020-01-01 00:00:00 + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(9)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +select cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(7)) as string) +-- !query schema +struct<CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)) AS STRING):string> +-- !query output +2020-01-01 00:00:00.1234567 + + +-- !query +select cast(cast('1960-01-01 00:00:00.000000001' as timestamp_ntz(9)) as string) +-- !query
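schema +struct<CAST(CAST(1960-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9)) AS STRING):string> +-- !query output +1960-01-01 00:00:00.000000001 + + +-- !query +select cast(cast('1960-01-01 00:00:00.123456789' as timestamp_ltz(7)) as string) +-- !query schema +struct<CAST(CAST(1960-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)) AS STRING):string> +-- !query output +1960-01-01 00:00:00.1234567 + + +-- !query +select cast(array(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)), cast(null as timestamp_ntz(9))) as string) +-- !query schema +struct<CAST(array(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)), CAST(NULL AS TIMESTAMP_NTZ(9))) AS STRING):string> +-- !query output +[2020-01-01 00:00:00.123456789, null] + + +-- !query +select cast(map('k', cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9))) as string) +-- !query schema +struct<CAST(map(k, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9))) AS STRING):string> +-- !query output +{k -> 2020-01-01 00:00:00.123456789} + + +-- !query +select cast(named_struct('f', cast('2020-01-01 00:00:00.123456789' as timestamp_ltz(9))) as string) +-- !query schema +struct<CAST(named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9))) AS STRING):string> +-- !query output +{2020-01-01 00:00:00.123456789} + + +-- !query +select cast(cast(null as timestamp_ntz(9)) as string) +-- !query schema +struct<CAST(CAST(NULL AS TIMESTAMP_NTZ(9)) AS STRING):string> +-- !query output +NULL + + +-- !query +select concat('ts=', cast(cast('2020-01-01 00:00:00.123456789' as timestamp_ntz(9)) as string)) +-- !query schema +struct<concat(ts=, CAST(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)) AS STRING)):string> +-- !query output +ts=2020-01-01 00:00:00.123456789 + + -- !query select cast(cast('inf' as double) as timestamp) -- !query schema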