From e2811323a8f885821bbc4708757191828f3e0ff1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 4 Jun 2026 12:39:13 +0200 Subject: [PATCH 1/2] [SPARK-57257][SQL] Support nanosecond-precision timestamps in Hive results ### What changes were proposed in this pull request? This PR modifies `HiveResult` to support the nanosecond-precision timestamp types `TIMESTAMP_LTZ(p)` (`TimestampLTZNanosType`) and `TIMESTAMP_NTZ(p)` (`TimestampNTZNanosType`), `p` in [7, 9]. Two cases are added to `HiveResult.toHiveStringDefault`, mirroring the existing microsecond timestamp cases: - `(i: Instant, _: TimestampLTZNanosType)` -> rendered in the session time zone. - `(l: LocalDateTime, _: TimestampNTZNanosType)` -> rendered zone-independently. The external collected values are `Instant` (LTZ) and `LocalDateTime` (NTZ); they are converted to the physical `TimestampNanosVal` at the column precision and formatted with the nanosecond-aware `TimestampFormatter` (`formatNanos` / `formatWithoutTimeZoneNanos`, SPARK-57162), flooring sub-`p` digits and trimming trailing zeros. This is the same rendering used by casting these types to string (SPARK-57256), so Hive output stays consistent. ### Why are the changes needed? Before the change, formatting a nanosecond timestamp column through `HiveResult` (e.g. end-to-end SQL / golden-file tests, spark-sql CLI, Thrift server output) matches none of the existing cases and fails with a `MatchError`, analogous to the `TimeType` issue fixed in SPARK-51517: ``` scala.MatchError (2020-01-01T00:00:00.123456789Z, TimestampLTZNanosType(9)) (of class scala.Tuple2) ``` ### Does this PR introduce _any_ user-facing change? Yes. It fixes the error above. After the change, nanosecond timestamp values are rendered as proper strings in Hive results (only reachable when `spark.sql.timestampNanosTypes.enabled=true`). ### How was this patch tested? 
- New cases in `HiveResultSuite` covering `TIMESTAMP_LTZ(p)` / `TIMESTAMP_NTZ(p)` for `p` in [7, 9]: precision-driven fraction width, trailing-zero trimming, nanosWithinMicro 0 and 999, LTZ session-zone rendering vs. zone-independent NTZ, and nested (array/map/struct) values. - New golden-file end-to-end tests `timestamp-ltz-nanos.sql` and `timestamp-ntz-nanos.sql` (as SPARK-51517 added `time.sql`), disabled in `ThriftServerQueryTestSuite`. ### Was this patch authored or co-authored using generative AI tooling? Generated-by: Cursor 1.7.0 --- .../spark/sql/execution/HiveResult.scala | 10 +++ .../timestamp-ltz-nanos.sql.out | 76 ++++++++++++++++ .../timestamp-ntz-nanos.sql.out | 76 ++++++++++++++++ .../sql-tests/inputs/timestamp-ltz-nanos.sql | 22 +++++ .../sql-tests/inputs/timestamp-ntz-nanos.sql | 22 +++++ .../results/timestamp-ltz-nanos.sql.out | 87 +++++++++++++++++++ .../results/timestamp-ntz-nanos.sql.out | 87 +++++++++++++++++++ .../spark/sql/execution/HiveResultSuite.scala | 69 ++++++++++++++- .../ThriftServerQueryTestSuite.scala | 5 +- 9 files changed, 450 insertions(+), 4 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql create mode 100644 sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 6714510874351..25170da800aec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -131,6 +131,16 @@ object HiveResult extends SQLConfHelper { case (t: Timestamp, TimestampType) => formatters.timestamp.format(t) case (i: Instant, TimestampType) => formatters.timestamp.format(i) case (l: LocalDateTime, TimestampNTZType) => formatters.timestamp.format(l) + // Nanosecond-precision timestamps. The external values are `Instant` (LTZ) and + // `LocalDateTime` (NTZ); convert to the physical `TimestampNanosVal` at the column precision + // and render via the same formatter methods as the cast-to-string path (SPARK-57256), so the + // output stays consistent. LTZ uses the session zone; NTZ is zone-independent. + case (i: Instant, t: TimestampLTZNanosType) => + formatters.timestamp.formatNanos( + DateTimeUtils.instantToTimestampNanos(i, t.precision), t.precision) + case (l: LocalDateTime, t: TimestampNTZNanosType) => + formatters.timestamp.formatWithoutTimeZoneNanos( + DateTimeUtils.localDateTimeToTimestampNanos(l, t.precision), t.precision) case (bin: Array[Byte], BinaryType) => binaryFormatter(bin) case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString case (n, _: NumericType) => n.toString diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out new file mode 100644 index 0000000000000..9be5de3a105a1 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out @@ -0,0 +1,76 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(7)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(7)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(8)) +-- !query analysis +Project [cast(2020-01-01 
00:00:00.123456789 as timestamp_ltz(8)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(8))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ltz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.999999000 as timestamp_ltz(9)) AS CAST(2020-01-01 00:00:00.999999000 AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ltz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000999 as timestamp_ltz(9)) AS CAST(2020-01-01 00:00:00.000000999 AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ltz(9)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(8)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ltz(8)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(8))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(7)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ltz(7)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(7))#x] ++- OneRowRelation + + +-- !query +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query analysis +Project [array(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS array(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query analysis +Project 
[map(k, cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS map(k, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query analysis +Project [named_struct(f, cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out new file mode 100644 index 0000000000000..83549e36597a1 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out @@ -0,0 +1,76 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(7)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(7)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(7))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(8)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(8)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(8))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ntz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.999999000 as timestamp_ntz(9)) AS CAST(2020-01-01 00:00:00.999999000 AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ntz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000999 as 
timestamp_ntz(9)) AS CAST(2020-01-01 00:00:00.000000999 AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(9)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(8)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(8)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(8))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(7)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(7)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(7))#x] ++- OneRowRelation + + +-- !query +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query analysis +Project [array(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS array(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query analysis +Project [map(k, cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS map(k, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query analysis +Project [named_struct(f, cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql new file mode 100644 index 0000000000000..181cf9f5d7693 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql @@ -0,0 +1,22 
@@ +-- Nanosecond-precision TIMESTAMP_LTZ(p) (p in [7, 9]) in Hive results (SPARK-57257). +-- LTZ values are rendered in the session time zone. + +--SET spark.sql.timestampNanosTypes.enabled=true +--SET spark.sql.session.timeZone=America/Los_Angeles + +-- Precision-driven fraction width: sub-p digits are floored. +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(7)); +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(8)); +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)); + +-- Trailing-zero trimming: an all-zero fraction renders as no fraction at all. +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ltz(9)); +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ltz(9)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(9)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(8)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(7)); + +-- Nested values (array / map / struct). +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))); +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))); +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))); diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql new file mode 100644 index 0000000000000..563e5e029c761 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql @@ -0,0 +1,22 @@ +-- Nanosecond-precision TIMESTAMP_NTZ(p) (p in [7, 9]) in Hive results (SPARK-57257). +-- NTZ values are zone-independent. + +--SET spark.sql.timestampNanosTypes.enabled=true +--SET spark.sql.session.timeZone=America/Los_Angeles + +-- Precision-driven fraction width: sub-p digits are floored. 
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(7)); +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(8)); +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)); + +-- Trailing-zero trimming: an all-zero fraction renders as no fraction at all. +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ntz(9)); +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ntz(9)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(9)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(8)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(7)); + +-- Nested values (array / map / struct). +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))); +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))); +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))); diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out new file mode 100644 index 0000000000000..6115d49fdbce8 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out @@ -0,0 +1,87 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(7)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.1234567 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(8)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.12345678 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.999999 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ltz(9)) +-- !query schema +struct 
+-- !query output +2020-01-01 00:00:00.000000999 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.000000001 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(8)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(7)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00 + + +-- !query +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +[2020-01-01 00:00:00.123456789] + + +-- !query +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +{"k":2020-01-01 00:00:00.123456789} + + +-- !query +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +{"f":2020-01-01 00:00:00.123456789} diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out new file mode 100644 index 0000000000000..4859984f4c82d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out @@ -0,0 +1,87 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(7)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.1234567 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(8)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.12345678 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +2020-01-01 
00:00:00.999999 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.000000999 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.000000001 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(8)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(7)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00 + + +-- !query +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +[2020-01-01 00:00:00.123456789] + + +-- !query +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +{"k":2020-01-01 00:00:00.123456789} + + +-- !query +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +{"f":2020-01-01 00:00:00.123456789} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala index 251ed064af91e..b65fe36914902 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala @@ -17,15 +17,17 @@ package org.apache.spark.sql.execution -import java.time.{Duration, Period, Year} +import java.time.{Duration, LocalDateTime, Period, Year, ZoneOffset} -import org.apache.spark.sql.YearUDT +import scala.jdk.CollectionConverters._ + +import org.apache.spark.sql.{Row, YearUDT} import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.connector.catalog.InMemoryTableCatalog import org.apache.spark.sql.execution.HiveResult._ import 
org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession} -import org.apache.spark.sql.types.{YearMonthIntervalType, YearMonthIntervalType => YM} +import org.apache.spark.sql.types.{StructField, StructType, TimestampLTZNanosType, TimestampNTZNanosType, YearMonthIntervalType, YearMonthIntervalType => YM} class HiveResultSuite extends SharedSparkSession { @@ -76,6 +78,67 @@ class HiveResultSuite extends SharedSparkSession { assert(result2 == timestamps.map(x => s"[$x]")) } + test("SPARK-57257: nanosecond timestamp formatting in hive result") { + // Each input fraction maps to the expected rendered fraction at precision 7, 8, 9. Sub-`p` + // digits are floored and trailing zeros trimmed, so an all-zero fraction renders as no + // fraction at all (e.g. ".000000001" at p=7/8). + val base = "2020-01-01 00:00:00" + val cases = Seq( + ".123456789" -> Seq(".1234567", ".12345678", ".123456789"), + ".999999999" -> Seq(".9999999", ".99999999", ".999999999"), + ".999999000" -> Seq(".999999", ".999999", ".999999"), + ".000000001" -> Seq("", "", ".000000001"), + ".000000999" -> Seq(".0000009", ".00000099", ".000000999")) + // Render LTZ in a fixed zone so the wall-clock fields round-trip from the cast. 
+ withSQLConf( + SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true", + SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { + Seq(7, 8, 9).zipWithIndex.foreach { case (p, idx) => + cases.foreach { case (frac, expectedByPrecision) => + val input = base + frac + val expected = base + expectedByPrecision(idx) + Seq("timestamp_ltz", "timestamp_ntz").foreach { typeName => + val df = spark.sql(s"SELECT CAST('$input' AS $typeName($p)) AS b") + assert(hiveResultString(df.queryExecution.executedPlan) === Seq(expected), + s"type = $typeName($p), input = $input") + val nested = spark.sql(s"SELECT array(CAST('$input' AS $typeName($p))) AS b") + assert(hiveResultString(nested.queryExecution.executedPlan) === Seq(s"[$expected]"), + s"nested type = $typeName($p), input = $input") + } + } + } + } + } + + test("SPARK-57257: LTZ nanos timestamp honors session time zone, NTZ is zone-independent") { + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + // A fixed instant and the matching local date-time at UTC. + val ldt = LocalDateTime.of(2020, 1, 1, 12, 0, 0, 123456789) + val instant = ldt.toInstant(ZoneOffset.UTC) + val ltzDf = spark.createDataFrame( + Seq(Row(instant)).asJava, + StructType(Seq(StructField("b", TimestampLTZNanosType(9))))) + val ntzDf = spark.createDataFrame( + Seq(Row(ldt)).asJava, + StructType(Seq(StructField("b", TimestampNTZNanosType(9))))) + + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { + assert(hiveResultString(ltzDf.queryExecution.executedPlan) === + Seq("2020-01-01 12:00:00.123456789")) + assert(hiveResultString(ntzDf.queryExecution.executedPlan) === + Seq("2020-01-01 12:00:00.123456789")) + } + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "America/Los_Angeles") { + // LTZ shifts with the session zone (UTC-08:00 on this date) ... + assert(hiveResultString(ltzDf.queryExecution.executedPlan) === + Seq("2020-01-01 04:00:00.123456789")) + // ... while NTZ stays the same wall-clock value. 
+ assert(hiveResultString(ntzDf.queryExecution.executedPlan) === + Seq("2020-01-01 12:00:00.123456789")) + } + } + } + test("toHiveString correctly handles UDTs") { val point = new ExamplePoint(50.0, 50.0) val tpe = new ExamplePointUDT() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 401b3c126d854..f9d333479bbdb 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -118,7 +118,10 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ "pipe-operators.sql", // VARIANT type "variant/named-function-arguments.sql", - "variant-field-extractions.sql" + "variant-field-extractions.sql", + // SPARK-57257: nanosecond-precision timestamp types are not yet mapped by the Thrift Server + "timestamp-ltz-nanos.sql", + "timestamp-ntz-nanos.sql" ) override def runQueries( From d578fbba68fd4b7ea928f759cb18cc73178a5859 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 5 Jun 2026 15:51:12 +0200 Subject: [PATCH 2/2] [SPARK-57257][SQL][TESTS] Add pre-1970 and NULL coverage for nanos timestamps in Hive results Address review feedback on apache/spark#56320: - Add a pre-1970 base (1960-01-01) to exercise the negative-epoch path (negative epochMicros + positive nanosWithinMicro), in both HiveResultSuite and the timestamp-{ltz,ntz}-nanos.sql golden files. - Add top-level and nested (array/map/struct) NULL cases. NULLs are handled by the generic `(null, _)` branch in `HiveResult.toHiveString`, but this locks in the behavior. ### Was this patch authored or co-authored using generative AI tooling? 
Generated-by: Cursor 1.7.0 --- .../timestamp-ltz-nanos.sql.out | 42 ++++++++++++++++ .../timestamp-ntz-nanos.sql.out | 42 ++++++++++++++++ .../sql-tests/inputs/timestamp-ltz-nanos.sql | 10 ++++ .../sql-tests/inputs/timestamp-ntz-nanos.sql | 10 ++++ .../results/timestamp-ltz-nanos.sql.out | 48 +++++++++++++++++++ .../results/timestamp-ntz-nanos.sql.out | 48 +++++++++++++++++++ .../spark/sql/execution/HiveResultSuite.scala | 46 +++++++++++++----- 7 files changed, 234 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out index 9be5de3a105a1..6f896ab8014c7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out @@ -55,6 +55,20 @@ Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ltz(7)) AS CAST(2020-01 +- OneRowRelation +-- !query +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ltz(9)) +-- !query analysis +Project [cast(1960-01-01 00:00:00.000000001 as timestamp_ltz(9)) AS CAST(1960-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ltz(7)) +-- !query analysis +Project [cast(1960-01-01 00:00:00.123456789 as timestamp_ltz(7)) AS CAST(1960-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7))#x] ++- OneRowRelation + + -- !query SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) -- !query analysis @@ -74,3 +88,31 @@ SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9 -- !query analysis Project [named_struct(f, cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)))#x] +- OneRowRelation + + +-- !query +SELECT CAST(NULL AS timestamp_ltz(9)) +-- !query analysis +Project 
[cast(null as timestamp_ltz(9)) AS CAST(NULL AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT array(CAST(NULL AS timestamp_ltz(9))) +-- !query analysis +Project [array(cast(null as timestamp_ltz(9))) AS array(CAST(NULL AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT map('k', CAST(NULL AS timestamp_ltz(9))) +-- !query analysis +Project [map(k, cast(null as timestamp_ltz(9))) AS map(k, CAST(NULL AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT named_struct('f', CAST(NULL AS timestamp_ltz(9))) +-- !query analysis +Project [named_struct(f, cast(null as timestamp_ltz(9))) AS named_struct(f, CAST(NULL AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out index 83549e36597a1..94570f139e849 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out @@ -55,6 +55,20 @@ Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(7)) AS CAST(2020-01 +- OneRowRelation +-- !query +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ntz(9)) +-- !query analysis +Project [cast(1960-01-01 00:00:00.000000001 as timestamp_ntz(9)) AS CAST(1960-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ntz(7)) +-- !query analysis +Project [cast(1960-01-01 00:00:00.123456789 as timestamp_ntz(7)) AS CAST(1960-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(7))#x] ++- OneRowRelation + + -- !query SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) -- !query analysis @@ -74,3 +88,31 @@ SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9 -- !query analysis Project [named_struct(f, cast(2020-01-01 00:00:00.123456789 as 
timestamp_ntz(9))) AS named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x] +- OneRowRelation + + +-- !query +SELECT CAST(NULL AS timestamp_ntz(9)) +-- !query analysis +Project [cast(null as timestamp_ntz(9)) AS CAST(NULL AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT array(CAST(NULL AS timestamp_ntz(9))) +-- !query analysis +Project [array(cast(null as timestamp_ntz(9))) AS array(CAST(NULL AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT map('k', CAST(NULL AS timestamp_ntz(9))) +-- !query analysis +Project [map(k, cast(null as timestamp_ntz(9))) AS map(k, CAST(NULL AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT named_struct('f', CAST(NULL AS timestamp_ntz(9))) +-- !query analysis +Project [named_struct(f, cast(null as timestamp_ntz(9))) AS named_struct(f, CAST(NULL AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql index 181cf9f5d7693..f7c36256a6bef 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql @@ -16,7 +16,17 @@ SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(9)); SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(8)); SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(7)); +-- Pre-1970 values exercise the negative-epoch path. +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ltz(9)); +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ltz(7)); + -- Nested values (array / map / struct). SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))); SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))); SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))); + +-- NULL values (top-level and nested). 
+SELECT CAST(NULL AS timestamp_ltz(9)); +SELECT array(CAST(NULL AS timestamp_ltz(9))); +SELECT map('k', CAST(NULL AS timestamp_ltz(9))); +SELECT named_struct('f', CAST(NULL AS timestamp_ltz(9))); diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql index 563e5e029c761..c1db88eb409b4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql @@ -16,7 +16,17 @@ SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(9)); SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(8)); SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(7)); +-- Pre-1970 values exercise the negative-epoch path. +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ntz(9)); +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ntz(7)); + -- Nested values (array / map / struct). SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))); SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))); SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))); + +-- NULL values (top-level and nested). 
+SELECT CAST(NULL AS timestamp_ntz(9)); +SELECT array(CAST(NULL AS timestamp_ntz(9))); +SELECT map('k', CAST(NULL AS timestamp_ntz(9))); +SELECT named_struct('f', CAST(NULL AS timestamp_ntz(9))); diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out index 6115d49fdbce8..75171edef611c 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out @@ -63,6 +63,22 @@ struct 2020-01-01 00:00:00 +-- !query +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +1960-01-01 00:00:00.000000001 + + +-- !query +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ltz(7)) +-- !query schema +struct +-- !query output +1960-01-01 00:00:00.1234567 + + -- !query SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) -- !query schema @@ -85,3 +101,35 @@ SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9 struct> -- !query output {"f":2020-01-01 00:00:00.123456789} + + +-- !query +SELECT CAST(NULL AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT array(CAST(NULL AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +[null] + + +-- !query +SELECT map('k', CAST(NULL AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +{"k":null} + + +-- !query +SELECT named_struct('f', CAST(NULL AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +{"f":null} diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out index 4859984f4c82d..39542fdd121ed 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out @@ -63,6 +63,22 @@ struct 2020-01-01 00:00:00 +-- !query +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +1960-01-01 00:00:00.000000001 + + +-- !query +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ntz(7)) +-- !query schema +struct +-- !query output +1960-01-01 00:00:00.1234567 + + -- !query SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) -- !query schema @@ -85,3 +101,35 @@ SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9 struct> -- !query output {"f":2020-01-01 00:00:00.123456789} + + +-- !query +SELECT CAST(NULL AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT array(CAST(NULL AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +[null] + + +-- !query +SELECT map('k', CAST(NULL AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +{"k":null} + + +-- !query +SELECT named_struct('f', CAST(NULL AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +{"f":null} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala index b65fe36914902..fd1b8eaf20cf2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala @@ -81,8 +81,10 @@ class HiveResultSuite extends SharedSparkSession { test("SPARK-57257: nanosecond timestamp formatting in hive result") { // Each input fraction maps to the expected rendered fraction at precision 7, 8, 9. Sub-`p` // digits are floored and trailing zeros trimmed, so an all-zero fraction renders as no - // fraction at all (e.g. ".000000001" at p=7/8). - val base = "2020-01-01 00:00:00" + // fraction at all (e.g. ".000000001" at p=7/8). 
The flooring/trimming is independent of the + // epoch sign, so the pre-1970 base (negative epoch micros + positive nanosWithinMicro) + // shares the same expected fractions. + val bases = Seq("2020-01-01 00:00:00", "1960-01-01 00:00:00") val cases = Seq( ".123456789" -> Seq(".1234567", ".12345678", ".123456789"), ".999999999" -> Seq(".9999999", ".99999999", ".999999999"), @@ -94,19 +96,39 @@ class HiveResultSuite extends SharedSparkSession { SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true", SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { Seq(7, 8, 9).zipWithIndex.foreach { case (p, idx) => - cases.foreach { case (frac, expectedByPrecision) => - val input = base + frac - val expected = base + expectedByPrecision(idx) - Seq("timestamp_ltz", "timestamp_ntz").foreach { typeName => - val df = spark.sql(s"SELECT CAST('$input' AS $typeName($p)) AS b") - assert(hiveResultString(df.queryExecution.executedPlan) === Seq(expected), - s"type = $typeName($p), input = $input") - val nested = spark.sql(s"SELECT array(CAST('$input' AS $typeName($p))) AS b") - assert(hiveResultString(nested.queryExecution.executedPlan) === Seq(s"[$expected]"), - s"nested type = $typeName($p), input = $input") + bases.foreach { base => + cases.foreach { case (frac, expectedByPrecision) => + val input = base + frac + val expected = base + expectedByPrecision(idx) + Seq("timestamp_ltz", "timestamp_ntz").foreach { typeName => + val df = spark.sql(s"SELECT CAST('$input' AS $typeName($p)) AS b") + assert(hiveResultString(df.queryExecution.executedPlan) === Seq(expected), + s"type = $typeName($p), input = $input") + val nested = spark.sql(s"SELECT array(CAST('$input' AS $typeName($p))) AS b") + assert(hiveResultString(nested.queryExecution.executedPlan) === Seq(s"[$expected]"), + s"nested type = $typeName($p), input = $input") + } } } } + + // NULL values: handled by the generic `(null, _)` branch in `toHiveString` (before the + // type-specific cases), so the path is type-agnostic. 
Verify top-level and nested NULLs. + Seq("timestamp_ltz(9)", "timestamp_ntz(9)").foreach { typeName => + val nullCast = s"CAST(NULL AS $typeName)" + val topLevel = spark.sql(s"SELECT $nullCast AS b") + assert(hiveResultString(topLevel.queryExecution.executedPlan) === Seq("NULL"), + s"top-level NULL of $typeName") + val inArray = spark.sql(s"SELECT array($nullCast) AS b") + assert(hiveResultString(inArray.queryExecution.executedPlan) === Seq("[null]"), + s"array NULL of $typeName") + val inMap = spark.sql(s"SELECT map('k', $nullCast) AS b") + assert(hiveResultString(inMap.queryExecution.executedPlan) === Seq("{\"k\":null}"), + s"map NULL of $typeName") + val inStruct = spark.sql(s"SELECT named_struct('f', $nullCast) AS b") + assert(hiveResultString(inStruct.queryExecution.executedPlan) === Seq("{\"f\":null}"), + s"struct NULL of $typeName") + } } }