diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 6714510874351..25170da800aec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -131,6 +131,16 @@ object HiveResult extends SQLConfHelper { case (t: Timestamp, TimestampType) => formatters.timestamp.format(t) case (i: Instant, TimestampType) => formatters.timestamp.format(i) case (l: LocalDateTime, TimestampNTZType) => formatters.timestamp.format(l) + // Nanosecond-precision timestamps. The external values are `Instant` (LTZ) and + // `LocalDateTime` (NTZ); convert to the physical `TimestampNanosVal` at the column precision + // and render via the same formatter methods as the cast-to-string path (SPARK-57256), so the + // output stays consistent. LTZ uses the session zone; NTZ is zone-independent. + case (i: Instant, t: TimestampLTZNanosType) => + formatters.timestamp.formatNanos( + DateTimeUtils.instantToTimestampNanos(i, t.precision), t.precision) + case (l: LocalDateTime, t: TimestampNTZNanosType) => + formatters.timestamp.formatWithoutTimeZoneNanos( + DateTimeUtils.localDateTimeToTimestampNanos(l, t.precision), t.precision) case (bin: Array[Byte], BinaryType) => binaryFormatter(bin) case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString case (n, _: NumericType) => n.toString diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out new file mode 100644 index 0000000000000..6f896ab8014c7 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out @@ -0,0 +1,118 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(7)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(7)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(8)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(8)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(8))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ltz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.999999000 as timestamp_ltz(9)) AS CAST(2020-01-01 00:00:00.999999000 AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ltz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000999 as timestamp_ltz(9)) AS CAST(2020-01-01 00:00:00.000000999 AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ltz(9)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(8)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ltz(8)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(8))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(7)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ltz(7)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(7))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ltz(9)) +-- !query analysis +Project [cast(1960-01-01 00:00:00.000000001 as timestamp_ltz(9)) AS CAST(1960-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ltz(7)) +-- !query analysis +Project [cast(1960-01-01 00:00:00.123456789 as timestamp_ltz(7)) AS CAST(1960-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7))#x] ++- OneRowRelation + + +-- !query +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query analysis +Project [array(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS array(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query analysis +Project [map(k, cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS map(k, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query analysis +Project [named_struct(f, cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT CAST(NULL AS timestamp_ltz(9)) +-- !query analysis +Project [cast(null as timestamp_ltz(9)) AS CAST(NULL AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT array(CAST(NULL AS timestamp_ltz(9))) +-- !query analysis +Project [array(cast(null as timestamp_ltz(9))) AS array(CAST(NULL AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT map('k', CAST(NULL AS timestamp_ltz(9))) +-- !query analysis +Project [map(k, cast(null as timestamp_ltz(9))) AS map(k, CAST(NULL AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT named_struct('f', CAST(NULL AS timestamp_ltz(9))) +-- !query analysis +Project [named_struct(f, cast(null as timestamp_ltz(9))) AS named_struct(f, CAST(NULL AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out new file mode 100644 index 0000000000000..94570f139e849 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out @@ -0,0 +1,118 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(7)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(7)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(7))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(8)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(8)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(8))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)) AS CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ntz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.999999000 as timestamp_ntz(9)) AS CAST(2020-01-01 00:00:00.999999000 AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ntz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000999 as timestamp_ntz(9)) AS CAST(2020-01-01 00:00:00.000000999 AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(9)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(9)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(8)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(8)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(8))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(7)) +-- !query analysis +Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(7)) AS CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(7))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ntz(9)) +-- !query analysis +Project [cast(1960-01-01 00:00:00.000000001 as timestamp_ntz(9)) AS CAST(1960-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ntz(7)) +-- !query analysis +Project [cast(1960-01-01 00:00:00.123456789 as timestamp_ntz(7)) AS CAST(1960-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(7))#x] ++- OneRowRelation + + +-- !query +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query analysis +Project [array(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS array(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query analysis +Project [map(k, cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS map(k, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query analysis +Project [named_struct(f, cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT CAST(NULL AS timestamp_ntz(9)) +-- !query analysis +Project [cast(null as timestamp_ntz(9)) AS CAST(NULL AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +SELECT array(CAST(NULL AS timestamp_ntz(9))) +-- !query analysis +Project [array(cast(null as timestamp_ntz(9))) AS array(CAST(NULL AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT map('k', CAST(NULL AS timestamp_ntz(9))) +-- !query analysis +Project [map(k, cast(null as timestamp_ntz(9))) AS map(k, CAST(NULL AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT named_struct('f', CAST(NULL AS timestamp_ntz(9))) +-- !query analysis +Project [named_struct(f, cast(null as timestamp_ntz(9))) AS named_struct(f, CAST(NULL AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql new file mode 100644 index 0000000000000..f7c36256a6bef --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql @@ -0,0 +1,32 @@ +-- Nanosecond-precision TIMESTAMP_LTZ(p) (p in [7, 9]) in Hive results (SPARK-57257). +-- LTZ values are rendered in the session time zone. + +--SET spark.sql.timestampNanosTypes.enabled=true +--SET spark.sql.session.timeZone=America/Los_Angeles + +-- Precision-driven fraction width: sub-p digits are floored. +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(7)); +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(8)); +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)); + +-- Trailing-zero trimming: an all-zero fraction renders as no fraction at all. +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ltz(9)); +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ltz(9)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(9)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(8)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(7)); + +-- Pre-1970 values exercise the negative-epoch path. +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ltz(9)); +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ltz(7)); + +-- Nested values (array / map / struct). +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))); +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))); +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))); + +-- NULL values (top-level and nested). +SELECT CAST(NULL AS timestamp_ltz(9)); +SELECT array(CAST(NULL AS timestamp_ltz(9))); +SELECT map('k', CAST(NULL AS timestamp_ltz(9))); +SELECT named_struct('f', CAST(NULL AS timestamp_ltz(9))); diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql new file mode 100644 index 0000000000000..c1db88eb409b4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql @@ -0,0 +1,32 @@ +-- Nanosecond-precision TIMESTAMP_NTZ(p) (p in [7, 9]) in Hive results (SPARK-57257). +-- NTZ values are zone-independent. + +--SET spark.sql.timestampNanosTypes.enabled=true +--SET spark.sql.session.timeZone=America/Los_Angeles + +-- Precision-driven fraction width: sub-p digits are floored. +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(7)); +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(8)); +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)); + +-- Trailing-zero trimming: an all-zero fraction renders as no fraction at all. +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ntz(9)); +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ntz(9)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(9)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(8)); +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(7)); + +-- Pre-1970 values exercise the negative-epoch path. +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ntz(9)); +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ntz(7)); + +-- Nested values (array / map / struct). +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))); +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))); +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))); + +-- NULL values (top-level and nested). +SELECT CAST(NULL AS timestamp_ntz(9)); +SELECT array(CAST(NULL AS timestamp_ntz(9))); +SELECT map('k', CAST(NULL AS timestamp_ntz(9))); +SELECT named_struct('f', CAST(NULL AS timestamp_ntz(9))); diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out new file mode 100644 index 0000000000000..75171edef611c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out @@ -0,0 +1,135 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(7)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.1234567 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(8)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.12345678 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.999999 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.000000999 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.000000001 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(8)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(7)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00 + + +-- !query +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +1960-01-01 00:00:00.000000001 + + +-- !query +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ltz(7)) +-- !query schema +struct +-- !query output +1960-01-01 00:00:00.1234567 + + +-- !query +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +[2020-01-01 00:00:00.123456789] + + +-- !query +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +{"k":2020-01-01 00:00:00.123456789} + + +-- !query +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +{"f":2020-01-01 00:00:00.123456789} + + +-- !query +SELECT CAST(NULL AS timestamp_ltz(9)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT array(CAST(NULL AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +[null] + + +-- !query +SELECT map('k', CAST(NULL AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +{"k":null} + + +-- !query +SELECT named_struct('f', CAST(NULL AS timestamp_ltz(9))) +-- !query schema +struct> +-- !query output +{"f":null} diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out new file mode 100644 index 0000000000000..39542fdd121ed --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out @@ -0,0 +1,135 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(7)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.1234567 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(8)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.12345678 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.123456789 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.999999 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.000000999 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00.000000001 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(8)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00 + + +-- !query +SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(7)) +-- !query schema +struct +-- !query output +2020-01-01 00:00:00 + + +-- !query +SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +1960-01-01 00:00:00.000000001 + + +-- !query +SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ntz(7)) +-- !query schema +struct +-- !query output +1960-01-01 00:00:00.1234567 + + +-- !query +SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +[2020-01-01 00:00:00.123456789] + + +-- !query +SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +{"k":2020-01-01 00:00:00.123456789} + + +-- !query +SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +{"f":2020-01-01 00:00:00.123456789} + + +-- !query +SELECT CAST(NULL AS timestamp_ntz(9)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT array(CAST(NULL AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +[null] + + +-- !query +SELECT map('k', CAST(NULL AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +{"k":null} + + +-- !query +SELECT named_struct('f', CAST(NULL AS timestamp_ntz(9))) +-- !query schema +struct> +-- !query output +{"f":null} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala index 251ed064af91e..fd1b8eaf20cf2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala @@ -17,15 +17,17 @@ package org.apache.spark.sql.execution -import java.time.{Duration, Period, Year} +import java.time.{Duration, LocalDateTime, Period, Year, ZoneOffset} -import org.apache.spark.sql.YearUDT +import scala.jdk.CollectionConverters._ + +import org.apache.spark.sql.{Row, YearUDT} import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.connector.catalog.InMemoryTableCatalog import org.apache.spark.sql.execution.HiveResult._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession} -import org.apache.spark.sql.types.{YearMonthIntervalType, YearMonthIntervalType => YM} +import org.apache.spark.sql.types.{StructField, StructType, TimestampLTZNanosType, TimestampNTZNanosType, YearMonthIntervalType, YearMonthIntervalType => YM} class HiveResultSuite extends SharedSparkSession { @@ -76,6 +78,89 @@ class HiveResultSuite extends SharedSparkSession { assert(result2 == timestamps.map(x => s"[$x]")) } + test("SPARK-57257: nanosecond timestamp formatting in hive result") { + // Each input fraction maps to the expected rendered fraction at precision 7, 8, 9. Sub-`p` + // digits are floored and trailing zeros trimmed, so an all-zero fraction renders as no + // fraction at all (e.g. ".000000001" at p=7/8). The flooring/trimming is independent of the + // epoch sign, so the pre-1970 base (negative epoch micros + positive nanosWithinMicro) + // shares the same expected fractions. + val bases = Seq("2020-01-01 00:00:00", "1960-01-01 00:00:00") + val cases = Seq( + ".123456789" -> Seq(".1234567", ".12345678", ".123456789"), + ".999999999" -> Seq(".9999999", ".99999999", ".999999999"), + ".999999000" -> Seq(".999999", ".999999", ".999999"), + ".000000001" -> Seq("", "", ".000000001"), + ".000000999" -> Seq(".0000009", ".00000099", ".000000999")) + // Render LTZ in a fixed zone so the wall-clock fields round-trip from the cast. + withSQLConf( + SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true", + SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { + Seq(7, 8, 9).zipWithIndex.foreach { case (p, idx) => + bases.foreach { base => + cases.foreach { case (frac, expectedByPrecision) => + val input = base + frac + val expected = base + expectedByPrecision(idx) + Seq("timestamp_ltz", "timestamp_ntz").foreach { typeName => + val df = spark.sql(s"SELECT CAST('$input' AS $typeName($p)) AS b") + assert(hiveResultString(df.queryExecution.executedPlan) === Seq(expected), + s"type = $typeName($p), input = $input") + val nested = spark.sql(s"SELECT array(CAST('$input' AS $typeName($p))) AS b") + assert(hiveResultString(nested.queryExecution.executedPlan) === Seq(s"[$expected]"), + s"nested type = $typeName($p), input = $input") + } + } + } + } + + // NULL values: handled by the generic `(null, _)` branch in `toHiveString` (before the + // type-specific cases), so the path is type-agnostic. Verify top-level and nested NULLs. + Seq("timestamp_ltz(9)", "timestamp_ntz(9)").foreach { typeName => + val nullCast = s"CAST(NULL AS $typeName)" + val topLevel = spark.sql(s"SELECT $nullCast AS b") + assert(hiveResultString(topLevel.queryExecution.executedPlan) === Seq("NULL"), + s"top-level NULL of $typeName") + val inArray = spark.sql(s"SELECT array($nullCast) AS b") + assert(hiveResultString(inArray.queryExecution.executedPlan) === Seq("[null]"), + s"array NULL of $typeName") + val inMap = spark.sql(s"SELECT map('k', $nullCast) AS b") + assert(hiveResultString(inMap.queryExecution.executedPlan) === Seq("{\"k\":null}"), + s"map NULL of $typeName") + val inStruct = spark.sql(s"SELECT named_struct('f', $nullCast) AS b") + assert(hiveResultString(inStruct.queryExecution.executedPlan) === Seq("{\"f\":null}"), + s"struct NULL of $typeName") + } + } + } + + test("SPARK-57257: LTZ nanos timestamp honors session time zone, NTZ is zone-independent") { + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") { + // A fixed instant and the matching local date-time at UTC. + val ldt = LocalDateTime.of(2020, 1, 1, 12, 0, 0, 123456789) + val instant = ldt.toInstant(ZoneOffset.UTC) + val ltzDf = spark.createDataFrame( + Seq(Row(instant)).asJava, + StructType(Seq(StructField("b", TimestampLTZNanosType(9))))) + val ntzDf = spark.createDataFrame( + Seq(Row(ldt)).asJava, + StructType(Seq(StructField("b", TimestampNTZNanosType(9))))) + + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { + assert(hiveResultString(ltzDf.queryExecution.executedPlan) === + Seq("2020-01-01 12:00:00.123456789")) + assert(hiveResultString(ntzDf.queryExecution.executedPlan) === + Seq("2020-01-01 12:00:00.123456789")) + } + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "America/Los_Angeles") { + // LTZ shifts with the session zone (UTC-08:00 on this date) ... + assert(hiveResultString(ltzDf.queryExecution.executedPlan) === + Seq("2020-01-01 04:00:00.123456789")) + // ... while NTZ stays the same wall-clock value. + assert(hiveResultString(ntzDf.queryExecution.executedPlan) === + Seq("2020-01-01 12:00:00.123456789")) + } + } + } + test("toHiveString correctly handles UDTs") { val point = new ExamplePoint(50.0, 50.0) val tpe = new ExamplePointUDT() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 401b3c126d854..f9d333479bbdb 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -118,7 +118,10 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ "pipe-operators.sql", // VARIANT type "variant/named-function-arguments.sql", - "variant-field-extractions.sql" + "variant-field-extractions.sql", + // SPARK-57257: nanosecond-precision timestamp types are not yet mapped by the Thrift Server + "timestamp-ltz-nanos.sql", + "timestamp-ntz-nanos.sql" ) override def runQueries(