Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,10 @@ import org.apache.spark.sql.types.{TimestampLTZNanosType, TimestampNTZNanosType}
* prefix; storage and formatting are identical.
*
* SCOPE (SPARK-57207): this issue wires physical representation, literals, row accessors, and
* codegen class selection. Dedicated fractional-second string formatting is not implemented yet:
* there is no TimestampFormatter for the nanos timestamp types. Until one lands, format() (and
* the toSQLValue() that delegates to it) raises the user-facing
* UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_TO_STRING error rather than silently truncating to
* microsecond precision.
* codegen class selection. CAST to STRING is implemented separately, zone-aware, in ToStringBase
* (SPARK-57256). The zone-less, type-level format() here (and the toSQLValue() that delegates to
* it) still raises the user-facing UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_TO_STRING error, since LTZ
* rendering needs the session time zone that this op does not have.
*
* Dataset encoders are wired here to the precision-aware leaves added by SPARK-57033
* (LocalDateTimeNanosEncoder / InstantNanosEncoder), so that turning on the Types Framework
Expand All @@ -50,10 +49,11 @@ abstract class TimestampNanosTypeApiOps extends TypeApiOps with DataTypeErrorsBa

// ==================== String Formatting ====================

// Fractional-second (nanosecond) string formatting is not implemented yet: there is no
// TimestampFormatter for the nanos timestamp types. Until one lands, formatting (CAST to STRING,
// EXPLAIN / SHOW output, and SQL-literal rendering via toSQLValue) raises a user-facing
// unsupported-feature error rather than silently truncating to microsecond precision.
// CAST to STRING for the nanosecond timestamp types is handled zone-aware by ToStringBase
// (SPARK-57256), alongside the microsecond timestamp types, because LTZ rendering depends on the
// session time zone that this zone-less, type-level formatter does not have. The remaining
// zone-less callers (EXPLAIN plan output and SQL-literal rendering via toSQLValue) still raise a
// user-facing unsupported-feature error here rather than silently truncating to microseconds.
override def format(v: Any): String = {
  // This type-level formatter has no session time zone, so it cannot render LTZ values; it
  // always raises the user-facing unsupported-feature error instead of producing a string.
  throw DataTypeErrors.cannotConvertNanosTimestampToStringError(dataType)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,9 @@ object Cast extends QueryErrorsBase {
case (TimestampType, TimestampNTZType) => true
case (TimestampNTZType, TimestampType) => true
// NTZ string is zone-independent (mirroring micro TIMESTAMP_NTZ, which is not listed); only
// the LTZ string parse depends on the session time zone.
// the LTZ string parse/render depends on the session time zone.
case (_: StringType, _: TimestampLTZNanosType) => true
case (_: TimestampLTZNanosType, _: StringType) => true
case (ArrayType(fromType, _), ArrayType(toType, _)) => needsTimeZone(fromType, toType)
case (MapType(fromKey, fromValue, _), MapType(toKey, toValue, _)) =>
needsTimeZone(fromKey, toKey) || needsTimeZone(fromValue, toValue)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,12 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils, DateFormatter, FractionTimeFormatter, IntervalStringStyles, IntervalUtils, MapData, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE
import org.apache.spark.sql.errors.DataTypeErrors
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.ops.TypeApiOps
import org.apache.spark.unsafe.UTF8StringBuilder
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
import org.apache.spark.unsafe.types.{CalendarInterval, TimestampNanosVal, UTF8String}
import org.apache.spark.util.ArrayImplicits._
import org.apache.spark.util.SparkStringUtils

Expand Down Expand Up @@ -67,10 +66,16 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
case NoConstraint => castToString(from)
}

private def castToString(from: DataType): Any => UTF8String =
TypeApiOps(from)
.map(ops => acceptAny[Any](v => ops.formatUTF8(v)))
.getOrElse(castToStringDefault(from))
private def castToString(from: DataType): Any => UTF8String = {
  // Nanosecond timestamps are formatted zone-aware (LTZ renders in the session time zone), so
  // they are routed to castToStringDefault next to the microsecond timestamp types instead of
  // the zone-less Types Framework formatter (SPARK-57256).
  val isNanosTimestamp = from match {
    case _: TimestampNTZNanosType | _: TimestampLTZNanosType => true
    case _ => false
  }
  if (isNanosTimestamp) {
    castToStringDefault(from)
  } else {
    // Prefer the Types Framework formatter when one is registered for this type; only build
    // the default formatter when none is.
    val frameworkFormatter =
      TypeApiOps(from).map(typeOps => acceptAny[Any](value => typeOps.formatUTF8(value)))
    frameworkFormatter.getOrElse(castToStringDefault(from))
  }
}

private def castToStringDefault(from: DataType): Any => UTF8String = from match {
case CalendarIntervalType =>
Expand All @@ -82,6 +87,12 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
acceptAny[Long](t => UTF8String.fromString(timestampFormatter.format(t)))
case TimestampNTZType =>
acceptAny[Long](t => UTF8String.fromString(timestampNTZFormatter.format(t)))
case t: TimestampLTZNanosType =>
acceptAny[TimestampNanosVal](v =>
UTF8String.fromString(timestampFormatter.formatNanos(v, t.precision)))
case t: TimestampNTZNanosType =>
acceptAny[TimestampNanosVal](v =>
UTF8String.fromString(timestampNTZFormatter.formatWithoutTimeZoneNanos(v, t.precision)))
case _: TimeType =>
acceptAny[Long](t => UTF8String.fromString(timeFormatter.format(t)))
case ArrayType(et, _) =>
Expand Down Expand Up @@ -235,6 +246,18 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
ctx.addReferenceObj("timestampNTZFormatter", timestampNTZFormatter),
timestampNTZFormatter.getClass)
(c, evPrim) => code"$evPrim = UTF8String.fromString($tf.format($c));"
case t: TimestampLTZNanosType =>
val tf = JavaCode.global(
ctx.addReferenceObj("timestampFormatter", timestampFormatter),
timestampFormatter.getClass)
(c, evPrim) =>
code"$evPrim = UTF8String.fromString($tf.formatNanos($c, ${t.precision}));"
case t: TimestampNTZNanosType =>
val tf = JavaCode.global(
ctx.addReferenceObj("timestampNTZFormatter", timestampNTZFormatter),
timestampNTZFormatter.getClass)
(c, evPrim) =>
code"$evPrim = UTF8String.fromString($tf.formatWithoutTimeZoneNanos($c, ${t.precision}));"
case _: TimeType =>
val tf = JavaCode.global(
ctx.addReferenceObj("timeFormatter", timeFormatter),
Expand Down Expand Up @@ -306,13 +329,6 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
(c, evPrim) => code"$evPrim = UTF8String.fromString($c.toPlainString());"
case _: StringType =>
(c, evPrim) => code"$evPrim = $c;"
// Fractional-second (nanosecond) timestamp formatting is not implemented yet: there is no
// TimestampFormatter for the nanos timestamp types. The interpreted path raises this via the
// Types Framework (castToString -> TypeApiOps.format); the codegen path has no framework
// hook, so it raises the same user-facing error directly until a formatter lands
// (SPARK-57207).
case _: TimestampNTZNanosType | _: TimestampLTZNanosType =>
throw DataTypeErrors.cannotConvertNanosTimestampToStringError(from)
case _ =>
(c, evPrim) => code"$evPrim = UTF8String.fromString(String.valueOf($c));"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,12 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
}

atomicTypes.foreach(dt => checkNullCast(NullType, dt))
(atomicTypes ++ timeTypes).foreach(dt => checkNullCast(dt, StringType))
val timestampNanosTypes = Seq(
TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION),
TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION),
TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION),
TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION))
(atomicTypes ++ timeTypes ++ timestampNanosTypes).foreach(dt => checkNullCast(dt, StringType))
checkNullCast(StringType, BinaryType)
checkNullCast(StringType, BooleanType)
numericTypes.foreach(dt => checkNullCast(dt, BooleanType))
Expand Down Expand Up @@ -1070,6 +1075,126 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
}
}

test("SPARK-57256: cast timestamp_ntz with nanosecond precision to string") {
  // Builds CAST(<timestamp_ntz(precision) literal> AS STRING) evaluated under `zoneId`.
  // NTZ rendering is expected to be independent of the session time zone, which is why the
  // same expectations are checked for every zone below.
  def ntzToString(wallClock: LocalDateTime, precision: Int, zoneId: Option[String]): Cast = {
    val input =
      Literal.create(localDateTimeToNanosVal(wallClock), TimestampNTZNanosType(precision))
    cast(input, StringType, zoneId)
  }

  val base = LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456789)

  // (wall clock, precision, expected string), evaluated in this order for each zone.
  val expectations: Seq[(LocalDateTime, Int, String)] = Seq(
    // Sub-precision digits are floored, then trailing zeros are trimmed.
    (base, 9, "2020-01-01 00:00:00.123456789"),
    (base, 8, "2020-01-01 00:00:00.12345678"),
    (base, 7, "2020-01-01 00:00:00.1234567"),
    // Nanos-within-micro boundaries 0 and 999.
    (LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456000), 9, "2020-01-01 00:00:00.123456"),
    (LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456999), 9, "2020-01-01 00:00:00.123456999"),
    // An all-zero fraction trims to no fractional part at all.
    (LocalDateTime.of(2020, 1, 1, 0, 0, 0, 0), 9, "2020-01-01 00:00:00"),
    // Pre-epoch and year-9999 boundaries.
    (LocalDateTime.of(1969, 12, 31, 23, 59, 59, 123456789), 9,
      "1969-12-31 23:59:59.123456789"),
    (LocalDateTime.of(9999, 12, 31, 23, 59, 59, 999999999), 9,
      "9999-12-31 23:59:59.999999999"))

  for (zid <- outstandingZoneIds) {
    val sessionZone = Option(zid.getId)
    expectations.foreach { case (wallClock, precision, expected) =>
      checkEvaluation(ntzToString(wallClock, precision, sessionZone), expected)
    }

    // Null input stays null.
    val nullInput = Literal.create(null, TimestampNTZNanosType(9))
    checkEvaluation(cast(nullInput, StringType, sessionZone), null)
  }
}

test("SPARK-57256: cast timestamp_ltz with nanosecond precision to string") {
  // Builds CAST(<timestamp_ltz(precision) literal> AS STRING) for the given session zone. The
  // physical value is an epoch instant (built here from a UTC wall clock); the string is
  // rendered in the session time zone.
  def ltzToString(utcWallClock: LocalDateTime, precision: Int, zoneId: String): Cast = {
    val input =
      Literal.create(localDateTimeToNanosVal(utcWallClock), TimestampLTZNanosType(precision))
    cast(input, StringType, Option(zoneId))
  }

  val base = LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456789)

  // (UTC wall clock, precision, session zone, expected string), evaluated in this order.
  val expectations: Seq[(LocalDateTime, Int, String, String)] = Seq(
    // UTC session zone: the wall clock matches the UTC instant. Sub-precision digits are
    // floored and trailing zeros trimmed.
    (base, 9, "UTC", "2020-01-01 00:00:00.123456789"),
    (base, 8, "UTC", "2020-01-01 00:00:00.12345678"),
    (base, 7, "UTC", "2020-01-01 00:00:00.1234567"),
    // A non-UTC session zone shifts the wall clock; the fractional second is unaffected.
    (base, 9, "America/Los_Angeles", "2019-12-31 16:00:00.123456789"),
    (base, 9, "Asia/Kolkata", "2020-01-01 05:30:00.123456789"),
    // DST spring-forward boundary in America/Los_Angeles: 2020-03-08 02:00 PST -> 03:00 PDT.
    // The UTC instant 10:00:00 lands at 03:00:00 PDT (UTC-7); fractional part is unaffected.
    (LocalDateTime.of(2020, 3, 8, 10, 0, 0, 123456789), 9, "America/Los_Angeles",
      "2020-03-08 03:00:00.123456789"),
    // Nanos-within-micro boundaries 0 and 999 (under UTC).
    (LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456000), 9, "UTC",
      "2020-01-01 00:00:00.123456"),
    (LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456999), 9, "UTC",
      "2020-01-01 00:00:00.123456999"),
    // An all-zero fraction trims to no fractional part at all.
    (LocalDateTime.of(2020, 1, 1, 0, 0, 0, 0), 9, "UTC", "2020-01-01 00:00:00"),
    // Pre-epoch and year-9999 boundaries (under UTC).
    (LocalDateTime.of(1969, 12, 31, 23, 59, 59, 123456789), 9, "UTC",
      "1969-12-31 23:59:59.123456789"),
    (LocalDateTime.of(9999, 12, 31, 23, 59, 59, 999999999), 9, "UTC",
      "9999-12-31 23:59:59.999999999"))

  expectations.foreach { case (utcWallClock, precision, zone, expected) =>
    checkEvaluation(ltzToString(utcWallClock, precision, zone), expected)
  }

  // Null input stays null.
  val nullInput = Literal.create(null, TimestampLTZNanosType(9))
  checkEvaluation(cast(nullInput, StringType, UTC_OPT), null)
}

test("SPARK-57256: cast complex types with nanosecond timestamps to string") {
  // 2020-01-01 00:00:00.123456789, built afresh for each literal that uses it.
  def sampleNanos = localDateTimeToNanosVal(LocalDateTime.of(2020, 1, 1, 0, 0, 0, 123456789))

  val ntzElem = Literal.create(sampleNanos, TimestampNTZNanosType(9))
  val ltzElem = Literal.create(sampleNanos, TimestampLTZNanosType(9))

  // array<timestamp_ntz_nanos> with a null element: covers the nested element rendering and
  // the null placeholder. No session time zone is supplied for the NTZ case.
  val ntzArray = CreateArray(Seq(ntzElem, Literal.create(null, TimestampNTZNanosType(9))))
  checkEvaluation(
    cast(ntzArray, StringType),
    "[2020-01-01 00:00:00.123456789, null]")

  // array<timestamp_ltz_nanos> is rendered in the session time zone (here UTC).
  val ltzArray = CreateArray(Seq(ltzElem, Literal.create(null, TimestampLTZNanosType(9))))
  checkEvaluation(
    cast(ltzArray, StringType, UTC_OPT),
    "[2020-01-01 00:00:00.123456789, null]")

  // A struct nesting both nanosecond timestamp variants.
  val bothVariants =
    CreateNamedStruct(Seq(Literal("ntz"), ntzElem, Literal("ltz"), ltzElem))
  checkEvaluation(
    cast(bothVariants, StringType, UTC_OPT),
    "{2020-01-01 00:00:00.123456789, 2020-01-01 00:00:00.123456789}")
}

test("SPARK-35112: Cast string to day-time interval") {
checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType()), 0L)
checkEvaluation(cast(Literal.create(" interval '0 0:0:0' Day TO second "),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.{SparkFunSuite, SparkUnsupportedOperationException}
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.util.GenericArrayData
Expand Down Expand Up @@ -184,21 +184,6 @@ class TimestampNanosRowSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(Literal.create(null, TimestampLTZNanosType(7)), null)
}

// Fractional-second formatting is not implemented yet, so CAST(nanos AS STRING) raises the
// user-facing UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_TO_STRING error. Both the interpreted
// (ToStringBase.castToString -> TypeApiOps.format) and codegen (ToStringBase.castToStringCode)
// paths must fail the same way (SPARK-57207).
test("CAST nanos timestamp to STRING raises an unsupported-feature error in both eval modes") {
Seq(
Literal.create(ntzValue, TimestampNTZNanosType(9)),
Literal.create(ltzValue, TimestampLTZNanosType(7))).foreach { lit =>
checkErrorInExpression[SparkUnsupportedOperationException](
Cast(lit, StringType),
condition = "UNSUPPORTED_FEATURE.TIMESTAMP_NANOS_TO_STRING",
parameters = Map("dataType" -> ("\"" + lit.dataType.sql + "\"")))
}
}

testBothCodegenAndInterpreted("UnsafeRow handles extreme epoch micros for nanos") {
val fieldTypes: Array[DataType] = Array(TimestampNTZNanosType(9))
val converter = UnsafeProjection.create(fieldTypes)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.UTC_OPT
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.{UTF8String, VariantVal}
import org.apache.spark.unsafe.types.{TimestampNanosVal, UTF8String, VariantVal}

class ToPrettyStringSuite extends SparkFunSuite with ExpressionEvalHelper {

Expand Down Expand Up @@ -135,6 +135,28 @@ class ToPrettyStringSuite extends SparkFunSuite with ExpressionEvalHelper {
assert(prettyString.sql === child.sql)
}

test("SPARK-57256: TimestampNTZNanos as pretty strings") {
  // Wraps the given literal in ToPrettyString and checks the rendered text.
  def checkPretty(literal: Expression, expected: String): Unit =
    checkEvaluation(ToPrettyString(literal), expected)

  // Literal of type TIMESTAMP_NTZ(9) built from epoch micros plus nanos within the micro.
  def ntzLiteral(micros: Long, nanos: Short): Expression =
    Literal.create(new TimestampNanosVal(micros, nanos), TimestampNTZNanosType(9))

  checkPretty(ntzLiteral(0L, 1), "1970-01-01 00:00:00.000000001")
  checkPretty(ntzLiteral(1L, 0), "1970-01-01 00:00:00.000001")

  // The ZERO constant renders without any fractional part.
  val zeroLiteral = Literal.create(TimestampNanosVal.ZERO, TimestampNTZNanosType(9))
  checkPretty(zeroLiteral, "1970-01-01 00:00:00")
}

test("SPARK-57256: TimestampLTZNanos as pretty strings") {
  // Wraps the given literal in ToPrettyString and checks the rendered text.
  def checkPretty(literal: Expression, expected: String): Unit =
    checkEvaluation(ToPrettyString(literal), expected)

  // Literal of type TIMESTAMP_LTZ(9) built from epoch micros plus nanos within the micro.
  def ltzLiteral(micros: Long, nanos: Short): Expression =
    Literal.create(new TimestampNanosVal(micros, nanos), TimestampLTZNanosType(9))

  // Pin the session time zone to UTC so the expected wall-clock strings are stable.
  withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
    checkPretty(ltzLiteral(0L, 1), "1970-01-01 00:00:00.000000001")
    checkPretty(ltzLiteral(1L, 0), "1970-01-01 00:00:00.000001")

    // The ZERO constant renders without any fractional part.
    val zeroLiteral = Literal.create(TimestampNanosVal.ZERO, TimestampLTZNanosType(9))
    checkPretty(zeroLiteral, "1970-01-01 00:00:00")
  }
}

test("Time as pretty strings") {
checkEvaluation(ToPrettyString(Literal(1000 * 1000L, TimeType())), "00:00:00.001")
checkEvaluation(ToPrettyString(Literal(1000L, TimeType())), "00:00:00.000001")
Expand Down
Loading