Skip to content

Commit 293b19b

Browse files
committed
feat: add parse_datetime_tz with UTC normalization
Introduces parse_datetime_tz() which accepts an optional timezone offset ("+HH:MM", "+HH", "-HH:MM", "-HH", or "Z") and normalizes the resulting microseconds-since-epoch to UTC by subtracting the offset. When no offset is present, the behavior matches parse_datetime(). Adds 7 tests covering positive/negative offsets, short "+HH" form (as returned by PostgreSQL), the ISO-8601 'Z' marker, fractional seconds combined with an offset, and the no-offset fallback.
1 parent 93148f8 commit 293b19b

3 files changed

Lines changed: 117 additions & 0 deletions

File tree

include/sql_engine/datetime_parse.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,14 @@ int32_t parse_date(const char* s);
1313
// Parse "YYYY-MM-DD HH:MM:SS[.uuuuuu]" to microseconds since epoch.
1414
int64_t parse_datetime(const char* s);
1515

16+
// Parse a datetime string that MAY include a timezone offset.
17+
// Accepts: "YYYY-MM-DD HH:MM:SS[.UUUUUU][+HH:MM|+HHMM|+HH|Z|-HH:MM|-HHMM|-HH]" ('T' may replace the space between date and time).
18+
// If a timezone offset is present, normalizes the timestamp to UTC
19+
// (by subtracting the offset).
20+
// If no timezone is present, behaves identically to parse_datetime().
21+
// Returns microseconds since UTC epoch 1970-01-01 00:00:00.
22+
int64_t parse_datetime_tz(const char* s);
23+
1624
// Parse "HH:MM:SS[.uuuuuu]" to microseconds since midnight.
1725
int64_t parse_time(const char* s);
1826

src/sql_engine/datetime_parse.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,63 @@ int64_t parse_datetime(const char* s) {
106106
return us;
107107
}
108108

109+
// Parse an optional timezone offset at position `s`. Advances `s` past the offset.
110+
// Returns the offset in microseconds (positive for east of UTC, negative for west).
111+
// Recognized formats: "Z", "+HH", "+HH:MM", "-HH", "-HH:MM" (also without colon).
112+
// Returns 0 if no offset found (does not advance `s`).
113+
static int64_t parse_tz_offset_us(const char*& s) {
114+
if (!s || !*s) return 0;
115+
if (*s == 'Z' || *s == 'z') {
116+
++s;
117+
return 0;
118+
}
119+
if (*s != '+' && *s != '-') return 0;
120+
int sign = (*s == '-') ? -1 : 1;
121+
++s;
122+
int hours = parse_int(s, 2);
123+
int minutes = 0;
124+
if (*s == ':') {
125+
++s;
126+
minutes = parse_int(s, 2);
127+
} else if (*s >= '0' && *s <= '9') {
128+
minutes = parse_int(s, 2);
129+
}
130+
int64_t offset_us = (static_cast<int64_t>(hours) * 3600LL
131+
+ static_cast<int64_t>(minutes) * 60LL)
132+
* 1000000LL;
133+
return sign * offset_us;
134+
}
135+
136+
int64_t parse_datetime_tz(const char* s) {
137+
if (!s || !*s) return 0;
138+
const char* p = s;
139+
int year = parse_int(p, 4);
140+
if (*p == '-') ++p;
141+
int month = parse_int(p, 2);
142+
if (*p == '-') ++p;
143+
int day = parse_int(p, 2);
144+
if (*p == ' ' || *p == 'T') ++p;
145+
int hour = parse_int(p, 2);
146+
if (*p == ':') ++p;
147+
int minute = parse_int(p, 2);
148+
if (*p == ':') ++p;
149+
int second = parse_int(p, 2);
150+
int64_t frac = parse_frac_us(p);
151+
152+
int32_t days = days_since_epoch(year, month, day);
153+
int64_t us = static_cast<int64_t>(days) * 86400LL * 1000000LL
154+
+ static_cast<int64_t>(hour) * 3600LL * 1000000LL
155+
+ static_cast<int64_t>(minute) * 60LL * 1000000LL
156+
+ static_cast<int64_t>(second) * 1000000LL
157+
+ frac;
158+
159+
// Parse optional timezone offset and normalize to UTC.
160+
// "2024-06-15 14:30:00+05:30" means 14:30 local with offset +05:30.
161+
// UTC = local - offset. So for positive offset (east of UTC), subtract.
162+
int64_t tz_us = parse_tz_offset_us(p);
163+
return us - tz_us;
164+
}
165+
109166
int64_t parse_time(const char* s) {
110167
if (!s || !*s) return 0;
111168
bool negative = false;

tests/test_datetime_format.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,55 @@ TEST(FormatDatetime, DateBeforeEpoch) {
164164
size_t n = format_datetime(us, buf, sizeof(buf));
165165
EXPECT_EQ(std::string(buf, n), "1969-07-20 20:17:40");
166166
}
167+
168+
// ----- parse_datetime_tz timezone normalization -----
169+
170+
TEST(DatetimeParseTimezoneTest, ParseDatetimeWithPositiveOffset) {
    // East of UTC: 14:30:00 at +05:30 is 09:00:00 UTC on the same date.
    const int64_t expected_utc = parse_datetime("2024-06-15 09:00:00");
    const int64_t actual_utc = parse_datetime_tz("2024-06-15 14:30:00+05:30");
    EXPECT_EQ(actual_utc, expected_utc);
}
176+
177+
TEST(DatetimeParseTimezoneTest, ParseDatetimeWithNegativeOffset) {
    // West of UTC: 14:30:00 at -08:00 is 22:30:00 UTC on the same date.
    const int64_t expected_utc = parse_datetime("2024-06-15 22:30:00");
    const int64_t actual_utc = parse_datetime_tz("2024-06-15 14:30:00-08:00");
    EXPECT_EQ(actual_utc, expected_utc);
}
183+
184+
TEST(DatetimeParseTimezoneTest, ParseDatetimeWithZOffset) {
    // A trailing 'Z' (the ISO-8601 UTC designator) must not shift the value.
    const int64_t expected_utc = parse_datetime("2024-06-15 14:30:00");
    const int64_t actual_utc = parse_datetime_tz("2024-06-15 14:30:00Z");
    EXPECT_EQ(actual_utc, expected_utc);
}
190+
191+
TEST(DatetimeParseTimezoneTest, ParseDatetimeWithNoOffsetSameAsPlain) {
    // With no offset suffix, the tz-aware parser must agree with the plain one.
    const char* const input = "2024-06-15 14:30:00";
    const int64_t with_tz_parser = parse_datetime_tz(input);
    const int64_t with_plain_parser = parse_datetime(input);
    EXPECT_EQ(with_tz_parser, with_plain_parser);
}
196+
197+
TEST(DatetimeParseTimezoneTest, ParseDatetimeWithZeroOffsetEquivalents) {
    // Every spelling of a zero offset must yield the unshifted timestamp.
    // Each form is compared against parse_datetime() rather than only against
    // the other forms, so the test cannot pass when all spellings are
    // mis-parsed in the same way.
    const int64_t expected_us = parse_datetime("2024-06-15 14:30:00");
    EXPECT_EQ(parse_datetime_tz("2024-06-15 14:30:00+00:00"), expected_us);
    EXPECT_EQ(parse_datetime_tz("2024-06-15 14:30:00+00"), expected_us);
    EXPECT_EQ(parse_datetime_tz("2024-06-15 14:30:00Z"), expected_us);
    // A negative zero offset is still zero.
    EXPECT_EQ(parse_datetime_tz("2024-06-15 14:30:00-00:00"), expected_us);
}
204+
205+
TEST(DatetimeParseTimezoneTest, ParseDatetimeWithFractionalAndTZ) {
    // Fractional seconds must survive normalization:
    // 14:30:00.123456 at +05:30 is 09:00:00.123456 UTC.
    const int64_t expected_utc = parse_datetime("2024-06-15 09:00:00.123456");
    const int64_t actual_utc =
        parse_datetime_tz("2024-06-15 14:30:00.123456+05:30");
    EXPECT_EQ(actual_utc, expected_utc);
}
211+
212+
TEST(DatetimeParseTimezoneTest, ParseDatetimeShortOffsetPostgres) {
    // Hour-only offsets such as "+05" (a form PostgreSQL often emits) carry
    // no minutes field: 14:30:00 at +05 is 09:30:00 UTC.
    const int64_t expected_utc = parse_datetime("2024-06-15 09:30:00");
    const int64_t actual_utc = parse_datetime_tz("2024-06-15 14:30:00+05");
    EXPECT_EQ(actual_utc, expected_utc);
}

0 commit comments

Comments
 (0)