This is an automated email from the ASF dual-hosted git repository.
xiangfu0 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new d1d9d6e4aa1 Handle DST spring-forward gaps in fromDateTime() scalar
(#18311)
d1d9d6e4aa1 is described below
commit d1d9d6e4aa16ac1d75e4ceb22a489b8c7986ef51
Author: Yash Mayya <[email protected]>
AuthorDate: Thu Apr 23 15:45:12 2026 -0700
Handle DST spring-forward gaps in fromDateTime() scalar (#18311)
---
.../common/function/DateTimePatternHandler.java | 68 +++++++--
.../function/DateTimePatternHandlerTest.java | 156 +++++++++++++++++++++
.../core/data/function/DateTimeFunctionsTest.java | 22 +++
3 files changed, 236 insertions(+), 10 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/function/DateTimePatternHandler.java
b/pinot-common/src/main/java/org/apache/pinot/common/function/DateTimePatternHandler.java
index ab563e6080c..5223520c794 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/function/DateTimePatternHandler.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/function/DateTimePatternHandler.java
@@ -19,6 +19,7 @@
package org.apache.pinot.common.function;
import org.joda.time.DateTimeZone;
+import org.joda.time.IllegalInstantException;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
@@ -34,16 +35,14 @@ public class DateTimePatternHandler {
* Converts the dateTimeString of passed pattern into a long of the millis
since epoch
*/
public static long parseDateTimeStringToEpochMillis(String dateTimeString,
String pattern) {
- DateTimeFormatter dateTimeFormatter = getDateTimeFormatter(pattern);
- return dateTimeFormatter.parseMillis(dateTimeString);
+ return parseWithDstGapFallback(dateTimeString, pattern, DateTimeZone.UTC);
}
/**
* Converts the dateTimeString of passed pattern into a long of the millis
since epoch
*/
public static long parseDateTimeStringToEpochMillis(String dateTimeString,
String pattern, String timezoneId) {
- DateTimeFormatter dateTimeFormatter = getDateTimeFormatter(pattern,
timezoneId);
- return dateTimeFormatter.parseMillis(dateTimeString);
+ return parseWithDstGapFallback(dateTimeString, pattern,
DateTimeZone.forID(timezoneId));
}
/**
@@ -52,8 +51,7 @@ public class DateTimePatternHandler {
public static long parseDateTimeStringToEpochMillis(String dateTimeString,
String pattern, String timezoneId,
long defaultVal) {
try {
- DateTimeFormatter dateTimeFormatter = getDateTimeFormatter(pattern,
timezoneId);
- return dateTimeFormatter.parseMillis(dateTimeString);
+ return parseWithDstGapFallback(dateTimeString, pattern,
DateTimeZone.forID(timezoneId));
} catch (Exception e) {
return defaultVal;
}
@@ -63,16 +61,66 @@ public class DateTimePatternHandler {
* Converts the millis representing seconds since epoch into a string of
passed pattern
*/
public static String parseEpochMillisToDateTimeString(long millis, String
pattern) {
- DateTimeFormatter dateTimeFormatter = getDateTimeFormatter(pattern);
- return dateTimeFormatter.print(millis);
+ return getDateTimeFormatter(pattern).print(millis);
}
/**
* Converts the millis representing seconds since epoch into a string of
passed pattern and time zone id
*/
public static String parseEpochMillisToDateTimeString(long millis, String
pattern, String timezoneId) {
- DateTimeFormatter dateTimeFormatter = getDateTimeFormatter(pattern,
timezoneId);
- return dateTimeFormatter.print(millis);
+ return getDateTimeFormatter(pattern, timezoneId).print(millis);
+ }
+
+ /**
+ * Parses {@code dateTimeString} as an epoch-millis instant in {@code zone}.
+ *
+ * <p>On a DST spring-forward transition the local wall-clock time being
parsed does not exist (e.g. midnight
+ * of 2026-04-24 in {@code Africa/Cairo}). Joda strict parsing throws {@link
IllegalInstantException} there,
+ * which turns a perfectly valid calendar input into a user-facing parse
failure. This method catches that
+ * specific exception, re-parses the wall-clock time as if it were in UTC,
and then uses
+ * {@link DateTimeZone#convertLocalToUTC(long, boolean)} with {@code
strict=false} to shift the instant
+ * forward past the gap to the next valid wall-clock time in {@code zone}.
All other parse errors
+ * (unparseable input, out-of-range fields) continue to propagate so typos
still surface.
+ *
+ * <p>This catch is narrow by design. In Joda's strict parsing path, {@code
IllegalInstantException} is
+ * only thrown for non-existent local times (spring-forward gaps). Ambiguous
times during a fall-back
+ * transition are not affected — Joda already resolves those to a
deterministic offset without throwing.
+ *
+ * <p>Strict-first (rather than always routing through the fallback) is
deliberate. Joda handles the
+ * four timezone pattern tokens differently inside {@code
DateTimeParserBucket.computeMillis}:
+ * <ul>
+ * <li>{@code Z} ({@code -0800}) and {@code ZZ} ({@code -08:00}) populate
{@code iOffset}, which
+ * short-circuits the gap check entirely — strict parsing of such
inputs never throws
+ * {@code IllegalInstantException}.</li>
+ * <li>{@code ZZZ} ({@code America/Los_Angeles}) and {@code z} overwrite
the bucket's {@code iZone},
+ * so the gap check runs against the <em>parsed</em> zone, not the
formatter's {@code withZone}
+ * argument. For the common case where the parsed zone has no gap at
the parsed local time, strict
+ * parsing succeeds and the fallback is never entered.</li>
+ * </ul>
+ * If we always routed through the fallback path, a successful {@code
withZoneUTC().parseMillis(...)}
+ * call on a {@code Z}/{@code ZZ} input would yield a real UTC instant;
passing that value to
+ * {@link DateTimeZone#convertLocalToUTC(long, boolean)} would then
reinterpret it as a local wall-clock
+ * value in {@code zone} and produce a doubly-adjusted, incorrect result.
Entering the fallback only when
+ * strict parsing throws {@code IllegalInstantException} guarantees that
either (a) the input carried no
+ * explicit zone/offset token, so reinterpreting the parsed value as
local-in-{@code zone} is safe, or
+ * (b) the input carried a {@code ZZZ}/{@code z} token whose parsed zone
itself lay in a gap, in which
+ * case the fallback re-parse will throw {@code IllegalInstantException}
again and propagate unchanged.
+ * Out-of-range field validation (e.g. month 13, day 32) is not a
differentiator between the two paths:
+ * both paths use strict ISO field-range parsing and reject such inputs
identically.
+ *
+ * <p>The lenient fallback is agnostic to gap size. A 30-minute
spring-forward (e.g. Australia/Lord_Howe)
+ * resolves to the first valid instant 30 minutes after the strict boundary,
and an unusual day-length
+ * change — e.g. Pacific/Apia on 2011-12-30, a date that was skipped
entirely when Samoa crossed the
+ * International Date Line — resolves to the first valid instant on the next
calendar day.
+ */
+ private static long parseWithDstGapFallback(String dateTimeString, String
pattern, DateTimeZone zone) {
+ DateTimeFormatter formatter = DateTimeFormat.forPattern(pattern);
+ try {
+ return formatter.withZone(zone).parseMillis(dateTimeString);
+ } catch (IllegalInstantException e) {
+ long localAsUtcMillis =
formatter.withZoneUTC().parseMillis(dateTimeString);
+ return zone.convertLocalToUTC(localAsUtcMillis, false);
+ }
}
private static DateTimeFormatter getDateTimeFormatter(String pattern, String
timezoneId) {
diff --git
a/pinot-common/src/test/java/org/apache/pinot/common/function/DateTimePatternHandlerTest.java
b/pinot-common/src/test/java/org/apache/pinot/common/function/DateTimePatternHandlerTest.java
new file mode 100644
index 00000000000..e7c2aad0101
--- /dev/null
+++
b/pinot-common/src/test/java/org/apache/pinot/common/function/DateTimePatternHandlerTest.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.function;
+
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+import org.joda.time.IllegalFieldValueException;
+import org.joda.time.IllegalInstantException;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertThrows;
+
+
+/**
+ * Tests for {@link DateTimePatternHandler}, with particular focus on DST
spring-forward gaps where the
+ * requested local wall-clock time does not exist in the target zone.
+ *
+ * <p>Gap dates chosen here are all historical facts already frozen in the
tzdata database (Cairo 2010-04-30,
+ * Santiago 2024-09-08, Lord_Howe 2010-10-03) so these tests are not fragile
against future tzdata releases
+ * that might, for example, drop DST from a country that currently observes it.
+ */
+public class DateTimePatternHandlerTest {
+
+ // Cairo sprang forward at 00:00 local on 2010-04-30 (last Friday of April),
so 00:00 did not exist that
+ // day; the first valid wall-clock instant is 01:00 EEST (UTC+3). Parsing a
date-only string should land
+ // on that first valid instant rather than throwing.
+ @Test
+ public void testDstGapAtMidnightCairo() {
+ long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "2010-04-30", "yyyy-MM-dd", "Africa/Cairo");
+ long expected = new DateTime(2010, 4, 30, 1, 0, 0, 0,
DateTimeZone.forID("Africa/Cairo")).getMillis();
+ assertEquals(actual, expected);
+ }
+
+ // Same semantics when an explicit gap-local time is given: the parser
should shift forward past the gap.
+ @Test
+ public void testDstGapExplicitGapTimeCairo() {
+ long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "2010-04-30 00:30:00", "yyyy-MM-dd HH:mm:ss", "Africa/Cairo");
+ // 00:30 lies inside the gap; lenient resolution shifts forward by the gap
size (1h) -> 01:30 EEST.
+ long expected = new DateTime(2010, 4, 30, 1, 30, 0, 0,
DateTimeZone.forID("Africa/Cairo")).getMillis();
+ assertEquals(actual, expected);
+ }
+
+ // The default-value overload is currently silent on DST gaps: it returns
the default and hides data. After the
+ // fix, a DST gap should resolve to a valid instant; the default is only
used when the input is truly malformed.
+ @Test
+ public void testDstGapDoesNotFallBackToDefault() {
+ long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "2010-04-30", "yyyy-MM-dd", "Africa/Cairo", -1L);
+ long expected = new DateTime(2010, 4, 30, 1, 0, 0, 0,
DateTimeZone.forID("Africa/Cairo")).getMillis();
+ assertEquals(actual, expected);
+ }
+
+ // Genuinely unparseable input should still fall through to the default
value.
+ @Test
+ public void testUnparseableStillReturnsDefault() {
+ long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "not-a-date", "yyyy-MM-dd", "UTC", -1L);
+ assertEquals(actual, -1L);
+ }
+
+ // We only want to be lenient about DST gaps; other field-range errors must
remain strict so that typos
+ // (e.g. month 13) continue to surface rather than silently rolling into the
next year.
+ @Test
+ public void testOutOfRangeFieldStillThrows() {
+ assertThrows(IllegalFieldValueException.class, () ->
DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "2026-13-05", "yyyy-MM-dd", "UTC"));
+ }
+
+ // Ordinary inputs in a zone that has DST must be unaffected by the fallback
logic.
+ @Test
+ public void testNonGapDateInDstZoneUnchanged() {
+ long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "2026-05-15 12:00:00", "yyyy-MM-dd HH:mm:ss", "Africa/Cairo");
+ long expected = new DateTime(2026, 5, 15, 12, 0, 0, 0,
DateTimeZone.forID("Africa/Cairo")).getMillis();
+ assertEquals(actual, expected);
+ }
+
+ // UTC has no DST, so behavior must be identical to before.
+ @Test
+ public void testUtcUnaffected() {
+ long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "2026-04-24 00:00:00", "yyyy-MM-dd HH:mm:ss");
+ long expected = new DateTime(2026, 4, 24, 0, 0, 0, 0,
DateTimeZone.UTC).getMillis();
+ assertEquals(actual, expected);
+ }
+
+ // Exercises the Western-hemisphere branch of
DateTimeZone.convertLocalToUTC(strict=false). Santiago
+ // springs forward at 00:00 local on the first Sunday of September
(2024-09-08); the fallback resolution
+ // for a negative pre-transition offset picks the post-transition offset so
the returned instant lands at
+ // 01:00 -03:00, i.e. the first valid wall-clock time after the gap. Without
this test, the fix is only
+ // exercised on Eastern-hemisphere zones (e.g. Cairo) and the Western branch
of convertLocalToUTC is
+ // uncovered.
+ @Test
+ public void testDstGapWesternHemisphereSantiago() {
+ long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "2024-09-08", "yyyy-MM-dd", "America/Santiago");
+ long expected =
+ new DateTime(2024, 9, 8, 1, 0, 0, 0,
DateTimeZone.forID("America/Santiago")).getMillis();
+ assertEquals(actual, expected);
+ }
+
+ // Regression guard for the strict-first ordering. If a future refactor
accidentally routes all inputs
+ // through the lenient fallback, a pattern with an explicit offset token (Z)
would be parsed in UTC and
+ // then re-adjusted by convertLocalToUTC as if it were a local wall-clock
time in `zone`, producing a
+ // doubly-adjusted (wrong) result. The expected value here is the real UTC
instant encoded by the input
+ // offset, independent of the `zone` argument.
+ @Test
+ public void testExplicitOffsetTokenNotDoubleAdjustedByFallback() {
+ long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "2024-01-01T12:00:00+0500", "yyyy-MM-dd'T'HH:mm:ssZ", "Africa/Cairo");
+ long expected = new DateTime(2024, 1, 1, 7, 0, 0, 0,
DateTimeZone.UTC).getMillis();
+ assertEquals(actual, expected);
+ }
+
+ // Lord Howe Island uses a 30-minute DST shift (02:00 -> 02:30 local on the
first Sunday of October). The
+ // fallback is agnostic to gap size, so a half-hour gap must be handled the
same way as a one-hour gap.
+ // 2010-10-03 is a historical transition, safe from tzdata drift.
+ @Test
+ public void testHalfHourDstGapLordHowe() {
+ long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "2010-10-03 02:15:00", "yyyy-MM-dd HH:mm:ss", "Australia/Lord_Howe");
+ // 02:15 lies inside the 30-minute gap; lenient resolution shifts forward
by 30 minutes to 02:45 +11:00.
+ long expected =
+ new DateTime(2010, 10, 3, 2, 45, 0, 0,
DateTimeZone.forID("Australia/Lord_Howe")).getMillis();
+ assertEquals(actual, expected);
+ }
+
+ // Pins the Javadoc claim that when a ZZZ/z token in the input encodes a
zone whose wall-clock time is
+ // itself in a gap, the fallback re-parse throws IllegalInstantException
again and the exception
+ // propagates unchanged. Without this test, a future refactor that quietly
broadens what the fallback
+ // swallows could silently accept a genuinely broken input.
+ @Test
+ public void testParsedZoneTokenWhoseZoneIsInGapStillThrows() {
+ assertThrows(IllegalInstantException.class, () ->
DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+ "2010-04-30 00:30:00 Africa/Cairo", "yyyy-MM-dd HH:mm:ss ZZZ", "UTC"));
+ }
+}
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
index b75cd5e7ba3..aa340186bd5 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
@@ -309,6 +309,28 @@ public class DateTimeFunctionsTest {
row114, -1L
});
+ // fromDateTime on a DST spring-forward gap: 2010-04-30 00:00 did not
exist in Africa/Cairo because the
+ // clock sprang forward to 01:00. Instead of throwing, the scalar should
shift forward past the gap and
+ // return the first valid instant (01:00 EEST). Using a historical date
keeps this test stable against
+ // future tzdata updates.
+ GenericRow row115 = new GenericRow();
+ row115.putValue("dateTime", "2010-04-30");
+ long cairoDstGapExpected =
+ new DateTime(2010, 4, 30, 1, 0, 0, 0,
DateTimeZone.forID("Africa/Cairo")).getMillis();
+ inputs.add(new Object[]{
+ "fromDateTime(dateTime, 'yyyy-MM-dd', 'Africa/Cairo')",
Lists.newArrayList("dateTime"), row115,
+ cairoDstGapExpected
+ });
+
+ // Same DST gap through the defaultVal overload: the fix resolves a valid
instant rather than silently
+ // falling through to the default sentinel.
+ GenericRow row116 = new GenericRow();
+ row116.putValue("dateTime", "2010-04-30");
+ inputs.add(new Object[]{
+ "fromDateTime(dateTime, 'yyyy-MM-dd', 'Africa/Cairo', -1)",
Lists.newArrayList("dateTime"), row116,
+ cairoDstGapExpected
+ });
+
// timezone_hour and timezone_minute
List<String> expectedArguments = Collections.singletonList("tz");
GenericRow row120 = new GenericRow();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]