This is an automated email from the ASF dual-hosted git repository.

xiangfu0 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new d1d9d6e4aa1 Handle DST spring-forward gaps in fromDateTime() scalar 
(#18311)
d1d9d6e4aa1 is described below

commit d1d9d6e4aa16ac1d75e4ceb22a489b8c7986ef51
Author: Yash Mayya <[email protected]>
AuthorDate: Thu Apr 23 15:45:12 2026 -0700

    Handle DST spring-forward gaps in fromDateTime() scalar (#18311)
---
 .../common/function/DateTimePatternHandler.java    |  68 +++++++--
 .../function/DateTimePatternHandlerTest.java       | 156 +++++++++++++++++++++
 .../core/data/function/DateTimeFunctionsTest.java  |  22 +++
 3 files changed, 236 insertions(+), 10 deletions(-)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/DateTimePatternHandler.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/function/DateTimePatternHandler.java
index ab563e6080c..5223520c794 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/function/DateTimePatternHandler.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/function/DateTimePatternHandler.java
@@ -19,6 +19,7 @@
 package org.apache.pinot.common.function;
 
 import org.joda.time.DateTimeZone;
+import org.joda.time.IllegalInstantException;
 import org.joda.time.format.DateTimeFormat;
 import org.joda.time.format.DateTimeFormatter;
 
@@ -34,16 +35,14 @@ public class DateTimePatternHandler {
    * Converts the dateTimeString of passed pattern into a long of the millis 
since epoch
    */
   public static long parseDateTimeStringToEpochMillis(String dateTimeString, 
String pattern) {
-    DateTimeFormatter dateTimeFormatter = getDateTimeFormatter(pattern);
-    return dateTimeFormatter.parseMillis(dateTimeString);
+    return parseWithDstGapFallback(dateTimeString, pattern, DateTimeZone.UTC);
   }
 
   /**
    * Converts the dateTimeString of passed pattern into a long of the millis 
since epoch
    */
   public static long parseDateTimeStringToEpochMillis(String dateTimeString, 
String pattern, String timezoneId) {
-    DateTimeFormatter dateTimeFormatter = getDateTimeFormatter(pattern, 
timezoneId);
-    return dateTimeFormatter.parseMillis(dateTimeString);
+    return parseWithDstGapFallback(dateTimeString, pattern, 
DateTimeZone.forID(timezoneId));
   }
 
   /**
@@ -52,8 +51,7 @@ public class DateTimePatternHandler {
   public static long parseDateTimeStringToEpochMillis(String dateTimeString, 
String pattern, String timezoneId,
       long defaultVal) {
     try {
-      DateTimeFormatter dateTimeFormatter = getDateTimeFormatter(pattern, 
timezoneId);
-      return dateTimeFormatter.parseMillis(dateTimeString);
+      return parseWithDstGapFallback(dateTimeString, pattern, 
DateTimeZone.forID(timezoneId));
     } catch (Exception e) {
       return defaultVal;
     }
@@ -63,16 +61,66 @@ public class DateTimePatternHandler {
    * Converts the millis representing seconds since epoch into a string of 
passed pattern
    */
   public static String parseEpochMillisToDateTimeString(long millis, String 
pattern) {
-    DateTimeFormatter dateTimeFormatter = getDateTimeFormatter(pattern);
-    return dateTimeFormatter.print(millis);
+    return getDateTimeFormatter(pattern).print(millis);
   }
 
   /**
    * Converts the millis representing seconds since epoch into a string of 
passed pattern and time zone id
    */
   public static String parseEpochMillisToDateTimeString(long millis, String 
pattern, String timezoneId) {
-    DateTimeFormatter dateTimeFormatter = getDateTimeFormatter(pattern, 
timezoneId);
-    return dateTimeFormatter.print(millis);
+    return getDateTimeFormatter(pattern, timezoneId).print(millis);
+  }
+
+  /**
+   * Parses {@code dateTimeString} as an epoch-millis instant in {@code zone}.
+   *
+   * <p>On a DST spring-forward transition the local wall-clock time being 
parsed does not exist (e.g. midnight
+   * of 2026-04-24 in {@code Africa/Cairo}). Joda strict parsing throws {@link 
IllegalInstantException} there,
+   * which turns a perfectly valid calendar input into a user-facing parse 
failure. This method catches that
+   * specific exception, re-parses the wall-clock time as if it were in UTC, 
and then uses
+   * {@link DateTimeZone#convertLocalToUTC(long, boolean)} with {@code 
strict=false} to shift the instant
+   * forward past the gap to the next valid wall-clock time in {@code zone}. 
All other parse errors
+   * (unparseable input, out-of-range fields) continue to propagate so typos 
still surface.
+   *
+   * <p>This catch is narrow by design. In Joda's strict parsing path, {@code 
IllegalInstantException} is
+   * only thrown for non-existent local times (spring-forward gaps). Ambiguous 
times during a fall-back
+   * transition are not affected — Joda already resolves those to a 
deterministic offset without throwing.
+   *
+   * <p>Strict-first (rather than always routing through the fallback) is 
deliberate. Joda handles the
+   * four timezone pattern tokens differently inside {@code 
DateTimeParserBucket.computeMillis}:
+   * <ul>
+   *   <li>{@code Z} ({@code -0800}) and {@code ZZ} ({@code -08:00}) populate 
{@code iOffset}, which
+   *       short-circuits the gap check entirely — strict parsing of such 
inputs never throws
+   *       {@code IllegalInstantException}.</li>
+   *   <li>{@code ZZZ} ({@code America/Los_Angeles}) and {@code z} overwrite 
the bucket's {@code iZone},
+   *       so the gap check runs against the <em>parsed</em> zone, not the 
formatter's {@code withZone}
+   *       argument. For the common case where the parsed zone has no gap at 
the parsed local time, strict
+   *       parsing succeeds and the fallback is never entered.</li>
+   * </ul>
+   * If we always routed through the fallback path, a successful {@code 
withZoneUTC().parseMillis(...)}
+   * call on a {@code Z}/{@code ZZ} input would yield a real UTC instant; 
passing that value to
+   * {@link DateTimeZone#convertLocalToUTC(long, boolean)} would then 
reinterpret it as a local wall-clock
+   * value in {@code zone} and produce a doubly-adjusted, incorrect result. 
Entering the fallback only when
+   * strict parsing throws {@code IllegalInstantException} guarantees that 
either (a) the input carried no
+   * explicit zone/offset token, so reinterpreting the parsed value as 
local-in-{@code zone} is safe, or
+   * (b) the input carried a {@code ZZZ}/{@code z} token whose parsed zone 
itself lay in a gap, in which
+   * case the fallback re-parse will throw {@code IllegalInstantException} 
again and propagate unchanged.
+   * Out-of-range field validation (e.g. month 13, day 32) is not a 
differentiator between the two paths:
+   * both paths use strict ISO field-range parsing and reject such inputs 
identically.
+   *
+   * <p>The lenient fallback is agnostic to gap size. A 30-minute 
spring-forward (e.g. Australia/Lord_Howe)
+   * resolves to the first valid instant 30 minutes after the strict boundary, 
and an unusual day-length
+   * change — e.g. Pacific/Apia on 2011-12-30, a date that was skipped 
entirely when Samoa crossed the
+   * International Date Line — resolves to the first valid instant on the next 
calendar day.
+   */
+  private static long parseWithDstGapFallback(String dateTimeString, String 
pattern, DateTimeZone zone) {
+    DateTimeFormatter formatter = DateTimeFormat.forPattern(pattern);
+    try {
+      return formatter.withZone(zone).parseMillis(dateTimeString);
+    } catch (IllegalInstantException e) {
+      long localAsUtcMillis = 
formatter.withZoneUTC().parseMillis(dateTimeString);
+      return zone.convertLocalToUTC(localAsUtcMillis, false);
+    }
   }
 
   private static DateTimeFormatter getDateTimeFormatter(String pattern, String 
timezoneId) {
diff --git 
a/pinot-common/src/test/java/org/apache/pinot/common/function/DateTimePatternHandlerTest.java
 
b/pinot-common/src/test/java/org/apache/pinot/common/function/DateTimePatternHandlerTest.java
new file mode 100644
index 00000000000..e7c2aad0101
--- /dev/null
+++ 
b/pinot-common/src/test/java/org/apache/pinot/common/function/DateTimePatternHandlerTest.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.function;
+
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+import org.joda.time.IllegalFieldValueException;
+import org.joda.time.IllegalInstantException;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertThrows;
+
+
+/**
+ * Tests for {@link DateTimePatternHandler}, with particular focus on DST 
spring-forward gaps where the
+ * requested local wall-clock time does not exist in the target zone.
+ *
+ * <p>Gap dates chosen here are all historical facts already frozen in the 
tzdata database (Cairo 2010-04-30,
+ * Santiago 2024-09-08, Lord_Howe 2010-10-03) so these tests are not fragile 
against future tzdata releases
+ * that might, for example, drop DST from a country that currently observes it.
+ */
+public class DateTimePatternHandlerTest {
+
+  // Cairo sprang forward at 00:00 local on 2010-04-30 (last Friday of April), 
so 00:00 did not exist that
+  // day; the first valid wall-clock instant is 01:00 EEST (UTC+3). Parsing a 
date-only string should land
+  // on that first valid instant rather than throwing.
+  @Test
+  public void testDstGapAtMidnightCairo() {
+    long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "2010-04-30", "yyyy-MM-dd", "Africa/Cairo");
+    long expected = new DateTime(2010, 4, 30, 1, 0, 0, 0, 
DateTimeZone.forID("Africa/Cairo")).getMillis();
+    assertEquals(actual, expected);
+  }
+
+  // Same semantics when an explicit gap-local time is given: the parser 
should shift forward past the gap.
+  @Test
+  public void testDstGapExplicitGapTimeCairo() {
+    long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "2010-04-30 00:30:00", "yyyy-MM-dd HH:mm:ss", "Africa/Cairo");
+    // 00:30 lies inside the gap; lenient resolution shifts forward by the gap 
size (1h) -> 01:30 EEST.
+    long expected = new DateTime(2010, 4, 30, 1, 30, 0, 0, 
DateTimeZone.forID("Africa/Cairo")).getMillis();
+    assertEquals(actual, expected);
+  }
+
+  // The default-value overload is currently silent on DST gaps: it returns 
the default and hides data. After the
+  // fix, a DST gap should resolve to a valid instant; the default is only 
used when the input is truly malformed.
+  @Test
+  public void testDstGapDoesNotFallBackToDefault() {
+    long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "2010-04-30", "yyyy-MM-dd", "Africa/Cairo", -1L);
+    long expected = new DateTime(2010, 4, 30, 1, 0, 0, 0, 
DateTimeZone.forID("Africa/Cairo")).getMillis();
+    assertEquals(actual, expected);
+  }
+
+  // Genuinely unparseable input should still fall through to the default 
value.
+  @Test
+  public void testUnparseableStillReturnsDefault() {
+    long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "not-a-date", "yyyy-MM-dd", "UTC", -1L);
+    assertEquals(actual, -1L);
+  }
+
+  // We only want to be lenient about DST gaps; other field-range errors must 
remain strict so that typos
+  // (e.g. month 13) continue to surface rather than silently rolling into the 
next year.
+  @Test
+  public void testOutOfRangeFieldStillThrows() {
+    assertThrows(IllegalFieldValueException.class, () -> 
DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "2026-13-05", "yyyy-MM-dd", "UTC"));
+  }
+
+  // Ordinary inputs in a zone that has DST must be unaffected by the fallback 
logic.
+  @Test
+  public void testNonGapDateInDstZoneUnchanged() {
+    long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "2026-05-15 12:00:00", "yyyy-MM-dd HH:mm:ss", "Africa/Cairo");
+    long expected = new DateTime(2026, 5, 15, 12, 0, 0, 0, 
DateTimeZone.forID("Africa/Cairo")).getMillis();
+    assertEquals(actual, expected);
+  }
+
+  // UTC has no DST, so behavior must be identical to before.
+  @Test
+  public void testUtcUnaffected() {
+    long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "2026-04-24 00:00:00", "yyyy-MM-dd HH:mm:ss");
+    long expected = new DateTime(2026, 4, 24, 0, 0, 0, 0, 
DateTimeZone.UTC).getMillis();
+    assertEquals(actual, expected);
+  }
+
+  // Exercises the Western-hemisphere branch of 
DateTimeZone.convertLocalToUTC(strict=false). Santiago
+  // springs forward at 00:00 local on the first Sunday of September 
(2024-09-08); the fallback resolution
+  // for a negative pre-transition offset picks the post-transition offset so 
the returned instant lands at
+  // 01:00 -03:00, i.e. the first valid wall-clock time after the gap. Without 
this test, the fix is only
+  // exercised on Eastern-hemisphere zones (e.g. Cairo) and the Western branch 
of convertLocalToUTC is
+  // uncovered.
+  @Test
+  public void testDstGapWesternHemisphereSantiago() {
+    long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "2024-09-08", "yyyy-MM-dd", "America/Santiago");
+    long expected =
+        new DateTime(2024, 9, 8, 1, 0, 0, 0, 
DateTimeZone.forID("America/Santiago")).getMillis();
+    assertEquals(actual, expected);
+  }
+
+  // Regression guard for the strict-first ordering. If a future refactor 
accidentally routes all inputs
+  // through the lenient fallback, a pattern with an explicit offset token (Z) 
would be parsed in UTC and
+  // then re-adjusted by convertLocalToUTC as if it were a local wall-clock 
time in `zone`, producing a
+  // doubly-adjusted (wrong) result. The expected value here is the real UTC 
instant encoded by the input
+  // offset, independent of the `zone` argument.
+  @Test
+  public void testExplicitOffsetTokenNotDoubleAdjustedByFallback() {
+    long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "2024-01-01T12:00:00+0500", "yyyy-MM-dd'T'HH:mm:ssZ", "Africa/Cairo");
+    long expected = new DateTime(2024, 1, 1, 7, 0, 0, 0, 
DateTimeZone.UTC).getMillis();
+    assertEquals(actual, expected);
+  }
+
+  // Lord Howe Island uses a 30-minute DST shift (02:00 -> 02:30 local on the 
first Sunday of October). The
+  // fallback is agnostic to gap size, so a half-hour gap must be handled the 
same way as a one-hour gap.
+  // 2010-10-03 is a historical transition, safe from tzdata drift.
+  @Test
+  public void testHalfHourDstGapLordHowe() {
+    long actual = DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "2010-10-03 02:15:00", "yyyy-MM-dd HH:mm:ss", "Australia/Lord_Howe");
+    // 02:15 lies inside the 30-minute gap; lenient resolution shifts forward 
by 30 minutes to 02:45 +11:00.
+    long expected =
+        new DateTime(2010, 10, 3, 2, 45, 0, 0, 
DateTimeZone.forID("Australia/Lord_Howe")).getMillis();
+    assertEquals(actual, expected);
+  }
+
+  // Pins the Javadoc claim that when a ZZZ/z token in the input encodes a 
zone whose wall-clock time is
+  // itself in a gap, the fallback re-parse throws IllegalInstantException 
again and the exception
+  // propagates unchanged. Without this test, a future refactor that quietly 
broadens what the fallback
+  // swallows could silently accept a genuinely broken input.
+  @Test
+  public void testParsedZoneTokenWhoseZoneIsInGapStillThrows() {
+    assertThrows(IllegalInstantException.class, () -> 
DateTimePatternHandler.parseDateTimeStringToEpochMillis(
+        "2010-04-30 00:30:00 Africa/Cairo", "yyyy-MM-dd HH:mm:ss ZZZ", "UTC"));
+  }
+}
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
index b75cd5e7ba3..aa340186bd5 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
@@ -309,6 +309,28 @@ public class DateTimeFunctionsTest {
         row114, -1L
     });
 
+    // fromDateTime on a DST spring-forward gap: 2010-04-30 00:00 did not 
exist in Africa/Cairo because the
+    // clock sprang forward to 01:00. Instead of throwing, the scalar should 
shift forward past the gap and
+    // return the first valid instant (01:00 EEST). Using a historical date 
keeps this test stable against
+    // future tzdata updates.
+    GenericRow row115 = new GenericRow();
+    row115.putValue("dateTime", "2010-04-30");
+    long cairoDstGapExpected =
+        new DateTime(2010, 4, 30, 1, 0, 0, 0, 
DateTimeZone.forID("Africa/Cairo")).getMillis();
+    inputs.add(new Object[]{
+        "fromDateTime(dateTime, 'yyyy-MM-dd', 'Africa/Cairo')", 
Lists.newArrayList("dateTime"), row115,
+        cairoDstGapExpected
+    });
+
+    // Same DST gap through the defaultVal overload: the fix resolves a valid 
instant rather than silently
+    // falling through to the default sentinel.
+    GenericRow row116 = new GenericRow();
+    row116.putValue("dateTime", "2010-04-30");
+    inputs.add(new Object[]{
+        "fromDateTime(dateTime, 'yyyy-MM-dd', 'Africa/Cairo', -1)", 
Lists.newArrayList("dateTime"), row116,
+        cairoDstGapExpected
+    });
+
     // timezone_hour and timezone_minute
     List<String> expectedArguments = Collections.singletonList("tz");
     GenericRow row120 = new GenericRow();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to