This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d436b238a9 GH-49454: [C++][Gandiva] Fix castVARCHAR_timestamp for
pre-epoch timestamps (#49455)
d436b238a9 is described below
commit d436b238a9df1472c86bc2cc1bce46df7cb1d6db
Author: Dmitry Chirkov <[email protected]>
AuthorDate: Thu Mar 12 21:38:26 2026 -0700
GH-49454: [C++][Gandiva] Fix castVARCHAR_timestamp for pre-epoch timestamps
(#49455)
### Rationale for this change
GH-49454: `castVARCHAR_timestamp_int64` produces negative milliseconds for
pre-epoch timestamps.
### What changes are included in this PR?
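Fixed `castVARCHAR_timestamp_int64` to correctly handle pre-epoch
timestamps (before 1970-01-01). The issue was that `in % MILLIS_IN_SEC`
yields a negative remainder for negative timestamps (in C++ the result of `%`
takes the sign of the dividend), so the milliseconds field was negative. For
example, a timestamp with 900 ms gave a remainder of -100, and the formatted
string, cut off at the fixed 23-character output length, came out as
`"0107-10-17 12:20:03.-10"`.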
### Are these changes tested?
Yes, added 4 new test cases covering pre-epoch timestamps with milliseconds.
### Are there any user-facing changes?
**This PR contains a "Critical Fix".**
This fixes a bug that caused **incorrect data to be produced** when casting
pre-epoch timestamps to VARCHAR in Gandiva. Previously, timestamps before
1970-01-01 with non-zero milliseconds would produce invalid output with
negative millisecond values (e.g., `"0107-10-17 12:20:03.-10"` instead of
`"0107-10-17 12:20:03.900"`).
* GitHub Issue: #49454
Authored-by: Dmitry Chirkov <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/gandiva/precompiled/time.cc | 16 +++++++++-------
cpp/src/gandiva/precompiled/time_test.cc | 18 ++++++++++++++++++
2 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/cpp/src/gandiva/precompiled/time.cc
b/cpp/src/gandiva/precompiled/time.cc
index e1e9ac4456..8414d0ed37 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -923,13 +923,15 @@ gdv_time32 castTIME_int32(int32_t int_val) {
const char* castVARCHAR_timestamp_int64(gdv_int64 context, gdv_timestamp in,
gdv_int64 length, gdv_int32* out_len) {
- gdv_int64 year = extractYear_timestamp(in);
- gdv_int64 month = extractMonth_timestamp(in);
- gdv_int64 day = extractDay_timestamp(in);
- gdv_int64 hour = extractHour_timestamp(in);
- gdv_int64 minute = extractMinute_timestamp(in);
- gdv_int64 second = extractSecond_timestamp(in);
- gdv_int64 millis = in % MILLIS_IN_SEC;
+ EpochTimePoint tp(in);
+ gdv_int64 year = 1900 + tp.TmYear();
+ gdv_int64 month = 1 + tp.TmMon();
+ gdv_int64 day = tp.TmMday();
+ gdv_int64 hour = tp.TmHour();
+ gdv_int64 minute = tp.TmMin();
+ gdv_int64 second = tp.TmSec();
+ // Use TimeOfDay().subseconds() to correctly handle negative timestamps
+ gdv_int64 millis = tp.TimeOfDay().subseconds().count();
static const int kTimeStampStringLen = 23;
const int char_buffer_length = kTimeStampStringLen + 1; // snprintf adds \0
diff --git a/cpp/src/gandiva/precompiled/time_test.cc
b/cpp/src/gandiva/precompiled/time_test.cc
index 82b38d1b57..6cfa6acf57 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -904,6 +904,24 @@ TEST(TestTime, castVarcharTimestamp) {
ts = StringToTimestamp("2-5-1 00:00:04");
out = castVARCHAR_timestamp_int64(context_ptr, ts, 24L, &out_len);
EXPECT_EQ(std::string(out, out_len), "0002-05-01 00:00:04.000");
+
+ // StringToTimestamp doesn't parse milliseconds, so we add them manually
+ ts = StringToTimestamp("67-5-1 00:00:04") + 920;
+ out = castVARCHAR_timestamp_int64(context_ptr, ts, 24L, &out_len);
+ EXPECT_EQ(std::string(out, out_len), "0067-05-01 00:00:04.920");
+
+ ts = StringToTimestamp("107-10-17 12:20:03") + 900;
+ out = castVARCHAR_timestamp_int64(context_ptr, ts, 24L, &out_len);
+ EXPECT_EQ(std::string(out, out_len), "0107-10-17 12:20:03.900");
+
+ // Test pre-epoch timestamps with 4-digit years
+ ts = StringToTimestamp("1969-12-31 23:59:59") + 920;
+ out = castVARCHAR_timestamp_int64(context_ptr, ts, 24L, &out_len);
+ EXPECT_EQ(std::string(out, out_len), "1969-12-31 23:59:59.920");
+
+ ts = StringToTimestamp("1899-12-31 23:59:59") + 123;
+ out = castVARCHAR_timestamp_int64(context_ptr, ts, 24L, &out_len);
+ EXPECT_EQ(std::string(out, out_len), "1899-12-31 23:59:59.123");
}
TEST(TestTime, TestCastTimestampToDate) {