From 05f0fbd010f1417e65cdf5c61218f7b9d110a6e3 Mon Sep 17 00:00:00 2001
From: Moaaz Assali <ma5679@nyu.edu>
Date: Wed, 28 Feb 2024 14:57:23 +0000
Subject: [PATCH v2] Fix for edge cases and integer overflow in date_bin function

All the calculations done in timestamp_bin() and timestamptz_bin() do not account for integer overflow.
Consider date_bin('15 minutes'::interval, timestamp '294276-12-30 10:24:00', timestamp '4000-12-20 23:00:00 BC');
This will return '294276-12-30 10:31:49.551616' instead of '294276-12-30 10:15:00'.

This happens beacuse the source timestamp is a very large int64 value and the origin has a negative int64 value,
which results in an overflow and the subsequent calculations for date binning are no longer correct.

To fix all issues of integer overflow when any valid timestamp is provided, this patch maps timestamp, origin
and stride_usecs to uint64 to safely perform all the calculations without overflow risk.
At the end the calculated uint64 result is mapped back to Timestamp(tz)/int64 to return the correct result.

____________________________________
This patch also fixes the issue where the returned result is incorrect when the source timestamp is already a valid
binned date when timestamp < origin.

Consider date_bin('30 minutes'::interval, '2024-01-01 15:00:00'::timestamp, '2024-01-01 17:00:00'::timestamp);
This will return '2024-01-01 14:30:00' instead of '2024-01-01 15:00:00'.

The existing implementation subtracts a stride interval for all cases when timestamp < origin because rounding to
-infinity is needed for the case of timestamp < origin.
However, when the source timestamp is already equivalent to a valid binned date, the additional stride interval
subtraction is not needed.

To fix that, this patch adds a tm_modulo check to only subtract the stride interval when it is not 0.
---
 src/backend/utils/adt/timestamp.c | 82 +++++++++++++++++++++++--------
 1 file changed, 62 insertions(+), 20 deletions(-)

diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index ed03c50a6d..3e8bbfe9c7 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -4539,8 +4539,12 @@ timestamp_bin(PG_FUNCTION_ARGS)
 	Timestamp	timestamp = PG_GETARG_TIMESTAMP(1);
 	Timestamp	origin = PG_GETARG_TIMESTAMP(2);
 	Timestamp	result,
+				stride_usecs;
+	uint64		utimestamp,
+				uorigin,
+				ustride_usecs,
 				tm_diff,
-				stride_usecs,
+				tm_modulo,
 				tm_delta;
 
 	if (TIMESTAMP_NOT_FINITE(timestamp))
@@ -4568,17 +4572,34 @@ timestamp_bin(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 				 errmsg("stride must be greater than zero")));
 
-	tm_diff = timestamp - origin;
-	tm_delta = tm_diff - tm_diff % stride_usecs;
+	utimestamp = timestamp + INT64_MAX + 1;
+	uorigin = origin + INT64_MAX + 1;
+	ustride_usecs = stride_usecs;
 
-	/*
-	 * Make sure the returned timestamp is at the start of the bin, even if
-	 * the origin is in the future.
-	 */
-	if (origin > timestamp && stride_usecs > 1)
-		tm_delta -= stride_usecs;
+	if (utimestamp < uorigin)
+	{
+		tm_diff = uorigin - utimestamp;
+		tm_modulo = tm_diff % ustride_usecs;
+		tm_delta = tm_diff - tm_modulo;
+
+		/*
+		 * Add one stride in order to round to -infinity when utimestamp <
+		 * uorigin beacuse our tm_delta calculation rounds to +infinity which
+		 * is not correct for utimestamp < uorigin. We also check if tm_modulo
+		 * is not zero to avoid adding an extra stride when timestamp is
+		 * already a valid binned date.
+		 */
+		if (tm_modulo != 0)
+			tm_delta += ustride_usecs;
 
-	result = origin + tm_delta;
+		result = uorigin - tm_delta - INT64_MAX - 1;
+	}
+	else
+	{
+		tm_diff = utimestamp - uorigin;
+		tm_delta = tm_diff - tm_diff % ustride_usecs;
+		result = uorigin + tm_delta - INT64_MAX - 1;
+	}
 
 	PG_RETURN_TIMESTAMP(result);
 }
@@ -4727,8 +4748,12 @@ timestamptz_bin(PG_FUNCTION_ARGS)
 	TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1);
 	TimestampTz origin = PG_GETARG_TIMESTAMPTZ(2);
 	TimestampTz result,
-				stride_usecs,
+				stride_usecs;
+	uint64		utimestamp,
+				uorigin,
+				ustride_usecs,
 				tm_diff,
+				tm_modulo,
 				tm_delta;
 
 	if (TIMESTAMP_NOT_FINITE(timestamp))
@@ -4756,17 +4781,34 @@ timestamptz_bin(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 				 errmsg("stride must be greater than zero")));
 
-	tm_diff = timestamp - origin;
-	tm_delta = tm_diff - tm_diff % stride_usecs;
+	utimestamp = timestamp + INT64_MAX + 1;
+	uorigin = origin + INT64_MAX + 1;
+	ustride_usecs = stride_usecs;
 
-	/*
-	 * Make sure the returned timestamp is at the start of the bin, even if
-	 * the origin is in the future.
-	 */
-	if (origin > timestamp && stride_usecs > 1)
-		tm_delta -= stride_usecs;
+	if (utimestamp < uorigin)
+	{
+		tm_diff = uorigin - utimestamp;
+		tm_modulo = tm_diff % ustride_usecs;
+		tm_delta = tm_diff - tm_modulo;
+
+		/*
+		 * Add one stride in order to round to -infinity when utimestamp <
+		 * uorigin beacuse our tm_delta calculation rounds to +infinity which
+		 * is not correct for utimestamp < uorigin. We also check if tm_modulo
+		 * is not zero to avoid adding an extra stride when timestamp is
+		 * already a valid binned date.
+		 */
+		if (tm_modulo != 0)
+			tm_delta += ustride_usecs;
 
-	result = origin + tm_delta;
+		result = uorigin - tm_delta - INT64_MAX - 1;
+	}
+	else
+	{
+		tm_diff = utimestamp - uorigin;
+		tm_delta = tm_diff - tm_diff % ustride_usecs;
+		result = uorigin + tm_delta - INT64_MAX - 1;
+	}
 
 	PG_RETURN_TIMESTAMPTZ(result);
 }
-- 
2.34.1

