From 82ba268b7af93a75c76cf36a85c764761e0dbeb1 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Tue, 25 Mar 2025 15:14:42 -0700
Subject: [PATCH v5] Fix timestamp overflow in UUIDv7 implementation.

Previously, the uuidv7_interval() function performed timestamp
shifting calculations using microsecond precision, but then converted
the result back to a nanosecond-precision timestamp held in an int64.
Since the millisecond and sub-millisecond parts were extracted from
this nanosecond timestamp and stored into the UUIDv7 value, the
conversion overflowed for timestamps beyond the year 2262.

With this commit, the millisecond and sub-millisecond parts are stored
directly into the UUIDv7 value without being converted back to a
nanosecond precision timestamp. Following RFC 9562, the timestamp is
stored as an unsigned integer, enabling support for dates up to the
year 10889.

Reported and fixed by Andrey Borodin, with cosmetic changes and
regression tests by me.

Reported-by: Andrey Borodin <x4mmm@yandex-team.ru>
Author: Andrey Borodin <x4mmm@yandex-team.ru>
Discussion: https://postgr.es/m/96DEC2D9-659A-40E8-B7BA-AF5D162A9E21@yandex-team.ru
---
 src/backend/utils/adt/uuid.c       | 34 +++++++++++++++---------------
 src/test/regress/expected/uuid.out | 14 ++++++++++++
 src/test/regress/sql/uuid.sql      | 11 ++++++++++
 3 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 4f8402ef925..be0f0f9f1ce 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -29,6 +29,7 @@
 #define NS_PER_S	INT64CONST(1000000000)
 #define NS_PER_MS	INT64CONST(1000000)
 #define NS_PER_US	INT64CONST(1000)
+#define US_PER_MS	INT64CONST(1000)
 
 /*
  * UUID version 7 uses 12 bits in "rand_a" to store  1/4096 (or 2^12) fractions of
@@ -69,6 +70,7 @@ static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
 static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
 static inline void uuid_set_version(pg_uuid_t *uuid, unsigned char version);
 static inline int64 get_real_time_ns_ascending();
+static pg_uuid_t *generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms);
 
 Datum
 uuid_in(PG_FUNCTION_ARGS)
@@ -523,17 +525,17 @@ get_real_time_ns_ascending()
  * described in the RFC. This method utilizes 12 bits from the "rand_a" bits
  * to store a 1/4096 (or 2^12) fraction of sub-millisecond precision.
  *
- * ns is a number of nanoseconds since start of the UNIX epoch. This value is
+ * unix_ts_ms is the number of milliseconds since the UNIX epoch, and sub_ms
+ * is the number of nanoseconds within that millisecond. These values are
  * used for time-dependent bits of UUID.
+ *
+ * NB: all numbers here are unsigned; unix_ts_ms cannot be negative per RFC.
  */
 static pg_uuid_t *
-generate_uuidv7(int64 ns)
+generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms)
 {
 	pg_uuid_t  *uuid = palloc(UUID_LEN);
-	int64		unix_ts_ms;
-	int32		increased_clock_precision;
-
-	unix_ts_ms = ns / NS_PER_MS;
+	uint32		increased_clock_precision;
 
 	/* Fill in time part */
 	uuid->data[0] = (unsigned char) (unix_ts_ms >> 40);
@@ -547,7 +549,7 @@ generate_uuidv7(int64 ns)
 	 * sub-millisecond timestamp fraction (SUBMS_BITS bits, not
 	 * SUBMS_MINIMAL_STEP_BITS)
 	 */
-	increased_clock_precision = ((ns % NS_PER_MS) * (1 << SUBMS_BITS)) / NS_PER_MS;
+	increased_clock_precision = (sub_ms * (1 << SUBMS_BITS)) / NS_PER_MS;
 
 	/* Fill the increased clock precision to "rand_a" bits */
 	uuid->data[6] = (unsigned char) (increased_clock_precision >> 8);
@@ -586,7 +588,8 @@ generate_uuidv7(int64 ns)
 Datum
 uuidv7(PG_FUNCTION_ARGS)
 {
-	pg_uuid_t  *uuid = generate_uuidv7(get_real_time_ns_ascending());
+	int64		ns = get_real_time_ns_ascending();
+	pg_uuid_t  *uuid = generate_uuidv7(ns / NS_PER_MS, ns % NS_PER_MS);
 
 	PG_RETURN_UUID_P(uuid);
 }
@@ -601,13 +604,13 @@ uuidv7_interval(PG_FUNCTION_ARGS)
 	TimestampTz ts;
 	pg_uuid_t  *uuid;
 	int64		ns = get_real_time_ns_ascending();
+	int64		us;
 
 	/*
 	 * Shift the current timestamp by the given interval. To calculate time
 	 * shift correctly, we convert the UNIX epoch to TimestampTz and use
-	 * timestamptz_pl_interval(). Since this calculation is done with
-	 * microsecond precision, we carry nanoseconds from original ns value to
-	 * shifted ns value.
+	 * timestamptz_pl_interval(). This calculation is done with microsecond
+	 * precision.
 	 */
 
 	ts = (TimestampTz) (ns / NS_PER_US) -
@@ -618,14 +621,11 @@ uuidv7_interval(PG_FUNCTION_ARGS)
 												 TimestampTzGetDatum(ts),
 												 IntervalPGetDatum(shift)));
 
-	/*
-	 * Convert a TimestampTz value back to an UNIX epoch and back nanoseconds.
-	 */
-	ns = (ts + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC)
-		* NS_PER_US + ns % NS_PER_US;
+	/* Convert a TimestampTz value back to a UNIX epoch timestamp */
+	us = ts + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
 
 	/* Generate an UUIDv7 */
-	uuid = generate_uuidv7(ns);
+	uuid = generate_uuidv7(us / US_PER_MS, (us % US_PER_MS) * NS_PER_US + ns % NS_PER_US);
 
 	PG_RETURN_UUID_P(uuid);
 }
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 798633ad51e..cbd497376c4 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -233,6 +233,20 @@ SELECT array_agg(id ORDER BY guid_field) FROM guid3;
  {1,2,3,4,5,6,7,8,9,10}
 (1 row)
 
+-- Check the timestamp offsets for v7.
+--
+-- generate UUIDv7 having timestamps up to 10889 year, which is the maximum year
+-- can be stored in UUIDv7, and then check if the timestamps extracted from UUIDv7
+-- values are not overflowed.
+WITH uuidts AS (
+     SELECT y, ts as ts, lag(ts) OVER (ORDER BY y) AS prev_ts
+     FROM (SELECT y, uuid_extract_timestamp(uuidv7((y || ' years')::interval)) AS ts FROM generate_series(-50, 10889 - extract(year from now())::int) y)
+)
+SELECT y, ts, prev_ts FROM uuidts WHERE ts < prev_ts;
+ y | ts | prev_ts 
+---+----+---------
+(0 rows)
+
 -- extract functions
 -- version
 SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111');  -- 5
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index 110188361d1..cd0e65d3a8b 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -119,6 +119,17 @@ SELECT count(DISTINCT guid_field) FROM guid1;
 INSERT INTO guid3 (guid_field) SELECT uuidv7() FROM generate_series(1, 10);
 SELECT array_agg(id ORDER BY guid_field) FROM guid3;
 
+-- Check the timestamp offsets for v7.
+--
+-- generate UUIDv7 having timestamps up to 10889 year, which is the maximum year
+-- can be stored in UUIDv7, and then check if the timestamps extracted from UUIDv7
+-- values are not overflowed.
+WITH uuidts AS (
+     SELECT y, ts as ts, lag(ts) OVER (ORDER BY y) AS prev_ts
+     FROM (SELECT y, uuid_extract_timestamp(uuidv7((y || ' years')::interval)) AS ts FROM generate_series(-50, 10889 - extract(year from now())::int) y)
+)
+SELECT y, ts, prev_ts FROM uuidts WHERE ts < prev_ts;
+
 -- extract functions
 
 -- version
-- 
2.43.5

