From 175dff14da96fe531c3c42fba114642236a9dd94 Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <x4mmm@night.local>
Date: Sun, 20 Aug 2023 23:55:31 +0300
Subject: [PATCH v8 1/3] Implement UUID v7 as per IETF draft

Authors: Andrey Borodin, Sergey Prokhorenko
---
 doc/src/sgml/func.sgml                   |  18 +++-
 src/backend/utils/adt/uuid.c             | 114 +++++++++++++++++++++++
 src/include/catalog/pg_proc.dat          |   6 ++
 src/test/regress/expected/opr_sanity.out |   2 +
 src/test/regress/expected/uuid.out       |  22 +++++
 src/test/regress/sql/uuid.sql            |  14 +++
 6 files changed, 175 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index cec21e42c0..7a1b728bed 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14130,13 +14130,29 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
    <primary>gen_random_uuid</primary>
   </indexterm>
 
+  <indexterm>
+   <primary>gen_uuid_v7</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>get_uuid_v7_time</primary>
+  </indexterm>
+
   <para>
-   <productname>PostgreSQL</productname> includes one function to generate a UUID:
+   <productname>PostgreSQL</productname> includes two functions to generate a UUID:
 <synopsis>
 <function>gen_random_uuid</function> () <returnvalue>uuid</returnvalue>
 </synopsis>
    This function returns a version 4 (random) UUID.  This is the most commonly
    used type of UUID and is appropriate for most applications.
+<synopsis>
+<function>gen_uuid_v7</function> () <returnvalue>uuid</returnvalue>
+</synopsis>
+   This function returns a version 7 (time-ordered + random) UUID.
+<synopsis>
+<function>get_uuid_v7_time</function> (uuid) <returnvalue>timestamptz</returnvalue>
+</synopsis>
+   This function extracts the timestamp from a version 7 UUID and returns it as <type>timestamp with time zone</type>; it raises an error for any other UUID.
   </para>
 
   <para>
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 4f7aa768fd..3455b9f564 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -13,6 +13,9 @@
 
 #include "postgres.h"
 
+#include <sys/time.h>
+
+#include "access/xlog.h"
 #include "common/hashfn.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
@@ -20,6 +23,7 @@
 #include "utils/builtins.h"
 #include "utils/guc.h"
 #include "utils/sortsupport.h"
+#include "utils/timestamp.h"
 #include "utils/uuid.h"
 
 /* sortsupport for uuid */
@@ -421,3 +425,113 @@ gen_random_uuid(PG_FUNCTION_ARGS)
 
 	PG_RETURN_UUID_P(uuid);
 }
+
+static uint32_t sequence_counter;	/* v7 counter; gen_uuid_v7() keeps it within 18 bits */
+static uint64_t previous_timestamp = 0;	/* last millisecond timestamp used by gen_uuid_v7() */
+
+/* Generate a version 7 UUID: ms timestamp, 18-bit counter, random tail. */
+Datum
+gen_uuid_v7(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = palloc(UUID_LEN);
+	uint64_t tms;	/* milliseconds since the Unix epoch */
+	struct timeval tp;
+
+	gettimeofday(&tp, NULL);
+
+	tms = ((uint64_t)tp.tv_sec) * 1000 + (tp.tv_usec) / 1000;
+
+	if (tms <= previous_timestamp)
+	{
+		/* Clock did not advance past the last issued timestamp: bump the counter instead */
+		++sequence_counter;
+		if (sequence_counter > 0x3ffff)
+		{
+			/* 18-bit counter overflowed: restart it and borrow the next millisecond */
+			sequence_counter = 0;
+			previous_timestamp++;
+		}
+
+		/* reuse the last issued timestamp so the value never goes backward */
+		tms = previous_timestamp;
+
+		/* random bytes for data[8..15]; data[8] is overwritten with counter bits below */
+		if (!pg_strong_random(&uuid->data[8], UUID_LEN - 8))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					errmsg("could not generate random values")));
+
+		/* most significant 4 bits of 18-bit counter */
+		uuid->data[6] = (unsigned char)(sequence_counter >> 14);
+		/* next 8 bits */
+		uuid->data[7] = (unsigned char)(sequence_counter >> 6);
+		/* low counter bits; only the bottom 6 survive the variant mask applied below */
+		uuid->data[8] = (unsigned char)(sequence_counter);
+	}
+	else
+	{
+		/* fill everything after the timestamp with random bytes */
+		if (!pg_strong_random(&uuid->data[6], UUID_LEN - 6))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					errmsg("could not generate random values")));
+
+		/*
+		 * Clear the most significant counter bit (0x08 in data[6]) so that a
+		 * freshly seeded counter has headroom before it can roll over.
+		 * See section "Fixed-Length Dedicated Counter Seeding"
+		 * https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis-09#monotonicity_counters
+		 */
+		uuid->data[6] = (uuid->data[6] & 0xf7);
+
+		sequence_counter = ((uint32_t)uuid->data[8] & 0x3f) +
+							(((uint32_t)uuid->data[7]) << 6) +
+							(((uint32_t)uuid->data[6] & 0x0f) << 14);
+
+		previous_timestamp = tms;
+	}
+
+	/* Store the low 48 bits of the timestamp in data[0..5], most significant byte first */
+	uuid->data[0] = (unsigned char)(tms >> 40);
+	uuid->data[1] = (unsigned char)(tms >> 32);
+	uuid->data[2] = (unsigned char)(tms >> 24);
+	uuid->data[3] = (unsigned char)(tms >> 16);
+	uuid->data[4] = (unsigned char)(tms >> 8);
+	uuid->data[5] = (unsigned char)tms;
+
+	/*
+	 * Set magic numbers for a "version 7" (time-ordered) UUID, see
+	 * https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis-09
+	 * https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format#name-creating-a-uuidv7-value
+	 */
+	/* set version field, top four bits are 0, 1, 1, 1 */
+	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x70;
+	/* set variant field, top two bits are 1, 0 */
+	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;
+
+	PG_RETURN_UUID_P(uuid);
+}
+
+/* Extract the timestamp from a version 7 UUID; errors out on any other UUID. */
+Datum
+get_uuid_v7_time(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+	TimestampTz ts;
+	uint64_t tms = 0;	/* milliseconds since the Unix epoch */
+	int			i;
+
+	if ((uuid->data[6] & 0xf0) != 0x70 || (uuid->data[8] & 0xc0) != 0x80)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("get_uuid_v7_time() can only extract timestamp from UUID v7")));
+
+	/* data[0..5] hold the 48-bit timestamp, most significant byte first */
+	for (i = 0; i < 6; i++)
+		tms = (tms << 8) | uuid->data[i];
+
+	/* convert ms to us, then shift from the Unix epoch to the PostgreSQL epoch */
+	ts = (TimestampTz) (tms * 1000) -
+		(POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+	PG_RETURN_TIMESTAMPTZ(ts);
+}
\ No newline at end of file
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 5b67784731..209d1bd0ca 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9174,6 +9174,12 @@
 { oid => '3432', descr => 'generate random UUID',
   proname => 'gen_random_uuid', proleakproof => 't', provolatile => 'v',
   prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9895', descr => 'generate UUID version 7',
+  proname => 'gen_uuid_v7', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'gen_uuid_v7' },
+{ oid => '9896', descr => 'extract timestamp from UUID version 7',
+  proname => 'get_uuid_v7_time', provolatile => 'i',
+  prorettype => 'timestamptz', proargtypes => 'uuid', prosrc => 'get_uuid_v7_time' },
 
 # pg_lsn
 { oid => '3229', descr => 'I/O',
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 7610b011d6..853d3574a3 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -872,6 +872,7 @@ xid8ge(xid8,xid8)
 xid8eq(xid8,xid8)
 xid8ne(xid8,xid8)
 xid8cmp(xid8,xid8)
+gen_uuid_v7()
 -- restore normal output mode
 \a\t
 -- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 8e7f21910d..dfb446b4cb 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -168,5 +168,27 @@ SELECT count(DISTINCT guid_field) FROM guid1;
      2
 (1 row)
 
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7());
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
+-- check that timestamp is extracted correctly
+WITH uuid_time_extraction AS
+(SELECT get_uuid_v7_time(gen_uuid_v7())-now() d)
+SELECT (d <= '100ms') AND (d >= '-100ms') FROM uuid_time_extraction;
+ ?column? 
+----------
+ t
+(1 row)
+
+-- get_uuid_v7_time() must refuse to accept non-UUIDv7
+select get_uuid_v7_time(gen_random_uuid());
+ERROR:  get_uuid_v7_time() can only extract timestamp from UUID v7
 -- clean up
 DROP TABLE guid1, guid2 CASCADE;
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index 9a8f437c7d..dccb8335cd 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -85,5 +85,19 @@ INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 SELECT count(DISTINCT guid_field) FROM guid1;
 
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7());
+INSERT INTO guid1 (guid_field) VALUES (gen_uuid_v7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
+-- check that timestamp is extracted correctly
+WITH uuid_time_extraction AS
+(SELECT get_uuid_v7_time(gen_uuid_v7())-now() d)
+SELECT (d <= '100ms') AND (d >= '-100ms') FROM uuid_time_extraction;
+
+-- get_uuid_v7_time() must refuse to accept non-UUIDv7
+select get_uuid_v7_time(gen_random_uuid());
+
 -- clean up
 DROP TABLE guid1, guid2 CASCADE;
-- 
2.37.1 (Apple Git-137.1)

