From 3fb70e9e6e0a3faa5b0e467ee460aae3223e0bef Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Thu, 9 Oct 2025 18:13:11 +0500
Subject: [PATCH v2] Add uuid_encode() and uuid_decode() functions

Add functions for encoding UUIDs to alternative text formats,
following the encode()/decode() pattern. Initial support includes
base32hex format (RFC 4648) producing 26-character unpadded strings.
Decoding accepts both padded and unpadded input and is case-insensitive.

The extensible design allows easy addition of more formats.
---
 doc/src/sgml/func/func-uuid.sgml   |  75 ++++++++
 src/backend/utils/adt/uuid.c       | 265 +++++++++++++++++++++++++++++
 src/include/catalog/pg_proc.dat    |   6 +
 src/test/regress/expected/uuid.out |  98 +++++++++++
 src/test/regress/sql/uuid.sql      |  34 ++++
 5 files changed, 478 insertions(+)

diff --git a/doc/src/sgml/func/func-uuid.sgml b/doc/src/sgml/func/func-uuid.sgml
index 2638e2bf855..93d11b8341b 100644
--- a/doc/src/sgml/func/func-uuid.sgml
+++ b/doc/src/sgml/func/func-uuid.sgml
@@ -26,6 +26,14 @@
    <primary>uuid_extract_version</primary>
   </indexterm>
 
+  <indexterm>
+   <primary>uuid_to_base32hex</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>base32hex_to_uuid</primary>
+  </indexterm>
+
   <para>
    <xref linkend="func_uuid_gen_table"/> shows the <productname>PostgreSQL</productname>
    functions that can be used to generate UUIDs.
@@ -167,6 +175,73 @@
    </tgroup>
   </table>
 
+  <para>
+   <xref linkend="func_uuid_conversion_table"/> shows the <productname>PostgreSQL</productname>
+   functions that can be used to convert UUIDs to and from alternative text representations.
+  </para>
+
+  <table id="func_uuid_conversion_table">
+   <title><acronym>UUID</acronym> Conversion Functions</title>
+   <tgroup cols="1">
+    <thead>
+     <row>
+      <entry role="func_table_entry"><para role="func_signature">
+        Function
+       </para>
+       <para>
+        Description
+       </para>
+       <para>
+        Example(s)
+       </para></entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry role="func_table_entry"><para role="func_signature">
+        <function>uuid_encode</function>
+        ( <parameter>value</parameter> <type>uuid</type>,
+        <parameter>format</parameter> <type>text</type> )
+        <returnvalue>text</returnvalue>
+       </para>
+       <para>
+        Converts a UUID to an encoded text string using the specified format.
+        Currently supported formats:
+        <literal>base32hex</literal> - Encodes the UUID as a 26-character
+        base32hex string (0-9, A-V) without padding, as defined in
+        <ulink url="https://datatracker.ietf.org/doc/html/rfc4648">RFC 4648</ulink>.
+        This produces a more compact representation than the standard UUID format
+        and is case-insensitive, making it suitable for URLs and identifiers.
+       </para>
+       <para>
+        <literal>uuid_encode('123e4567-e89b-12d3-a456-&zwsp;426614174000'::uuid, 'base32hex')</literal>
+        <returnvalue>28V4APV8JC9D792M89J185Q000</returnvalue>
+       </para></entry>
+     </row>
+     <row>
+      <entry role="func_table_entry"><para role="func_signature">
+        <function>uuid_decode</function>
+        ( <parameter>string</parameter> <type>text</type>,
+        <parameter>format</parameter> <type>text</type> )
+        <returnvalue>uuid</returnvalue>
+       </para>
+       <para>
+        Converts an encoded text string to a UUID using the specified format.
+        For <literal>base32hex</literal> format, the input can be either
+        26 characters (unpadded) or 32 characters (padded with <literal>=</literal>),
+        and must contain only valid base32hex characters
+        (0-9, A-V, case-insensitive).
+       </para>
+       <para>
+        <literal>uuid_decode('28V4APV8JC9D792M89J185Q000', 'base32hex')</literal>
+        <returnvalue>123e4567-e89b-12d3-a456-426614174000</returnvalue>
+       </para></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
   <para>
    <productname>PostgreSQL</productname> also provides the usual comparison
    operators shown in <xref linkend="functions-comparison-op-table"/> for
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index e5f27ff892b..f4e34d6f795 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -20,6 +20,7 @@
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
 #include "port/pg_bswap.h"
+#include "utils/builtins.h"
 #include "utils/fmgrprotos.h"
 #include "utils/guc.h"
 #include "utils/skipsupport.h"
@@ -777,3 +778,267 @@ uuid_extract_version(PG_FUNCTION_ARGS)
 
 	PG_RETURN_UINT16(version);
 }
+
+/*
+ * UUID encoding conversion API.
+ */
+struct uuid_encoding
+{
+	uint64		(*encode_len) (void);
+	uint64		(*decode_len) (void);
+	uint64		(*encode) (const pg_uuid_t *uuid, char *res);
+	uint64		(*decode) (const char *data, size_t dlen, pg_uuid_t *res);
+};
+
+static const struct uuid_encoding *uuid_find_encoding(const char *name);
+
+/*
+ * BASE32HEX encoding for UUID
+ *
+ * Base32hex encoding uses 32 characters (0-9, A-V) and represents 5 bits per
+ * character. For a 128-bit UUID (16 bytes), we need 26 characters
+ * (128 bits / 5 bits per char = 25.6, rounded up to 26).
+ * As defined in RFC 4648.
+ */
+static const char base32hex_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
+
+static uint64
+base32hex_encode_len(void)
+{
+	/* 128 bits / 5 bits per character = 25.6, rounded up to 26 */
+	return 26;
+}
+
+static uint64
+base32hex_decode_len(void)
+{
+	/* Always 16 bytes for UUID */
+	return UUID_LEN;
+}
+
+static uint64
+base32hex_encode(const pg_uuid_t *uuid, char *res)
+{
+	int			i;
+	uint64		bits_buffer = 0;
+	int			bits_in_buffer = 0;
+	int			output_pos = 0;
+
+	for (i = 0; i < UUID_LEN; i++)
+	{
+		/* Add 8 bits to the buffer */
+		bits_buffer = (bits_buffer << 8) | uuid->data[i];
+		bits_in_buffer += 8;
+
+		/* Extract 5-bit chunks while we have enough bits */
+		while (bits_in_buffer >= 5)
+		{
+			bits_in_buffer -= 5;
+			/* Extract top 5 bits */
+			res[output_pos++] = base32hex_chars[(bits_buffer >> bits_in_buffer) & 0x1F];
+			/* Clear the extracted bits by masking */
+			bits_buffer &= ((1ULL << bits_in_buffer) - 1);
+		}
+	}
+
+	/* Handle remaining bits (128 % 5 = 3, so we have 3 bits left) */
+	if (bits_in_buffer > 0)
+	{
+		res[output_pos++] = base32hex_chars[(bits_buffer << (5 - bits_in_buffer)) & 0x1F];
+	}
+
+	return output_pos;
+}
+
+static uint64
+base32hex_decode(const char *src, size_t srclen, pg_uuid_t *uuid)
+{
+	int			i;
+	uint64		bits_buffer = 0;
+	int			bits_in_buffer = 0;
+	int			output_pos = 0;
+	size_t		decode_len = srclen;
+
+	/*
+	 * Accept both unpadded (26 chars) and padded (32 chars) input.
+	 * RFC 4648 specifies padding to make length a multiple of 8.
+	 */
+	if (srclen == 32)
+	{
+		/* Verify padding: should be exactly 6 '=' characters at the end */
+		for (i = 26; i < 32; i++)
+		{
+			if (src[i] != '=')
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("invalid base32hex padding for UUID"),
+						 errdetail("Expected '=' padding characters at position %d.", i)));
+		}
+		decode_len = 26;	/* Only decode the first 26 characters */
+	}
+	else if (srclen != 26)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid base32hex length for UUID"),
+				 errdetail("Expected 26 or 32 characters, got %zu.", srclen)));
+	}
+
+	for (i = 0; i < (int) decode_len; i++)
+	{
+		unsigned char c = src[i];
+		int			val;
+
+		/* Decode base32hex character (0-9, A-V, case-insensitive) */
+		if (c >= '0' && c <= '9')
+			val = c - '0';
+		else if (c >= 'A' && c <= 'V')
+			val = c - 'A' + 10;
+		else if (c >= 'a' && c <= 'v')
+			val = c - 'a' + 10;
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid base32hex digit: \"%c\"", c)));
+
+		/* Add 5 bits to buffer */
+		bits_buffer = (bits_buffer << 5) | val;
+		bits_in_buffer += 5;
+
+		/* Extract 8-bit bytes when we have enough bits */
+		while (bits_in_buffer >= 8)
+		{
+			bits_in_buffer -= 8;
+			if (output_pos < UUID_LEN)
+			{
+				uuid->data[output_pos++] = (unsigned char) (bits_buffer >> bits_in_buffer);
+				/* Clear the extracted bits */
+				bits_buffer &= ((1ULL << bits_in_buffer) - 1);
+			}
+		}
+	}
+
+	/* Verify we got exactly 16 bytes */
+	if (output_pos != UUID_LEN)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid base32hex data for UUID"),
+				 errdetail("Decoded to %d bytes instead of %d.", output_pos, UUID_LEN)));
+
+	/* Verify no extra bits remain (should be exactly 2 padding bits, all zeros) */
+	if (bits_in_buffer != 2 || (bits_buffer & ((1ULL << bits_in_buffer) - 1)) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid base32hex padding for UUID")));
+
+	return UUID_LEN;
+}
+
+/*
+ * Encoding lookup table
+ */
+static const struct
+{
+	const char *name;
+	struct uuid_encoding enc;
+}			uuid_enclist[] =
+{
+	{
+		"base32hex",
+		{
+			base32hex_encode_len, base32hex_decode_len, base32hex_encode, base32hex_decode
+		}
+	},
+	{
+		NULL,
+		{
+			NULL, NULL, NULL, NULL
+		}
+	}
+};
+
+static const struct uuid_encoding *
+uuid_find_encoding(const char *name)
+{
+	int			i;
+
+	for (i = 0; uuid_enclist[i].name; i++)
+		if (pg_strcasecmp(uuid_enclist[i].name, name) == 0)
+			return &uuid_enclist[i].enc;
+
+	return NULL;
+}
+
+/*
+ * uuid_encode - encode UUID to text using specified format
+ */
+Datum
+uuid_encode(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+	Datum		name = PG_GETARG_DATUM(1);
+	text	   *result;
+	char	   *namebuf;
+	uint64		resultlen;
+	uint64		res;
+	const struct uuid_encoding *enc;
+
+	namebuf = TextDatumGetCString(name);
+
+	enc = uuid_find_encoding(namebuf);
+	if (enc == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("unrecognized encoding: \"%s\"", namebuf)));
+
+	resultlen = enc->encode_len();
+
+	result = (text *) palloc(VARHDRSZ + resultlen);
+
+	res = enc->encode(uuid, VARDATA(result));
+
+	/* Make this FATAL 'cause we've trodden on memory ... */
+	if (res > resultlen)
+		elog(FATAL, "overflow - encode estimate too small");
+
+	SET_VARSIZE(result, VARHDRSZ + res);
+
+	PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * uuid_decode - decode text to UUID using specified format
+ */
+Datum
+uuid_decode(PG_FUNCTION_ARGS)
+{
+	text	   *data = PG_GETARG_TEXT_PP(0);
+	Datum		name = PG_GETARG_DATUM(1);
+	pg_uuid_t  *result;
+	char	   *namebuf;
+	char	   *dataptr;
+	size_t		datalen;
+	uint64		res;
+	const struct uuid_encoding *enc;
+
+	namebuf = TextDatumGetCString(name);
+
+	enc = uuid_find_encoding(namebuf);
+	if (enc == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("unrecognized encoding: \"%s\"", namebuf)));
+
+	dataptr = VARDATA_ANY(data);
+	datalen = VARSIZE_ANY_EXHDR(data);
+
+	result = (pg_uuid_t *) palloc(sizeof(pg_uuid_t));
+
+	res = enc->decode(dataptr, datalen, result);
+
+	/* Make this FATAL 'cause we've trodden on memory ... */
+	if (res > UUID_LEN)
+		elog(FATAL, "overflow - decode estimate too small");
+
+	PG_RETURN_UUID_P(result);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index b51d2b17379..9e9d15437d2 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9549,6 +9549,12 @@
 { oid => '6343', descr => 'extract version from RFC 9562 UUID',
   proname => 'uuid_extract_version', proleakproof => 't', prorettype => 'int2',
   proargtypes => 'uuid', prosrc => 'uuid_extract_version' },
+{ oid => '8083', descr => 'encode UUID to text using specified format',
+  proname => 'uuid_encode', prorettype => 'text',
+  proargtypes => 'uuid text', prosrc => 'uuid_encode' },
+{ oid => '8084', descr => 'decode text to UUID using specified format',
+  proname => 'uuid_decode', prorettype => 'uuid',
+  proargtypes => 'text text', prosrc => 'uuid_decode' },
 
 # pg_lsn
 { oid => '3229', descr => 'I/O',
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 95392003b86..8c9ecae44c3 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -305,5 +305,103 @@ SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111');  -- null
  
 (1 row)
 
+-- UUID encoding/decoding functions
+-- test uuid_encode with base32hex
+SELECT uuid_encode('00000000-0000-0000-0000-000000000000'::uuid, 'base32hex');
+        uuid_encode         
+----------------------------
+ 00000000000000000000000000
+(1 row)
+
+SELECT uuid_encode('11111111-1111-1111-1111-111111111111'::uuid, 'base32hex');
+        uuid_encode         
+----------------------------
+ 248H248H248H248H248H248H24
+(1 row)
+
+SELECT uuid_encode('ffffffff-ffff-ffff-ffff-ffffffffffff'::uuid, 'base32hex');
+        uuid_encode         
+----------------------------
+ VVVVVVVVVVVVVVVVVVVVVVVVVS
+(1 row)
+
+SELECT uuid_encode('123e4567-e89b-12d3-a456-426614174000'::uuid, 'base32hex');
+        uuid_encode         
+----------------------------
+ 28V4APV8JC9D792M89J185Q000
+(1 row)
+
+-- test uuid_decode with base32hex
+SELECT uuid_decode('00000000000000000000000000', 'base32hex');
+             uuid_decode              
+--------------------------------------
+ 00000000-0000-0000-0000-000000000000
+(1 row)
+
+SELECT uuid_decode('28V4APV8JC9D792M89J185Q000', 'base32hex');
+             uuid_decode              
+--------------------------------------
+ 123e4567-e89b-12d3-a456-426614174000
+(1 row)
+
+-- test round-trip conversions
+SELECT uuid_decode(uuid_encode('00000000-0000-0000-0000-000000000000'::uuid, 'base32hex'), 'base32hex');
+             uuid_decode              
+--------------------------------------
+ 00000000-0000-0000-0000-000000000000
+(1 row)
+
+SELECT uuid_encode(uuid_decode('28V4APV8JC9D792M89J185Q000', 'base32hex'), 'base32hex');
+        uuid_encode         
+----------------------------
+ 28V4APV8JC9D792M89J185Q000
+(1 row)
+
+SELECT uuid_decode(uuid_encode('123e4567-e89b-12d3-a456-426614174000'::uuid, 'base32hex'), 'base32hex');
+             uuid_decode              
+--------------------------------------
+ 123e4567-e89b-12d3-a456-426614174000
+(1 row)
+
+-- test case insensitivity
+SELECT uuid_decode('28v4apv8jc9d792m89j185q000', 'base32hex');
+             uuid_decode              
+--------------------------------------
+ 123e4567-e89b-12d3-a456-426614174000
+(1 row)
+
+SELECT uuid_decode('28V4APV8JC9D792M89J185Q000', 'base32hex');
+             uuid_decode              
+--------------------------------------
+ 123e4567-e89b-12d3-a456-426614174000
+(1 row)
+
+-- test RFC 4648 padding (32 chars with 6 '=' signs)
+SELECT uuid_decode('28V4APV8JC9D792M89J185Q000======', 'base32hex');
+             uuid_decode              
+--------------------------------------
+ 123e4567-e89b-12d3-a456-426614174000
+(1 row)
+
+SELECT uuid_decode('00000000000000000000000000======', 'base32hex');
+             uuid_decode              
+--------------------------------------
+ 00000000-0000-0000-0000-000000000000
+(1 row)
+
+-- test error cases for base32hex
+SELECT uuid_decode('short', 'base32hex');  -- too short
+ERROR:  invalid base32hex length for UUID
+DETAIL:  Expected 26 or 32 characters, got 5.
+SELECT uuid_decode('28V4APV8JC9D792M89J185Q000X', 'base32hex');  -- too long
+ERROR:  invalid base32hex length for UUID
+DETAIL:  Expected 26 or 32 characters, got 27.
+SELECT uuid_decode('28V4APV8JC9D792M89J185Q00W', 'base32hex');  -- invalid character (W)
+ERROR:  invalid base32hex digit: "W"
+-- test invalid encoding
+SELECT uuid_encode('123e4567-e89b-12d3-a456-426614174000'::uuid, 'invalid');
+ERROR:  unrecognized encoding: "invalid"
+SELECT uuid_decode('00000000000000000000000000', 'invalid');
+ERROR:  unrecognized encoding: "invalid"
 -- clean up
 DROP TABLE guid1, guid2, guid3 CASCADE;
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index 465153a0341..b0385a07672 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -146,6 +146,40 @@ SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday
 SELECT uuid_extract_timestamp(gen_random_uuid());  -- null
 SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111');  -- null
 
+-- UUID encoding/decoding functions
+
+-- test uuid_encode with base32hex
+SELECT uuid_encode('00000000-0000-0000-0000-000000000000'::uuid, 'base32hex');
+SELECT uuid_encode('11111111-1111-1111-1111-111111111111'::uuid, 'base32hex');
+SELECT uuid_encode('ffffffff-ffff-ffff-ffff-ffffffffffff'::uuid, 'base32hex');
+SELECT uuid_encode('123e4567-e89b-12d3-a456-426614174000'::uuid, 'base32hex');
+
+-- test uuid_decode with base32hex
+SELECT uuid_decode('00000000000000000000000000', 'base32hex');
+SELECT uuid_decode('28V4APV8JC9D792M89J185Q000', 'base32hex');
+
+-- test round-trip conversions
+SELECT uuid_decode(uuid_encode('00000000-0000-0000-0000-000000000000'::uuid, 'base32hex'), 'base32hex');
+SELECT uuid_encode(uuid_decode('28V4APV8JC9D792M89J185Q000', 'base32hex'), 'base32hex');
+SELECT uuid_decode(uuid_encode('123e4567-e89b-12d3-a456-426614174000'::uuid, 'base32hex'), 'base32hex');
+
+-- test case insensitivity
+SELECT uuid_decode('28v4apv8jc9d792m89j185q000', 'base32hex');
+SELECT uuid_decode('28V4APV8JC9D792M89J185Q000', 'base32hex');
+
+-- test RFC 4648 padding (32 chars with 6 '=' signs)
+SELECT uuid_decode('28V4APV8JC9D792M89J185Q000======', 'base32hex');
+SELECT uuid_decode('00000000000000000000000000======', 'base32hex');
+
+-- test error cases for base32hex
+SELECT uuid_decode('short', 'base32hex');  -- too short
+SELECT uuid_decode('28V4APV8JC9D792M89J185Q000X', 'base32hex');  -- too long
+SELECT uuid_decode('28V4APV8JC9D792M89J185Q00W', 'base32hex');  -- invalid character (W)
+
+-- test invalid encoding
+SELECT uuid_encode('123e4567-e89b-12d3-a456-426614174000'::uuid, 'invalid');
+SELECT uuid_decode('00000000000000000000000000', 'invalid');
+
 
 -- clean up
 DROP TABLE guid1, guid2, guid3 CASCADE;
-- 
2.39.5 (Apple Git-154)

