Hi,

> I briefly tested the patched version of v3. The implemented
> functionality works correctly.
>
> ---
> You can also add a case with the error from v3-0002
> "invalid base32hex end sequence" to the tests :
>
> +        ereport(ERROR,
> +                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
> +                 errmsg("invalid base32hex end sequence"),
> +                 errhint("Input data has non-zero padding bits.")));
>
> ---
> I agree with Masahiko Sawada; information about conversions
> should be added to the documentation.

Here is the rebased patch.

v4-0001 implements uuid <-> bytea casting
v4-0002 implements base32 encoding/decoding

Unless I missed something, 0001 is ready to be merged.

I only rebased v3 and improved the commit messages, but I didn't
account for Masahiko Sawada's feedback for 0002. Andrey, are you still
working on this or others can pick it up?

The patch is not on the commitfest, so I'm about to add it.

-- 
Best regards,
Aleksander Alekseev
From d80aad5edf1f02f27c2d9c2c001f5ef6b748b332 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <[email protected]>
Date: Wed, 29 Oct 2025 15:53:12 +0400
Subject: [PATCH v4 2/2] Add base32hex encoding support to encode() and
 decode()

Implement base32hex encoding/decoding per RFC 4648 Section 7 for
encode() and decode() functions. This encoding uses the extended hex
alphabet (0-9, A-V) which preserves sort order.

The encode() function produces unpadded output, while decode() accepts
both padded and unpadded input. Decoding is case-insensitive.

This is particularly useful for encoding UUIDs compactly:

    SELECT encode(uuid_value::bytea, 'base32hex');

produces a 26-character string compared to the standard 36-character
UUID representation.

Author: Andrey Borodin <[email protected]>
Reviewed-by: Masahiko Sawada <[email protected]>
Reviewed-by: Aleksander Alekseev <[email protected]>
Suggested-by: Sergey Prokhorenko <[email protected]>
Discussion: https://postgr.es/m/CAJ7c6TOramr1UTLcyB128LWMqita1Y7%3Darq3KHaU%3Dqikf5yKOQ%40mail.gmail.com
---
 doc/src/sgml/func/func-binarystring.sgml |  25 +++++
 src/backend/utils/adt/encode.c           | 124 +++++++++++++++++++++++
 src/test/regress/expected/uuid.out       |  88 ++++++++++++++++
 src/test/regress/sql/uuid.sql            |  27 +++++
 4 files changed, 264 insertions(+)

diff --git a/doc/src/sgml/func/func-binarystring.sgml b/doc/src/sgml/func/func-binarystring.sgml
index b256381e01f..257b1bf4c6b 100644
--- a/doc/src/sgml/func/func-binarystring.sgml
+++ b/doc/src/sgml/func/func-binarystring.sgml
@@ -729,6 +729,7 @@
        <parameter>format</parameter> values are:
        <link linkend="encode-format-base64"><literal>base64</literal></link>,
        <link linkend="encode-format-base64url"><literal>base64url</literal></link>,
+       <link linkend="encode-format-base32hex"><literal>base32hex</literal></link>,
        <link linkend="encode-format-escape"><literal>escape</literal></link>,
        <link linkend="encode-format-hex"><literal>hex</literal></link>.
       </para>
@@ -804,6 +805,30 @@
      </listitem>
     </varlistentry>
 
+    <varlistentry id="encode-format-base32hex">
+     <term>base32hex
+      <indexterm>
+       <primary>base32hex format</primary>
+      </indexterm></term>
+     <listitem>
+      <para>
+       The <literal>base32hex</literal> format is that of
+       <ulink url="https://datatracker.ietf.org/doc/html/rfc4648#section-7";>
+       RFC 4648 Section 7</ulink>.  It uses the extended hex alphabet
+       (0-9, A-V) which preserves sort order when encoding binary data.
+       The <function>encode</function> function produces unpadded output,
+       while <function>decode</function> accepts both padded and unpadded
+       input. Decoding is case-insensitive and ignores whitespace characters.
+      </para>
+      <para>
+       This format is particularly useful for encoding UUIDs in a compact,
+       sortable format: <literal>encode(uuid_value::bytea, 'base32hex')</literal>
+       produces a 26-character string compared to the standard 36-character
+       UUID representation.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry id="encode-format-escape">
      <term>escape
      <indexterm>
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index f5f835e944a..94cc0722422 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -825,6 +825,124 @@ esc_dec_len(const char *src, size_t srclen)
 	return len;
 }
 
+/*
+ * BASE32HEX
+ */
+
+static const char base32hex_table[] = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
+
+static uint64
+base32hex_enc_len(const char *src, size_t srclen)
+{
+	/* 5 bits per base32hex character, so round up (srclen * 8 + 4) / 5 */
+	return ((uint64) srclen * 8 + 4) / 5;
+}
+
+static uint64
+base32hex_dec_len(const char *src, size_t srclen)
+{
+	/* Decode length is (srclen * 5) / 8, but we may have padding */
+	return ((uint64) srclen * 5) / 8;
+}
+
+static uint64
+base32hex_encode(const char *src, size_t srclen, char *dst)
+{
+	const unsigned char *data = (const unsigned char *) src;
+	uint64		bits_buffer = 0;
+	int			bits_in_buffer = 0;
+	uint64		output_pos = 0;
+	size_t		i;
+
+	for (i = 0; i < srclen; i++)
+	{
+		/* Add 8 bits to the buffer */
+		bits_buffer = (bits_buffer << 8) | data[i];
+		bits_in_buffer += 8;
+
+		/* Extract 5-bit chunks while we have enough bits */
+		while (bits_in_buffer >= 5)
+		{
+			bits_in_buffer -= 5;
+			/* Extract top 5 bits */
+			dst[output_pos++] = base32hex_table[(bits_buffer >> bits_in_buffer) & 0x1F];
+			/* Clear the extracted bits by masking */
+			bits_buffer &= ((1ULL << bits_in_buffer) - 1);
+		}
+	}
+
+	/* Handle remaining bits (if any) */
+	if (bits_in_buffer > 0)
+	{
+		dst[output_pos++] = base32hex_table[(bits_buffer << (5 - bits_in_buffer)) & 0x1F];
+	}
+
+	return output_pos;
+}
+
+static uint64
+base32hex_decode(const char *src, size_t srclen, char *dst)
+{
+	const unsigned char *data = (const unsigned char *) src;
+	uint64		bits_buffer = 0;
+	int			bits_in_buffer = 0;
+	uint64		output_pos = 0;
+	size_t		i;
+	size_t		decode_len = srclen;
+
+	/*
+	 * RFC 4648 allows padding with '=' to make the length a multiple of 8.
+	 * Count and skip trailing padding characters.
+	 */
+	while (decode_len > 0 && data[decode_len - 1] == '=')
+		decode_len--;
+
+	for (i = 0; i < decode_len; i++)
+	{
+		unsigned char c = data[i];
+		int			val;
+
+		/* Skip whitespace */
+		if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
+			continue;
+
+		/* Decode base32hex character (0-9, A-V, case-insensitive) */
+		if (c >= '0' && c <= '9')
+			val = c - '0';
+		else if (c >= 'A' && c <= 'V')
+			val = c - 'A' + 10;
+		else if (c >= 'a' && c <= 'v')
+			val = c - 'a' + 10;
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid symbol \"%.*s\" found while decoding base32hex sequence",
+							pg_mblen((const char *) &c), (const char *) &c)));
+
+		/* Add 5 bits to buffer */
+		bits_buffer = (bits_buffer << 5) | val;
+		bits_in_buffer += 5;
+
+		/* Extract 8-bit bytes when we have enough bits */
+		while (bits_in_buffer >= 8)
+		{
+			bits_in_buffer -= 8;
+			dst[output_pos++] = (unsigned char) (bits_buffer >> bits_in_buffer);
+			/* Clear the extracted bits */
+			bits_buffer &= ((1ULL << bits_in_buffer) - 1);
+		}
+	}
+
+	/* Verify no extra bits remain (padding bits should be zero) */
+	if (bits_in_buffer > 0 && (bits_buffer & ((1ULL << bits_in_buffer) - 1)) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid base32hex end sequence"),
+				 errhint("Input data has non-zero padding bits.")));
+
+	return output_pos;
+}
+
 /*
  * Common
  */
@@ -854,6 +972,12 @@ static const struct
 			pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode
 		}
 	},
+	{
+		"base32hex",
+		{
+			base32hex_enc_len, base32hex_dec_len, base32hex_encode, base32hex_decode
+		}
+	},
 	{
 		"escape",
 		{
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 24486084aaf..86d21a29093 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -321,5 +321,93 @@ SELECT '\x019a2f859ced7225b99d9c55044a2563'::bytea::uuid;
 SELECT '\x1234567890abcdef'::bytea::uuid; -- error
 ERROR:  invalid length for UUID
 DETAIL:  Expected 16 bytes, got 8.
+-- base32hex encoding via encode/decode
+SELECT encode('00000000-0000-0000-0000-000000000000'::uuid::bytea, 'base32hex');
+           encode           
+----------------------------
+ 00000000000000000000000000
+(1 row)
+
+SELECT encode('11111111-1111-1111-1111-111111111111'::uuid::bytea, 'base32hex');
+           encode           
+----------------------------
+ 248H248H248H248H248H248H24
+(1 row)
+
+SELECT encode('ffffffff-ffff-ffff-ffff-ffffffffffff'::uuid::bytea, 'base32hex');
+           encode           
+----------------------------
+ VVVVVVVVVVVVVVVVVVVVVVVVVS
+(1 row)
+
+SELECT encode('123e4567-e89b-12d3-a456-426614174000'::uuid::bytea, 'base32hex');
+           encode           
+----------------------------
+ 28V4APV8JC9D792M89J185Q000
+(1 row)
+
+-- test decode with base32hex
+SELECT decode('00000000000000000000000000', 'base32hex')::uuid;
+                decode                
+--------------------------------------
+ 00000000-0000-0000-0000-000000000000
+(1 row)
+
+SELECT decode('28V4APV8JC9D792M89J185Q000', 'base32hex')::uuid;
+                decode                
+--------------------------------------
+ 123e4567-e89b-12d3-a456-426614174000
+(1 row)
+
+-- test round-trip conversions
+SELECT decode(encode('00000000-0000-0000-0000-000000000000'::uuid::bytea, 'base32hex'), 'base32hex')::uuid;
+                decode                
+--------------------------------------
+ 00000000-0000-0000-0000-000000000000
+(1 row)
+
+SELECT encode(decode('28V4APV8JC9D792M89J185Q000', 'base32hex')::uuid::bytea, 'base32hex');
+           encode           
+----------------------------
+ 28V4APV8JC9D792M89J185Q000
+(1 row)
+
+SELECT decode(encode('123e4567-e89b-12d3-a456-426614174000'::uuid::bytea, 'base32hex'), 'base32hex')::uuid;
+                decode                
+--------------------------------------
+ 123e4567-e89b-12d3-a456-426614174000
+(1 row)
+
+-- test case insensitivity
+SELECT decode('28v4apv8jc9d792m89j185q000', 'base32hex')::uuid;
+                decode                
+--------------------------------------
+ 123e4567-e89b-12d3-a456-426614174000
+(1 row)
+
+SELECT decode('28V4APV8JC9D792M89J185Q000', 'base32hex')::uuid;
+                decode                
+--------------------------------------
+ 123e4567-e89b-12d3-a456-426614174000
+(1 row)
+
+-- test RFC 4648 padding (32 chars with 6 '=' signs)
+SELECT decode('28V4APV8JC9D792M89J185Q000======', 'base32hex')::uuid;
+                decode                
+--------------------------------------
+ 123e4567-e89b-12d3-a456-426614174000
+(1 row)
+
+SELECT decode('00000000000000000000000000======', 'base32hex')::uuid;
+                decode                
+--------------------------------------
+ 00000000-0000-0000-0000-000000000000
+(1 row)
+
+-- test error cases for base32hex
+SELECT decode('28V4APV8JC9D792M89J185Q00W', 'base32hex')::uuid;  -- invalid character W
+ERROR:  invalid symbol "W" found while decoding base32hex sequence
+SELECT decode('28V4APV8JC9D792M89J185Q00!', 'base32hex')::uuid;  -- invalid character !
+ERROR:  invalid symbol "!" found while decoding base32hex sequence
 -- clean up
 DROP TABLE guid1, guid2, guid3 CASCADE;
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index 63520d0b640..44e8fa8b243 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -151,5 +151,32 @@ SELECT '5b35380a-7143-4912-9b55-f322699c6770'::uuid::bytea;
 SELECT '\x019a2f859ced7225b99d9c55044a2563'::bytea::uuid;
 SELECT '\x1234567890abcdef'::bytea::uuid; -- error
 
+-- base32hex encoding via encode/decode
+SELECT encode('00000000-0000-0000-0000-000000000000'::uuid::bytea, 'base32hex');
+SELECT encode('11111111-1111-1111-1111-111111111111'::uuid::bytea, 'base32hex');
+SELECT encode('ffffffff-ffff-ffff-ffff-ffffffffffff'::uuid::bytea, 'base32hex');
+SELECT encode('123e4567-e89b-12d3-a456-426614174000'::uuid::bytea, 'base32hex');
+
+-- test decode with base32hex
+SELECT decode('00000000000000000000000000', 'base32hex')::uuid;
+SELECT decode('28V4APV8JC9D792M89J185Q000', 'base32hex')::uuid;
+
+-- test round-trip conversions
+SELECT decode(encode('00000000-0000-0000-0000-000000000000'::uuid::bytea, 'base32hex'), 'base32hex')::uuid;
+SELECT encode(decode('28V4APV8JC9D792M89J185Q000', 'base32hex')::uuid::bytea, 'base32hex');
+SELECT decode(encode('123e4567-e89b-12d3-a456-426614174000'::uuid::bytea, 'base32hex'), 'base32hex')::uuid;
+
+-- test case insensitivity
+SELECT decode('28v4apv8jc9d792m89j185q000', 'base32hex')::uuid;
+SELECT decode('28V4APV8JC9D792M89J185Q000', 'base32hex')::uuid;
+
+-- test RFC 4648 padding (32 chars with 6 '=' signs)
+SELECT decode('28V4APV8JC9D792M89J185Q000======', 'base32hex')::uuid;
+SELECT decode('00000000000000000000000000======', 'base32hex')::uuid;
+
+-- test error cases for base32hex
+SELECT decode('28V4APV8JC9D792M89J185Q00W', 'base32hex')::uuid;  -- invalid character W
+SELECT decode('28V4APV8JC9D792M89J185Q00!', 'base32hex')::uuid;  -- invalid character !
+
 -- clean up
 DROP TABLE guid1, guid2, guid3 CASCADE;
-- 
2.43.0

From 9d7db65047941eb1c9f8d79a3790719a486a3863 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= <[email protected]>
Date: Tue, 28 Oct 2025 16:33:17 +0000
Subject: [PATCH v4 1/2] Allow explicit casting between bytea and UUID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This enables using encode() and decode() to convert UUIDs to and from
alternative formats, such as base64.

Author:	Dagfinn Ilmari MannsÃ¥ker <[email protected]>
Reviewed-by: Andrey Borodin <[email protected]>
Reviewed-by: Aleksander Alekseev <[email protected]>
Reviewed-by: Jelte Fennema-Nio <[email protected]>
Discussion: https://postgr.es/m/CAJ7c6TOramr1UTLcyB128LWMqita1Y7%3Darq3KHaU%3Dqikf5yKOQ%40mail.gmail.com
---
 src/backend/utils/adt/bytea.c      | 27 +++++++++++++++++++++++++++
 src/include/catalog/pg_cast.dat    |  6 ++++++
 src/include/catalog/pg_proc.dat    |  7 +++++++
 src/test/regress/expected/uuid.out | 16 ++++++++++++++++
 src/test/regress/sql/uuid.sql      |  4 ++++
 5 files changed, 60 insertions(+)

diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c
index fd7662d41ee..0e6d97412d0 100644
--- a/src/backend/utils/adt/bytea.c
+++ b/src/backend/utils/adt/bytea.c
@@ -28,6 +28,7 @@
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/sortsupport.h"
+#include "utils/uuid.h"
 #include "varatt.h"
 
 /* GUC variable */
@@ -1340,3 +1341,29 @@ int8_bytea(PG_FUNCTION_ARGS)
 {
 	return int8send(fcinfo);
 }
+
+/* Cast bytea -> uuid */
+Datum
+bytea_uuid(PG_FUNCTION_ARGS)
+{
+	bytea	   *v = PG_GETARG_BYTEA_PP(0);
+	int			len = VARSIZE_ANY_EXHDR(v);
+	pg_uuid_t  *uuid;
+
+	if (len != UUID_LEN)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+				 errmsg("invalid length for UUID"),
+				 errdetail("Expected %d bytes, got %d.", UUID_LEN, len)));
+
+	uuid = (pg_uuid_t *) palloc(sizeof(pg_uuid_t));
+	memcpy(uuid->data, VARDATA_ANY(v), UUID_LEN);
+	PG_RETURN_UUID_P(uuid);
+}
+
+/* Cast uuid -> bytea; can just use uuid_send() */
+Datum
+uuid_bytea(PG_FUNCTION_ARGS)
+{
+	return uuid_send(fcinfo);
+}
diff --git a/src/include/catalog/pg_cast.dat b/src/include/catalog/pg_cast.dat
index 9b1cfb1b590..a7b6d812c5a 100644
--- a/src/include/catalog/pg_cast.dat
+++ b/src/include/catalog/pg_cast.dat
@@ -362,6 +362,12 @@
 { castsource => 'bytea', casttarget => 'int8', castfunc => 'int8(bytea)',
   castcontext => 'e', castmethod => 'f' },
 
+# Allow explicit coercions between bytea and uuid type
+{ castsource => 'bytea', casttarget => 'uuid', castfunc => 'uuid(bytea)',
+  castcontext => 'e', castmethod => 'f' },
+{ castsource => 'uuid', casttarget => 'bytea', castfunc => 'bytea(uuid)',
+  castcontext => 'e', castmethod => 'f' },
+
 # Allow explicit coercions between int4 and "char"
 { castsource => 'char', casttarget => 'int4', castfunc => 'int4(char)',
   castcontext => 'e', castmethod => 'f' },
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 83f6501df38..d2b30390671 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1208,6 +1208,13 @@
   proname => 'int8', prorettype => 'int8', proargtypes => 'bytea',
   prosrc => 'bytea_int8' },
 
+{ oid => '9880', descr => 'convert uuid to bytea',
+  proname => 'bytea', prorettype => 'bytea', proargtypes => 'uuid',
+  prosrc => 'uuid_bytea' },
+{ oid => '9881', descr => 'convert bytea to uuid',
+  proname => 'uuid', prorettype => 'uuid', proargtypes => 'bytea',
+  prosrc => 'bytea_uuid' },
+
 { oid => '449', descr => 'hash',
   proname => 'hashint2', prorettype => 'int4', proargtypes => 'int2',
   prosrc => 'hashint2' },
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 95392003b86..24486084aaf 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -305,5 +305,21 @@ SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111');  -- null
  
 (1 row)
 
+-- casts
+SELECT '5b35380a-7143-4912-9b55-f322699c6770'::uuid::bytea;
+               bytea                
+------------------------------------
+ \x5b35380a714349129b55f322699c6770
+(1 row)
+
+SELECT '\x019a2f859ced7225b99d9c55044a2563'::bytea::uuid;
+                 uuid                 
+--------------------------------------
+ 019a2f85-9ced-7225-b99d-9c55044a2563
+(1 row)
+
+SELECT '\x1234567890abcdef'::bytea::uuid; -- error
+ERROR:  invalid length for UUID
+DETAIL:  Expected 16 bytes, got 8.
 -- clean up
 DROP TABLE guid1, guid2, guid3 CASCADE;
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index 465153a0341..63520d0b640 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -146,6 +146,10 @@ SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday
 SELECT uuid_extract_timestamp(gen_random_uuid());  -- null
 SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111');  -- null
 
+-- casts
+SELECT '5b35380a-7143-4912-9b55-f322699c6770'::uuid::bytea;
+SELECT '\x019a2f859ced7225b99d9c55044a2563'::bytea::uuid;
+SELECT '\x1234567890abcdef'::bytea::uuid; -- error
 
 -- clean up
 DROP TABLE guid1, guid2, guid3 CASCADE;
-- 
2.43.0

Reply via email to