From 08c1fa81429b3422b6c313f6117f891377ec717e Mon Sep 17 00:00:00 2001
From: Florents Tselai <florents.tselai@gmail.com>
Date: Tue, 11 Mar 2025 09:39:22 +0200
Subject: [PATCH v2 1/2] Add "base64url" in encode / decode functions.
 Documentation included. Additional tests are probably needed, especially with
 respect to padding. The same goes for the docs.

---
 doc/src/sgml/func.sgml                | 22 ++++++++
 src/backend/utils/adt/encode.c        | 81 +++++++++++++++++++++++++++
 src/test/regress/expected/strings.out | 52 +++++++++++++++++
 src/test/regress/sql/strings.sql      | 13 ++++-
 4 files changed, 167 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 51dd8ad6571..5df09db633e 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -4934,6 +4934,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        Encodes binary data into a textual representation; supported
        <parameter>format</parameter> values are:
        <link linkend="encode-format-base64"><literal>base64</literal></link>,
+       <link linkend="encode-format-base64url"><literal>base64url</literal></link>,
        <link linkend="encode-format-escape"><literal>escape</literal></link>,
        <link linkend="encode-format-hex"><literal>hex</literal></link>.
       </para>
@@ -4991,6 +4992,27 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
      </listitem>
     </varlistentry>
 
+    <varlistentry id="encode-format-base64url">
+     <term>base64url
+      <indexterm>
+       <primary>base64url format</primary>
+      </indexterm></term>
+     <listitem>
+      <para>
+       The <literal>base64url</literal> format is the URL-safe variant of
+       <literal>base64</literal> defined in
+       <ulink url="https://datatracker.ietf.org/doc/html/rfc4648#section-5">RFC 4648
+        Section 5</ulink>. Unlike standard <literal>base64</literal>, it replaces
+       <literal>'+'</literal> with <literal>'-'</literal> and <literal>'/'</literal> with <literal>'_'</literal>
+       to ensure safe usage in URLs and file names. Additionally, the padding character
+       <literal>'='</literal> is omitted from the encoded output.
+       The <function>decode</function> function accepts input with or without padding,
+       restoring any missing <literal>'='</literal> characters before decoding. If the
+       input contains invalid characters or incorrect padding, an error is raised.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry id="encode-format-escape">
      <term>escape
      <indexterm>
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 4ccaed815d1..2547e746653 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -415,6 +415,81 @@ pg_base64_dec_len(const char *src, size_t srclen)
 	return ((uint64) srclen * 3) >> 2;
 }
 
+static uint64
+pg_base64url_enc_len(const char *src, size_t srclen)
+{
+	return 4 * (((uint64) srclen + 2) / 3);	/* 4 chars per started 3-byte group */
+}
+
+/* Decoded-length estimate: 3 bytes per 4 input chars, plus any partial group. */
+static uint64
+pg_base64url_dec_len(const char *src, size_t srclen)
+{
+	uint64		whole_bytes = (srclen / 4) * 3;
+	size_t		tail_chars = srclen % 4;
+
+	/*
+	 * With padding omitted, 2 leftover chars carry 1 byte and 3 carry 2; a
+	 * lone leftover char carries none, and neither does a complete input.
+	 */
+	if (tail_chars < 2)
+		return whole_bytes;
+	return whole_bytes + (tail_chars - 1);
+}
+
+/* Encode src as base64url (RFC 4648 section 5): URL-safe alphabet, unpadded. */
+static uint64
+pg_base64url_encode(const char *src, size_t len, char *dst)
+{
+	/* pg_base64_encode adds line feeds that dst has no room for; use scratch. */
+	char	   *tmp = palloc(pg_base64_enc_len(src, len));
+	uint64		tmp_len = pg_base64_encode(src, len, tmp);
+	uint64		encoded_len = 0;
+
+	for (uint64 i = 0; i < tmp_len; i++)
+	{
+		char		c = tmp[i];
+
+		/* Line feeds and '=' padding do not belong in base64url output. */
+		if (c == '\n' || c == '=')
+			continue;
+		dst[encoded_len++] = (c == '+') ? '-' : (c == '/') ? '_' : c;
+	}
+
+	/* No NUL terminator: pg_base64url_enc_len() leaves no spare byte in dst. */
+	pfree(tmp);
+	return encoded_len;
+}
+
+/* Decode base64url (RFC 4648 section 5) text; trailing padding is optional. */
+static uint64
+pg_base64url_decode(const char *src, size_t len, char *dst)
+{
+	char	   *base64 = palloc(len + 4);	/* room for up to 3 '=' */
+	size_t		pad_len = (4 - (len % 4)) % 4;
+	uint64		decoded_len;
+	size_t		i;
+
+	/* Translate to the standard alphabet, whose own symbols are invalid here. */
+	for (i = 0; i < len; i++)
+	{
+		if (src[i] == '+' || src[i] == '/')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid symbol \"%c\" found while decoding base64url sequence",
+							src[i])));
+		base64[i] = (src[i] == '-') ? '+' : (src[i] == '_') ? '/' : src[i];
+	}
+
+	/* Re-add the '=' padding base64url omits, then reuse the base64 decoder. */
+	while (pad_len-- > 0)
+		base64[i++] = '=';
+	decoded_len = pg_base64_decode(base64, i, dst);
+
+	pfree(base64);
+	return decoded_len;
+}
+
 /*
  * Escape
  * Minimally escape bytea to text.
@@ -606,6 +681,12 @@ static const struct
 			pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode
 		}
 	},
+	{
+		"base64url",
+		{
+			pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode
+		}
+	},
 	{
 		"escape",
 		{
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index f8cba9f5b24..579840ae394 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2323,6 +2323,58 @@ SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
  \x1234567890abcdef00
 (1 row)
 
+-- base64url
+SELECT encode('\x1234567890abcdef00', 'base64url');
+    encode    
+--------------
+ EjRWeJCrze8A
+(1 row)
+
+SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url');
+        decode        
+----------------------
+ \x1234567890abcdef00
+(1 row)
+
+-- Special characters in binary data
+SELECT encode('\x00ff88ee77', 'base64url'); -- Includes null byte (\x00) and high-bit characters
+ encode  
+---------
+ AP-I7nc
+(1 row)
+
+SELECT decode(encode('\x00ff88ee77', 'base64url'), 'base64url');
+    decode    
+--------------
+ \x00ff88ee77
+(1 row)
+
+-- Padding edge case (length % 3 = 1)
+SELECT encode('\x66', 'base64url'); -- Expected to remove padding
+ encode 
+--------
+ Zg
+(1 row)
+
+SELECT decode(encode('\x66', 'base64url'), 'base64url');
+ decode 
+--------
+ \x66
+(1 row)
+
+-- Padding edge case (length % 3 = 2)
+SELECT encode('\x6666', 'base64url'); -- Another case where padding is removed
+ encode 
+--------
+ ZmY
+(1 row)
+
+SELECT decode(encode('\x6666', 'base64url'), 'base64url'); -- Should match original bytea
+ decode 
+--------
+ \x6666
+(1 row)
+
 --
 -- get_bit/set_bit etc
 --
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 4deb0683d57..baa7bb32a76 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -738,7 +738,18 @@ SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea,
                      'base64'), 'base64');
 SELECT encode('\x1234567890abcdef00', 'escape');
 SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
-
+-- base64url
+SELECT encode('\x1234567890abcdef00', 'base64url');
+SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url');
+-- Special characters in binary data
+SELECT encode('\x00ff88ee77', 'base64url'); -- Includes null byte (\x00) and high-bit characters
+SELECT decode(encode('\x00ff88ee77', 'base64url'), 'base64url');
+-- Padding edge case (length % 3 = 1)
+SELECT encode('\x66', 'base64url'); -- Expected to remove padding
+SELECT decode(encode('\x66', 'base64url'), 'base64url');
+-- Padding edge case (length % 3 = 2)
+SELECT encode('\x6666', 'base64url'); -- Another case where padding is removed
+SELECT decode(encode('\x6666', 'base64url'), 'base64url'); -- Should match original bytea
 --
 -- get_bit/set_bit etc
 --
-- 
2.48.1

