From a3d31f4fe330761840d3e874223ae118926047b2 Mon Sep 17 00:00:00 2001
From: Florents Tselai <florents.tselai@gmail.com>
Date: Fri, 14 Mar 2025 20:51:33 +0200
Subject: [PATCH v4 1/3] base64url support for encode/decode functions.
 Refactored and with better test cases

---
 doc/src/sgml/func.sgml                |  18 ++++
 src/backend/utils/adt/encode.c        | 126 ++++++++++++++++++++++++++
 src/test/regress/expected/strings.out |  57 ++++++++++++
 src/test/regress/sql/strings.sql      |  18 ++++
 4 files changed, 219 insertions(+)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 1c3810e1a04..b47a09dbe05 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -4951,6 +4951,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        Encodes binary data into a textual representation; supported
        <parameter>format</parameter> values are:
        <link linkend="encode-format-base64"><literal>base64</literal></link>,
+       <link linkend="encode-format-base64url"><literal>base64url</literal></link>,
        <link linkend="encode-format-escape"><literal>escape</literal></link>,
        <link linkend="encode-format-hex"><literal>hex</literal></link>.
       </para>
@@ -5008,6 +5009,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
      </listitem>
     </varlistentry>
 
+    <varlistentry id="encode-format-base64url">
+     <term>base64url
+      <indexterm>
+       <primary>base64url format</primary>
+      </indexterm></term>
+     <listitem>
+      <para>
+       The <literal>base64url</literal> format is the URL- and filename-safe
+       variant of <literal>base64</literal> defined in
+       <ulink url="https://datatracker.ietf.org/doc/html/rfc4648#section-5">RFC 4648
+        Section 5</ulink>. It replaces <literal>'+'</literal> with
+       <literal>'-'</literal> and <literal>'/'</literal> with <literal>'_'</literal>,
+       and the trailing <literal>'='</literal> padding is omitted.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry id="encode-format-escape">
      <term>escape
      <indexterm>
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 4ccaed815d1..9522eecd4be 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -415,6 +415,126 @@ pg_base64_dec_len(const char *src, size_t srclen)
 	return ((uint64) srclen * 3) >> 2;
 }
 
+/*
+ * Estimate the length of the base64url encoding of srclen input bytes,
+ * i.e. a single run of characters with no line feeds.
+ *
+ * Every 3 input bytes become 4 output characters.  base64url output carries
+ * no '=' padding, so a trailing group of 1 or 2 bytes yields only 2 or 3
+ * characters; it is still counted as 4 here, so the result can exceed the
+ * real length by up to 2.  src is not examined; only srclen matters.
+ */
+static uint64
+pg_base64url_enc_len(const char *src, size_t srclen)
+{
+	uint64		nbytes;
+
+	/* widen first so the arithmetic cannot wrap where size_t is 32 bits */
+	nbytes = (uint64) srclen;
+
+	/* ceil(nbytes / 3) * 4 */
+	return (nbytes + 2) / 3 * 4;
+}
+
+
+/* Upper bound on the bytes decoded from srclen base64url characters. */
+static uint64
+pg_base64url_dec_len(const char *src, size_t srclen)
+{
+	/*
+	 * Unpadded input may end in a partial 4-character group, so round up to
+	 * whole groups; each group decodes to at most 3 bytes.  Computed in
+	 * uint64 so the arithmetic cannot wrap where size_t is 32 bits.
+	 */
+	return ((uint64) srclen + 3) / 4 * 3;
+}
+
+/*
+ * Encode len bytes of src as unpadded base64url (RFC 4648 section 5). Does not
+ * reuse pg_base64_encode(), whose line feeds the length estimate omits.
+ */
+static uint64
+pg_base64url_encode(const char *src, size_t len, char *dst)
+{
+	static const char alphabet[] =
+		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+	char	   *out = dst;
+	uint32		acc = 0;		/* bit accumulator */
+	int			nbits = 0;		/* unconsumed bits in acc */
+
+	for (size_t i = 0; i < len; i++)
+	{
+		acc = (acc << 8) | (unsigned char) src[i];
+		for (nbits += 8; nbits >= 6; nbits -= 6)
+			*out++ = alphabet[(acc >> (nbits - 6)) & 0x3f];
+	}
+	if (nbits > 0)				/* final partial group, zero-filled on the right */
+		*out++ = alphabet[(acc << (6 - nbits)) & 0x3f];
+	return out - dst;
+}
+
+/*
+ * Decode unpadded base64url text from src into dst.
+ * The input is copied into a scratch buffer with '-' and '_' mapped to '+'
+ * and '/' and with '=' padding appended, then passed to pg_base64_decode().
+ * An error is raised for '=', '+', '/' and any other character that is not
+ * a letter, digit, '-' or '_'.  Returns the value pg_base64_decode() returns.
+ */
+static uint64
+pg_base64url_decode(const char *src, size_t len, char *dst)
+{
+	size_t		npad;
+	size_t		padded_len;
+	char	   *scratch;
+	uint64		result;
+
+	if (len == 0)
+		return 0;				/* empty input decodes to nothing */
+
+	/* number of '=' needed to bring the length up to a multiple of 4 */
+	npad = (4 - len % 4) % 4;
+	padded_len = len + npad;
+	scratch = palloc(padded_len + 1);	/* one extra byte for the NUL */
+
+	for (size_t pos = 0; pos < len; pos++)
+	{
+		char		ch = src[pos];
+
+		/* reject padding and standard-alphabet characters with a hint */
+		if (ch == '=')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid base64url input"),
+					 errhint("Base64URL encoding should not contain padding '='.")));
+		if (ch == '+' || ch == '/')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid base64url character: '%c'", ch),
+					 errhint("Base64URL should use '-' instead of '+' and '_' instead of '/'.")));
+
+		if (ch == '-')
+			scratch[pos] = '+';
+		else if (ch == '_')
+			scratch[pos] = '/';
+		else if ((ch >= '0' && ch <= '9') ||
+				 (ch >= 'A' && ch <= 'Z') ||
+				 (ch >= 'a' && ch <= 'z'))
+			scratch[pos] = ch;	/* shared by both alphabets */
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid base64url character: '%c'", ch)));
+	}
+
+	for (size_t k = 0; k < npad; k++)
+		scratch[len + k] = '=';
+	scratch[padded_len] = '\0';
+
+	result = pg_base64_decode(scratch, padded_len, dst);
+	pfree(scratch);
+	return result;
+}
+
 /*
  * Escape
  * Minimally escape bytea to text.
@@ -606,6 +726,12 @@ static const struct
 			pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode
 		}
 	},
+	{
+		"base64url",
+		{
+			pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode
+		}
+	},
 	{
 		"escape",
 		{
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index fbe7d7be71f..80e2ff8426c 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2341,6 +2341,63 @@ SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
  \x1234567890abcdef00
 (1 row)
 
+--
+-- encode/decode Base64URL
+--
+SET bytea_output TO hex;
+-- Flaghsip Test case against base64.
+-- Notice the = padding removed at the end and special chars.
+SELECT encode('\x69b73eff', 'base64');  -- Expected: abc+/w==
+  encode  
+----------
+ abc+/w==
+(1 row)
+
+SELECT encode('\x69b73eff', 'base64url');  -- Expected: abc-_w
+ encode 
+--------
+ abc-_w
+(1 row)
+
+SELECT decode(encode('\x69b73eff', 'base64url'), 'base64url');
+   decode   
+------------
+ \x69b73eff
+(1 row)
+
+-- Test basic encoding/decoding
+SELECT encode('\x1234567890abcdef00', 'base64url');  -- Expected: EjRWeJCrze8A
+    encode    
+--------------
+ EjRWeJCrze8A
+(1 row)
+
+SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url');  -- Expected: \x1234567890abcdef00
+        decode        
+----------------------
+ \x1234567890abcdef00
+(1 row)
+
+-- Test with empty input
+SELECT encode('', 'base64url');
+ encode 
+--------
+ 
+(1 row)
+
+SELECT decode('', 'base64url');
+ decode 
+--------
+ \x
+(1 row)
+
+-- Test round-trip conversion
+SELECT encode(decode('SGVsbG8gV29ybGQh', 'base64url'), 'base64url');  -- Expected: SGVsbG8gV29ybGQh (decodes to "Hello World!")
+      encode      
+------------------
+ SGVsbG8gV29ybGQh
+(1 row)
+
 --
 -- get_bit/set_bit etc
 --
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index ed054e6e99c..d14c1cac28f 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -743,6 +743,24 @@ SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea,
 SELECT encode('\x1234567890abcdef00', 'escape');
 SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
 
+--
+-- encode/decode Base64URL
+--
+SET bytea_output TO hex;
+-- Flaghsip Test case against base64.
+-- Notice the = padding removed at the end and special chars.
+SELECT encode('\x69b73eff', 'base64');  -- Expected: abc+/w==
+SELECT encode('\x69b73eff', 'base64url');  -- Expected: abc-_w
+SELECT decode(encode('\x69b73eff', 'base64url'), 'base64url');
+-- Test basic encoding/decoding
+SELECT encode('\x1234567890abcdef00', 'base64url');  -- Expected: EjRWeJCrze8A
+SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url');  -- Expected: \x1234567890abcdef00
+-- Test with empty input
+SELECT encode('', 'base64url');
+SELECT decode('', 'base64url');
+-- Test round-trip conversion
+SELECT encode(decode('SGVsbG8gV29ybGQh', 'base64url'), 'base64url');  -- Expected: SGVsbG8gV29ybGQh (decodes to "Hello World!")
+
 --
 -- get_bit/set_bit etc
 --
-- 
2.49.0

