From 012d585897684a414263dd7f7be7a08ac6761d2d Mon Sep 17 00:00:00 2001
From: Florents Tselai <florents.tselai@gmail.com>
Date: Mon, 10 Mar 2025 12:55:53 +0200
Subject: [PATCH v1] Add base64url_encode, base64url_decode functions along
 with some tests and documentation

---
 doc/src/sgml/func.sgml                | 44 +++++++++++-
 src/backend/utils/adt/encode.c        | 92 +++++++++++++++++++++++++
 src/include/catalog/pg_proc.dat       |  7 ++
 src/test/regress/expected/strings.out | 97 +++++++++++++++++++++++++++
 src/test/regress/sql/strings.sql      | 32 +++++++++
 5 files changed, 271 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 51dd8ad6571..4c50e9537ff 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -4794,7 +4794,49 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
         <returnvalue>\x5678</returnvalue>
        </para></entry>
       </row>
-    </tbody>
+
+      <row>
+       <entry role="func_table_entry">
+        <para role="func_signature">
+         <indexterm>
+          <primary>base64url_encode</primary>
+         </indexterm>
+         <function>base64url_encode</function> ( <parameter>input</parameter> <type>bytea</type> )
+         <returnvalue>text</returnvalue>
+        </para>
+        <para>
+         Encodes the given <parameter>input</parameter> in Base64URL format, replacing
+         '+' with '-', '/' with '_', and removing padding ('='). This is useful for
+         safe encoding in URLs and filenames.
+        </para>
+        <para>
+         <literal>base64url_encode('www.postgresql.org'::bytea)</literal>
+         <returnvalue>d3d3LnBvc3RncmVzcWwub3Jn</returnvalue>
+        </para>
+       </entry>
+      </row>
+
+      <row>
+       <entry role="func_table_entry">
+        <para role="func_signature">
+         <indexterm>
+          <primary>base64url_decode</primary>
+         </indexterm>
+         <function>base64url_decode</function> ( <parameter>input</parameter> <type>text</type> )
+         <returnvalue>bytea</returnvalue>
+        </para>
+        <para>
+         Decodes the given Base64URL-encoded <parameter>input</parameter> back into
+         its original binary format. Converts '-' to '+', '_' to '/', and restores
+         padding if needed.
+        </para>
+        <para>
+         <literal>convert_from(base64url_decode('d3d3LnBvc3RncmVzcWwub3Jn'), 'UTF8')</literal>
+         <returnvalue>www.postgresql.org</returnvalue>
+        </para>
+       </entry>
+      </row>
+     </tbody>
    </tgroup>
   </table>
 
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 4ccaed815d1..2d537bf3096 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -19,6 +19,7 @@
 #include "utils/builtins.h"
 #include "utils/memutils.h"
 #include "varatt.h"
+#include "common/base64.h"
 
 
 /*
@@ -140,6 +141,97 @@ binary_decode(PG_FUNCTION_ARGS)
 	PG_RETURN_BYTEA_P(result);
 }
 
+/*
+ * base64url_encode - encode bytea as unpadded URL-safe Base64 (RFC 4648, sec. 5)
+ *
+ * Runs the standard Base64 encoder, then rewrites the result in place:
+ * '+' becomes '-', '/' becomes '_', and '=' padding is dropped.
+ */
+Datum
+base64url_encode(PG_FUNCTION_ARGS)
+{
+	bytea	   *input = PG_GETARG_BYTEA_PP(0);
+	int			input_len = VARSIZE_ANY_EXHDR(input);
+	int			base64_len;
+	int			encoded_len;
+	char	   *base64;
+	text	   *result;
+	int			i;
+	int			j = 0;
+
+	/* Buffer size needed for the padded standard Base64 form */
+	base64_len = pg_b64_enc_len(input_len);
+	base64 = palloc(base64_len);
+
+	encoded_len = pg_b64_encode(VARDATA_ANY(input), input_len,
+								base64, base64_len);
+	if (encoded_len < 0)
+		elog(ERROR, "base64 encoding failed");
+
+	/* j never runs ahead of i, so the buffer can be rewritten in place */
+	for (i = 0; i < encoded_len; i++)
+	{
+		char		c = base64[i];
+
+		if (c != '=')
+			base64[j++] = (c == '+') ? '-' : (c == '/') ? '_' : c;
+	}
+
+	result = cstring_to_text_with_len(base64, j);
+	pfree(base64);
+
+	PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * base64url_decode - decode unpadded URL-safe Base64 (RFC 4648, sec. 5)
+ *
+ * Maps '-' to '+' and '_' to '/', re-appends any missing '=' padding,
+ * then hands the result to the standard Base64 decoder.
+ */
+Datum
+base64url_decode(PG_FUNCTION_ARGS)
+{
+	text	   *input = PG_GETARG_TEXT_PP(0);
+	const char *input_str = VARDATA_ANY(input);
+	int			input_len = VARSIZE_ANY_EXHDR(input);
+	int			pad_len;
+	int			base64_len;
+	int			decoded_len;
+	int			actual_decoded_len;
+	char	   *base64;
+	bytea	   *decoded;
+	int			i;
+
+	/* Pad to a multiple of four characters for the standard decoder */
+	pad_len = (4 - (input_len % 4)) % 4;
+	base64_len = input_len + pad_len;
+	base64 = palloc(base64_len);
+
+	for (i = 0; i < input_len; i++)
+	{
+		char		c = input_str[i];
+
+		base64[i] = (c == '-') ? '+' : (c == '_') ? '/' : c;
+	}
+	memset(base64 + input_len, '=', pad_len);
+
+	decoded_len = pg_b64_dec_len(base64_len);
+	decoded = (bytea *) palloc(VARHDRSZ + decoded_len);
+	actual_decoded_len = pg_b64_decode(base64, base64_len,
+									   VARDATA(decoded), decoded_len);
+	pfree(base64);
+
+	/* A negative result means the decoder rejected the input */
+	if (actual_decoded_len < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid base64url input")));
+
+	SET_VARSIZE(decoded, VARHDRSZ + actual_decoded_len);
+
+	PG_RETURN_BYTEA_P(decoded);
+}
 
 /*
  * HEX
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cede992b6e2..bdccf7ac16d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1184,6 +1184,13 @@
   proname => 'int8', prorettype => 'int8',
   proargtypes => 'bytea', prosrc => 'bytea_int8' },
 
+{ oid => '8590', descr => 'encode bytea as unpadded URL-safe base64 text',
+  proname => 'base64url_encode', prorettype => 'text',
+  proargtypes => 'bytea', prosrc => 'base64url_encode' },
+{ oid => '8591', descr => 'decode URL-safe base64 text into bytea',
+  proname => 'base64url_decode', prorettype => 'bytea',
+  proargtypes => 'text', prosrc => 'base64url_decode' },
+
 { oid => '449', descr => 'hash',
   proname => 'hashint2', prorettype => 'int4', proargtypes => 'int2',
   prosrc => 'hashint2' },
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index f8cba9f5b24..5fa12023b3f 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2792,3 +2792,100 @@ ERROR:  invalid Unicode code point: 2FFFFF
 SELECT unistr('wrong: \xyz');
 ERROR:  invalid Unicode escape
 HINT:  Unicode escapes must be \XXXX, \+XXXXXX, \uXXXX, or \UXXXXXXXX.
+-- base64url_encode
+SELECT base64url_encode('www.postgresql.org'::bytea);
+     base64url_encode     
+--------------------------
+ d3d3LnBvc3RncmVzcWwub3Jn
+(1 row)
+
+SELECT base64url_encode(E'\\x00'::bytea); -- Expected: 'AA'
+ base64url_encode 
+------------------
+ AA
+(1 row)
+
+SELECT base64url_encode('a'::bytea);      -- Expected: 'YQ'
+ base64url_encode 
+------------------
+ YQ
+(1 row)
+
+SELECT base64url_encode('ab'::bytea);     -- Expected: 'YWI'
+ base64url_encode 
+------------------
+ YWI
+(1 row)
+
+SELECT base64url_encode('abc'::bytea);    -- Expected: 'YWJj'
+ base64url_encode 
+------------------
+ YWJj
+(1 row)
+
+SELECT base64url_encode(''::bytea);       -- Expected: ''
+ base64url_encode 
+------------------
+ 
+(1 row)
+
+SELECT base64url_encode(
+               'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#0^&*();:<>,. []{}'::bytea
+       );
+                                                base64url_encode                                                
+----------------------------------------------------------------------------------------------------------------
+ YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NTY3ODkhQCMwXiYqKCk7Ojw-LC4gW117fQ
+(1 row)
+
+-- Expected:
+-- 'YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NTY3ODkhQCMwXiYqKCk7Ojw-'
+-- Note: The last character is '-' instead of '/' due to Base64URL encoding.
+-- base64url_decode
+SELECT base64url_decode('d3d3LnBvc3RncmVzcWwub3Jn'); -- Expected: 'www.postgresql.org'
+  base64url_decode  
+--------------------
+ www.postgresql.org
+(1 row)
+
+SELECT base64url_decode('AA');   -- Expected: E'\\x00'
+ base64url_decode 
+------------------
+ \000
+(1 row)
+
+SELECT base64url_decode('YQ');   -- Expected: 'a'
+ base64url_decode 
+------------------
+ a
+(1 row)
+
+SELECT base64url_decode('YWI');  -- Expected: 'ab'
+ base64url_decode 
+------------------
+ ab
+(1 row)
+
+SELECT base64url_decode('YWJj'); -- Expected: 'abc'
+ base64url_decode 
+------------------
+ abc
+(1 row)
+
+SELECT base64url_decode('');     -- Expected: ''
+ base64url_decode 
+------------------
+ 
+(1 row)
+
+SELECT convert_from(
+               base64url_decode(
+                       'YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NTY3ODkhQCMwXiYqKCk7Ojw-'
+               ),
+               'UTF8'
+       );
+                                convert_from                                 
+-----------------------------------------------------------------------------
+ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#0^&*();:<>
+(1 row)
+
+-- Expected: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#0^&*();:<>,. []{}'
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 4deb0683d57..5a199fcc83a 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -877,3 +877,35 @@ SELECT unistr('wrong: \udb99\u0061');
 SELECT unistr('wrong: \U0000db99\U00000061');
 SELECT unistr('wrong: \U002FFFFF');
 SELECT unistr('wrong: \xyz');
+
+-- base64url_encode
+SELECT base64url_encode('www.postgresql.org'::bytea);
+SELECT base64url_encode(E'\\x00'::bytea); -- Expected: 'AA'
+SELECT base64url_encode('a'::bytea);      -- Expected: 'YQ'
+SELECT base64url_encode('ab'::bytea);     -- Expected: 'YWI'
+SELECT base64url_encode('abc'::bytea);    -- Expected: 'YWJj'
+SELECT base64url_encode(''::bytea);       -- Expected: ''
+SELECT base64url_encode(
+               'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#0^&*();:<>,. []{}'::bytea
+       );
+-- Expected:
+-- 'YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NTY3ODkhQCMwXiYqKCk7Ojw-'
+-- Note: The last character is '-' instead of '/' due to Base64URL encoding.
+
+-- base64url_decode
+SELECT base64url_decode('d3d3LnBvc3RncmVzcWwub3Jn'); -- Expected: 'www.postgresql.org'
+
+SELECT base64url_decode('AA');   -- Expected: E'\\x00'
+SELECT base64url_decode('YQ');   -- Expected: 'a'
+SELECT base64url_decode('YWI');  -- Expected: 'ab'
+SELECT base64url_decode('YWJj'); -- Expected: 'abc'
+SELECT base64url_decode('');     -- Expected: ''
+SELECT convert_from(
+               base64url_decode(
+                       'YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NTY3ODkhQCMwXiYqKCk7Ojw-'
+               ),
+               'UTF8'
+       );
+-- Expected: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#0^&*();:<>,. []{}'
+
+
-- 
2.48.1

