From 3a508684171ae411e4e8251c717b61a8def04c1f Mon Sep 17 00:00:00 2001
From: Chiranmoy Bhattacharya <chiranmoy.bhattacharya@fujitsu.com>
Date: Mon, 9 Jun 2025 14:16:26 +0530
Subject: [PATCH v5] SVE support for hex coding

---
 config/c-compiler.m4                   |  85 ++++++++
 configure                              | 104 +++++++++
 configure.ac                           |   9 +
 meson.build                            |  81 +++++++
 src/backend/utils/adt/Makefile         |   1 +
 src/backend/utils/adt/encode.c         |   6 +-
 src/backend/utils/adt/encode_aarch64.c | 278 +++++++++++++++++++++++++
 src/backend/utils/adt/meson.build      |   1 +
 src/include/pg_config.h.in             |   3 +
 src/include/utils/builtins.h           |  51 ++++-
 10 files changed, 613 insertions(+), 6 deletions(-)
 create mode 100644 src/backend/utils/adt/encode_aarch64.c

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 5f3e1d1faf9..20e71cd8546 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -797,3 +797,88 @@ if test x"$Ac_cachevar" = x"yes"; then
 fi
 undefine([Ac_cachevar])dnl
 ])# PGAC_SVE_POPCNT_INTRINSICS
+
+# PGAC_ARM_SVE_HEX_INTRINSICS
+# ------------------------------
+# Check if the compiler supports the SVE intrinsic required for hex coding:
+# svsub_x, svcmplt, svsel, svcmpgt, svtbl, svlsr_x, svand_z, svcreate2,
+# svptest_any, svnot_z, svorr_z, svcntb, svld1, svwhilelt_b8, svst2, svld2,
+# svget2, svst1 and svlsl_x.
+#
+# If the intrinsics are supported, sets pgac_arm_sve_hex_intrinsics.
+AC_DEFUN([PGAC_ARM_SVE_HEX_INTRINSICS],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_arm_sve_hex_intrinsics])])dnl
+AC_CACHE_CHECK([for svtbl, svlsr_x, svand_z, svcreate2, etc], [Ac_cachevar],
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_sve.h>
+
+    char input@<:@64@:>@;
+    char output@<:@128@:>@;
+
+    #if defined(__has_attribute) && __has_attribute (target)
+        __attribute__((target("arch=armv8-a+sve")))
+    #endif
+    int get_hex_sve(svbool_t pred, svuint8_t vec, svuint8_t *res)
+    {
+      svuint8_t	digit = svsub_x(pred, vec, 48),
+                upper = svsub_x(pred, vec, 55),
+                lower = svsub_x(pred, vec, 87);
+      svbool_t	valid_digit = svcmplt(pred, digit, 10),
+                valid_upper = svcmplt(pred, upper, 16);
+      svuint8_t	letter = svsel(valid_upper, upper, lower);
+      svbool_t	valid_letter = svand_z(pred, svcmpgt(pred, letter, 9),
+                                            svcmplt(pred, letter, 16));
+      if (svptest_any(pred, svnot_z(pred, svorr_z(pred, valid_digit, valid_letter))))
+        return 0;
+      *res = svsel(valid_digit, digit, letter);
+      return 1;
+    }
+
+    #if defined(__has_attribute) && __has_attribute (target)
+        __attribute__((target("arch=armv8-a+sve")))
+    #endif
+    static int hex_coding_test(void)
+    {
+      int len = 64, vec_len = svcntb(), vec_len_x2 = svcntb() * 2;
+      const char	*hextbl = "0123456789abcdef";
+      svuint8_t	hextbl_vec = svld1(svwhilelt_b8(0, 16), (uint8_t *) hextbl);
+      char *src = input, *dst = output;
+
+      /* hex encode input into output */
+      for (uint64_t i = 0; i < 64; i += vec_len, dst += 2 * vec_len, src += vec_len)
+      {
+        svbool_t  pred = svwhilelt_b8((uint64_t) i, (uint64_t) len);
+        svuint8_t bytes = svld1(pred, (uint8_t *) src),
+                  high = svlsr_x(pred, bytes, 4),
+                  low = svand_z(pred, bytes, 0xF);
+        svuint8x2_t merged = svcreate2(svtbl(hextbl_vec, high), svtbl(hextbl_vec, low));
+        svst2(pred, (uint8_t *) dst, merged);
+      }
+
+      /* hex decode output back into input; src and dst are past the arrays by now */
+      len = 128;
+
+      for (int i = 0; i < len; i += vec_len_x2)
+      {
+        svbool_t    pred = svwhilelt_b8((uint64_t) i / 2, (uint64_t) len / 2);
+        svuint8x2_t bytes = svld2(pred, (uint8_t *) output + i);
+        svuint8_t   high = svget2(bytes, 0), low = svget2(bytes, 1);
+
+        if (svptest_any(pred, svorr_z(pred, svcmplt(pred, high, '0'), svcmplt(pred, low, '0'))))
+          break;
+        if (!get_hex_sve(pred, high, &high) || !get_hex_sve(pred, low, &low))
+          break;
+
+        svst1(pred, (uint8_t *) input + i / 2, svorr_z(pred, svlsl_x(pred, high, 4), low));
+      }
+
+      /* return computed value, to prevent the above being optimized away */
+      return output@<:@0@:>@;
+    }],
+  [return hex_coding_test();])],
+  [Ac_cachevar=yes],
+  [Ac_cachevar=no])])
+if test x"$Ac_cachevar" = x"yes"; then
+  pgac_arm_sve_hex_intrinsics=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_ARM_SVE_HEX_INTRINSICS
diff --git a/configure b/configure
index 4f15347cc95..4d5d6acefb5 100755
--- a/configure
+++ b/configure
@@ -17851,6 +17851,110 @@ $as_echo "#define USE_SVE_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h
   fi
 fi
 
+# Check for ARM SVE intrinsics for hex coding
+#
+if test x"$host_cpu" = x"aarch64"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for svtbl, svlsr_x, svand_z, svcreate2, etc" >&5
+$as_echo_n "checking for svtbl, svlsr_x, svand_z, svcreate2, etc... " >&6; }
+if ${pgac_cv_arm_sve_hex_intrinsics+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <arm_sve.h>
+
+    char input[64];
+    char output[128];
+
+    #if defined(__has_attribute) && __has_attribute (target)
+        __attribute__((target("arch=armv8-a+sve")))
+    #endif
+    int get_hex_sve(svbool_t pred, svuint8_t vec, svuint8_t *res)
+    {
+      svuint8_t	digit = svsub_x(pred, vec, 48),
+                upper = svsub_x(pred, vec, 55),
+                lower = svsub_x(pred, vec, 87);
+      svbool_t	valid_digit = svcmplt(pred, digit, 10),
+                valid_upper = svcmplt(pred, upper, 16);
+      svuint8_t	letter = svsel(valid_upper, upper, lower);
+      svbool_t	valid_letter = svand_z(pred, svcmpgt(pred, letter, 9),
+                                            svcmplt(pred, letter, 16));
+      if (svptest_any(pred, svnot_z(pred, svorr_z(pred, valid_digit, valid_letter))))
+        return 0;
+      *res = svsel(valid_digit, digit, letter);
+      return 1;
+    }
+
+    #if defined(__has_attribute) && __has_attribute (target)
+        __attribute__((target("arch=armv8-a+sve")))
+    #endif
+    static int hex_coding_test(void)
+    {
+      int len = 64, vec_len = svcntb(), vec_len_x2 = svcntb() * 2;
+      const char	*hextbl = "0123456789abcdef";
+      svuint8_t	hextbl_vec = svld1(svwhilelt_b8(0, 16), (uint8_t *) hextbl);
+      char *src = input, *dst = output;
+
+      /* hex encode input into output */
+      for (uint64_t i = 0; i < 64; i += vec_len, dst += 2 * vec_len, src += vec_len)
+      {
+        svbool_t  pred = svwhilelt_b8((uint64_t) i, (uint64_t) len);
+        svuint8_t bytes = svld1(pred, (uint8_t *) src),
+                  high = svlsr_x(pred, bytes, 4),
+                  low = svand_z(pred, bytes, 0xF);
+        svuint8x2_t merged = svcreate2(svtbl(hextbl_vec, high), svtbl(hextbl_vec, low));
+        svst2(pred, (uint8_t *) dst, merged);
+      }
+
+      /* hex decode output back into input; src and dst are past the arrays by now */
+      len = 128;
+
+      for (int i = 0; i < len; i += vec_len_x2)
+      {
+        svbool_t    pred = svwhilelt_b8((uint64_t) i / 2, (uint64_t) len / 2);
+        svuint8x2_t bytes = svld2(pred, (uint8_t *) output + i);
+        svuint8_t   high = svget2(bytes, 0), low = svget2(bytes, 1);
+
+        if (svptest_any(pred, svorr_z(pred, svcmplt(pred, high, '0'), svcmplt(pred, low, '0'))))
+          break;
+        if (!get_hex_sve(pred, high, &high) || !get_hex_sve(pred, low, &low))
+          break;
+
+        svst1(pred, (uint8_t *) input + i / 2, svorr_z(pred, svlsl_x(pred, high, 4), low));
+      }
+
+      /* return computed value, to prevent the above being optimized away */
+      return output[0];
+    }
+int
+main ()
+{
+return hex_coding_test();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_arm_sve_hex_intrinsics=yes
+else
+  pgac_cv_arm_sve_hex_intrinsics=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_arm_sve_hex_intrinsics" >&5
+$as_echo "$pgac_cv_arm_sve_hex_intrinsics" >&6; }
+if test x"$pgac_cv_arm_sve_hex_intrinsics" = x"yes"; then
+  pgac_arm_sve_hex_intrinsics=yes
+fi
+
+  if test x"$pgac_arm_sve_hex_intrinsics" = x"yes"; then
+
+$as_echo "#define USE_SVE_HEX_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+  fi
+fi
+
 # Check for Intel SSE 4.2 intrinsics to do CRC calculations.
 #
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u32" >&5
diff --git a/configure.ac b/configure.ac
index 4b8335dc613..fcae9b84616 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2107,6 +2107,15 @@ if test x"$host_cpu" = x"aarch64"; then
   fi
 fi
 
+# Check for ARM SVE intrinsics for hex coding
+#
+if test x"$host_cpu" = x"aarch64"; then
+  PGAC_ARM_SVE_HEX_INTRINSICS()
+  if test x"$pgac_arm_sve_hex_intrinsics" = x"yes"; then
+    AC_DEFINE(USE_SVE_HEX_WITH_RUNTIME_CHECK, 1, [Define to 1 to use ARM SVE intrinsic for hex coding.])
+  fi
+fi
+
 # Check for Intel SSE 4.2 intrinsics to do CRC calculations.
 #
 PGAC_SSE42_CRC32_INTRINSICS()
diff --git a/meson.build b/meson.build
index d142e3e408b..de2d1ebd384 100644
--- a/meson.build
+++ b/meson.build
@@ -2384,6 +2384,87 @@ int main(void)
 endif
 
 
+###############################################################
+# Check the availability of SVE intrinsics for hex coding.
+###############################################################
+
+if host_cpu == 'aarch64'
+
+  prog = '''
+#include <arm_sve.h>
+
+char input[64];
+char output[128];
+
+#if defined(__has_attribute) && __has_attribute (target)
+    __attribute__((target("arch=armv8-a+sve")))
+#endif
+int get_hex_sve(svbool_t pred, svuint8_t vec, svuint8_t *res)
+{
+	svuint8_t	digit = svsub_x(pred, vec, 48),
+				    upper = svsub_x(pred, vec, 55),
+				    lower = svsub_x(pred, vec, 87);
+	svbool_t	valid_digit = svcmplt(pred, digit, 10),
+            valid_upper = svcmplt(pred, upper, 16);
+	svuint8_t	letter = svsel(valid_upper, upper, lower);
+	svbool_t	valid_letter = svand_z(pred, svcmpgt(pred, letter, 9),
+							  				                 svcmplt(pred, letter, 16));
+	if (svptest_any(pred, svnot_z(pred, svorr_z(pred, valid_digit, valid_letter))))
+		return 0;
+	*res = svsel(valid_digit, digit, letter);
+	return 1;
+}
+
+#if defined(__has_attribute) && __has_attribute (target)
+    __attribute__((target("arch=armv8-a+sve")))
+#endif
+int main(void)
+{
+    int len = 64, vec_len = svcntb(), vec_len_x2 = svcntb() * 2;
+    const char	hextbl[] = "0123456789abcdef";
+    svuint8_t	hextbl_vec = svld1(svwhilelt_b8(0, 16), (uint8_t *) hextbl);
+    char *src = input, *dst = output;
+
+    /* hex encode input into output */
+    for (uint64_t i = 0; i < 64; i += vec_len, dst += 2 * vec_len, src += vec_len)
+    {
+      svbool_t  pred = svwhilelt_b8((uint64_t) i, (uint64_t) len);
+      svuint8_t bytes = svld1(pred, (uint8_t *) src),
+                high = svlsr_x(pred, bytes, 4),
+                low = svand_z(pred, bytes, 0xF);
+      svuint8x2_t merged = svcreate2(svtbl(hextbl_vec, high), svtbl(hextbl_vec, low));
+      svst2(pred, (uint8_t *) dst, merged);
+    }
+
+    /* hex decode output back into input; src and dst are past the arrays by now */
+    len = 128;
+
+    for (int i = 0; i < len; i += vec_len_x2)
+    {
+      svbool_t    pred = svwhilelt_b8((uint64_t) i / 2, (uint64_t) len / 2);
+      svuint8x2_t bytes = svld2(pred, (uint8_t *) output + i);
+      svuint8_t   high = svget2(bytes, 0), low = svget2(bytes, 1);
+
+      if (svptest_any(pred, svorr_z(pred, svcmplt(pred, high, '0'), svcmplt(pred, low, '0'))))
+        break;
+      if (!get_hex_sve(pred, high, &high) || !get_hex_sve(pred, low, &low))
+        break;
+
+      svst1(pred, (uint8_t *) input + i / 2, svorr_z(pred, svlsl_x(pred, high, 4), low));
+    }
+
+    /* return computed value, to prevent the above being optimized away */
+    return output[0];
+}
+'''
+
+  if cc.links(prog, name: 'SVE hex coding', args: test_c_args)
+    cdata.set('USE_SVE_HEX_WITH_RUNTIME_CHECK', 1)
+  endif
+
+endif
+
+
 ###############################################################
 # Select CRC-32C implementation.
 #
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 4a233b63c32..2a3ba1d4485 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -32,6 +32,7 @@ OBJS = \
 	dbsize.o \
 	domains.o \
 	encode.o \
+	encode_aarch64.o \
 	enum.o \
 	expandeddatum.o \
 	expandedrecord.o \
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 4ccaed815d1..fa62ce3107d 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -178,7 +178,7 @@ static const int8 hexlookup[128] = {
 };
 
 uint64
-hex_encode(const char *src, size_t len, char *dst)
+hex_encode_scalar(const char *src, size_t len, char *dst)
 {
 	const char *end = src + len;
 
@@ -208,13 +208,13 @@ get_hex(const char *cp, char *out)
 }
 
 uint64
-hex_decode(const char *src, size_t len, char *dst)
+hex_decode_scalar(const char *src, size_t len, char *dst)
 {
-	return hex_decode_safe(src, len, dst, NULL);
+	return hex_decode_safe_scalar(src, len, dst, NULL);	/* stay scalar: hex_decode_safe() is now the dispatching inline */
 }
 
 uint64
-hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
+hex_decode_safe_scalar(const char *src, size_t len, char *dst, Node *escontext)
 {
 	const char *s,
 			   *srcend;
diff --git a/src/backend/utils/adt/encode_aarch64.c b/src/backend/utils/adt/encode_aarch64.c
new file mode 100644
index 00000000000..574a7550469
--- /dev/null
+++ b/src/backend/utils/adt/encode_aarch64.c
@@ -0,0 +1,278 @@
+/*-------------------------------------------------------------------------
+ *
+ * encode_aarch64.c
+ *	  Holds the SVE hex encode/decode implementations.
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/encode_aarch64.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <c.h>
+
+#include "utils/builtins.h"
+
+#ifdef USE_SVE_HEX_WITH_RUNTIME_CHECK
+#include <arm_sve.h>
+
+#if defined(HAVE_ELF_AUX_INFO) || defined(HAVE_GETAUXVAL)
+#include <sys/auxv.h>
+#endif
+
+/*
+ * These are the SVE implementations of the hex encode/decode functions.
+ */
+static uint64 hex_encode_sve(const char *src, size_t len, char *dst);
+static uint64 hex_decode_sve(const char *src, size_t len, char *dst);
+static uint64 hex_decode_safe_sve(const char *src, size_t len, char *dst, Node *escontext);
+
+/*
+ * The function pointers are initially set to "choose" functions.  These
+ * functions will first set the pointers to the right implementations (based on
+ * what the current CPU supports) and then will call the pointer to fulfill the
+ * caller's request.
+ */
+
+static uint64 hex_encode_choose(const char *src, size_t len, char *dst);
+static uint64 hex_decode_choose(const char *src, size_t len, char *dst);
+static uint64 hex_decode_safe_choose(const char *src, size_t len, char *dst, Node *escontext);
+uint64 		(*hex_encode_optimized) (const char *src, size_t len, char *dst) = hex_encode_choose;
+uint64 		(*hex_decode_optimized) (const char *src, size_t len, char *dst) = hex_decode_choose;
+uint64 		(*hex_decode_safe_optimized) (const char *src, size_t len, char *dst, Node *escontext) = hex_decode_safe_choose;
+
+/* Runtime probe: does the OS report the HWCAP_SVE bit in AT_HWCAP? */
+static inline bool
+check_sve_support(void)
+{
+#if defined(HAVE_ELF_AUX_INFO)
+	unsigned long hwcap;
+
+	return elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)) == 0 ? (hwcap & HWCAP_SVE) != 0 : false;
+#elif defined(HAVE_GETAUXVAL)
+	return (getauxval(AT_HWCAP) & HWCAP_SVE) != 0;
+#else
+	return false;				/* neither auxv interface is available */
+#endif
+}
+
+/*
+ * choose_hex_functions
+ *		Point hex_encode_optimized, hex_decode_optimized and
+ *		hex_decode_safe_optimized at the SVE implementations when
+ *		check_sve_support() reports SVE, and at the scalar ones otherwise.
+ */
+static inline void
+choose_hex_functions(void)
+{
+	const bool	use_sve = check_sve_support();
+
+	/* All three pointers are switched by the same decision. */
+	hex_encode_optimized = use_sve ? hex_encode_sve : hex_encode_scalar;
+	hex_decode_optimized = use_sve ? hex_decode_sve : hex_decode_scalar;
+	hex_decode_safe_optimized = use_sve ? hex_decode_safe_sve : hex_decode_safe_scalar;
+}
+
+static uint64
+hex_encode_choose(const char *src, size_t len, char *dst)
+{
+	choose_hex_functions();		/* repoints hex_encode_optimized away from this stub */
+	return hex_encode_optimized(src, len, dst);	/* forward to the chosen implementation */
+}
+static uint64
+hex_decode_choose(const char *src, size_t len, char *dst)
+{
+	choose_hex_functions();		/* repoints hex_decode_optimized away from this stub */
+	return hex_decode_optimized(src, len, dst);	/* forward to the chosen implementation */
+}
+static uint64
+hex_decode_safe_choose(const char *src, size_t len, char *dst, Node *escontext)
+{
+	choose_hex_functions();		/* repoints hex_decode_safe_optimized away from this stub */
+	return hex_decode_safe_optimized(src, len, dst, escontext);	/* forward to the chosen implementation */
+}
+
+/* Hex-encode the bytes selected by 'pred' at 'src' into digit pairs at 'dst'. */
+pg_attribute_target("arch=armv8-a+sve")
+static inline void
+hex_encode_sve_step(svbool_t pred, svuint8_t hextbl_vec, const uint8 *src, uint8 *dst)
+{
+	svuint8_t	bytes = svld1(pred, src);
+	/* Right-shift to obtain the high nibble */
+	svuint8_t	high = svlsr_x(pred, bytes, 4);
+	/* Mask the high nibble to obtain the low nibble */
+	svuint8_t	low = svand_z(pred, bytes, 0xF);
+
+	/*
+	 * Convert the high and low nibbles to hexadecimal digits using a
+	 * vectorized table lookup and zip (interleave) the hexadecimal digits.
+	 */
+	svst2(pred, dst, svcreate2(svtbl(hextbl_vec, high), svtbl(hextbl_vec, low)));
+}
+
+/*
+ * hex_encode_sve
+ *		SVE version of hex_encode: stores two lowercase hex digits in 'dst'
+ *		for each of the 'len' bytes at 'src' and returns 2 * len.
+ */
+pg_attribute_target("arch=armv8-a+sve")
+uint64
+hex_encode_sve(const char *src, size_t len, char *dst)
+{
+	static const char hextbl[] = "0123456789abcdef";
+	const size_t vec_len = svcntb();
+	const size_t chunk = 2 * vec_len;	/* bytes per unrolled iteration (two vectors) */
+	const uint8 *in = (const uint8 *) src;
+	uint8	   *out = (uint8 *) dst;
+	svuint8_t	hextbl_vec = svld1(svwhilelt_b8(0, 16), (const uint8 *) hextbl);
+	svbool_t	pred = svptrue_b8();
+
+	/*
+	 * Round len down to whole chunks with a size_t modulo.  A uint32 mask such
+	 * as ~(2 * vec_len - 1) would be zero-extended against len, clearing its
+	 * upper 32 bits, and only rounds correctly for power-of-two vector lengths.
+	 */
+	size_t		loop_bytes = len - len % chunk;
+
+	for (size_t i = 0; i < loop_bytes; i += chunk)
+	{
+		hex_encode_sve_step(pred, hextbl_vec, in, out);
+		hex_encode_sve_step(pred, hextbl_vec, in + vec_len, out + 2 * vec_len);
+		in += chunk;
+		out += 2 * chunk;
+	}
+
+	/* process remaining tail bytes; the predicate masks off lanes beyond len */
+	for (size_t i = loop_bytes; i < len; i += vec_len)
+	{
+		pred = svwhilelt_b8((uint64) i, (uint64) len);
+		hex_encode_sve_step(pred, hextbl_vec, in, out);
+		in += vec_len;
+		out += 2 * vec_len;
+	}
+
+	return (uint64) len * 2;
+}
+
+/*
+ * get_hex_sve
+ *		Converts the hex digits in the active lanes of 'vec' to nibbles in '*res'
+ *		and returns true; returns false, leaving '*res' unset, on any non-digit.
+ */
+pg_attribute_target("arch=armv8-a+sve")
+static inline bool
+get_hex_sve(svbool_t pred, svuint8_t vec, svuint8_t *res)
+{
+	/*
+	 * Convert ASCII of '0'-'9' to integers 0-9 by subtracting 48 (ASCII of '0').
+	 * Similarly, convert letters 'A'-'F' and 'a'-'f' to integers 10-15 by
+	 * subtracting 55 ('A' - 10) and 87 ('a' - 10).
+	 */
+	svuint8_t	digit = svsub_x(pred, vec, '0'),
+				upper = svsub_x(pred, vec, 'A' - 10),
+				lower = svsub_x(pred, vec, 'a' - 10);
+
+	/*
+	 * Identify valid values in the digit and upper vectors: 0-9 is valid in
+	 * digit, and anything below 16 is accepted in upper for now.  The lanes
+	 * are unsigned bytes, so a subtraction that goes negative wraps around.
+	 *
+	 * Example:
+	 *		vec:				'0'  '9'  'A'  'F'  'a'  'f'
+	 *		vec (in ASCII):		48   57   65   70   97   102
+	 *
+	 *		digit:				0    9    17   22   49   54
+	 *		valid_digit:		1    1    0    0    0    0
+	 *
+	 *		upper:				249  2    10   15   42   47
+	 *		valid_upper:		0    1    1    1    0    0
+	 *
+	 *		lower:				217  226  234  239  10   15
+	 *
+	 * Note that values 0-9 are also marked valid in valid_upper, this will be
+	 * handled later.
+	 */
+	svbool_t	valid_digit = svcmplt(pred, digit, 10),
+				valid_upper = svcmplt(pred, upper, 16);
+
+	/*
+	 * Merge the upper and lower vectors: take the element from upper where
+	 * valid_upper is true, otherwise the element from lower.
+	 *
+	 * Then mark the valid range, i.e. 10-15, in the letter vector.
+	 *
+	 *		letter:				217  2    10   15   10   15
+	 *		valid_letter:		0    0    1    1    1    1
+	 */
+
+	svuint8_t	letter = svsel(valid_upper, upper, lower);
+	svbool_t	valid_letter = svand_z(pred, svcmpgt(pred, letter, 9),
+											 svcmplt(pred, letter, 16));
+
+	/*
+	 * Check for invalid hexadecimal digits.  Each active lane must fall within
+	 * 0-9 (true in valid_digit) or 10-15 (true in valid_letter), i.e. the OR
+	 * of valid_digit and valid_letter must be true in every active lane.
+	 */
+
+	if (svptest_any(pred, svnot_z(pred, svorr_z(pred, valid_digit, valid_letter))))
+		return false;
+
+	/*
+	 * Finally, combine the digit and letter vectors using the logic:
+	 * take the element from digit where valid_digit is true, otherwise the
+	 * element from letter.
+	 *
+	 *		res:				0    9    10   15   10   15
+	 */
+
+	*res = svsel(valid_digit, digit, letter);
+	return true;
+}
+
+uint64
+hex_decode_sve(const char *src, size_t len, char *dst)
+{
+	return hex_decode_safe_sve(src, len, dst, NULL);	/* same decoder, called with a NULL escontext */
+}
+
+/*
+ * hex_decode_safe_sve
+ *		SVE hex decoder.  Whatever the vector loop does not consume is handed to
+ *		hex_decode_safe_scalar; returns the bytes stored here plus its result.
+ */
+pg_attribute_target("arch=armv8-a+sve")
+uint64
+hex_decode_safe_sve(const char *src, size_t len, char *dst, Node *escontext)
+{
+	size_t		vec_len = svcntb();
+	size_t		loop_bytes = len - len % (2 * vec_len); /* whole 2 * vec_len chunks */
+	svbool_t	pred = svptrue_b8();
+	const char *p = dst;
+	size_t		i;				/* source bytes consumed by the vector loop */
+
+	for (i = 0; i < loop_bytes; i += 2 * vec_len, src += 2 * vec_len, dst += vec_len)
+	{
+		svuint8x2_t bytes = svld2(pred, (const uint8 *) src);
+		svuint8_t	high = svget2(bytes, 0),
+					low = svget2(bytes, 1);
+
+		/* fallback for bytes below '0' or any other invalid hexadecimal digit */
+		if (svptest_any(pred, svorr_z(pred, svcmplt(pred, high, '0'), svcmplt(pred, low, '0'))) ||
+			!get_hex_sve(pred, high, &high) || !get_hex_sve(pred, low, &low))
+			break;
+
+		/* form the byte by left-shifting the high nibble and OR-ing it with the low nibble */
+		svst1(pred, (uint8 *) dst, svorr_z(pred, svlsl_x(pred, high, 4), low));
+	}
+
+	/* On an early break i < loop_bytes, so pass len - i, not len - loop_bytes. */
+	if (i < len)
+		return dst - p + hex_decode_safe_scalar(src, len - i, dst, escontext);
+	return dst - p;
+}
+
+#endif	/* USE_SVE_HEX_WITH_RUNTIME_CHECK */
diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build
index 244f48f4fd7..ea88dd77390 100644
--- a/src/backend/utils/adt/meson.build
+++ b/src/backend/utils/adt/meson.build
@@ -21,6 +21,7 @@ backend_sources += files(
   'dbsize.c',
   'domains.c',
   'encode.c',
+  'encode_aarch64.c',
   'enum.c',
   'expandeddatum.c',
   'expandedrecord.c',
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 726a7c1be1f..7a227f1875f 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -675,6 +675,9 @@
 /* Define to 1 to use AVX-512 popcount instructions with a runtime check. */
 #undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
 
+/* Define to 1 to use SVE instructions for hex coding with a runtime check. */
+#undef USE_SVE_HEX_WITH_RUNTIME_CHECK
+
 /* Define to 1 to build with Bonjour support. (--with-bonjour) */
 #undef USE_BONJOUR
 
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 1c98c7d2255..2f72d8df9d1 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -35,11 +35,56 @@ extern int	errdatatype(Oid datatypeOid);
 extern int	errdomainconstraint(Oid datatypeOid, const char *conname);
 
 /* encode.c */
-extern uint64 hex_encode(const char *src, size_t len, char *dst);
-extern uint64 hex_decode(const char *src, size_t len, char *dst);
-extern uint64 hex_decode_safe(const char *src, size_t len, char *dst,
+extern uint64 hex_encode_scalar(const char *src, size_t len, char *dst);
+extern uint64 hex_decode_scalar(const char *src, size_t len, char *dst);
+extern uint64 hex_decode_safe_scalar(const char *src, size_t len, char *dst,
 							  Node *escontext);
 
+/*
+ * On AArch64, we can try to use an SVE optimized hex encode/decode on some systems.
+ */
+#ifdef USE_SVE_HEX_WITH_RUNTIME_CHECK
+extern PGDLLIMPORT uint64 (*hex_encode_optimized) (const char *src, size_t len, char *dst);
+extern PGDLLIMPORT uint64 (*hex_decode_optimized) (const char *src, size_t len, char *dst);
+extern PGDLLIMPORT uint64 (*hex_decode_safe_optimized) (const char *src, size_t len, char *dst, Node *escontext);
+#endif
+
+static inline uint64
+hex_encode(const char *src, size_t len, char *dst)
+{
+#ifdef USE_SVE_HEX_WITH_RUNTIME_CHECK
+	const size_t threshold = 16;	/* shorter inputs skip the indirect call */
+
+	if (len >= threshold)
+		return hex_encode_optimized(src, len, dst);
+#endif
+	return hex_encode_scalar(src, len, dst);
+}
+
+static inline uint64
+hex_decode(const char *src, size_t len, char *dst)
+{
+#ifdef USE_SVE_HEX_WITH_RUNTIME_CHECK
+	const size_t threshold = 32;	/* shorter inputs skip the indirect call */
+
+	if (len >= threshold)
+		return hex_decode_optimized(src, len, dst);
+#endif
+	return hex_decode_scalar(src, len, dst);
+}
+
+static inline uint64
+hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
+{
+#ifdef USE_SVE_HEX_WITH_RUNTIME_CHECK
+	const size_t threshold = 32;	/* shorter inputs skip the indirect call */
+
+	if (len >= threshold)
+		return hex_decode_safe_optimized(src, len, dst, escontext);
+#endif
+	return hex_decode_safe_scalar(src, len, dst, escontext);
+}
+
 /* int.c */
 extern int2vector *buildint2vector(const int16 *int2s, int n);
 
-- 
2.34.1

