On Fri, Nov 10, 2023 at 08:55:29PM -0600, Nathan Bossart wrote:
> On Fri, Nov 10, 2023 at 06:48:39PM -0800, Andres Freund wrote:
>> Yes. We should optimize pg_atomic_exchange_u32() one of these days - it can 
>> be
>> done *far* faster than a cmpxchg. When I was adding the atomic abstraction
>> there was concern with utilizing too many different atomic instructions. I
>> didn't really agree back then, but these days I really don't see a reason to
>> not use a few more intrinsics.
> 
> I might give this a try, if for no other reason than it'd force me to
> improve my mental model of this stuff.  :)

Here's a first draft.  I haven't attempted to add implementations for
PowerPC, and I only added the __atomic version for gcc since
__sync_lock_test_and_set() only supports setting the value to 1 on some
platforms.  Otherwise, I tried to add specialized atomic exchange
implementations wherever there existed other specialized atomic
implementations.

I haven't done any sort of performance testing on this yet.  Some
preliminary web searches suggest that there is unlikely to be much
difference between cmpxchg and xchg, but presumably there's some difference
between xchg and doing cmpxchg in a while loop (as is done in
atomics/generic.h today).  I'll report back once I've had a chance to do
some testing...

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From 44374af7026642f2cffa28dccea71f2d370a3ce1 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Wed, 29 Nov 2023 15:00:58 -0600
Subject: [PATCH v1 1/1] optimized atomic exchange

---
 config/c-compiler.m4                      | 34 ++++++++++-
 configure                                 | 72 +++++++++++++++++++++--
 configure.ac                              |  2 +
 meson.build                               | 12 ++++
 src/include/pg_config.h.in                |  6 ++
 src/include/port/atomics/arch-x86.h       | 28 +++++++++
 src/include/port/atomics/generic-gcc.h    | 28 +++++++++
 src/include/port/atomics/generic-msvc.h   | 18 ++++++
 src/include/port/atomics/generic-sunpro.h | 14 +++++
 src/tools/msvc/Solution.pm                |  2 +
 10 files changed, 210 insertions(+), 6 deletions(-)

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 5db02b2ab7..440c90d411 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -571,7 +571,7 @@ fi])# PGAC_HAVE_GCC__SYNC_INT64_CAS
 # Check if the C compiler understands __atomic_compare_exchange_n() for 32bit
 # types, and define HAVE_GCC__ATOMIC_INT32_CAS if so.
 AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT32_CAS],
-[AC_CACHE_CHECK(for builtin __atomic int32 atomic operations, pgac_cv_gcc_atomic_int32_cas,
+[AC_CACHE_CHECK(for builtin __atomic int32 atomic compare/exchange, pgac_cv_gcc_atomic_int32_cas,
 [AC_LINK_IFELSE([AC_LANG_PROGRAM([],
   [int val = 0;
    int expect = 0;
@@ -587,7 +587,7 @@ fi])# PGAC_HAVE_GCC__ATOMIC_INT32_CAS
 # Check if the C compiler understands __atomic_compare_exchange_n() for 64bit
 # types, and define HAVE_GCC__ATOMIC_INT64_CAS if so.
 AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT64_CAS],
-[AC_CACHE_CHECK(for builtin __atomic int64 atomic operations, pgac_cv_gcc_atomic_int64_cas,
+[AC_CACHE_CHECK(for builtin __atomic int64 atomic compare/exchange, pgac_cv_gcc_atomic_int64_cas,
 [AC_LINK_IFELSE([AC_LANG_PROGRAM([],
   [PG_INT64_TYPE val = 0;
    PG_INT64_TYPE expect = 0;
@@ -598,6 +598,36 @@ if test x"$pgac_cv_gcc_atomic_int64_cas" = x"yes"; then
   AC_DEFINE(HAVE_GCC__ATOMIC_INT64_CAS, 1, [Define to 1 if you have __atomic_compare_exchange_n(int64 *, int64 *, int64).])
 fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
 
+# PGAC_HAVE_GCC__ATOMIC_INT32_XCHG
+# --------------------------------
+# Check if the C compiler understands __atomic_exchange_n() for 32bit types,
+# and define HAVE_GCC__ATOMIC_INT32_XCHG if so.
+AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT32_XCHG],
+[AC_CACHE_CHECK(for builtin __atomic int32 atomic exchange, pgac_cv_gcc_atomic_int32_xchg,
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+  [int val = 0;
+   __atomic_exchange_n(&val, 37, __ATOMIC_SEQ_CST);])],
+  [pgac_cv_gcc_atomic_int32_xchg="yes"],
+  [pgac_cv_gcc_atomic_int32_xchg="no"])])
+if test x"$pgac_cv_gcc_atomic_int32_xchg" = x"yes"; then
+  AC_DEFINE(HAVE_GCC__ATOMIC_INT32_XCHG, 1, [Define to 1 if you have __atomic_exchange_n(int *, int).])
+fi])# PGAC_HAVE_GCC__ATOMIC_INT32_XCHG
+
+# PGAC_HAVE_GCC__ATOMIC_INT64_XCHG
+# --------------------------------
+# Check if the C compiler understands __atomic_exchange_n() for 64bit types,
+# and define HAVE_GCC__ATOMIC_INT64_XCHG if so.
+AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT64_XCHG],
+[AC_CACHE_CHECK(for builtin __atomic int64 atomic exchange, pgac_cv_gcc_atomic_int64_xchg,
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+  [PG_INT64_TYPE val = 0;
+   __atomic_exchange_n(&val, (PG_INT64_TYPE) 37, __ATOMIC_SEQ_CST);])],
+  [pgac_cv_gcc_atomic_int64_xchg="yes"],
+  [pgac_cv_gcc_atomic_int64_xchg="no"])])
+if test x"$pgac_cv_gcc_atomic_int64_xchg" = x"yes"; then
+  AC_DEFINE(HAVE_GCC__ATOMIC_INT64_XCHG, 1, [Define to 1 if you have __atomic_exchange_n(int64 *, int64).])
+fi])# PGAC_HAVE_GCC__ATOMIC_INT64_XCHG
+
 # PGAC_SSE42_CRC32_INTRINSICS
 # ---------------------------
 # Check if the compiler supports the x86 CRC instructions added in SSE 4.2,
diff --git a/configure b/configure
index 217704e9ca..d487e09b6e 100755
--- a/configure
+++ b/configure
@@ -17600,8 +17600,8 @@ if test x"$pgac_cv_gcc_sync_int64_cas" = x"yes"; then
 $as_echo "#define HAVE_GCC__SYNC_INT64_CAS 1" >>confdefs.h
 
 fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for builtin __atomic int32 atomic operations" >&5
-$as_echo_n "checking for builtin __atomic int32 atomic operations... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for builtin __atomic int32 atomic compare/exchange" >&5
+$as_echo_n "checking for builtin __atomic int32 atomic compare/exchange... " >&6; }
 if ${pgac_cv_gcc_atomic_int32_cas+:} false; then :
   $as_echo_n "(cached) " >&6
 else
@@ -17633,8 +17633,8 @@ if test x"$pgac_cv_gcc_atomic_int32_cas" = x"yes"; then
 $as_echo "#define HAVE_GCC__ATOMIC_INT32_CAS 1" >>confdefs.h
 
 fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for builtin __atomic int64 atomic operations" >&5
-$as_echo_n "checking for builtin __atomic int64 atomic operations... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for builtin __atomic int64 atomic compare/exchange" >&5
+$as_echo_n "checking for builtin __atomic int64 atomic compare/exchange... " >&6; }
 if ${pgac_cv_gcc_atomic_int64_cas+:} false; then :
   $as_echo_n "(cached) " >&6
 else
@@ -17665,6 +17665,70 @@ if test x"$pgac_cv_gcc_atomic_int64_cas" = x"yes"; then
 
 $as_echo "#define HAVE_GCC__ATOMIC_INT64_CAS 1" >>confdefs.h
 
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for builtin __atomic int32 atomic exchange" >&5
+$as_echo_n "checking for builtin __atomic int32 atomic exchange... " >&6; }
+if ${pgac_cv_gcc_atomic_int32_xchg+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+int val = 0;
+   __atomic_exchange_n(&val, 37, __ATOMIC_SEQ_CST);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_gcc_atomic_int32_xchg="yes"
+else
+  pgac_cv_gcc_atomic_int32_xchg="no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_gcc_atomic_int32_xchg" >&5
+$as_echo "$pgac_cv_gcc_atomic_int32_xchg" >&6; }
+if test x"$pgac_cv_gcc_atomic_int32_xchg" = x"yes"; then
+
+$as_echo "#define HAVE_GCC__ATOMIC_INT32_XCHG 1" >>confdefs.h
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for builtin __atomic int64 atomic exchange" >&5
+$as_echo_n "checking for builtin __atomic int64 atomic exchange... " >&6; }
+if ${pgac_cv_gcc_atomic_int64_xchg+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+PG_INT64_TYPE val = 0;
+   __atomic_exchange_n(&val, (PG_INT64_TYPE) 37, __ATOMIC_SEQ_CST);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_gcc_atomic_int64_xchg="yes"
+else
+  pgac_cv_gcc_atomic_int64_xchg="no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_gcc_atomic_int64_xchg" >&5
+$as_echo "$pgac_cv_gcc_atomic_int64_xchg" >&6; }
+if test x"$pgac_cv_gcc_atomic_int64_xchg" = x"yes"; then
+
+$as_echo "#define HAVE_GCC__ATOMIC_INT64_XCHG 1" >>confdefs.h
+
 fi
 
 
diff --git a/configure.ac b/configure.ac
index e49de9e4f0..2234bda39a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2046,6 +2046,8 @@ PGAC_HAVE_GCC__SYNC_INT32_CAS
 PGAC_HAVE_GCC__SYNC_INT64_CAS
 PGAC_HAVE_GCC__ATOMIC_INT32_CAS
 PGAC_HAVE_GCC__ATOMIC_INT64_CAS
+PGAC_HAVE_GCC__ATOMIC_INT32_XCHG
+PGAC_HAVE_GCC__ATOMIC_INT64_XCHG
 
 
 # Check for x86 cpuid instruction
diff --git a/meson.build b/meson.build
index 0095fb183a..9e73b6eca2 100644
--- a/meson.build
+++ b/meson.build
@@ -1957,12 +1957,24 @@ int val = 0;
 int expect = 0;
 __atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);'''},
 
+    {'name': 'HAVE_GCC__ATOMIC_INT32_XCHG',
+     'desc': '__atomic_exchange_n(int32)',
+     'test': '''
+int val = 0;
+__atomic_exchange_n(&val, 37, __ATOMIC_SEQ_CST);'''},
+
     {'name': 'HAVE_GCC__ATOMIC_INT64_CAS',
      'desc': ' __atomic_compare_exchange_n(int64)',
      'test': '''
 INT64 val = 0;
 INT64 expect = 0;
 __atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);'''},
+
+    {'name': 'HAVE_GCC__ATOMIC_INT64_XCHG',
+     'desc': '__atomic_exchange_n(int64)',
+     'test': '''
+INT64 val = 0;
+__atomic_exchange_n(&val, (INT64) 37, __ATOMIC_SEQ_CST);'''},
   ]
 
   foreach check : atomic_checks
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 5f16918243..950c32a1dc 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -154,10 +154,16 @@
 /* Define to 1 if you have __atomic_compare_exchange_n(int *, int *, int). */
 #undef HAVE_GCC__ATOMIC_INT32_CAS
 
+/* Define to 1 if you have __atomic_exchange_n(int *, int). */
+#undef HAVE_GCC__ATOMIC_INT32_XCHG
+
 /* Define to 1 if you have __atomic_compare_exchange_n(int64 *, int64 *,
    int64). */
 #undef HAVE_GCC__ATOMIC_INT64_CAS
 
+/* Define to 1 if you have __atomic_exchange_n(int64 *, int64). */
+#undef HAVE_GCC__ATOMIC_INT64_XCHG
+
 /* Define to 1 if you have __sync_lock_test_and_set(char *) and friends. */
 #undef HAVE_GCC__SYNC_CHAR_TAS
 
diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index bb84b9bad8..8fce96228b 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -184,6 +184,20 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 	return (bool) ret;
 }
 
+#define PG_HAVE_ATOMIC_EXCHANGE_U32
+static inline uint32
+pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 newval)
+{
+	uint32 res;
+	__asm__ __volatile__(
+		"	lock				\n"
+		"	xchgl		%0,%1	\n"
+:		"=q" (res), "=m" (ptr->value)
+:		"0"(newval), "m" (ptr->value)
+:		"memory");
+	return res;
+}
+
 #define PG_HAVE_ATOMIC_FETCH_ADD_U32
 static inline uint32
 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
@@ -221,6 +235,20 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 	return (bool) ret;
 }
 
+#define PG_HAVE_ATOMIC_EXCHANGE_U64
+static inline uint64
+pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 newval)
+{
+	uint64 res;
+	__asm__ __volatile__(
+		"   lock				\n"
+		"   xchgq		%0,%1	\n"
+:		"=q" (res), "=m" (ptr->value)
+:		"0"(newval), "m" (ptr->value)
+:		"memory");
+	return res;
+}
+
 #define PG_HAVE_ATOMIC_FETCH_ADD_U64
 static inline uint64
 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h
index da04e9f0dc..c35bbdc399 100644
--- a/src/include/port/atomics/generic-gcc.h
+++ b/src/include/port/atomics/generic-gcc.h
@@ -176,6 +176,20 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 }
 #endif
 
+/*
+ * __sync_lock_test_and_set() only supports setting the value to 1 on some
+ * platforms, so we only provide an __atomic implementation for
+ * pg_atomic_exchange.
+ */
+#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U32) && defined(HAVE_GCC__ATOMIC_INT32_XCHG)
+#define PG_HAVE_ATOMIC_EXCHANGE_U32
+static inline uint32
+pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 newval)
+{
+	return __atomic_exchange_n(&ptr->value, newval, __ATOMIC_SEQ_CST);
+}
+#endif
+
 /* if we have 32-bit __sync_val_compare_and_swap, assume we have these too: */
 
 #if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
@@ -243,6 +257,20 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 }
 #endif
 
+/*
+ * __sync_lock_test_and_set() only supports setting the value to 1 on some
+ * platforms, so we only provide an __atomic implementation for
+ * pg_atomic_exchange.
+ */
+#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U64) && defined(HAVE_GCC__ATOMIC_INT64_XCHG)
+#define PG_HAVE_ATOMIC_EXCHANGE_U64
+static inline uint64
+pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 newval)
+{
+	return __atomic_exchange_n(&ptr->value, newval, __ATOMIC_SEQ_CST);
+}
+#endif
+
 /* if we have 64-bit __sync_val_compare_and_swap, assume we have these too: */
 
 #if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
diff --git a/src/include/port/atomics/generic-msvc.h b/src/include/port/atomics/generic-msvc.h
index 8835f4ceea..7566d6f937 100644
--- a/src/include/port/atomics/generic-msvc.h
+++ b/src/include/port/atomics/generic-msvc.h
@@ -58,6 +58,13 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 	return ret;
 }
 
+#define PG_HAVE_ATOMIC_EXCHANGE_U32
+static inline uint32
+pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 newval)
+{
+	return InterlockedExchange(&ptr->value, newval);
+}
+
 #define PG_HAVE_ATOMIC_FETCH_ADD_U32
 static inline uint32
 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
@@ -88,6 +95,16 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 
 /* Only implemented on 64bit builds */
 #ifdef _WIN64
+
+#pragma intrinsic(_InterlockedExchange64)
+
+#define PG_HAVE_ATOMIC_EXCHANGE_U64
+static inline uint64
+pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 newval)
+{
+	return _InterlockedExchange64(&ptr->value, newval);
+}
+
 #pragma intrinsic(_InterlockedExchangeAdd64)
 
 #define PG_HAVE_ATOMIC_FETCH_ADD_U64
@@ -96,6 +113,7 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 {
 	return _InterlockedExchangeAdd64(&ptr->value, add_);
 }
+
 #endif /* _WIN64 */
 
 #endif /* HAVE_ATOMICS */
diff --git a/src/include/port/atomics/generic-sunpro.h b/src/include/port/atomics/generic-sunpro.h
index 30f7d8b536..5d41b73de0 100644
--- a/src/include/port/atomics/generic-sunpro.h
+++ b/src/include/port/atomics/generic-sunpro.h
@@ -87,6 +87,13 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 	return ret;
 }
 
+#define PG_HAVE_ATOMIC_EXCHANGE_U32
+static inline uint32
+pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 newval)
+{
+	return atomic_swap_32(&ptr->value, newval);
+}
+
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
 static inline bool
 pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
@@ -101,6 +108,13 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 	return ret;
 }
 
+#define PG_HAVE_ATOMIC_EXCHANGE_U64
+static inline uint64
+pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 newval)
+{
+	return atomic_swap_64(&ptr->value, newval);
+}
+
 #endif /* HAVE_ATOMIC_H */
 
 #endif /* defined(HAVE_ATOMICS) */
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index a980b51f5d..2d824b201e 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -249,7 +249,9 @@ sub GenerateFiles
 		HAVE_EXPLICIT_BZERO => undef,
 		HAVE_FSEEKO => 1,
 		HAVE_GCC__ATOMIC_INT32_CAS => undef,
+		HAVE_GCC__ATOMIC_INT32_XCHG => undef,
 		HAVE_GCC__ATOMIC_INT64_CAS => undef,
+		HAVE_GCC__ATOMIC_INT64_XCHG => undef,
 		HAVE_GCC__SYNC_CHAR_TAS => undef,
 		HAVE_GCC__SYNC_INT32_CAS => undef,
 		HAVE_GCC__SYNC_INT32_TAS => undef,
-- 
2.25.1

Reply via email to