On Tue, May 05, 2026 at 12:57:10PM -0500, Nathan Bossart wrote:
> On Tue, May 05, 2026 at 12:56:09PM -0400, Tom Lane wrote:
>> Lastly, I definitely agree now that the file's header comment needs
>> some work.  Maybe this insight helps you with that?  (One thing
>> I noticed is that the ending comment about "Equivalent OS-supplied
>> mutex routines could be used too" feels pretty obsolete.  Maybe
>> instead, "Equivalent compiler intrinsics are another popular option".)
> 
> I think it does help, thanks.  I'll give it a whirl.

Here is a first try.  While at it, I removed the note about using
"volatile" prior to v9.5, as well as the note about using TAS() in a loop.
I couldn't really understand why the latter part needs to be said out loud,
so I did some more digging.  It was added by commit 7f60b81e and refers to
an unsupported platform.  At the time, it was apparently normal to use
TAS() outside of s_lock.h, but the "NOT part of the API" note above it was
added the following year in commit 499abb0c.

Two other things I noticed after staring at s_lock.h for a while:

* If we're willing to define TAS_SPIN to first do an unlocked test
everywhere, we can simplify the ARM and x86 code.  Specifically, we can
merge the x86 blocks together, and we can remove all but the SPIN_DELAY
definition for AArch64.  We've thus far been hesistant to add the unlocked
test to platforms without evidence of improvements, but I'm a little
skeptical that folks have performance-critical workloads on architectures
that don't already have it.

* Furthermore, do we really need architecture-specific implementations of
anything except for SPIN_DELAY()?  I suspect that
__sync_lock_test_and_set() and __sync_lock_release() work pretty well most
of the time, and they've been available in gcc and clang for ~20 years.
Perhaps we could even start using the __atomic builtins if available.
We've been slowly trying to move away from spinlocks, anyway, and I'm not
seeing huge differences in generated code on https://godbolt.org/ for newer
compiler versions.

Of course, further research and benchmarking would be needed, but I figured
I'd at least jot down these thoughts.

-- 
nathan
>From a04b7aa50afe792615e5d41abb8238b14557062a Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Mon, 4 May 2026 16:04:04 -0500
Subject: [PATCH v3 1/3] Remove fallback declaration for tas().

The last definition of tas() in s_lock.c was removed in commit
718aa43a4e, and the last tas.s file was removed in commit
25f36066dd, so this is dead code.
---
 src/include/storage/s_lock.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index c9e52511990..dcfec8ce2af 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -697,13 +697,6 @@ extern void s_unlock(volatile slock_t *lock);
 #define SPIN_DELAY()   ((void) 0)
 #endif  /* SPIN_DELAY */
 
-#if !defined(TAS)
-extern int     tas(volatile slock_t *lock);            /* in port/.../tas.s, or
-                                                                               
                 * s_lock.c */
-
-#define TAS(lock)              tas(lock)
-#endif  /* TAS */
-
 #if !defined(TAS_SPIN)
 #define TAS_SPIN(lock) TAS(lock)
 #endif  /* TAS_SPIN */
-- 
2.50.1 (Apple Git-155)

>From f21928086155631299334d7dbbf453ddd3da65e8 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Mon, 4 May 2026 16:24:30 -0500
Subject: [PATCH v3 2/3] Remove HAS_TEST_AND_SET.

This is only set when TAS is defined, so we can just check whether
TAS is defined directly instead.
---
 src/include/storage/s_lock.h | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index dcfec8ce2af..fb872edd2f0 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -124,7 +124,6 @@
 
 
 #ifdef __i386__                /* 32-bit i386 */
-#define HAS_TEST_AND_SET
 
 typedef unsigned char slock_t;
 
@@ -194,7 +193,6 @@ spin_delay(void)
 
 
 #ifdef __x86_64__              /* AMD Opteron, Intel EM64T */
-#define HAS_TEST_AND_SET
 
 typedef unsigned char slock_t;
 
@@ -249,7 +247,6 @@ spin_delay(void)
  */
 #if defined(__arm__) || defined(__arm) || defined(__aarch64__)
 #ifdef HAVE_GCC__SYNC_INT32_TAS
-#define HAS_TEST_AND_SET
 
 #define TAS(lock) tas(lock)
 
@@ -292,7 +289,6 @@ spin_delay(void)
 
 /* S/390 and S/390x Linux (32- and 64-bit zSeries) */
 #if defined(__s390__) || defined(__s390x__)
-#define HAS_TEST_AND_SET
 
 typedef unsigned int slock_t;
 
@@ -321,7 +317,6 @@ tas(volatile slock_t *lock)
  * acquire/release semantics. The CPU will treat superfluous members as
  * NOPs, so it's just code space.
  */
-#define HAS_TEST_AND_SET
 
 typedef unsigned char slock_t;
 
@@ -392,7 +387,6 @@ do \
 
 /* PowerPC */
 #if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || 
defined(__powerpc64__)
-#define HAS_TEST_AND_SET
 
 typedef unsigned int slock_t;
 
@@ -453,7 +447,6 @@ do \
 
 
 #if defined(__mips__) && !defined(__sgi)       /* non-SGI MIPS */
-#define HAS_TEST_AND_SET
 
 typedef unsigned int slock_t;
 
@@ -531,10 +524,9 @@ do \
  * grounds that that's known to be more likely to work in the ARM ecosystem.
  * (But we dealt with ARM above.)
  */
-#if !defined(HAS_TEST_AND_SET)
+#if !defined(TAS)
 
 #if defined(HAVE_GCC__SYNC_INT32_TAS)
-#define HAS_TEST_AND_SET
 
 #define TAS(lock) tas(lock)
 
@@ -549,7 +541,6 @@ tas(volatile slock_t *lock)
 #define S_UNLOCK(lock) __sync_lock_release(lock)
 
 #elif defined(HAVE_GCC__SYNC_CHAR_TAS)
-#define HAS_TEST_AND_SET
 
 #define TAS(lock) tas(lock)
 
@@ -565,7 +556,7 @@ tas(volatile slock_t *lock)
 
 #endif  /* HAVE_GCC__SYNC_INT32_TAS */
 
-#endif /* !defined(HAS_TEST_AND_SET) */
+#endif /* !defined(TAS) */
 
 
 /*
@@ -592,12 +583,11 @@ tas(volatile slock_t *lock)
  * ---------------------------------------------------------------------
  */
 
-#if !defined(HAS_TEST_AND_SET) /* We didn't trigger above, let's try here */
+#if !defined(TAS)      /* We didn't trigger above, let's try here */
 
 #ifdef _MSC_VER
 typedef LONG slock_t;
 
-#define HAS_TEST_AND_SET
 #define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
 
 #define SPIN_DELAY() spin_delay()
@@ -649,11 +639,11 @@ spin_delay(void)
 #endif
 
 
-#endif /* !defined(HAS_TEST_AND_SET) */
+#endif /* !defined(TAS) */
 
 
 /* Blow up if we didn't have any way to do spinlocks */
-#ifndef HAS_TEST_AND_SET
+#ifndef TAS
 #error PostgreSQL does not have spinlock support on this platform.  Please 
report this to [email protected].
 #endif
 
-- 
2.50.1 (Apple Git-155)

>From 41c899ab334d156f296343f53123152d3228a810 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Thu, 7 May 2026 15:32:50 -0500
Subject: [PATCH v3 3/3] Better express platform requirements in s_lock.h.

---
 src/include/storage/s_lock.h | 56 ++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index fb872edd2f0..c4369ee4ff6 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -44,23 +44,16 @@
  *             atomic test-and-set only when it appears free.
  *
  *     TAS() and TAS_SPIN() are NOT part of the API, and should never be called
- *     directly.
- *
- *     CAUTION: on some platforms TAS() and/or TAS_SPIN() may sometimes report
- *     failure to acquire a lock even when the lock is not locked.  For 
example,
- *     on Alpha TAS() will "fail" if interrupted.  Therefore a retry loop must
- *     always be used, even if you are certain the lock is free.
+ *     directly.  If a platform-specific TAS() is defined, the platform must
+ *     _not_ define its own S_LOCK().  Conversely, if a platform-specific
+ *     S_LOCK() is defined, the platform must _not_ define its own TAS(), but
+ *     it does need to define its own TAS_SPIN().  Currently, all supported
+ *     platforms define TAS() and use the default S_LOCK() implementation, so
+ *     that is probably a good place to start if adding a new one.
  *
  *     It is the responsibility of these macros to make sure that the compiler
  *     does not re-order accesses to shared memory to precede the actual lock
- *     acquisition, or follow the lock release.  Prior to PostgreSQL 9.5, this
- *     was the caller's responsibility, which meant that callers had to use
- *     volatile-qualified pointers to refer to both the spinlock itself and the
- *     shared data being accessed within the spinlocked critical section.  This
- *     was notationally awkward, easy to forget (and thus error-prone), and
- *     prevented some useful compiler optimizations.  For these reasons, we
- *     now require that the macros themselves prevent compiler re-ordering,
- *     so that the caller doesn't need to take special precautions.
+ *     acquisition, or follow the lock release.
  *
  *     On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
  *     S_UNLOCK() macros must further include hardware-level memory fence
@@ -72,7 +65,7 @@
  *
  *     On most supported platforms, TAS() uses a tas() function written
  *     in assembly language to execute a hardware atomic-test-and-set
- *     instruction.  Equivalent OS-supplied mutex routines could be used too.
+ *     instruction.  Equivalent compiler intrinsics are another popular option.
  *
  *
  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
@@ -642,20 +635,25 @@ spin_delay(void)
 #endif /* !defined(TAS) */
 
 
-/* Blow up if we didn't have any way to do spinlocks */
-#ifndef TAS
-#error PostgreSQL does not have spinlock support on this platform.  Please 
report this to [email protected].
-#endif
-
-
 /*
  * Default Definitions - override these above as needed.
  */
 
-#if !defined(S_LOCK)
+/*
+ * Make sure S_LOCK is defined, either explicitly for the platform or via a TAS
+ * macro for the platform.  Exactly one of either S_LOCK or TAS should be
+ * defined for a supported platform at this point in the file.
+ */
+#if defined(S_LOCK)
+#if defined(TAS)
+#error Both TAS and S_LOCK defined on this platform.  Please report this to 
[email protected].
+#endif
+#elif defined(TAS)
 #define S_LOCK(lock) \
        (TAS(lock) ? s_lock((lock), __FILE__, __LINE__, __func__) : 0)
-#endif  /* S_LOCK */
+#else
+#error Neither TAS nor S_LOCK defined on this platform.  Please report this to 
[email protected].
+#endif
 
 #if !defined(S_UNLOCK)
 /*
@@ -687,9 +685,19 @@ extern void s_unlock(volatile slock_t *lock);
 #define SPIN_DELAY()   ((void) 0)
 #endif  /* SPIN_DELAY */
 
+/*
+ * We can only define TAS_SPIN if TAS was defined.  Otherwise, the platform
+ * defined its own S_LOCK without TAS, and therefore is responsible for
+ * defining its own TAS_SPIN as well.  (Note that we currently do not have any
+ * platforms that don't define TAS.)
+ */
 #if !defined(TAS_SPIN)
+#if defined(TAS)
 #define TAS_SPIN(lock) TAS(lock)
-#endif  /* TAS_SPIN */
+#else
+#error Neither TAS nor TAS_SPIN defined on this platform.  Please report this 
to [email protected].
+#endif  /* TAS */
+#endif  /* ! TAS_SPIN */
 
 
 /*
-- 
2.50.1 (Apple Git-155)

Reply via email to