Hi, There are some similarities between this and https://www.postgresql.org/message-id/20240207184050.rkvpuudq7huijmkb%40awork3.anarazel.de as described in that email.
On 2024-01-25 15:24:17 +0800, Andy Fan wrote: > From 4c8fd0ab71299e57fbeb18dec70051bd1d035c7c Mon Sep 17 00:00:00 2001 > From: "yizhi.fzh" <yizhi....@alibaba-inc.com> > Date: Thu, 25 Jan 2024 15:19:49 +0800 > Subject: [PATCH v9 1/1] Detect misuse of spin lock automatically > > Spin lock are intended for very short-term locks, but it is possible > to be misused in many cases. e.g. Acquiring another LWLocks or regular > locks, memory allocation, errstart when holding a spin lock. this patch > would detect such misuse automatically in a USE_ASSERT_CHECKING build. > CHECK_FOR_INTERRUPTS should be avoided as well when holding a spin lock. > Depends on what signals are left to handle, PG may raise error/fatal > which would cause the code jump to some other places which is hardly to > release the spin lock anyway. > --- > src/backend/storage/buffer/bufmgr.c | 24 +++++++---- > src/backend/storage/lmgr/lock.c | 6 +++ > src/backend/storage/lmgr/lwlock.c | 21 +++++++--- > src/backend/storage/lmgr/s_lock.c | 63 ++++++++++++++++++++--------- > src/backend/tcop/postgres.c | 6 +++ > src/backend/utils/error/elog.c | 10 +++++ > src/backend/utils/mmgr/mcxt.c | 16 ++++++++ > src/include/miscadmin.h | 21 +++++++++- > src/include/storage/buf_internals.h | 1 + > src/include/storage/s_lock.h | 56 ++++++++++++++++++------- > src/tools/pgindent/typedefs.list | 2 +- > 11 files changed, 176 insertions(+), 50 deletions(-) > > diff --git a/src/backend/storage/buffer/bufmgr.c > b/src/backend/storage/buffer/bufmgr.c > index 7d601bef6d..739a94209b 100644 > --- a/src/backend/storage/buffer/bufmgr.c > +++ b/src/backend/storage/buffer/bufmgr.c > @@ -5402,12 +5402,11 @@ rlocator_comparator(const void *p1, const void *p2) > uint32 > LockBufHdr(BufferDesc *desc) > { > - SpinDelayStatus delayStatus; > uint32 old_buf_state; > > Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc))); > > - init_local_spin_delay(&delayStatus); > + init_local_spin_delay(); > > while (true) > { Hm, I think this might make this code a bit more expensive. It's cheaper, both in the number of instructions and their cost, to set variables on the stack than in global memory - and it's already performance critical code. I think we need to optimize the code so that we only do init_local_spin_delay() once we are actually spinning, rather than also on uncontended locks. > @@ -5432,20 +5431,29 @@ LockBufHdr(BufferDesc *desc) > static uint32 > WaitBufHdrUnlocked(BufferDesc *buf) > { > - SpinDelayStatus delayStatus; > uint32 buf_state; > > - init_local_spin_delay(&delayStatus); > + /* > + * Suppose the buf will not be locked for a long time, setup a spin on > + * this. > + */ > + init_local_spin_delay(); I don't know what this comment really means. > +#ifdef USE_ASSERT_CHECKING > +void > +VerifyNoSpinLocksHeld(bool check_in_panic) > +{ > + if (!check_in_panic && spinStatus.in_panic) > + return; Why do we need this? > diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h > index aa06e49da2..c3fe75a41d 100644 > --- a/src/include/storage/s_lock.h > +++ b/src/include/storage/s_lock.h > @@ -652,7 +652,10 @@ tas(volatile slock_t *lock) > */ > #if !defined(S_UNLOCK) > #define S_UNLOCK(lock) \ > - do { __asm__ __volatile__("" : : : "memory"); *(lock) = 0; } while (0) > + do { __asm__ __volatile__("" : : : "memory"); \ > + ResetSpinLockStatus(); \ > + *(lock) = 0; \ > +} while (0) > #endif That seems like the wrong place. There are other definitions of S_UNLOCK(), so we clearly can't do this here. > /* > - * Support for spin delay which is useful in various places where > - * spinlock-like procedures take place. > + * Support for spin delay and spin misuse detection purpose. > + * > + * spin delay which is useful in various places where spinlock-like > + * procedures take place. > + * > + * spin misuse is based on global spinStatus to know if a spin lock > + * is held when a heavy operation is taking. > */ > typedef struct > { > @@ -846,22 +854,40 @@ typedef struct > const char *file; > int line; > const char *func; > -} SpinDelayStatus; > + bool in_panic; /* works for spin lock misuse purpose. */ > +} SpinLockStatus; > > +extern PGDLLIMPORT SpinLockStatus spinStatus; > + > +#ifdef USE_ASSERT_CHECKING > +extern void VerifyNoSpinLocksHeld(bool check_in_panic); > +extern void ResetSpinLockStatus(void); > +#else > +#define VerifyNoSpinLocksHeld(check_in_panic) ((void) true) > +#define ResetSpinLockStatus() ((void) true) > +#endif > + > +/* > + * start the spin delay logic and record the places where the spin lock > + * is held which is also helpful for spin lock misuse detection purpose. > + * init_spin_delay should be called with ResetSpinLockStatus in pair. > + */ > static inline void > -init_spin_delay(SpinDelayStatus *status, > - const char *file, int line, const char *func) > +init_spin_delay(const char *file, int line, const char *func) > { > - status->spins = 0; > - status->delays = 0; > - status->cur_delay = 0; > - status->file = file; > - status->line = line; > - status->func = func; > + /* it is not allowed to spin another lock when holding one already. */ > + VerifyNoSpinLocksHeld(true); > + spinStatus.spins = 0; > + spinStatus.delays = 0; > + spinStatus.cur_delay = 0; > + spinStatus.file = file; > + spinStatus.line = line; > + spinStatus.func = func; > + spinStatus.in_panic = false; > } > > -#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, > __LINE__, __func__) > -extern void perform_spin_delay(SpinDelayStatus *status); > -extern void finish_spin_delay(SpinDelayStatus *status); > +#define init_local_spin_delay() init_spin_delay( __FILE__, __LINE__, > __func__) > +extern void perform_spin_delay(void); > +extern void finish_spin_delay(void); As an API this doesn't quite make sense to me. For one, right now an uncontended SpinLockAcquire afaict will not trigger this mechanism, as we never call init_spin_delay(). It also adds overhead to optimized builds, as we now maintain state in global variables instead of local memory. Maybe we could have - spinlock_prepare_acquire() - about to acquire a spinlock empty in optimized builds, asserts that no other spinlock is held etc This would get called in SpinLockAcquire(), LockBufHdr() etc. - spinlock_finish_acquire() - have acquired spinlock empty in optimized builds, in assert builds sets variable indicating we're in spinlock This would get called in SpinLockRelease() etc. - spinlock_finish_release() - not holding the lock anymore This would get called by SpinLockRelease(), UnlockBufHdr() - spinlock_prepare_spin() - about to spin waiting for a spinlock like the current init_spin_delay() This would get called in s_lock(), LockBufHdr() etc. - spinlock_finish_spin() - completed waiting for a spinlock like the current finish_spin_delay() This would get called in s_lock(), LockBufHdr() etc. I don't love the spinlock_ prefix, that could end up confusing people. I toyed with "spinlike_" but am not in love either. Greetings, Andres Freund