Hi,

> On 2024-01-05 14:19:23 -0500, Robert Haas wrote:
>> On Fri, Jan 5, 2024 at 2:11 PM Andres Freund <and...@anarazel.de> wrote:
>> > I see it fairly regularly. Including finding several related bugs that 
>> > lead to
>> > stuck systems last year (signal handlers are a menace).
>> 
>> In that case, I think this proposal is dead. I can't personally
>> testify to this code being a force for good, but it sounds like you
>> can. So be it!
>
> I think the proposal to make it a WARNING shouldn't go anywhere,

OK, I give up the WARNING method as well.

> but I think
> there are several improvements that could come out of this discussion:
>
> - assertion checks against doing dangerous stuff
> - make the stuck lock message more informative, e.g. by including the length
>   of time the lock was stuck for

Could you check the attached to see if it is something similar in your
mind?

commit e264da3050285cffd4885637ee97b2326d2f3938 SHOULD **NOT** BE COMMITTED.
Author: yizhi.fzh <yizhi....@alibaba-inc.com>
Date:   Sun Jan 7 15:06:14 2024 +0800

    simple code to prove previously commit works.

commit 80cf987d1abe2cdae195bd5eea520e28142885b4
Author: yizhi.fzh <yizhi....@alibaba-inc.com>
Date:   Thu Jan 4 22:19:50 2024 +0800

    Detect more misuse of spin lock automatically
    
    spin lock are intended for *very* short-term locks, but it is possible
    to be misused in many cases. e.g. Acquiring another LWLocks or regular
    locks, memory allocation. In this patch, all of such cases will be
    automatically detected in an ASSERT_CHECKING build.
    
    Signal handle should be avoided when holding a spin lock because it is
    nearly impossible to release the spin lock correctly if that happens.


Luckly after applying the patch, there is no failure when run 'make
check-world'.

> - make sure that interrupts can't trigger the stuck lock much quicker, which
>   afaict can happen today

I can't follow this, do you mind explain more about this a bit?

-- 
Best Regards
Andy Fan

>From 80cf987d1abe2cdae195bd5eea520e28142885b4 Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi....@alibaba-inc.com>
Date: Thu, 4 Jan 2024 22:19:50 +0800
Subject: [PATCH v3 1/2] Detect more misuse of spin lock automatically

spin lock are intended for *very* short-term locks, but it is possible
to be misused in many cases. e.g. Acquiring another LWLocks or regular
locks, memory allocation. In this patch, all of such cases will be
automatically detected in an ASSERT_CHECKING build.

Signal handle should be avoided when holding a spin lock because it is
nearly impossible to release the spin lock correctly if that happens.
---
 src/backend/storage/buffer/bufmgr.c |  1 +
 src/backend/storage/lmgr/lock.c     |  2 ++
 src/backend/storage/lmgr/lwlock.c   |  2 ++
 src/backend/storage/lmgr/s_lock.c   | 15 ++++++++-------
 src/backend/utils/init/globals.c    |  1 +
 src/backend/utils/mmgr/mcxt.c       |  9 +++++++++
 src/common/scram-common.c           |  1 +
 src/include/miscadmin.h             |  5 ++++-
 src/include/storage/buf_internals.h |  1 +
 src/include/storage/s_lock.h        |  2 ++
 src/include/storage/spin.h          | 24 ++++++++++++++++++++++--
 11 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7d601bef6d..822ec77bdc 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -5419,6 +5419,7 @@ LockBufHdr(BufferDesc *desc)
 		perform_spin_delay(&delayStatus);
 	}
 	finish_spin_delay(&delayStatus);
+	START_SPIN_LOCK();
 	return old_buf_state | BM_LOCKED;
 }
 
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index c70a1adb9a..f896201244 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -776,6 +776,8 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	bool		found_conflict;
 	bool		log_lock = false;
 
+	Assert(SpinLockCount == 0);
+
 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
 	lockMethodTable = LockMethods[lockmethodid];
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index b4b989ac56..974a7c26ea 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -1205,6 +1205,8 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 
 	Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
 
+	Assert(SpinLockCount == 0);
+
 	PRINT_LWDEBUG("LWLockAcquire", lock, mode);
 
 #ifdef LWLOCK_STATS
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 0e5f7ab0b9..43a05b68be 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -71,18 +71,18 @@ static int	spins_per_delay = DEFAULT_SPINS_PER_DELAY;
  * s_lock_stuck() - complain about a stuck spinlock
  */
 static void
-s_lock_stuck(const char *file, int line, const char *func)
+s_lock_stuck(const char *file, int line, const char *func, uint32_t delay_ms)
 {
 	if (!func)
 		func = "(unknown)";
 #if defined(S_LOCK_TEST)
-	fprintf(stderr,
-			"\nStuck spinlock detected at %s, %s:%d.\n",
-			func, file, line);
+	fprintf(stder,
+			"\nStuck spinlock detected at %s, %s:%d. after waiting for %u ms\n",
+			func, file, line, delay_ms);
 	exit(1);
 #else
-	elog(PANIC, "stuck spinlock detected at %s, %s:%d",
-		 func, file, line);
+	elog(PANIC, "stuck spinlock detected at %s, %s:%d after waiting for %u ms",
+		 func, file, line, delay_ms);
 #endif
 }
 
@@ -132,7 +132,7 @@ perform_spin_delay(SpinDelayStatus *status)
 	if (++(status->spins) >= spins_per_delay)
 	{
 		if (++(status->delays) > NUM_DELAYS)
-			s_lock_stuck(status->file, status->line, status->func);
+			s_lock_stuck(status->file, status->line, status->func, status->delay_ms);
 
 		if (status->cur_delay == 0) /* first time to delay? */
 			status->cur_delay = MIN_DELAY_USEC;
@@ -148,6 +148,7 @@ perform_spin_delay(SpinDelayStatus *status)
 		pgstat_report_wait_start(WAIT_EVENT_SPIN_DELAY);
 		pg_usleep(status->cur_delay);
 		pgstat_report_wait_end();
+		status->delay_ms += (status->cur_delay / 1000);
 
 #if defined(S_LOCK_TEST)
 		fprintf(stdout, "*");
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 88b03e8fa3..913f775d2c 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -40,6 +40,7 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
 volatile uint32 InterruptHoldoffCount = 0;
 volatile uint32 QueryCancelHoldoffCount = 0;
 volatile uint32 CritSectionCount = 0;
+volatile uint32 SpinLockCount = 0;
 
 int			MyProcPid;
 pg_time_t	MyStartTime;
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 1336944084..5acb03ac60 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -26,6 +26,7 @@
 #include "storage/proc.h"
 #include "storage/procarray.h"
 #include "storage/procsignal.h"
+#include "storage/spin.h"
 #include "utils/fmgrprotos.h"
 #include "utils/memdebug.h"
 #include "utils/memutils.h"
@@ -1028,6 +1029,8 @@ MemoryContextAlloc(MemoryContext context, Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	Assert(SpinLockCount == 0);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1071,6 +1074,8 @@ MemoryContextAllocZero(MemoryContext context, Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	Assert(SpinLockCount == 0);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1197,6 +1202,8 @@ palloc(Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	Assert(SpinLockCount == 0);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
@@ -1228,6 +1235,8 @@ palloc0(Size size)
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
 
+	Assert(SpinLockCount == 0);
+
 	context->isReset = false;
 
 	ret = context->methods->alloc(context, size);
diff --git a/src/common/scram-common.c b/src/common/scram-common.c
index b611bb8fe7..5728cc5acd 100644
--- a/src/common/scram-common.c
+++ b/src/common/scram-common.c
@@ -27,6 +27,7 @@
 #endif
 #include "port/pg_bswap.h"
 
+
 /*
  * Calculate SaltedPassword.
  *
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 0b01c1f093..14c3688c56 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -27,7 +27,9 @@
 
 #include "datatype/timestamp.h" /* for TimestampTz */
 #include "pgtime.h"				/* for pg_time_t */
-
+#ifndef FRONTEND
+#include "storage/spin.h"
+#endif
 
 #define InvalidPid				(-1)
 
@@ -120,6 +122,7 @@ extern void ProcessInterrupts(void);
 /* Service interrupt, if one is pending and it's safe to service it now */
 #define CHECK_FOR_INTERRUPTS() \
 do { \
+	Assert(SpinLockCount == 0);	\
 	if (INTERRUPTS_PENDING_CONDITION()) \
 		ProcessInterrupts(); \
 } while(0)
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index e43e616579..ad7e1d0a21 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -363,6 +363,7 @@ UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
 {
 	pg_write_barrier();
 	pg_atomic_write_u32(&desc->state, buf_state & (~BM_LOCKED));
+	END_SPIN_LOCK();
 }
 
 /* in bufmgr.c */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index aa06e49da2..ac875ab3dd 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -846,6 +846,7 @@ typedef struct
 	const char *file;
 	int			line;
 	const char *func;
+	uint32_t	delay_ms;
 } SpinDelayStatus;
 
 static inline void
@@ -858,6 +859,7 @@ init_spin_delay(SpinDelayStatus *status,
 	status->file = file;
 	status->line = line;
 	status->func = func;
+	status->delay_ms = 0;
 }
 
 #define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index c0679c5999..57cf982f46 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -56,12 +56,32 @@
 #include "storage/pg_sema.h"
 #endif
 
+extern PGDLLIMPORT volatile uint32 SpinLockCount;
+
+#define START_SPIN_LOCK() (SpinLockCount++)
+#define END_SPIN_LOCK() \
+do { \
+	Assert(SpinLockCount > 0); \
+	SpinLockCount--; \
+} while(0)
 
 #define SpinLockInit(lock)	S_INIT_LOCK(lock)
 
-#define SpinLockAcquire(lock) S_LOCK(lock)
+#define SpinLockAcquire(lock) \
+	do \
+	{ \
+		S_LOCK(lock); \
+		START_SPIN_LOCK(); \
+	} while (false);
+
+
+#define SpinLockRelease(lock) \
+	do \
+	{ \
+		S_UNLOCK(lock); \
+		END_SPIN_LOCK(); \
+	} while (false);
 
-#define SpinLockRelease(lock) S_UNLOCK(lock)
 
 #define SpinLockFree(lock)	S_LOCK_FREE(lock)
 
-- 
2.34.1

>From e264da3050285cffd4885637ee97b2326d2f3938 Mon Sep 17 00:00:00 2001
From: "yizhi.fzh" <yizhi....@alibaba-inc.com>
Date: Sun, 7 Jan 2024 15:06:14 +0800
Subject: [PATCH v3 2/2] simple code to prove previously commit works.

---
 src/backend/storage/buffer/bufmgr.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 822ec77bdc..769a5a416e 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1391,6 +1391,15 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	 */
 	victim_buf_state = LockBufHdr(victim_buf_hdr);
 
+	CHECK_FOR_INTERRUPTS();
+	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+	{
+		void	   *p = palloc(30);
+
+		(void) p;
+	}
+	LockRelationOid(1259, RowShareLock);
+
 	/* some sanity checks while we hold the buffer header lock */
 	Assert(BUF_STATE_GET_REFCOUNT(victim_buf_state) == 1);
 	Assert(!(victim_buf_state & (BM_TAG_VALID | BM_VALID | BM_DIRTY | BM_IO_IN_PROGRESS)));
-- 
2.34.1

Reply via email to