From e04836e0e1c822f586778d489c8b7ea6708feec5 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Thu, 12 Sep 2024 15:27:02 +0100
Subject: [PATCH v1 2/2] nbtree: add tracking of processing responsibilities in
 BTPSD

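By tracking which backend (identified by its ProcNumber) is currently
responsible for advancing the parallel scan state in
BTParallelScanDescData, we can assert that the scan keeps moving forward,
and also assign blame to a specific backend when the scan still gets
stuck.  The tracking is only compiled into assert-enabled builds
(USE_ASSERT_CHECKING).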
---
 src/backend/access/nbtree/nbtree.c | 36 ++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 2b553d1161..0324860451 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -72,6 +72,10 @@ typedef struct BTParallelScanDescData
 									 * possible states of parallel scan. */
 	slock_t		btps_mutex;		/* protects above variables, btps_arrElems */
 	ConditionVariable btps_cv;	/* used to synchronize parallel scan */
+#ifdef USE_ASSERT_CHECKING
+	ProcNumber	btps_procnumber;	/* procnumber of backend currently
+									 * advancing the scan */
+#endif
 
 	/*
 	 * btps_arrElems is used when scans need to schedule another primitive
@@ -550,6 +554,9 @@ btinitparallelscan(void *target)
 	SpinLockInit(&bt_target->btps_mutex);
 	bt_target->btps_scanPage = InvalidBlockNumber;
 	bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
+#ifdef USE_ASSERT_CHECKING
+	bt_target->btps_procnumber = INVALID_PROC_NUMBER;
+#endif
 	ConditionVariableInit(&bt_target->btps_cv);
 }
 
@@ -575,6 +582,9 @@ btparallelrescan(IndexScanDesc scan)
 	SpinLockAcquire(&btscan->btps_mutex);
 	btscan->btps_scanPage = InvalidBlockNumber;
 	btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
+#ifdef USE_ASSERT_CHECKING
+	btscan->btps_procnumber = INVALID_PROC_NUMBER;
+#endif
 	SpinLockRelease(&btscan->btps_mutex);
 }
 
@@ -642,6 +652,9 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first)
 
 	while (1)
 	{
+#ifdef USE_ASSERT_CHECKING
+		ProcNumber	waitingFor;
+#endif
 		SpinLockAcquire(&btscan->btps_mutex);
 
 		if (btscan->btps_pageStatus == BTPARALLEL_DONE)
@@ -674,6 +687,9 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first)
 				so->scanBehind = false;
 				*pageno = InvalidBlockNumber;
 				exit_loop = true;
+#ifdef USE_ASSERT_CHECKING
+				btscan->btps_procnumber = MyProcNumber;
+#endif
 			}
 			else
 			{
@@ -690,12 +706,20 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno, bool first)
 			 * of advancing it to a new page!
 			 */
 			btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
+#ifdef USE_ASSERT_CHECKING
+			btscan->btps_procnumber = MyProcNumber;
+#endif
 			*pageno = btscan->btps_scanPage;
 			exit_loop = true;
 		}
+#ifdef USE_ASSERT_CHECKING
+		waitingFor = btscan->btps_procnumber;
+#endif
 		SpinLockRelease(&btscan->btps_mutex);
 		if (exit_loop || !status)
 			break;
+
+		Assert(waitingFor != MyProcNumber && waitingFor != INVALID_PROC_NUMBER);
 		ConditionVariableSleep(&btscan->btps_cv, WAIT_EVENT_BTREE_PAGE);
 	}
 	ConditionVariableCancelSleep();
@@ -726,6 +750,10 @@ _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page)
 	SpinLockAcquire(&btscan->btps_mutex);
 	btscan->btps_scanPage = scan_page;
 	btscan->btps_pageStatus = BTPARALLEL_IDLE;
+#ifdef USE_ASSERT_CHECKING
+	Assert(btscan->btps_procnumber == MyProcNumber);
+	btscan->btps_procnumber = INVALID_PROC_NUMBER;
+#endif
 	SpinLockRelease(&btscan->btps_mutex);
 	ConditionVariableSignal(&btscan->btps_cv);
 }
@@ -758,6 +786,11 @@ _bt_parallel_done(IndexScanDesc scan)
 	SpinLockAcquire(&btscan->btps_mutex);
 	if (btscan->btps_pageStatus != BTPARALLEL_DONE)
 	{
+#ifdef USE_ASSERT_CHECKING
+		Assert(btscan->btps_procnumber == MyProcNumber);
+		btscan->btps_procnumber = INVALID_PROC_NUMBER;
+#endif
+
 		btscan->btps_pageStatus = BTPARALLEL_DONE;
 		status_changed = true;
 	}
@@ -792,6 +825,9 @@ _bt_parallel_primscan_schedule(IndexScanDesc scan, BlockNumber prev_scan_page)
 	if (btscan->btps_scanPage == prev_scan_page &&
 		btscan->btps_pageStatus == BTPARALLEL_IDLE)
 	{
+#ifdef USE_ASSERT_CHECKING
+		Assert(btscan->btps_procnumber == INVALID_PROC_NUMBER);
+#endif
 		btscan->btps_scanPage = InvalidBlockNumber;
 		btscan->btps_pageStatus = BTPARALLEL_NEED_PRIMSCAN;
 
-- 
2.46.0

