Hello

In Dilip's patch to improve SLRU concurrency, there's a requirement to
prevent predicate.c's SLRU control lock from being used to control
access to another shared memory structure, SerialControlData.  This
struct is used to keep track of the areas of the SLRU that are valid.
Dilip just embedded that change into the patch he submitted, but I think
the patch is actually wrong in detail, because it's leaving the
SerialAdd() function unchanged to use SerialSLRULock, which is wrong
because that function depends heavily on the contents of
SerialControlData, and it also modifies it.  So if run concurrently with
the other functions that are using SerialControlLock, the result would
make no sense.

I came up with the attached.  If there are no objections, I'll be
pushing this shortly.

-- 
Álvaro Herrera        Breisgau, Deutschland  —  https://www.EnterpriseDB.com/
"The Gord often wonders why people threaten never to come back after they've
been told never to return" (www.actsofgord.com)
>From 045cf1a6a134dadf2d7d9a9d20577a83ba0fd966 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
Date: Mon, 29 Jan 2024 12:46:50 +0100
Subject: [PATCH v1] Split use of SerialSLRULock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

predicate.c has been using SerialSLRULock (the control lock for its SLRU
structure) to coordinate access to SerialControl, its other shared
memory structure, but this is unnecessary and confuses further changes
to SLRU locking.  Split it out.

Extracted from a larger patch from the same author, and some additional
changes by Álvaro.

Author: Dilip Kumar <dilip.ku...@enterprisedb.com>
Discussion: https://postgr.es/m/CAFiTN-vzDvNz=exgxz6gdyjtzgixksqs0mkhmmaq8sosefz...@mail.gmail.com
---
 src/backend/storage/lmgr/lwlocknames.txt      |  1 +
 src/backend/storage/lmgr/predicate.c          | 39 +++++++++++++------
 .../utils/activity/wait_event_names.txt       |  1 +
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt
index a0163b2187..3d59d3646e 100644
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -57,3 +57,4 @@ WaitEventExtensionLock				48
 WALSummarizerLock					49
 DSMRegistryLock						50
 InjectionPointLock				51
+SerialControlLock					52
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index ee5ea1175c..13bc4cbb5e 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -85,6 +85,9 @@
  * memory objects must be taken in this order, and should be released in
  * reverse order:
  *
+ *	SerialControlLock
+ *		- Protects SerialControlData members
+ *
  *	SerializableFinishedListLock
  *		- Protects the list of transactions which have completed but which
  *			may yet matter because they overlap still-active transactions.
@@ -828,9 +831,11 @@ SerialInit(void)
 		/*
 		 * Set control information to reflect empty SLRU.
 		 */
+		LWLockAcquire(SerialControlLock, LW_EXCLUSIVE);
 		serialControl->headPage = -1;
 		serialControl->headXid = InvalidTransactionId;
 		serialControl->tailXid = InvalidTransactionId;
+		LWLockRelease(SerialControlLock);
 	}
 }
 
@@ -852,7 +857,12 @@ SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
 
 	targetPage = SerialPage(xid);
 
-	LWLockAcquire(SerialSLRULock, LW_EXCLUSIVE);
+	/*
+	 * In this routine, we must hold both SerialControlLock and SerialSLRULock
+	 * simultaneously while making the SLRU data catch up with the new state
+	 * that we determine.
+	 */
+	LWLockAcquire(SerialControlLock, LW_EXCLUSIVE);
 
 	/*
 	 * If no serializable transactions are active, there shouldn't be anything
@@ -886,6 +896,8 @@ SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
 	if (isNewPage)
 		serialControl->headPage = targetPage;
 
+	LWLockAcquire(SerialSLRULock, LW_EXCLUSIVE);
+
 	if (isNewPage)
 	{
 		/* Initialize intervening pages. */
@@ -903,6 +915,7 @@ SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
 	SerialSlruCtl->shared->page_dirty[slotno] = true;
 
 	LWLockRelease(SerialSLRULock);
+	LWLockRelease(SerialControlLock);
 }
 
 /*
@@ -920,10 +933,10 @@ SerialGetMinConflictCommitSeqNo(TransactionId xid)
 
 	Assert(TransactionIdIsValid(xid));
 
-	LWLockAcquire(SerialSLRULock, LW_SHARED);
+	LWLockAcquire(SerialControlLock, LW_SHARED);
 	headXid = serialControl->headXid;
 	tailXid = serialControl->tailXid;
-	LWLockRelease(SerialSLRULock);
+	LWLockRelease(SerialControlLock);
 
 	if (!TransactionIdIsValid(headXid))
 		return 0;
@@ -954,7 +967,7 @@ SerialGetMinConflictCommitSeqNo(TransactionId xid)
 static void
 SerialSetActiveSerXmin(TransactionId xid)
 {
-	LWLockAcquire(SerialSLRULock, LW_EXCLUSIVE);
+	LWLockAcquire(SerialControlLock, LW_EXCLUSIVE);
 
 	/*
 	 * When no sxacts are active, nothing overlaps, set the xid values to
@@ -966,7 +979,7 @@ SerialSetActiveSerXmin(TransactionId xid)
 	{
 		serialControl->tailXid = InvalidTransactionId;
 		serialControl->headXid = InvalidTransactionId;
-		LWLockRelease(SerialSLRULock);
+		LWLockRelease(SerialControlLock);
 		return;
 	}
 
@@ -984,7 +997,7 @@ SerialSetActiveSerXmin(TransactionId xid)
 		{
 			serialControl->tailXid = xid;
 		}
-		LWLockRelease(SerialSLRULock);
+		LWLockRelease(SerialControlLock);
 		return;
 	}
 
@@ -993,7 +1006,7 @@ SerialSetActiveSerXmin(TransactionId xid)
 
 	serialControl->tailXid = xid;
 
-	LWLockRelease(SerialSLRULock);
+	LWLockRelease(SerialControlLock);
 }
 
 /*
@@ -1007,12 +1020,12 @@ CheckPointPredicate(void)
 {
 	int			truncateCutoffPage;
 
-	LWLockAcquire(SerialSLRULock, LW_EXCLUSIVE);
+	LWLockAcquire(SerialControlLock, LW_EXCLUSIVE);
 
 	/* Exit quickly if the SLRU is currently not in use. */
 	if (serialControl->headPage < 0)
 	{
-		LWLockRelease(SerialSLRULock);
+		LWLockRelease(SerialControlLock);
 		return;
 	}
 
@@ -1072,9 +1085,13 @@ CheckPointPredicate(void)
 		serialControl->headPage = -1;
 	}
 
-	LWLockRelease(SerialSLRULock);
+	LWLockRelease(SerialControlLock);
 
-	/* Truncate away pages that are no longer required */
+	/*
+	 * Truncate away pages that are no longer required.  Note that no
+	 * additional locking is required, because this is only called as part of
+	 * a checkpoint, and the validity limits have already been determined.
+	 */
 	SimpleLruTruncate(SerialSlruCtl, truncateCutoffPage);
 
 	/*
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index a5df835dd4..cd22dca702 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -331,6 +331,7 @@ WaitEventExtension	"Waiting to read or update custom wait events information for
 WALSummarizer	"Waiting to read or update WAL summarization state."
 DSMRegistry	"Waiting to read or update the dynamic shared memory registry."
 InjectionPoint	"Waiting to read or update information related to injection points."
+SerialControl	"Waiting to read or update shared <filename>pg_serial</filename> state."
 
 #
 # END OF PREDEFINED LWLOCKS (DO NOT CHANGE THIS LINE)
-- 
2.39.2

Reply via email to