Sorry, brown paper bag bug there.  Here's the correct one.

-- 
Álvaro Herrera               48°01'N 7°57'E  —  https://www.EnterpriseDB.com/
"I can't go to a restaurant and order food because I keep looking at the
fonts on the menu.  Five minutes later I realize that it's also talking
about food" (Donald Knuth)
>From 99cadfdf7475146953e9846c20c4a708a3527937 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
Date: Wed, 31 Jan 2024 12:27:51 +0100
Subject: [PATCH v3] Use atomics for SlruSharedData->latest_page_number

---
 src/backend/access/transam/clog.c      |  6 +---
 src/backend/access/transam/commit_ts.c |  7 ++---
 src/backend/access/transam/multixact.c | 28 +++++++++++-------
 src/backend/access/transam/slru.c      | 40 +++++++++++++++++++-------
 src/include/access/slru.h              |  5 +++-
 5 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index f6e7da7ffc..06fc2989ba 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -766,14 +766,10 @@ StartupCLOG(void)
 	TransactionId xid = XidFromFullTransactionId(TransamVariables->nextXid);
 	int64		pageno = TransactionIdToPage(xid);
 
-	LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
 	/*
 	 * Initialize our idea of the latest page number.
 	 */
-	XactCtl->shared->latest_page_number = pageno;
-
-	LWLockRelease(XactSLRULock);
+	pg_atomic_write_u64(&XactCtl->shared->latest_page_number, pageno);
 }
 
 /*
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 61b82385f3..6bfe60343e 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -689,9 +689,7 @@ ActivateCommitTs(void)
 	/*
 	 * Re-Initialize our idea of the latest page number.
 	 */
-	LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-	CommitTsCtl->shared->latest_page_number = pageno;
-	LWLockRelease(CommitTsSLRULock);
+	pg_atomic_write_u64(&CommitTsCtl->shared->latest_page_number, pageno);
 
 	/*
 	 * If CommitTs is enabled, but it wasn't in the previous server run, we
@@ -1006,7 +1004,8 @@ commit_ts_redo(XLogReaderState *record)
 		 * During XLOG replay, latest_page_number isn't set up yet; insert a
 		 * suitable value to bypass the sanity test in SimpleLruTruncate.
 		 */
-		CommitTsCtl->shared->latest_page_number = trunc->pageno;
+		pg_atomic_write_u64(&CommitTsCtl->shared->latest_page_number,
+							trunc->pageno);
 
 		SimpleLruTruncate(CommitTsCtl, trunc->pageno);
 	}
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 59523be901..febc429f72 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -2017,13 +2017,15 @@ StartupMultiXact(void)
 	 * Initialize offset's idea of the latest page number.
 	 */
 	pageno = MultiXactIdToOffsetPage(multi);
-	MultiXactOffsetCtl->shared->latest_page_number = pageno;
+	pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
+						pageno);
 
 	/*
 	 * Initialize member's idea of the latest page number.
 	 */
 	pageno = MXOffsetToMemberPage(offset);
-	MultiXactMemberCtl->shared->latest_page_number = pageno;
+	pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
+						pageno);
 }
 
 /*
@@ -2047,14 +2049,15 @@ TrimMultiXact(void)
 	oldestMXactDB = MultiXactState->oldestMultiXactDB;
 	LWLockRelease(MultiXactGenLock);
 
-	/* Clean up offsets state */
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
 	/*
 	 * (Re-)Initialize our idea of the latest page number for offsets.
 	 */
 	pageno = MultiXactIdToOffsetPage(nextMXact);
-	MultiXactOffsetCtl->shared->latest_page_number = pageno;
+	pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
+						pageno);
+
+	/* Clean up offsets state */
+	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
 
 	/*
 	 * Zero out the remainder of the current offsets page.  See notes in
@@ -2081,14 +2084,16 @@ TrimMultiXact(void)
 
 	LWLockRelease(MultiXactOffsetSLRULock);
 
-	/* And the same for members */
-	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
 	/*
+	 * And the same for members.
+	 *
 	 * (Re-)Initialize our idea of the latest page number for members.
 	 */
 	pageno = MXOffsetToMemberPage(offset);
-	MultiXactMemberCtl->shared->latest_page_number = pageno;
+	pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
+						pageno);
+
+	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
 
 	/*
 	 * Zero out the remainder of the current members page.  See notes in
@@ -3333,7 +3338,8 @@ multixact_redo(XLogReaderState *record)
 		 * SimpleLruTruncate.
 		 */
 		pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff);
-		MultiXactOffsetCtl->shared->latest_page_number = pageno;
+		pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
+							pageno);
 		PerformOffsetsTruncation(xlrec.startTruncOff, xlrec.endTruncOff);
 
 		LWLockRelease(MultiXactTruncationLock);
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 9ac4790f16..c1d0dfc73b 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -17,7 +17,8 @@
  * per-buffer LWLocks that synchronize I/O for each buffer.  The control lock
  * must be held to examine or modify any shared state.  A process that is
  * reading in or writing out a page buffer does not hold the control lock,
- * only the per-buffer lock for the buffer it is working on.
+ * only the per-buffer lock for the buffer it is working on.  One exception
+ * is latest_page_number, which is read and written using atomic ops.
  *
  * "Holding the control lock" means exclusive lock in all cases except for
  * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
@@ -239,8 +240,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
 		shared->lsn_groups_per_page = nlsns;
 
 		shared->cur_lru_count = 0;
-
-		/* shared->latest_page_number will be set later */
+		pg_atomic_write_u64(&shared->latest_page_number, 0);
 
 		shared->slru_stats_idx = pgstat_get_slru_index(name);
 
@@ -329,8 +329,16 @@ SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
 	/* Set the LSNs for this new page to zero */
 	SimpleLruZeroLSNs(ctl, slotno);
 
-	/* Assume this page is now the latest active page */
-	shared->latest_page_number = pageno;
+	/*
+	 * Assume this page is now the latest active page.
+	 *
+	 * Note that because both this routine and SlruSelectLRUPage run with
+	 * a bank lock held, it is not possible for this to be zeroing a page
+	 * that SlruSelectLRUPage is going to evict simultaneously -- they would
+	 * both have to hold the same bank lock!  Therefore, there's no memory
+	 * barrier here.
+	 */
+	pg_atomic_write_u64(&shared->latest_page_number, pageno);
 
 	/* update the stats counter of zeroed pages */
 	pgstat_count_slru_page_zeroed(shared->slru_stats_idx);
@@ -1113,9 +1121,17 @@ SlruSelectLRUPage(SlruCtl ctl, int64 pageno)
 				shared->page_lru_count[slotno] = cur_count;
 				this_delta = 0;
 			}
+
+			/*
+			 * If this page is the one most recently zeroed, don't consider it
+			 * an eviction candidate. See comments in SimpleLruZeroPage for an
+			 * explanation about the lack of a memory barrier here.
+			 */
 			this_page_number = shared->page_number[slotno];
-			if (this_page_number == shared->latest_page_number)
+			if (this_page_number ==
+				pg_atomic_read_u64(&shared->latest_page_number))
 				continue;
+
 			if (shared->page_status[slotno] == SLRU_PAGE_VALID)
 			{
 				if (this_delta > best_valid_delta ||
@@ -1254,7 +1270,6 @@ void
 SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
 {
 	SlruShared	shared = ctl->shared;
-	int			slotno;
 
 	/* update the stats counter of truncates */
 	pgstat_count_slru_truncate(shared->slru_stats_idx);
@@ -1270,10 +1285,13 @@ SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
 restart:
 
 	/*
-	 * While we are holding the lock, make an important safety check: the
-	 * current endpoint page must not be eligible for removal.
+	 * An important safety check: the current endpoint page must not be
+	 * eligible for removal.  Like SlruSelectLRUPage, we don't need a
+	 * memory barrier here because for the affected page to be relevant,
+	 * we'd have to have the same bank lock as SimpleLruZeroPage.
 	 */
-	if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
+	if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
+						  cutoffPage))
 	{
 		LWLockRelease(shared->ControlLock);
 		ereport(LOG,
@@ -1282,7 +1300,7 @@ restart:
 		return;
 	}
 
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
+	for (int slotno = 0; slotno < shared->num_slots; slotno++)
 	{
 		if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
 			continue;
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index b05f6bc71d..2109488654 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -49,6 +49,9 @@ typedef enum
 
 /*
  * Shared-memory state
+ *
+ * ControlLock is used to protect access to the other fields, except
+ * latest_page_number, which uses atomics; see comment in slru.c.
  */
 typedef struct SlruSharedData
 {
@@ -95,7 +98,7 @@ typedef struct SlruSharedData
 	 * this is not critical data, since we use it only to avoid swapping out
 	 * the latest page.
 	 */
-	int64		latest_page_number;
+	pg_atomic_uint64 latest_page_number;
 
 	/* SLRU's index for statistics purposes (might not be unique) */
 	int			slru_stats_idx;
-- 
2.39.2

Reply via email to