From a05d9741c3161a894decd1b88e148c4bbac60b0e Mon Sep 17 00:00:00 2001
From: Josh Curtis <jcurtis825@gmail.com>
Date: Mon, 1 Sep 2025 21:07:52 -0700
Subject: [PATCH v1] Fix race condition when reading
 `PredXact->SxactGlobalXmin`

`DropAllPredicateLocksFromTable`, `PredicateLockPageSplit`, and
`CheckTableForSerializableConflictIn` all assume that
`!TransactionIdIsValid(PredXact->SxactGlobalXmin)` implies there are
no active serializable transactions.

This assumption is not true due to a race with
`SetNewSxactGlobalXmin`, which first sets `PredXact->SxactGlobalXmin`
to `InvalidTransactionId` then iterates over the active serializable
transactions to find the new xmin. If `SetNewSxactGlobalXmin` is
interrupted before setting a new xmin and other processes check
`!TransactionIdIsValid(PredXact->SxactGlobalXmin)` without taking a
lock during this time, they will incorrectly assume it is safe to
proceed as if there are no concurrent serializable transactions.
---
 src/backend/storage/lmgr/predicate.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index c1d8511ad17..50e9f62b566 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -2949,12 +2949,14 @@ DropAllPredicateLocksFromTable(Relation relation, bool transfer)
 
 	/*
 	 * Bail out quickly if there are no serializable transactions running.
-	 * It's safe to check this without taking locks because the caller is
-	 * holding an ACCESS EXCLUSIVE lock on the relation.  No new locks which
-	 * would matter here can be acquired while that is held.
 	 */
+	LWLockAcquire(SerializableXactHashLock, LW_SHARED);
 	if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
+	{
+		LWLockRelease(SerializableXactHashLock);
 		return;
+	}
+	LWLockRelease(SerializableXactHashLock);
 
 	if (!PredicateLockingNeededForRelation(relation))
 		return;
@@ -3150,16 +3152,14 @@ PredicateLockPageSplit(Relation relation, BlockNumber oldblkno,
 
 	/*
 	 * Bail out quickly if there are no serializable transactions running.
-	 *
-	 * It's safe to do this check without taking any additional locks. Even if
-	 * a serializable transaction starts concurrently, we know it can't take
-	 * any SIREAD locks on the page being split because the caller is holding
-	 * the associated buffer page lock. Memory reordering isn't an issue; the
-	 * memory barrier in the LWLock acquisition guarantees that this read
-	 * occurs while the buffer page lock is held.
 	 */
+	LWLockAcquire(SerializableXactHashLock, LW_SHARED);
 	if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
+	{
+		LWLockRelease(SerializableXactHashLock);
 		return;
+	}
+	LWLockRelease(SerializableXactHashLock);
 
 	if (!PredicateLockingNeededForRelation(relation))
 		return;
@@ -4426,12 +4426,14 @@ CheckTableForSerializableConflictIn(Relation relation)
 
 	/*
 	 * Bail out quickly if there are no serializable transactions running.
-	 * It's safe to check this without taking locks because the caller is
-	 * holding an ACCESS EXCLUSIVE lock on the relation.  No new locks which
-	 * would matter here can be acquired while that is held.
 	 */
+	LWLockAcquire(SerializableXactHashLock, LW_SHARED);
 	if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
+	{
+		LWLockRelease(SerializableXactHashLock);
 		return;
+	}
+	LWLockRelease(SerializableXactHashLock);
 
 	if (!SerializationNeededForWrite(relation))
 		return;
-- 
2.49.0

