Hi everyone, thanks for your comments.
I've just wanted to share little cosmetic
fixes for previous patch.

Best Regards,
Maksim Melnikov
From 748c5fe8efd4c6365f380f7f9bbeba5e91887ab4 Mon Sep 17 00:00:00 2001
From: Maksim Melnikov <m.melni...@postgrespro.ru>
Date: Wed, 26 Mar 2025 16:44:09 +0300
Subject: [PATCH] Fix sync_standbys_defined race on startup.

WalSndCtl->sync_standbys_defined, can be updated
by checkpointer process only and in backends it
can be read. So, on postgres startup, we have race
between checkpointer process and backends, that check
sync_standbys_defined for sync replications.

To avoid this, now we are working with sync_standbys_defined
not as bool field, but as bitmask, where 7th bit check
standbys_defined or not, 8th bit check
WalSndCtlData->sync_standbys_defined field has been
initialized by checkpointer. So if in synchronous replication functions
was detected that sync_standbys_defined is not initialized,
check SyncStandbysDefined() value.
---
 src/backend/replication/syncrep.c           | 31 +++++++++++++++++----
 src/include/replication/walsender_private.h | 18 +++++++++++-
 2 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index d75e3968035..60b9507aa41 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -148,6 +148,7 @@ void
 SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
 {
 	int			mode;
+	bool		sync_standbys_defined;
 
 	/*
 	 * This should be called while holding interrupts during a transaction
@@ -170,7 +171,7 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
 	 * it's false, the lock is not necessary because we don't touch the queue.
 	 */
 	if (!SyncRepRequested() ||
-		!((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_defined)
+		IS_SYNCSTANDBYS_INITIALIZED_ONLY(((volatile WalSndCtlData *) WalSndCtl)->sync_standbys_defined))
 		return;
 
 	/* Cap the level for anything other than commit to remote flush only. */
@@ -184,6 +185,8 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
 
 	LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
 	Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING);
+	sync_standbys_defined = WalSndCtl->sync_standbys_defined & SYNCSTANDBYSINITIALIZED
+		? (WalSndCtl->sync_standbys_defined & SYNCSTANDBYSDEFINED) : SyncStandbysDefined();
 
 	/*
 	 * We don't wait for sync rep if WalSndCtl->sync_standbys_defined is not
@@ -193,7 +196,7 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
 	 * condition but we'll be fetching that cache line anyway so it's likely
 	 * to be a low cost check.
 	 */
-	if (!WalSndCtl->sync_standbys_defined ||
+	if (!sync_standbys_defined ||
 		lsn <= WalSndCtl->lsn[mode])
 	{
 		LWLockRelease(SyncRepLock);
@@ -920,9 +923,10 @@ SyncRepWakeQueue(bool all, int mode)
 void
 SyncRepUpdateSyncStandbysDefined(void)
 {
-	bool		sync_standbys_defined = SyncStandbysDefined();
+	bool		new_sync_standbys_defined = SyncStandbysDefined();
+	bool		old_sync_standbys_defined = (WalSndCtl->sync_standbys_defined & SYNCSTANDBYSDEFINED);
 
-	if (sync_standbys_defined != WalSndCtl->sync_standbys_defined)
+	if (new_sync_standbys_defined != old_sync_standbys_defined)
 	{
 		LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
 
@@ -931,7 +935,7 @@ SyncRepUpdateSyncStandbysDefined(void)
 		 * for backends to continue waiting.  Since the user no longer wants
 		 * synchronous replication, we'd better wake them up.
 		 */
-		if (!sync_standbys_defined)
+		if (!new_sync_standbys_defined)
 		{
 			int			i;
 
@@ -946,8 +950,23 @@ SyncRepUpdateSyncStandbysDefined(void)
 		 * backend that hasn't yet reloaded its config might go to sleep on
 		 * the queue (and never wake up).  This prevents that.
 		 */
-		WalSndCtl->sync_standbys_defined = sync_standbys_defined;
 
+		WalSndCtl->sync_standbys_defined = new_sync_standbys_defined
+			? SET_SYNCSTANDBYS_DEFINED(WalSndCtl->sync_standbys_defined)
+			: RESET_SYNCSTANDBYS_DEFINED(WalSndCtl->sync_standbys_defined);
+
+		LWLockRelease(SyncRepLock);
+	}
+	else if (!(WalSndCtl->sync_standbys_defined & SYNCSTANDBYSINITIALIZED))
+	{
+		/*
+		 * Even if new_sync_standbys_defined == old_sync_standbys_defined, we
+		 * should check WalSndCtl->sync_standbys_defined is initialized and
+		 * set its initialize bit, if it reset. Anyway this bit should be set
+		 * only on start and then it will be always setted.
+		 */
+		LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
+		SET_SYNCSTANDBYS_INITIALIZED(WalSndCtl->sync_standbys_defined);
 		LWLockRelease(SyncRepLock);
 	}
 }
diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h
index 0fc77f1b4af..dd931b81e4f 100644
--- a/src/include/replication/walsender_private.h
+++ b/src/include/replication/walsender_private.h
@@ -100,7 +100,7 @@ typedef struct
 	 * config file safely, so checkpointer updates this value as needed.
 	 * Protected by SyncRepLock.
 	 */
-	bool		sync_standbys_defined;
+	uint8		sync_standbys_defined;
 
 	/* used as a registry of physical / logical walsenders to wake */
 	ConditionVariable wal_flush_cv;
@@ -116,6 +116,22 @@ typedef struct
 	WalSnd		walsnds[FLEXIBLE_ARRAY_MEMBER];
 } WalSndCtlData;
 
+/*
+ * WalSndCtlData->sync_standbys_defined bit flags. 7th bit check standbys_defined or not,
+ * 8th bit check WalSndCtlData->sync_standbys_defined field has been initialized.
+ */
+#define SYNCSTANDBYSINITIALIZED	(1 << 0)
+#define SYNCSTANDBYSDEFINED		(1 << 1)
+
+#define SET_SYNCSTANDBYS_INITIALIZED(sync_standbys)	\
+	(sync_standbys | SYNCSTANDBYSINITIALIZED)
+#define SET_SYNCSTANDBYS_DEFINED(sync_standbys)	\
+	(sync_standbys | SYNCSTANDBYSINITIALIZED | SYNCSTANDBYSDEFINED)
+#define RESET_SYNCSTANDBYS_DEFINED(sync_standbys)	\
+	(SET_SYNCSTANDBYS_INITIALIZED(sync_standbys) & (~SYNCSTANDBYSDEFINED))
+#define IS_SYNCSTANDBYS_INITIALIZED_ONLY(sync_standbys)	\
+	((sync_standbys & (SYNCSTANDBYSINITIALIZED | SYNCSTANDBYSDEFINED)) == SYNCSTANDBYSINITIALIZED)
+
 extern PGDLLIMPORT WalSndCtlData *WalSndCtl;
 
 
-- 
2.43.0

Reply via email to