From cd90ff97b12d3c2e74da6cfa4b0b8939c6f6dbb6 Mon Sep 17 00:00:00 2001
From: alterego665 <824662526@qq.com>
Date: Sun, 8 Jun 2025 20:28:17 +0800
Subject: [PATCH] Add progressive backoff to XactLockTableWait functions

XactLockTableWait() and ConditionalXactLockTableWait() currently use
a fixed 1ms sleep when waiting for transaction completion. In logical
replication scenarios, particularly during CREATE REPLICATION SLOT,
these functions may wait for very long periods (minutes to hours) for
old transactions to complete, leading to excessive CPU usage due to
frequent polling.

This patch implements progressive backoff: sleep for 1ms per iteration
until the cumulative sleep time reaches 10 seconds, then double the
sleep duration on each subsequent iteration, up to a maximum of 10
seconds per sleep. This keeps normal operations (which typically
complete within seconds) responsive while reducing CPU usage during the
long waits common in logical replication scenarios.
---
 src/backend/storage/lmgr/lmgr.c | 48 ++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index 3f6bf70bd3c..495fa607932 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -667,6 +667,13 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
 	XactLockTableWaitInfo info;
 	ErrorContextCallback callback;
 	bool		first = true;
+	long		total_sleep_us = 0;
+	long		sleep_us = 1000;		/* Start with 1ms */
+	bool		do_backoff = false;
+
+	/* Progressive backoff parameters: start threshold and per-sleep cap */
+	const long	backoff_threshold_us = 10 * USECS_PER_SEC;	/* 10 seconds */
+	const long	max_sleep_us = 10 * USECS_PER_SEC;			/* 10 seconds */
 
 	/*
 	 * If an operation is specified, set up our verbose error context
@@ -713,13 +720,25 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
 		 * as when building snapshots for logical decoding.  It is possible to
 		 * see a transaction in ProcArray before it registers itself in the
 		 * locktable.  The topmost transaction in that case is the same xid,
-		 * so we try again after a short sleep.  (Don't sleep the first time
-		 * through, to avoid slowing down the normal case.)
+		 * so we try again after a progressive sleep.  (Don't sleep the first
+		 * time through, to avoid slowing down the normal case.)
 		 */
 		if (!first)
 		{
 			CHECK_FOR_INTERRUPTS();
-			pg_usleep(1000L);
+			pg_usleep(sleep_us);
+
+			/* Track total only until we start doing backoff */
+			if (!do_backoff)
+			{
+				total_sleep_us += sleep_us;
+				if (total_sleep_us >= backoff_threshold_us)
+					do_backoff = true;
+			}
+
+			/* Exponential backoff once threshold is reached */
+			if (do_backoff && sleep_us < max_sleep_us)
+				sleep_us = Min(sleep_us * 2, max_sleep_us);
 		}
 		first = false;
 		xid = SubTransGetTopmostTransaction(xid);
@@ -734,12 +753,21 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
  *
  * As above, but only lock if we can get the lock without blocking.
  * Returns true if the lock was acquired.
+ *
+ * Uses the same progressive backoff as XactLockTableWait.
  */
 bool
 ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
 {
 	LOCKTAG		tag;
 	bool		first = true;
+	long		total_sleep_us = 0;
+	long		sleep_us = 1000;		/* Start with 1ms */
+	bool		do_backoff = false;
+
+	/* Progressive backoff parameters: start threshold and per-sleep cap */
+	const long	backoff_threshold_us = 10 * USECS_PER_SEC;	/* 10 seconds */
+	const long	max_sleep_us = 10 * USECS_PER_SEC;			/* 10 seconds */
 
 	for (;;)
 	{
@@ -762,7 +790,19 @@ ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
 		if (!first)
 		{
 			CHECK_FOR_INTERRUPTS();
-			pg_usleep(1000L);
+			pg_usleep(sleep_us);
+
+			/* Track total only until we start doing backoff */
+			if (!do_backoff)
+			{
+				total_sleep_us += sleep_us;
+				if (total_sleep_us >= backoff_threshold_us)
+					do_backoff = true;
+			}
+
+			/* Exponential backoff once threshold is reached */
+			if (do_backoff && sleep_us < max_sleep_us)
+				sleep_us = Min(sleep_us * 2, max_sleep_us);
 		}
 		first = false;
 		xid = SubTransGetTopmostTransaction(xid);
-- 
2.48.1

