From ce72ccafca7aad33d4a7f21aa87d555e28d8866b Mon Sep 17 00:00:00 2001
From: alterego665 <824662526@qq.com>
Date: Tue, 3 Jun 2025 09:56:56 +0800
Subject: [PATCH] Add wait event and progressive backoff to XactLockTableWait

XactLockTableWait and ConditionalXactLockTableWait previously lacked a
specific wait event, making backend states less transparent. Additionally,
XactLockTableWait's use of a fixed short sleep could lead to busy-waiting
during potentially long waits, such as those during logical slot creation on
a standby. This commit introduces WAIT_EVENT_XACT_DONE, which both functions
now report to pg_stat_activity during their sleep phases, enhancing
visibility. In XactLockTableWait, when 'oper' is XLTW_None, the sleep
duration now uses a progressive backoff (1ms, doubling up to 1s) to reduce
CPU usage. Other XactLockTableWait calls and ConditionalXactLockTableWait
continue to use a fixed 1ms sleep.

---
 src/backend/storage/lmgr/lmgr.c               | 31 +++++++++++++++++--
 .../utils/activity/wait_event_names.txt       |  1 +
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index 3f6bf70bd3c..be889ec2ad8 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -645,6 +645,9 @@ XactLockTableDelete(TransactionId xid)
 	LockRelease(&tag, ExclusiveLock, false);
 }
 
+#define XACT_LOCK_TABLE_INITIAL_WAIT_US  1000L	/* 1ms, in microseconds */
+static int XactLockTableWait_us = XACT_LOCK_TABLE_INITIAL_WAIT_US;	/* file-static: value persists across calls */
+
 /*
  *		XactLockTableWait
  *
@@ -719,8 +722,25 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
 		if (!first)
 		{
 			CHECK_FOR_INTERRUPTS();
-			pg_usleep(1000L);
+			pgstat_report_wait_start(WAIT_EVENT_XACT_DONE);
+			pg_usleep(XactLockTableWait_us);
+			pgstat_report_wait_end();
+
+			/*
+			 * For logical replication use cases (oper == XLTW_None), double the
+			 * sleep each retry to avoid busy-waiting, capped at 1s since
+			 * pg_usleep isn't interruptible on some platforms.  Other callers
+			 * stay at 1ms; the else branch restarts every call from that value.
+			 */
+			if (oper == XLTW_None)
+			{
+				XactLockTableWait_us *= 2;
+				if (XactLockTableWait_us > 1000000)
+					XactLockTableWait_us = 1000000;
+			}
 		}
+		else
+			XactLockTableWait_us = XACT_LOCK_TABLE_INITIAL_WAIT_US;
 		first = false;
 		xid = SubTransGetTopmostTransaction(xid);
 	}
@@ -762,7 +782,14 @@ ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
 		if (!first)
 		{
 			CHECK_FOR_INTERRUPTS();
-			pg_usleep(1000L);
+			/*
+			 * Sleep for the fixed initial interval; no backoff here.  Long
+			 * waits (e.g. logical replication on a standby) are expected to
+			 * go through XactLockTableWait, which backs off progressively.
+			 */
+			pgstat_report_wait_start(WAIT_EVENT_XACT_DONE);
+			pg_usleep(XACT_LOCK_TABLE_INITIAL_WAIT_US);
+			pgstat_report_wait_end();
 		}
 		first = false;
 		xid = SubTransGetTopmostTransaction(xid);
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 4da68312b5f..3da08b853c3 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -161,6 +161,7 @@ WAL_RECEIVER_EXIT	"Waiting for the WAL receiver to exit."
 WAL_RECEIVER_WAIT_START	"Waiting for startup process to send initial data for streaming replication."
 WAL_SUMMARY_READY	"Waiting for a new WAL summary to be generated."
+XACT_DONE	"Waiting for a transaction to commit or abort."
 XACT_GROUP_UPDATE	"Waiting for the group leader to update transaction status at transaction end."
 
 ABI_compatibility:
 
-- 
2.48.1

