From 82a796890029e7dadb72eff5336cd0074e1edc9e Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Wed, 14 Oct 2020 14:23:13 +0900
Subject: [PATCH v7] Log the standby recovery conflict waits.

---
 doc/src/sgml/config.sgml                      |  22 +++
 src/backend/storage/buffer/bufmgr.c           |  13 ++
 src/backend/storage/ipc/standby.c             | 150 +++++++++++++++---
 src/backend/storage/lmgr/proc.c               |  26 +++
 src/backend/utils/misc/guc.c                  |  10 +-
 src/backend/utils/misc/postgresql.conf.sample |   2 +
 src/include/storage/procsignal.h              |   1 +
 src/include/storage/standby.h                 |   2 +
 8 files changed, 205 insertions(+), 21 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index ee914740cc..a4a43dff83 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6839,6 +6839,28 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-log-recovery-conflict-waits" xreflabel="log_recovery_conflict_waits">
+      <term><varname>log_recovery_conflict_waits</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>log_recovery_conflict_waits</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Controls whether a log message is produced when the startup process
+        is waiting for recovery conflicts longer than
+        <varname>deadlock_timeout</varname>.  This is useful in determining if
+        recovery conflicts prevents the recovery from applying WAL.
+       </para>
+
+       <para>
+        This parameter can only be set in the <filename>postgresql.conf</filename>
+        file or on the server command line.
+        The default is <literal>off</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-log-parameter-max-length" xreflabel="log_parameter_max_length">
       <term><varname>log_parameter_max_length</varname> (<type>integer</type>)
       <indexterm>
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index e549fa1d30..648ced33eb 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3808,6 +3808,8 @@ LockBufferForCleanup(Buffer buffer)
 {
 	BufferDesc *bufHdr;
 	char	   *new_status = NULL;
+	TimestampTz waitStart = 0;
+	bool		logged_recovery_conflict = false;
 
 	Assert(BufferIsPinned(buffer));
 	Assert(PinCountWaitBuf == NULL);
@@ -3829,6 +3831,9 @@ LockBufferForCleanup(Buffer buffer)
 
 	bufHdr = GetBufferDescriptor(buffer - 1);
 
+	if (log_recovery_conflict_waits)
+		waitStart = GetCurrentTimestamp();
+
 	for (;;)
 	{
 		uint32		buf_state;
@@ -3881,6 +3886,14 @@ LockBufferForCleanup(Buffer buffer)
 				new_status[len] = '\0'; /* truncate off " waiting" */
 			}
 
+			if (log_recovery_conflict_waits && !logged_recovery_conflict &&
+				TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(),
+										   DeadlockTimeout))
+			{
+				LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, NULL);
+				logged_recovery_conflict = true;
+			}
+
 			/* Publish the bufid that Startup process waits on */
 			SetStartupBufferPinWaitBufId(buffer - 1);
 			/* Set alarm and then wait to be signaled by UnpinBuffer() */
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 52b2809dac..5820713566 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -39,6 +39,7 @@
 int			vacuum_defer_cleanup_age;
 int			max_standby_archive_delay = 30 * 1000;
 int			max_standby_streaming_delay = 30 * 1000;
+bool		log_recovery_conflict_waits = false;
 
 static HTAB *RecoveryLockLists;
 
@@ -49,6 +50,7 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlis
 static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
 static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
 static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
+static const char *get_recovery_conflict_desc(ProcSignalReason reason);
 
 /*
  * Keep track of all the locks owned by a given transaction.
@@ -215,15 +217,60 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
 	return false;
 }
 
+/*
+ * Log the recovery conflict.
+ *
+ * This function also reports the details about the conflicting process ids
+ * if *waitlist is not NULL.
+ */
+void
+LogRecoveryConflict(ProcSignalReason reason, VirtualTransactionId *waitlist)
+{
+	if (waitlist)
+	{
+		VirtualTransactionId *vxids;
+		StringInfoData buf;
+		int	nprocs = 0;
+
+		initStringInfo(&buf);
+
+		/* construct a string of list of the conflicted virtual transactions */
+		vxids = waitlist;
+		while (VirtualTransactionIdIsValid(*vxids))
+		{
+			PGPROC *proc = BackendIdGetProc(vxids->backendId);
+
+			if (nprocs == 0)
+				appendStringInfo(&buf, "%d", proc->pid);
+			else
+				appendStringInfo(&buf, ", %d", proc->pid);
+
+			nprocs++;
+			vxids++;
+		}
+
+		ereport(LOG,
+				(errmsg_internal("%s", get_recovery_conflict_desc(reason)),
+				 (errdetail_log_plural("Conflicting process: %s.",
+									   "Conflicting processes: %s.",
+									   nprocs, buf.data))));
+		pfree(buf.data);
+	}
+	else
+		ereport(LOG,
+				(errmsg_internal("%s", get_recovery_conflict_desc(reason))));
+}
+
 /*
  * This is the main executioner for any query backend that conflicts with
  * recovery processing. Judgement has already been passed on it within
  * a specific rmgr. Here we just issue the orders to the procs. The procs
  * then throw the required error as instructed.
  *
- * If report_waiting is true, "waiting" is reported in PS display if necessary.
- * If the caller has already reported that, report_waiting should be false.
- * Otherwise, "waiting" is reported twice unexpectedly.
+ * If report_waiting is true, "waiting" is reported in PS display and log
+ * the waits for the recovery conflict if necessary.  If the caller has already
+ * reported that, report_waiting should be false.  Otherwise, both are reported
+ * twice unexpectedly.
  */
 static void
 ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
@@ -232,6 +279,7 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
 {
 	TimestampTz waitStart = 0;
 	char	   *new_status;
+	bool		logged = false;
 
 	/* Fast exit, to avoid a kernel call if there's no work to be done. */
 	if (!VirtualTransactionIdIsValid(*waitlist))
@@ -249,13 +297,25 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
 		/* wait until the virtual xid is gone */
 		while (!VirtualXactLock(*waitlist, false))
 		{
+			TimestampTz ts;
+
+			if (report_waiting)
+				ts = GetCurrentTimestamp();
+
+			/* Log the recovery conflict */
+			if (log_recovery_conflict_waits && !logged && report_waiting &&
+				TimestampDifferenceExceeds(waitStart, ts, DeadlockTimeout))
+			{
+				LogRecoveryConflict(reason, waitlist);
+				logged = true;
+			}
+
 			/*
 			 * Report via ps if we have been waiting for more than 500 msec
 			 * (should that be configurable?)
 			 */
 			if (update_process_title && new_status == NULL && report_waiting &&
-				TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(),
-										   500))
+				TimestampDifferenceExceeds(waitStart, ts, 500))
 			{
 				const char *old_status;
 				int			len;
@@ -408,7 +468,35 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 
 	ltime = GetStandbyLimitTime();
 
-	if (GetCurrentTimestamp() >= ltime)
+	if (ltime == 0 || GetCurrentTimestamp() < ltime)
+	{
+		EnableTimeoutParams timeouts[2];
+		int cnt = 0;
+
+		/*
+		 * Wait (or wait again) until ltime
+		 */
+		if (ltime != 0)
+		{
+			timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
+			timeouts[cnt].type = TMPARAM_AT;
+			timeouts[cnt].fin_time = ltime;
+			cnt++;
+		}
+
+		/* Also, set deadlock timeout for logging purpose if necessary */
+		if (log_recovery_conflict_waits)
+		{
+			timeouts[cnt].id = STANDBY_TIMEOUT;
+			timeouts[cnt].type = TMPARAM_AFTER;
+			timeouts[cnt].delay_ms = DeadlockTimeout;
+			cnt++;
+		}
+
+		if (cnt > 0)
+			enable_timeouts(timeouts, cnt);
+	}
+	else
 	{
 		/*
 		 * We're already behind, so clear a path as quickly as possible.
@@ -419,26 +507,15 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 
 		/*
 		 * Prevent ResolveRecoveryConflictWithVirtualXIDs() from reporting
-		 * "waiting" in PS display by disabling its argument report_waiting
-		 * because the caller, WaitOnLock(), has already reported that.
+		 * "waiting" in PS display and logging recovery conflict by disabling
+		 * its argument report_waiting because the callers have already reported
+		 * that.
 		 */
 		ResolveRecoveryConflictWithVirtualXIDs(backends,
 											   PROCSIG_RECOVERY_CONFLICT_LOCK,
 											   PG_WAIT_LOCK | locktag.locktag_type,
 											   false);
 	}
-	else
-	{
-		/*
-		 * Wait (or wait again) until ltime
-		 */
-		EnableTimeoutParams timeouts[1];
-
-		timeouts[0].id = STANDBY_LOCK_TIMEOUT;
-		timeouts[0].type = TMPARAM_AT;
-		timeouts[0].fin_time = ltime;
-		enable_timeouts(timeouts, 1);
-	}
 
 	/* Wait to be signaled by the release of the Relation Lock */
 	ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
@@ -1116,3 +1193,36 @@ LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
 					 nmsgs * sizeof(SharedInvalidationMessage));
 	XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
 }
+
+/* Return the description of recovery conflict */
+static const char *
+get_recovery_conflict_desc(ProcSignalReason reason)
+{
+	const char *reasonDesc = _("unknown reason");
+
+	switch (reason)
+	{
+		case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+			reasonDesc = _("recovery is waiting recovery conflict on buffer pin");
+			break;
+		case PROCSIG_RECOVERY_CONFLICT_LOCK:
+			reasonDesc = _("recovery is waiting recovery conflict on lock");
+			break;
+		case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
+			reasonDesc = _("recovery is waiting recovery conflict on tablespace");
+			break;
+		case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+			reasonDesc = _("recovery is waiting recovery conflict on snapshot");
+			break;
+		case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+			reasonDesc = _("recovery is waiting recovery conflict on buffer deadlock");
+			break;
+		case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+			reasonDesc = _("recovery is waiting recovery conflict on database");
+			break;
+		default:
+			break;
+	}
+
+	return reasonDesc;
+}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 88566bd9fa..01fb0e8462 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1068,7 +1068,9 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 	ProcWaitStatus myWaitStatus;
 	PGPROC	   *proc;
 	PGPROC	   *leader = MyProc->lockGroupLeader;
+	TimestampTz standbyWaitStart;
 	int			i;
+	bool		logged_recovery_conflict = false;
 
 	/*
 	 * If group locking is in use, locks held by members of my locking group
@@ -1260,6 +1262,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 		else
 			enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
 	}
+	else
+		standbyWaitStart = GetCurrentTimestamp();
 
 	/*
 	 * If somebody wakes us between LWLockRelease and WaitLatch, the latch
@@ -1281,6 +1285,28 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 		{
 			/* Set a timer and wait for that or for the Lock to be granted */
 			ResolveRecoveryConflictWithLock(locallock->tag.lock);
+
+			if (log_recovery_conflict_waits && !logged_recovery_conflict &&
+				TimestampDifferenceExceeds(standbyWaitStart, GetCurrentTimestamp(),
+										   DeadlockTimeout))
+			{
+				VirtualTransactionId *vxids;
+				int	cnt;
+
+				vxids =	GetLockConflicts(&locallock->tag.lock,
+										 AccessExclusiveLock, &cnt);
+
+				/*
+				 * Log the recovery conflict if there are still virtual transactions
+				 * conflicting on the lock.
+				 */
+				if (cnt > 0)
+				{
+					LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
+										vxids);
+					logged_recovery_conflict = true;
+				}
+			}
 		}
 		else
 		{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a62d64eaa4..9d575fac10 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1570,7 +1570,15 @@ static struct config_bool ConfigureNamesBool[] =
 		false,
 		NULL, NULL, NULL
 	},
-
+	{
+		{"log_recovery_conflict_waits", PGC_SIGHUP, LOGGING_WHAT,
+			gettext_noop("Logs standby recovery conflict waits."),
+			NULL
+		},
+		&log_recovery_conflict_waits,
+		false,
+		NULL, NULL, NULL
+	},
 	{
 		{"log_hostname", PGC_SIGHUP, LOGGING_WHAT,
 			gettext_noop("Logs the host name in the connection logs."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 9cb571f7cc..5625681e8a 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -553,6 +553,8 @@
 					#   %% = '%'
 					# e.g. '<%u%%%d> '
 #log_lock_waits = off			# log lock waits >= deadlock_timeout
+#log_recovery_conflict_waits = off	# log standby recovery conflict waits
+					# >= deadlock_timeout
 #log_parameter_max_length = -1		# when logging statements, limit logged
 					# bind-parameter values to N bytes;
 					# -1 means print in full, 0 disables
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index 5cb39697f3..7cd988263d 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -71,5 +71,6 @@ extern void WaitForProcSignalBarrier(uint64 generation);
 extern void ProcessProcSignalBarrier(void);
 
 extern void procsignal_sigusr1_handler(SIGNAL_ARGS);
+extern const char *get_procsignal_reason_desc(ProcSignalReason reason);
 
 #endif							/* PROCSIGNAL_H */
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index faaf1d3817..642ce5201a 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -23,6 +23,7 @@
 extern int	vacuum_defer_cleanup_age;
 extern int	max_standby_archive_delay;
 extern int	max_standby_streaming_delay;
+extern bool	log_recovery_conflict_waits;
 
 extern void InitRecoveryTransactionEnvironment(void);
 extern void ShutdownRecoveryTransactionEnvironment(void);
@@ -38,6 +39,7 @@ extern void CheckRecoveryConflictDeadlock(void);
 extern void StandbyDeadLockHandler(void);
 extern void StandbyTimeoutHandler(void);
 extern void StandbyLockTimeoutHandler(void);
+extern void LogRecoveryConflict(ProcSignalReason reason, VirtualTransactionId *waitlist);
 
 /*
  * Standby Rmgr (RM_STANDBY_ID)
-- 
2.27.0

