From 73728acc384d51497bc73a0376a88ca5205c906d Mon Sep 17 00:00:00 2001
From: Melih Mutlu <m.melihmutlu@gmail.com>
Date: Wed, 26 Apr 2023 18:21:32 +0300
Subject: [PATCH] Add timeout to flush stats during startup's main replay loop

---
 src/backend/access/transam/xlogrecovery.c | 18 +++++++++
 src/backend/postmaster/startup.c          | 46 +++++++++++++++++++++++
 src/include/postmaster/startup.h          |  4 ++
 src/include/utils/timeout.h               |  1 +
 4 files changed, 69 insertions(+)

diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 188f6d6f85..eac5ee3e5c 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -63,6 +63,7 @@
 #include "utils/pg_lsn.h"
 #include "utils/ps_status.h"
 #include "utils/pg_rusage.h"
+#include "utils/timeout.h"
 
 /* Unsupported old recovery command file names (relative to $PGDATA) */
 #define RECOVERY_COMMAND_FILE	"recovery.conf"
@@ -1675,6 +1676,9 @@ PerformWalRecovery(void)
 				ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
 										 LSN_FORMAT_ARGS(xlogreader->ReadRecPtr));
 
+			/* Is this the right place to enable this? */
+			enable_startup_stat_flush_timeout();
+
 #ifdef WAL_DEBUG
 			if (XLOG_DEBUG ||
 				(record->xl_rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
@@ -3617,6 +3621,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						/* Do background tasks that might benefit us later. */
 						KnownAssignedTransactionIdsIdleMaintenance();
 
+						/*
+						 * Disable the stats flush timeout so that it does not
+						 * cause needless wakeups while we sleep. It is enabled
+						 * again in PerformWalRecovery once a WAL record is read.
+						 */
+						disable_startup_stat_flush_timeout();
+
 						(void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
 										 WL_LATCH_SET | WL_TIMEOUT |
 										 WL_EXIT_ON_PM_DEATH,
@@ -3889,6 +3900,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					/* Update pg_stat_recovery_prefetch before sleeping. */
 					XLogPrefetcherComputeStats(xlogprefetcher);
 
+					/*
+					 * Disable the stats flush timeout so that it does not
+					 * cause needless wakeups while we sleep. It is enabled
+					 * again in PerformWalRecovery once a WAL record is read.
+					 */
+					disable_startup_stat_flush_timeout();
+
 					/*
 					 * Wait for more WAL to arrive, when we will be woken
 					 * immediately by the WAL receiver.
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index efc2580536..b250fa95f9 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -72,6 +72,11 @@ static TimestampTz startup_progress_phase_start_time;
  */
 static volatile sig_atomic_t startup_progress_timer_expired = false;
 
+/* Set when a WAL stats flush is due; tested and cleared in HandleStartupProcInterrupts(). */
+static volatile sig_atomic_t startup_stat_need_flush = false;
+
+int			pgstat_stat_flush_timeout = 1000;	/* stats flush interval, in milliseconds */
+
 /*
  * Time between progress updates for long-running startup operations.
  */
@@ -206,6 +211,13 @@ HandleStartupProcInterrupts(void)
 	/* Perform logging of memory contexts of this process */
 	if (LogMemoryContextPending)
 		ProcessLogMemoryContextInterrupt();
+
+	if (startup_stat_need_flush)
+	{
+		/* Clear the flag first so a timeout firing mid-flush is not lost. */
+		startup_stat_need_flush = false;
+		pgstat_report_wal(true);
+	}
 }
 
 
@@ -256,6 +268,10 @@ StartupProcessMain(void)
 	RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler);
 	RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler);
 
+	/* Register the timeout to flush stats periodically. */
+	RegisterTimeout(STARTUP_STAT_FLUSH_TIMEOUT,
+					startup_stat_flush_timeout_handler);
+
 	/*
 	 * Unblock signals (they were blocked when the postmaster forked us)
 	 */
@@ -385,3 +401,33 @@ has_startup_progress_timeout_expired(long *secs, int *usecs)
 
 	return true;
 }
+
+/* Callback registered for STARTUP_STAT_FLUSH_TIMEOUT: only records that a flush is due. */
+void
+startup_stat_flush_timeout_handler(void)
+{
+	startup_stat_need_flush = true;
+}
+
+/* Stop the periodic stat-flush timeout, requesting one final flush beforehand. */
+void
+disable_startup_stat_flush_timeout(void)
+{
+	/* Ask for a last flush; HandleStartupProcInterrupts() acts on this flag. */
+	startup_stat_need_flush = true;
+
+	disable_timeout(STARTUP_STAT_FLUSH_TIMEOUT, false);
+}
+
+/* Arm the periodic stat-flush timeout (no-op if armed); leaves startup_progress_phase_start_time alone. */
+void
+enable_startup_stat_flush_timeout(void)
+{
+	/* Called per replayed record: re-arming would keep pushing expiry back. */
+	if (get_timeout_active(STARTUP_STAT_FLUSH_TIMEOUT))
+		return;
+	enable_timeout_every(STARTUP_STAT_FLUSH_TIMEOUT,
+						 TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
+													 pgstat_stat_flush_timeout),
+						 pgstat_stat_flush_timeout);
+}
\ No newline at end of file
diff --git a/src/include/postmaster/startup.h b/src/include/postmaster/startup.h
index 6a2e4c4526..dbc79560b5 100644
--- a/src/include/postmaster/startup.h
+++ b/src/include/postmaster/startup.h
@@ -38,4 +38,8 @@ extern void begin_startup_progress_phase(void);
 extern void startup_progress_timeout_handler(void);
 extern bool has_startup_progress_timeout_expired(long *secs, int *usecs);
 
+extern void enable_startup_stat_flush_timeout(void);
+extern void disable_startup_stat_flush_timeout(void);
+extern void startup_stat_flush_timeout_handler(void);
+
 #endif							/* _STARTUP_H */
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index e561a1cde9..a8d360e255 100644
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -35,6 +35,7 @@ typedef enum TimeoutId
 	IDLE_STATS_UPDATE_TIMEOUT,
 	CLIENT_CONNECTION_CHECK_TIMEOUT,
 	STARTUP_PROGRESS_TIMEOUT,
+	STARTUP_STAT_FLUSH_TIMEOUT,
 	/* First user-definable timeout reason */
 	USER_TIMEOUT,
 	/* Maximum number of timeout reasons */
-- 
2.25.1

