From f08be9051c68fbf4fadf4999dd771828c0cf0ab3 Mon Sep 17 00:00:00 2001
From: Nitin <nitin.jadhav@enterprisedb.com>
Date: Tue, 10 Aug 2021 18:06:57 +0530
Subject: [PATCH] Shows the progress of the operations performed during startup
 process. The interval between each progress update is configurable and it
 should be mentioned in seconds

---
 src/backend/access/transam/xlog.c             |  5 ++
 src/backend/postmaster/startup.c              | 94 +++++++++++++++++++++++++++
 src/backend/storage/file/fd.c                 | 11 ++++
 src/backend/storage/file/reinit.c             | 10 +++
 src/backend/utils/init/postinit.c             |  2 +
 src/backend/utils/misc/guc.c                  | 17 +++++
 src/backend/utils/misc/postgresql.conf.sample |  6 ++
 src/include/postmaster/startup.h              | 18 +++++
 src/include/utils/guc.h                       |  2 +
 src/include/utils/timeout.h                   |  1 +
 10 files changed, 166 insertions(+)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8b39a2f..988302e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7354,6 +7354,8 @@ StartupXLOG(void)
 					(errmsg("redo starts at %X/%X",
 							LSN_FORMAT_ARGS(ReadRecPtr))));
 
+			init_startup_progress();
+
 			/*
 			 * main redo apply loop
 			 */
@@ -7361,6 +7363,9 @@ StartupXLOG(void)
 			{
 				bool		switchedTLI = false;
 
+				ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
+										 LSN_FORMAT_ARGS(ReadRecPtr));
+
 #ifdef WAL_DEBUG
 				if (XLOG_DEBUG ||
 					(rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 0f4f00d..4ced040 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -59,6 +59,22 @@ static volatile sig_atomic_t promote_signaled = false;
  */
 static volatile sig_atomic_t in_restore_command = false;
 
+/*
+ * Start timestamp of the operation that occur during startup process.
+ */
+static TimestampTz startup_process_op_start_time;
+
+/*
+ * Indicates the timestamp at which the timer was supposed to expire.
+ */
+static TimestampTz scheduled_startup_progress_timeout;
+
+/*
+ * Indicates whether the startup progress interval mentioned by the user is
+ * elapsed or not. TRUE if timeout occurred, FALSE otherwise.
+ */
+static bool startup_progress_timer_expired;
+
 /* Signal handlers */
 static void StartupProcTriggerHandler(SIGNAL_ARGS);
 static void StartupProcSigHupHandler(SIGNAL_ARGS);
@@ -233,6 +249,7 @@ StartupProcessMain(void)
 	RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler);
 	RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler);
 	RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler);
+	RegisterTimeout(STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler);
 
 	/*
 	 * Unblock signals (they were blocked when the postmaster forked us)
@@ -282,3 +299,80 @@ ResetPromoteSignaled(void)
 {
 	promote_signaled = false;
 }
+
+/*
+ * Schedule a wakeup call for logging the progress of startup process.
+ */
+void
+startup_progress_timeout_handler(void)
+{
+	startup_progress_timer_expired = true;
+}
+
+/*
+ * Sets the start timestamp of the current operation and also enables the
+ * timeout for logging the progress of startup process.
+ */
+void
+init_startup_progress(void)
+{
+	scheduled_startup_progress_timeout = 0;
+	startup_process_op_start_time = GetCurrentTimestamp();
+	reset_startup_progress_timeout(startup_process_op_start_time);
+}
+
+/*
+ * Decides whether to log the startup progress or not based on whether the
+ * timer is expired or not. It returns FALSE if the timer is not expired,
+ * otherwise it calculates the elapsed time and sets the respective out
+ * parameters secs and usecs. Enables the timer for the next log message and
+ * returns TRUE.
+ */
+bool
+startup_progress_timeout_has_expired(long *secs, int *usecs)
+{
+	long		seconds;
+	int			useconds;
+	TimestampTz now;
+
+	/* If the timeout has not occurred, then no need to log the details. */
+	if (!startup_progress_timer_expired)
+		return false;
+
+	/* Calculate the elapsed time. */
+	now = GetCurrentTimestamp();
+	TimestampDifference(startup_process_op_start_time, now, &seconds, &useconds);
+
+	*secs = seconds;
+	*usecs = useconds;
+
+	reset_startup_progress_timeout(now);
+	return true;
+}
+
+/*
+ * Calculates the timestamp at which the next timer should expire and enables
+ * the timer accordingly.
+ */
+void
+reset_startup_progress_timeout(TimestampTz now)
+{
+	TimestampTz next_timeout;
+	int         interval_in_ms;
+
+	interval_in_ms = log_startup_progress_interval * 1000;
+	next_timeout = TimestampTzPlusMilliseconds(scheduled_startup_progress_timeout,
+											   interval_in_ms);
+	if (next_timeout < now)
+	{
+		/*
+		 * Either the timeout was processed so late that we missed an
+		 * entire cycle or system clock was set backwards.
+		 */
+		next_timeout = TimestampTzPlusMilliseconds(now, interval_in_ms);
+	}
+
+	enable_timeout_at(STARTUP_PROGRESS_TIMEOUT, next_timeout);
+	scheduled_startup_progress_timeout = next_timeout;
+	startup_progress_timer_expired = false;
+}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 5d5e8ae..9128d22 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -96,6 +96,7 @@
 #include "pgstat.h"
 #include "port/pg_iovec.h"
 #include "portability/mem.h"
+#include "postmaster/startup.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "utils/guc.h"
@@ -3334,6 +3335,9 @@ do_syncfs(const char *path)
 {
 	int			fd;
 
+	ereport_startup_progress("syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s",
+							 path);
+
 	fd = OpenTransientFile(path, O_RDONLY);
 	if (fd < 0)
 	{
@@ -3410,6 +3414,8 @@ SyncDataDirectory(void)
 		DIR		   *dir;
 		struct dirent *de;
 
+		init_startup_progress();
+
 		/*
 		 * On Linux, we don't have to open every single file one by one.  We
 		 * can use syncfs() to sync whole filesystems.  We only expect
@@ -3452,6 +3458,8 @@ SyncDataDirectory(void)
 	walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1);
 #endif
 
+	init_startup_progress();
+
 	/*
 	 * Now we do the fsync()s in the same order.
 	 *
@@ -3583,6 +3591,9 @@ pre_sync_fname(const char *fname, bool isdir, int elevel)
 static void
 datadir_fsync_fname(const char *fname, bool isdir, int elevel)
 {
+	ereport_startup_progress("syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s",
+							 fname);
+
 	/*
 	 * We want to silently ignoring errors about unreadable files.  Pass that
 	 * desire on to fsync_fname_ext().
diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
index 40c758d..41bae8a 100644
--- a/src/backend/storage/file/reinit.c
+++ b/src/backend/storage/file/reinit.c
@@ -17,6 +17,7 @@
 #include <unistd.h>
 
 #include "common/relpath.h"
+#include "postmaster/startup.h"
 #include "storage/copydir.h"
 #include "storage/fd.h"
 #include "storage/reinit.h"
@@ -64,6 +65,7 @@ ResetUnloggedRelations(int op)
 								   "ResetUnloggedRelations",
 								   ALLOCSET_DEFAULT_SIZES);
 	oldctx = MemoryContextSwitchTo(tmpctx);
+	init_startup_progress();
 
 	/*
 	 * First process unlogged files in pg_default ($PGDATA/base)
@@ -136,6 +138,14 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
 
 		snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
 				 tsdirname, de->d_name);
+
+		if (op & UNLOGGED_RELATION_INIT)
+			ereport_startup_progress("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s",
+									 dbspace_path);
+		else if (op & UNLOGGED_RELATION_CLEANUP)
+			ereport_startup_progress("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s",
+									 dbspace_path);
+
 		ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
 	}
 
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 51d1bbe..1676bc1 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -40,6 +40,7 @@
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "postmaster/postmaster.h"
+#include "postmaster/startup.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
@@ -622,6 +623,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
 						IdleInTransactionSessionTimeoutHandler);
 		RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler);
 		RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler);
+		RegisterTimeout(STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler);
 	}
 
 	/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a2e0f8d..31894aa 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -679,6 +679,11 @@ static char *recovery_target_lsn_string;
 /* should be static, but commands/variable.c needs to get at this */
 char	   *role_string;
 
+/*
+ * The interval between each progress update of the operations that occur
+ * during startup process.
+ */
+int log_startup_progress_interval;
 
 /*
  * Displayable names for context types (enum GucContext)
@@ -3546,6 +3551,18 @@ static struct config_int ConfigureNamesInt[] =
 		check_client_connection_check_interval, NULL, NULL
 	},
 
+	{
+		{"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN,
+			gettext_noop("Sets the time interval between each progress update "
+						 "of the startup process."),
+			gettext_noop("0 logs all messages. -1 turns this feature off."),
+			GUC_UNIT_S,
+		},
+		&log_startup_progress_interval,
+		-1, -1, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b242a7f..901e4ba 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -523,6 +523,12 @@
 					# are logged regardless of their duration; 1.0 logs all
 					# statements from all transactions, 0.0 never logs
 
+log_startup_progress_interval = 10	# Sets the time interval between each
+									# progress update of the startup process.
+									# -1 disables the feature, 0 logs all
+									# messages, > 0 indicates the interval in
+									# seconds.
+
 # - What to Log -
 
 #debug_print_parse = off
diff --git a/src/include/postmaster/startup.h b/src/include/postmaster/startup.h
index bf6adf1..cbeb42e 100644
--- a/src/include/postmaster/startup.h
+++ b/src/include/postmaster/startup.h
@@ -12,6 +12,19 @@
 #ifndef _STARTUP_H
 #define _STARTUP_H
 
+#include "datatype/timestamp.h"
+
+/*
+ * Logs the startup progress message if the timer has expired.
+ */
+#define ereport_startup_progress(msg, ...) \
+	do { \
+		long    secs; \
+		int     usecs; \
+		if (startup_progress_timeout_has_expired(&secs, &usecs)) \
+			ereport(LOG, errmsg(msg, secs, (usecs / 10000),  ##__VA_ARGS__ )); \
+	} while(0)
+
 extern void HandleStartupProcInterrupts(void);
 extern void StartupProcessMain(void) pg_attribute_noreturn();
 extern void PreRestoreCommand(void);
@@ -19,4 +32,9 @@ extern void PostRestoreCommand(void);
 extern bool IsPromoteSignaled(void);
 extern void ResetPromoteSignaled(void);
 
+extern void init_startup_progress(void);
+extern void startup_progress_timeout_handler(void);
+bool startup_progress_timeout_has_expired(long *secs, int *usecs);
+void reset_startup_progress_timeout(TimestampTz now);
+
 #endif							/* _STARTUP_H */
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index a7c3a49..7c12632 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -282,6 +282,8 @@ extern int	tcp_user_timeout;
 extern bool trace_sort;
 #endif
 
+extern int  log_startup_progress_interval;
+
 /*
  * Functions exported by guc.c
  */
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index 93e6a69..29ea7f6 100644
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -33,6 +33,7 @@ typedef enum TimeoutId
 	IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
 	IDLE_SESSION_TIMEOUT,
 	CLIENT_CONNECTION_CHECK_TIMEOUT,
+	STARTUP_PROGRESS_TIMEOUT,
 	/* First user-definable timeout reason */
 	USER_TIMEOUT,
 	/* Maximum number of timeout reasons */
-- 
1.8.3.1

