From 3d4c40ad2440826ccd4fd6b3b499185ecf61fbb5 Mon Sep 17 00:00:00 2001
From: Nitin <nitin.jadhav@enterprisedb.com>
Date: Mon, 21 Jun 2021 12:02:57 +0530
Subject: [PATCH] Shows the progress of the operations performed during startup
 process. The interval between each progress update is configurable and it can
 be mentioned in milliseconds.

---
 src/backend/access/transam/xlog.c             | 161 ++++++++++++++++++++++++++
 src/backend/postmaster/startup.c              |   1 +
 src/backend/storage/file/fd.c                 |  12 ++
 src/backend/storage/file/reinit.c             |   6 +
 src/backend/utils/misc/guc.c                  |  18 +++
 src/backend/utils/misc/postgresql.conf.sample |   6 +
 src/include/access/xlog.h                     |  23 ++++
 src/include/utils/guc.h                       |   2 +
 src/include/utils/timeout.h                   |   1 +
 9 files changed, 230 insertions(+)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1b3a3d9..6984387 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -79,6 +79,7 @@
 #include "utils/relmapper.h"
 #include "utils/pg_rusage.h"
 #include "utils/snapmgr.h"
+#include "utils/timeout.h"
 #include "utils/timestamp.h"
 
 extern uint32 bootstrap_data_checksum_version;
@@ -397,6 +398,17 @@ static bool doRequestWalReceiverReply;
  */
 static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr;
 
+/*
+ * Start timestamp of the operation that occur during startup process.
+ */
+static TimestampTz operationStartTimestamp;
+
+/*
+ * Indicates whether the startup progress interval mentioned by the user is
+ * elapsed or not. TRUE if timeout occurred. FALSE otherwise.
+ */
+static bool logStartupProgressTimeout;
+
 /*----------
  * Shared-memory data structures for XLOG control
  *
@@ -7321,6 +7333,8 @@ StartupXLOG(void)
 					(errmsg("redo starts at %X/%X",
 							LSN_FORMAT_ARGS(ReadRecPtr))));
 
+			InitStartupProgress();
+
 			/*
 			 * main redo apply loop
 			 */
@@ -7328,6 +7342,8 @@ StartupXLOG(void)
 			{
 				bool		switchedTLI = false;
 
+				LogStartupProgress(RECOVERY_IN_PROGRESS, NULL);
+
 #ifdef WAL_DEBUG
 				if (XLOG_DEBUG ||
 					(rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
@@ -7539,6 +7555,8 @@ StartupXLOG(void)
 			 * end of main redo apply loop
 			 */
 
+			CloseStartupProgress(RECOVERY_END);
+
 			if (reachedRecoveryTarget)
 			{
 				if (!reachedConsistency)
@@ -12935,3 +12953,146 @@ XLogRequestWalReceiverReply(void)
 {
 	doRequestWalReceiverReply = true;
 }
+
+/*
+ * Schedule a wakeup call for logging the progress of startup process.
+ */
+void
+LogStartupProgressTimeoutHandler(void)
+{
+	logStartupProgressTimeout = true;
+}
+
+/*
+ * Sets the start timestamp of the current operation and also enables the
+ * timeout for logging the progress of startup process.
+ */
+void
+InitStartupProgress(void)
+{
+	operationStartTimestamp = GetCurrentTimestamp();
+	enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT,
+						 log_startup_progress_interval);
+}
+
+/*
+ * Fetches the start timestamp of the current operation.
+ */
+TimestampTz
+GetCurrentOperationStartTimestamp(void)
+{
+	return operationStartTimestamp;
+}
+
+/*
+ * Returns a string which indicates the description of the operation performed
+ * during startup process.
+ */
+static char *
+get_startup_process_operation_string(StartupProcessOp operation)
+{
+	switch (operation)
+	{
+		case SYNCFS_IN_PROGRESS:
+			return gettext_noop("Syncing data directory (syncfs)");
+		case FSYNC_IN_PROGRESS:
+			return gettext_noop("Syncing data directory (fsync)");
+		case RECOVERY_IN_PROGRESS:
+			return gettext_noop("Performing crash recovery");
+		case RESET_UNLOGGED_REL_IN_PROGRESS:
+			return gettext_noop("Resetting unlogged relations");
+		case SYNCFS_END:
+			return gettext_noop("Data directory sync (syncfs) complete");
+		case FSYNC_END:
+			return gettext_noop("Data directory sync (fsync) complete");
+		case RECOVERY_END:
+			return gettext_noop("Crash recovery complete");
+		case RESET_UNLOGGED_REL_END:
+			return gettext_noop("Unlogged relations reset");
+		default:
+			ereport(ERROR,
+					(errmsg("unrecognized operation (%d) in startup progress update",
+							operation)));
+	}
+}
+
+/*
+ * Logs the progress of the operations performed during startup process.
+ */
+void
+LogStartupProgress(StartupProcessOp operation, const char *path)
+{
+	long			secs;
+	int				usecs;
+	int				msecs;
+
+	/* If the feature is disabled, then no need to proceed further. */
+	if (log_startup_progress_interval < 0)
+		return;
+
+	/* Return if any invalid operation */
+	if (operation >= SYNCFS_END)
+		return;
+
+	/* If the timeout has not occurred, then no need to log the details. */
+	if (!logStartupProgressTimeout)
+		return;
+
+	/* Timeout has occurred. Log the necessary details. */
+	TimestampDifference(GetCurrentOperationStartTimestamp(),
+						GetCurrentTimestamp(),
+						&secs, &usecs);
+	msecs = usecs / 1000;
+
+	if (operation == RECOVERY_IN_PROGRESS)
+		ereport(LOG,
+				(errmsg("%s, elapsed time: %ld.%03d ms, current LSN: %X/%X",
+						get_startup_process_operation_string(operation),
+						secs * 1000 + msecs, usecs % 1000,
+						LSN_FORMAT_ARGS(ReadRecPtr))));
+	else
+		ereport(LOG,
+				(errmsg("%s, elapsed time: %ld.%03d ms, current path: %s",
+						get_startup_process_operation_string(operation),
+						secs * 1000 + msecs, usecs % 1000,
+						path)));
+
+	/*
+	 * Set the timeout flag to false and enable the timeout to log the progress if
+	 * any in the future.
+	 */
+	logStartupProgressTimeout = false;
+	enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT,
+						 log_startup_progress_interval);
+}
+
+/*
+ * Logs the completion of the operation performed during startup process and
+ * disables the timeout used for logging the progress.
+ */
+void CloseStartupProgress(StartupProcessOp operation)
+{
+	long        secs;
+	int         usecs;
+	int         msecs;
+
+	/* If the feature is disabled, then no need to proceed further. */
+	if (log_startup_progress_interval < 0)
+		return;
+
+	/* Return if any invalid operation */
+	if (operation < SYNCFS_END)
+		return;
+
+	TimestampDifference(GetCurrentOperationStartTimestamp(),
+						GetCurrentTimestamp(),
+						&secs, &usecs);
+	msecs = usecs / 1000;
+
+	ereport(LOG,
+			(errmsg("%s after %ld.%03d ms",
+					get_startup_process_operation_string(operation),
+					secs * 1000 + msecs, usecs % 1000)));
+
+	disable_timeout(LOG_STARTUP_PROGRESS_TIMEOUT, false);
+}
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 69077bd..08c271c 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -232,6 +232,7 @@ StartupProcessMain(void)
 	RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler);
 	RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler);
 	RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler);
+	RegisterTimeout(LOG_STARTUP_PROGRESS_TIMEOUT, LogStartupProgressTimeoutHandler);
 
 	/*
 	 * Unblock signals (they were blocked when the postmaster forked us)
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index e8cd7ef..e4d9a1e 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -100,6 +100,7 @@
 #include "storage/ipc.h"
 #include "utils/guc.h"
 #include "utils/resowner_private.h"
+#include "utils/timeout.h"
 
 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
 #if defined(HAVE_SYNC_FILE_RANGE)
@@ -3290,6 +3291,8 @@ do_syncfs(const char *path)
 {
 	int			fd;
 
+	LogStartupProgress(SYNCFS_IN_PROGRESS, path);
+
 	fd = OpenTransientFile(path, O_RDONLY);
 	if (fd < 0)
 	{
@@ -3366,6 +3369,8 @@ SyncDataDirectory(void)
 		DIR		   *dir;
 		struct dirent *de;
 
+		InitStartupProgress();
+
 		/*
 		 * On Linux, we don't have to open every single file one by one.  We
 		 * can use syncfs() to sync whole filesystems.  We only expect
@@ -3392,10 +3397,14 @@ SyncDataDirectory(void)
 		/* If pg_wal is a symlink, process that too. */
 		if (xlog_is_symlink)
 			do_syncfs("pg_wal");
+
+		CloseStartupProgress(SYNCFS_END);
 		return;
 	}
 #endif							/* !HAVE_SYNCFS */
 
+	InitStartupProgress();
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.  Errors in this step are even less
@@ -3421,6 +3430,8 @@ SyncDataDirectory(void)
 	if (xlog_is_symlink)
 		walkdir("pg_wal", datadir_fsync_fname, false, LOG);
 	walkdir("pg_tblspc", datadir_fsync_fname, true, LOG);
+
+	CloseStartupProgress(FSYNC_END);
 }
 
 /*
@@ -3460,6 +3471,7 @@ walkdir(const char *path,
 			continue;
 
 		snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
+		LogStartupProgress(FSYNC_IN_PROGRESS, path);
 
 		switch (get_dirent_type(subpath, de, process_symlinks, elevel))
 		{
diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
index 40c758d..0b4e62d 100644
--- a/src/backend/storage/file/reinit.c
+++ b/src/backend/storage/file/reinit.c
@@ -16,12 +16,14 @@
 
 #include <unistd.h>
 
+#include "access/xlog.h"
 #include "common/relpath.h"
 #include "storage/copydir.h"
 #include "storage/fd.h"
 #include "storage/reinit.h"
 #include "utils/hsearch.h"
 #include "utils/memutils.h"
+#include "utils/timeout.h"
 
 static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
 												  int op);
@@ -75,6 +77,8 @@ ResetUnloggedRelations(int op)
 	 */
 	spc_dir = AllocateDir("pg_tblspc");
 
+	InitStartupProgress();
+
 	while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL)
 	{
 		if (strcmp(spc_de->d_name, ".") == 0 ||
@@ -86,6 +90,7 @@ ResetUnloggedRelations(int op)
 		ResetUnloggedRelationsInTablespaceDir(temp_path, op);
 	}
 
+	LogStartupProgress(RESET_UNLOGGED_REL_END, NULL);
 	FreeDir(spc_dir);
 
 	/*
@@ -136,6 +141,7 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
 
 		snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
 				 tsdirname, de->d_name);
+		LogStartupProgress(RESET_UNLOGGED_REL_IN_PROGRESS, dbspace_path);
 		ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
 	}
 
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 68b62d5..56f3126 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -663,6 +663,12 @@ static char *recovery_target_lsn_string;
 /* should be static, but commands/variable.c needs to get at this */
 char	   *role_string;
 
+/*
+ * The interval between each progress update of the operations that occur
+ * during startup process.
+ */
+int			log_startup_progress_interval = -1;
+
 
 /*
  * Displayable names for context types (enum GucContext)
@@ -3539,6 +3545,18 @@ static struct config_int ConfigureNamesInt[] =
 		check_client_connection_check_interval, NULL, NULL
 	},
 
+	{
+		{"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN,
+			gettext_noop("Sets the time interval between each progress update "
+						 "of the startup process."),
+			gettext_noop("Zero logs all messages. -1 turns this feature off."),
+			GUC_UNIT_MS
+		},
+		&log_startup_progress_interval,
+		-1, -1, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index ddbb6dc..3c72d9f 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -521,6 +521,12 @@
 					# are logged regardless of their duration; 1.0 logs all
 					# statements from all transactions, 0.0 never logs
 
+#log_startup_progress_interval = -1	# Sets the time interval between each
+									# progress update of the startup process.
+									# -1 disables the feature, zero logs all
+									# messages, > 0 indicates the interval in
+									# milliseconds.
+
 # - What to Log -
 
 #debug_print_parse = off
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 77187c1..6abf9ed 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -287,6 +287,23 @@ typedef enum WALAvailability
 	WALAVAIL_REMOVED			/* WAL segment has been removed */
 } WALAvailability;
 
+/*
+ * Codes of the operations performed during startup process
+ */
+typedef enum StartupProcessOp
+{
+	/* Codes for in-progress operations */
+	SYNCFS_IN_PROGRESS,
+	FSYNC_IN_PROGRESS,
+	RECOVERY_IN_PROGRESS,
+	RESET_UNLOGGED_REL_IN_PROGRESS,
+	/* Codes for end of operations */
+	SYNCFS_END,
+	FSYNC_END,
+	RECOVERY_END,
+	RESET_UNLOGGED_REL_END
+} StartupProcessOp;
+
 struct XLogRecData;
 
 extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
@@ -391,6 +408,12 @@ extern void do_pg_abort_backup(int code, Datum arg);
 extern void register_persistent_abort_backup_handler(void);
 extern SessionBackupState get_backup_status(void);
 
+extern TimestampTz GetCurrentOperationStartTimestamp(void);
+extern void InitStartupProgress(void);
+extern void LogStartupProgress(StartupProcessOp operation, const char *path);
+extern void CloseStartupProgress(StartupProcessOp operation);
+extern void LogStartupProgressTimeoutHandler(void);
+
 /* File path names (all relative to $PGDATA) */
 #define RECOVERY_SIGNAL_FILE	"recovery.signal"
 #define STANDBY_SIGNAL_FILE		"standby.signal"
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index a7c3a49..7c12632 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -282,6 +282,8 @@ extern int	tcp_user_timeout;
 extern bool trace_sort;
 #endif
 
+extern int  log_startup_progress_interval;
+
 /*
  * Functions exported by guc.c
  */
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index 93e6a69..ef892dd 100644
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -33,6 +33,7 @@ typedef enum TimeoutId
 	IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
 	IDLE_SESSION_TIMEOUT,
 	CLIENT_CONNECTION_CHECK_TIMEOUT,
+	LOG_STARTUP_PROGRESS_TIMEOUT,
 	/* First user-definable timeout reason */
 	USER_TIMEOUT,
 	/* Maximum number of timeout reasons */
-- 
1.8.3.1

