From 0bdf2771a648c8ff480da8b79c877915ee6e922f Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Tue, 9 Aug 2022 14:59:22 +0000
Subject: [PATCH v3] Progress report removal of temp files and temp relation
 files

At times, there can be many temp files (under pgsql_tmp) and temp
relation files (under pg_tblspc), and removing them after a crash
may take a long time, during which users have no clue about what's
going on in the server before it comes up online. This patch uses
ereport_progress to report the progress of the file
removal.
---
 doc/src/sgml/config.sgml                      | 22 +++++++++++++++++++
 src/backend/postmaster/postmaster.c           | 14 +++++++++++-
 src/backend/storage/file/fd.c                 | 14 ++++++++++++
 src/backend/utils/misc/guc.c                  | 12 ++++++++++
 src/backend/utils/misc/postgresql.conf.sample |  5 +++++
 src/backend/utils/misc/timeout.c              | 11 ++++++++--
 src/include/postmaster/postmaster.h           |  1 +
 7 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 2522f4c8c5..e10b5d7c34 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6761,6 +6761,28 @@ local0.*    /var/log/postgresql
        </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-log-postmaster-progress-interval" xreflabel="log_postmaster_progress_interval">
+      <term><varname>log_postmaster_progress_interval</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>log_postmaster_progress_interval</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+       <listitem>
+        <para>
+         Sets the amount of time after which the postmaster process will log
+         a message about a long-running operation that is still in progress,
+         as well as the interval between further progress messages for that
+         operation, similar to the <xref linkend="guc-log-startup-progress-interval"/>
+         parameter. The default is 10 seconds. A setting of <literal>0</literal>
+         disables the feature.  If this value is specified without units,
+         it is taken as milliseconds.  This setting is applied separately to
+         each operation.
+         This parameter can only be set in the <filename>postgresql.conf</filename>
+         file or on the server command line.
+        </para>
+       </listitem>
+     </varlistentry>
+
      </variablelist>
 
     <para>
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 81cb585891..32b70420a4 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -247,6 +247,11 @@ char	   *bonjour_name;
 bool		restart_after_crash = true;
 bool		remove_temp_files_after_crash = true;
 
+/*
+ * Time between progress updates for long-running postmaster operations.
+ */
+int			log_postmaster_progress_interval = 10000;	/* 10 sec */
+
 /* PIDs of special child processes; 0 when not running */
 static pid_t StartupPID = 0,
 			BgWriterPID = 0,
@@ -653,7 +658,6 @@ PostmasterMain(int argc, char *argv[])
 	pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */
 	pqsignal_pm(SIGQUIT, pmdie);	/* send SIGQUIT and die */
 	pqsignal_pm(SIGTERM, pmdie);	/* wait for children and shut down */
-	pqsignal_pm(SIGALRM, SIG_IGN);	/* ignored */
 	pqsignal_pm(SIGPIPE, SIG_IGN);	/* ignored */
 	pqsignal_pm(SIGUSR1, sigusr1_handler);	/* message from child process */
 	pqsignal_pm(SIGUSR2, dummy_handler);	/* unused, reserve for children */
@@ -688,6 +692,11 @@ PostmasterMain(int argc, char *argv[])
 	pqsignal_pm(SIGXFSZ, SIG_IGN);	/* ignored */
 #endif
 
+	InitializeTimeouts();	/* establishes SIGALRM handler */
+
+	RegisterTimeout(PROGRESS_REPORT_TIMEOUT,
+					progress_report_timeout_handler);
+
 	/*
 	 * Options setup
 	 */
@@ -1119,6 +1128,9 @@ PostmasterMain(int argc, char *argv[])
 	/* Write out nondefault GUC settings for child processes to use */
 	write_nondefault_variables(PGC_POSTMASTER);
 
+	/* Prepare to report progress of the temporary files removal phase */
+	begin_progress_report_phase(log_postmaster_progress_interval);
+
 	/*
 	 * Clean out the temp directory used to transmit parameters to child
 	 * processes (see internal_forkexec, below).  We must do this before
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a227f38775..74c6f264e3 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -97,11 +97,13 @@
 #include "pgstat.h"
 #include "port/pg_iovec.h"
 #include "portability/mem.h"
+#include "postmaster/postmaster.h"
 #include "postmaster/startup.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "utils/guc.h"
 #include "utils/resowner_private.h"
+#include "utils/timeout.h"
 
 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
 #if defined(HAVE_SYNC_FILE_RANGE)
@@ -3081,6 +3083,12 @@ RemovePgTempFiles(void)
 	DIR		   *spc_dir;
 	struct dirent *spc_de;
 
+	/*
+	 * Prepare to report progress of the removal of temporary files and
+	 * temporary relation files.
+	 */
+	begin_progress_report_phase(log_postmaster_progress_interval);
+
 	/*
 	 * First process temp files in pg_default ($PGDATA/base)
 	 */
@@ -3154,6 +3162,9 @@ RemovePgTempFilesInDir(const char *tmpdirname, bool missing_ok, bool unlink_all)
 		snprintf(rm_path, sizeof(rm_path), "%s/%s",
 				 tmpdirname, temp_de->d_name);
 
+		ereport_progress("removing temporary files under pgsql_tmp directory, elapsed time: %ld.%02d s, current file: %s",
+						 rm_path);
+
 		if (unlink_all ||
 			strncmp(temp_de->d_name,
 					PG_TEMP_FILE_PREFIX,
@@ -3244,6 +3255,9 @@ RemovePgTempRelationFilesInDbspace(const char *dbspacedirname)
 		snprintf(rm_path, sizeof(rm_path), "%s/%s",
 				 dbspacedirname, de->d_name);
 
+		ereport_progress("removing temporary relation files under pg_tblspc directory, elapsed time: %ld.%02d s, current file: %s",
+						 rm_path);
+
 		if (unlink(rm_path) < 0)
 			ereport(LOG,
 					(errcode_for_file_access(),
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 5db5df6285..917717e9e9 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3664,6 +3664,18 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"log_postmaster_progress_interval", PGC_SIGHUP, LOGGING_WHEN,
+			gettext_noop("Time between progress updates for "
+						 "long-running postmaster operations."),
+			gettext_noop("0 turns this feature off."),
+			GUC_UNIT_MS,
+		},
+		&log_postmaster_progress_interval,
+		10000, 0, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 90bec0502c..21d882ee3b 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -540,6 +540,11 @@
 					# 0 disables the feature, > 0 indicates
 					# the interval in milliseconds.
 
+#log_postmaster_progress_interval = 10s	# Time between progress updates for
+					# long-running postmaster operations.
+					# 0 disables the feature, > 0 indicates
+					# the interval in milliseconds.
+
 # - What to Log -
 
 #debug_print_parse = off
diff --git a/src/backend/utils/misc/timeout.c b/src/backend/utils/misc/timeout.c
index 6f5e08bc30..c4c567dc61 100644
--- a/src/backend/utils/misc/timeout.c
+++ b/src/backend/utils/misc/timeout.c
@@ -16,6 +16,7 @@
 
 #include <sys/time.h>
 
+#include "libpq/pqsignal.h"
 #include "miscadmin.h"
 #include "storage/proc.h"
 #include "utils/timeout.h"
@@ -375,8 +376,11 @@ handle_sig_alarm(SIGNAL_ARGS)
 	/*
 	 * SIGALRM is always cause for waking anything waiting on the process
 	 * latch.
+	 *
+	 * The postmaster has no latch, so skip this when MyLatch is not set.
 	 */
-	SetLatch(MyLatch);
+	if (MyLatch)
+		SetLatch(MyLatch);
 
 	/*
 	 * Always reset signal_pending, even if !alarm_enabled, since indeed no
@@ -494,7 +498,10 @@ InitializeTimeouts(void)
 	all_timeouts_initialized = true;
 
 	/* Now establish the signal handler */
-	pqsignal(SIGALRM, handle_sig_alarm);
+	if (MyProcPid == PostmasterPid)
+		pqsignal_pm(SIGALRM, handle_sig_alarm);
+	else
+		pqsignal(SIGALRM, handle_sig_alarm);
 }
 
 /*
diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h
index 90e333ccd2..540e9e9895 100644
--- a/src/include/postmaster/postmaster.h
+++ b/src/include/postmaster/postmaster.h
@@ -30,6 +30,7 @@ extern PGDLLIMPORT bool enable_bonjour;
 extern PGDLLIMPORT char *bonjour_name;
 extern PGDLLIMPORT bool restart_after_crash;
 extern PGDLLIMPORT bool remove_temp_files_after_crash;
+extern PGDLLIMPORT int log_postmaster_progress_interval;
 
 #ifdef WIN32
 extern PGDLLIMPORT HANDLE PostmasterHandle;
-- 
2.34.1

