Hi,

I would like to propose a way to get the call stack of the postgres
processes by connecting to the server. This helps in diagnosing problems
in a customer environment, for example when a process is hung or has
been running for a long time.
The idea here is to implement and expose a pg_print_callstack function.
Internally, the connected backend sends SIGUSR1 to the postmaster
process after setting PMSIGNAL_BACKTRACE_EMIT. The postmaster then sets
PROCSIG_BACKTRACE_PRINT and sends SIGUSR1 to each process that has
access to ProcSignal. The syslogger and stats collector processes do
not have access to ProcSignal, so multiplexing with SIGUSR1 is not
possible for them; they are sent SIGUSR2 instead. Once a process
receives the signal, it logs its backtrace.
Attached is a WIP patch for the same.
Thoughts?

Regards,
Vignesh
EnterpriseDB: http://www.enterprisedb.com
From c1006110bdeac2135d1c8e9220f65d50cd49ab63 Mon Sep 17 00:00:00 2001
From: Vignesh C <vignes...@gmail.com>
Date: Sun, 22 Nov 2020 05:58:24 +0530
Subject: [PATCH] Print backtrace of postgres processes that are part of this
 instance.

The idea here is to implement and expose a pg_print_callstack function.
Internally, the connected backend sends SIGUSR1 to the postmaster after
setting PMSIGNAL_BACKTRACE_EMIT. The postmaster then sets
PROCSIG_BACKTRACE_PRINT and sends SIGUSR1 to each process that has access to
ProcSignal. The syslogger and stats collector processes do not have access to
ProcSignal, so multiplexing with SIGUSR1 is not possible for them; they are
sent SIGUSR2 instead. Once a process receives the signal, it logs its
backtrace.
---
 src/backend/postmaster/pgstat.c       | 19 ++++++++++++++++++-
 src/backend/postmaster/postmaster.c   | 20 ++++++++++++++++++++
 src/backend/postmaster/syslogger.c    | 16 +++++++++++++++-
 src/backend/storage/ipc/procsignal.c  | 28 ++++++++++++++++++++++++++++
 src/backend/storage/ipc/signalfuncs.c | 22 ++++++++++++++++++++++
 src/backend/tcop/postgres.c           | 31 +++++++++++++++++++++++++++++++
 src/include/catalog/pg_proc.dat       |  6 +++++-
 src/include/storage/pmsignal.h        |  2 ++
 src/include/storage/procsignal.h      |  2 ++
 src/include/tcop/tcopprot.h           |  2 ++
 10 files changed, 145 insertions(+), 3 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index e76e627..bd38264 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -62,6 +62,7 @@
 #include "storage/pg_shmem.h"
 #include "storage/procsignal.h"
 #include "storage/sinvaladt.h"
+#include "tcop/tcopprot.h"
 #include "utils/ascii.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
@@ -372,6 +373,8 @@ static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len
 static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
 static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
 
+static void sigUsr2Handler(SIGNAL_ARGS);
+
 /* ------------------------------------------------------------
  * Public functions called from postmaster follow
  * ------------------------------------------------------------
@@ -4666,7 +4669,7 @@ PgstatCollectorMain(int argc, char *argv[])
 	pqsignal(SIGALRM, SIG_IGN);
 	pqsignal(SIGPIPE, SIG_IGN);
 	pqsignal(SIGUSR1, SIG_IGN);
-	pqsignal(SIGUSR2, SIG_IGN);
+	pqsignal(SIGUSR2, sigUsr2Handler);
 	/* Reset some signals that are accepted by postmaster but not here */
 	pqsignal(SIGCHLD, SIG_DFL);
 	PG_SETMASK(&UnBlockSig);
@@ -7242,3 +7245,17 @@ pgstat_count_slru_truncate(int slru_idx)
 {
 	slru_entry(slru_idx)->m_truncate += 1;
 }
+
+/*
+ * sigUsr2Handler
+ *
+ * handle SIGUSR2 signal to print call stack of pgstat process.
+ */
+static void
+sigUsr2Handler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	LogBackTrace();
+	errno = save_errno;
+}
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index b7799ed..dd7c930 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -5149,6 +5149,26 @@ sigusr1_handler(SIGNAL_ARGS)
 		StartWorkerNeeded = true;
 	}
 
+	/* Process backtrace emit signal. */
+	if (CheckPostmasterSignal(PMSIGNAL_BACKTRACE_EMIT))
+	{
+		EmitProcSignalPrintCallStack();
+
+		/*
+		 * The pgstat and syslogger processes do not have access to
+		 * ProcSignal, so multiplexing with SIGUSR1 is not possible for them;
+		 * send them SIGUSR2 instead.
+		 */
+		if (PgStatPID)
+			kill(PgStatPID, SIGUSR2);
+
+		if (SysLoggerPID)
+			kill(SysLoggerPID, SIGUSR2);
+
+		/* Print call stack for postmaster process. */
+		LogBackTrace();
+	}
+
 	/*
 	 * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
 	 * unexpected states. If the startup process quickly starts up, completes
diff --git a/src/backend/postmaster/syslogger.c b/src/backend/postmaster/syslogger.c
index faa82ec..5f83ee3 100644
--- a/src/backend/postmaster/syslogger.c
+++ b/src/backend/postmaster/syslogger.c
@@ -145,6 +145,7 @@ static void logfile_rotate(bool time_based_rotation, int size_rotation_for);
 static char *logfile_getname(pg_time_t timestamp, const char *suffix);
 static void set_next_rotation_time(void);
 static void sigUsr1Handler(SIGNAL_ARGS);
+static void sigUsr2Handler(SIGNAL_ARGS);
 static void update_metainfo_datafile(void);
 
 
@@ -246,7 +247,7 @@ SysLoggerMain(int argc, char *argv[])
 	pqsignal(SIGALRM, SIG_IGN);
 	pqsignal(SIGPIPE, SIG_IGN);
 	pqsignal(SIGUSR1, sigUsr1Handler);	/* request log rotation */
-	pqsignal(SIGUSR2, SIG_IGN);
+	pqsignal(SIGUSR2, sigUsr2Handler);
 
 	/*
 	 * Reset some signals that are accepted by postmaster but not here
@@ -1563,3 +1564,16 @@ sigUsr1Handler(SIGNAL_ARGS)
 
 	errno = save_errno;
 }
+
+/*
+ * sigUsr2Handler - handle SIGUSR2 signal to print call stack.
+ */
+static void
+sigUsr2Handler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	LogBackTrace();
+
+	errno = save_errno;
+}
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index ffe67ac..f1d72e0 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -302,6 +302,31 @@ SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
 }
 
 /*
+ * EmitProcSignalPrintCallStack
+ *
+ * Set PROCSIG_BACKTRACE_PRINT for every process that has a ProcSignal slot and
+ * send it SIGUSR1; each process logs its backtrace once it receives the signal.
+ */
+void
+EmitProcSignalPrintCallStack(void)
+{
+	for (int i = NumProcSignalSlots - 1; i >= 0; i--)
+	{
+		volatile ProcSignalSlot *slot = &ProcSignal->psh_slot[i];
+		pid_t		pid = slot->pss_pid;
+
+		if (pid != 0)
+		{
+			/* see SendProcSignal for details */
+			slot->pss_signalFlags[PROCSIG_BACKTRACE_PRINT] = true;
+
+			/* Send SIGUSR1 to the process so that it prints its backtrace. */
+			kill(pid, SIGUSR1);
+		}
+	}
+}
+
+/*
  * EmitProcSignalBarrier
  *		Send a signal to every Postgres process
  *
@@ -585,6 +610,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
 	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
 		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
 
+	if (CheckProcSignal(PROCSIG_BACKTRACE_PRINT))
+		LogBackTrace();
+
 	SetLatch(MyLatch);
 
 	latch_sigusr1_handler();
diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c
index d822e82..3c3eb0d 100644
--- a/src/backend/storage/ipc/signalfuncs.c
+++ b/src/backend/storage/ipc/signalfuncs.c
@@ -215,3 +215,25 @@ pg_rotate_logfile_v2(PG_FUNCTION_ARGS)
 	SendPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE);
 	PG_RETURN_BOOL(true);
 }
+
+/*
+ * pg_print_callstack - print the call stacks of the processes that are part
+ * of this instance.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_print_callstack(PG_FUNCTION_ARGS)
+{
+#ifdef HAVE_BACKTRACE_SYMBOLS
+	SendPostmasterSignal(PMSIGNAL_BACKTRACE_EMIT);
+#else
+	{
+		ereport(WARNING,
+				(errmsg("backtrace generation is not supported by this installation")));
+		PG_RETURN_BOOL(false);
+	}
+#endif
+	PG_RETURN_BOOL(true);
+}
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 7c5f7c7..1079a65 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -19,6 +19,7 @@
 
 #include "postgres.h"
 
+#include <execinfo.h>
 #include <fcntl.h>
 #include <limits.h>
 #include <signal.h>
@@ -2887,6 +2888,36 @@ FloatExceptionHandler(SIGNAL_ARGS)
 }
 
 /*
+ * LogBackTrace
+ *
+ * Get the current backtrace and write it to the log.
+ */
+void
+LogBackTrace(void)
+{
+	int			save_errno = errno;
+
+	void	   *buf[100];
+	int			nframes;
+	char	  **strfrms;
+	StringInfoData errtrace;
+
+	nframes = backtrace(buf, lengthof(buf));
+	strfrms = backtrace_symbols(buf, nframes);
+	if (strfrms == NULL)
+		return;
+
+	initStringInfo(&errtrace);
+	for (int i = 0; i < nframes; i++)
+		appendStringInfo(&errtrace, "\n%s", strfrms[i]);
+	free(strfrms);
+
+	elog(LOG, "current backtrace:%s", errtrace.data);
+
+	errno = save_errno;
+}
+
+/*
  * RecoveryConflictInterrupt: out-of-line portion of recovery conflict
  * handling following receipt of SIGUSR1. Designed to be similar to die()
  * and StatementCancelHandler(). Called only by a normal user backend
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index c01da4b..8614d94 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11005,4 +11005,8 @@
   proname => 'is_normalized', prorettype => 'bool', proargtypes => 'text text',
   prosrc => 'unicode_is_normalized' },
 
-]
+{ oid => '4388', descr => 'print callstack of process that are part of this instance',
+  proname => 'pg_print_callstack', provolatile => 'v', prorettype => 'bool',
+  proargtypes => '', prosrc => 'pg_print_callstack' },
+
+]
\ No newline at end of file
diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h
index 56c5ec4..cda2995 100644
--- a/src/include/storage/pmsignal.h
+++ b/src/include/storage/pmsignal.h
@@ -42,6 +42,8 @@ typedef enum
 	PMSIGNAL_START_WALRECEIVER, /* start a walreceiver */
 	PMSIGNAL_ADVANCE_STATE_MACHINE, /* advance postmaster's state machine */
 
+	PMSIGNAL_BACKTRACE_EMIT,	/* send PROCSIG_BACKTRACE_PRINT to all backend */
+
 	NUM_PMSIGNALS				/* Must be last value of enum! */
 } PMSignalReason;
 
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index 5cb3969..c8bbeae 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -43,6 +43,8 @@ typedef enum
 	PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
 	PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
 
+	PROCSIG_BACKTRACE_PRINT,	/* ask backend to print the current backtrace */
+
 	NUM_PROCSIGNALS				/* Must be last! */
 } ProcSignalReason;
 
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index bd30607..1188639 100644
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -71,6 +71,8 @@ extern void RecoveryConflictInterrupt(ProcSignalReason reason); /* called from S
 extern void ProcessClientReadInterrupt(bool blocked);
 extern void ProcessClientWriteInterrupt(bool blocked);
 
+extern void LogBackTrace(void); /* called from SIGUSR1/SIGUSR2 handlers */
+extern void EmitProcSignalPrintCallStack(void);
 extern void process_postgres_switches(int argc, char *argv[],
 									  GucContext ctx, const char **dbname);
 extern void PostgresMain(int argc, char *argv[],
-- 
1.8.3.1

Reply via email to