On Wed, Jan 27, 2021 at 10:40 PM Andres Freund <and...@anarazel.de> wrote:
>
> Hi,
>
> On 2021-01-27 19:05:16 +0530, vignesh C wrote:
>
> >  /*
> > + * LogBackTrace
> > + *
> > + * Get the backtrace and log the backtrace to log file.
> > + */
> > +void
> > +LogBackTrace(void)
> > +{
> > +     int                     save_errno = errno;
> > +
> > +     void       *buf[100];
> > +     int                     nframes;
> > +     char      **strfrms;
> > +     StringInfoData errtrace;
> > +
> > +     /* OK to process messages.  Reset the flag saying there are more to 
> > do. */
> > +     PrintBacktracePending = false;
>
> ISTM that it'd be better to do this in the caller, allowing this
> function to be used outside of signal triggered backtraces.
>
> >
> > +extern void LogBackTrace(void); /* Called from 
> > EmitProcSignalPrintCallStack */
>
> I don't think this comment is correct anymore?

Thanks for the comments, I have fixed and attached an updated patch
with the fixes for the same.
Thoughts?

Regards,
Vignesh
From f1d592cd02379974c8875a950f82fd4df13b7837 Mon Sep 17 00:00:00 2001
From: Vignesh C <vignes...@enterprisedb.com>
Date: Wed, 27 Jan 2021 18:20:13 +0530
Subject: [PATCH v4] Print backtrace of postgres process that are part of this
 instance.

The idea here is to implement & expose pg_print_callstack function, internally
what this function does is, the connected backend will send SIGUSR1 signal by
setting PMSIGNAL_BACKTRACE_EMIT to the postmaster process. Once the process
receives this signal it will log the backtrace of the process.
---
 doc/src/sgml/func.sgml                | 75 +++++++++++++++++++++++++++++++++++
 src/backend/postmaster/autovacuum.c   |  7 ++++
 src/backend/postmaster/checkpointer.c |  8 ++++
 src/backend/postmaster/interrupt.c    |  8 ++++
 src/backend/storage/ipc/procsignal.c  | 33 +++++++++++++++
 src/backend/storage/ipc/signalfuncs.c | 59 ++++++++++++++++++++++++++-
 src/backend/tcop/postgres.c           | 38 ++++++++++++++++++
 src/backend/utils/init/globals.c      |  1 +
 src/bin/pg_ctl/t/005_backtrace.pl     | 73 ++++++++++++++++++++++++++++++++++
 src/include/catalog/pg_proc.dat       |  5 +++
 src/include/miscadmin.h               |  2 +
 src/include/storage/pmsignal.h        |  2 +
 src/include/storage/procsignal.h      |  3 ++
 src/include/tcop/tcopprot.h           |  1 +
 14 files changed, 314 insertions(+), 1 deletion(-)
 create mode 100644 src/bin/pg_ctl/t/005_backtrace.pl

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index aa99665..4ff6e7f 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -24709,6 +24709,25 @@ SELECT collation for ('foo' COLLATE "de_DE");
         however only superusers can terminate superuser backends.
        </para></entry>
       </row>
+
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_print_callstack</primary>
+        </indexterm>
+        <function>pg_print_callstack</function> ( <parameter>pid</parameter> <type>integer</type> )
+        <returnvalue>boolean</returnvalue>
+       </para>
+       <para>
+        Prints the callstack whose backend process has the specified process ID.
+        Callstack will be printed based on the log configuration set. See
+        <xref linkend="runtime-config-logging"/> for more information.  This is
+        allowed if the calling role is a member of the role whose backend
+        callstack is being printed or the calling role has been granted
+        <literal>pg_print_callstack</literal>, however only superusers can
+        print callstack of superuser backends.
+       </para></entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
@@ -24728,6 +24747,62 @@ SELECT collation for ('foo' COLLATE "de_DE");
     <structname>pg_stat_activity</structname> view.
    </para>
 
+   <para>
+    <function>pg_print_callstack</function> can be used to print callstack of
+    a backend porcess. For example:
+<programlisting>
+postgres=# select pg_print_callstack(pg_backend_pid());
+ pg_print_callstack
+--------------------
+ t
+(1 row)
+
+The callstack will be logged in the log file. For example:
+2021-01-27 11:33:50.247 IST [111735] LOG:  current backtrace:
+        postgres: postgresdba postgres [local] SELECT(LogBackTrace+0x33) [0x9501cd]
+        postgres: postgresdba postgres [local] SELECT(ProcessInterrupts+0x774) [0x950bac]
+        postgres: postgresdba postgres [local] SELECT() [0x761ee8]
+        postgres: postgresdba postgres [local] SELECT() [0x71bc39]
+        postgres: postgresdba postgres [local] SELECT() [0x71e3df]
+        postgres: postgresdba postgres [local] SELECT(standard_ExecutorRun+0x1d6) [0x71c25d]
+        postgres: postgresdba postgres [local] SELECT(ExecutorRun+0x55) [0x71c085]
+        postgres: postgresdba postgres [local] SELECT() [0x953f3d]
+        postgres: postgresdba postgres [local] SELECT(PortalRun+0x262) [0x953bf6]
+        postgres: postgresdba postgres [local] SELECT() [0x94dafa]
+        postgres: postgresdba postgres [local] SELECT(PostgresMain+0x7d7) [0x951dea]
+        postgres: postgresdba postgres [local] SELECT() [0x896a7b]
+        postgres: postgresdba postgres [local] SELECT() [0x896401]
+        postgres: postgresdba postgres [local] SELECT() [0x8929c5]
+        postgres: postgresdba postgres [local] SELECT(PostmasterMain+0x116b) [0x89229c]
+        postgres: postgresdba postgres [local] SELECT() [0x795efa]
+        /lib64/libc.so.6(__libc_start_main+0xf5) [0x7f6c12697505]
+        postgres: postgresdba postgres [local] SELECT() [0x484229]
+
+</programlisting>
+    You can get the file name and line number by using:
+<programlisting>
+1)  "info line *address" from gdb on postgres executable. For example:
+gdb ./postgres
+GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-115.el7
+Copyright (C) 2013 Free Software Foundation, Inc.
+License GPLv3+: GNU GPL version 3 or later <literal>&lt;</literal>http://gnu.org/licenses/gpl.html<literal>&gt;</literal>
+This is free software: you are free to change and redistribute it.
+There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
+and "show warranty" for details.
+This GDB was configured as "x86_64-redhat-linux-gnu".
+For bug reporting instructions, please see:
+<literal>&lt;</literal>http://www.gnu.org/software/gdb/bugs/<literal>&gt;</literal>...
+Reading symbols from /home/vignesh/postgres/postgres/inst/bin/postgres...done.
+(gdb) info line *0x71c25d
+Line 378 of "execMain.c" starts at address 0x71c25d <literal>&lt;</literal>standard_ExecutorRun+470<literal>&gt;</literal> and ends at 0x71c263 <literal>&lt;</literal>standard_ExecutorRun+476<literal>&gt;</literal>.
+OR 
+2) Using "addr2line -e postgres address", For exmple:
+addr2line -e ./postgres 0x71c25d
+/home/vignesh/postgres/postgres/src/backend/executor/execMain.c:378
+</programlisting>
+</para>
+
+
   </sect2>
 
   <sect2 id="functions-admin-backup">
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 47e60ca..0f27f78 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -836,6 +836,13 @@ HandleAutoVacLauncherInterrupts(void)
 	if (ProcSignalBarrierPending)
 		ProcessProcSignalBarrier();
 
+	/* Process printing back trace */
+	if (PrintBacktracePending)
+	{
+		PrintBacktracePending = false;
+		LogBackTrace();
+	}
+
 	/* Process sinval catchup interrupts that happened while sleeping */
 	ProcessCatchupInterrupt();
 }
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 54a818b..d26bf16 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -57,6 +57,7 @@
 #include "storage/shmem.h"
 #include "storage/smgr.h"
 #include "storage/spin.h"
+#include "tcop/tcopprot.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/resowner.h"
@@ -547,6 +548,13 @@ HandleCheckpointerInterrupts(void)
 	if (ProcSignalBarrierPending)
 		ProcessProcSignalBarrier();
 
+	/* Process printing back trace */
+	if (PrintBacktracePending)
+	{
+		PrintBacktracePending = false;
+		LogBackTrace();
+	}
+
 	if (ConfigReloadPending)
 	{
 		ConfigReloadPending = false;
diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c
index dd9136a..8e1029b 100644
--- a/src/backend/postmaster/interrupt.c
+++ b/src/backend/postmaster/interrupt.c
@@ -21,6 +21,7 @@
 #include "storage/ipc.h"
 #include "storage/latch.h"
 #include "storage/procsignal.h"
+#include "tcop/tcopprot.h"
 #include "utils/guc.h"
 
 volatile sig_atomic_t ConfigReloadPending = false;
@@ -41,6 +42,13 @@ HandleMainLoopInterrupts(void)
 		ProcessConfigFile(PGC_SIGHUP);
 	}
 
+	/* Process printing back trace */
+	if (PrintBacktracePending)
+	{
+		PrintBacktracePending = false;
+		LogBackTrace();
+	}
+
 	if (ShutdownRequestPending)
 		proc_exit(0);
 }
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index c43cdd6..b0479aa 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -308,6 +308,21 @@ SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
 }
 
 /*
+ * EmitProcSignalPrintCallStack
+ *
+ * Send SIGUSR1 to postgres backend whose pid matches bt_pid by setting
+ * PROCSIG_BACKTRACE_PRINT, the postgres processes will print the backtrace once
+ * the signal is received.
+ */
+bool
+EmitProcSignalPrintCallStack(int bt_pid)
+{
+	int			ret = SendProcSignal(bt_pid, PROCSIG_BACKTRACE_PRINT, InvalidBackendId);
+
+	return (ret == 0);
+}
+
+/*
  * EmitProcSignalBarrier
  *		Send a signal to every Postgres process
  *
@@ -453,6 +468,21 @@ HandleProcSignalBarrierInterrupt(void)
 }
 
 /*
+ * Handle receipt of an print backtrace.
+ *
+ * Note: this is called within a signal handler!  All we can do is set
+ * a flag that will cause the next CHECK_FOR_INTERRUPTS() to invoke
+ * LogBackTrace().
+ */
+static void
+HandlePrintBacktraceInterrupt(void)
+{
+	InterruptPending = true;
+	PrintBacktracePending = true;
+	/* latch will be set by procsignal_sigusr1_handler */
+}
+
+/*
  * Perform global barrier related interrupt checking.
  *
  * Any backend that participates in ProcSignal signaling must arrange to
@@ -686,6 +716,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
 	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
 		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
 
+	if (CheckProcSignal(PROCSIG_BACKTRACE_PRINT))
+		HandlePrintBacktraceInterrupt();
+
 	SetLatch(MyLatch);
 
 	latch_sigusr1_handler();
diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c
index 69fe23a..7b0e1bc 100644
--- a/src/backend/storage/ipc/signalfuncs.c
+++ b/src/backend/storage/ipc/signalfuncs.c
@@ -22,6 +22,7 @@
 #include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
+#include "tcop/tcopprot.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
 
@@ -44,8 +45,9 @@
 #define SIGNAL_BACKEND_ERROR 1
 #define SIGNAL_BACKEND_NOPERMISSION 2
 #define SIGNAL_BACKEND_NOSUPERUSER 3
+
 static int
-pg_signal_backend(int pid, int sig)
+check_valid_pid(int pid)
 {
 	PGPROC	   *proc = BackendPidGetProc(pid);
 
@@ -77,6 +79,17 @@ pg_signal_backend(int pid, int sig)
 		!has_privs_of_role(GetUserId(), DEFAULT_ROLE_SIGNAL_BACKENDID))
 		return SIGNAL_BACKEND_NOPERMISSION;
 
+	return 0;
+}
+
+static int
+pg_signal_backend(int pid, int sig)
+{
+	int			ret = check_valid_pid(pid);
+
+	if (ret)
+		return ret;
+
 	/*
 	 * Can the process we just validated above end, followed by the pid being
 	 * recycled for a new process, before reaching here?  Then we'd be trying
@@ -215,3 +228,47 @@ pg_rotate_logfile_v2(PG_FUNCTION_ARGS)
 	SendPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE);
 	PG_RETURN_BOOL(true);
 }
+
+/*
+ * pg_print_callstack - print callstack of backend process.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+Datum
+pg_print_callstack(PG_FUNCTION_ARGS)
+{
+	int			bt_pid = PG_ARGISNULL(0) ? -1 : PG_GETARG_INT32(0);
+
+#ifdef HAVE_BACKTRACE_SYMBOLS
+	{
+		int			r = check_valid_pid(bt_pid);
+
+		if (r == SIGNAL_BACKEND_NOSUPERUSER)
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be a superuser to print backtrace of superuser query process")));
+
+		if (r == SIGNAL_BACKEND_NOPERMISSION)
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("must be a member of the role whose query process's backtrace is being logged or member of pg_signal_backend")));
+
+		if (r)
+			PG_RETURN_BOOL(false);
+
+		if (EmitProcSignalPrintCallStack(bt_pid))
+			PG_RETURN_BOOL(true);
+
+		ereport(WARNING,
+				(errmsg("failed to send signal to postmaster: %m")));
+		PG_RETURN_BOOL(false);
+	}
+#else
+	{
+		ereport(WARNING,
+				(errmsg("backtrace generation is not supported by this installation")));
+		PG_RETURN_BOOL(false);
+	}
+#endif
+}
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index cb5a961..7aefde3 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -19,6 +19,7 @@
 
 #include "postgres.h"
 
+#include <execinfo.h>
 #include <fcntl.h>
 #include <limits.h>
 #include <signal.h>
@@ -2921,6 +2922,36 @@ FloatExceptionHandler(SIGNAL_ARGS)
 }
 
 /*
+ * LogBackTrace
+ *
+ * Get the backtrace and log the backtrace to log file.
+ */
+void
+LogBackTrace(void)
+{
+	int			save_errno = errno;
+
+	void	   *buf[100];
+	int			nframes;
+	char	  **strfrms;
+	StringInfoData errtrace;
+
+	nframes = backtrace(buf, lengthof(buf));
+	strfrms = backtrace_symbols(buf, nframes);
+	if (strfrms == NULL)
+		return;
+
+	initStringInfo(&errtrace);
+	for (int i = 0; i < nframes; i++)
+		appendStringInfo(&errtrace, "\n%s", strfrms[i]);
+	free(strfrms);
+
+	elog(LOG_SERVER_ONLY, "current backtrace:%s", errtrace.data);
+
+	errno = save_errno;
+}
+
+/*
  * RecoveryConflictInterrupt: out-of-line portion of recovery conflict
  * handling following receipt of SIGUSR1. Designed to be similar to die()
  * and StatementCancelHandler(). Called only by a normal user backend
@@ -3274,6 +3305,13 @@ ProcessInterrupts(void)
 
 	if (ParallelMessagePending)
 		HandleParallelMessages();
+
+	/* Process printing back trace */
+	if (PrintBacktracePending)
+	{
+		PrintBacktracePending = false;
+		LogBackTrace();
+	}
 }
 
 
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index ea28769..0cac8d2 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -37,6 +37,7 @@ volatile sig_atomic_t ProcSignalBarrierPending = false;
 volatile uint32 InterruptHoldoffCount = 0;
 volatile uint32 QueryCancelHoldoffCount = 0;
 volatile uint32 CritSectionCount = 0;
+volatile sig_atomic_t PrintBacktracePending = false;
 
 int			MyProcPid;
 pg_time_t	MyStartTime;
diff --git a/src/bin/pg_ctl/t/005_backtrace.pl b/src/bin/pg_ctl/t/005_backtrace.pl
new file mode 100644
index 0000000..46e77d9
--- /dev/null
+++ b/src/bin/pg_ctl/t/005_backtrace.pl
@@ -0,0 +1,73 @@
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 2;
+use Time::HiRes qw(usleep);
+
+# Set up node with logging collector
+my $node = get_new_node('primary');
+$node->init();
+$node->append_conf(
+	'postgresql.conf', qq(
+logging_collector = on
+lc_messages = 'C'
+));
+
+$node->start();
+
+# Verify that log output gets to the file
+$node->psql('postgres', 'select pg_print_callstack(pg_backend_pid())');
+
+# might need to retry if logging collector process is slow...
+my $max_attempts = 180 * 10;
+
+my $current_logfiles;
+for (my $attempts = 0; $attempts < $max_attempts; $attempts++)
+{
+	eval {
+		$current_logfiles = slurp_file($node->data_dir . '/current_logfiles');
+	};
+	last unless $@;
+	usleep(100_000);
+}
+die $@ if $@;
+
+note "current_logfiles = $current_logfiles";
+
+like(
+	$current_logfiles,
+	qr|^stderr log/postgresql-.*log$|,
+	'current_logfiles is sane');
+
+my $lfname = $current_logfiles;
+$lfname =~ s/^stderr //;
+chomp $lfname;
+
+my $first_logfile;
+my $bt_occurence_count;
+
+# Verify that the backtraces of the processes are logged into logfile.
+for (my $attempts = 0; $attempts < $max_attempts; $attempts++)
+{
+	$first_logfile = $node->data_dir . '/' . $lfname;
+	chomp $first_logfile;
+	print "file is $first_logfile";
+	open my $fh, '<', $first_logfile
+	  or die "Could not open '$first_logfile' $!";
+	while (my $line = <$fh>)
+	{
+		chomp $line;
+		if ($line =~ m/current backtrace/)
+		{
+			$bt_occurence_count++;
+		}
+	}
+	last if $bt_occurence_count == 1;
+	usleep(100_000);
+}
+
+is($bt_occurence_count, 1, 'found expected back trace in the log file');
+
+$node->stop();
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index b5f52d4..4c5ec4e 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11375,4 +11375,9 @@
   proname => 'is_normalized', prorettype => 'bool', proargtypes => 'text text',
   prosrc => 'unicode_is_normalized' },
 
+# function to get the callstack of server process
+{ oid => '6105', descr => 'print callstack of process',
+  proname => 'pg_print_callstack', provolatile => 'v', prorettype => 'bool',
+  proargtypes => 'int4', prosrc => 'pg_print_callstack' },
+
 ]
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 1bdc97e..0d00ff9 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -92,6 +92,8 @@ extern PGDLLIMPORT volatile uint32 InterruptHoldoffCount;
 extern PGDLLIMPORT volatile uint32 QueryCancelHoldoffCount;
 extern PGDLLIMPORT volatile uint32 CritSectionCount;
 
+extern PGDLLIMPORT volatile sig_atomic_t PrintBacktracePending;
+
 /* in tcop/postgres.c */
 extern void ProcessInterrupts(void);
 
diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h
index dbbed18..803f663 100644
--- a/src/include/storage/pmsignal.h
+++ b/src/include/storage/pmsignal.h
@@ -42,6 +42,8 @@ typedef enum
 	PMSIGNAL_START_WALRECEIVER, /* start a walreceiver */
 	PMSIGNAL_ADVANCE_STATE_MACHINE, /* advance postmaster's state machine */
 
+	PMSIGNAL_BACKTRACE_EMIT,	/* send PROCSIG_BACKTRACE_PRINT to backend */
+
 	NUM_PMSIGNALS				/* Must be last value of enum! */
 } PMSignalReason;
 
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index 4ae7dc3..3348494 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -43,6 +43,8 @@ typedef enum
 	PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
 	PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
 
+	PROCSIG_BACKTRACE_PRINT,	/* ask backend to print the current backtrace */
+
 	NUM_PROCSIGNALS				/* Must be last! */
 } ProcSignalReason;
 
@@ -71,5 +73,6 @@ extern void WaitForProcSignalBarrier(uint64 generation);
 extern void ProcessProcSignalBarrier(void);
 
 extern void procsignal_sigusr1_handler(SIGNAL_ARGS);
+extern bool EmitProcSignalPrintCallStack(int bt_pid);
 
 #endif							/* PROCSIGNAL_H */
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index e547210..7d640e9 100644
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -71,6 +71,7 @@ extern void RecoveryConflictInterrupt(ProcSignalReason reason); /* called from S
 extern void ProcessClientReadInterrupt(bool blocked);
 extern void ProcessClientWriteInterrupt(bool blocked);
 
+extern void LogBackTrace(void); /* log the backtrace to log file */
 extern void process_postgres_switches(int argc, char *argv[],
 									  GucContext ctx, const char **dbname);
 extern void PostgresMain(int argc, char *argv[],
-- 
1.8.3.1

Reply via email to