On 2026-02-26 Th 4:25 AM, Jakub Wartak wrote:
On Thu, Feb 26, 2026 at 4:09 AM Chao Li <[email protected]> wrote:
Hi Chao,
I just reviewed v4 again and got a few more comments:
1. This patch only sets the globals proc_die_sender_pid/uid to 0 at startup, then
assigns values to them upon receiving SIGTERM, and never resets them, which
assumes a process must die upon SIGTERM. Is that assumption true? I guess not.
If a process receives SIGTERM and does not die immediately, then dies for another
reason, it may report a misleading PID and UID.
Hmm, I'm not sure I follow. If we receive SIGTERM and do not die immediately
(for whatever reason), then two scenarios can happen as far as I'm concerned:
* another SIGTERM comes in from the same or a different uid/pid and it will be
reported properly
* a SIGKILL arrives instead, but in this case we won't report UID/PID at all
Am I missing something, or do you have any particular scenario in mind?
The flow will be wrapper_handler()->die()->SetLatch()->..->directly to
the error reporting facilities.
2.
syncrep.c uses errhint to print the PID and UID, and postgres.c uses errdetail. We
should be consistent; maybe use errhint everywhere.
Right, let's make it that way.
3.
```
@@ -319,7 +323,11 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
QueryCancelPending = false;
ereport(WARNING,
(errmsg("canceling wait for synchronous
replication due to user request"),
- errdetail("The transaction has already
committed locally, but might not have been replicated to the standby.")));
+ errdetail("The transaction has already
committed locally, but might not have been replicated to the standby."),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID
%d, UID %d.",
+
proc_die_sender_pid, proc_die_sender_uid)
+ ));
SyncRepCancelWait();
break;
}
```
I don’t think the query cancel case relates to SIGTERM, so we don’t need to log
PID and UID here.
Right, it was superfluous.
v5 attached.
I'd kinda like to sneak this in for pg19, because I think it's useful.
Here's a v6 that changes one or two things:
- changes the globals to sig_atomic_t
- in ProcessInterrupts, copies to local sender_pid/sender_uid, then
zeros the globals before any ereport
- uses errdetail() for all the messages
Plus a few more cosmetic changes like consistent casing.
cheers
andrew
--
Andrew Dunstan
EDB: https://www.enterprisedb.com
From 450b68d29f02ee1f5bf71db708b380ab389a30c6 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <[email protected]>
Date: Mon, 6 Apr 2026 12:39:14 -0400
Subject: [PATCH v6] Add errdetail() with PID and UID about source of
termination signal.
When a backend is terminated via pg_terminate_backend() or an external
SIGTERM, the error message now includes the sender's PID and UID as
errdetail, making it easier to identify the source of unexpected
terminations in multi-user environments.
On platforms that support SA_SIGINFO (Linux, FreeBSD, and most modern
Unix systems), the signal handler captures si_pid and si_uid from the
siginfo_t structure. On platforms without SA_SIGINFO, the detail is
simply omitted.
Author: Jakub Wartak <[email protected]>
Reviewed-by: Andrew Dunstan <[email protected]>
Reviewed-by: Chao Li <[email protected]>
Discussion: https://postgr.es/m/cakzirmyrowovzsdixpld3pgmqxuql_zw2ght5xhhckq1uds...@mail.gmail.com
---
configure | 42 +++++++++++++++++++++++++++++++
configure.ac | 18 +++++++++++++
meson.build | 4 +++
src/backend/replication/syncrep.c | 6 ++++-
src/backend/tcop/postgres.c | 26 +++++++++++++------
src/backend/utils/init/globals.c | 2 ++
src/bin/psql/t/001_basic.pl | 7 +++---
src/include/miscadmin.h | 10 ++++++++
src/include/pg_config.h.in | 3 +++
src/port/pqsignal.c | 35 +++++++++++++++++++++++---
10 files changed, 138 insertions(+), 15 deletions(-)
diff --git a/configure b/configure
index c56ef60226d..f66c1054a7a 100755
--- a/configure
+++ b/configure
@@ -15797,6 +15797,48 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
fi
fi
+# Check for SA_SIGINFO extended signal handler availability
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SA_SIGINFO" >&5
+$as_echo_n "checking for SA_SIGINFO... " >&6; }
+if ${ac_cv_have_sa_siginfo+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+ #include <signal.h>
+ #include <stddef.h>
+
+int
+main ()
+{
+
+ struct sigaction sa;
+ sa.sa_flags = SA_SIGINFO;
+
+ ;
+ return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_have_sa_siginfo=yes
+else
+ ac_cv_have_sa_siginfo=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sa_siginfo" >&5
+$as_echo "$ac_cv_have_sa_siginfo" >&6; }
+
+if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
+
+$as_echo "#define HAVE_SA_SIGINFO 1" >>confdefs.h
+
+fi
##
## Functions, global variables
diff --git a/configure.ac b/configure.ac
index ff5dd64468e..8d176bd3468 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1817,6 +1817,24 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
fi
fi
+# Check for SA_SIGINFO extended signal handler availability
+AC_CACHE_CHECK([for SA_SIGINFO], [ac_cv_have_sa_siginfo], [
+ AC_COMPILE_IFELSE([
+ AC_LANG_PROGRAM([[
+ #include <signal.h>
+ #include <stddef.h>
+ ]], [[
+ struct sigaction sa;
+ sa.sa_flags = SA_SIGINFO;
+ ]])
+ ],
+ [ac_cv_have_sa_siginfo=yes],
+ [ac_cv_have_sa_siginfo=no])
+])
+
+if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
+ AC_DEFINE([HAVE_SA_SIGINFO], 1, [Define to 1 if you have SA_SIGINFO available.])
+fi
##
## Functions, global variables
diff --git a/meson.build b/meson.build
index 43d5ffc30b1..be97e986e5d 100644
--- a/meson.build
+++ b/meson.build
@@ -2985,6 +2985,10 @@ if cc.has_member('struct sockaddr', 'sa_len',
cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1)
endif
+if cc.has_header_symbol('signal.h', 'SA_SIGINFO')
+ cdata.set('HAVE_SA_SIGINFO', 1)
+endif
+
if cc.has_member('struct tm', 'tm_zone',
args: test_c_args, include_directories: postgres_inc,
prefix: '''
diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index 9cecc83ed68..41a4b837688 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -303,7 +303,11 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
ereport(WARNING,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
- errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
+ errdetail("The transaction has already committed locally, but might not have been replicated to the standby.%s",
+ ProcDieSenderPid == 0 ? "" :
+ psprintf("\nSignal sent by PID %d, UID %d.",
+ (int) ProcDieSenderPid,
+ (int) ProcDieSenderUid))));
whereToSendOutput = DestNone;
SyncRepCancelWait();
break;
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 10be60011ad..a53df31c989 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3345,7 +3345,12 @@ ProcessInterrupts(void)
if (ProcDiePending)
{
+ int sender_pid = ProcDieSenderPid;
+ int sender_uid = ProcDieSenderUid;
+
ProcDiePending = false;
+ ProcDieSenderPid = 0;
+ ProcDieSenderUid = 0;
QueryCancelPending = false; /* ProcDie trumps QueryCancel */
LockErrorCleanup();
/* As in quickdie, don't risk sending to client during auth */
@@ -3358,15 +3363,18 @@ ProcessInterrupts(void)
else if (AmAutoVacuumWorkerProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating autovacuum process due to administrator command")));
+ errmsg("terminating autovacuum process due to administrator command"),
+ ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (IsLogicalWorker())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating logical replication worker due to administrator command")));
+ errmsg("terminating logical replication worker due to administrator command"),
+ ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (IsLogicalLauncher())
{
ereport(DEBUG1,
- (errmsg_internal("logical replication launcher shutting down")));
+ (errmsg_internal("logical replication launcher shutting down"),
+ ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
/*
* The logical replication launcher can be stopped at any time.
@@ -3377,23 +3385,27 @@ ProcessInterrupts(void)
else if (AmWalReceiverProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating walreceiver process due to administrator command")));
+ errmsg("terminating walreceiver process due to administrator command"),
+ ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (AmBackgroundWorkerProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating background worker \"%s\" due to administrator command",
- MyBgworkerEntry->bgw_type)));
+ MyBgworkerEntry->bgw_type),
+ ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (AmIoWorkerProcess())
{
ereport(DEBUG1,
- (errmsg_internal("io worker shutting down due to administrator command")));
+ (errmsg_internal("io worker shutting down due to administrator command"),
+ ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
proc_exit(0);
}
else
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating connection due to administrator command")));
+ errmsg("terminating connection due to administrator command"),
+ ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
}
if (CheckClientConnectionPending)
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 36ad708b360..073f8102454 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -43,6 +43,8 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
volatile uint32 InterruptHoldoffCount = 0;
volatile uint32 QueryCancelHoldoffCount = 0;
volatile uint32 CritSectionCount = 0;
+volatile sig_atomic_t ProcDieSenderPid = 0;
+volatile sig_atomic_t ProcDieSenderUid = 0;
int MyProcPid;
pg_time_t MyStartTime;
diff --git a/src/bin/psql/t/001_basic.pl b/src/bin/psql/t/001_basic.pl
index 6839f27cbe5..7c21204c1f2 100644
--- a/src/bin/psql/t/001_basic.pl
+++ b/src/bin/psql/t/001_basic.pl
@@ -142,12 +142,11 @@ my ($ret, $out, $err) = $node->psql('postgres',
is($ret, 2, 'server crash: psql exit code');
like($out, qr/before/, 'server crash: output before crash');
unlike($out, qr/AFTER/, 'server crash: no output after crash');
-is( $err,
- 'psql:<stdin>:2: FATAL: terminating connection due to administrator command
-psql:<stdin>:2: server closed the connection unexpectedly
+like( $err, qr/psql:<stdin>:2: FATAL: terminating connection due to administrator command
+(?:DETAIL: Signal sent by PID \d+, UID \d+\.\n)?psql:<stdin>:2: server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
-psql:<stdin>:2: error: connection to server was lost',
+psql:<stdin>:2: error: connection to server was lost/,
'server crash: error message');
# test \errverbose
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 7277c37e779..bc4717ab7da 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -90,6 +90,16 @@
extern PGDLLIMPORT volatile sig_atomic_t InterruptPending;
extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending;
extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending;
+extern PGDLLIMPORT volatile sig_atomic_t ProcDieSenderPid;
+extern PGDLLIMPORT volatile sig_atomic_t ProcDieSenderUid;
+
+/*
+ * Include signal sender PID/UID as errdetail when available (SA_SIGINFO).
+ * The caller must supply the (already-captured) pid and uid values.
+ */
+#define ERRDETAIL_SIGNAL_SENDER(pid, uid) \
+ ((pid) == 0 ? 0 : \
+ errdetail("Signal sent by PID %d, UID %d.", (int) (pid), (int) (uid)))
extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 9f6d512347e..4f8113c144b 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -354,6 +354,9 @@
/* Define to 1 if you have the `rl_variable_bind' function. */
#undef HAVE_RL_VARIABLE_BIND
+/* Define to 1 if you have SA_SIGINFO available. */
+#undef HAVE_SA_SIGINFO
+
/* Define to 1 if you have the <security/pam_appl.h> header file. */
#undef HAVE_SECURITY_PAM_APPL_H
diff --git a/src/port/pqsignal.c b/src/port/pqsignal.c
index fbdf9341c2f..8841464b5cb 100644
--- a/src/port/pqsignal.c
+++ b/src/port/pqsignal.c
@@ -82,10 +82,19 @@ static volatile pqsigfunc pqsignal_handlers[PG_NSIG];
*
* This wrapper also handles restoring the value of errno.
*/
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+static void
+wrapper_handler(int signo, siginfo_t * info, void *context)
+#else
static void
wrapper_handler(SIGNAL_ARGS)
+#endif
{
int save_errno = errno;
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+ /* SA_SIGINFO signature uses signo, not SIGNAL_ARGS macro */
+ int postgres_signal_arg = signo;
+#endif
Assert(postgres_signal_arg > 0);
Assert(postgres_signal_arg < PG_NSIG);
@@ -105,6 +114,14 @@ wrapper_handler(SIGNAL_ARGS)
raise(postgres_signal_arg);
return;
}
+
+#ifdef HAVE_SA_SIGINFO
+ if (signo == SIGTERM && info)
+ {
+ ProcDieSenderPid = info->si_pid;
+ ProcDieSenderUid = info->si_uid;
+ }
+#endif
#endif
(*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg);
@@ -125,6 +142,7 @@ pqsignal(int signo, pqsigfunc func)
#if !(defined(WIN32) && defined(FRONTEND))
struct sigaction act;
#endif
+ bool use_wrapper = false;
Assert(signo > 0);
Assert(signo < PG_NSIG);
@@ -132,13 +150,24 @@ pqsignal(int signo, pqsigfunc func)
if (func != SIG_IGN && func != SIG_DFL)
{
pqsignal_handlers[signo] = func; /* assumed atomic */
- func = wrapper_handler;
+ use_wrapper = true;
}
#if !(defined(WIN32) && defined(FRONTEND))
- act.sa_handler = func;
sigemptyset(&act.sa_mask);
act.sa_flags = SA_RESTART;
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+ if (use_wrapper)
+ {
+ act.sa_sigaction = wrapper_handler;
+ act.sa_flags |= SA_SIGINFO;
+ }
+ else
+ act.sa_handler = func;
+#else
+ act.sa_handler = use_wrapper ? wrapper_handler : func;
+#endif
+
#ifdef SA_NOCLDSTOP
if (signo == SIGCHLD)
act.sa_flags |= SA_NOCLDSTOP;
@@ -147,7 +176,7 @@ pqsignal(int signo, pqsigfunc func)
Assert(false); /* probably indicates coding error */
#else
/* Forward to Windows native signal system. */
- if (signal(signo, func) == SIG_ERR)
+ if (signal(signo, use_wrapper ? wrapper_handler : func) == SIG_ERR)
Assert(false); /* probably indicates coding error */
#endif
}
--
2.43.0