On 2021-03-16 13:44, kuroda.hay...@fujitsu.com wrote:
Dear Ikeda-san
I think the idea is good.
I read the patch and other sources, and I found
process_startup_packet_die also execute _exit(1).
I think they can be combined into one function and moved to
interrupt.c, but
some important comments might be removed. How do you think?
Hi, Kuroda-san.
Thanks for your comments.
I agreed that your idea.
I combined them into one function and moved the comments to
the calling function side.
(v2-0001-pgstat_avoid_writing_on_sigquit.patch)
Regards,
--
Masahiro Ikeda
NTT DATA CORPORATION
diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c
index dd9136a942..9b25294a14 100644
--- a/src/backend/postmaster/interrupt.c
+++ b/src/backend/postmaster/interrupt.c
@@ -64,9 +64,27 @@ SignalHandlerForConfigReload(SIGNAL_ARGS)
}
/*
- * Simple signal handler for exiting quickly as if due to a crash.
+ * Simple signal handler for processes HAVE NOT yet touched or DO NOT
+ * touch shared memory to exit quickly.
*
- * Normally, this would be used for handling SIGQUIT.
+ * If processes already touched shared memory, call
+ * SignalHandlerForCrashExit() because shared memory may be corrupted.
+ */
+void
+SignalHandlerForUnsafeExit(SIGNAL_ARGS)
+{
+ /*
+ * Since we don't touch shared memory, we can just pull the plug and exit
+ * without running any atexit handlers.
+ */
+ _exit(1);
+}
+
+/*
+ * Simple signal handler for processes have touched shared memory to
+ * exit quickly.
+ *
+ * Normally, this would be used for handling SIGQUIT as if due to a crash.
*/
void
SignalHandlerForCrashExit(SIGNAL_ARGS)
@@ -93,9 +111,8 @@ SignalHandlerForCrashExit(SIGNAL_ARGS)
* shut down and exit.
*
* Typically, this handler would be used for SIGTERM, but some processes use
- * other signals. In particular, the checkpointer exits on SIGUSR2, the
- * stats collector on SIGQUIT, and the WAL writer exits on either SIGINT
- * or SIGTERM.
+ * other signals. In particular, the checkpointer exits on SIGUSR2 and the
+ * WAL writer exits on either SIGINT or SIGTERM.
*
* ShutdownRequestPending should be checked at a convenient place within the
* main loop, or else the main loop should call HandleMainLoopInterrupts.
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b1e2d94951..b2156cec9d 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -725,6 +725,8 @@ pgstat_reset_remove_files(const char *directory)
snprintf(fname, sizeof(fname), "%s/%s", directory,
entry->d_name);
unlink(fname);
+
+ elog(DEBUG2, "removing stats file \"%s\"", fname);
}
FreeDir(dir);
}
@@ -4821,13 +4823,19 @@ PgstatCollectorMain(int argc, char *argv[])
/*
* Ignore all signals usually bound to some action in the postmaster,
- * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
- * support latch operations, because we only use a local latch.
+ * except SIGHUP, SIGTERM and SIGQUIT. Note we don't need a SIGUSR1
+ * handler to support latch operations, because we only use a local latch.
*/
pqsignal(SIGHUP, SignalHandlerForConfigReload);
pqsignal(SIGINT, SIG_IGN);
- pqsignal(SIGTERM, SIG_IGN);
- pqsignal(SIGQUIT, SignalHandlerForShutdownRequest);
+ pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
+
+ /*
+ * If SIGQUIT is received, exit quickly without doing any additional work,
+ * for example writing stats files. We arrange to do _exit(1) because the
+ * stats collector doesn't touch shared memory.
+ */
+ pqsignal(SIGQUIT, SignalHandlerForUnsafeExit);
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, SIG_IGN);
@@ -4852,8 +4860,8 @@ PgstatCollectorMain(int argc, char *argv[])
AddWaitEventToSet(wes, WL_SOCKET_READABLE, pgStatSock, NULL, NULL);
/*
- * Loop to process messages until we get SIGQUIT or detect ungraceful
- * death of our parent postmaster.
+ * Loop to process messages until we get SIGTERM or SIGQUIT of our parent
+ * postmaster.
*
* For performance reasons, we don't want to do ResetLatch/WaitLatch after
* every message; instead, do that only after a recv() fails to obtain a
@@ -4871,7 +4879,7 @@ PgstatCollectorMain(int argc, char *argv[])
ResetLatch(MyLatch);
/*
- * Quit if we get SIGQUIT from the postmaster.
+ * Quit if we get SIGTERM from the postmaster.
*/
if (ShutdownRequestPending)
break;
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 6436ae0f48..4e1d47e0a1 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -400,7 +400,6 @@ static void SIGHUP_handler(SIGNAL_ARGS);
static void pmdie(SIGNAL_ARGS);
static void reaper(SIGNAL_ARGS);
static void sigusr1_handler(SIGNAL_ARGS);
-static void process_startup_packet_die(SIGNAL_ARGS);
static void dummy_handler(SIGNAL_ARGS);
static void StartupPacketTimeoutHandler(void);
static void CleanupBackend(int pid, int exitstatus);
@@ -3084,7 +3083,7 @@ reaper(SIGNAL_ARGS)
* nothing left for it to do.
*/
if (PgStatPID != 0)
- signal_child(PgStatPID, SIGQUIT);
+ signal_child(PgStatPID, SIGTERM);
}
else
{
@@ -4315,8 +4314,14 @@ BackendInitialize(Port *port)
* Exiting with _exit(1) is only possible because we have not yet touched
* shared memory; therefore no outside-the-process state needs to get
* cleaned up.
+ *
+ * One might be tempted to try to send a message, or log one, indicating
+ * why we are disconnecting. However, that would be quite unsafe in
+ * itself. Also, it seems undesirable to provide clues about the
+ * database's state to a client that has not yet completed authentication,
+ * or even sent us a startup packet.
*/
- pqsignal(SIGTERM, process_startup_packet_die);
+ pqsignal(SIGTERM, SignalHandlerForUnsafeExit);
/* SIGQUIT handler was already set up by InitPostmasterChild */
InitializeTimeouts(); /* establishes SIGALRM handler */
PG_SETMASK(&StartupBlockSig);
@@ -5269,25 +5274,6 @@ sigusr1_handler(SIGNAL_ARGS)
errno = save_errno;
}
-/*
- * SIGTERM while processing startup packet.
- *
- * Running proc_exit() from a signal handler would be quite unsafe.
- * However, since we have not yet touched shared memory, we can just
- * pull the plug and exit without running any atexit handlers.
- *
- * One might be tempted to try to send a message, or log one, indicating
- * why we are disconnecting. However, that would be quite unsafe in itself.
- * Also, it seems undesirable to provide clues about the database's state
- * to a client that has not yet completed authentication, or even sent us
- * a startup packet.
- */
-static void
-process_startup_packet_die(SIGNAL_ARGS)
-{
- _exit(1);
-}
-
/*
* Dummy signal handler
*
@@ -5304,7 +5290,7 @@ dummy_handler(SIGNAL_ARGS)
/*
* Timeout while processing startup packet.
- * As for process_startup_packet_die(), we exit via _exit(1).
+ * As for SignalHandlerForUnsafeExit(), we exit via _exit(1).
*/
static void
StartupPacketTimeoutHandler(void)
diff --git a/src/include/postmaster/interrupt.h b/src/include/postmaster/interrupt.h
index 85a1293ef1..71c85a3421 100644
--- a/src/include/postmaster/interrupt.h
+++ b/src/include/postmaster/interrupt.h
@@ -26,6 +26,7 @@ extern PGDLLIMPORT volatile sig_atomic_t ShutdownRequestPending;
extern void HandleMainLoopInterrupts(void);
extern void SignalHandlerForConfigReload(SIGNAL_ARGS);
+extern void SignalHandlerForUnsafeExit(SIGNAL_ARGS);
extern void SignalHandlerForCrashExit(SIGNAL_ARGS);
extern void SignalHandlerForShutdownRequest(SIGNAL_ARGS);