On 2021/03/23 15:50, Fujii Masao wrote:
+ * The statistics collector is started by the postmaster as soon as the
+ * startup subprocess finishes.

This comment needs to be updated? Because the stats collector can
be invoked when the startup process sends PMSIGNAL_BEGIN_HOT_STANDBY
signal.

I updated this comment in the patch.
Attached is the updated version of the patch.

Barring any objection, I'm thinking to commit this patch.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION
diff --git a/src/backend/postmaster/interrupt.c 
b/src/backend/postmaster/interrupt.c
index dd9136a942..d1b1f95400 100644
--- a/src/backend/postmaster/interrupt.c
+++ b/src/backend/postmaster/interrupt.c
@@ -64,9 +64,28 @@ SignalHandlerForConfigReload(SIGNAL_ARGS)
 }
 
 /*
- * Simple signal handler for exiting quickly as if due to a crash.
+ * Simple signal handler for processes which have not yet touched or do not
+ * touch shared memory to exit quickly.
  *
- * Normally, this would be used for handling SIGQUIT.
+ * Note that if processes already touched shared memory, use
+ * SignalHandlerForCrashExit() instead and force the postmaster into
+ * a system reset cycle because shared memory may be corrupted.
+ */
+void
+SignalHandlerForNonCrashExit(SIGNAL_ARGS)
+{
+       /*
+        * Since we don't touch shared memory, we can just pull the plug and 
exit
+        * without running any atexit handlers.
+        */
+       _exit(1);
+}
+
+/*
+ * Simple signal handler for processes which have touched shared memory to
+ * exit quickly.
+ *
+ * Normally, this would be used for handling SIGQUIT as if due to a crash.
  */
 void
 SignalHandlerForCrashExit(SIGNAL_ARGS)
@@ -93,9 +112,8 @@ SignalHandlerForCrashExit(SIGNAL_ARGS)
  * shut down and exit.
  *
  * Typically, this handler would be used for SIGTERM, but some processes use
- * other signals. In particular, the checkpointer exits on SIGUSR2, the
- * stats collector on SIGQUIT, and the WAL writer exits on either SIGINT
- * or SIGTERM.
+ * other signals. In particular, the checkpointer and the stats collector exit
+ * on SIGUSR2, and the WAL writer exits on either SIGINT or SIGTERM.
  *
  * ShutdownRequestPending should be checked at a convenient place within the
  * main loop, or else the main loop should call HandleMainLoopInterrupts.
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 60f45ccc4e..fd0af0f289 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -1,7 +1,23 @@
 /* ----------
  * pgstat.c
  *
- *     All the statistics collector stuff hacked up in one big, ugly file.
+ * All the statistics collector stuff hacked up in one big, ugly file.
+ *
+ * The statistics collector is started by the postmaster as soon as the
+ * startup subprocess finishes, or as soon as the postmaster is ready
+ * to accept read only connections during archive recovery.  It remains
+ * alive until the postmaster commands it to terminate.  Normal
+ * termination is by SIGUSR2 after the checkpointer must exit(0),
+ * which instructs the statistics collector to save the final statistics
+ * to reuse at next startup and then exit(0).
+ * Emergency termination is by SIGQUIT; like any backend, the statistics
+ * collector will exit quickly without saving the final statistics.  It's
+ * ok because the startup process will remove all statistics at next
+ * startup after emergency termination.
+ *
+ * Because the statistics collector doesn't touch shared memory, even if
+ * the statistics collector exits unexpectedly, the postmaster doesn't
+ * treat it as a crash.  The postmaster will just try to restart a new one.
  *
  *     TODO:   - Separate collector, postmaster and backend stuff
  *                       into different files.
@@ -724,6 +740,7 @@ pgstat_reset_remove_files(const char *directory)
 
                snprintf(fname, sizeof(fname), "%s/%s", directory,
                                 entry->d_name);
+               elog(DEBUG2, "removing stats file \"%s\"", fname);
                unlink(fname);
        }
        FreeDir(dir);
@@ -4821,17 +4838,31 @@ PgstatCollectorMain(int argc, char *argv[])
 
        /*
         * Ignore all signals usually bound to some action in the postmaster,
-        * except SIGHUP and SIGQUIT.  Note we don't need a SIGUSR1 handler to
-        * support latch operations, because we only use a local latch.
+        * except SIGHUP, SIGQUIT and SIGUSR2.  Note we don't need a SIGUSR1
+        * handler to support latch operations, because we only use a local 
latch.
+        *
+        * We deliberately ignore SIGTERM and exit in correct order because we
+        * want to collect the stats sent during the shutdown from all 
processes.
+        * SIGTERM will be received during a standard Unix system shutdown cycle
+        * because init will SIGTERM all processes at once, and the postmaster
+        * will SIGTERM all processes at once when 
recovery_target_action=shutdown
+        * and the startup process exits after reaching the recovery target.  We
+        * want to wait for the checkpointer, which is the process sends the 
stats
+        * finally, to exit, whereupon the postmaster will tell us it's okay to
+        * shut down (via SIGUSR2)
+        *
+        * If SIGQUIT is received, exit quickly without doing any additional 
work,
+        * for example writing stats files.  We arrange to do _exit(1) because 
the
+        * stats collector doesn't touch shared memory.
         */
        pqsignal(SIGHUP, SignalHandlerForConfigReload);
        pqsignal(SIGINT, SIG_IGN);
        pqsignal(SIGTERM, SIG_IGN);
-       pqsignal(SIGQUIT, SignalHandlerForShutdownRequest);
+       pqsignal(SIGQUIT, SignalHandlerForNonCrashExit);
        pqsignal(SIGALRM, SIG_IGN);
        pqsignal(SIGPIPE, SIG_IGN);
        pqsignal(SIGUSR1, SIG_IGN);
-       pqsignal(SIGUSR2, SIG_IGN);
+       pqsignal(SIGUSR2, SignalHandlerForShutdownRequest);
        /* Reset some signals that are accepted by postmaster but not here */
        pqsignal(SIGCHLD, SIG_DFL);
        PG_SETMASK(&UnBlockSig);
@@ -4852,8 +4883,8 @@ PgstatCollectorMain(int argc, char *argv[])
        AddWaitEventToSet(wes, WL_SOCKET_READABLE, pgStatSock, NULL, NULL);
 
        /*
-        * Loop to process messages until we get SIGQUIT or detect ungraceful
-        * death of our parent postmaster.
+        * Loop to process messages until we get SIGUSR2, SIGQUIT or detect
+        * ungraceful death of our parent postmaster.
         *
         * For performance reasons, we don't want to do ResetLatch/WaitLatch 
after
         * every message; instead, do that only after a recv() fails to obtain a
@@ -4871,7 +4902,7 @@ PgstatCollectorMain(int argc, char *argv[])
                ResetLatch(MyLatch);
 
                /*
-                * Quit if we get SIGQUIT from the postmaster.
+                * Quit if we get SIGUSR2 from the postmaster.
                 */
                if (ShutdownRequestPending)
                        break;
diff --git a/src/backend/postmaster/postmaster.c 
b/src/backend/postmaster/postmaster.c
index ef0be4ca38..08792b0033 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -401,7 +401,6 @@ static void SIGHUP_handler(SIGNAL_ARGS);
 static void pmdie(SIGNAL_ARGS);
 static void reaper(SIGNAL_ARGS);
 static void sigusr1_handler(SIGNAL_ARGS);
-static void process_startup_packet_die(SIGNAL_ARGS);
 static void dummy_handler(SIGNAL_ARGS);
 static void StartupPacketTimeoutHandler(void);
 static void CleanupBackend(int pid, int exitstatus);
@@ -3085,7 +3084,7 @@ reaper(SIGNAL_ARGS)
                                 * nothing left for it to do.
                                 */
                                if (PgStatPID != 0)
-                                       signal_child(PgStatPID, SIGQUIT);
+                                       signal_child(PgStatPID, SIGUSR2);
                        }
                        else
                        {
@@ -4320,8 +4319,14 @@ BackendInitialize(Port *port)
         * Exiting with _exit(1) is only possible because we have not yet 
touched
         * shared memory; therefore no outside-the-process state needs to get
         * cleaned up.
+        *
+        * One might be tempted to try to send a message, or log one, indicating
+        * why we are disconnecting.  However, that would be quite unsafe in
+        * itself. Also, it seems undesirable to provide clues about the
+        * database's state to a client that has not yet completed 
authentication,
+        * or even sent us a startup packet.
         */
-       pqsignal(SIGTERM, process_startup_packet_die);
+       pqsignal(SIGTERM, SignalHandlerForNonCrashExit);
        /* SIGQUIT handler was already set up by InitPostmasterChild */
        InitializeTimeouts();           /* establishes SIGALRM handler */
        PG_SETMASK(&StartupBlockSig);
@@ -5274,25 +5279,6 @@ sigusr1_handler(SIGNAL_ARGS)
        errno = save_errno;
 }
 
-/*
- * SIGTERM while processing startup packet.
- *
- * Running proc_exit() from a signal handler would be quite unsafe.
- * However, since we have not yet touched shared memory, we can just
- * pull the plug and exit without running any atexit handlers.
- *
- * One might be tempted to try to send a message, or log one, indicating
- * why we are disconnecting.  However, that would be quite unsafe in itself.
- * Also, it seems undesirable to provide clues about the database's state
- * to a client that has not yet completed authentication, or even sent us
- * a startup packet.
- */
-static void
-process_startup_packet_die(SIGNAL_ARGS)
-{
-       _exit(1);
-}
-
 /*
  * Dummy signal handler
  *
@@ -5309,7 +5295,7 @@ dummy_handler(SIGNAL_ARGS)
 
 /*
  * Timeout while processing startup packet.
- * As for process_startup_packet_die(), we exit via _exit(1).
+ * As for SignalHandlerForNonCrashExit(), we exit via _exit(1).
  */
 static void
 StartupPacketTimeoutHandler(void)
diff --git a/src/include/postmaster/interrupt.h 
b/src/include/postmaster/interrupt.h
index 85a1293ef1..3f3dc19e24 100644
--- a/src/include/postmaster/interrupt.h
+++ b/src/include/postmaster/interrupt.h
@@ -26,6 +26,7 @@ extern PGDLLIMPORT volatile sig_atomic_t 
ShutdownRequestPending;
 
 extern void HandleMainLoopInterrupts(void);
 extern void SignalHandlerForConfigReload(SIGNAL_ARGS);
+extern void SignalHandlerForNonCrashExit(SIGNAL_ARGS);
 extern void SignalHandlerForCrashExit(SIGNAL_ARGS);
 extern void SignalHandlerForShutdownRequest(SIGNAL_ARGS);
 

Reply via email to