Reading through postmaster code, I spotted some refactoring opportunities to make it slightly more readable.

Currently, when a child process exits, the postmaster first scans through BackgroundWorkerList to see if it was a bgworker process. If not found, it scans through the BackendList to see if it was a backend process (which it really should be then). That feels a bit silly, because every running background worker process also has an entry in BackendList. There's a lot of duplication between CleanupBackgroundWorker and CleanupBackend.

Before commit 8a02b3d732, we used to created Backend entries only for background worker processes that connected to a database, not for other background worker processes. I think that's why we have the code structure we have. But now that we have a Backend entry for all bgworker processes, it's more natural to have single function to deal with both regular backends and bgworkers.

So I came up with the attached patches. This doesn't make any meaningful user-visible changes, except for some incidental changes in log messages (see commit message for details).

--
Heikki Linnakangas
Neon (https://neon.tech)
From dd0ee8533a3ab5d037ca7c070bf89ad94c96d4b2 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Sat, 6 Jul 2024 20:55:19 +0300
Subject: [PATCH v1 1/4] Fix outdated comment; all running bgworkers are in
 BackendList

Before commit 8a02b3d732, only bgworkers that connected to a database
had an entry in the Backendlist. Commit 8a02b3d732 changed that, but
forgot to update this comment.
---
 src/include/postmaster/bgworker_internals.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/include/postmaster/bgworker_internals.h b/src/include/postmaster/bgworker_internals.h
index 9106a0ef3f..61ba54117a 100644
--- a/src/include/postmaster/bgworker_internals.h
+++ b/src/include/postmaster/bgworker_internals.h
@@ -26,14 +26,14 @@
 /*
  * List of background workers, private to postmaster.
  *
- * A worker that requests a database connection during registration will have
- * rw_backend set, and will be present in BackendList.  Note: do not rely on
- * rw_backend being non-NULL for shmem-connected workers!
+ * All workers that are currently running will have rw_backend set, and will
+ * be present in BackendList.
  */
 typedef struct RegisteredBgWorker
 {
 	BackgroundWorker rw_worker; /* its registry entry */
-	struct bkend *rw_backend;	/* its BackendList entry, or NULL */
+	struct bkend *rw_backend;	/* its BackendList entry, or NULL if not
+								 * running */
 	pid_t		rw_pid;			/* 0 if not running */
 	int			rw_child_slot;
 	TimestampTz rw_crashed_at;	/* if not 0, time it last crashed */
-- 
2.39.2

From f0646000628359a1ce2a01be25fe993b50aeb396 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Sat, 6 Jul 2024 20:55:28 +0300
Subject: [PATCH v1 2/4] Minor refactoring of assign_backendlist_entry()

Make assign_backendlist_entry() responsible just for allocating
Backend struct. Linking it to the RegisteredBgWorker is the caller's
responsibility now. Seems more clear that way.
---
 src/backend/postmaster/postmaster.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 6f974a8d21..ac54798965 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -422,7 +422,7 @@ static void TerminateChildren(int signal);
 #define SignalChildren(sig)			   SignalSomeChildren(sig, BACKEND_TYPE_ALL)
 
 static int	CountChildren(int target);
-static bool assign_backendlist_entry(RegisteredBgWorker *rw);
+static Backend *assign_backendlist_entry(void);
 static void maybe_start_bgworkers(void);
 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
 static pid_t StartChildProcess(BackendType type);
@@ -4160,6 +4160,7 @@ MaxLivePostmasterChildren(void)
 static bool
 do_start_bgworker(RegisteredBgWorker *rw)
 {
+	Backend    *bn;
 	pid_t		worker_pid;
 
 	Assert(rw->rw_pid == 0);
@@ -4174,11 +4175,14 @@ do_start_bgworker(RegisteredBgWorker *rw)
 	 * tried again right away, most likely we'd find ourselves hitting the
 	 * same resource-exhaustion condition.
 	 */
-	if (!assign_backendlist_entry(rw))
+	bn = assign_backendlist_entry();
+	if (bn == NULL)
 	{
 		rw->rw_crashed_at = GetCurrentTimestamp();
 		return false;
 	}
+	rw->rw_backend = bn;
+	rw->rw_child_slot = bn->child_slot;
 
 	ereport(DEBUG1,
 			(errmsg_internal("starting background worker process \"%s\"",
@@ -4254,12 +4258,10 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
  * Allocate the Backend struct for a connected background worker, but don't
  * add it to the list of backends just yet.
  *
- * On failure, return false without changing any worker state.
- *
- * Some info from the Backend is copied into the passed rw.
+ * On failure, return NULL.
  */
-static bool
-assign_backendlist_entry(RegisteredBgWorker *rw)
+static Backend *
+assign_backendlist_entry(void)
 {
 	Backend    *bn;
 
@@ -4273,7 +4275,7 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
 		ereport(LOG,
 				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
 				 errmsg("no slot available for new background worker process")));
-		return false;
+		return NULL;
 	}
 
 	/*
@@ -4287,7 +4289,7 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
 		ereport(LOG,
 				(errcode(ERRCODE_INTERNAL_ERROR),
 				 errmsg("could not generate random cancel key")));
-		return false;
+		return NULL;
 	}
 
 	bn = palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
@@ -4296,7 +4298,7 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
 		ereport(LOG,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));
-		return false;
+		return NULL;
 	}
 
 	bn->cancel_key = MyCancelKey;
@@ -4305,10 +4307,7 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
 	bn->dead_end = false;
 	bn->bgworker_notify = false;
 
-	rw->rw_backend = bn;
-	rw->rw_child_slot = bn->child_slot;
-
-	return true;
+	return bn;
 }
 
 /*
-- 
2.39.2

From d9873670381f3c523d4c32dd58d475f2eaeb7a94 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Sat, 6 Jul 2024 20:56:51 +0300
Subject: [PATCH v1 3/4] Make BackgroundWorkerList doubly-linked

This allows ForgetBackgroundWorker() and ReportBackgroundWorkerExit()
to take a RegisteredBgWorker pointer as argument, rather than a list
iterator. That feels a little more natural. But more importantly, this
paves the way for more refactoring in the next commit.
---
 src/backend/postmaster/bgworker.c           | 62 ++++++++++-----------
 src/backend/postmaster/postmaster.c         | 40 ++++++-------
 src/include/postmaster/bgworker_internals.h | 10 ++--
 3 files changed, 54 insertions(+), 58 deletions(-)

diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 77707bb384..981d8177b0 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -37,7 +37,7 @@
 /*
  * The postmaster's list of registered background workers, in private memory.
  */
-slist_head	BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList);
+dlist_head	BackgroundWorkerList = DLIST_STATIC_INIT(BackgroundWorkerList);
 
 /*
  * BackgroundWorkerSlots exist in shared memory and can be accessed (via
@@ -168,7 +168,7 @@ BackgroundWorkerShmemInit(void)
 										   &found);
 	if (!IsUnderPostmaster)
 	{
-		slist_iter	siter;
+		dlist_iter	iter;
 		int			slotno = 0;
 
 		BackgroundWorkerData->total_slots = max_worker_processes;
@@ -181,12 +181,12 @@ BackgroundWorkerShmemInit(void)
 		 * correspondence between the postmaster's private list and the array
 		 * in shared memory.
 		 */
-		slist_foreach(siter, &BackgroundWorkerList)
+		dlist_foreach(iter, &BackgroundWorkerList)
 		{
 			BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
 			RegisteredBgWorker *rw;
 
-			rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
+			rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 			Assert(slotno < max_worker_processes);
 			slot->in_use = true;
 			slot->terminate = false;
@@ -220,13 +220,13 @@ BackgroundWorkerShmemInit(void)
 static RegisteredBgWorker *
 FindRegisteredWorkerBySlotNumber(int slotno)
 {
-	slist_iter	siter;
+	dlist_iter	iter;
 
-	slist_foreach(siter, &BackgroundWorkerList)
+	dlist_foreach(iter, &BackgroundWorkerList)
 	{
 		RegisteredBgWorker *rw;
 
-		rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
+		rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 		if (rw->rw_shmem_slot == slotno)
 			return rw;
 	}
@@ -413,29 +413,25 @@ BackgroundWorkerStateChange(bool allow_new_workers)
 				(errmsg_internal("registering background worker \"%s\"",
 								 rw->rw_worker.bgw_name)));
 
-		slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
+		dlist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
 	}
 }
 
 /*
  * Forget about a background worker that's no longer needed.
  *
- * The worker must be identified by passing an slist_mutable_iter that
- * points to it.  This convention allows deletion of workers during
- * searches of the worker list, and saves having to search the list again.
+ * NOTE: The entry is unlinked from BackgroundWorkerList.  If the caller is
+ * iterating through it, better use a mutable iterator!
  *
  * Caller is responsible for notifying bgw_notify_pid, if appropriate.
  *
  * This function must be invoked only in the postmaster.
  */
 void
-ForgetBackgroundWorker(slist_mutable_iter *cur)
+ForgetBackgroundWorker(RegisteredBgWorker *rw)
 {
-	RegisteredBgWorker *rw;
 	BackgroundWorkerSlot *slot;
 
-	rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);
-
 	Assert(rw->rw_shmem_slot < max_worker_processes);
 	slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
 	Assert(slot->in_use);
@@ -454,7 +450,7 @@ ForgetBackgroundWorker(slist_mutable_iter *cur)
 			(errmsg_internal("unregistering background worker \"%s\"",
 							 rw->rw_worker.bgw_name)));
 
-	slist_delete_current(cur);
+	dlist_delete(&rw->rw_lnode);
 	pfree(rw);
 }
 
@@ -480,17 +476,17 @@ ReportBackgroundWorkerPID(RegisteredBgWorker *rw)
  * Report that the PID of a background worker is now zero because a
  * previously-running background worker has exited.
  *
+ * NOTE: The entry may be unlinked from BackgroundWorkerList.  If the caller
+ * is iterating through it, better use a mutable iterator!
+ *
  * This function should only be called from the postmaster.
  */
 void
-ReportBackgroundWorkerExit(slist_mutable_iter *cur)
+ReportBackgroundWorkerExit(RegisteredBgWorker *rw)
 {
-	RegisteredBgWorker *rw;
 	BackgroundWorkerSlot *slot;
 	int			notify_pid;
 
-	rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);
-
 	Assert(rw->rw_shmem_slot < max_worker_processes);
 	slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
 	slot->pid = rw->rw_pid;
@@ -505,7 +501,7 @@ ReportBackgroundWorkerExit(slist_mutable_iter *cur)
 	 */
 	if (rw->rw_terminate ||
 		rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
-		ForgetBackgroundWorker(cur);
+		ForgetBackgroundWorker(rw);
 
 	if (notify_pid != 0)
 		kill(notify_pid, SIGUSR1);
@@ -519,13 +515,13 @@ ReportBackgroundWorkerExit(slist_mutable_iter *cur)
 void
 BackgroundWorkerStopNotifications(pid_t pid)
 {
-	slist_iter	siter;
+	dlist_iter	iter;
 
-	slist_foreach(siter, &BackgroundWorkerList)
+	dlist_foreach(iter, &BackgroundWorkerList)
 	{
 		RegisteredBgWorker *rw;
 
-		rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
+		rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 		if (rw->rw_worker.bgw_notify_pid == pid)
 			rw->rw_worker.bgw_notify_pid = 0;
 	}
@@ -546,14 +542,14 @@ BackgroundWorkerStopNotifications(pid_t pid)
 void
 ForgetUnstartedBackgroundWorkers(void)
 {
-	slist_mutable_iter iter;
+	dlist_mutable_iter iter;
 
-	slist_foreach_modify(iter, &BackgroundWorkerList)
+	dlist_foreach_modify(iter, &BackgroundWorkerList)
 	{
 		RegisteredBgWorker *rw;
 		BackgroundWorkerSlot *slot;
 
-		rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+		rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 		Assert(rw->rw_shmem_slot < max_worker_processes);
 		slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
 
@@ -564,7 +560,7 @@ ForgetUnstartedBackgroundWorkers(void)
 			/* ... then zap it, and notify the waiter */
 			int			notify_pid = rw->rw_worker.bgw_notify_pid;
 
-			ForgetBackgroundWorker(&iter);
+			ForgetBackgroundWorker(rw);
 			if (notify_pid != 0)
 				kill(notify_pid, SIGUSR1);
 		}
@@ -584,13 +580,13 @@ ForgetUnstartedBackgroundWorkers(void)
 void
 ResetBackgroundWorkerCrashTimes(void)
 {
-	slist_mutable_iter iter;
+	dlist_mutable_iter iter;
 
-	slist_foreach_modify(iter, &BackgroundWorkerList)
+	dlist_foreach_modify(iter, &BackgroundWorkerList)
 	{
 		RegisteredBgWorker *rw;
 
-		rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+		rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 
 		if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
 		{
@@ -601,7 +597,7 @@ ResetBackgroundWorkerCrashTimes(void)
 			 * parallel_terminate_count will get incremented after we've
 			 * already zeroed parallel_register_count, which would be bad.)
 			 */
-			ForgetBackgroundWorker(&iter);
+			ForgetBackgroundWorker(rw);
 		}
 		else
 		{
@@ -1036,7 +1032,7 @@ RegisterBackgroundWorker(BackgroundWorker *worker)
 	rw->rw_crashed_at = 0;
 	rw->rw_terminate = false;
 
-	slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
+	dlist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
 }
 
 /*
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index ac54798965..f376d3b77b 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1543,7 +1543,7 @@ DetermineSleepTime(void)
 
 	if (HaveCrashedWorker)
 	{
-		slist_mutable_iter siter;
+		dlist_mutable_iter iter;
 
 		/*
 		 * When there are crashed bgworkers, we sleep just long enough that
@@ -1551,12 +1551,12 @@ DetermineSleepTime(void)
 		 * determine the minimum of all wakeup times according to most recent
 		 * crash time and requested restart interval.
 		 */
-		slist_foreach_modify(siter, &BackgroundWorkerList)
+		dlist_foreach_modify(iter, &BackgroundWorkerList)
 		{
 			RegisteredBgWorker *rw;
 			TimestampTz this_wakeup;
 
-			rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
+			rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 
 			if (rw->rw_crashed_at == 0)
 				continue;
@@ -1564,7 +1564,7 @@ DetermineSleepTime(void)
 			if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART
 				|| rw->rw_terminate)
 			{
-				ForgetBackgroundWorker(&siter);
+				ForgetBackgroundWorker(rw);
 				continue;
 			}
 
@@ -2696,13 +2696,13 @@ CleanupBackgroundWorker(int pid,
 						int exitstatus) /* child's exit status */
 {
 	char		namebuf[MAXPGPATH];
-	slist_mutable_iter iter;
+	dlist_mutable_iter iter;
 
-	slist_foreach_modify(iter, &BackgroundWorkerList)
+	dlist_foreach_modify(iter, &BackgroundWorkerList)
 	{
 		RegisteredBgWorker *rw;
 
-		rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+		rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 
 		if (rw->rw_pid != pid)
 			continue;
@@ -2768,7 +2768,7 @@ CleanupBackgroundWorker(int pid,
 		rw->rw_backend = NULL;
 		rw->rw_pid = 0;
 		rw->rw_child_slot = 0;
-		ReportBackgroundWorkerExit(&iter);	/* report child death */
+		ReportBackgroundWorkerExit(rw); /* report child death */
 
 		LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
 					 namebuf, pid, exitstatus);
@@ -2873,8 +2873,8 @@ CleanupBackend(int pid,
 static void
 HandleChildCrash(int pid, int exitstatus, const char *procname)
 {
-	dlist_mutable_iter iter;
-	slist_iter	siter;
+	dlist_iter	iter;
+	dlist_mutable_iter miter;
 	Backend    *bp;
 	bool		take_action;
 
@@ -2896,11 +2896,11 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 	}
 
 	/* Process background workers. */
-	slist_foreach(siter, &BackgroundWorkerList)
+	dlist_foreach(iter, &BackgroundWorkerList)
 	{
 		RegisteredBgWorker *rw;
 
-		rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
+		rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 		if (rw->rw_pid == 0)
 			continue;			/* not running */
 		if (rw->rw_pid == pid)
@@ -2933,9 +2933,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 	}
 
 	/* Process regular backends */
-	dlist_foreach_modify(iter, &BackendList)
+	dlist_foreach_modify(miter, &BackendList)
 	{
-		bp = dlist_container(Backend, elem, iter.cur);
+		bp = dlist_container(Backend, elem, miter.cur);
 
 		if (bp->pid == pid)
 		{
@@ -2949,7 +2949,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 				ShmemBackendArrayRemove(bp);
 #endif
 			}
-			dlist_delete(iter.cur);
+			dlist_delete(miter.cur);
 			pfree(bp);
 			/* Keep looping so we can signal remaining backends */
 		}
@@ -4327,7 +4327,7 @@ maybe_start_bgworkers(void)
 #define MAX_BGWORKERS_TO_LAUNCH 100
 	int			num_launched = 0;
 	TimestampTz now = 0;
-	slist_mutable_iter iter;
+	dlist_mutable_iter iter;
 
 	/*
 	 * During crash recovery, we have no need to be called until the state
@@ -4344,11 +4344,11 @@ maybe_start_bgworkers(void)
 	StartWorkerNeeded = false;
 	HaveCrashedWorker = false;
 
-	slist_foreach_modify(iter, &BackgroundWorkerList)
+	dlist_foreach_modify(iter, &BackgroundWorkerList)
 	{
 		RegisteredBgWorker *rw;
 
-		rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+		rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 
 		/* ignore if already running */
 		if (rw->rw_pid != 0)
@@ -4357,7 +4357,7 @@ maybe_start_bgworkers(void)
 		/* if marked for death, clean up and remove from list */
 		if (rw->rw_terminate)
 		{
-			ForgetBackgroundWorker(&iter);
+			ForgetBackgroundWorker(rw);
 			continue;
 		}
 
@@ -4376,7 +4376,7 @@ maybe_start_bgworkers(void)
 
 				notify_pid = rw->rw_worker.bgw_notify_pid;
 
-				ForgetBackgroundWorker(&iter);
+				ForgetBackgroundWorker(rw);
 
 				/* Report worker is gone now. */
 				if (notify_pid != 0)
diff --git a/src/include/postmaster/bgworker_internals.h b/src/include/postmaster/bgworker_internals.h
index 61ba54117a..e55e38af65 100644
--- a/src/include/postmaster/bgworker_internals.h
+++ b/src/include/postmaster/bgworker_internals.h
@@ -39,17 +39,17 @@ typedef struct RegisteredBgWorker
 	TimestampTz rw_crashed_at;	/* if not 0, time it last crashed */
 	int			rw_shmem_slot;
 	bool		rw_terminate;
-	slist_node	rw_lnode;		/* list link */
+	dlist_node	rw_lnode;		/* list link */
 } RegisteredBgWorker;
 
-extern PGDLLIMPORT slist_head BackgroundWorkerList;
+extern PGDLLIMPORT dlist_head BackgroundWorkerList;
 
 extern Size BackgroundWorkerShmemSize(void);
 extern void BackgroundWorkerShmemInit(void);
 extern void BackgroundWorkerStateChange(bool allow_new_workers);
-extern void ForgetBackgroundWorker(slist_mutable_iter *cur);
-extern void ReportBackgroundWorkerPID(RegisteredBgWorker *);
-extern void ReportBackgroundWorkerExit(slist_mutable_iter *cur);
+extern void ForgetBackgroundWorker(RegisteredBgWorker *rw);
+extern void ReportBackgroundWorkerPID(RegisteredBgWorker *rw);
+extern void ReportBackgroundWorkerExit(RegisteredBgWorker *rw);
 extern void BackgroundWorkerStopNotifications(pid_t pid);
 extern void ForgetUnstartedBackgroundWorkers(void);
 extern void ResetBackgroundWorkerCrashTimes(void);
-- 
2.39.2

From 8ba618344a13b166eb8f343d5a3c730a3eb3feb0 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Sat, 6 Jul 2024 21:49:00 +0300
Subject: [PATCH v1 4/4] Refactor code to handle death of a backend or bgworker
 in postmaster

Currently, when a child process exits, the postmaster first scans
through BackgroundWorkerList, to see if it the child process was a
background worker. If not found, then it scans through BackendList to
see if it was a regular backend. That leads to some duplication
between the bgworker and regular backend cleanup code, as both have an
entry in the BackendList that needs to be cleaned up in the same way.
Refactor that so that we scan just the BackendList to find the child
process, and if it was a background worker, do the additional
bgworker-specific cleanup in addition to the normal Backend cleanup.

Change HandleChildCrash so that it doesn't try to handle the cleanup
of the process that already exited, only the signaling of all the
other processes. When called for any of the aux processes, the caller
cleared the *PID global variable, so the code in HandleChildCrash() to
do that was unused.

On Windows, if a child process exits with ERROR_WAIT_NO_CHILDREN, it's
now logged with that exit code, instead of 0. Also, if a bgworker
exits with ERROR_WAIT_NO_CHILDREN, it's now treated as crashed and is
restarted. Previously it was treated as a normal exit.

If a child process is not found in the BackendList, the log message
now calls it "untracked child process" rather than "server process".
Arguably that should be a PANIC, because we do track all the child
processes in the list, so failing to find a child process is highly
unexpected. But if we want to change that, let's discuss and do that
as a separate commit.
---
 src/backend/postmaster/bgworker.c           |   4 -
 src/backend/postmaster/postmaster.c         | 448 +++++++-------------
 src/include/postmaster/bgworker_internals.h |   7 +-
 3 files changed, 166 insertions(+), 293 deletions(-)

diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 981d8177b0..b83967cda3 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -401,9 +401,7 @@ BackgroundWorkerStateChange(bool allow_new_workers)
 		}
 
 		/* Initialize postmaster bookkeeping. */
-		rw->rw_backend = NULL;
 		rw->rw_pid = 0;
-		rw->rw_child_slot = 0;
 		rw->rw_crashed_at = 0;
 		rw->rw_shmem_slot = slotno;
 		rw->rw_terminate = false;
@@ -1026,9 +1024,7 @@ RegisterBackgroundWorker(BackgroundWorker *worker)
 	}
 
 	rw->rw_worker = *worker;
-	rw->rw_backend = NULL;
 	rw->rw_pid = 0;
-	rw->rw_child_slot = 0;
 	rw->rw_crashed_at = 0;
 	rw->rw_terminate = false;
 
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index f376d3b77b..35c92bfc26 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -172,6 +172,7 @@ typedef struct bkend
 	int			child_slot;		/* PMChildSlot for this backend, if any */
 	int			bkend_type;		/* child process flavor, see above */
 	bool		dead_end;		/* is it going to send an error and quit? */
+	RegisteredBgWorker *rw;		/* bgworker info, if this is a bgworker */
 	bool		bgworker_notify;	/* gets bgworker start/stop notifications */
 	dlist_node	elem;			/* list link in BackendList */
 } Backend;
@@ -401,8 +402,7 @@ static void process_pm_child_exit(void);
 static void process_pm_reload_request(void);
 static void process_pm_shutdown_request(void);
 static void dummy_handler(SIGNAL_ARGS);
-static void CleanupBackend(int pid, int exitstatus);
-static bool CleanupBackgroundWorker(int pid, int exitstatus);
+static void CleanupBackend(Backend *bp, int exitstatus);
 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
 static void LogChildExit(int lev, const char *procname,
 						 int pid, int exitstatus);
@@ -2362,6 +2362,9 @@ process_pm_child_exit(void)
 
 	while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
 	{
+		bool		found;
+		dlist_mutable_iter iter;
+
 		/*
 		 * Check if this child was a startup process.
 		 */
@@ -2661,18 +2664,34 @@ process_pm_child_exit(void)
 			continue;
 		}
 
-		/* Was it one of our background workers? */
-		if (CleanupBackgroundWorker(pid, exitstatus))
+		/*
+		 * Was it a backend or background worker?
+		 */
+		found = false;
+		dlist_foreach_modify(iter, &BackendList)
 		{
-			/* have it be restarted */
-			HaveCrashedWorker = true;
-			continue;
+			Backend    *bp = dlist_container(Backend, elem, iter.cur);
+
+			if (bp->pid == pid)
+			{
+				dlist_delete(iter.cur);
+				CleanupBackend(bp, exitstatus);
+				found = true;
+				break;
+			}
 		}
 
 		/*
-		 * Else do standard backend child cleanup.
+		 * We don't know anything about this child process.  That's highly
+		 * unexpected, as we do track all the child processes that we fork.
 		 */
-		CleanupBackend(pid, exitstatus);
+		if (!found)
+		{
+			if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
+				HandleChildCrash(pid, exitstatus, _("untracked child process"));
+			else
+				LogChildExit(LOG, _("untracked child process"), pid, exitstatus);
+		}
 	}							/* loop over pending child-death reports */
 
 	/*
@@ -2683,116 +2702,31 @@ process_pm_child_exit(void)
 }
 
 /*
- * Scan the bgworkers list and see if the given PID (which has just stopped
- * or crashed) is in it.  Handle its shutdown if so, and return true.  If not a
- * bgworker, return false.
+ * CleanupBackend -- cleanup after terminated backend or background worker.
  *
- * This is heavily based on CleanupBackend.  One important difference is that
- * we don't know yet that the dying process is a bgworker, so we must be silent
- * until we're sure it is.
+ * Remove all local state associated with backend.
  */
-static bool
-CleanupBackgroundWorker(int pid,
-						int exitstatus) /* child's exit status */
+static void
+CleanupBackend(Backend *bp,
+			   int exitstatus)	/* child's exit status. */
 {
 	char		namebuf[MAXPGPATH];
-	dlist_mutable_iter iter;
+	char	   *procname;
+	bool		crashed = false;
 
-	dlist_foreach_modify(iter, &BackgroundWorkerList)
+	/* Construct a process name for log message */
+	if (bp->dead_end)
+	{
+		procname = _("dead end backend");
+	}
+	else if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
 	{
-		RegisteredBgWorker *rw;
-
-		rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
-
-		if (rw->rw_pid != pid)
-			continue;
-
-#ifdef WIN32
-		/* see CleanupBackend */
-		if (exitstatus == ERROR_WAIT_NO_CHILDREN)
-			exitstatus = 0;
-#endif
-
 		snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
-				 rw->rw_worker.bgw_type);
-
-
-		if (!EXIT_STATUS_0(exitstatus))
-		{
-			/* Record timestamp, so we know when to restart the worker. */
-			rw->rw_crashed_at = GetCurrentTimestamp();
-		}
-		else
-		{
-			/* Zero exit status means terminate */
-			rw->rw_crashed_at = 0;
-			rw->rw_terminate = true;
-		}
-
-		/*
-		 * Additionally, just like a backend, any exit status other than 0 or
-		 * 1 is considered a crash and causes a system-wide restart.
-		 */
-		if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
-		{
-			HandleChildCrash(pid, exitstatus, namebuf);
-			return true;
-		}
-
-		/*
-		 * We must release the postmaster child slot. If the worker failed to
-		 * do so, it did not clean up after itself, requiring a crash-restart
-		 * cycle.
-		 */
-		if (!ReleasePostmasterChildSlot(rw->rw_child_slot))
-		{
-			HandleChildCrash(pid, exitstatus, namebuf);
-			return true;
-		}
-
-		/* Get it out of the BackendList and clear out remaining data */
-		dlist_delete(&rw->rw_backend->elem);
-#ifdef EXEC_BACKEND
-		ShmemBackendArrayRemove(rw->rw_backend);
-#endif
-
-		/*
-		 * It's possible that this background worker started some OTHER
-		 * background worker and asked to be notified when that worker started
-		 * or stopped.  If so, cancel any notifications destined for the
-		 * now-dead backend.
-		 */
-		if (rw->rw_backend->bgworker_notify)
-			BackgroundWorkerStopNotifications(rw->rw_pid);
-		pfree(rw->rw_backend);
-		rw->rw_backend = NULL;
-		rw->rw_pid = 0;
-		rw->rw_child_slot = 0;
-		ReportBackgroundWorkerExit(rw); /* report child death */
-
-		LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
-					 namebuf, pid, exitstatus);
-
-		return true;
+				 bp->rw->rw_worker.bgw_type);
+		procname = namebuf;
 	}
-
-	return false;
-}
-
-/*
- * CleanupBackend -- cleanup after terminated backend.
- *
- * Remove all local state associated with backend.
- *
- * If you change this, see also CleanupBackgroundWorker.
- */
-static void
-CleanupBackend(int pid,
-			   int exitstatus)	/* child's exit status. */
-{
-	dlist_mutable_iter iter;
-
-	LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
+	else
+		procname = _("server process");
 
 	/*
 	 * If a backend dies in an ugly way then we must signal all other backends
@@ -2800,6 +2734,8 @@ CleanupBackend(int pid,
 	 * assume everything is all right and proceed to remove the backend from
 	 * the active backend list.
 	 */
+	if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
+		crashed = true;
 
 #ifdef WIN32
 
@@ -2812,55 +2748,79 @@ CleanupBackend(int pid,
 	 */
 	if (exitstatus == ERROR_WAIT_NO_CHILDREN)
 	{
-		LogChildExit(LOG, _("server process"), pid, exitstatus);
-		exitstatus = 0;
+		LogChildExit(LOG, procname, bp->pid, exitstatus);
+		crashed = false;
 	}
 #endif
 
-	if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
+	/*
+	 * If the process attached to shared memory, check that it detached
+	 * cleanly.
+	 */
+	if (!bp->dead_end)
+	{
+		if (!ReleasePostmasterChildSlot(bp->child_slot))
+		{
+			/*
+			 * Uh-oh, the child failed to clean itself up.  Treat as a crash
+			 * after all.
+			 */
+			crashed = true;
+		}
+#ifdef EXEC_BACKEND
+		ShmemBackendArrayRemove(bp);
+#endif
+	}
+
+	if (crashed)
 	{
-		HandleChildCrash(pid, exitstatus, _("server process"));
+		HandleChildCrash(bp->pid, exitstatus, namebuf);
+		pfree(bp);
 		return;
 	}
 
-	dlist_foreach_modify(iter, &BackendList)
+	/*
+	 * This backend may have been slated to receive SIGUSR1 when some
+	 * background worker started or stopped.  Cancel those notifications, as
+	 * we don't want to signal PIDs that are not PostgreSQL backends.  This
+	 * gets skipped in the (probably very common) case where the backend has
+	 * never requested any such notifications.
+	 */
+	if (bp->bgworker_notify)
+		BackgroundWorkerStopNotifications(bp->pid);
+
+	/*
+	 * If it was a background worker, also update its RegisteredWorker entry.
+	 */
+	if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
 	{
-		Backend    *bp = dlist_container(Backend, elem, iter.cur);
+		RegisteredBgWorker *rw = bp->rw;
 
-		if (bp->pid == pid)
+		if (!EXIT_STATUS_0(exitstatus))
 		{
-			if (!bp->dead_end)
-			{
-				if (!ReleasePostmasterChildSlot(bp->child_slot))
-				{
-					/*
-					 * Uh-oh, the child failed to clean itself up.  Treat as a
-					 * crash after all.
-					 */
-					HandleChildCrash(pid, exitstatus, _("server process"));
-					return;
-				}
-#ifdef EXEC_BACKEND
-				ShmemBackendArrayRemove(bp);
-#endif
-			}
-			if (bp->bgworker_notify)
-			{
-				/*
-				 * This backend may have been slated to receive SIGUSR1 when
-				 * some background worker started or stopped.  Cancel those
-				 * notifications, as we don't want to signal PIDs that are not
-				 * PostgreSQL backends.  This gets skipped in the (probably
-				 * very common) case where the backend has never requested any
-				 * such notifications.
-				 */
-				BackgroundWorkerStopNotifications(bp->pid);
-			}
-			dlist_delete(iter.cur);
-			pfree(bp);
-			break;
+			/* Record timestamp, so we know when to restart the worker. */
+			rw->rw_crashed_at = GetCurrentTimestamp();
+		}
+		else
+		{
+			/* Zero exit status means terminate */
+			rw->rw_crashed_at = 0;
+			rw->rw_terminate = true;
 		}
+
+		rw->rw_pid = 0;
+		ReportBackgroundWorkerExit(rw); /* report child death */
+
+		LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
+					 procname, bp->pid, exitstatus);
+
+		/* have it be restarted */
+		HaveCrashedWorker = true;
 	}
+	else
+		LogChildExit(DEBUG2, procname, bp->pid, exitstatus);
+
+	pfree(bp);
 }
 
 /*
@@ -2869,13 +2829,14 @@ CleanupBackend(int pid,
  *
  * The objectives here are to clean up our local state about the child
  * process, and to signal all other remaining children to quickdie.
+ *
+ * If it's a backend, the caller has already removed it from the
+ * BackendList. If it's an aux process, the corresponding *PID global variable
+ * has been reset already.
  */
 static void
 HandleChildCrash(int pid, int exitstatus, const char *procname)
 {
-	dlist_iter	iter;
-	dlist_mutable_iter miter;
-	Backend    *bp;
 	bool		take_action;
 
 	/*
@@ -2895,145 +2856,64 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 		SetQuitSignalReason(PMQUIT_FOR_CRASH);
 	}
 
-	/* Process background workers. */
-	dlist_foreach(iter, &BackgroundWorkerList)
+	if (take_action)
 	{
-		RegisteredBgWorker *rw;
+		dlist_iter	iter;
 
-		rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
-		if (rw->rw_pid == 0)
-			continue;			/* not running */
-		if (rw->rw_pid == pid)
-		{
-			/*
-			 * Found entry for freshly-dead worker, so remove it.
-			 */
-			(void) ReleasePostmasterChildSlot(rw->rw_child_slot);
-			dlist_delete(&rw->rw_backend->elem);
-#ifdef EXEC_BACKEND
-			ShmemBackendArrayRemove(rw->rw_backend);
-#endif
-			pfree(rw->rw_backend);
-			rw->rw_backend = NULL;
-			rw->rw_pid = 0;
-			rw->rw_child_slot = 0;
-			/* don't reset crashed_at */
-			/* don't report child stop, either */
-			/* Keep looping so we can signal remaining workers */
-		}
-		else
+		dlist_foreach(iter, &BackendList)
 		{
-			/*
-			 * This worker is still alive.  Unless we did so already, tell it
-			 * to commit hara-kiri.
-			 */
-			if (take_action)
-				sigquit_child(rw->rw_pid);
-		}
-	}
-
-	/* Process regular backends */
-	dlist_foreach_modify(miter, &BackendList)
-	{
-		bp = dlist_container(Backend, elem, miter.cur);
+			Backend    *bp = dlist_container(Backend, elem, iter.cur);
 
-		if (bp->pid == pid)
-		{
-			/*
-			 * Found entry for freshly-dead backend, so remove it.
-			 */
-			if (!bp->dead_end)
-			{
-				(void) ReleasePostmasterChildSlot(bp->child_slot);
-#ifdef EXEC_BACKEND
-				ShmemBackendArrayRemove(bp);
-#endif
-			}
-			dlist_delete(miter.cur);
-			pfree(bp);
-			/* Keep looping so we can signal remaining backends */
-		}
-		else
-		{
 			/*
 			 * This backend is still alive.  Unless we did so already, tell it
 			 * to commit hara-kiri.
 			 *
 			 * We could exclude dead_end children here, but at least when
 			 * sending SIGABRT it seems better to include them.
-			 *
-			 * Background workers were already processed above; ignore them
-			 * here.
 			 */
-			if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
-				continue;
+			sigquit_child(bp->pid);
+		}
 
-			if (take_action)
-				sigquit_child(bp->pid);
+		if (StartupPID != 0)
+		{
+			sigquit_child(StartupPID);
+			StartupStatus = STARTUP_SIGNALED;
 		}
-	}
 
-	/* Take care of the startup process too */
-	if (pid == StartupPID)
-	{
-		StartupPID = 0;
-		/* Caller adjusts StartupStatus, so don't touch it here */
-	}
-	else if (StartupPID != 0 && take_action)
-	{
-		sigquit_child(StartupPID);
-		StartupStatus = STARTUP_SIGNALED;
-	}
+		/* Take care of the bgwriter too */
+		if (BgWriterPID != 0)
+			sigquit_child(BgWriterPID);
+
+		/* Take care of the checkpointer too */
+		if (CheckpointerPID != 0)
+			sigquit_child(CheckpointerPID);
+
+		/* Take care of the walwriter too */
+		if (WalWriterPID != 0)
+			sigquit_child(WalWriterPID);
+
+		/* Take care of the walreceiver too */
+		if (WalReceiverPID != 0)
+			sigquit_child(WalReceiverPID);
 
-	/* Take care of the bgwriter too */
-	if (pid == BgWriterPID)
-		BgWriterPID = 0;
-	else if (BgWriterPID != 0 && take_action)
-		sigquit_child(BgWriterPID);
-
-	/* Take care of the checkpointer too */
-	if (pid == CheckpointerPID)
-		CheckpointerPID = 0;
-	else if (CheckpointerPID != 0 && take_action)
-		sigquit_child(CheckpointerPID);
-
-	/* Take care of the walwriter too */
-	if (pid == WalWriterPID)
-		WalWriterPID = 0;
-	else if (WalWriterPID != 0 && take_action)
-		sigquit_child(WalWriterPID);
-
-	/* Take care of the walreceiver too */
-	if (pid == WalReceiverPID)
-		WalReceiverPID = 0;
-	else if (WalReceiverPID != 0 && take_action)
-		sigquit_child(WalReceiverPID);
-
-	/* Take care of the walsummarizer too */
-	if (pid == WalSummarizerPID)
-		WalSummarizerPID = 0;
-	else if (WalSummarizerPID != 0 && take_action)
-		sigquit_child(WalSummarizerPID);
-
-	/* Take care of the autovacuum launcher too */
-	if (pid == AutoVacPID)
-		AutoVacPID = 0;
-	else if (AutoVacPID != 0 && take_action)
-		sigquit_child(AutoVacPID);
-
-	/* Take care of the archiver too */
-	if (pid == PgArchPID)
-		PgArchPID = 0;
-	else if (PgArchPID != 0 && take_action)
-		sigquit_child(PgArchPID);
-
-	/* Take care of the slot sync worker too */
-	if (pid == SlotSyncWorkerPID)
-		SlotSyncWorkerPID = 0;
-	else if (SlotSyncWorkerPID != 0 && take_action)
-		sigquit_child(SlotSyncWorkerPID);
-
-	/* We do NOT restart the syslogger */
+		/* Take care of the walsummarizer too */
+		if (WalSummarizerPID != 0)
+			sigquit_child(WalSummarizerPID);
+
+		/* Take care of the autovacuum launcher too */
+		if (AutoVacPID != 0)
+			sigquit_child(AutoVacPID);
+
+		/* Take care of the archiver too */
+		if (PgArchPID != 0)
+			sigquit_child(PgArchPID);
+
+		/* Take care of the slot sync worker too */
+		if (SlotSyncWorkerPID != 0)
+			sigquit_child(SlotSyncWorkerPID);
+
+		/* We do NOT restart the syslogger */
+	}
 
 	if (Shutdown != ImmediateShutdown)
 		FatalError = true;
@@ -3578,6 +3458,7 @@ BackendStartup(ClientSocket *client_sock)
 	startup_data.canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
 	bn->dead_end = (startup_data.canAcceptConnections != CAC_OK);
 	bn->cancel_key = MyCancelKey;
+	bn->rw = NULL;
 
 	/*
 	 * Unless it's a dead_end child, assign it a child slot number
@@ -3993,6 +3874,7 @@ StartAutovacuumWorker(void)
 			bn->dead_end = false;
 			bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
 			bn->bgworker_notify = false;
+			bn->rw = NULL;
 
 			bn->pid = StartChildProcess(B_AUTOVAC_WORKER);
 			if (bn->pid > 0)
@@ -4181,8 +4063,7 @@ do_start_bgworker(RegisteredBgWorker *rw)
 		rw->rw_crashed_at = GetCurrentTimestamp();
 		return false;
 	}
-	rw->rw_backend = bn;
-	rw->rw_child_slot = bn->child_slot;
+	bn->rw = rw;
 
 	ereport(DEBUG1,
 			(errmsg_internal("starting background worker process \"%s\"",
@@ -4195,10 +4076,9 @@ do_start_bgworker(RegisteredBgWorker *rw)
 		ereport(LOG,
 				(errmsg("could not fork background worker process: %m")));
 		/* undo what assign_backendlist_entry did */
-		ReleasePostmasterChildSlot(rw->rw_child_slot);
-		rw->rw_child_slot = 0;
-		pfree(rw->rw_backend);
-		rw->rw_backend = NULL;
+		ReleasePostmasterChildSlot(bn->child_slot);
+		pfree(bn);
+
 		/* mark entry as crashed, so we'll try again later */
 		rw->rw_crashed_at = GetCurrentTimestamp();
 		return false;
@@ -4206,12 +4086,12 @@ do_start_bgworker(RegisteredBgWorker *rw)
 
 	/* in postmaster, fork successful ... */
 	rw->rw_pid = worker_pid;
-	rw->rw_backend->pid = rw->rw_pid;
+	bn->pid = rw->rw_pid;
 	ReportBackgroundWorkerPID(rw);
 	/* add new worker to lists of backends */
-	dlist_push_head(&BackendList, &rw->rw_backend->elem);
+	dlist_push_head(&BackendList, &bn->elem);
 #ifdef EXEC_BACKEND
-	ShmemBackendArrayAdd(rw->rw_backend);
+	ShmemBackendArrayAdd(bn);
 #endif
 	return true;
 }
diff --git a/src/include/postmaster/bgworker_internals.h b/src/include/postmaster/bgworker_internals.h
index e55e38af65..309a91124b 100644
--- a/src/include/postmaster/bgworker_internals.h
+++ b/src/include/postmaster/bgworker_internals.h
@@ -26,16 +26,13 @@
 /*
  * List of background workers, private to postmaster.
  *
- * All workers that are currently running will have rw_backend set, and will
- * be present in BackendList.
+ * All workers that are currently running will also have an entry in
+ * BackendList.
  */
 typedef struct RegisteredBgWorker
 {
 	BackgroundWorker rw_worker; /* its registry entry */
-	struct bkend *rw_backend;	/* its BackendList entry, or NULL if not
-								 * running */
 	pid_t		rw_pid;			/* 0 if not running */
-	int			rw_child_slot;
 	TimestampTz rw_crashed_at;	/* if not 0, time it last crashed */
 	int			rw_shmem_slot;
 	bool		rw_terminate;
-- 
2.39.2

Reply via email to