From deee881ab6651dde633d0d53c9bf81d67135ac04 Mon Sep 17 00:00:00 2001
From: Kuntal Ghosh <kuntal.ghosh@enterprisedb.com>
Date: Wed, 5 Apr 2017 14:10:14 +0530
Subject: [PATCH] Fix parallel worker counts after a crash

The number of terminated parallel workers must never exceed the number
of registered parallel workers. When ForgetBackgroundWorker is called
due to a bgworker crash, we should not increase the terminated parallel
worker count.
---
 src/backend/postmaster/bgworker.c           | 16 +++++++++++++---
 src/backend/postmaster/postmaster.c         |  6 +++---
 src/include/postmaster/bgworker_internals.h |  2 +-
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 0823317..59b13fc 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -399,10 +399,12 @@ BackgroundWorkerStateChange(void)
  * points to it.  This convention allows deletion of workers during
  * searches of the worker list, and saves having to search the list again.
  *
+ * wasCrashed indicates whether the worker crashed previously.
+ *
  * This function must be invoked only in the postmaster.
  */
 void
-ForgetBackgroundWorker(slist_mutable_iter *cur)
+ForgetBackgroundWorker(slist_mutable_iter *cur, bool wasCrashed)
 {
 	RegisteredBgWorker *rw;
 	BackgroundWorkerSlot *slot;
@@ -412,7 +414,15 @@ ForgetBackgroundWorker(slist_mutable_iter *cur)
 	Assert(rw->rw_shmem_slot < max_worker_processes);
 	slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
 	if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
-		BackgroundWorkerData->parallel_terminate_count++;
+	{
+		/*
+		 * If the worker crashed previously, shared memory must have been
+		 * initialized. Hence, we don't increase the terminate count in
+		 * that case.
+		 */
+		if (!wasCrashed)
+			BackgroundWorkerData->parallel_terminate_count++;
+	}
 
 	slot->in_use = false;
 
@@ -471,7 +481,7 @@ ReportBackgroundWorkerExit(slist_mutable_iter *cur)
 	 */
 	if (rw->rw_terminate ||
 		rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
-		ForgetBackgroundWorker(cur);
+		ForgetBackgroundWorker(cur, false);
 
 	if (notify_pid != 0)
 		kill(notify_pid, SIGUSR1);
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 6831342..aa7ccf3 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1601,7 +1601,7 @@ DetermineSleepTime(struct timeval * timeout)
 			if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART
 				|| rw->rw_terminate)
 			{
-				ForgetBackgroundWorker(&siter);
+				ForgetBackgroundWorker(&siter, false);
 				continue;
 			}
 
@@ -5716,7 +5716,7 @@ maybe_start_bgworker(void)
 		/* marked for death? */
 		if (rw->rw_terminate)
 		{
-			ForgetBackgroundWorker(&iter);
+			ForgetBackgroundWorker(&iter, false);
 			continue;
 		}
 
@@ -5731,7 +5731,7 @@ maybe_start_bgworker(void)
 		{
 			if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
 			{
-				ForgetBackgroundWorker(&iter);
+				ForgetBackgroundWorker(&iter, true);
 				continue;
 			}
 
diff --git a/src/include/postmaster/bgworker_internals.h b/src/include/postmaster/bgworker_internals.h
index 9a2de4f..f50b2b1 100644
--- a/src/include/postmaster/bgworker_internals.h
+++ b/src/include/postmaster/bgworker_internals.h
@@ -40,7 +40,7 @@ extern slist_head BackgroundWorkerList;
 extern Size BackgroundWorkerShmemSize(void);
 extern void BackgroundWorkerShmemInit(void);
 extern void BackgroundWorkerStateChange(void);
-extern void ForgetBackgroundWorker(slist_mutable_iter *cur);
+extern void ForgetBackgroundWorker(slist_mutable_iter *cur, bool wasCrashed);
 extern void ReportBackgroundWorkerPID(RegisteredBgWorker *);
 extern void ReportBackgroundWorkerExit(slist_mutable_iter *cur);
 extern void BackgroundWorkerStopNotifications(pid_t pid);
-- 
1.8.3.1

