From f9f4b9cc1f21b80aeaf23c1d58534c99347ffb30 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Fri, 10 Dec 2021 18:01:36 +0900
Subject: [PATCH v3] Move replication slot release to before_shmem_exit().

Previously, the wal sender releases the replication slot in ProcKill()
on error, resulting in sending the replication slot drop message for
the ephemeral slot to the stats collector after it shut down.

To fix this problem, move replication slot release to a
before_shmem_exit() hook that is called before the stats collector
shuts down.
---
 src/backend/replication/slot.c    | 26 ++++++++++++++++++++++++++
 src/backend/storage/lmgr/proc.c   |  7 -------
 src/backend/tcop/postgres.c       |  5 +++--
 src/backend/utils/init/postinit.c |  6 ++++++
 src/include/replication/slot.h    |  1 +
 5 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 90ba9b417d..25124264fd 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -46,6 +46,7 @@
 #include "pgstat.h"
 #include "replication/slot.h"
 #include "storage/fd.h"
+#include "storage/ipc.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
 #include "utils/builtins.h"
@@ -107,6 +108,8 @@ static void RestoreSlotFromDisk(const char *name);
 static void CreateSlotOnDisk(ReplicationSlot *slot);
 static void SaveSlotToPath(ReplicationSlot *slot, const char *path, int elevel);
 
+static void ReplicationSlotShmemExit(int code, Datum arg);
+
 /*
  * Report shared-memory space needed by ReplicationSlotsShmemInit.
  */
@@ -160,6 +163,29 @@ ReplicationSlotsShmemInit(void)
 	}
 }
 
+/*
+ * Register the callback for replication slot cleanup and releasing.
+ */
+void
+ReplicationSlotInitialize(void)
+{
+	before_shmem_exit(ReplicationSlotShmemExit, 0);
+}
+
+/*
+ * Release and cleanup replication slots.
+ */
+static void
+ReplicationSlotShmemExit(int code, Datum arg)
+{
+	/* Make sure active replication slots are released */
+	if (MyReplicationSlot != NULL)
+		ReplicationSlotRelease();
+
+	/* Also cleanup all the temporary slots. */
+	ReplicationSlotCleanup();
+}
+
 /*
  * Check whether the passed slot name is valid and report errors at elevel.
  *
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index d1d3cd0dc8..f68975b461 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -830,13 +830,6 @@ ProcKill(int code, Datum arg)
 	/* Cancel any pending condition variable sleep, too */
 	ConditionVariableCancelSleep();
 
-	/* Make sure active replication slots are released */
-	if (MyReplicationSlot != NULL)
-		ReplicationSlotRelease();
-
-	/* Also cleanup all the temporary slots. */
-	ReplicationSlotCleanup();
-
 	/*
 	 * Detach from any lock group of which we are a member.  If the leader
 	 * exist before all other group members, its PGPROC will remain allocated
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 82de01cdc6..ab633d03bf 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4261,8 +4261,9 @@ PostgresMain(const char *dbname, const char *username)
 		 * We can't release replication slots inside AbortTransaction() as we
 		 * need to be able to start and abort transactions while having a slot
 		 * acquired. But we never need to hold them across top level errors,
-		 * so releasing here is fine. There's another cleanup in ProcKill()
-		 * ensuring we'll correctly cleanup on FATAL errors as well.
+		 * so releasing here is fine. There's another cleanup in
+		 * before_shmem_exit() callback ensuring we'll correctly cleanup on
+		 * FATAL errors as well.
 		 */
 		if (MyReplicationSlot != NULL)
 			ReplicationSlotRelease();
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 7292e51f7d..12c012e664 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -40,6 +40,7 @@
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "postmaster/postmaster.h"
+#include "replication/slot.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
@@ -547,6 +548,11 @@ BaseInit(void)
 	 * ever try to insert XLOG.
 	 */
 	InitXLogInsert();
+
+	/*
+	 * Initialize replication slots.
+	 */
+	ReplicationSlotInitialize();
 }
 
 
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index 53d773ccff..2f4b123c87 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -206,6 +206,7 @@ extern void ReplicationSlotSave(void);
 extern void ReplicationSlotMarkDirty(void);
 
 /* misc stuff */
+extern void ReplicationSlotInitialize(void);
 extern bool ReplicationSlotValidateName(const char *name, int elevel);
 extern void ReplicationSlotReserveWal(void);
 extern void ReplicationSlotsComputeRequiredXmin(bool already_locked);
-- 
2.24.3 (Apple Git-128)

