From c28058cccf77eb56369ef2f468f88884d0de369f Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddy@enterprisedb.com>
Date: Mon, 5 Apr 2021 08:44:44 +0530
Subject: [PATCH v12] pg_terminate_backend with wait and timeout

This patch adds extra default argument timeout (with default
value 0 meaning don't wait), to pg_terminate_backend which
will be used to terminate and wait timeout milliseconds for
the backend's termination.

This patch also adds a new function
pg_wait_for_backend_termination(pid, timeout) which checks
for the existence of the backend with given PID and waits
timeout milliseconds for its termination.
---
 doc/src/sgml/func.sgml                  |  32 +++++-
 doc/src/sgml/monitoring.sgml            |   4 +
 src/backend/catalog/system_views.sql    |  10 ++
 src/backend/storage/ipc/signalfuncs.c   | 132 +++++++++++++++++++++++-
 src/backend/utils/activity/wait_event.c |   3 +
 src/include/catalog/pg_proc.dat         |   9 +-
 src/include/utils/wait_event.h          |   1 +
 7 files changed, 183 insertions(+), 8 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 3cf243a16a..793416772b 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -24953,7 +24953,7 @@ SELECT collation for ('foo' COLLATE "de_DE");
         <indexterm>
          <primary>pg_terminate_backend</primary>
         </indexterm>
-        <function>pg_terminate_backend</function> ( <parameter>pid</parameter> <type>integer</type> )
+        <function>pg_terminate_backend</function> ( <parameter>pid</parameter> <type>integer</type>, <parameter>timeout</parameter> <type>bigint</type> <literal>DEFAULT</literal> <literal>0</literal> )
         <returnvalue>boolean</returnvalue>
        </para>
        <para>
@@ -24961,7 +24961,35 @@ SELECT collation for ('foo' COLLATE "de_DE");
         specified process ID.  This is also allowed if the calling role
         is a member of the role whose backend is being terminated or the
         calling role has been granted <literal>pg_signal_backend</literal>,
-        however only superusers can terminate superuser backends.
+        however only superusers can terminate superuser backends. When
+        <parameter>timeout</parameter> is not specified, this function sends
+        SIGTERM to the backend with the given process ID without waiting for
+        its actual termination and returns <literal>true</literal>. Sometimes
+        the backend process may still exist. With <parameter>timeout</parameter>
+        specified in milliseconds, the function ensures that the backend
+        process is actually goes away from the system processes. It keeps
+        checking for the existence of the backend process until the given
+        <parameter>timeout</parameter> occurs and returns <literal>true</literal>
+        if the process doesn't exit. On timeout a warning is emitted and
+        <literal>false</literal> is returned.
+       </para></entry>
+      </row>
+
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_wait_for_backend_termination</primary>
+        </indexterm>
+        <function>pg_wait_for_backend_termination</function> ( <parameter>pid</parameter> <type>integer</type>, <parameter>timeout</parameter> <type>bigint</type> <literal>DEFAULT</literal> <literal>5000</literal> )
+        <returnvalue>boolean</returnvalue>
+       </para>
+       <para>
+        Checks the existence of backend process with specified process ID. It
+        waits until the given <parameter>timeout</parameter> or the default
+        5000 milliseconds or 5 seconds. <literal>true</literal> is returned
+        when the backend is found to be not existing within the
+        <parameter>timeout</parameter> milliseconds. On timeout, a warning is
+        emitted and <literal>false</literal> is returned.
        </para></entry>
       </row>
      </tbody>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 56018745c8..caa1918656 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1569,6 +1569,10 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry>Waiting for subplan nodes of an <literal>Append</literal> plan
        node to be ready.</entry>
      </row>
+    <row>
+      <entry><literal>BackendTermination</literal></entry>
+      <entry>Waiting for the termination of a backend.</entry>
+     </row>
      <row>
       <entry><literal>BackupWaitWalArchive</literal></entry>
       <entry>Waiting for WAL files required for a backup to be successfully
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 5f2541d316..137ac8569b 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1346,6 +1346,16 @@ CREATE OR REPLACE FUNCTION
   RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_promote'
   PARALLEL SAFE;
 
+CREATE OR REPLACE FUNCTION
+  pg_terminate_backend(pid integer, timeout int8 DEFAULT 0)
+  RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_terminate_backend'
+  PARALLEL SAFE;
+
+CREATE OR REPLACE FUNCTION
+  pg_wait_for_backend_termination(pid integer, timeout int8 DEFAULT 5000)
+  RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_wait_for_backend_termination'
+  PARALLEL SAFE;
+
 -- legacy definition for compatibility with 9.3
 CREATE OR REPLACE FUNCTION
   json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c
index 8b55ff6e76..ec2da29141 100644
--- a/src/backend/storage/ipc/signalfuncs.c
+++ b/src/backend/storage/ipc/signalfuncs.c
@@ -18,6 +18,7 @@
 
 #include "catalog/pg_authid.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "postmaster/syslogger.h"
 #include "storage/pmsignal.h"
 #include "storage/proc.h"
@@ -126,15 +127,96 @@ pg_cancel_backend(PG_FUNCTION_ARGS)
 }
 
 /*
- * Signal to terminate a backend process.  This is allowed if you are a member
- * of the role whose process is being terminated.
+ * Wait until there is no backend process with the given PID and return true.
+ * On timeout, a warning is emitted and false is returned.
+ */
+static bool
+pg_wait_until_termination(int pid, int64 timeout)
+{
+	/*
+	 * Wait in steps of waittime milliseconds until this function exits or
+	 * timeout.
+	 */
+	int64	waittime = 100;
+	/*
+	 * Initially remaining time is the entire timeout specified by the user.
+	 */
+	int64	remainingtime = timeout;
+
+	/*
+	 * Check existence of the backend. If it doesn't exist, then check for the
+	 * errno ESRCH that confirms it and return true. If the backend still
+	 * exists, then wait for waittime milliseconds, again check for the
+	 * existence. Repeat this until timeout or an error occurs or a pending
+	 * interrupt such as query cancel gets processed.
+	 */
+	do
+	{
+		if (remainingtime < waittime)
+			waittime = remainingtime;
+
+		if (kill(pid, 0) == -1)
+		{
+			if (errno == ESRCH)
+				return true;
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INTERNAL_ERROR),
+						 errmsg("could not check the existence of the backend with PID %d: %m",
+								 pid)));
+		}
+
+		/* Process interrupts, if any, before going for wait. */
+		CHECK_FOR_INTERRUPTS();
+
+		(void) WaitLatch(MyLatch,
+						 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
+						 waittime,
+						 WAIT_EVENT_BACKEND_TERMINATION);
+
+		ResetLatch(MyLatch);
+
+		remainingtime -= waittime;
+	} while (remainingtime > 0);
+
+	ereport(WARNING,
+			(errmsg("backend with PID %d did not terminate within %lld milliseconds",
+					pid, (long long int) timeout)));
+
+	return false;
+}
+
+/*
+ * Signal to terminate a backend process. This is allowed if you are a member
+ * of the role whose process is being terminated. If timeout input argument is
+ * 0 (which is default), then this function just signals the backend and
+ * doesn't wait. Otherwise it waits until given the timeout milliseconds or no
+ * process has the given PID and returns true. On timeout, a warning is emitted
+ * and false is returned.
  *
  * Note that only superusers can signal superuser-owned processes.
  */
 Datum
 pg_terminate_backend(PG_FUNCTION_ARGS)
 {
-	int			r = pg_signal_backend(PG_GETARG_INT32(0), SIGTERM);
+	int	 pid;
+	int	 r;
+	int timeout;
+	bool wait;
+	bool result;
+
+	pid = PG_GETARG_INT32(0);
+	timeout = PG_GETARG_INT64(1);
+
+	if (timeout < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+					errmsg("\"timeout\" must not be negative")));
+
+	/* If timeout is zero (which is default), then don't wait. */
+	wait = (timeout == 0) ? false : true;
+
+	r = pg_signal_backend(pid, SIGTERM);
 
 	if (r == SIGNAL_BACKEND_NOSUPERUSER)
 		ereport(ERROR,
@@ -146,7 +228,49 @@ pg_terminate_backend(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 				 errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend")));
 
-	PG_RETURN_BOOL(r == SIGNAL_BACKEND_SUCCESS);
+	result = (r == SIGNAL_BACKEND_SUCCESS);
+
+	/* Wait only if requested and the termination is successful. */
+	if (wait && result)
+		result = pg_wait_until_termination(pid, timeout);
+
+	PG_RETURN_BOOL(result);
+}
+
+/*
+ * This function waits for a backend with the given PID to be terminated or
+ * until the given timeout milliseconds occurs. Returns true if the backend
+ * doesn't exist. On Timeout a warning is emitted and false is returned.
+ */
+Datum
+pg_wait_for_backend_termination(PG_FUNCTION_ARGS)
+{
+	int	 pid;
+	int64	timeout;
+	PGPROC	*proc = NULL;
+	bool	result = false;
+
+	pid = PG_GETARG_INT32(0);
+	timeout = PG_GETARG_INT64(1);
+
+	if (timeout <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				errmsg("\"timeout\" must not be negative or zero")));
+
+	proc = BackendPidGetProc(pid);
+
+	if (proc == NULL)
+	{
+		ereport(WARNING,
+				(errmsg("PID %d is not a PostgreSQL server process", pid)));
+
+		PG_RETURN_BOOL(result);
+	}
+
+	result = pg_wait_until_termination(pid, timeout);
+
+	PG_RETURN_BOOL(result);
 }
 
 /*
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index accc1eb577..89b5b8b7b9 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -313,6 +313,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
 		case WAIT_EVENT_APPEND_READY:
 			event_name = "AppendReady";
 			break;
+		case WAIT_EVENT_BACKEND_TERMINATION:
+			event_name = "BackendTermination";
+			break;
 		case WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE:
 			event_name = "BackupWaitWalArchive";
 			break;
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 69ffd0c3f4..e04d9d4141 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -6168,9 +6168,14 @@
 { oid => '2171', descr => 'cancel a server process\' current query',
   proname => 'pg_cancel_backend', provolatile => 'v', prorettype => 'bool',
   proargtypes => 'int4', prosrc => 'pg_cancel_backend' },
-{ oid => '2096', descr => 'terminate a server process',
+{ oid => '2096', descr => 'terminate a backend process and if timeout is specified, wait for its exit or until timeout occurs',
   proname => 'pg_terminate_backend', provolatile => 'v', prorettype => 'bool',
-  proargtypes => 'int4', prosrc => 'pg_terminate_backend' },
+  proargtypes => 'int4 int8', proargnames => '{pid,timeout}',
+  prosrc => 'pg_terminate_backend' },
+{ oid => '16386', descr => 'wait for a backend process exit or timeout occurs',
+  proname => 'pg_wait_for_backend_termination', provolatile => 'v', prorettype => 'bool',
+  proargtypes => 'int4 int8', proargnames => '{pid,timeout}',
+  prosrc => 'pg_wait_for_backend_termination' },
 { oid => '2172', descr => 'prepare for taking an online backup',
   proname => 'pg_start_backup', provolatile => 'v', proparallel => 'r',
   prorettype => 'pg_lsn', proargtypes => 'text bool bool',
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 44448b48ec..47accc5ffe 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -80,6 +80,7 @@ typedef enum
 typedef enum
 {
 	WAIT_EVENT_APPEND_READY = PG_WAIT_IPC,
+	WAIT_EVENT_BACKEND_TERMINATION,
 	WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE,
 	WAIT_EVENT_BGWORKER_SHUTDOWN,
 	WAIT_EVENT_BGWORKER_STARTUP,
-- 
2.25.1

