On Wed, Oct 28, 2020 at 6:41 PM Fujii Masao <masao.fu...@oss.nttdata.com> wrote:
>
> I prefer that false is returned when the timeout happens,
> like pg_promote() does.
>

Done.

>
> When the specified timeout is negative, the following error is thrown *after*
> SIGTERM is signaled to the target backend. This seems strange to me.
> The timeout value should be verified at the beginning of the function, 
> instead.
>
>      ERROR:  timeout cannot be negative
>

I'm not throwing error for this case, instead a warning and returning
false. This is to keep it consistent with other cases such as the
given pid is not a backend pid.

Attaching the v3 patch. I tried to address the review comments
received so far and added documentation. I tested the patch locally
here. I saw that we don't have any test cases for existing
pg_terminate_backend(), do we need to add test cases into regression
suites for these two new functions?

Please review the v3 patch and let me know comments.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com
From 6d6602d6caa0773327444ec1bc1ead2f7afc5c30 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddy@enterprisedb.com>
Date: Sat, 31 Oct 2020 15:56:09 +0530
Subject: [PATCH v1] pg_terminate_backend with wait, timeout and
 pg_wait_backend with timeout

This patch adds two new functions:
1. pg_terminate_backend(pid, wait, timeout) - terminate and
wait or timeout for a given backend.
2. pg_wait_backend(pid, timeout) - check the existence of the
backend with a given PID and wait or timeout until it goes away.
---
 doc/src/sgml/func.sgml                |  28 +++++-
 src/backend/catalog/system_views.sql  |  10 ++
 src/backend/postmaster/pgstat.c       |   3 +
 src/backend/storage/ipc/signalfuncs.c | 135 ++++++++++++++++++++++++++
 src/include/catalog/pg_proc.dat       |   9 ++
 src/include/pgstat.h                  |   3 +-
 6 files changed, 185 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index d8eee3a826..0472f9166f 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -23972,7 +23972,7 @@ SELECT collation for ('foo' COLLATE "de_DE");
         <indexterm>
          <primary>pg_terminate_backend</primary>
         </indexterm>
-        <function>pg_terminate_backend</function> ( <parameter>pid</parameter> <type>integer</type> )
+        <function>pg_terminate_backend</function> ( <parameter>pid</parameter> <type>integer</type>, <parameter>wait</parameter> <type>boolean</type>, <parameter>timeout</parameter> <type>bigint</type> <literal>DEFAULT</literal> <literal>100</literal> )
         <returnvalue>boolean</returnvalue>
        </para>
        <para>
@@ -23980,7 +23980,31 @@ SELECT collation for ('foo' COLLATE "de_DE");
         specified process ID.  This is also allowed if the calling role
         is a member of the role whose backend is being terminated or the
         calling role has been granted <literal>pg_signal_backend</literal>,
-        however only superusers can terminate superuser backends.
+        however only superusers can terminate superuser backends. When no
+        <parameter>wait</parameter> and <parameter>timeout</parameter> are
+        provided, only SIGTERM is sent to the backend with the given process
+        ID and <literal>false</literal> is returned immediately. But the
+        backend may still exist. With <parameter>wait</parameter> specified as
+        <literal>true</literal>, the function waits for the backend termination
+        until the given <parameter>timeout</parameter> or the default 100
+        milliseconds. On timeout <literal>false</literal> is returned.
+       </para></entry>
+      </row>
+
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_wait_backend</primary>
+        </indexterm>
+        <function>pg_wait_backend</function> ( <parameter>pid</parameter> <type>integer</type>, <parameter>timeout</parameter> <type>bigint</type> <literal>DEFAULT</literal> <literal>100</literal> )
+        <returnvalue>boolean</returnvalue>
+       </para>
+       <para>
+        Check the existence of the session whose backend process has the
+        specified process ID. On success return <literal>true</literal>. This
+        function waits until the given <parameter>timeout</parameter> or the
+        default 100 milliseconds. On timeout <literal>false</literal> is
+        returned.
        </para></entry>
       </row>
      </tbody>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 5171ea05c7..4a5ce09817 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1243,6 +1243,16 @@ CREATE OR REPLACE FUNCTION
   RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_promote'
   PARALLEL SAFE;
 
+CREATE OR REPLACE FUNCTION
+  pg_terminate_backend(pid integer, wait boolean, timeout int8 DEFAULT 100)
+  RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_terminate_backend_and_wait'
+  PARALLEL UNSAFE;
+
+CREATE OR REPLACE FUNCTION
+  pg_wait_backend(pid integer, timeout int8 DEFAULT 100)
+  RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_wait_backend'
+  PARALLEL UNSAFE;
+
 -- legacy definition for compatibility with 9.3
 CREATE OR REPLACE FUNCTION
   json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index f1dca2f25b..4eb11382cd 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -4024,6 +4024,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
 		case WAIT_EVENT_XACT_GROUP_UPDATE:
 			event_name = "XactGroupUpdate";
 			break;
+		case WAIT_EVENT_BACKEND_TERMINATION:
+			event_name = "BackendTermination";
+			break;
 			/* no default case, so that compiler will warn */
 	}
 
diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c
index d822e82cb9..4ed0c135b0 100644
--- a/src/backend/storage/ipc/signalfuncs.c
+++ b/src/backend/storage/ipc/signalfuncs.c
@@ -18,6 +18,7 @@
 
 #include "catalog/pg_authid.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "postmaster/syslogger.h"
 #include "storage/pmsignal.h"
 #include "storage/proc.h"
@@ -149,6 +150,140 @@ pg_terminate_backend(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(r == SIGNAL_BACKEND_SUCCESS);
 }
 
+/*
+ * Wait until there is no backend process with the given PID or timeout. On
+ * timeout, a warning is thrown to the user.
+ */
+static bool
+pg_wait_until_termination(int pid, int64 timeout)
+{
+	/*
+	 * Wait in steps of waittime milliseconds until this function exits or
+	 * timeout.
+	 */
+	int64	waittime = 10;
+	/*
+	 * Initially remaining time is the entire timeout specified by the user.
+	 */
+	int64	remainingtime = timeout;
+
+	/*
+	 * Check the backend existence. If doesn't exist, then check for the errno
+	 * ESRCH that confirms it and return true. If still exist, then wait for
+	 * waittime milliseconds, again check for the existence. Repeat until
+	 * timeout or an error occurs or a pending interrupt such as query cancel
+	 * is processed.
+	 */
+	for (;;)
+	{
+		if (remainingtime < waittime)
+			waittime = remainingtime;
+
+		if (kill(pid, 0) == -1)
+		{
+			if (errno == ESRCH)
+				return true;
+			else
+			{
+				ereport(WARNING,
+						(errmsg("could not check the existence of the backend with PID %d: %m",
+								pid)));
+				return false;
+			}
+		}
+
+		/* Process interrupts, if any, before going for wait. */
+		CHECK_FOR_INTERRUPTS();
+
+		(void) WaitLatch(MyLatch,
+						 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
+						 waittime,
+						 WAIT_EVENT_BACKEND_TERMINATION);
+
+		remainingtime -= waittime;
+
+		if (remainingtime <= 0)
+			break;
+	}
+
+	ereport(WARNING,
+			(errmsg("could not wait for the termination of the backend with PID %d within %ld milliseconds",
+					pid, timeout)));
+
+	return false;
+}
+
+/*
+ * Signal termination of a backend process, wait until timeout or no backend
+ * has the given PID. If the wait input argument is false, this function
+ * behaviour is same as pg_terminate_backend. On timeout, a warning is thrown
+ * to the user. Self termination is allowed but waiting is not, because once
+ * the backend is self terminated there is no point in waiting for it to go
+ * away.
+ */
+Datum
+pg_terminate_backend_and_wait(PG_FUNCTION_ARGS)
+{
+	int 	pid = PG_GETARG_DATUM(0);
+	bool	wait = PG_GETARG_BOOL(1);
+	int64	timeout = PG_GETARG_INT64(2);
+	bool	r = false;
+
+	if (timeout <= 0)
+	{
+		ereport(WARNING,
+				(errmsg("timeout cannot be negative or zero: %ld", timeout)));
+		PG_RETURN_BOOL(r);
+	}
+
+	/* Self termination is allowed but waiting is not after that. */
+	if (pid == MyProcPid && wait)
+		wait = false;
+
+	r = DatumGetBool(DirectFunctionCall1(pg_terminate_backend, pid));
+
+	/* Wait only if requested and the termination is successful. */
+	if (wait && r)
+		r = pg_wait_until_termination(pid, timeout);
+
+	PG_RETURN_BOOL(r);
+}
+
+/*
+ * This function waits for a backend with the given PID to be terminated or
+ * until the timeout occurs. If the PID is of the backend which issued this
+ * function, either timeout can occur or the function can succeed if the
+ * backend gets terminated by some other process, say postmaster.
+ */
+Datum
+pg_wait_backend(PG_FUNCTION_ARGS)
+{
+	int		pid = PG_GETARG_INT32(0);
+	int64  	timeout = PG_GETARG_INT64(1);
+	PGPROC	*proc = NULL;
+	bool	r = false;
+
+	if (timeout <= 0)
+	{
+		ereport(WARNING,
+				(errmsg("timeout cannot be negative or zero: %ld", timeout)));
+		PG_RETURN_BOOL(r);
+	}
+
+	proc = BackendPidGetProc(pid);
+
+	if (proc == NULL)
+	{
+		ereport(WARNING,
+				(errmsg("PID %d is not a PostgreSQL server process", pid)));
+		PG_RETURN_BOOL(r);
+	}
+
+	r = pg_wait_until_termination(pid, timeout);
+
+	PG_RETURN_BOOL(r);
+}
+
 /*
  * Signal to reload the database configuration
  *
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d9770bbadd..ce6604dab6 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -6084,6 +6084,15 @@
 { oid => '2096', descr => 'terminate a server process',
   proname => 'pg_terminate_backend', provolatile => 'v', prorettype => 'bool',
   proargtypes => 'int4', prosrc => 'pg_terminate_backend' },
+{ oid => '16386', descr => 'terminate a backend process and wait for it\'s exit or until timeout occurs',
+  proname => 'pg_terminate_backend', provolatile => 'v',
+  prorettype => 'bool', proargtypes => 'int4 bool int8',
+  proargnames => '{pid,wait,timeout}',
+  prosrc => 'pg_terminate_backend_and_wait' },
+{ oid => '16387', descr => 'waits for a backend process exit or timeout occurs',
+  proname => 'pg_wait_backend', provolatile => 'v', prorettype => 'bool',
+  proargtypes => 'int4 int8', proargnames => '{pid,timeout}',
+  prosrc => 'pg_wait_backend' },
 { oid => '2172', descr => 'prepare for taking an online backup',
   proname => 'pg_start_backup', provolatile => 'v', proparallel => 'r',
   prorettype => 'pg_lsn', proargtypes => 'text bool bool',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 257e515bfe..d100a1c737 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -958,7 +958,8 @@ typedef enum
 	WAIT_EVENT_REPLICATION_SLOT_DROP,
 	WAIT_EVENT_SAFE_SNAPSHOT,
 	WAIT_EVENT_SYNC_REP,
-	WAIT_EVENT_XACT_GROUP_UPDATE
+	WAIT_EVENT_XACT_GROUP_UPDATE,
+	WAIT_EVENT_BACKEND_TERMINATION
 } WaitEventIPC;
 
 /* ----------
-- 
2.25.1

Reply via email to