From ab4110fe6bcff362e492ddafccbf495cad14165c Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 6 May 2026 11:17:27 +0500
Subject: [PATCH vAB2 2/3] Add regression test for ProcKill lock-group
 procLatch recycle race

Two backends form a lock group via prockill_become_lock_group_leader /
prockill_become_lock_group_member (a new test module), then are terminated
concurrently.  Injection points placed in ProcKill() right after
LWLockRelease(leader_lwlock) pause both victims there, letting the test
verify that a freshly forked backend can claim the recycled PGPROC slot
without hitting "latch already owned by PID ..." PANIC.

Standard observability (pg_stat_activity, BackendPidGetProc) is unavailable
at that point in ProcKill: pgstat_beshutdown_hook and RemoveProcFromArray,
both registered as on_shmem_exit callbacks, run before ProcKill.  The new
prockill_backend_in_injection() helper reads PGPROC->wait_event_info from
ProcGlobal->allProcs directly, which remains valid until ProcKill zeroes
proc->pid near its end.

Injection waits must be attached from a controller session rather than from
the victims themselves.  injection_points_attach() from the victim would
register a before_shmem_exit cleanup that detaches the point before ProcKill
runs.  The new prockill_attach_injection_wait() calls InjectionPointAttach()
directly, leaving no such hook on the victims.

The test expects a fixed server and must land together with the matching
ProcKill fix.  Requires --enable-injection-points.

Discussion: https://www.postgresql.org/message-id/d2983796-2603-41b7-a66e-fc8489ddb954@gmail.com
Author: Vlad Lesin <vladlesin@gmail.com>
Reviewed-by: Andrey Borodin <amborodin@acm.org>
---
 src/backend/storage/lmgr/proc.c               |   7 ++
 src/test/modules/Makefile                     |   2 +-
 src/test/modules/meson.build                  |   1 +
 src/test/modules/prockill_race/Makefile       |  29 +++++
 src/test/modules/prockill_race/meson.build    |  36 ++++++
 .../prockill_race/prockill_race--1.0.sql      |  27 ++++
 .../modules/prockill_race/prockill_race.c     | 118 ++++++++++++++++++
 .../prockill_race/prockill_race.control       |   7 ++
 .../t/001_prockill_lockgroup_injection.pl     | 101 +++++++++++++++
 9 files changed, 327 insertions(+), 1 deletion(-)
 create mode 100644 src/test/modules/prockill_race/Makefile
 create mode 100644 src/test/modules/prockill_race/meson.build
 create mode 100644 src/test/modules/prockill_race/prockill_race--1.0.sql
 create mode 100644 src/test/modules/prockill_race/prockill_race.c
 create mode 100644 src/test/modules/prockill_race/prockill_race.control
 create mode 100644 src/test/modules/prockill_race/t/001_prockill_lockgroup_injection.pl

diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 1ac25068d62..c6b490af186 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -990,6 +990,13 @@ ProcKill(int code, Datum arg)
 		else if (leader != MyProc)
 			MyProc->lockGroupLeader = NULL;
 		LWLockRelease(leader_lwlock);
+		/*
+		 * Test hooks for src/test/modules/prockill_race.  Synchronize
+		 * concurrent ProcKill paths in a lock group; two names are used so
+		 * a controller can attach a PID-scoped "wait" action per name.
+		 */
+		INJECTION_POINT("prockill-after-lockgroup-leader", NULL);
+		INJECTION_POINT("prockill-after-lockgroup-follower", NULL);
 	}
 
 	/*
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index 0a74ab5c86f..ba80651e009 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -59,7 +59,7 @@ SUBDIRS = \
 
 
 ifeq ($(enable_injection_points),yes)
-SUBDIRS += injection_points gin typcache
+SUBDIRS += injection_points gin typcache prockill_race
 else
 ALWAYS_SUBDIRS += injection_points gin typcache
 endif
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index 4bca42bb370..9e013fd05af 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -13,6 +13,7 @@ subdir('libpq_pipeline')
 subdir('nbtree')
 subdir('oauth_validator')
 subdir('plsample')
+subdir('prockill_race')
 subdir('spgist_name_ops')
 subdir('ssl_passphrase_callback')
 subdir('test_aio')
diff --git a/src/test/modules/prockill_race/Makefile b/src/test/modules/prockill_race/Makefile
new file mode 100644
index 00000000000..eb9694d5195
--- /dev/null
+++ b/src/test/modules/prockill_race/Makefile
@@ -0,0 +1,29 @@
+# src/test/modules/prockill_race/Makefile
+
+MODULE_big = prockill_race
+OBJS = \
+	$(WIN32RES) \
+	prockill_race.o
+PGFILEDESC = "prockill_race - ProcKill lock-group race test helpers"
+
+EXTRA_INSTALL=src/test/modules/injection_points
+export enable_injection_points
+TAP_TESTS = 1
+
+EXTENSION = prockill_race
+DATA = prockill_race--1.0.sql
+
+# Disabled because these tests require a temporary installation with
+# injection points enabled.
+NO_INSTALLCHECK = 1
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/prockill_race
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/prockill_race/meson.build b/src/test/modules/prockill_race/meson.build
new file mode 100644
index 00000000000..fbdd81a6d53
--- /dev/null
+++ b/src/test/modules/prockill_race/meson.build
@@ -0,0 +1,36 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+
+prockill_race_sources = files(
+  'prockill_race.c',
+)
+
+if host_system == 'windows'
+  prockill_race_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'prockill_race',
+    '--FILEDESC', 'prockill_race - ProcKill lock-group test helpers',])
+endif
+
+prockill_race = shared_module('prockill_race',
+  prockill_race_sources,
+  kwargs: pg_test_mod_args,
+)
+test_install_libs += prockill_race
+
+test_install_data += files(
+  'prockill_race.control',
+  'prockill_race--1.0.sql',
+)
+
+tests += {
+  'name': 'prockill_race',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'tap': {
+    'env': {
+       'enable_injection_points': get_option('injection_points') ? 'yes' : 'no',
+    },
+    'tests': [
+      't/001_prockill_lockgroup_injection.pl',
+    ],
+  },
+}
diff --git a/src/test/modules/prockill_race/prockill_race--1.0.sql b/src/test/modules/prockill_race/prockill_race--1.0.sql
new file mode 100644
index 00000000000..b9a43e70c91
--- /dev/null
+++ b/src/test/modules/prockill_race/prockill_race--1.0.sql
@@ -0,0 +1,27 @@
+/* prockill_race--1.0.sql */
+
+\echo Use "CREATE EXTENSION prockill_race" to load this file. \quit
+
+CREATE FUNCTION prockill_become_lock_group_leader()
+RETURNS void
+AS 'MODULE_PATHNAME', 'prockill_become_lock_group_leader'
+LANGUAGE C STRICT PARALLEL UNSAFE;
+
+CREATE FUNCTION prockill_become_lock_group_member(leader_pid integer)
+RETURNS void
+AS 'MODULE_PATHNAME', 'prockill_become_lock_group_member'
+LANGUAGE C STRICT PARALLEL UNSAFE;
+
+CREATE FUNCTION prockill_attach_injection_wait(point_name text, target_pid integer)
+RETURNS void
+AS 'MODULE_PATHNAME', 'prockill_attach_injection_wait'
+LANGUAGE C STRICT PARALLEL UNSAFE;
+
+-- Test-only probe: is the backend with the given PID currently waiting on the
+-- named injection point?  Looks directly at ProcGlobal->allProcs so it keeps
+-- working while the target is blocked inside ProcKill() (after pgstat and
+-- ProcArray teardown have already run).
+CREATE FUNCTION prockill_backend_in_injection(target_pid integer, point_name text)
+RETURNS boolean
+AS 'MODULE_PATHNAME', 'prockill_backend_in_injection'
+LANGUAGE C STRICT PARALLEL UNSAFE;
diff --git a/src/test/modules/prockill_race/prockill_race.c b/src/test/modules/prockill_race/prockill_race.c
new file mode 100644
index 00000000000..338305858f6
--- /dev/null
+++ b/src/test/modules/prockill_race/prockill_race.c
@@ -0,0 +1,118 @@
+/*-------------------------------------------------------------------------
+ *
+ * prockill_race.c
+ *		SQL helpers for the prockill_race TAP test
+ *
+ * Exposes lock-group formation without parallel query and helpers that
+ * observe/control victim backends while they are inside ProcKill().
+ *
+ * Copyright (c) 2025-2026, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/test/modules/prockill_race/prockill_race.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "utils/builtins.h"
+#include "utils/injection_point.h"
+#include "utils/wait_event.h"
+
+#include "../injection_points/injection_point_condition.h"
+
+PG_MODULE_MAGIC;
+
+/* Read a uint32 field exactly once (matches waitfuncs.c idiom). */
+#define UINT32_ACCESS_ONCE(var)	((uint32) (*((volatile uint32 *) &(var))))
+
+PG_FUNCTION_INFO_V1(prockill_become_lock_group_leader);
+
+Datum
+prockill_become_lock_group_leader(PG_FUNCTION_ARGS)
+{
+	BecomeLockGroupLeader();
+	PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(prockill_become_lock_group_member);
+
+Datum
+prockill_become_lock_group_member(PG_FUNCTION_ARGS)
+{
+	int			leader_pid = PG_GETARG_INT32(0);
+	PGPROC	   *leader;
+
+	leader = BackendPidGetProc(leader_pid);
+	if (leader == NULL)
+		elog(ERROR, "backend with PID %d not found", leader_pid);
+
+	if (!BecomeLockGroupMember(leader, leader_pid))
+		elog(ERROR, "could not join lock group of backend %d", leader_pid);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Attach injection_wait to point_name, scoped to target_pid via
+ * INJ_CONDITION_PID.
+ *
+ * Must be called from a controller session that is not one of the victims.
+ * Using injection_points_attach() from the victim itself would register a
+ * before_shmem_exit(injection_points_cleanup) hook that fires before ProcKill
+ * (which is on_shmem_exit) and would detach the point before the victim ever
+ * reaches it.  Calling InjectionPointAttach() directly from a separate
+ * controller session leaves no such hook on the victims.
+ */
+PG_FUNCTION_INFO_V1(prockill_attach_injection_wait);
+
+Datum
+prockill_attach_injection_wait(PG_FUNCTION_ARGS)
+{
+	char	   *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+	int			target_pid = PG_GETARG_INT32(1);
+	InjectionPointCondition cond = {INJ_CONDITION_PID, target_pid};
+
+	InjectionPointAttach(name, "injection_points", "injection_wait",
+						 &cond, sizeof(cond));
+	PG_RETURN_VOID();
+}
+
+/*
+ * Return true if the backend with target_pid is currently waiting at the
+ * named injection point, by reading PGPROC->wait_event_info directly.
+ *
+ * At the injection points in ProcKill(), the standard observability surfaces
+ * (pg_stat_activity, BackendPidGetProc) are already unavailable: both
+ * pgstat_beshutdown_hook and RemoveProcFromArray run as on_shmem_exit
+ * callbacks before ProcKill.  The PGPROC slot itself remains intact until
+ * ProcKill zeroes proc->pid near its end, so a direct allProcs scan works.
+ */
+PG_FUNCTION_INFO_V1(prockill_backend_in_injection);
+
+Datum
+prockill_backend_in_injection(PG_FUNCTION_ARGS)
+{
+	int			target_pid = PG_GETARG_INT32(0);
+	char	   *want_name = text_to_cstring(PG_GETARG_TEXT_PP(1));
+	uint32		n = ProcGlobal->allProcCount;
+
+	for (uint32 i = 0; i < n; i++)
+	{
+		PGPROC	   *proc = &ProcGlobal->allProcs[i];
+		const char *ev;
+
+		if (proc->pid != target_pid)
+			continue;
+
+		ev = pgstat_get_wait_event(UINT32_ACCESS_ONCE(proc->wait_event_info));
+		PG_RETURN_BOOL(ev != NULL && strcmp(ev, want_name) == 0);
+	}
+
+	PG_RETURN_BOOL(false);
+}
diff --git a/src/test/modules/prockill_race/prockill_race.control b/src/test/modules/prockill_race/prockill_race.control
new file mode 100644
index 00000000000..3f8cfc8c995
--- /dev/null
+++ b/src/test/modules/prockill_race/prockill_race.control
@@ -0,0 +1,7 @@
+# prockill_race extension
+comment = 'TAP test helpers for ProcKill lock-group / injection point race'
+default_version = '1.0'
+module_pathname = '$libdir/prockill_race'
+relocatable = true
+superuser = true
+trusted = true
diff --git a/src/test/modules/prockill_race/t/001_prockill_lockgroup_injection.pl b/src/test/modules/prockill_race/t/001_prockill_lockgroup_injection.pl
new file mode 100644
index 00000000000..bb0779bbb88
--- /dev/null
+++ b/src/test/modules/prockill_race/t/001_prockill_lockgroup_injection.pl
@@ -0,0 +1,101 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+#
+# Regression test for the ProcKill lock-group vs. procLatch recycle race.
+#
+# Two backends form a lock group, then are terminated concurrently.  The test
+# uses injection points placed inside ProcKill() to pause both victims there
+# and verify that a freshly forked backend can claim the recycled PGPROC slot
+# without hitting "latch already owned by PID ..." PANIC.
+#
+# Requires --enable-injection-points.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if (($ENV{enable_injection_points} // '') ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+my $node = PostgreSQL::Test::Cluster->new('prockill_race');
+$node->init;
+$node->append_conf('postgresql.conf',
+	q{shared_preload_libraries = 'injection_points'});
+$node->start;
+
+$node->safe_psql('postgres',
+	q{CREATE EXTENSION injection_points;
+	  CREATE EXTENSION prockill_race;});
+
+# Form a two-backend lock group.
+my $leader   = $node->background_psql('postgres');
+my $follower = $node->background_psql('postgres');
+
+$leader->query_safe('SELECT prockill_become_lock_group_leader()');
+my $leader_pid   = $leader->query_safe('SELECT pg_backend_pid()');
+my $follower_pid = $follower->query_safe('SELECT pg_backend_pid()');
+$leader_pid   =~ s/\s+//g;
+$follower_pid =~ s/\s+//g;
+
+$follower->query_safe("SELECT prockill_become_lock_group_member($leader_pid)");
+
+# Attach PID-scoped injection waits from a throwaway controller session, not
+# from $leader/$follower.  injection_points_attach() via the victim would
+# register a before_shmem_exit cleanup that fires before ProcKill and detaches
+# the point mid-exit.  InjectionPointAttach() called directly here leaves no
+# such hook on the victims.
+$node->safe_psql('postgres', qq{
+	SELECT prockill_attach_injection_wait('prockill-after-lockgroup-leader',   $leader_pid);
+	SELECT prockill_attach_injection_wait('prockill-after-lockgroup-follower', $follower_pid);
+});
+
+# Kill the leader and wait for it to pause inside ProcKill.
+# prockill_backend_in_injection() reads PGPROC->wait_event_info directly
+# because pg_stat_activity and ProcArray are already torn down before ProcKill.
+$node->safe_psql('postgres', "SELECT pg_terminate_backend($leader_pid)");
+my $leader_ok = $node->poll_query_until(
+	'postgres',
+	"SELECT prockill_backend_in_injection($leader_pid, 'prockill-after-lockgroup-leader')"
+);
+
+# Kill the follower and wait for it similarly.  Use eval: if the fix has
+# regressed, the postmaster may have already PANICed.
+eval {
+	$node->safe_psql('postgres',
+		"SELECT pg_terminate_backend($follower_pid)");
+};
+my $follower_ok = $node->poll_query_until(
+	'postgres',
+	"SELECT prockill_backend_in_injection($follower_pid, 'prockill-after-lockgroup-follower')"
+);
+
+# Release both victims.
+eval {
+	$node->safe_psql('postgres',
+		q{SELECT injection_points_wakeup('prockill-after-lockgroup-follower');
+		  SELECT injection_points_wakeup('prockill-after-lockgroup-leader');});
+};
+
+# A new backend must be able to claim the recycled PGPROC slot without PANIC.
+my $select_ok = eval { $node->safe_psql('postgres', 'SELECT 1'); 1 };
+
+ok($leader_ok && $follower_ok,
+	'leader and follower reached ProcKill injection points');
+ok($select_ok,
+	'new backend claimed recycled PGPROC without latch-recycle PANIC');
+
+eval {
+	$node->safe_psql('postgres',
+		q{SELECT injection_points_detach('prockill-after-lockgroup-leader');
+		  SELECT injection_points_detach('prockill-after-lockgroup-follower');});
+};
+
+eval { $leader->quit };
+eval { $follower->quit };
+$node->stop('fast', fail_ok => 1);
+
+done_testing();
-- 
2.50.1 (Apple Git-155)

