From 51d4335998d2bca24a3cb0a16c3655baaadf0f78 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Thu, 7 May 2026 11:29:09 +0500
Subject: [PATCH vAB3 1/2] Add regression test for ProcKill lock-group
 procLatch recycle race

Two backends form a lock group via prockill_become_lock_group_leader /
prockill_become_lock_group_member (a new test module), then are terminated
concurrently.  Injection points placed in ProcKill() right after
LWLockRelease(leader_lwlock) pause both victims there, letting the test
verify that a freshly forked backend can claim the recycled PGPROC slot
without hitting "latch already owned by PID ..." PANIC.

Standard observability (pg_stat_activity, BackendPidGetProc) is unavailable
at that point in ProcKill: pgstat_beshutdown_hook and RemoveProcFromArray,
both registered as on_shmem_exit callbacks, run before ProcKill.  The new
prockill_backend_in_injection() helper reads PGPROC->wait_event_info from
ProcGlobal->allProcs directly, which remains valid until ProcKill zeroes
proc->pid near its end.

Injection waits must be attached from a controller session rather than from
the victims themselves.  injection_points_attach() from the victim would
register a before_shmem_exit cleanup that detaches the point before ProcKill
runs.  The new prockill_attach_injection_wait() calls InjectionPointAttach()
directly, leaving no such hook on the victims.

The test expects a fixed server and must land together with the matching
ProcKill fix.
---
 src/test/modules/injection_points/Makefile    |   5 +-
 .../injection_point_condition.h               |  25 ++++
 .../injection_points--1.0.sql                 |  31 +++++
 .../injection_points/injection_points.c       |  26 +---
 src/test/modules/injection_points/meson.build |   6 +
 .../injection_points/regress_prockill.c       | 115 ++++++++++++++++++
 .../t/001_prockill_lockgroup.pl               |  99 +++++++++++++++
 7 files changed, 282 insertions(+), 25 deletions(-)
 create mode 100644 src/test/modules/injection_points/injection_point_condition.h
 create mode 100644 src/test/modules/injection_points/regress_prockill.c
 create mode 100644 src/test/modules/injection_points/t/001_prockill_lockgroup.pl

diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile
index f057d143d1a..9b15adf7e60 100644
--- a/src/test/modules/injection_points/Makefile
+++ b/src/test/modules/injection_points/Makefile
@@ -4,7 +4,8 @@ MODULE_big = injection_points
 OBJS = \
 	$(WIN32RES) \
 	injection_points.o \
-	regress_injection.o
+	regress_injection.o \
+	regress_prockill.o
 EXTENSION = injection_points
 DATA = injection_points--1.0.sql
 PGFILEDESC = "injection_points - facility for injection points"
@@ -12,6 +13,8 @@ PGFILEDESC = "injection_points - facility for injection points"
 REGRESS = injection_points hashagg reindex_conc vacuum
 REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress
 
+TAP_TESTS = 1
+
 ISOLATION = basic \
 	    inplace \
 	    repack \
diff --git a/src/test/modules/injection_points/injection_point_condition.h b/src/test/modules/injection_points/injection_point_condition.h
new file mode 100644
index 00000000000..a3200673eed
--- /dev/null
+++ b/src/test/modules/injection_points/injection_point_condition.h
@@ -0,0 +1,25 @@
+/*-------------------------------------------------------------------------
+ * injection_point_condition.h
+ *		Condition payload for injection_wait callbacks
+ *
+ * Copyright (c) 2025-2026, PostgreSQL Global Development Group
+ *
+ * src/test/modules/injection_points/injection_point_condition.h
+ *-------------------------------------------------------------------------
+ */
+#ifndef INJECTION_POINT_CONDITION_H
+#define INJECTION_POINT_CONDITION_H
+
+typedef enum InjectionPointConditionType
+{
+	INJ_CONDITION_ALWAYS = 0,
+	INJ_CONDITION_PID,
+} InjectionPointConditionType;
+
+typedef struct InjectionPointCondition
+{
+	InjectionPointConditionType type;
+	int			pid;
+} InjectionPointCondition;
+
+#endif							/* INJECTION_POINT_CONDITION_H */
diff --git a/src/test/modules/injection_points/injection_points--1.0.sql b/src/test/modules/injection_points/injection_points--1.0.sql
index 861c7355d4e..fa7afa9995e 100644
--- a/src/test/modules/injection_points/injection_points--1.0.sql
+++ b/src/test/modules/injection_points/injection_points--1.0.sql
@@ -108,3 +108,34 @@ CREATE FUNCTION removable_cutoff(rel regclass)
 RETURNS xid8
 AS 'MODULE_PATHNAME'
 LANGUAGE C CALLED ON NULL INPUT;
+
+--
+-- regress_prockill.c functions
+--
+-- Form a lock group without parallel query so ProcKill lock-group teardown
+-- can be tested with injection points.
+--
+CREATE FUNCTION prockill_become_lock_group_leader()
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION prockill_become_lock_group_member(leader_pid int)
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+-- Attach injection_wait to point_name scoped to target_pid.  Must be called
+-- from a controller session, not from the victim itself (see source).
+CREATE FUNCTION prockill_attach_injection_wait(point_name text, target_pid int)
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+-- Return true if target_pid is currently waiting at the named injection point,
+-- read directly from its PGPROC slot (works inside ProcKill where
+-- pg_stat_activity and ProcArray are already torn down).
+CREATE FUNCTION prockill_backend_in_injection(target_pid int, point_name text)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
diff --git a/src/test/modules/injection_points/injection_points.c b/src/test/modules/injection_points/injection_points.c
index 0f1af513673..fb7a7d477cc 100644
--- a/src/test/modules/injection_points/injection_points.c
+++ b/src/test/modules/injection_points/injection_points.c
@@ -34,36 +34,14 @@
 #include "utils/tuplestore.h"
 #include "utils/wait_event.h"
 
+#include "injection_point_condition.h"
+
 PG_MODULE_MAGIC;
 
 /* Maximum number of waits usable in injection points at once */
 #define INJ_MAX_WAIT	8
 #define INJ_NAME_MAXLEN	64
 
-/*
- * Conditions related to injection points.  This tracks in shared memory the
- * runtime conditions under which an injection point is allowed to run,
- * stored as private_data when an injection point is attached, and passed as
- * argument to the callback.
- *
- * If more types of runtime conditions need to be tracked, this structure
- * should be expanded.
- */
-typedef enum InjectionPointConditionType
-{
-	INJ_CONDITION_ALWAYS = 0,	/* always run */
-	INJ_CONDITION_PID,			/* PID restriction */
-} InjectionPointConditionType;
-
-typedef struct InjectionPointCondition
-{
-	/* Type of the condition */
-	InjectionPointConditionType type;
-
-	/* ID of the process where the injection point is allowed to run */
-	int			pid;
-} InjectionPointCondition;
-
 /*
  * List of injection points stored in TopMemoryContext attached
  * locally to this process.
diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build
index fb1418e2caa..4ac9e54adb8 100644
--- a/src/test/modules/injection_points/meson.build
+++ b/src/test/modules/injection_points/meson.build
@@ -7,6 +7,7 @@ endif
 injection_points_sources = files(
   'injection_points.c',
   'regress_injection.c',
+  'regress_prockill.c',
 )
 
 if host_system == 'windows'
@@ -41,6 +42,11 @@ tests += {
     # The injection points are cluster-wide, so disable installcheck
     'runningcheck': false,
   },
+  'tap': {
+    'tests': [
+      't/001_prockill_lockgroup.pl',
+    ],
+  },
   'isolation': {
     'specs': [
       'basic',
diff --git a/src/test/modules/injection_points/regress_prockill.c b/src/test/modules/injection_points/regress_prockill.c
new file mode 100644
index 00000000000..9720875d9e2
--- /dev/null
+++ b/src/test/modules/injection_points/regress_prockill.c
@@ -0,0 +1,115 @@
+/*-------------------------------------------------------------------------
+ *
+ * regress_prockill.c
+ *		SQL helpers for the ProcKill lock-group TAP test
+ *
+ * Exposes lock-group formation without parallel query and helpers that
+ * observe victim backends while they are inside ProcKill().
+ *
+ * Copyright (c) 2025-2026, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/test/modules/injection_points/regress_prockill.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "utils/builtins.h"
+#include "utils/injection_point.h"
+#include "utils/wait_event.h"
+
+#include "injection_point_condition.h"
+
+/* Read a uint32 field exactly once (matches waitfuncs.c idiom). */
+#define UINT32_ACCESS_ONCE(var)	((uint32) (*((volatile uint32 *) &(var))))
+
+PG_FUNCTION_INFO_V1(prockill_become_lock_group_leader);
+
+Datum
+prockill_become_lock_group_leader(PG_FUNCTION_ARGS)
+{
+	BecomeLockGroupLeader();
+	PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(prockill_become_lock_group_member);
+
+Datum
+prockill_become_lock_group_member(PG_FUNCTION_ARGS)
+{
+	int			leader_pid = PG_GETARG_INT32(0);
+	PGPROC	   *leader;
+
+	leader = BackendPidGetProc(leader_pid);
+	if (leader == NULL)
+		elog(ERROR, "backend with PID %d not found", leader_pid);
+
+	if (!BecomeLockGroupMember(leader, leader_pid))
+		elog(ERROR, "could not join lock group of backend %d", leader_pid);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Attach injection_wait to point_name, scoped to target_pid via
+ * INJ_CONDITION_PID.
+ *
+ * Must be called from a controller session that is not one of the victims.
+ * Using injection_points_attach() from the victim itself would register a
+ * before_shmem_exit(injection_points_cleanup) hook that fires before ProcKill
+ * (which is on_shmem_exit) and would detach the point before the victim ever
+ * reaches it.  Calling InjectionPointAttach() directly from a separate
+ * controller session leaves no such hook on the victims.
+ */
+PG_FUNCTION_INFO_V1(prockill_attach_injection_wait);
+
+Datum
+prockill_attach_injection_wait(PG_FUNCTION_ARGS)
+{
+	char	   *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+	int			target_pid = PG_GETARG_INT32(1);
+	InjectionPointCondition cond = {INJ_CONDITION_PID, target_pid};
+
+	InjectionPointAttach(name, "injection_points", "injection_wait",
+						 &cond, sizeof(cond));
+	PG_RETURN_VOID();
+}
+
+/*
+ * Return true if the backend with target_pid is currently waiting at the
+ * named injection point, by reading PGPROC->wait_event_info directly.
+ *
+ * At the injection points in ProcKill(), the standard observability surfaces
+ * (pg_stat_activity, BackendPidGetProc) are already unavailable: both
+ * pgstat_beshutdown_hook and RemoveProcFromArray run as on_shmem_exit
+ * callbacks before ProcKill.  The PGPROC slot itself remains intact until
+ * ProcKill zeroes proc->pid near its end, so a direct allProcs scan works.
+ */
+PG_FUNCTION_INFO_V1(prockill_backend_in_injection);
+
+Datum
+prockill_backend_in_injection(PG_FUNCTION_ARGS)
+{
+	int			target_pid = PG_GETARG_INT32(0);
+	char	   *want_name = text_to_cstring(PG_GETARG_TEXT_PP(1));
+	uint32		n = ProcGlobal->allProcCount;
+
+	for (uint32 i = 0; i < n; i++)
+	{
+		PGPROC	   *proc = &ProcGlobal->allProcs[i];
+		const char *ev;
+
+		if (proc->pid != target_pid)
+			continue;
+
+		ev = pgstat_get_wait_event(UINT32_ACCESS_ONCE(proc->wait_event_info));
+		PG_RETURN_BOOL(ev != NULL && strcmp(ev, want_name) == 0);
+	}
+
+	PG_RETURN_BOOL(false);
+}
diff --git a/src/test/modules/injection_points/t/001_prockill_lockgroup.pl b/src/test/modules/injection_points/t/001_prockill_lockgroup.pl
new file mode 100644
index 00000000000..a2ead24cf10
--- /dev/null
+++ b/src/test/modules/injection_points/t/001_prockill_lockgroup.pl
@@ -0,0 +1,99 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+#
+# Regression test for the ProcKill lock-group vs. procLatch recycle race.
+#
+# Two backends form a lock group, then are terminated concurrently.  The test
+# uses injection points placed inside ProcKill() to pause both victims there
+# and verify that a freshly forked backend can claim the recycled PGPROC slot
+# without hitting "latch already owned by PID ..." PANIC.
+#
+# Requires --enable-injection-points.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if (($ENV{enable_injection_points} // '') ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+my $node = PostgreSQL::Test::Cluster->new('prockill_race');
+$node->init;
+$node->append_conf('postgresql.conf',
+	q{shared_preload_libraries = 'injection_points'});
+$node->start;
+
+$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
+# Form a two-backend lock group.
+my $leader   = $node->background_psql('postgres');
+my $follower = $node->background_psql('postgres');
+
+$leader->query_safe('SELECT prockill_become_lock_group_leader()');
+my $leader_pid   = $leader->query_safe('SELECT pg_backend_pid()');
+my $follower_pid = $follower->query_safe('SELECT pg_backend_pid()');
+$leader_pid   =~ s/\s+//g;
+$follower_pid =~ s/\s+//g;
+
+$follower->query_safe("SELECT prockill_become_lock_group_member($leader_pid)");
+
+# Attach PID-scoped injection waits from a throwaway controller session, not
+# from $leader/$follower.  injection_points_attach() via the victim would
+# register a before_shmem_exit cleanup that fires before ProcKill and detaches
+# the point mid-exit.  prockill_attach_injection_wait() calls
+# InjectionPointAttach() directly, leaving no such hook on the victims.
+$node->safe_psql('postgres', qq{
+	SELECT prockill_attach_injection_wait('prockill-after-lockgroup-leader',   $leader_pid);
+	SELECT prockill_attach_injection_wait('prockill-after-lockgroup-follower', $follower_pid);
+});
+
+# Kill the leader and wait for it to pause inside ProcKill.
+# prockill_backend_in_injection() reads PGPROC->wait_event_info directly
+# because pg_stat_activity and ProcArray are already torn down before ProcKill.
+$node->safe_psql('postgres', "SELECT pg_terminate_backend($leader_pid)");
+my $leader_ok = $node->poll_query_until(
+	'postgres',
+	"SELECT prockill_backend_in_injection($leader_pid, 'prockill-after-lockgroup-leader')"
+);
+
+# Kill the follower and wait for it similarly.  Use eval: if the fix has
+# regressed, the postmaster may have already PANICed.
+eval {
+	$node->safe_psql('postgres',
+		"SELECT pg_terminate_backend($follower_pid)");
+};
+my $follower_ok = $node->poll_query_until(
+	'postgres',
+	"SELECT prockill_backend_in_injection($follower_pid, 'prockill-after-lockgroup-follower')"
+);
+
+# Release both victims.
+eval {
+	$node->safe_psql('postgres',
+		q{SELECT injection_points_wakeup('prockill-after-lockgroup-follower');
+		  SELECT injection_points_wakeup('prockill-after-lockgroup-leader');});
+};
+
+# A new backend must be able to claim the recycled PGPROC slot without PANIC.
+my $select_ok = eval { $node->safe_psql('postgres', 'SELECT 1'); 1 };
+
+ok($leader_ok && $follower_ok,
+	'leader and follower reached ProcKill injection points');
+ok($select_ok,
+	'new backend claimed recycled PGPROC without latch-recycle PANIC');
+
+eval {
+	$node->safe_psql('postgres',
+		q{SELECT injection_points_detach('prockill-after-lockgroup-leader');
+		  SELECT injection_points_detach('prockill-after-lockgroup-follower');});
+};
+
+eval { $leader->quit };
+eval { $follower->quit };
+$node->stop('fast', fail_ok => 1);
+
+done_testing();
-- 
2.50.1 (Apple Git-155)

