On 2026-May-17, Baji Shaik wrote:

> v3 uses PG_ENSURE_ERROR_CLEANUP which:
> - Handles both ERROR and FATAL exits
> - Automatically cancels the callback on normal completion
>   (no slot leak)
> - Runs before memory contexts are destroyed (no use-after-free)

Yeah, looks good.  I have pushed it, with some comment wordsmithing and
other cosmetic changes.

While looking at it, I realized that I didn't like the way
stop_repack_decoding_worker() works, mainly because if there's no
handle, we leak everything else -- and the way we initialize things
means we leak the shared memory segment.  This is maybe a rare case and
just a small memory leak, but it seems better to do it nicely.  So
here's a followup patch that reworks that code.  This also forced me to
understand more clearly what is going on, so I rewrote the comments.

> - 20 REPACK (CONCURRENTLY) in same session completes without
>   slot exhaustion

FWIW I tested this by doing "repack (concurrently) foo \watch 0.1" and
letting it run for some time.  I happened to notice that if I have two
psqls running, one with the above and the second with the equivalent for
table bar, when they run together, each runs more quickly than when only
one of them is running.  I don't know what causes this; I suspect/assume
it's because the WAL messages for initial historic snapshot creation
from one gets the other running.

> I have not added a dedicated regression test for the
> pg_terminate_backend scenario yet, but I can write one using
> injection points if needed.

I don't feel a need for that.

-- 
Álvaro Herrera        Breisgau, Deutschland  —  https://www.EnterpriseDB.com/
>From db6f070e73f89c38f8a2bfafa10b0718ea51a57d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Herrera?= <[email protected]>
Date: Mon, 18 May 2026 10:13:24 -0700
Subject: [PATCH] Restructure repack worker teardown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The original code would leave a shared memory segment unreleased if we
fail partway through initialization.  Change the shutdown order so that
we already free it.

Author: Álvaro Herrera <[email protected]>
Discussion: https://postgr.es/m/[email protected]
---
 src/backend/commands/repack.c | 67 ++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 36 deletions(-)

diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c
index bfc62c8f752..c9064d8fd13 100644
--- a/src/backend/commands/repack.c
+++ b/src/backend/commands/repack.c
@@ -3411,10 +3411,14 @@ start_repack_decoding_worker(Oid relid)
 	shm_mq_handle *mqh;
 	BackgroundWorker bgw;
 
+	decoding_worker = palloc0_object(DecodingWorker);
+
 	/* Setup shared memory. */
 	size = BUFFERALIGN(offsetof(DecodingWorkerShared, error_queue)) +
 		BUFFERALIGN(REPACK_ERROR_QUEUE_SIZE);
 	seg = dsm_create(size, 0);
+	decoding_worker->seg = seg;
+
 	shared = (DecodingWorkerShared *) dsm_segment_address(seg);
 	shared->initialized = false;
 	shared->lsn_upto = InvalidXLogRecPtr;
@@ -3454,14 +3458,12 @@ start_repack_decoding_worker(Oid relid)
 	bgw.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(seg));
 	bgw.bgw_notify_pid = MyProcPid;
 
-	decoding_worker = palloc0_object(DecodingWorker);
 	if (!RegisterDynamicBackgroundWorker(&bgw, &decoding_worker->handle))
 		ereport(ERROR,
 				errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
 				errmsg("out of background worker slots"),
 				errhint("You might need to increase \"%s\".", "max_worker_processes"));
 
-	decoding_worker->seg = seg;
 	decoding_worker->error_mqh = mqh;
 
 	/*
@@ -3487,17 +3489,6 @@ start_repack_decoding_worker(Oid relid)
 	ConditionVariableCancelSleep();
 }
 
-/*
- * PG_ENSURE_ERROR_CLEANUP callback to stop the decoding worker.
- * This ensures the worker is terminated on both ERROR and FATAL exits,
- * unlike PG_FINALLY which only handles ERROR.
- */
-static void
-repack_decoding_worker_cleanup_cb(int code, Datum arg)
-{
-	stop_repack_decoding_worker();
-}
-
 /*
  * Stop the decoding worker and cleanup the related resources.
  *
@@ -3508,39 +3499,43 @@ static void
 stop_repack_decoding_worker(void)
 {
 	BgwHandleStatus status;
+	dsm_segment	   *dsmseg;
 
-	/* Haven't reached the worker startup? */
+	/* Nothing to do if no worker was set up. */
 	if (decoding_worker == NULL)
 		return;
 
-	/* Could not register the worker? */
-	if (decoding_worker->handle == NULL)
-		return;
+	/* Terminate the worker process, if one is running. */
+	if (decoding_worker->handle != NULL)
+	{
+		TerminateBackgroundWorker(decoding_worker->handle);
+		/* The worker should really exit before the REPACK command does. */
+		HOLD_INTERRUPTS();
+		status = WaitForBackgroundWorkerShutdown(decoding_worker->handle);
+		RESUME_INTERRUPTS();
 
-	TerminateBackgroundWorker(decoding_worker->handle);
-	/* The worker should really exit before the REPACK command does. */
-	HOLD_INTERRUPTS();
-	status = WaitForBackgroundWorkerShutdown(decoding_worker->handle);
-	RESUME_INTERRUPTS();
-
-	if (status == BGWH_POSTMASTER_DIED)
-		ereport(FATAL,
-				errcode(ERRCODE_ADMIN_SHUTDOWN),
-				errmsg("postmaster exited during REPACK command"));
-
-	shm_mq_detach(decoding_worker->error_mqh);
+		if (status == BGWH_POSTMASTER_DIED)
+			ereport(FATAL,
+					errcode(ERRCODE_ADMIN_SHUTDOWN),
+					errmsg("postmaster exited during REPACK command"));
+	}
 
 	/*
-	 * If we could not cancel the current sleep due to ERROR, do that before
-	 * we detach from the shared memory the condition variable is located in.
-	 * If we did not, the bgworker ERROR handling code would try and fail
-	 * badly.
+	 * Now detach from our shared memory segment.  In error cases there might
+	 * still be messages from the worker in the queue, which ProcessInterrupts
+	 * would try to read; this is pointless (and causes an assertion failure),
+	 * so set the global pointer to NULL to have ProcessRepackMessages ignore
+	 * them.
 	 */
-	ConditionVariableCancelSleep();
-
-	dsm_detach(decoding_worker->seg);
+	dsmseg = decoding_worker->seg;
 	pfree(decoding_worker);
 	decoding_worker = NULL;
+
+	/* We must also cancel the current sleep, if one is still set up */
+	ConditionVariableCancelSleep();
+
+	if (dsmseg != NULL)
+		dsm_detach(dsmseg);
 }
 
 /* stop_repack_decoding_worker, wrapped as a before_shmem_exit callback */
-- 
2.47.3

Reply via email to