On Fri, Apr 19, 2024 at 1:52 PM shveta malik <shveta.ma...@gmail.com> wrote:
>
> Please find v9 with the above comments addressed.
>

I have made minor modifications in the comments and a function name.
Please see the attached top-up patch. Apart from this, the patch looks
good to me.

-- 
With Regards,
Amit Kapila.
diff --git a/src/backend/replication/logical/slotsync.c 
b/src/backend/replication/logical/slotsync.c
index 66fb46ac27..72a775b09c 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -79,10 +79,11 @@
  * and also sets stopSignaled=true to handle the race condition when the
  * postmaster has not noticed the promotion yet and thus may end up restarting
  * the slot sync worker. If stopSignaled is set, the worker will exit in such a
- * case. Note that we don't need to reset this variable as after promotion the
- * slot sync worker won't be restarted because the pmState changes to PM_RUN 
from
- * PM_HOT_STANDBY and we don't support demoting primary without restarting the
- * server. See MaybeStartSlotSyncWorker.
+ * case. The SQL function pg_sync_replication_slots() will also error out if
+ * this flag is set. Note that we don't need to reset this variable as after
+ * promotion the slot sync worker won't be restarted because the pmState
+ * changes to PM_RUN from PM_HOT_STANDBY and we don't support demoting
+ * primary without restarting the server. See MaybeStartSlotSyncWorker.
  *
  * The 'syncing' flag is needed to prevent concurrent slot syncs to avoid slot
  * overwrites.
@@ -1246,12 +1247,11 @@ wait_for_slot_activity(bool some_slot_updated)
 }
 
 /*
- * Check stopSignaled and syncing flags. Emit error if promotion has
- * already set stopSignaled or if it is concurrent sync call. Otherwise,
- * set 'syncing' flag and pid info.
+ * Emit an error if a promotion or a concurrent sync call is in progress.
+ * Otherwise, advertise that a sync is in progress.
  */
 static void
-check_flags_and_set_sync_info(pid_t worker_pid)
+check_and_set_sync_info(pid_t worker_pid)
 {
        SpinLockAcquire(&SlotSyncCtx->mutex);
 
@@ -1259,9 +1259,8 @@ check_flags_and_set_sync_info(pid_t worker_pid)
        Assert(worker_pid == InvalidPid || SlotSyncCtx->pid == InvalidPid);
 
        /*
-        * Startup process signaled the slot sync machinery to stop, so if
-        * meanwhile postmaster ended up starting the worker again or user has
-        * invoked pg_sync_replication_slots(), error out.
+        * Emit an error if startup process signaled the slot sync machinery to
+        * stop. See comments atop SlotSyncCtxStruct.
         */
        if (SlotSyncCtx->stopSignaled)
        {
@@ -1281,7 +1280,10 @@ check_flags_and_set_sync_info(pid_t worker_pid)
 
        SlotSyncCtx->syncing = true;
 
-       /* Advertise our PID so that the startup process can kill us on 
promotion */
+       /*
+        * Advertise the required PID so that the startup process can kill the 
slot
+        * sync worker on promotion.
+        */
        SlotSyncCtx->pid = worker_pid;
 
        SpinLockRelease(&SlotSyncCtx->mutex);
@@ -1333,13 +1335,13 @@ ReplSlotSyncWorkerMain(char *startup_data, size_t 
startup_data_len)
 
        /*
         * Register slotsync_worker_onexit() before we register
-        * ReplicationSlotShmemExit() in BaseInit(), to ensure that during exit 
of
-        * slot sync worker, ReplicationSlotShmemExit() is called first, 
followed
-        * by slotsync_worker_onexit(). Startup process during promotion calls
-        * ShutDownSlotSync() which waits for slot sync to finish and it does 
that
-        * by checking the 'syncing' flag. Thus it is important that worker 
should
-        * be done with slots' release and cleanup before it actually marks 
itself
-        * as finished syncing.
+        * ReplicationSlotShmemExit() in BaseInit(), to ensure that during the 
exit
+        * of the slot sync worker, ReplicationSlotShmemExit() is called first,
+        * followed by slotsync_worker_onexit(). The startup process during
+        * promotion invokes ShutDownSlotSync() which waits for slot sync to 
finish
+        * and it does that by checking the 'syncing' flag. Thus worker must be
+        * done with the slots' release and cleanup before it marks itself as
+        * finished syncing.
         */
        before_shmem_exit(slotsync_worker_onexit, (Datum) 0);
 
@@ -1391,7 +1393,7 @@ ReplSlotSyncWorkerMain(char *startup_data, size_t 
startup_data_len)
        pqsignal(SIGPIPE, SIG_IGN);
        pqsignal(SIGCHLD, SIG_DFL);
 
-       check_flags_and_set_sync_info(MyProcPid);
+       check_and_set_sync_info(MyProcPid);
 
        ereport(LOG, errmsg("slot sync worker started"));
 
@@ -1544,9 +1546,9 @@ update_synced_slots_inactive_since(void)
 /*
  * Shut down the slot sync worker.
  *
- * It sends signal to shutdown slot sync worker. It also waits till
- * the slot sync worker has exited and pg_sync_replication_slots()
- * has finished.
+ * This function sends signal to shutdown slot sync worker, if required. It
+ * also waits till the slot sync worker has exited or
+ * pg_sync_replication_slots() has finished.
  */
 void
 ShutDownSlotSync(void)
@@ -1593,10 +1595,7 @@ ShutDownSlotSync(void)
 
                SpinLockAcquire(&SlotSyncCtx->mutex);
 
-               /*
-                * Confirm that both the worker and the function
-                * pg_sync_replication_slots() are done.
-                */
+               /* Ensure that no process is syncing the slots. */
                if (!SlotSyncCtx->syncing)
                        break;
 
@@ -1685,12 +1684,13 @@ slotsync_failure_callback(int code, Datum arg)
        /*
         * We need to do slots cleanup here just like WalSndErrorCleanup() does.
         *
-        * Startup process during promotion calls ShutDownSlotSync() which waits
-        * for slot sync to finish and it does that by checking the 'syncing'
-        * flag. Thus it is important that SQL function should be done with 
slots'
-        * release and cleanup before it actually marks itself as finished
-        * syncing.
+        * The startup process during promotion invokes ShutDownSlotSync() which
+        * waits for slot sync to finish and it does that by checking the 
'syncing'
+        * flag. Thus the SQL function must be done with slots' release and 
cleanup
+        * to avoid any dangling temporary slots or active slots before it marks
+        * itself as finished syncing.
         */
+
        /* Make sure active replication slots are released */
        if (MyReplicationSlot != NULL)
                ReplicationSlotRelease();
@@ -1699,9 +1699,9 @@ slotsync_failure_callback(int code, Datum arg)
        ReplicationSlotCleanup();
 
        /*
-        * If syncing_slots is true, it indicates that the process errored out
-        * without resetting the flag. So, we need to clean up shared memory and
-        * reset the flag here.
+        * The set syncing_slots indicates that the process errored out without
+        * resetting the flag. So, we need to clean up shared memory and reset 
the
+        * flag here.
         */
        if (syncing_slots)
                reset_syncing_flag();
@@ -1718,7 +1718,7 @@ SyncReplicationSlots(WalReceiverConn *wrconn)
 {
        PG_ENSURE_ERROR_CLEANUP(slotsync_failure_callback, 
PointerGetDatum(wrconn));
        {
-               check_flags_and_set_sync_info(InvalidPid);
+               check_and_set_sync_info(InvalidPid);
 
                validate_remote_info(wrconn);
 

Reply via email to