diff --git a/src/backend/replication/logical/applyparallelworker.c b/src/backend/replication/logical/applyparallelworker.c
index a54af9f8d7..e116e038b2 100644
--- a/src/backend/replication/logical/applyparallelworker.c
+++ b/src/backend/replication/logical/applyparallelworker.c
@@ -237,9 +237,11 @@ pa_get_free_worker(void)
 		winfo = (ParallelApplyWorkerInfo *) lfirst(lc);
 
 		/*
-		 * Try to free the worker first, because we don't wait for the rollback
-		 * command to finish so the worker may not be freed at the end of the
-		 * transaction.
+		 * We first try to free the worker so as improve our chances of getting
+		 * the worker. Normally, we free the worker after ensuring that the
+		 * transaction is committed by parallel worker but for rollbacks, we
+		 * don't wait for the transaction to finish so can't free the worker
+		 * information immediately.
 		 */
 		if (pa_free_worker(winfo, winfo->shared->xid))
 			continue;
@@ -254,9 +256,10 @@ pa_get_free_worker(void)
 /*
  * Start a parallel apply worker that will be used for the specified xid.
  *
- * If a parallel apply worker is found but not in use then re-use it, otherwise
- * start a fresh one. Cache the worker information in ParallelApplyWorkersHash
- * keyed by the specified xid.
+ * We first try to get an available worker from the pool, if any and then try
+ * to launch a new worker. On successful allocation, remember the worker
+ * information in the hash table so that we can get it later for processing the
+ * streaming changes.
  */
 void
 pa_start_worker(TransactionId xid)
@@ -285,17 +288,15 @@ pa_start_worker(TransactionId xid)
 
 	/* Try to get a free parallel apply worker. */
 	winfo = pa_get_free_worker();
-
-	if (winfo == NULL)
+	if (!winfo)
 	{
 		/* Try to start a new parallel apply worker. */
 		winfo = pa_init_and_launch_worker();
-
-		if (winfo == NULL)
+		if (!winfo)
 			return;
 	}
 
-	/* Create entry for requested transaction. */
+	/* Create an entry for the requested transaction. */
 	entry = hash_search(ParallelApplyWorkersHash, &xid, HASH_ENTER, &found);
 	if (found)
 		elog(ERROR, "hash table corrupted");
@@ -330,14 +331,12 @@ pa_find_worker(TransactionId xid)
 	if (stream_apply_worker != NULL)
 		return stream_apply_worker;
 
-	/*
-	 * Find entry for requested transaction.
-	 */
+	/* Find an entry for the requested transaction. */
 	entry = hash_search(ParallelApplyWorkersHash, &xid, HASH_FIND, &found);
 	if (found)
 	{
+		/* The worker must not have exited.  */
 		Assert(entry->winfo->in_use);
-
 		return entry->winfo;
 	}
 
@@ -345,17 +344,16 @@ pa_find_worker(TransactionId xid)
 }
 
 /*
- * Remove the parallel apply worker entry from the hash table. Stop the work if
- * there are enough workers in the pool.
- *
- * If the worker is stopped, the worker information will be removed from the
- * ParallelApplyWorkersList and the memory of the information will be freed.
- *
- * Return true if the worker is stopped, false otherwise.
+ * Makes the worker available for reuse.
  *
- * Do nothing if the worker is not being used.
+ * This removes the parallel apply worker entry from the hash table so that
+ * it can't be used. This either stops the worker and free the corresponding
+ * info, if there are enough workers in the pool or just marks it available
+ * for reuse.
  *
  * For more information about the worker pool, see comments atop worker.c.
+ *
+ * Returns true if the worker is stopped, false otherwise.
  */
 bool
 pa_free_worker(ParallelApplyWorkerInfo *winfo, TransactionId xid)
@@ -364,12 +362,13 @@ pa_free_worker(ParallelApplyWorkerInfo *winfo, TransactionId xid)
 
 	Assert(!am_parallel_apply_worker());
 
+	/* worker is already available for reuse */
 	if (!winfo->in_use)
 		return false;
 
 	/*
 	 * Don't free the worker if the transaction in the worker is still in
-	 * progress. This could happen as we don't wait for transaction rollback
+	 * progress. This could happen as we don't wait for the apply of rollback
 	 * to finish.
 	 */
 	if (pa_get_xact_state(winfo->shared) != PARALLEL_TRANS_FINISHED)
@@ -397,8 +396,8 @@ pa_free_worker(ParallelApplyWorkerInfo *winfo, TransactionId xid)
 		winfo->error_mq_handle = NULL;
 
 		SpinLockAcquire(&winfo->shared->mutex);
-		slot_no = winfo->shared->logicalrep_worker_slot_no;
 		generation = winfo->shared->logicalrep_worker_generation;
+		slot_no = winfo->shared->logicalrep_worker_slot_no;
 		SpinLockRelease(&winfo->shared->mutex);
 
 		logicalrep_worker_stop_by_slot(slot_no, generation);
@@ -690,7 +689,7 @@ ParallelApplyWorkerMain(Datum main_arg)
 /*
  * Handle receipt of an interrupt indicating a parallel apply worker message.
  *
- * Note: this is called within a signal handler!  All we can do is set a flag
+ * Note: this is called within a signal handler! All we can do is set a flag
  * that will cause the next CHECK_FOR_INTERRUPTS() to invoke
  * HandleParallelApplyMessages().
  */
@@ -777,14 +776,14 @@ HandleParallelApplyMessages(void)
 	/*
 	 * This is invoked from ProcessInterrupts(), and since some of the
 	 * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential
-	 * for recursive calls if more signals are received while this runs.  It's
+	 * for recursive calls if more signals are received while this runs. It's
 	 * unclear that recursive entry would be safe, and it doesn't seem useful
 	 * even if it is safe, so let's block interrupts until done.
 	 */
 	HOLD_INTERRUPTS();
 
 	/*
-	 * Moreover, CurrentMemoryContext might be pointing almost anywhere.  We
+	 * Moreover, CurrentMemoryContext might be pointing almost anywhere. We
 	 * don't want to risk leaking data into long-lived contexts, so let's do
 	 * our work here in a private context that we can reset on each use.
 	 */
@@ -925,6 +924,10 @@ pa_init_and_launch_worker(void)
 	bool		launched;
 	ParallelApplyWorkerInfo *winfo;
 
+	/*
+	 * The wroker info can be used for the entire duration of the worker so
+	 * create it in a permanent context.
+	 */
 	oldcontext = MemoryContextSwitchTo(ApplyContext);
 
 	winfo = (ParallelApplyWorkerInfo *) palloc0(sizeof(ParallelApplyWorkerInfo));
@@ -934,7 +937,6 @@ pa_init_and_launch_worker(void)
 	{
 		MemoryContextSwitchTo(oldcontext);
 		pfree(winfo);
-
 		return NULL;
 	}
 
@@ -952,7 +954,6 @@ pa_init_and_launch_worker(void)
 	else
 	{
 		pa_free_worker_info(winfo);
-
 		winfo = NULL;
 	}
 
@@ -1029,7 +1030,11 @@ pa_wait_for_xact_finish(ParallelApplyWorkerShared *wshared)
 	 */
 	pa_wait_for_xact_state(wshared, PARALLEL_TRANS_STARTED);
 
-	/* Wait for the transaction lock to be released. */
+	/*
+	 * Wait for the transaction lock to be released. This is required to detect
+	 * deadlock among leader and parallel apply workers. See comments atop this
+	 * file.
+	 */
 	pa_lock_transaction(wshared->xid, AccessExclusiveLock);
 	pa_unlock_transaction(wshared->xid, AccessExclusiveLock);
 
@@ -1045,16 +1050,19 @@ pa_wait_for_xact_finish(ParallelApplyWorkerShared *wshared)
 }
 
 /*
- * Wait until the parallel apply worker's transaction state reach or exceed the
- * given xact_state.
+ * Wait until the parallel apply worker's transaction state has reached or
+ * exceeded the given xact_state.
  */
 static void
 pa_wait_for_xact_state(ParallelApplyWorkerShared *wshared,
-								   ParallelTransState xact_state)
+					   ParallelTransState xact_state)
 {
 	for (;;)
 	{
-		/* Stop if the transaction state reach or exceed the xact_state. */
+		/*
+		 * Stop if the transaction state has reached or exceeded the given
+		 * xact_state.
+		 */
 		if (pa_get_xact_state(wshared) >= xact_state)
 			break;
 
@@ -1078,7 +1086,7 @@ pa_wait_for_xact_state(ParallelApplyWorkerShared *wshared,
  */
 void
 pa_set_xact_state(ParallelApplyWorkerShared *wshared,
-							  ParallelTransState xact_state)
+				  ParallelTransState xact_state)
 {
 	SpinLockAcquire(&wshared->mutex);
 	wshared->xact_state = xact_state;
@@ -1102,13 +1110,16 @@ pa_get_xact_state(ParallelApplyWorkerShared *wshared)
 }
 
 /*
- * Form the savepoint name for the streaming transaction.
+ * Form a unique savepoint name for the streaming transaction.
+ *
+ * Note that different subscriptions for publications on different nodes can
+ * receive same remote xid, so we need to use subscription id along with it.
  *
  * Return the name in the supplied buffer.
  */
 static void
-pa_savepoint_name(Oid suboid, TransactionId xid,
-							  char *spname, Size szsp)
+pa_savepoint_name(Oid suboid, TransactionId xid, char *spname,
+				  Size szsp)
 {
 	snprintf(spname, szsp, "pg_sp_%u_%u", suboid, xid);
 }
@@ -1131,7 +1142,7 @@ pa_start_subtrans(TransactionId current_xid, TransactionId top_xid)
 		char		spname[NAMEDATALEN];
 
 		pa_savepoint_name(MySubscription->oid, current_xid,
-									  spname, sizeof(spname));
+						  spname, sizeof(spname));
 
 		elog(DEBUG1, "defining savepoint %s in parallel apply worker", spname);
 
@@ -1218,7 +1229,7 @@ pa_stream_abort(LogicalRepStreamAbortData *abort_data)
 		char		spname[NAMEDATALEN];
 
 		pa_savepoint_name(MySubscription->oid, subxid, spname,
-									  sizeof(spname));
+						  sizeof(spname));
 
 		elog(DEBUG1, "rolling back to savepoint %s in parallel apply worker", spname);
 
@@ -1264,16 +1275,19 @@ pa_unlock_stream(TransactionId xid, LOCKMODE lockmode)
 }
 
 /*
- * Helper functions to acquire and release a lock for each local transaction.
+ * Helper functions to acquire and release a lock for each local transaction
+ * apply.
  *
  * Set locktag_field4 to 1 to indicate that it's a transaction lock.
  *
- * Note that all the callers are passing remote transaction ID instead of local
- * transaction ID as xid. This is because the local transaction ID will only be
- * assigned while applying the first change in the parallel apply, but it's
- * possible that the first change in parallel apply worker is blocked by a
- * concurrently executing transaction in another parallel apply worker causing
- * the leader cannot get local transaction ID.
+ * Note that all the callers must pass a remote transaction ID instead of a
+ * local transaction ID as xid. This is because the local transaction ID will
+ * only be assigned while applying the first change in the parallel apply but
+ * it's possible that the first change in the parallel apply worker is blocked
+ * by a concurrently executing transaction in another parallel apply worker. We
+ * can only communicate the local transaction id to the leader after applying
+ * the first change so it won't be able to wait after sending the xact finish
+ * command using this lock.
  */
 void
 pa_lock_transaction(TransactionId xid, LOCKMODE lockmode)
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
index a086997f17..6f90d398ef 100644
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -269,7 +269,7 @@ logicalrep_workers_find(Oid subid, bool only_running)
 }
 
 /*
- * Start new background worker, if possible.
+ * Start new logical replication background worker, if possible.
  *
  * Returns true on success, false on failure.
  */
@@ -482,7 +482,8 @@ retry:
 }
 
 /*
- * Internal function to stop the worker and wait for it to die.
+ * Internal function to stop the worker and wait until it detaches from the
+ * slot.
  */
 static void
 logicalrep_worker_stop_internal(LogicalRepWorker *worker)
@@ -564,8 +565,7 @@ logicalrep_worker_stop_internal(LogicalRepWorker *worker)
 }
 
 /*
- * Stop the logical replication worker for subid/relid, if any, and wait until
- * it detaches from the slot.
+ * Stop the logical replication worker for subid/relid, if any.
  */
 void
 logicalrep_worker_stop(Oid subid, Oid relid)
@@ -586,8 +586,7 @@ logicalrep_worker_stop(Oid subid, Oid relid)
 }
 
 /*
- * Stop the logical replication worker corresponding to the input slot number,
- * and wait until it detaches from the slot.
+ * Stop the logical replication worker corresponding to the input slot number.
  */
 void
 logicalrep_worker_stop_by_slot(int slot_no, uint16 generation)
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index 80dea12e65..e1fcfa8b34 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -1079,14 +1079,11 @@ ReplicationOriginExitCleanup(int code, Datum arg)
  * cached value can only be set again after the previous value is torn down
  * with replorigin_session_reset().
  *
- * However, if the function parameter 'acquired_by' is not 0, we allow the
- * process to use the same slot already acquired by another process. It's safe
- * because 1) The only caller (parallel apply workers) will maintain the
- * commit order by allowing only one process to commit at a time, so no two
- * workers will be operating on the same origin at the same time (see comments
- * in logical/worker.c). 2) Even though we try to advance the session's origin
- * concurrently, it's safe to do so as we change/advance the session_origin
- * LSNs under replicate_state LWLock.
+ * However, we do allow multiple processes to point to the same origin slot
+ * if requested by the caller by passing PID of the process that has already
+ * acquired it. This is to allow using the same origin by multiple parallel
+ * apply processes the provided they maintain commit order, for example, by
+ * allowing only one process to commit at a time.
  */
 void
 replorigin_session_setup(RepOriginId node, int acquired_by)
diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c
index 644d5e01cf..e2430fced5 100644
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -632,8 +632,9 @@ void
 process_syncing_tables(XLogRecPtr current_lsn)
 {
 	/*
-	 * Skip for parallel apply workers. See pa_can_start() for
-	 * details.
+	 * Skip for parallel apply workers as they don't operate on tables that
+	 * are not in ready state. See pa_can_start() and
+	 * should_apply_changes_for_rel().
 	 */
 	if (am_parallel_apply_worker())
 		return;
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index f09c5c2dce..4bf194facb 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1416,11 +1416,7 @@ apply_handle_stream_start(StringInfo s)
 
 	set_apply_error_context_xact(stream_xid, InvalidXLogRecPtr);
 
-	/*
-	 * For the first stream start, check if there is any free parallel apply
-	 * worker we can use to process this transaction, otherwise try to start a
-	 * new parallel apply worker.
-	 */
+	/* Try to allocate a worker for the streaming transaction. */
 	if (first_segment)
 		pa_start_worker(stream_xid);
 
diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h
index c1f8666f14..565d4d73de 100644
--- a/src/include/replication/worker_internal.h
+++ b/src/include/replication/worker_internal.h
@@ -129,7 +129,6 @@ typedef struct ParallelApplyWorkerShared
 
 	/* Information from the corresponding LogicalRepWorker slot. */
 	uint16		logicalrep_worker_generation;
-
 	int			logicalrep_worker_slot_no;
 
 	/*