From 4609f40b97c6c39bc7088f364b17dac84d785c11 Mon Sep 17 00:00:00 2001
From: Shveta Malik <shveta.malik@gmail.com>
Date: Mon, 26 May 2025 11:39:17 +0530
Subject: [PATCH v2] Improve log messages and docs for slotsync

---
 doc/src/sgml/func.sgml                     |  2 +-
 doc/src/sgml/logicaldecoding.sgml          | 31 ++++++++++++++++++++++
 src/backend/replication/logical/slotsync.c |  8 +++---
 3 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index c67688cbf5f..fa7c2b3ab5a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -29698,7 +29698,7 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset
       </row>
 
       <row>
-       <entry role="func_table_entry"><para role="func_signature">
+       <entry id="pg-logical-slot-get-binary-changes" role="func_table_entry"><para role="func_signature">
         <indexterm>
          <primary>pg_logical_slot_get_binary_changes</primary>
         </indexterm>
diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml
index 3f2bcd45a1e..f25a9fcbdd5 100644
--- a/doc/src/sgml/logicaldecoding.sgml
+++ b/doc/src/sgml/logicaldecoding.sgml
@@ -398,6 +398,37 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU
      receiving the WAL up to the latest flushed position on the primary server.
     </para>
 
+    <para>
+     When slot synchronization is set up as recommended and is performed for the
+     very first time, either automatically or by calling
+     <link linkend="pg-sync-replication-slots">
+     <function>pg_sync_replication_slots</function></link>, the synchronized slot
+     is created and persisted on the standby only if one condition is met: the
+     logical replication slot on the primary must have advanced to a catalog
+     position (<literal>catalog_xmin</literal>) and WAL position
+     (<literal>restart_lsn</literal>) for which the standby still retains enough
+     data. This prevents data loss and allows logical replication to continue
+     seamlessly through the synchronized slot, if needed, after promotion.
+     If the primary slot is still lagging behind when synchronization is first
+     attempted, then, to prevent the data loss described above, the newly created
+     slot is neither synchronized nor persisted, and the following log message may
+     appear on the standby:
+<programlisting>
+LOG:  could not synchronize replication slot "failover_slot" to prevent data loss
+DETAIL:  The remote slot needs WAL at LSN 0/3003F28 and catalog xmin 754, but the standby has LSN 0/3003F28 and catalog xmin 766.
+</programlisting>
+     If the logical replication slot is actively used by a consumer, no manual
+     action is needed: the slot on the primary will advance automatically, and
+     synchronization will proceed in the next cycle. However, if no logical
+     replication consumer is set up yet, it is recommended to advance the slot by
+     manually running <link linkend="pg-logical-slot-get-changes">
+     <function>pg_logical_slot_get_changes</function></link> or
+     <link linkend="pg-logical-slot-get-binary-changes">
+     <function>pg_logical_slot_get_binary_changes</function></link> on the primary
+     slot, which allows synchronization to proceed.
+    </para>
+
+
     <para>
      The ability to resume logical replication after failover depends upon the
      <link linkend="view-pg-replication-slots">pg_replication_slots</link>.<structfield>synced</structfield>
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 656e66e0ae0..ef4a60ce189 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -211,9 +211,9 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
 		 * impact the users, so we used DEBUG1 level to log the message.
 		 */
 		ereport(slot->data.persistency == RS_TEMPORARY ? LOG : DEBUG1,
-				errmsg("could not synchronize replication slot \"%s\" because remote slot precedes local slot",
+				errmsg("could not synchronize replication slot \"%s\" to prevent data loss",
 					   remote_slot->name),
-				errdetail("The remote slot has LSN %X/%X and catalog xmin %u, but the local slot has LSN %X/%X and catalog xmin %u.",
+				errdetail("The remote slot needs WAL at LSN %X/%X and catalog xmin %u, but the standby has LSN %X/%X and catalog xmin %u.",
 						  LSN_FORMAT_ARGS(remote_slot->restart_lsn),
 						  remote_slot->catalog_xmin,
 						  LSN_FORMAT_ARGS(slot->data.restart_lsn),
@@ -592,8 +592,8 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 	if (!found_consistent_snapshot)
 	{
 		ereport(LOG,
-				errmsg("could not synchronize replication slot \"%s\"", remote_slot->name),
-				errdetail("Logical decoding could not find consistent point from local slot's LSN %X/%X.",
+				errmsg("could not synchronize replication slot \"%s\" to prevent data loss", remote_slot->name),
+				errdetail("Standby does not have enough data to decode WALs at LSN %X/%X.",
 						  LSN_FORMAT_ARGS(slot->data.restart_lsn)));
 
 		return false;
-- 
2.34.1

