Hi Hackers, I noticed that pg_createsubscriber sets recovery target params for correct recovery before converting a physical replica to a logical one but does not reset them afterward. It can lead to recovery failures in certain scenarios. For example, if recovery begins from a checkpoint where no WAL records need to be applied, the system might incorrectly determine that the recovery target was never reached because these parameters remain active.
I’ve attached a TAP test to reproduce the issue. The proposed patch ensures all recovery parameters are reset after conversion to prevent such edge cases. I would appreciate any feedback. -- Regards, Alyona Vinter
v1-0001-TAP-test-recovery_fails_after_pg_createsubscriber.pl
Description: Perl program
diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c b/src/bin/pg_basebackup/pg_createsubscriber.c
index 025b893a41e..16fa0f707f0 100644
--- a/src/bin/pg_basebackup/pg_createsubscriber.c
+++ b/src/bin/pg_basebackup/pg_createsubscriber.c
@@ -99,6 +99,8 @@ static void setup_subscriber(struct LogicalRepInfo *dbinfo,
const char *consistent_lsn);
static void setup_recovery(const struct LogicalRepInfo *dbinfo, const char *datadir,
const char *lsn);
+static void reset_recovery_params(const struct LogicalRepInfo *dbinfo,
+ const char *datadir);
static void drop_primary_replication_slot(struct LogicalRepInfo *dbinfo,
const char *slotname);
static void drop_failover_replication_slots(struct LogicalRepInfo *dbinfo);
@@ -1276,6 +1278,54 @@ setup_recovery(const struct LogicalRepInfo *dbinfo, const char *datadir, const c
pg_log_debug("recovery parameters:\n%s", recoveryconfcontents->data);
}
+/*
+ * Reset the earlier set recovery parameters.
+ */
+static void
+reset_recovery_params(const struct LogicalRepInfo *dbinfo, const char *datadir)
+{
+ PGconn *conn;
+ PGresult *res;
+
+ const char *recovery_params[] = {
+ "recovery_target",
+ "recovery_target_timeline",
+ "recovery_target_inclusive",
+ "recovery_target_action",
+ "recovery_target_name",
+ "recovery_target_time",
+ "recovery_target_xid",
+ "recovery_target_lsn",
+ NULL /* sentinel */
+ };
+
+ conn = connect_database(dbinfo[0].subconninfo, true);
+
+ for (int i = 0; recovery_params[i] != NULL; i++)
+ {
+ char sql[64];
+
+ /*
+ * ALTER SYSTEM RESET does not support prepared statements so we use it
+ * in the old way with sprintf
+ */
+ snprintf(sql, sizeof(sql), "ALTER SYSTEM RESET %s", recovery_params[i]);
+
+ res = PQexec(conn, sql);
+ if (PQresultStatus(res) != PGRES_COMMAND_OK) {
+ pg_log_error("Prepared execution failed: %s\n", PQerrorMessage(conn));
+ PQclear(res);
+ disconnect_database(conn, true);
+ exit(1);
+ }
+ PQclear(res);
+ }
+
+ disconnect_database(conn, false);
+
+ pg_log_debug("recovery parameters were reset");
+}
+
/*
* Drop physical replication slot on primary if the standby was using it. After
* the transformation, it has no use.
@@ -2445,6 +2495,9 @@ main(int argc, char **argv)
/* Remove failover replication slots if they exist on subscriber */
drop_failover_replication_slots(dbinfos.dbinfo);
+ /* Reset recovery parameters */
+ reset_recovery_params(dbinfos.dbinfo, subscriber_dir);
+
/* Stop the subscriber */
pg_log_info("stopping the subscriber");
stop_standby_server(subscriber_dir);
