From f0c2cbd3deec552b1dde078e1801a8f48dce3d6d Mon Sep 17 00:00:00 2001
From: Ajin Cherian <itsajin@gmail.com>
Date: Fri, 8 May 2026 13:00:10 +1000
Subject: [PATCH v4 2/2] Preserve replication origin OIDs during pg_upgrade

When pg_upgrade migrates a subscriber, replication origin OIDs
(roident) can change across the upgrade. This is a problem because
commit-timestamp records embed roident and are copied directly from
the old cluster's pg_commit_ts directory, causing spurious
"update_origin_differs" conflicts after the upgrade.

Fix this by dumping replication origins as global objects via
pg_dumpall during binary upgrade, using a new function
binary_upgrade_create_replication_origin(oid, name, lsn) to recreate
each origin with its preserved roident and remote_lsn. To avoid
conflicts with this, CreateSubscription() skips replorigin_create()
in binary-upgrade mode since the origin is already created by the
time the subscription is restored.

Author: Ajin Cherian <itsajin@gmail.com>
---
 src/backend/commands/subscriptioncmds.c    | 11 +++-
 src/backend/replication/logical/origin.c   | 72 ++++++++++++++++++++++
 src/backend/utils/adt/pg_upgrade_support.c | 33 ++++++++++
 src/bin/pg_dump/pg_dump.c                  | 38 +++---------
 src/bin/pg_dump/pg_dumpall.c               | 64 +++++++++++++++++++
 src/bin/pg_upgrade/check.c                 | 11 ++--
 src/bin/pg_upgrade/info.c                  |  9 +++
 src/bin/pg_upgrade/pg_upgrade.h            |  1 +
 src/bin/pg_upgrade/t/004_subscription.pl   | 42 ++++++++++++-
 src/include/catalog/pg_proc.dat            |  4 ++
 src/include/replication/origin.h           |  2 +
 11 files changed, 246 insertions(+), 41 deletions(-)

diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index 87a1b5c4bc2..677ddab19a9 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -19,6 +19,7 @@
 #include "access/table.h"
 #include "access/twophase.h"
 #include "access/xact.h"
+#include "catalog/binary_upgrade.h"
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "catalog/indexing.h"
@@ -867,9 +868,15 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt,
 	 * apply workers initialization, and to handle origin creation dynamically
 	 * when tables are added to the subscription. It is not clear whether
 	 * preventing creation of origins is worth additional complexity.
+	 *
+	 * In binary-upgrade mode, skip origin creation here. This is required to
+	 * preserve the roident from the old cluster for this subscription.
 	 */
-	ReplicationOriginNameForLogicalRep(subid, InvalidOid, originname, sizeof(originname));
-	replorigin_create(originname);
+	if (!IsBinaryUpgrade)
+	{
+		ReplicationOriginNameForLogicalRep(subid, InvalidOid, originname, sizeof(originname));
+		replorigin_create(originname);
+	}
 
 	/*
 	 * Connect to remote side to execute requested commands and fetch table
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index c9dfb094c2b..bb05e29497a 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -379,6 +379,78 @@ replorigin_create(const char *roname)
 	return roident;
 }
 
+/*
+ * TODO: unify with replorigin_create().
+ *
+ * XXX: Should we put this function in pg_upgrade_support.c, because it's
+ * usable only while upgrading?
+ */
+ReplOriginId
+replorigin_create_with_reploriginid(ReplOriginId node, const char *roname)
+{
+	HeapTuple	tuple = NULL;
+	Relation	rel;
+	Datum		roname_d;
+	SysScanDesc scan;
+	ScanKeyData key;
+	bool		nulls[Natts_pg_replication_origin];
+	Datum		values[Natts_pg_replication_origin];
+	bool		collides;
+
+	Assert(node != InvalidReplOriginId);
+
+	/*
+	 * To avoid needing a TOAST table for pg_replication_origin, we limit
+	 * replication origin names to 512 bytes.  This should be more than enough
+	 * for all practical use.
+	 */
+	if (strlen(roname) > MAX_RONAME_LEN)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("replication origin name is too long"),
+				 errdetail("Replication origin names must be no longer than %d bytes.",
+						   MAX_RONAME_LEN)));
+
+	roname_d = CStringGetTextDatum(roname);
+
+	Assert(IsTransactionState());
+
+	rel = table_open(ReplicationOriginRelationId, RowExclusiveLock);
+
+	Assert(!OidIsValid(rel->rd_rel->reltoastrelid));
+
+	ScanKeyInit(&key,
+				Anum_pg_replication_origin_roident,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(node));
+	scan = systable_beginscan(rel, ReplicationOriginIdentIndex,
+							  true /* indexOK */ ,
+							  SnapshotSelf,
+							  1, &key);
+	collides = HeapTupleIsValid(systable_getnext(scan));
+
+	if (collides)
+		ereport(ERROR,
+				(errcode(ERRCODE_DUPLICATE_OBJECT),
+				 errmsg("replication origin ID %d already exists", node)));
+
+	systable_endscan(scan);
+
+	memset(&nulls, 0, sizeof(nulls));
+
+	values[Anum_pg_replication_origin_roident - 1] = ObjectIdGetDatum(node);
+	values[Anum_pg_replication_origin_roname - 1] = roname_d;
+
+	tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+	CatalogTupleInsert(rel, tuple);
+	CommandCounterIncrement();
+
+	table_close(rel, RowExclusiveLock);
+
+	heap_freetuple(tuple);
+	return node;
+}
+
 /*
  * Helper function to drop a replication origin.
  */
diff --git a/src/backend/utils/adt/pg_upgrade_support.c b/src/backend/utils/adt/pg_upgrade_support.c
index 59c3e7f0146..5358c06e228 100644
--- a/src/backend/utils/adt/pg_upgrade_support.c
+++ b/src/backend/utils/adt/pg_upgrade_support.c
@@ -445,3 +445,36 @@ binary_upgrade_create_conflict_detection_slot(PG_FUNCTION_ARGS)
 
 	PG_RETURN_VOID();
 }
+
+Datum
+binary_upgrade_create_replication_origin(PG_FUNCTION_ARGS)
+{
+	ReplOriginId	node;
+	Name			originname;
+
+	CHECK_IS_BINARY_UPGRADE;
+
+	/* NULL node id and origin name are not allowed */
+	if (PG_ARGISNULL(0) ||
+		PG_ARGISNULL(1))
+		elog(ERROR, "null argument to binary_upgrade_create_replication_origin is not allowed");
+
+	node = PG_GETARG_OID(0);
+	originname = PG_GETARG_NAME(1);
+
+	replorigin_create_with_reploriginid(node, NameStr(*originname));
+
+	if (!PG_ARGISNULL(2))
+	{
+		XLogRecPtr		remote_commit = PG_GETARG_LSN(2);
+
+		/* Lock to prevent the replication origin from vanishing */
+		LockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
+		replorigin_advance(node, remote_commit, InvalidXLogRecPtr,
+						   false /* backward */ ,
+						   false /* WAL log */ );
+		UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
+	}
+
+	PG_RETURN_VOID();
+}
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 452d0b5e98a..a5fb2b42c3d 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -5668,37 +5668,15 @@ dumpSubscription(Archive *fout, const SubscriptionInfo *subinfo)
 	 * In binary-upgrade mode, we allow the replication to continue after the
 	 * upgrade.
 	 */
-	if (dopt->binary_upgrade && fout->remoteVersion >= 170000)
+	if (dopt->binary_upgrade && subinfo->subenabled && fout->remoteVersion >= 170000)
 	{
-		if (subinfo->suboriginremotelsn)
-		{
-			/*
-			 * Preserve the remote_lsn for the subscriber's replication
-			 * origin. This value is required to start the replication from
-			 * the position before the upgrade. This value will be stale if
-			 * the publisher gets upgraded before the subscriber node.
-			 * However, this shouldn't be a problem as the upgrade of the
-			 * publisher ensures that all the transactions were replicated
-			 * before upgrading it.
-			 */
-			appendPQExpBufferStr(query,
-								 "\n-- For binary upgrade, must preserve the remote_lsn for the subscriber's replication origin.\n");
-			appendPQExpBufferStr(query,
-								 "SELECT pg_catalog.binary_upgrade_replorigin_advance(");
-			appendStringLiteralAH(query, subinfo->dobj.name, fout);
-			appendPQExpBuffer(query, ", '%s');\n", subinfo->suboriginremotelsn);
-		}
-
-		if (subinfo->subenabled)
-		{
-			/*
-			 * Enable the subscription to allow the replication to continue
-			 * after the upgrade.
-			 */
-			appendPQExpBufferStr(query,
-								 "\n-- For binary upgrade, must preserve the subscriber's running state.\n");
-			appendPQExpBuffer(query, "ALTER SUBSCRIPTION %s ENABLE;\n", qsubname);
-		}
+		/*
+		 * Enable the subscription to allow the replication to continue
+		 * after the upgrade.
+		 */
+		appendPQExpBufferStr(query,
+							 "\n-- For binary upgrade, must preserve the subscriber's running state.\n");
+		appendPQExpBuffer(query, "ALTER SUBSCRIPTION %s ENABLE;\n", qsubname);
 	}
 
 	if (subinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)
diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c
index c1f43113c53..9e0af4b1474 100644
--- a/src/bin/pg_dump/pg_dumpall.c
+++ b/src/bin/pg_dump/pg_dumpall.c
@@ -25,6 +25,7 @@
 #include <time.h>
 #include <unistd.h>
 
+#include "access/xlogdefs.h"
 #include "catalog/pg_authid_d.h"
 #include "common/connect.h"
 #include "common/file_perm.h"
@@ -76,6 +77,7 @@ static void dropDBs(PGconn *conn);
 static void dumpUserConfig(PGconn *conn, const char *username);
 static void dumpDatabases(PGconn *conn);
 static void dumpTimestamp(const char *msg);
+static void dumpReplicationOrigins(PGconn *conn);
 static int	runPgDump(const char *dbname, const char *create_opts, char *dbfile);
 static void buildShSecLabels(PGconn *conn,
 							 const char *catalog_name, Oid objectId,
@@ -813,6 +815,10 @@ main(int argc, char *argv[])
 			/* Dump role GUC privileges */
 			if (server_version >= 150000 && !skip_acls)
 				dumpRoleGUCPrivs(conn);
+
+			/* Dump replication origins */
+			if (server_version >= 170000 && binary_upgrade && archDumpFormat == archNull)
+				dumpReplicationOrigins(conn);
 		}
 
 		/* Dump tablespaces */
@@ -2339,6 +2345,64 @@ dumpTimestamp(const char *msg)
 		fprintf(OPF, "-- %s %s\n\n", msg, buf);
 }
 
+static void
+dumpReplicationOrigins(PGconn *conn)
+{
+	PQExpBuffer buf = createPQExpBuffer();
+	PGresult   *res;
+	int			i_roident;
+	int			i_roname;
+	int			i_remotelsn;
+
+	/* Get replication origins in current database. */
+	appendPQExpBufferStr(buf,
+						 "SELECT o.*, os.remote_lsn "
+						 "FROM pg_catalog.pg_replication_origin o "
+						 "LEFT OUTER JOIN pg_catalog.pg_replication_origin_status os ON o.roident = os.local_id ");
+
+	res = executeQuery(conn, buf->data);
+
+	i_roident = PQfnumber(res, "roident");
+	i_roname = PQfnumber(res, "roname");
+	i_remotelsn = PQfnumber(res, "remote_lsn");
+
+	if (PQntuples(res) > 0 && archDumpFormat == archNull)
+		fprintf(OPF, "--\n-- Replication Origins \n--\n\n");
+
+	for (int i = 0; i < PQntuples(res); i++)
+	{
+		ReplOriginId roident;
+		const char *roname;
+
+		roident = atooid(PQgetvalue(res, i, i_roident));
+		roname = PQgetvalue(res, i, i_roname);
+
+		resetPQExpBuffer(buf);
+
+		appendPQExpBufferStr(buf, "\n-- For binary upgrade, must preserve replication origin roident and remote_lsn\n");
+		appendPQExpBuffer(buf,
+			"SELECT pg_catalog.binary_upgrade_create_replication_origin("
+			"'%u'::pg_catalog.oid, ", roident);
+		appendStringLiteralConn(buf, roname, conn);
+		appendPQExpBufferStr(buf, "::pg_catalog.name");
+
+		if (!PQgetisnull(res, i, i_remotelsn))
+		{
+			appendPQExpBufferStr(buf, ", ");
+			appendStringLiteralConn(buf, PQgetvalue(res, i, i_remotelsn), conn);
+			appendPQExpBufferStr(buf, "::pg_catalog.pg_lsn");
+		}
+		else
+			appendPQExpBufferStr(buf, ", NULL");
+
+		appendPQExpBufferStr(buf, ");\n");
+		fprintf(OPF, "%s", buf->data);
+	}
+
+	PQclear(res);
+	destroyPQExpBuffer(buf);
+}
+
 /*
  * read_dumpall_filters - retrieve database identifier patterns from file
  *
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 5a7afe62eab..0f34c5ab294 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -2306,8 +2306,7 @@ check_new_cluster_replication_slots(void)
  * check_new_cluster_subscription_configuration()
  *
  * Verify that the max_active_replication_origins configuration specified is
- * enough for creating the subscriptions. This is required to create the
- * replication origin for each subscription.
+ * enough for creating all the replication origins.
  */
 static void
 check_new_cluster_subscription_configuration(void)
@@ -2321,7 +2320,7 @@ check_new_cluster_subscription_configuration(void)
 		return;
 
 	/* Quick return if there are no subscriptions to be migrated. */
-	if (old_cluster.nsubs == 0)
+	if (old_cluster.nrepl_origins == 0)
 		return;
 
 	prep_status("Checking for new cluster configuration for subscriptions");
@@ -2335,10 +2334,10 @@ check_new_cluster_subscription_configuration(void)
 		pg_fatal("could not determine parameter settings on new cluster");
 
 	max_active_replication_origins = atoi(PQgetvalue(res, 0, 0));
-	if (old_cluster.nsubs > max_active_replication_origins)
+	if (old_cluster.nrepl_origins > max_active_replication_origins)
 		pg_fatal("\"max_active_replication_origins\" (%d) must be greater than or equal to the number of "
-				 "subscriptions (%d) on the old cluster",
-				 max_active_replication_origins, old_cluster.nsubs);
+				 "replication origins (%d) on the old cluster",
+				 max_active_replication_origins, old_cluster.nrepl_origins);
 
 	PQclear(res);
 	PQfinish(conn);
diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index 37fff93892f..630f3f06e24 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -843,6 +843,7 @@ get_subscription_info(ClusterInfo *cluster)
 	PGconn	   *conn;
 	PGresult   *res;
 	int			i_nsub;
+	int			i_nrepl_origins;
 	int			i_retain_dead_tuples;
 
 	conn = connectToServer(cluster, "template1");
@@ -862,6 +863,14 @@ get_subscription_info(ClusterInfo *cluster)
 	cluster->sub_retain_dead_tuples = (strcmp(PQgetvalue(res, 0, i_retain_dead_tuples), "t") == 0);
 
 	PQclear(res);
+
+	res = executeQueryOrDie(conn,
+							"SELECT count(*) AS nrepl_origins "
+							"FROM pg_catalog.pg_replication_origin");
+	i_nrepl_origins = PQfnumber(res, "nrepl_origins");
+	cluster->nrepl_origins = atoi(PQgetvalue(res, 0, i_nrepl_origins));
+	PQclear(res);
+
 	PQfinish(conn);
 }
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 1d767bbda2d..1d15700a886 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -311,6 +311,7 @@ typedef struct
 	int			num_tablespaces;
 	const char *tablespace_suffix;	/* directory specification */
 	int			nsubs;			/* number of subscriptions */
+	int			nrepl_origins;	/* number of replication origins */
 	bool		sub_retain_dead_tuples; /* whether a subscription enables
 										 * retain_dead_tuples. */
 } ClusterInfo;
diff --git a/src/bin/pg_upgrade/t/004_subscription.pl b/src/bin/pg_upgrade/t/004_subscription.pl
index ecaa8e1e075..9e3902e3f79 100644
--- a/src/bin/pg_upgrade/t/004_subscription.pl
+++ b/src/bin/pg_upgrade/t/004_subscription.pl
@@ -42,7 +42,7 @@ my $connstr = $publisher->connstr . ' dbname=postgres';
 
 # ------------------------------------------------------
 # Check that pg_upgrade fails when max_active_replication_origins configured
-# in the new cluster is less than the number of subscriptions in the old
+# in the new cluster is less than the number of replication origins in the old
 # cluster.
 # ------------------------------------------------------
 # It is sufficient to use disabled subscription to test upgrade failure.
@@ -74,7 +74,7 @@ command_checks_all(
 	],
 	1,
 	[
-		qr/"max_active_replication_origins" \(0\) must be greater than or equal to the number of subscriptions \(1\) on the old cluster/
+		qr/"max_active_replication_origins" \(0\) must be greater than or equal to the number of replication origins \(1\) on the old cluster/
 	],
 	[qr//],
 	'run of pg_upgrade where the new cluster has insufficient max_active_replication_origins'
@@ -301,8 +301,30 @@ is($result, qq(t), "Check that the table is in init state");
 
 # Get the replication origin's remote_lsn of the old subscriber
 my $remote_lsn = $old_sub->safe_psql('postgres',
-	"SELECT remote_lsn FROM pg_replication_origin_status os, pg_subscription s WHERE os.external_id = 'pg_' || s.oid AND s.subname = 'regress_sub4'"
+    "SELECT os.remote_lsn
+     FROM pg_replication_origin_status os
+     JOIN pg_replication_origin o ON o.roident = os.local_id
+     JOIN pg_subscription s ON o.roname = 'pg_' || s.oid::text
+     WHERE s.subname = 'regress_sub4'"
 );
+
+# Get the replication origin OIDs (roident) for all subscriptions, keyed by
+# subscription name (which is stable across upgrade, unlike suboid). These
+# must be preserved after upgrade. A mismatch would cause spurious
+# update_origin_differs conflicts.
+my %pre_upgrade_roident;
+my $roident_rows = $old_sub->safe_psql('postgres',
+    "SELECT s.subname, o.roident
+     FROM pg_subscription s
+     JOIN pg_replication_origin o ON o.roname = 'pg_' || s.oid::text
+     ORDER BY s.subname"
+);
+for my $row (split /\n/, $roident_rows)
+{
+    my ($subname, $roident) = split /\|/, $row;
+    $pre_upgrade_roident{$subname} = $roident;
+}
+
 # Have the subscription in disabled state before upgrade
 $old_sub->safe_psql('postgres', "ALTER SUBSCRIPTION regress_sub5 DISABLE");
 
@@ -378,6 +400,20 @@ regress_sub5|f|f|f),
 	"check that the subscription's running status, failover, and retain_dead_tuples are preserved"
 );
 
+# Verify that replication origin OIDs are preserved after upgrade.
+my $post_roident_rows = $new_sub->safe_psql('postgres',
+    "SELECT s.subname, o.roident
+     FROM pg_subscription s
+     JOIN pg_replication_origin o ON o.roname = 'pg_' || s.oid::text
+     ORDER BY s.subname"
+);
+for my $row (split /\n/, $post_roident_rows)
+{
+    my ($subname, $roident) = split /\|/, $row;
+    is($roident, $pre_upgrade_roident{$subname},
+        "roident preserved for subscription '$subname' after upgrade");
+}
+
 # Subscription relations should be preserved
 $result = $new_sub->safe_psql('postgres',
 	"SELECT srrelid, srsubstate FROM pg_subscription_rel ORDER BY srrelid");
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 2f98473b1cb..71fb1e70465 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -12028,6 +12028,10 @@
   proname => 'binary_upgrade_set_next_pg_subscription_oid', provolatile => 'v',
   proparallel => 'r', prorettype => 'void', proargtypes => 'oid',
   prosrc => 'binary_upgrade_set_next_pg_subscription_oid' },
+{ oid => '9161', descr => 'for use by pg_upgrade (replication origin)',
+  proname => 'binary_upgrade_create_replication_origin', proisstrict => 'f',
+  provolatile => 'v', proparallel => 'r', prorettype => 'void',
+  proargtypes => 'oid name pg_lsn', prosrc => 'binary_upgrade_create_replication_origin' },
 
 # conversion functions
 { oid => '4310', descr => 'internal conversion function for KOI8R to WIN1251',
diff --git a/src/include/replication/origin.h b/src/include/replication/origin.h
index a69faf6eaaf..ac2fd824469 100644
--- a/src/include/replication/origin.h
+++ b/src/include/replication/origin.h
@@ -55,6 +55,8 @@ extern PGDLLIMPORT int max_active_replication_origins;
 /* API for querying & manipulating replication origins */
 extern ReplOriginId replorigin_by_name(const char *roname, bool missing_ok);
 extern ReplOriginId replorigin_create(const char *roname);
+extern ReplOriginId replorigin_create_with_reploriginid(ReplOriginId node,
+														const char *roname);
 extern void replorigin_drop_by_name(const char *name, bool missing_ok, bool nowait);
 extern bool replorigin_by_oid(ReplOriginId roident, bool missing_ok,
 							  char **roname);
-- 
2.47.3

