From 6c3c7500cf457bc71e66261c3569b150486d626f Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Fri, 23 Aug 2024 14:02:20 +0530
Subject: [PATCH v13 1/2] Distribute invalidatons if change in catalog tables

Distribute invalidations to inprogress transactions if the current
committed transaction change any catalog table.
---
 .../replication/logical/reorderbuffer.c       |   5 +-
 src/backend/replication/logical/snapbuild.c   |  34 ++-
 src/include/replication/reorderbuffer.h       |   4 +
 src/test/subscription/t/100_bugs.pl           | 267 ++++++++++++++++++
 4 files changed, 296 insertions(+), 14 deletions(-)

diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 22bcf171ff..c5dfc1ab06 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -221,9 +221,6 @@ int			debug_logical_replication_streaming = DEBUG_LOGICAL_REP_STREAMING_BUFFERED
  */
 static ReorderBufferTXN *ReorderBufferGetTXN(ReorderBuffer *rb);
 static void ReorderBufferReturnTXN(ReorderBuffer *rb, ReorderBufferTXN *txn);
-static ReorderBufferTXN *ReorderBufferTXNByXid(ReorderBuffer *rb,
-											   TransactionId xid, bool create, bool *is_new,
-											   XLogRecPtr lsn, bool create_as_top);
 static void ReorderBufferTransferSnapToParent(ReorderBufferTXN *txn,
 											  ReorderBufferTXN *subtxn);
 
@@ -622,7 +619,7 @@ ReorderBufferReturnRelids(ReorderBuffer *rb, Oid *relids)
  * (with the given LSN, and as top transaction if that's specified);
  * when this happens, is_new is set to true.
  */
-static ReorderBufferTXN *
+ReorderBufferTXN *
 ReorderBufferTXNByXid(ReorderBuffer *rb, TransactionId xid, bool create,
 					  bool *is_new, XLogRecPtr lsn, bool create_as_top)
 {
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 0450f94ba8..1f7c24cad0 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -300,7 +300,7 @@ static void SnapBuildFreeSnapshot(Snapshot snap);
 
 static void SnapBuildSnapIncRefcount(Snapshot snap);
 
-static void SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn);
+static void SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid);
 
 static inline bool SnapBuildXidHasCatalogChanges(SnapBuild *builder, TransactionId xid,
 												 uint32 xinfo);
@@ -859,18 +859,21 @@ SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
 }
 
 /*
- * Add a new Snapshot to all transactions we're decoding that currently are
- * in-progress so they can see new catalog contents made by the transaction
- * that just committed. This is necessary because those in-progress
- * transactions will use the new catalog's contents from here on (at the very
- * least everything they do needs to be compatible with newer catalog
- * contents).
+ * Add a new Snapshot and invalidation messages to all transactions we're
+ * decoding that currently are in-progress so they can see new catalog contents
+ * made by the transaction that just committed. This is necessary because those
+ * in-progress transactions will use the new catalog's contents from here on
+ * (at the very least everything they do needs to be compatible with newer
+ * catalog contents).
  */
 static void
-SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn)
+SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid)
 {
 	dlist_iter	txn_i;
 	ReorderBufferTXN *txn;
+	ReorderBufferTXN *curr_txn;
+
+	curr_txn = ReorderBufferTXNByXid(builder->reorder, xid, false, NULL, InvalidXLogRecPtr, false);
 
 	/*
 	 * Iterate through all toplevel transactions. This can include
@@ -913,6 +916,14 @@ SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn)
 		SnapBuildSnapIncRefcount(builder->snapshot);
 		ReorderBufferAddSnapshot(builder->reorder, txn->xid, lsn,
 								 builder->snapshot);
+
+		/*
+		 * Add invalidation messages to the reorder buffer of inprogress
+		 * transactions except the current committed transaction
+		 */
+		if (txn->xid != xid && curr_txn->ninvalidations > 0)
+			ReorderBufferAddInvalidations(builder->reorder, txn->xid, lsn,
+										  curr_txn->ninvalidations, curr_txn->invalidations);
 	}
 }
 
@@ -1184,8 +1195,11 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid,
 		/* refcount of the snapshot builder for the new snapshot */
 		SnapBuildSnapIncRefcount(builder->snapshot);
 
-		/* add a new catalog snapshot to all currently running transactions */
-		SnapBuildDistributeNewCatalogSnapshot(builder, lsn);
+		/*
+		 * add a new catalog snapshot and invalidations messages to all
+		 * currently running transactions
+		 */
+		SnapBuildDistributeSnapshotAndInval(builder, lsn, xid);
 	}
 }
 
diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h
index e332635f70..093d21213a 100644
--- a/src/include/replication/reorderbuffer.h
+++ b/src/include/replication/reorderbuffer.h
@@ -743,6 +743,10 @@ extern TransactionId *ReorderBufferGetCatalogChangesXacts(ReorderBuffer *rb);
 
 extern void ReorderBufferSetRestartPoint(ReorderBuffer *rb, XLogRecPtr ptr);
 
+extern ReorderBufferTXN *ReorderBufferTXNByXid(ReorderBuffer *rb,
+											   TransactionId xid, bool create, bool *is_new,
+											   XLogRecPtr lsn, bool create_as_top);
+
 extern void StartupReorderBuffer(void);
 
 #endif
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..c581e28261 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -487,6 +487,273 @@ $result =
 is( $result, qq(2|f
 3|t), 'check replicated update on subscriber');
 
+# Clean up
+$node_publisher->safe_psql('postgres', "DROP  PUBLICATION pub1");
+$node_subscriber->safe_psql('postgres', "DROP  SUBSCRIPTION sub1");
+
+# The bug was that the incremental data synchronization was being skipped when
+# a new table is added to the publication in presence of a concurrent active
+# transaction performing the DML on the same table.
+
+# Initial setup.
+$node_publisher->safe_psql(
+	'postgres', qq(
+	CREATE TABLE tab_conc(a int);
+	CREATE SCHEMA sch3;
+	CREATE TABLE sch3.tab_conc(a int);
+	CREATE PUBLICATION regress_pub1;
+));
+
+$node_subscriber->safe_psql(
+	'postgres', qq(
+	CREATE TABLE tab_conc(a int);
+	CREATE SCHEMA sch3;
+	CREATE TABLE sch3.tab_conc(a int);
+	CREATE SUBSCRIPTION regress_sub1 CONNECTION '$publisher_connstr' PUBLICATION regress_pub1;
+));
+
+# Bump the query timeout to avoid false negatives on slow test systems.
+my $psql_timeout_secs = 4 * $PostgreSQL::Test::Utils::timeout_default;
+
+# Initiate 3 background sessions.
+my $background_psql1 = $node_publisher->background_psql(
+	'postgres',
+	on_error_stop => 0,
+	timeout => $psql_timeout_secs);
+$background_psql1->set_query_timer_restart();
+
+my $background_psql2 = $node_publisher->background_psql(
+	'postgres',
+	on_error_stop => 0,
+	timeout => $psql_timeout_secs);
+
+$background_psql2->set_query_timer_restart();
+
+my $background_psql3 = $node_publisher->background_psql(
+	'postgres',
+	on_error_stop => 0,
+	timeout => $psql_timeout_secs);
+$background_psql3->set_query_timer_restart();
+
+# Maintain an active transaction with the table that will be added to the
+# publication.
+$background_psql1->query_safe(
+	qq(
+	BEGIN;
+	INSERT INTO tab_conc VALUES (1);
+));
+
+# Maintain an active transaction with a schema table that will be added to the
+# publication.
+$background_psql2->query_safe(
+	qq(
+	BEGIN;
+	INSERT INTO sch3.tab_conc VALUES (1);
+));
+
+# Add the table to the publication using background_psql, as the alter
+# publication operation will distribute the invalidations to inprogress txns.
+$background_psql3->query_safe(
+	"ALTER PUBLICATION regress_pub1 ADD TABLE tab_conc, TABLES IN SCHEMA sch3"
+);
+
+# Complete the transaction on the tables.
+$background_psql1->query_safe("COMMIT");
+$background_psql2->query_safe("COMMIT");
+
+$node_publisher->safe_psql(
+	'postgres', qq(
+	INSERT INTO tab_conc VALUES (2);
+	INSERT INTO sch3.tab_conc VALUES (2);
+));
+
+# Refresh the publication.
+$node_subscriber->safe_psql('postgres',
+	"ALTER SUBSCRIPTION regress_sub1 REFRESH PUBLICATION");
+
+$node_subscriber->wait_for_subscription_sync($node_publisher, 'regress_sub1');
+
+$result = $node_subscriber->safe_psql('postgres', "SELECT * FROM tab_conc");
+is( $result, qq(1
+2),
+	'Ensure that the data from the tab_conc table is synchronized to the subscriber after the subscription is refreshed'
+);
+
+$result =
+  $node_subscriber->safe_psql('postgres', "SELECT * FROM sch3.tab_conc");
+is( $result, qq(1
+2),
+	'Ensure that the data from the sch3.tab_conc table is synchronized to the subscriber after the subscription is refreshed'
+);
+
+# Perform an insert.
+$node_publisher->safe_psql(
+	'postgres', qq(
+	INSERT INTO tab_conc VALUES (3);
+	INSERT INTO sch3.tab_conc VALUES (3);
+));
+$node_publisher->wait_for_catchup('regress_sub1');
+
+# Verify that the insert is replicated to the subscriber.
+$result = $node_subscriber->safe_psql('postgres', "SELECT * FROM tab_conc");
+is( $result, qq(1
+2
+3),
+	'Verify that the incremental data for table tab_conc added after table synchronization is replicated to the subscriber'
+);
+
+$result =
+  $node_subscriber->safe_psql('postgres', "SELECT * FROM sch3.tab_conc");
+is( $result, qq(1
+2
+3),
+	'Verify that the incremental data for table sch3.tab_conc added after table synchronization is replicated to the subscriber'
+);
+
+# The bug was that the incremental data synchronization was happening even when
+# tables are dropped from the publication in presence of a concurrent active
+# transaction performing the DML on the same table.
+
+# Maintain an active transaction with the table that will be dropped from the
+# publication.
+$background_psql1->query_safe(
+	qq(
+	BEGIN;
+	INSERT INTO tab_conc VALUES (4);
+));
+
+# Maintain an active transaction with a schema table that will be dropped from the
+# publication.
+$background_psql2->query_safe(
+	qq(
+	BEGIN;
+	INSERT INTO sch3.tab_conc VALUES (4);
+));
+
+# Drop the table from the publication using background_psql, as the alter
+# publication operation will distribute the invalidations to inprogress txns.
+$background_psql3->query_safe(
+	"ALTER PUBLICATION regress_pub1 DROP TABLE tab_conc, TABLES IN SCHEMA sch3"
+);
+
+# Complete the transaction on the tables.
+$background_psql1->query_safe("COMMIT");
+$background_psql2->query_safe("COMMIT");
+
+# Perform an insert.
+$node_publisher->safe_psql(
+	'postgres', qq(
+	INSERT INTO tab_conc VALUES (5);
+	INSERT INTO sch3.tab_conc VALUES (5);
+));
+
+$node_publisher->wait_for_catchup('regress_sub1');
+
+# Verify that the insert is not replicated to the subscriber.
+$result = $node_subscriber->safe_psql('postgres', "SELECT * FROM tab_conc");
+is( $result, qq(1
+2
+3
+4),
+	'Verify that data for table tab_conc are not replicated to subscriber');
+
+$result =
+  $node_subscriber->safe_psql('postgres', "SELECT * FROM sch3.tab_conc");
+is( $result, qq(1
+2
+3
+4),
+	'Verify that the incremental data for table sch3.tab_conc are not replicated to subscriber'
+);
+
+# The bug was that the incremental data synchronization was happening even after
+# publication is dropped in a concurrent active transaction.
+
+# Add tables to the publication.
+$background_psql3->query_safe(
+	"ALTER PUBLICATION regress_pub1 ADD TABLE tab_conc, TABLES IN SCHEMA sch3"
+);
+
+# Maintain an active transaction with the table.
+$background_psql1->query_safe(
+	qq(
+	BEGIN;
+	INSERT INTO tab_conc VALUES (6);
+));
+
+# Maintain an active transaction with a schema table.
+$background_psql2->query_safe(
+	qq(
+	BEGIN;
+	INSERT INTO sch3.tab_conc VALUES (6);
+));
+
+# Drop publication.
+$background_psql3->query_safe("DROP PUBLICATION regress_pub1");
+
+# Perform an insert.
+$background_psql1->query_safe("INSERT INTO tab_conc VALUES (7)");
+$background_psql2->query_safe("INSERT INTO sch3.tab_conc VALUES (7)");
+
+# Complete the transaction on the tables.
+$background_psql1->query_safe("COMMIT");
+$background_psql2->query_safe("COMMIT");
+
+# ERROR should appear on subscriber.
+my $offset = -s $node_subscriber->logfile;
+$node_subscriber->wait_for_log(
+	qr/ERROR:  publication "regress_pub1" does not exist/, $offset);
+
+$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION regress_sub1");
+
+# The bug was that the incremental data synchronization was happening even after
+# publication is renamed in a concurrent active transaction.
+
+# Create publication.
+$background_psql3->query_safe(
+	"CREATE PUBLICATION regress_pub1 FOR TABLE tab_conc, TABLES IN SCHEMA sch3"
+);
+
+# Create subscription.
+$node_subscriber->safe_psql('postgres',
+	"CREATE SUBSCRIPTION regress_sub1 CONNECTION '$publisher_connstr' PUBLICATION regress_pub1"
+);
+
+# Maintain an active transaction with the table.
+$background_psql1->query_safe(
+	qq(
+	BEGIN;
+	INSERT INTO tab_conc VALUES (8);
+));
+
+# Maintain an active transaction with a schema table.
+$background_psql2->query_safe(
+	qq(
+	BEGIN;
+	INSERT INTO sch3.tab_conc VALUES (8);
+));
+
+# Rename publication.
+$background_psql3->query_safe(
+	"ALTER PUBLICATION regress_pub1 RENAME TO regress_pub1_rename");
+
+# Perform an insert.
+$background_psql1->query_safe("INSERT INTO tab_conc VALUES (9)");
+$background_psql2->query_safe("INSERT INTO sch3.tab_conc VALUES (9)");
+
+# Complete the transaction on the tables.
+$background_psql1->query_safe("COMMIT");
+$background_psql2->query_safe("COMMIT");
+
+# ERROR should appear on subscriber.
+$offset = -s $node_subscriber->logfile;
+$node_subscriber->wait_for_log(
+	qr/ERROR:  publication "regress_pub1" does not exist/, $offset);
+
+$background_psql1->quit;
+$background_psql2->quit;
+$background_psql3->quit;
+
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

