From 1aaa1cf9379ae1961a05f417c7bf6553bf789cf6 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v4] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE/DELETE on table having unpublished generated column as REPLICA
IDENTITY is allowed. UPDATE/DELETE on such tables should not be allowed
---
 src/backend/commands/publicationcmds.c    | 102 ++++++++++++++++++++++
 src/backend/executor/execReplication.c    |  12 +++
 src/backend/utils/cache/relcache.c        |  12 +++
 src/include/catalog/pg_publication.h      |   6 ++
 src/include/commands/publicationcmds.h    |   2 +
 src/test/regress/expected/publication.out |  25 ++++++
 src/test/regress/sql/publication.sql      |  26 ++++++
 src/test/subscription/t/100_bugs.pl       |  16 +---
 8 files changed, 188 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..a095c61edc 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,108 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid = RelationGetRelid(relation);
+	bool		result = false;
+	bool		found;
+	Publication *pub;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+
+	pub = GetPublication(pubid);
+	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+
+	if (!found)
+	{
+		int			x;
+		Bitmapset  *idattrs = NULL;
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+			relation->rd_att->constr && relation->rd_att->constr->has_generated_stored)
+			result = true;
+
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		x = -1;
+
+		/* Check if any REPLICA IDENTITY column is an generated column. */
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			char		attgenerated;
+
+			/*
+			 * If pubviaroot is true, we are validating the column list of the
+			 * parent table, but the bitmap contains the replica identity
+			 * information of the child table. The parent/child attnums may
+			 * not match, so translate them to the parent - get the attname
+			 * from the child, and look it up in the parent.
+			 */
+			if (pubviaroot && relation->rd_rel->relispartition)
+			{
+				/* attribute name in the child table */
+				char	   *colname = get_attname(relid, attnum, false);
+
+				/*
+				 * Determine the attnum for the attribute name in parent (we
+				 * are using the column list defined on the parent).
+				 */
+				attnum = get_attnum(publish_as_relid, colname);
+				attgenerated = get_attgenerated(publish_as_relid, attnum);
+			}
+			else
+				attgenerated = get_attgenerated(relid, attnum);
+
+			/*
+			 * Check if the column is a generated column.
+			 *
+			 * 'publish_generated_columns = false' and no column list is
+			 * specified for publication. So if the column is a generated
+			 * column, this implies that the REPLICA IDENTITY consists an
+			 * unpublished generated column.
+			 */
+			if (attgenerated == ATTRIBUTE_GENERATED_STORED)
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..d3517db8b3 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..02d7fba160 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_has_valid_gen_cols = true;
 		return;
 	}
 
@@ -5750,6 +5751,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_has_valid_gen_cols = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5829,16 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all columns which are part of the REPLICA IDENTITY is
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+			pubdesc->replident_has_valid_gen_cols = false;
+
 		ReleaseSysCache(tup);
 
 		/*
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..5eeeced91d 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,12 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published.
+	 */
+	bool		replident_has_valid_gen_cols;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..b18e576b77 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+											  List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..62198ead23 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,31 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..3b1c661440 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,32 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

