From c0cbecb941e4f659142d65b7eac2df801a9b0fa7 Mon Sep 17 00:00:00 2001
From: "houzj.fnst" <houzj.fnst@cn.fujitsu.com>
Date: Sat, 16 Oct 2021 14:14:40 +0800
Subject: [PATCH] fix double publish

if publish_via_partition_root is true, then the child table's data will be
copied twice if adding both child and parent table to the publication. The
reason is that the subscriber will fetch the table list from publisher's
pg_publication_tables view to do the table synchronization. But the view always
show both child and parent table which cause the extra synchronization
for the child table.

Fix it by making pg_publication_tables only show parent table if both parent
and child exists in the publication.

---
 src/backend/catalog/pg_publication.c      | 77 +++++++++++++++++++++++++++++--
 src/include/catalog/pg_publication.h      |  1 +
 src/test/regress/expected/publication.out |  9 ++++
 src/test/regress/sql/publication.sql      |  4 ++
 4 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
index 9cd0c82..7c9208b 100644
--- a/src/backend/catalog/pg_publication.c
+++ b/src/backend/catalog/pg_publication.c
@@ -106,6 +106,45 @@ is_publishable_class(Oid relid, Form_pg_class reltuple)
 }
 
 /*
+ * Filter out the partitions whose parent tables was also specified in
+ * the publication.
+ */
+static List *
+filter_out_partitions(List *relids)
+{
+	List	   *result = NIL;
+	ListCell   *lc;
+	ListCell   *lc2;
+
+	foreach(lc, relids)
+	{
+		bool		skip = false;
+		List	   *ancestors = NIL;
+		Oid			relid = lfirst_oid(lc);
+
+		if (get_rel_relispartition(relid))
+			ancestors = get_partition_ancestors(relid);
+
+		foreach(lc2, ancestors)
+		{
+			/*
+			 * Check if the parent table exists in the published table list.
+			 */
+			if (list_member_oid(relids, lfirst_oid(lc2)))
+			{
+				skip = true;
+				break;
+			}
+		}
+
+		if (!skip)
+			result = lappend_oid(result, relid);
+	}
+
+	return result;
+}
+
+/*
  * Another variant of this, taking a Relation.
  */
 bool
@@ -328,6 +367,32 @@ GetPublicationRelations(Oid pubid, PublicationPartOpt pub_partopt)
 	return result;
 }
 
+ /*
+ * Gets list of relation explicitly mentioned in the publication.
+ *
+ * This should only be used FOR TABLE publications, the FOR ALL TABLES
+ * should use GetAllTablesPublicationRelations().
+ *
+ * If pubviaroot is true, we must exclude partitions in favor of including the
+ * root partitioned tables.
+ */
+List *
+GetPublicationRelationsExtended(Oid pubid, bool pubviaroot)
+{
+	List	   *result;
+
+	result = GetPublicationRelations(pubid, PUBLICATION_PART_ROOT);
+
+	/*
+	 * Filter out the partitions whose parent table is also in the same
+	 * publication.
+	 */
+	if (pubviaroot)
+		return filter_out_partitions(result);
+	else
+		return result;
+}
+
 /*
  * Gets list of publication oids for publications marked as FOR ALL TABLES.
  */
@@ -557,10 +622,14 @@ pg_get_publication_tables(PG_FUNCTION_ARGS)
 		if (publication->alltables)
 			tables = GetAllTablesPublicationRelations(publication->pubviaroot);
 		else
-			tables = GetPublicationRelations(publication->oid,
-											 publication->pubviaroot ?
-											 PUBLICATION_PART_ROOT :
-											 PUBLICATION_PART_LEAF);
+		{
+			if (publication->pubviaroot)
+				tables = GetPublicationRelationsExtended(publication->oid,
+														 publication->pubviaroot);
+			else
+				tables = GetPublicationRelations(publication->oid,
+												PUBLICATION_PART_LEAF);
+		}
 		funcctx->user_fctx = (void *) tables;
 
 		MemoryContextSwitchTo(oldcontext);
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 82f2536..d5e3c02 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -109,6 +109,7 @@ typedef enum PublicationPartOpt
 } PublicationPartOpt;
 
 extern List *GetPublicationRelations(Oid pubid, PublicationPartOpt pub_partopt);
+extern List *GetPublicationRelationsExtended(Oid pubid, bool pubviaroot);
 extern List *GetAllTablesPublications(void);
 extern List *GetAllTablesPublicationRelations(bool pubviaroot);
 extern List *GetPubPartitionOptionRelations(List *result,
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 82bce9b..5f5445a 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -165,6 +165,15 @@ HINT:  To enable updating the table, set REPLICA IDENTITY using ALTER TABLE.
 ALTER PUBLICATION testpub_forparted DROP TABLE testpub_parted;
 -- works again, because update is no longer replicated
 UPDATE testpub_parted2 SET a = 2;
+-- add both child and parent table to the publication
+ALTER PUBLICATION testpub_forparted ADD TABLE testpub_parted, testpub_parted2;
+-- only show parent table when publish_via_partition_root is set
+select * from pg_publication_tables;
+      pubname      | schemaname |   tablename    
+-------------------+------------+----------------
+ testpub_forparted | public     | testpub_parted
+(1 row)
+
 DROP TABLE testpub_parted1, testpub_parted2;
 DROP PUBLICATION testpub_forparted, testpub_forparted1;
 -- Test cache invalidation FOR ALL TABLES publication
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index e5745d5..d3f2b2f 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -97,6 +97,10 @@ UPDATE testpub_parted2 SET a = 2;
 ALTER PUBLICATION testpub_forparted DROP TABLE testpub_parted;
 -- works again, because update is no longer replicated
 UPDATE testpub_parted2 SET a = 2;
+-- add both child and parent table to the publication
+ALTER PUBLICATION testpub_forparted ADD TABLE testpub_parted, testpub_parted2;
+-- only show parent table when publish_via_partition_root is set
+select * from pg_publication_tables;
 DROP TABLE testpub_parted1, testpub_parted2;
 DROP PUBLICATION testpub_forparted, testpub_forparted1;
 
-- 
2.7.2.windows.1

