On 2022-Apr-28, Tomas Vondra wrote:

> Attached is a patch doing the same thing in tablesync. The overall idea
> is to generate copy statement with CASE expressions, applying filters to
> individual columns. For Alvaro's example, this generates something like
> 
>   SELECT
>     (CASE WHEN (a < 0) OR (a > 0) THEN a ELSE NULL END) AS a,
>     (CASE WHEN (a > 0) THEN b ELSE NULL END) AS b,
>     (CASE WHEN (a < 0) THEN c ELSE NULL END) AS c
>   FROM uno WHERE (a < 0) OR (a > 0)

I've been reading the tablesync.c code you propose and the idea seems
correct.  (I was distracted by wondering if a different data structure
would be more appropriate, because what's there looks slightly
uncomfortable to work with.  But after playing around I can't find
anything that feels better in an obvious way.)

(I confess I'm a bit bothered by the fact that there are now three
different data structures in our code called PublicationInfo).

I propose some comment changes in the attached patch, and my
interpretation (untested) of the idea of optimizing for a single
publication.  (In there I also rename logicalrep_relmap_free_entry
because it's confusing.  That should be a separate patch but I didn't
split it before posting, apologies.)

> There's a couple options how we might optimize this for common cases.
> For example if there's just a single publication, there's no need to
> generate the CASE expressions - the WHERE filter will do the trick.

Right.

-- 
Álvaro Herrera        Breisgau, Deutschland  —  https://www.EnterpriseDB.com/
>From b8abd662b36aa588a7566fa7da3a2b1cd78d5e51 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
Date: Fri, 29 Apr 2022 17:41:34 +0200
Subject: [PATCH 1/2] Change some comments

---
 src/backend/replication/logical/relation.c  | 15 ++---
 src/backend/replication/logical/tablesync.c | 72 ++++++++++++---------
 2 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c
index 80fb561a9a..9307a6bf1d 100644
--- a/src/backend/replication/logical/relation.c
+++ b/src/backend/replication/logical/relation.c
@@ -119,14 +119,13 @@ logicalrep_relmap_init(void)
 }
 
 /*
- * Free the entry of a relation map cache.
+ * Clear the contents of a LogicalRepRelMapEntry, freeing any subsidiary
+ * allocated members.  The entry itself is not freed.
  */
 static void
-logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry)
+logicalrep_relmap_clear_entry(LogicalRepRelMapEntry *entry)
 {
-	LogicalRepRelation *remoterel;
-
-	remoterel = &entry->remoterel;
+	LogicalRepRelation *remoterel = &entry->remoterel;
 
 	pfree(remoterel->nspname);
 	pfree(remoterel->relname);
@@ -165,13 +164,13 @@ logicalrep_relmap_update(LogicalRepRelation *remoterel)
 		logicalrep_relmap_init();
 
 	/*
-	 * HASH_ENTER returns the existing entry if present or creates a new one.
+	 * Find an hashtable entry for this rel, or create a new one.  If there
+	 * was one, clear its contents so we can fill it fresh.
 	 */
 	entry = hash_search(LogicalRepRelMap, (void *) &remoterel->remoteid,
 						HASH_ENTER, &found);
-
 	if (found)
-		logicalrep_relmap_free_entry(entry);
+		logicalrep_relmap_clear_entry(entry);
 
 	memset(entry, 0, sizeof(LogicalRepRelMapEntry));
 
diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c
index fd547e16f4..82ee406eb4 100644
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -1100,15 +1100,14 @@ copy_table(Relation rel)
 	}
 	else
 	{
+		List	   *quals;
+		ListCell   *lc;
+
 		/*
 		 * For non-tables and tables with row filters, we need to do COPY
 		 * (SELECT ...), but we can't just do SELECT * because we need to not
 		 * copy generated columns. For tables with any row filters, build a
 		 * SELECT query with OR'ed row filters for COPY.
-		 *
-		 * FIXME can be simplified if all subscriptions have the same column
-		 * list (or no column list), in which case we don't need the CASE
-		 * expressions at all.
 		 */
 		appendStringInfoString(&cmd, "COPY (SELECT ");
 		for (int i = 0; i < lrel.natts; i++)
@@ -1146,7 +1145,17 @@ copy_table(Relation rel)
 					appendStringInfo(&qual, " OR %s", strVal(pubinfo->rowfilter));
 			}
 
-			if (no_filter)
+			/*
+			 * If there's no row filter (which includes the case of there
+			 * being several publications of which at least one does not have
+			 * a row filter), or there is a single publication, then we can
+			 * emit the column name itself (the latter is possible because the
+			 * WHERE clause will take care of omitting these rows; this works
+			 * only with a single publication.)  If there are several
+			 * publications, then emit a CASE expression that only prints the
+			 * column if the row meets the row filter of any publications.
+			 */
+			if (no_filter || list_length(pubinfos) == 1)
 				appendStringInfoString(&cmd, quote_identifier(lrel.attnames[i]));
 			else
 				appendStringInfo(&cmd, "(CASE WHEN (%s) THEN %s ELSE NULL END) AS %s",
@@ -1169,38 +1178,41 @@ copy_table(Relation rel)
 
 		appendStringInfoString(&cmd, quote_qualified_identifier(lrel.nspname, lrel.relname));
 
+		/*
+		 * Now, based on the row filters, build a WHERE clause for the query.
+		 * If there's at least one publication with no row filter, then the
+		 * overall WHERE reduces to empty.
+		 */
+
+		quals = NIL;
+		foreach (lc, pubinfos)
 		{
-			List *qual = NIL;
-			ListCell *lc;
+			PublicationInfo *pubinfo = (PublicationInfo *) lfirst(lc);
 
-			foreach (lc, pubinfos)
+			if (pubinfo->rowfilter == NULL)
 			{
-				PublicationInfo *pubinfo = (PublicationInfo *) lfirst(lc);
-
-				if (pubinfo->rowfilter == NULL)
-				{
-					qual = NIL;
-					break;
-				}
-
-				qual = lappend(qual, pubinfo->rowfilter);
+				quals = NIL;
+				break;
 			}
 
-		/* list of OR'ed filters */
-		if (qual != NIL)
-		{
-			ListCell   *lc;
-			char	   *q = strVal(linitial(qual));
-
-			appendStringInfo(&cmd, " WHERE %s", q);
-			for_each_from(lc, qual, 1)
-			{
-				q = strVal(lfirst(lc));
-				appendStringInfo(&cmd, " OR %s", q);
-			}
-			list_free_deep(qual);
+			quals = lappend(quals, pubinfo->rowfilter);
 		}
 
+		/*
+		 * If we do have filters, write a WHERE clause that ORs them all
+		 * together.
+		 */
+		if (quals != NIL)
+		{
+			char	   *q = strVal(linitial(quals));
+
+			appendStringInfo(&cmd, " WHERE (%s)", q);
+			for_each_from(lc, quals, 1)
+			{
+				q = strVal(lfirst(lc));
+				appendStringInfo(&cmd, " OR (%s)", q);
+			}
+			list_free_deep(quals);
 		}
 
 		appendStringInfoString(&cmd, ") TO STDOUT");
-- 
2.30.2

Reply via email to