From 753d0862eafb0739acba513be83aff359351f315 Mon Sep 17 00:00:00 2001
From: Jelte Fennema <jelte.fennema@microsoft.com>
Date: Mon, 3 Jul 2023 13:50:31 +0200
Subject: [PATCH v2] Allow specifying a dbname in pg_basebackup connection
 string

Normally it doesn't really matter which dbname is used in the connection
string that pg_basebackup and other physical replication CLI tools use.
The reason being, that physical replication does not work at the
database level, but instead at the server level. So you will always get
the data for all databases.

However, when there's a proxy, such as PgBouncer, in between the client
and the server, then might very well matter. Because this proxy might
want to route the connection to a different server depending on the
dbname parameter in the startup packet.

This patch changes the creation of the connection string key value
pairs, so that the following command will actually include
dbname=postgres in the startup packet to the server:

```
pg_basebackup --dbname 'dbname=postgres port=6432'
```

This also applies to other physical replication CLI tools like
pg_receivewal.
---
 doc/src/sgml/ref/pg_basebackup.sgml |  9 +++++++--
 doc/src/sgml/ref/pg_receivewal.sgml |  9 +++++++--
 src/bin/pg_basebackup/streamutil.c  | 29 ++++++++++++++++++-----------
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 79d3e657c32..685bfc6e2c0 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -752,8 +752,13 @@ PostgreSQL documentation
        <para>
         The option is called <literal>--dbname</literal> for consistency with other
         client applications, but because <application>pg_basebackup</application>
-        doesn't connect to any particular database in the cluster, any database
-        name in the connection string will be ignored.
+        doesn't connect to any particular database in the cluster, supplying a
+        specific database name in the connection string won't cause
+        <productname>PostgreSQL</productname> to behave any differently.
+        However, if you are connecting to <productname>PostgreSQL</productname>
+        through a proxy, then it's possible that this proxy does use the
+        supplied databasename to make certain decisions, such as to which
+        cluster to route the connection.
        </para>
       </listitem>
      </varlistentry>
diff --git a/doc/src/sgml/ref/pg_receivewal.sgml b/doc/src/sgml/ref/pg_receivewal.sgml
index cecc7daec97..9413543099b 100644
--- a/doc/src/sgml/ref/pg_receivewal.sgml
+++ b/doc/src/sgml/ref/pg_receivewal.sgml
@@ -316,8 +316,13 @@ PostgreSQL documentation
        <para>
         The option is called <literal>--dbname</literal> for consistency with other
         client applications, but because <application>pg_receivewal</application>
-        doesn't connect to any particular database in the cluster, database
-        name in the connection string will be ignored.
+        doesn't connect to any particular database in the cluster, supplying a
+        specific database name in the connection string won't cause
+        <productname>PostgreSQL</productname> to behave any differently.
+        However, if you are connecting to <productname>PostgreSQL</productname>
+        through a proxy, then it's possible that this proxy does use the
+        supplied databasename to make certain decisions, such as to which
+        cluster to route the connection.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c
index 15514599c4e..38cae8ff190 100644
--- a/src/bin/pg_basebackup/streamutil.c
+++ b/src/bin/pg_basebackup/streamutil.c
@@ -79,9 +79,6 @@ GetConnection(void)
 	/*
 	 * Merge the connection info inputs given in form of connection string,
 	 * options and default values (dbname=replication, replication=true, etc.)
-	 * Explicitly discard any dbname value in the connection string;
-	 * otherwise, PQconnectdbParams() would interpret that value as being
-	 * itself a connection string.
 	 */
 	i = 0;
 	if (connection_string)
@@ -92,18 +89,24 @@ GetConnection(void)
 
 		for (conn_opt = conn_opts; conn_opt->keyword != NULL; conn_opt++)
 		{
-			if (conn_opt->val != NULL && conn_opt->val[0] != '\0' &&
-				strcmp(conn_opt->keyword, "dbname") != 0)
+			if (conn_opt->val != NULL && conn_opt->val[0] != '\0')
 				argcount++;
 		}
 
 		keywords = pg_malloc0((argcount + 1) * sizeof(*keywords));
 		values = pg_malloc0((argcount + 1) * sizeof(*values));
 
+		/*
+		 * Set dbname here already, so it can be overridden by a dbname in the
+		 * connection string.
+		 */
+		keywords[i] = "dbname";
+		values[i] = "replication";
+		i++;
+
 		for (conn_opt = conn_opts; conn_opt->keyword != NULL; conn_opt++)
 		{
-			if (conn_opt->val != NULL && conn_opt->val[0] != '\0' &&
-				strcmp(conn_opt->keyword, "dbname") != 0)
+			if (conn_opt->val != NULL && conn_opt->val[0] != '\0')
 			{
 				keywords[i] = conn_opt->keyword;
 				values[i] = conn_opt->val;
@@ -115,11 +118,11 @@ GetConnection(void)
 	{
 		keywords = pg_malloc0((argcount + 1) * sizeof(*keywords));
 		values = pg_malloc0((argcount + 1) * sizeof(*values));
+		keywords[i] = "dbname";
+		values[i] = dbname;
+		i++;
 	}
 
-	keywords[i] = "dbname";
-	values[i] = dbname == NULL ? "replication" : dbname;
-	i++;
 	keywords[i] = "replication";
 	values[i] = dbname == NULL ? "true" : "database";
 	i++;
@@ -171,7 +174,11 @@ GetConnection(void)
 			values[i] = NULL;
 		}
 
-		tmpconn = PQconnectdbParams(keywords, values, true);
+		/*
+		 * Only expand dbname when we did not already parse the argument as a
+		 * connection string ourselves.
+		 */
+		tmpconn = PQconnectdbParams(keywords, values, !connection_string);
 
 		/*
 		 * If there is too little memory even to allocate the PGconn object

base-commit: 4f4d73466d71976b58f29121bab9d9fef6f3436e
-- 
2.34.1

