From 1785f9ab45dd2ce111eb67ee2a681e493f3c6942 Mon Sep 17 00:00:00 2001
From: Shubham Khanna <shubham.khanna@fujitsu.com>
Date: Wed, 22 Jan 2025 12:03:12 +0530
Subject: [PATCH v4] Enhance 'pg_createsubscriber' to fetch and append all
 databases

This patch enhances the 'pg_createsubscriber' utility by adding the
'--all-databases' option, which automatically fetches all non-template
databases on the source server (publisher) and creates corresponding
subscriptions on the target server (subscriber). This simplifies the process
of converting a physical standby to a logical subscriber, particularly
during upgrades.

When '--all-databases' is used, the tool queries the source server for all
databases and attempts to create subscriptions on the target server for
databases with matching names. The tool auto-generates subscription,publication
and replication slot names using the format:
  - Subscription: '<database>_sub'
  - Publication: '<database>_pub'
  - Replication slot: '<database>_slot'

The patch ensures that conflicting options such as '--all-databases' and
'--database', '--publication', '--subscription', or '--replication-slot' cannot
be used together.
---
 doc/src/sgml/ref/pg_createsubscriber.sgml     |  36 ++++++-
 src/bin/pg_basebackup/pg_createsubscriber.c   | 101 +++++++++++++++++-
 .../t/040_pg_createsubscriber.pl              |  49 +++++++++
 3 files changed, 178 insertions(+), 8 deletions(-)

diff --git a/doc/src/sgml/ref/pg_createsubscriber.sgml b/doc/src/sgml/ref/pg_createsubscriber.sgml
index 26b8e64a4e..4a9dded234 100644
--- a/doc/src/sgml/ref/pg_createsubscriber.sgml
+++ b/doc/src/sgml/ref/pg_createsubscriber.sgml
@@ -87,6 +87,32 @@ PostgreSQL documentation
    command-line arguments:
 
    <variablelist>
+    <varlistentry>
+     <term><option>-a</option></term>
+     <term><option>--all-databases</option></term>
+     <listitem>
+      <para>
+       Automatically fetch all non-template databases from the source server
+       and create subscriptions for databases with the same names on the
+       target server.
+       If a database is present on the source but missing on the target, it is
+       skipped or an error is raised.
+       If a database exists on the target but not on the source, no
+       subscription is created for it.
+       Subscription names, publication names, and replication slot names are
+       automatically generated using the format:
+       <literal><replaceable>database</replaceable>_sub</literal>
+       <literal><replaceable>database</replaceable>_pub</literal> and
+       <literal><replaceable>database</replaceable>_slot</literal> respectively.
+      </para>
+      <para>
+       <option>--all-databases</option> cannot be used with
+       <option>--publication</option>, <option>--subscription</option>,
+       or <option>--replication-slot</option>.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry>
      <term><option>-d <replaceable class="parameter">dbname</replaceable></option></term>
      <term><option>--database=<replaceable class="parameter">dbname</replaceable></option></term>
@@ -94,7 +120,7 @@ PostgreSQL documentation
       <para>
        The name of the database in which to create a subscription.  Multiple
        databases can be selected by writing multiple <option>-d</option>
-       switches.
+       switches. Cannot be used together with <option>-a</option>.
       </para>
      </listitem>
     </varlistentry>
@@ -214,7 +240,8 @@ PostgreSQL documentation
        names must match the number of specified databases, otherwise an error
        is reported.  The order of the multiple publication name switches must
        match the order of database switches.  If this option is not specified,
-       a generated name is assigned to the publication name.
+       a generated name is assigned to the publication name. Cannot be used
+       together with <option>-a</option>.
       </para>
      </listitem>
     </varlistentry>
@@ -230,7 +257,7 @@ PostgreSQL documentation
        otherwise an error is reported.  The order of the multiple replication
        slot name switches must match the order of database switches.  If this
        option is not specified, the subscription name is assigned to the
-       replication slot name.
+       replication slot name. Cannot be used together with <option>-a</option>.
       </para>
      </listitem>
     </varlistentry>
@@ -245,7 +272,8 @@ PostgreSQL documentation
        names must match the number of specified databases, otherwise an error
        is reported.  The order of the multiple subscription name switches must
        match the order of database switches.  If this option is not specified,
-       a generated name is assigned to the subscription name.
+       a generated name is assigned to the subscription name. Cannot be used
+       together with <option>-a</option>.
       </para>
      </listitem>
     </varlistentry>
diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c b/src/bin/pg_basebackup/pg_createsubscriber.c
index faf18ccf13..3e6dfb02fe 100644
--- a/src/bin/pg_basebackup/pg_createsubscriber.c
+++ b/src/bin/pg_basebackup/pg_createsubscriber.c
@@ -43,6 +43,7 @@ struct CreateSubscriberOptions
 	SimpleStringList sub_names; /* list of subscription names */
 	SimpleStringList replslot_names;	/* list of replication slot names */
 	int			recovery_timeout;	/* stop recovery after this time */
+	bool		all_databases;	/* fetch and specify all databases */
 };
 
 struct LogicalRepInfo
@@ -106,6 +107,7 @@ static void check_and_drop_existing_subscriptions(PGconn *conn,
 												  const struct LogicalRepInfo *dbinfo);
 static void drop_existing_subscriptions(PGconn *conn, const char *subname,
 										const char *dbname);
+static void fetch_source_databases(struct CreateSubscriberOptions *opt);
 
 #define	USEC_PER_SEC	1000000
 #define	WAIT_INTERVAL	1		/* 1 second */
@@ -220,6 +222,7 @@ usage(void)
 	printf(_("Usage:\n"));
 	printf(_("  %s [OPTION]...\n"), progname);
 	printf(_("\nOptions:\n"));
+	printf(_("  -a, --all-databases             create subscriptions for all matching databases on the target\n"));
 	printf(_("  -d, --database=DBNAME           database in which to create a subscription\n"));
 	printf(_("  -D, --pgdata=DATADIR            location for the subscriber data directory\n"));
 	printf(_("  -n, --dry-run                   dry run, just show what would be done\n"));
@@ -1860,11 +1863,69 @@ enable_subscription(PGconn *conn, const struct LogicalRepInfo *dbinfo)
 	destroyPQExpBuffer(str);
 }
 
+/* Fetch all databases from the source server */
+static void
+fetch_source_databases(struct CreateSubscriberOptions *opt)
+{
+	PGconn	   *conn;
+	PGresult   *res;
+	int			num_rows;
+	const char *query = "SELECT datname FROM pg_database WHERE datistemplate = false";
+
+	/* Establish a connection to the PostgreSQL server */
+	conn = connect_database(opt->pub_conninfo_str, true);
+
+	/* Check for connection errors */
+	if (PQstatus(conn) != CONNECTION_OK)
+	{
+		pg_log_error("connection to the source server failed: %s", PQerrorMessage(conn));
+		disconnect_database(conn, false);
+		exit(1);
+	}
+
+	res = PQexec(conn, query);
+
+	/* Check for errors during query execution */
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		pg_log_error("failed to execute query on the source server: %s", PQerrorMessage(conn));
+		PQclear(res);
+		disconnect_database(conn, false);
+		exit(1);
+	}
+
+	/* Process the query result */
+	num_rows = PQntuples(res);
+	for (int i = 0; i < num_rows; i++)
+	{
+		const char *dbname = PQgetvalue(res, i, 0);
+
+		simple_string_list_append(&opt->database_names, dbname);
+
+		/* Increment num_dbs to reflect multiple --database options */
+		num_dbs++;
+	}
+
+	/* Error if no databases were found on the source server */
+	if (num_rows == 0)
+	{
+		pg_log_error("no databases found on the source server");
+		pg_log_error_hint("Ensure that there are user-created databases on the source server.");
+		PQclear(res);
+		disconnect_database(conn, false);
+		exit(1);
+	}
+
+	PQclear(res);
+	disconnect_database(conn, false);
+}
+
 int
 main(int argc, char **argv)
 {
 	static struct option long_options[] =
 	{
+		{"all-databases", no_argument, NULL, 'a'},
 		{"database", required_argument, NULL, 'd'},
 		{"pgdata", required_argument, NULL, 'D'},
 		{"dry-run", no_argument, NULL, 'n'},
@@ -1927,6 +1988,7 @@ main(int argc, char **argv)
 	opt.socket_dir = NULL;
 	opt.sub_port = DEFAULT_SUB_PORT;
 	opt.sub_username = NULL;
+	opt.all_databases = false;
 	opt.database_names = (SimpleStringList)
 	{
 		0
@@ -1949,12 +2011,21 @@ main(int argc, char **argv)
 
 	get_restricted_token();
 
-	while ((c = getopt_long(argc, argv, "d:D:np:P:s:t:U:v",
+	while ((c = getopt_long(argc, argv, "ad:D:np:P:s:t:U:v",
 							long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
+			case 'a':
+				opt.all_databases = true;
+				break;
+
 			case 'd':
+				if (opt.all_databases)
+				{
+					pg_log_error("--database cannot be used with --all-databases");
+					exit(1);
+				}
 				if (!simple_string_list_member(&opt.database_names, optarg))
 				{
 					simple_string_list_append(&opt.database_names, optarg);
@@ -1977,6 +2048,11 @@ main(int argc, char **argv)
 				opt.sub_port = pg_strdup(optarg);
 				break;
 			case 'P':
+				if (opt.all_databases)
+				{
+					pg_log_error("--publication cannot be used with --all-databases");
+					exit(1);
+				}
 				opt.pub_conninfo_str = pg_strdup(optarg);
 				break;
 			case 's':
@@ -2008,6 +2084,11 @@ main(int argc, char **argv)
 				}
 				break;
 			case 3:
+				if (opt.all_databases)
+				{
+					pg_log_error("--replslot cannot be used with --all-databases");
+					exit(1);
+				}
 				if (!simple_string_list_member(&opt.replslot_names, optarg))
 				{
 					simple_string_list_append(&opt.replslot_names, optarg);
@@ -2020,6 +2101,11 @@ main(int argc, char **argv)
 				}
 				break;
 			case 4:
+				if (opt.all_databases)
+				{
+					pg_log_error("--subscription cannot be used with --all-databases");
+					exit(1);
+				}
 				if (!simple_string_list_member(&opt.sub_names, optarg))
 				{
 					simple_string_list_append(&opt.sub_names, optarg);
@@ -2091,14 +2177,21 @@ main(int argc, char **argv)
 	pg_log_info("validating subscriber connection string");
 	sub_base_conninfo = get_sub_conninfo(&opt);
 
+	/*
+	 * Fetch all databases from the source (publisher) if --all-databases is
+	 * specified.
+	 */
+	if (opt.all_databases)
+		fetch_source_databases(&opt);
+
 	if (opt.database_names.head == NULL)
 	{
 		pg_log_info("no database was specified");
 
 		/*
-		 * If --database option is not provided, try to obtain the dbname from
-		 * the publisher conninfo. If dbname parameter is not available, error
-		 * out.
+		 * If neither --database nor --all-databases option is provided, try
+		 * to obtain the dbname from the publisher conninfo. If dbname
+		 * parameter is not available, error out.
 		 */
 		if (dbname_conninfo)
 		{
diff --git a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
index c8dbdb7e9b..af2c991ee5 100644
--- a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
+++ b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
@@ -371,6 +371,55 @@ command_ok(
 	],
 	'run pg_createsubscriber without --databases');
 
+# run pg_createsubscriber with '--publication' and '--replication-slot' and
+# verify the failure
+command_fails(
+	[
+		'pg_createsubscriber',
+		'--verbose',
+		'--dry-run',
+		'--pgdata' => $node_s->data_dir,
+		'--publisher-server' => $node_p->connstr($db1),
+		'--socketdir' => $node_s->host,
+		'--subscriber-port' => $node_s->port,
+		'--publication' => 'pub1',
+		'--replication-slot' => 'replslot1',
+		'--all-databases',
+	],
+	qr/cannot specify --publication or --replication-slot when using --all-databases/,
+	'fail if --all-databases is used with publication and slot');
+
+# run pg_createsubscriber with '--dbname' and '--all-databases' and verify the
+# failure
+command_fails(
+	[
+		'pg_createsubscriber',
+		'--verbose',
+		'--dry-run',
+		'--pgdata' => $node_s->data_dir,
+		'--publisher-server' => $node_p->connstr($db1),
+		'--socketdir' => $node_s->host,
+		'--subscriber-port' => $node_s->port,
+		'--dbname' => 'db1',
+		'--all-databases',
+	],
+	qr/cannot specify --dbname when using --all-databases/,
+	'fail if --dbname is used with --all-databases');
+
+# run pg_createsubscriber with '--all-databases' option
+command_ok(
+	[
+		'pg_createsubscriber',
+		'--verbose',
+		'--dry-run',
+		'--pgdata' => $node_s->data_dir,
+		'--publisher-server' => $node_p->connstr($db1),
+		'--socketdir' => $node_s->host,
+		'--subscriber-port' => $node_s->port,
+		'--all-databases',
+	],
+	'run pg_createsubscriber with --all-databases');
+
 # Run pg_createsubscriber on node S.  --verbose is used twice
 # to show more information.
 command_ok(
-- 
2.34.1

