From 5180545cbcefeb98ddc80ee22441d51253328986 Mon Sep 17 00:00:00 2001
From: Shubham Khanna <shubham.khanna@fujitsu.com>
Date: Wed, 22 Jan 2025 12:03:12 +0530
Subject: [PATCH v2] Enhance 'pg_createsubscriber' to fetch and append all
 databases

This patch enhances the 'pg_createsubscriber' utility to automatically
fetch all non-template databases from the publisher and create subscriptions
for them. This simplifies converting a physical standby to a logical subscriber
for multiple databases, particularly during upgrades.

A new function 'fetch_all_databases' in 'pg_createsubscriber.c'
that queries the publisher for all databases and adds them to the subscription
options is added.
Additionally, 'validate_databases' ensures that conflicting options are not
used together, enforcing correct usage of '--all-databases'.

The '--all-databases' option fetches all databases from the publisher.
It auto-generates publication and replication slot names as 'pub_names' and
'slot_names' respectively.
This option validates database name lengths to ensure generated names fit
within system limits.
---
 doc/src/sgml/ref/pg_createsubscriber.sgml     |  18 ++-
 src/bin/pg_basebackup/pg_createsubscriber.c   | 110 +++++++++++++++++-
 .../t/040_pg_createsubscriber.pl              |  90 ++++++++++++++
 3 files changed, 216 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/ref/pg_createsubscriber.sgml b/doc/src/sgml/ref/pg_createsubscriber.sgml
index 26b8e64a4e..8dea53c955 100644
--- a/doc/src/sgml/ref/pg_createsubscriber.sgml
+++ b/doc/src/sgml/ref/pg_createsubscriber.sgml
@@ -87,6 +87,18 @@ PostgreSQL documentation
    command-line arguments:
 
    <variablelist>
+    <varlistentry>
+     <term><option>-a</option></term>
+     <term><option>--all-databases</option></term>
+     <listitem>
+      <para>
+       Automatically fetch all non-template databases from the publisher and
+       create subscriptions for them.
+       replica.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry>
      <term><option>-d <replaceable class="parameter">dbname</replaceable></option></term>
      <term><option>--database=<replaceable class="parameter">dbname</replaceable></option></term>
@@ -94,7 +106,11 @@ PostgreSQL documentation
       <para>
        The name of the database in which to create a subscription.  Multiple
        databases can be selected by writing multiple <option>-d</option>
-       switches.
+       switches. If neither <option>-d</option> nor <option>-a</option> is
+       specified, <application>pg_createsubscriber</application> will use
+       <option>--all-databases</option> by default. When using
+       <option>-a</option>, ensure database names are shorter than 59 characters
+       to allow for generated publication and slot names.
       </para>
      </listitem>
     </varlistentry>
diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c b/src/bin/pg_basebackup/pg_createsubscriber.c
index faf18ccf13..ed06388966 100644
--- a/src/bin/pg_basebackup/pg_createsubscriber.c
+++ b/src/bin/pg_basebackup/pg_createsubscriber.c
@@ -43,6 +43,7 @@ struct CreateSubscriberOptions
 	SimpleStringList sub_names; /* list of subscription names */
 	SimpleStringList replslot_names;	/* list of replication slot names */
 	int			recovery_timeout;	/* stop recovery after this time */
+	bool		all_databases;	/* fetch and specify all databases */
 };
 
 struct LogicalRepInfo
@@ -106,6 +107,8 @@ static void check_and_drop_existing_subscriptions(PGconn *conn,
 												  const struct LogicalRepInfo *dbinfo);
 static void drop_existing_subscriptions(PGconn *conn, const char *subname,
 										const char *dbname);
+static void fetch_all_databases(struct CreateSubscriberOptions *opt);
+static void validate_databases(struct CreateSubscriberOptions *opt);
 
 #define	USEC_PER_SEC	1000000
 #define	WAIT_INTERVAL	1		/* 1 second */
@@ -220,6 +223,7 @@ usage(void)
 	printf(_("Usage:\n"));
 	printf(_("  %s [OPTION]...\n"), progname);
 	printf(_("\nOptions:\n"));
+	printf(_("  -a, --all-databases             fetch and specify all databases\n"));
 	printf(_("  -d, --database=DBNAME           database in which to create a subscription\n"));
 	printf(_("  -D, --pgdata=DATADIR            location for the subscriber data directory\n"));
 	printf(_("  -n, --dry-run                   dry run, just show what would be done\n"));
@@ -1860,11 +1864,104 @@ enable_subscription(PGconn *conn, const struct LogicalRepInfo *dbinfo)
 	destroyPQExpBuffer(str);
 }
 
+/* Placeholder function to fetch all databases from the publisher */
+static void
+fetch_all_databases(struct CreateSubscriberOptions *opt)
+{
+	PGconn	   *conn;
+	PGresult   *res;
+	int			num_rows;
+	const char *query = "SELECT datname FROM pg_database WHERE datistemplate = false";
+
+	/* Establish a connection to the PostgreSQL server */
+	conn = PQconnectdb(opt->pub_conninfo_str);
+	/* Check for connection errors */
+	if (PQstatus(conn) != CONNECTION_OK)
+	{
+		pg_log_error("connection to the PostgreSQL server failed: %s", PQerrorMessage(conn));
+		PQfinish(conn);
+		exit(1);
+	}
+
+	res = PQexec(conn, query);
+
+	/* Check for errors during query execution */
+	if (PQresultStatus(res) != PGRES_TUPLES_OK)
+	{
+		pg_log_error("failed to execute query on the PostgreSQL server: %s", PQerrorMessage(conn));
+		PQclear(res);
+		PQfinish(conn);
+		exit(1);
+	}
+
+	/* Process the query result */
+	num_rows = PQntuples(res);
+	for (int i = 0; i < num_rows; i++)
+	{
+		const char *dbname = PQgetvalue(res, i, 0);
+
+		simple_string_list_append(&opt->database_names, dbname);
+		num_dbs++;
+	}
+
+	/* Check if any databases were added */
+	if (opt->database_names.head == NULL)
+	{
+		pg_log_error("no database names could be fetched or specified");
+		pg_log_error_hint("Ensure that databases exist on the publisher or specify a database using --database option.");
+		PQclear(res);
+		PQfinish(conn);
+		exit(1);
+	}
+
+	PQclear(res);
+	PQfinish(conn);
+}
+
+/* Function to validate all the databases and generate publication/slot names
+ * when using '--all-databases'.
+ */
+static void
+validate_databases(struct CreateSubscriberOptions *opt)
+{
+	/* Check for conflicting options */
+	if (opt->all_databases && opt->database_names.head != NULL)
+	{
+		pg_log_error("cannot specify both --all-databases and -d/--database");
+		exit(1);
+	}
+
+	/* Auto-generate parameters if using --all-databases */
+	if (opt->all_databases)
+	{
+		/* Generate publication and slot names if not specified */
+		SimpleStringListCell *cell;
+
+		fetch_all_databases(opt);
+
+		cell = opt->database_names.head;
+
+		while (cell != NULL)
+		{
+			char		slot_name[NAMEDATALEN];
+
+			snprintf(slot_name, sizeof(slot_name), "%s_slot", cell->val);
+			simple_string_list_append(&opt->replslot_names, slot_name);
+
+			snprintf(slot_name, sizeof(slot_name), "%s_pub", cell->val);
+			simple_string_list_append(&opt->pub_names, slot_name);
+
+			cell = cell->next;
+		}
+	}
+}
+
 int
 main(int argc, char **argv)
 {
 	static struct option long_options[] =
 	{
+		{"all-databases", no_argument, NULL, 'a'},
 		{"database", required_argument, NULL, 'd'},
 		{"pgdata", required_argument, NULL, 'D'},
 		{"dry-run", no_argument, NULL, 'n'},
@@ -1927,6 +2024,7 @@ main(int argc, char **argv)
 	opt.socket_dir = NULL;
 	opt.sub_port = DEFAULT_SUB_PORT;
 	opt.sub_username = NULL;
+	opt.all_databases = false;
 	opt.database_names = (SimpleStringList)
 	{
 		0
@@ -1949,11 +2047,15 @@ main(int argc, char **argv)
 
 	get_restricted_token();
 
-	while ((c = getopt_long(argc, argv, "d:D:np:P:s:t:U:v",
+	while ((c = getopt_long(argc, argv, "ad:D:np:P:s:t:U:v",
 							long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
+			case 'a':
+				opt.all_databases = true;
+				break;
+
 			case 'd':
 				if (!simple_string_list_member(&opt.database_names, optarg))
 				{
@@ -2091,6 +2193,10 @@ main(int argc, char **argv)
 	pg_log_info("validating subscriber connection string");
 	sub_base_conninfo = get_sub_conninfo(&opt);
 
+	/* Validate and process database options */
+	if (opt.all_databases)
+		validate_databases(&opt);
+
 	if (opt.database_names.head == NULL)
 	{
 		pg_log_info("no database was specified");
@@ -2117,6 +2223,8 @@ main(int argc, char **argv)
 		}
 	}
 
+
+
 	/* Number of object names must match number of databases */
 	if (num_pubs > 0 && num_pubs != num_dbs)
 	{
diff --git a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
index c8dbdb7e9b..90874ae909 100644
--- a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
+++ b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
@@ -448,10 +448,100 @@ my $sysid_s = $node_s->safe_psql('postgres',
 	'SELECT system_identifier FROM pg_control_system()');
 ok($sysid_p != $sysid_s, 'system identifier was changed');
 
+# Set up node A as primary
+my $node_a = PostgreSQL::Test::Cluster->new('node_a');
+my $aconnstr = $node_a->connstr;
+$node_a->init(allows_streaming => 'logical');
+$node_a->append_conf('postgresql.conf', 'autovacuum = off');
+$node_a->start;
+
+# Set up node B as standby linking to node A
+$node_a->backup('backup_3');
+my $node_b = PostgreSQL::Test::Cluster->new('node_b');
+$node_b->init_from_backup($node_a, 'backup_3', has_streaming => 1);
+$node_b->append_conf(
+	'postgresql.conf', qq[
+primary_conninfo = '$aconnstr dbname=postgres'
+hot_standby_feedback = on
+max_logical_replication_workers = 5
+]);
+$node_b->set_standby_mode();
+$node_b->start;
+
+# Fetch the count of non-template databases on the publisher before
+# running pg_createsubscriber without --all-databases option
+my $db_count_before =
+  $node_a->safe_psql('postgres',
+	"SELECT count(*) FROM pg_database WHERE datistemplate = false;");
+is($db_count_before, '1', 'database count without --all-databases option');
+
+# Ensure there are some user databases on the publisher
+$node_a->safe_psql('postgres', 'CREATE DATABASE db3');
+$node_a->safe_psql('postgres', 'CREATE DATABASE db4');
+
+$node_b->stop;
+
+# run pg_createsubscriber publication and slot and verify the failure
+command_fails(
+	[
+		'pg_createsubscriber',
+		'--verbose',
+		'--pgdata' => $node_b->data_dir,
+		'--publisher-server' => $node_a->connstr('postgres'),
+		'--socketdir' => $node_b->host,
+		'--subscriber-port' => $node_b->port,
+		'--publication' => 'pub1',
+		'--replication-slot' => 'replslot1',
+		'--all-databases',
+	],
+	qr/cannot use --all-databases with --publication or --replication-slot/,
+	'fail if --all-databases is used with publication and slot');
+
+# run pg_createsubscriber '--dbname' and '--all-databases' and verify the
+# conflict issue
+command_fails(
+	[
+		'pg_createsubscriber',
+		'--verbose',
+		'--pgdata' => $node_b->data_dir,
+		'--publisher-server' => $node_a->connstr('postgres'),
+		'--socketdir' => $node_b->host,
+		'--subscriber-port' => $node_b->port,
+		'--dbname' => 'db3',
+		'--all-databases',
+	],
+	qr/cannot use --dbname with --all-databases/,
+	'fail if --dbname is used with --all-databases');
+
+# run pg_createsubscriber with --all-databases option
+command_ok(
+	[
+		'pg_createsubscriber',
+		'--verbose',
+		'--pgdata' => $node_b->data_dir,
+		'--publisher-server' => $node_a->connstr('postgres'),
+		'--socketdir' => $node_b->host,
+		'--subscriber-port' => $node_b->port,
+		'--all-databases',
+	],
+	'run pg_createsubscriber with --all-databases');
+
+$node_b->start;
+# Fetch the count of non-template databases on the subscriber after
+# running pg_createsubscriber with --all-databases option
+my $db_count_after =
+  $node_b->safe_psql('postgres',
+	"SELECT count(*) FROM pg_database WHERE datistemplate = false;");
+is($db_count_after, '3', 'database count with --all-databases option');
+
+$node_b->stop;
+
 # clean up
 $node_p->teardown_node;
 $node_s->teardown_node;
 $node_t->teardown_node;
 $node_f->teardown_node;
+$node_a->teardown_node;
+$node_b->teardown_node;
 
 done_testing();
-- 
2.34.1

