From dea409d093d4be1b844d1103a75f56bcafcfc342 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Fri, 17 Sep 2021 06:15:33 +0000
Subject: [PATCH v3] improve pg_receivewal code

This patch makes the following improvements to pg_receivewal.c:

1) Fetch the server system identifier in StreamLog's RunIdentifySystem
call and use it to identify (via pg_receivewal's ReceiveXlogStream)
any unexpected changes that may happen in the server while pg_receivewal
is connected to it. This can be helpful in scenarios where pg_receivewal
tries to reconnect to the server (see the code around pg_usleep
with RECONNECT_SLEEP_TIME) but something unexpected has happened
in the server that changed its system identifier. Once
pg_receivewal establishes the connection to the server again,
ReceiveXlogStream has a code chunk that compares the server's system
identifier against the one that we received in the initial connection.
2) Move the RunIdentifySystem call so that the timeline ID and start LSN
are fetched from the server only if pg_receivewal failed to get them from
FindStreamingStart. This way, an extra IDENTIFY_SYSTEM command is avoided.
3) Place the "replication connection shouldn't have any database name
associated" error check right after RunIdentifySystem so that we
avoid fetching the WAL segment size with RetrieveWalSegSize when we
are going to fail with that error anyway. This change is similar to
what pg_recvlogical.c does.
4) Move the RetrieveWalSegSize call to just before pg_receivewal
enters its main loop to receive WAL from the server. This avoids an
unnecessary query when pg_receivewal is run with "--create-slot" or
"--drop-slot".
5) Add an assertion after pg_receivewal has done a good amount of
work to find the start timeline and LSN, as a sanity check:
Assert(stream.timeline != 0 && stream.startpos != InvalidXLogRecPtr);
---
 src/bin/pg_basebackup/pg_receivewal.c | 34 ++++++++++++++-------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index 9d1843728d..1c094040fc 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -47,7 +47,6 @@ static bool synchronous = false;
 static char *replication_slot = NULL;
 static XLogRecPtr endpos = InvalidXLogRecPtr;
 
-
 static void usage(void);
 static DIR *get_destination_dir(char *dest_folder);
 static void close_destination_dir(DIR *dest_dir, char *dest_folder);
@@ -399,7 +398,7 @@ StreamLog(void)
 	 * at the same time, necessary if not valid data can be found in the
 	 * existing output directory.
 	 */
-	if (!RunIdentifySystem(conn, NULL, &servertli, &serverpos, NULL))
+	if (!RunIdentifySystem(conn, &stream.sysidentifier, &servertli, &serverpos, NULL))
 		exit(1);
 
 	/*
@@ -412,6 +411,8 @@ StreamLog(void)
 		stream.timeline = servertli;
 	}
 
+	Assert(stream.timeline != 0 && stream.startpos != InvalidXLogRecPtr);
+
 	/*
 	 * Always start streaming at the beginning of a segment
 	 */
@@ -449,6 +450,7 @@ StreamLog(void)
 
 	FreeWalDirectoryMethod();
 	pg_free(stream.walmethod);
+	pg_free(stream.sysidentifier);
 }
 
 /*
@@ -687,20 +689,6 @@ main(int argc, char **argv)
 	if (!RunIdentifySystem(conn, NULL, NULL, NULL, &db_name))
 		exit(1);
 
-	/*
-	 * Set umask so that directories/files are created with the same
-	 * permissions as directories/files in the source data directory.
-	 *
-	 * pg_mode_mask is set to owner-only by default and then updated in
-	 * GetConnection() where we get the mode from the server-side with
-	 * RetrieveDataDirCreatePerm() and then call SetDataDirectoryCreatePerm().
-	 */
-	umask(pg_mode_mask);
-
-	/* determine remote server's xlog segment size */
-	if (!RetrieveWalSegSize(conn))
-		exit(1);
-
 	/*
 	 * Check that there is a database associated with connection, none should
 	 * be defined in this context.
@@ -712,6 +700,16 @@ main(int argc, char **argv)
 		exit(1);
 	}
 
+	/*
+	 * Set umask so that directories/files are created with the same
+	 * permissions as directories/files in the source data directory.
+	 *
+	 * pg_mode_mask is set to owner-only by default and then updated in
+	 * GetConnection() where we get the mode from the server-side with
+	 * RetrieveDataDirCreatePerm() and then call SetDataDirectoryCreatePerm().
+	 */
+	umask(pg_mode_mask);
+
 	/*
 	 * Drop a replication slot.
 	 */
@@ -737,6 +735,10 @@ main(int argc, char **argv)
 		exit(0);
 	}
 
+	/* determine remote server's xlog segment size */
+	if (!RetrieveWalSegSize(conn))
+		exit(1);
+
 	/*
 	 * Don't close the connection here so that subsequent StreamLog() can
 	 * reuse it.
-- 
2.25.1

