From d31f4a268877f5551f9107e7110791228675e36a Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson <daniel@yesql.se>
Date: Mon, 23 Aug 2021 13:25:26 +0200
Subject: [PATCH v2] Quick exit on log stream child exit in pg_basebackup

If the log streaming child process (thread on Windows) dies during
backup then the whole backup will be aborted at the end of the
backup.  Instead, trap ungraceful termination of the log streaming
child and exit early.
---
 src/bin/pg_basebackup/pg_basebackup.c | 42 ++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 7296eb97d0..1a55c06834 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -174,6 +174,8 @@ static int	bgpipe[2] = {-1, -1};
 /* Handle to child process */
 static pid_t bgchild = -1;
 static bool in_log_streamer = false;
+/* State of child process */
+static volatile sig_atomic_t bgchild_exited = false;
 
 /* End position for xlog streaming, empty string if unknown yet */
 static XLogRecPtr xlogendptr;
@@ -268,6 +270,18 @@ disconnect_atexit(void)
 }
 
 #ifndef WIN32
+/*
+ * If the bgchild exits prematurely and raises a SIGCHLD signal, we can abort
+ * processing rather than wait till the backup has finished and error out at
+ * that time. On Windows we use a background thread which can communicate
+ * without the need for a signal handler.
+ */
+static void
+sigchld_handler(SIGNAL_ARGS)
+{
+	bgchild_exited = true;
+}
+
 /*
  * On windows, our background thread dies along with the process. But on
  * Unix, if we have started a subprocess, we want to kill it off so it
@@ -562,17 +576,28 @@ LogStreamerMain(logstreamer_param *param)
 											  stream.do_sync);
 
 	if (!ReceiveXlogStream(param->bgconn, &stream))
-
+	{
 		/*
 		 * Any errors will already have been reported in the function process,
 		 * but we need to tell the parent that we didn't shutdown in a nice
 		 * way.
 		 */
+#ifdef WIN32
+		/*
+		 * In order to signal the main thread of an ungraceful exit we
+		 * set the same flag that we use on Unix to signal SIGCHLD.
+		 */
+		bgchild_exited = true;
+#endif
 		return 1;
+	}
 
 	if (!stream.walmethod->finish())
 	{
 		pg_log_error("could not finish writing WAL files: %m");
+#ifdef WIN32
+		bgchild_exited = true;
+#endif
 		return 1;
 	}
 
@@ -985,6 +1010,12 @@ ReceiveCopyData(PGconn *conn, WriteDataCallback callback,
 			exit(1);
 		}
 
+		if (bgchild_exited)
+		{
+			pg_log_error("background WAL receiver terminated unexpectedly");
+			exit(1);
+		}
+
 		(*callback) (r, copybuf, callback_data);
 
 		PQfreemem(copybuf);
@@ -2600,6 +2631,15 @@ main(int argc, char **argv)
 	}
 	atexit(disconnect_atexit);
 
+#ifndef WIN32
+	/*
+	 * Trap SIGCHLD to be able to handle the WAL stream process exiting. There
+	 * is no SIGCHLD on Windows, there we rely on the background thread setting
+	 * the signal variable on ungraceful exit.
+	 */
+	pqsignal(SIGCHLD, sigchld_handler);
+#endif
+
 	/*
 	 * Set umask so that directories/files are created with the same
 	 * permissions as directories/files in the source data directory.
-- 
2.24.3 (Apple Git-128)

