From 3cce74e0a8124e2c7c3745e961b99e4700677692 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Tue, 11 Jun 2024 10:24:00 +0200
Subject: [PATCH] pg_upgrade: use CREATE DATABASE ... STRATEGY=FILE_COPY

While the strategy is considered very costly due to expensive checkpoints
when the target system has configured large shared_buffers, the issues that
those checkpoints protect against don't apply in a pg_upgrade system.

By skipping the checkpoints during CREATE DATABASE ... STRATEGY=FILE_COPY
and using that strategy for pg_upgrade, we can save a lot of time and IO
that would otherwise be spent on reading pages from disk, generating WAL
records, writing those WAL records to disk, and then writing the pages to
disk - which has the potential of at least double the IO of FILE_COPY.

Author: Nathan Bossart
Author: Matthias van de Meent
---
 src/backend/commands/dbcommands.c | 20 +++++++++++++++++---
 src/bin/pg_dump/pg_dump.c         |  4 +++-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index be629ea92c..04ad6f0b34 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -563,9 +563,16 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
 	 * happened while we're copying files, a file might be deleted just when
 	 * we're about to copy it, causing the lstat() call in copydir() to fail
 	 * with ENOENT.
+	 *
+	 * While we're in binary upgrade mode we have full control over which
+	 * databases we access. As the template database is always going to be
+	 * template0, which doesn't allow connections, we can be sure that that
+	 * database won't have any modified pages in shared memory, thus we can
+	 * omit this checkpoint.
 	 */
-	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
-					  CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL);
+	if (!IsBinaryUpgrade)
+		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
+						  CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL);
 
 	/*
 	 * Iterate through all tablespaces of the template database, and copy each
@@ -657,10 +664,17 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
 	 * seem to be much we can do about that except document it as a
 	 * limitation.
 	 *
+	 * In binary upgrade mode, neither of these are a concern: generated WAL
+	 * is dropped, and the template0 database (which is the only source
+	 * database used in binary upgrade) can't have concurrent modifications,
+	 * so we skip this checkpoint.
+	 *
 	 * See CreateDatabaseUsingWalLog() for a less cheesy CREATE DATABASE
 	 * strategy that avoids these problems.
 	 */
-	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+	if (!IsBinaryUpgrade)
+		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
+						  CHECKPOINT_WAIT);
 }
 
 /*
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index cb14fcafea..aec702675c 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -3126,7 +3126,9 @@ dumpDatabase(Archive *fout)
 	 */
 	if (dopt->binary_upgrade)
 	{
-		appendPQExpBuffer(creaQry, "CREATE DATABASE %s WITH TEMPLATE = template0 OID = %u",
+		appendPQExpBuffer(creaQry,
+						  "CREATE DATABASE %s WITH TEMPLATE = template0 OID = %u "
+						  "STRATEGY = FILE_COPY",
 						  qdatname, dbCatId.oid);
 	}
 	else
-- 
2.40.1

