Hi,

We have some issues with CI on macos and windows being too expensive (more on
that soon in a separate email), which reminded me of this thread (with
original title: [1])

I've attached a somewhat cleaned up version of the patch to cache initdb
across runs.  The results are still fairly impressive in my opinion.


One thing I do not like, but don't have a good idea for how to improve, is
that there's a bunch of duplicated logic in pg_regress.c and Cluster.pm. I've
tried to move that into initdb.c itself, but that ends up pretty ugly, because
we need to be a lot more careful about checking whether options are compatible
etc. I've also thought about just putting this into a separate perl script,
but right now we still allow basic regression tests without perl being
available.  So I concluded that for now just having the copies is the best
answer.


Times for running all tests under meson, on my workstation (20 cores / 40
threads):

cassert build -O2:

Before:
real    0m44.638s
user    7m58.780s
sys     2m48.773s

After:
real    0m38.938s
user    2m37.615s
sys     2m0.570s


cassert build -O0:

Before:
real    1m11.290s
user    13m9.817s
sys     2m54.946s

After:
real    1m2.959s
user    3m5.835s
sys     1m59.887s


non-cassert build:

Before:
real    0m34.579s
user    5m30.418s
sys     2m40.507s

After:
real    0m27.710s
user    2m20.644s
sys     1m55.770s


On CI this reduces the test times substantially:
Freebsd                           8:51 -> 5:35
Debian w/ asan, autoconf          6:43 -> 4:55
Debian w/ alignmentsan, ubsan     4:02 -> 2:33
macos                             5:07 -> 4:29
windows                          10:21 -> 9:49

This is ignoring a bit of run-to-run variance, but the trend is obvious enough
that it's not worth worrying about that.

Greetings,

Andres Freund

[1] https://postgr.es/m/20220120021859.3zpsfqn4z7ob7afz%40alap3.anarazel.de
>From 0fd431c277f01284a91999a04368de6b59b6691e Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Thu, 2 Feb 2023 21:51:53 -0800
Subject: [PATCH v3 1/2] Use "template" initdb in tests

Discussion: https://postgr.es/m/20220120021859.3zpsfqn4z7ob7...@alap3.anarazel.de
---
 meson.build                              | 30 ++++++++++
 .cirrus.yml                              |  3 +-
 src/test/perl/PostgreSQL/Test/Cluster.pm | 46 ++++++++++++++-
 src/test/regress/pg_regress.c            | 74 ++++++++++++++++++------
 src/Makefile.global.in                   | 52 +++++++++--------
 5 files changed, 161 insertions(+), 44 deletions(-)

diff --git a/meson.build b/meson.build
index 04ea3488522..47429a18c3f 100644
--- a/meson.build
+++ b/meson.build
@@ -3056,8 +3056,10 @@ testport = 40000
 test_env = environment()
 
 temp_install_bindir = test_install_location / get_option('bindir')
+test_initdb_template = meson.build_root() / 'tmp_install' / 'initdb-template'
 test_env.set('PG_REGRESS', pg_regress.full_path())
 test_env.set('REGRESS_SHLIB', regress_module.full_path())
+test_env.set('INITDB_TEMPLATE', test_initdb_template)
 
 # Test suites that are not safe by default but can be run if selected
 # by the user via the whitespace-separated list in variable PG_TEST_EXTRA.
@@ -3072,6 +3074,34 @@ if library_path_var != ''
 endif
 
 
+# Create (and remove old) initdb template directory. Tests use that, where
+# possible, to make it cheaper to run tests.
+#
+# Use python to remove the old cached initdb, as we cannot rely on a working
+# 'rm' binary on windows.
+test('initdb_cache',
+     python,
+     args: [
+       '-c', '''
+import shutil
+import sys
+import subprocess
+
+shutil.rmtree(sys.argv[1], ignore_errors=True)
+sp = subprocess.run(sys.argv[2:] + [sys.argv[1]])
+sys.exit(sp.returncode)
+''',
+       test_initdb_template,
+       temp_install_bindir / 'initdb',
+       '-A', 'trust', '-N', '--no-instructions'
+     ],
+     priority: setup_tests_priority - 1,
+     timeout: 300,
+     is_parallel: false,
+     env: test_env,
+     suite: ['setup'])
+
+
 
 ###############################################################
 # Test Generation
diff --git a/.cirrus.yml b/.cirrus.yml
index d260f15c4e2..8fce17dff08 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -115,8 +115,9 @@ task:
   test_minimal_script: |
     su postgres <<-EOF
       ulimit -c unlimited
+      meson test $MTEST_ARGS --suite setup
       meson test $MTEST_ARGS --num-processes ${TEST_JOBS} \
-        tmp_install cube/regress pg_ctl/001_start_stop
+        cube/regress pg_ctl/001_start_stop
     EOF
 
   on_failure:
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 5e161dbee60..4d449c35de9 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -522,8 +522,50 @@ sub init
 	mkdir $self->backup_dir;
 	mkdir $self->archive_dir;
 
-	PostgreSQL::Test::Utils::system_or_bail('initdb', '-D', $pgdata, '-A',
-		'trust', '-N', @{ $params{extra} });
+	# If available and if there aren't any parameters, use a previously
+	# initdb'd cluster as a template by copying it. For a lot of tests, that's
+	# substantially cheaper. Do so only if there aren't parameters, it doesn't
+	# seem worth figuring out whether they affect compatibility.
+	#
+	# There's very similar code in pg_regress.c, but we can't easily
+	# deduplicate it until we require perl at build time.
+	if (defined $params{extra} or !defined $ENV{INITDB_TEMPLATE})
+	{
+		note("initializing database system by running initdb");
+		PostgreSQL::Test::Utils::system_or_bail('initdb', '-D', $pgdata, '-A',
+			'trust', '-N', @{ $params{extra} });
+	}
+	else
+	{
+		my @copycmd;
+		my $expected_exitcode;
+
+		note("initializing database system by copying initdb template");
+
+		if ($PostgreSQL::Test::Utils::windows_os)
+		{
+			@copycmd = qw(robocopy /E /NJS /NJH /NFL /NDL /NP);
+			$expected_exitcode = 1;    # 1 denotes files were copied
+		}
+		else
+		{
+			@copycmd = qw(cp -a);
+			$expected_exitcode = 0;
+		}
+
+		@copycmd = (@copycmd, $ENV{INITDB_TEMPLATE}, $pgdata);
+
+		my $ret = PostgreSQL::Test::Utils::system_log(@copycmd);
+
+		# See http://perldoc.perl.org/perlvar.html#%24CHILD_ERROR
+		if ($ret & 127 or $ret >> 8 != $expected_exitcode)
+		{
+			BAIL_OUT(
+				sprintf("failed to execute command \"%s\": $ret",
+					join(" ", @copycmd)));
+		}
+	}
+
 	PostgreSQL::Test::Utils::system_or_bail($ENV{PG_REGRESS},
 		'--config-auth', $pgdata, @{ $params{auth_extra} });
 
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index b68632320a7..407e3915cec 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -2295,6 +2295,7 @@ regression_main(int argc, char *argv[],
 		FILE	   *pg_conf;
 		const char *env_wait;
 		int			wait_seconds;
+		const char *initdb_template_dir;
 
 		/*
 		 * Prepare the temp instance
@@ -2316,25 +2317,64 @@ regression_main(int argc, char *argv[],
 		if (!directory_exists(buf))
 			make_directory(buf);
 
-		/* initdb */
 		initStringInfo(&cmd);
-		appendStringInfo(&cmd,
-						 "\"%s%sinitdb\" -D \"%s/data\" --no-clean --no-sync",
-						 bindir ? bindir : "",
-						 bindir ? "/" : "",
-						 temp_instance);
-		if (debug)
-			appendStringInfo(&cmd, " --debug");
-		if (nolocale)
-			appendStringInfo(&cmd, " --no-locale");
-		appendStringInfo(&cmd, " > \"%s/log/initdb.log\" 2>&1", outputdir);
-		fflush(NULL);
-		if (system(cmd.data))
+
+		/*
+		 * Create data directory.
+		 *
+		 * If available, use a previously initdb'd cluster as a template by
+		 * copying it. For a lot of tests, that's substantially cheaper.
+		 *
+		 * There's very similar code in Cluster.pm, but we can't easily de
+		 * duplicate it until we require perl at build time.
+		 */
+		initdb_template_dir = getenv("INITDB_TEMPLATE");
+		if (initdb_template_dir == NULL || nolocale || debug)
 		{
-			bail("initdb failed\n"
-				 "# Examine \"%s/log/initdb.log\" for the reason.\n"
-				 "# Command was: %s",
-				 outputdir, cmd.data);
+			note("initializing database system by running initdb");
+
+			appendStringInfo(&cmd,
+							 "\"%s%sinitdb\" -D \"%s/data\" --no-clean --no-sync",
+							 bindir ? bindir : "",
+							 bindir ? "/" : "",
+							 temp_instance);
+			if (debug)
+				appendStringInfo(&cmd, " --debug");
+			if (nolocale)
+				appendStringInfo(&cmd, " --no-locale");
+			appendStringInfo(&cmd, " > \"%s/log/initdb.log\" 2>&1", outputdir);
+			fflush(NULL);
+			if (system(cmd.data))
+			{
+				bail("initdb failed\n"
+					 "# Examine \"%s/log/initdb.log\" for the reason.\n"
+					 "# Command was: %s",
+					 outputdir, cmd.data);
+			}
+		}
+		else
+		{
+#ifndef WIN32
+			const char *copycmd = "cp -a \"%s\" \"%s/data\"";
+			int			expected_exitcode = 0;
+#else
+			const char *copycmd = "robocopy /E /NJS /NJH /NFL /NDL /NP \"%s\" \"%s/data\"";
+			int			expected_exitcode = 1;	/* 1 denotes files were copied */
+#endif
+
+			note("initializing database system by copying initdb template");
+
+			appendStringInfo(&cmd,
+							 copycmd,
+							 initdb_template_dir,
+							 temp_instance);
+			if (system(cmd.data) != expected_exitcode)
+			{
+				bail("copying of initdb template failed\n"
+					 "# Examine \"%s/log/initdb.log\" for the reason.\n"
+					 "# Command was: %s",
+					 outputdir, cmd.data);
+			}
 		}
 
 		pfree(cmd.data);
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index df9f721a41a..0b4ca0eb6ae 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -397,30 +397,6 @@ check: temp-install
 
 .PHONY: temp-install
 
-temp-install: | submake-generated-headers
-ifndef NO_TEMP_INSTALL
-ifneq ($(abs_top_builddir),)
-ifeq ($(MAKELEVEL),0)
-	rm -rf '$(abs_top_builddir)'/tmp_install
-	$(MKDIR_P) '$(abs_top_builddir)'/tmp_install/log
-	$(MAKE) -C '$(top_builddir)' DESTDIR='$(abs_top_builddir)'/tmp_install install >'$(abs_top_builddir)'/tmp_install/log/install.log 2>&1
-	$(MAKE) -j1 $(if $(CHECKPREP_TOP),-C $(CHECKPREP_TOP),) checkprep >>'$(abs_top_builddir)'/tmp_install/log/install.log 2>&1
-endif
-endif
-endif
-
-# Tasks to run serially at the end of temp-install.  Some EXTRA_INSTALL
-# entries appear more than once in the tree, and parallel installs of the same
-# file can fail with EEXIST.
-checkprep:
-	$(if $(EXTRA_INSTALL),for extra in $(EXTRA_INSTALL); do $(MAKE) -C '$(top_builddir)'/$$extra DESTDIR='$(abs_top_builddir)'/tmp_install install || exit; done)
-
-PROVE = @PROVE@
-# There are common routines in src/test/perl, and some test suites have
-# extra perl modules in their own directory.
-PG_PROVE_FLAGS = -I $(top_srcdir)/src/test/perl/ -I $(srcdir)
-# User-supplied prove flags such as --verbose can be provided in PROVE_FLAGS.
-PROVE_FLAGS =
 
 # prepend to path if already set, else just set it
 define add_to_path
@@ -437,8 +413,36 @@ ld_library_path_var = LD_LIBRARY_PATH
 with_temp_install = \
 	PATH="$(abs_top_builddir)/tmp_install$(bindir):$(CURDIR):$$PATH" \
 	$(call add_to_path,$(strip $(ld_library_path_var)),$(abs_top_builddir)/tmp_install$(libdir)) \
+	INITDB_TEMPLATE='$(abs_top_builddir)'/tmp_install/initdb-template \
 	$(with_temp_install_extra)
 
+temp-install: | submake-generated-headers
+ifndef NO_TEMP_INSTALL
+ifneq ($(abs_top_builddir),)
+ifeq ($(MAKELEVEL),0)
+	rm -rf '$(abs_top_builddir)'/tmp_install
+	$(MKDIR_P) '$(abs_top_builddir)'/tmp_install/log
+	$(MAKE) -C '$(top_builddir)' DESTDIR='$(abs_top_builddir)'/tmp_install install >'$(abs_top_builddir)'/tmp_install/log/install.log 2>&1
+	$(MAKE) -j1 $(if $(CHECKPREP_TOP),-C $(CHECKPREP_TOP),) checkprep >>'$(abs_top_builddir)'/tmp_install/log/install.log 2>&1
+
+	$(with_temp_install) initdb -A trust -N --no-instructions '$(abs_top_builddir)'/tmp_install/initdb-template >>'$(abs_top_builddir)'/tmp_install/log/initdb-template.log 2>&1
+endif
+endif
+endif
+
+# Tasks to run serially at the end of temp-install.  Some EXTRA_INSTALL
+# entries appear more than once in the tree, and parallel installs of the same
+# file can fail with EEXIST.
+checkprep:
+	$(if $(EXTRA_INSTALL),for extra in $(EXTRA_INSTALL); do $(MAKE) -C '$(top_builddir)'/$$extra DESTDIR='$(abs_top_builddir)'/tmp_install install || exit; done)
+
+PROVE = @PROVE@
+# There are common routines in src/test/perl, and some test suites have
+# extra perl modules in their own directory.
+PG_PROVE_FLAGS = -I $(top_srcdir)/src/test/perl/ -I $(srcdir)
+# User-supplied prove flags such as --verbose can be provided in PROVE_FLAGS.
+PROVE_FLAGS =
+
 ifeq ($(enable_tap_tests),yes)
 
 ifndef PGXS
-- 
2.38.0

Reply via email to