From 668772af9bcce2c266bb1d135aa38b3259b83859 Mon Sep 17 00:00:00 2001
From: Israel Barth Rubio <barthisrael@gmail.com>
Date: Tue, 14 Jan 2025 19:49:35 +0000
Subject: [PATCH v2] pg_combinebackup: add support for hard links

Up to now, pg_combinebackup reconstructs incremental files, if needed,
otherwise copy them from any of the input backups to the output
directory. That copy mecanism can use different methods, depending on
the argument specified by the user.

This commit adds support for a new "copy method": hard links
(-k/--link). When using that mode, instead of copying unmodified files
from the input backups to the output directory, pg_combinebackup
creates the files as hard links from the output directory to the input
backups.

The new link method might speed up the reconstruction of the synthetic
backup (no file copy) and reduce disk usage taken by the synthetic
backup. The benefits depend on the modification pattern of files in
PGDATA between backups, imposed by the workload on Postgres.

This feature requires that the input backups plus the output directory
are in the same file system. Also, caution is required from the user
when modifying or starting the cluster from a synthetic backup, as that
might invalidate one or more of the input backups.

Signed-off-by: Israel Barth Rubio <barthisrael@gmail.com>
---
 doc/src/sgml/ref/pg_combinebackup.sgml      |  57 +++++++-
 src/bin/pg_combinebackup/copy_file.c        |  25 ++++
 src/bin/pg_combinebackup/copy_file.h        |   1 +
 src/bin/pg_combinebackup/pg_combinebackup.c |  19 ++-
 src/bin/pg_combinebackup/t/003_timeline.pl  |  35 ++++-
 src/bin/pg_combinebackup/t/010_links.pl     | 139 ++++++++++++++++++++
 6 files changed, 269 insertions(+), 7 deletions(-)
 create mode 100644 src/bin/pg_combinebackup/t/010_links.pl

diff --git a/doc/src/sgml/ref/pg_combinebackup.sgml b/doc/src/sgml/ref/pg_combinebackup.sgml
index 091982f62a..185cf6fff4 100644
--- a/doc/src/sgml/ref/pg_combinebackup.sgml
+++ b/doc/src/sgml/ref/pg_combinebackup.sgml
@@ -137,13 +137,38 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>-k</option></term>
+      <term><option>--link</option></term>
+      <listitem>
+       <para>
+        Use hard links instead of copying files to the synthetic backup.
+        Reconstruction of the synthetic backup might be faster (no file copying)
+        and use less disk space.
+       </para>
+
+       <para>
+        Requires that the input backups and the output directory are in the
+        same file system.
+       </para>
+
+       <para>
+        If a backup manifest is not available or does not contain checksum of
+        the right type, file cloning will be used to copy the file, but the
+        file will be also read block-by-block for the checksum calculation.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--clone</option></term>
       <listitem>
        <para>
         Use efficient file cloning (also known as <quote>reflinks</quote> on
         some systems) instead of copying files to the new data directory,
-        which can result in near-instantaneous copying of the data files.
+        which can result in near-instantaneous copying of the data files, giving
+        the speed advantages of <option>-k</option>/<option>--link</option>
+        while leaving the input backups untouched.
        </para>
 
        <para>
@@ -167,7 +192,8 @@ PostgreSQL documentation
       <listitem>
        <para>
         Perform regular file copy.  This is the default.  (See also
-        <option>--copy-file-range</option> and <option>--clone</option>.)
+        <option>--copy-file-range</option>, <option>--clone</option> and
+        <option>-k</option>/<option>--link</option>.)
        </para>
       </listitem>
      </varlistentry>
@@ -308,6 +334,33 @@ PostgreSQL documentation
   </para>
  </refsect1>
 
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   When <application>pg_combinebackup</application> is run with
+   <option>-k</option>/<option>--link</option>, the output synthetic backup may
+   share several files with the input backups because of the hard links it
+   creates. Any changes performed on files that were linked between any of the
+   input backups and the synthetic backup, will be reflected on both places.
+  </para>
+
+  <caution>
+   <para>
+    Modifying files or starting a cluster from a synthetic backup that was
+    reconstructed with <option>-k</option>/<option>--link</option>, might
+    invalidate one or more of the inputs backups that were used to reconstruct
+    it.
+   </para>
+
+   <para>
+    If the input directories are not staging directories, it is recommended to
+    move the synthetic backup to another file system or machine before
+    performing changes to its files and/or starting the cluster.
+   </para>
+  </caution>
+ </refsect1>
+
  <refsect1>
   <title>See Also</title>
 
diff --git a/src/bin/pg_combinebackup/copy_file.c b/src/bin/pg_combinebackup/copy_file.c
index 4e27814839..8687e23fde 100644
--- a/src/bin/pg_combinebackup/copy_file.c
+++ b/src/bin/pg_combinebackup/copy_file.c
@@ -40,6 +40,9 @@ static void copy_file_copyfile(const char *src, const char *dst,
 							   pg_checksum_context *checksum_ctx);
 #endif
 
+static void copy_file_link(const char *src, const char *dest,
+						   pg_checksum_context *checksum_ctx);
+
 /*
  * Copy a regular file, optionally computing a checksum, and emitting
  * appropriate debug messages. But if we're in dry-run mode, then just emit
@@ -93,6 +96,10 @@ copy_file(const char *src, const char *dst,
 			strategy_implementation = copy_file_copyfile;
 			break;
 #endif
+		case COPY_METHOD_LINK:
+			strategy_name = "link";
+			strategy_implementation = copy_file_link;
+			break;
 	}
 
 	if (dry_run)
@@ -304,3 +311,21 @@ copy_file_copyfile(const char *src, const char *dst,
 	checksum_file(src, checksum_ctx);
 }
 #endif							/* WIN32 */
+
+/*
+ * copy_file_link
+ * 		Hard-links a file from src to dest.
+ *
+ * If needed, also reads the file and calculates the checksum.
+ */
+static void
+copy_file_link(const char *src, const char *dest,
+			   pg_checksum_context *checksum_ctx)
+{
+	if (link(src, dest) < 0)
+		pg_fatal("error while linking file from \"%s\" to \"%s\": %m",
+				 src, dest);
+
+	/* if needed, calculate checksum of the file */
+	checksum_file(src, checksum_ctx);
+}
diff --git a/src/bin/pg_combinebackup/copy_file.h b/src/bin/pg_combinebackup/copy_file.h
index 92f104115b..5a8517629c 100644
--- a/src/bin/pg_combinebackup/copy_file.h
+++ b/src/bin/pg_combinebackup/copy_file.h
@@ -25,6 +25,7 @@ typedef enum CopyMethod
 #ifdef WIN32
 	COPY_METHOD_COPYFILE,
 #endif
+	COPY_METHOD_LINK,
 } CopyMethod;
 
 extern void copy_file(const char *src, const char *dst,
diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c
index 5864ec574f..16b6715d7c 100644
--- a/src/bin/pg_combinebackup/pg_combinebackup.c
+++ b/src/bin/pg_combinebackup/pg_combinebackup.c
@@ -135,6 +135,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"output", required_argument, NULL, 'o'},
 		{"tablespace-mapping", required_argument, NULL, 'T'},
+		{"link", no_argument, NULL, 'k'},
 		{"manifest-checksums", required_argument, NULL, 1},
 		{"no-manifest", no_argument, NULL, 2},
 		{"sync-method", required_argument, NULL, 3},
@@ -172,7 +173,7 @@ main(int argc, char *argv[])
 	opt.copy_method = COPY_METHOD_COPY;
 
 	/* process command-line options */
-	while ((c = getopt_long(argc, argv, "dnNo:T:",
+	while ((c = getopt_long(argc, argv, "dnNo:T:k",
 							long_options, &optindex)) != -1)
 	{
 		switch (c)
@@ -193,6 +194,9 @@ main(int argc, char *argv[])
 			case 'T':
 				add_tablespace_mapping(&opt, optarg);
 				break;
+			case 'k':
+				opt.copy_method = COPY_METHOD_LINK;
+				break;
 			case 1:
 				if (!pg_checksum_parse_type(optarg,
 											&opt.manifest_checksums))
@@ -424,6 +428,18 @@ main(int argc, char *argv[])
 		}
 	}
 
+	/* Warn about the possibility of compromising the backups, when link mode */
+	if (opt.copy_method == COPY_METHOD_LINK)
+	{
+		pg_log_warning("--link mode was used; any modifications to the output "
+					   "directory may destructively modify input directories");
+		pg_log_warning_hint("If the input directories are not staging "
+							"directories, it is recommended to move the output"
+							"directory to another file system or machine "
+							"before performing changes to the files and/or "
+							"starting the cluster");
+	}
+
 	/* It's a success, so don't remove the output directories. */
 	reset_directory_cleanup_list();
 	exit(0);
@@ -766,6 +782,7 @@ help(const char *progname)
 	printf(_("  -o, --output=DIRECTORY    output directory\n"));
 	printf(_("  -T, --tablespace-mapping=OLDDIR=NEWDIR\n"
 			 "                            relocate tablespace in OLDDIR to NEWDIR\n"));
+	printf(_("  -k, --link                link files instead of copying\n"));
 	printf(_("      --clone               clone (reflink) files instead of copying\n"));
 	printf(_("      --copy                copy files (default)\n"));
 	printf(_("      --copy-file-range     copy using copy_file_range() system call\n"));
diff --git a/src/bin/pg_combinebackup/t/003_timeline.pl b/src/bin/pg_combinebackup/t/003_timeline.pl
index 0205a59f92..cce551276c 100644
--- a/src/bin/pg_combinebackup/t/003_timeline.pl
+++ b/src/bin/pg_combinebackup/t/003_timeline.pl
@@ -81,6 +81,20 @@ $node2->command_ok(
 	],
 	"incremental backup from node2");
 
+# Verify all the backups at this point, when mode is --link.
+# In that case, the next init_from_backup runs pg_combinebackup with --link and
+# then changes the linked postgresql.conf, causing future executions of
+# pg_verifybackup to fail because of a mismatch in the size of that file.
+if ($mode eq "--link")
+{
+	for my $backup_name (qw(backup1 backup2 backup3))
+	{
+		$node1->command_ok(
+			[ 'pg_verifybackup', $node1->backup_dir . '/' . $backup_name ],
+			"verify backup $backup_name");
+	}
+}
+
 # Restore the incremental backup and use it to create a new node.
 my $node3 = PostgreSQL::Test::Cluster->new('node3');
 $node3->init_from_backup(
@@ -100,12 +114,25 @@ select string_agg(a::text, ':'), string_agg(b, ':') from mytable;
 EOM
 is($result, '1:2:4:5|aardvark:beetle:dingo:elephant');
 
-# Let's also verify all the backups.
+# Let's also verify all the backups, now for any mode.
 for my $backup_name (qw(backup1 backup2 backup3))
 {
-	$node1->command_ok(
-		[ 'pg_verifybackup', $node1->backup_dir . '/' . $backup_name ],
-		"verify backup $backup_name");
+	my $cmd = [ 'pg_verifybackup', $node1->backup_dir . '/' . $backup_name ];
+	my $msg = "verify backup $backup_name";
+
+	# We expect verify backup3 to fail, when mode is --link. For more details,
+	# refer to a previous comment related with pg_verifybackup execution.
+	if ($mode eq "--link" && $backup_name eq "backup3")
+	{
+		$node1->command_fails_like(
+			$cmd,
+			qr/"postgresql\.conf" has size \d+ on disk but size \d+ in the manifest/,
+			$msg);
+	}
+	else
+	{
+		$node1->command_ok($cmd, $msg);
+	}
 }
 
 # OK, that's all.
diff --git a/src/bin/pg_combinebackup/t/010_links.pl b/src/bin/pg_combinebackup/t/010_links.pl
new file mode 100644
index 0000000000..3b5e8dc71d
--- /dev/null
+++ b/src/bin/pg_combinebackup/t/010_links.pl
@@ -0,0 +1,139 @@
+# Copyright (c) 2021-2025, PostgreSQL Global Development Group
+#
+# This test aims to validate that hard-links are created as expected in the
+# output directory, when running pg_combinebackup with --link mode.
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Find;
+use File::Spec;
+use File::stat;
+
+# Set up a new database instance.
+my $primary = PostgreSQL::Test::Cluster->new('primary');
+$primary->init(has_archiving => 1, allows_streaming => 1);
+$primary->append_conf('postgresql.conf', 'summarize_wal = on');
+$primary->start;
+
+# Create some tables.
+$primary->safe_psql('postgres', <<EOM);
+CREATE TABLE test_1 AS SELECT generate_series(1, 1000000);
+CREATE TABLE test_2 AS SELECT generate_series(1, 1000000);
+CREATE TABLE test_3 AS SELECT generate_series(1, 1000000);
+EOM
+
+# Fetch information about the data files.
+my $query = "SELECT pg_relation_filepath(oid) FROM pg_class WHERE relname = '%s';";
+
+my $pg_attribute_path = $primary->safe_psql('postgres', sprintf($query, 'pg_attribute'));
+note "pg_attribute path is $pg_attribute_path";
+my $pg_class_path = $primary->safe_psql('postgres', sprintf($query, 'pg_class'));
+note "pg_class path is $pg_class_path";
+my $pg_statistic_path = $primary->safe_psql('postgres', sprintf($query, 'pg_statistic'));
+note "pg_statistic path is $pg_statistic_path";
+my $test_1_path = $primary->safe_psql('postgres', sprintf($query, 'test_1'));
+note "test_1 path is $test_1_path";
+my $test_2_path = $primary->safe_psql('postgres', sprintf($query, 'test_2'));
+note "test_2 path is $test_2_path";
+my $test_3_path = $primary->safe_psql('postgres', sprintf($query, 'test_3'));
+note "test_3 path is $test_3_path";
+
+# Take a full backup.
+my $backup1path = $primary->backup_dir . '/backup1';
+$primary->command_ok(
+	[
+		'pg_basebackup',
+		'--pgdata' => $backup1path,
+		'--no-sync',
+		'--checkpoint' => 'fast',
+        '--wal-method' => 'none'
+	],
+	"full backup");
+
+# Perform an update that touches the whole file.
+$primary->safe_psql('postgres', <<EOM);
+UPDATE test_2
+SET generate_series = generate_series + 1;
+EOM
+
+# Perform an insert that touches a single page.
+$primary->safe_psql('postgres', <<EOM);
+INSERT INTO test_3 (generate_series) VALUES (1);
+EOM
+
+# Take an incremental backup.
+my $backup2path = $primary->backup_dir . '/backup2';
+$primary->command_ok(
+	[
+		'pg_basebackup',
+		'--pgdata' => $backup2path,
+		'--no-sync',
+		'--checkpoint' => 'fast',
+        '--wal-method' => 'none',
+		'--incremental' => $backup1path . '/backup_manifest'
+	],
+	"incremental backup");
+
+# Restore the incremental backup and use it to create a new node.
+my $restore = PostgreSQL::Test::Cluster->new('restore');
+$restore->init_from_backup(
+	$primary, 'backup2',
+	combine_with_prior => ['backup1'],
+	combine_mode => '--link');
+
+# Ensure files have the expected counter of hard-links.
+# We expect all files to have 2 hard-links in this case, except for:
+# 
+# * backup_label and backup_manifest: the backups were taken in different
+#   LSNs and with different contents on the test tables.
+# * pg_attribute, pg_class and pg_statistic might have been modified in the
+#   meantime, so they can have 1 or 2 hard-links.
+# * data file of table test_3 is different because of changes in a page.
+
+# Directory to traverse
+my $backup_dest = $restore->data_dir;
+
+# Set of non-linked files (these are the ones with 1 hard link)
+my %non_linked_files = map { $_ => 1 } (
+    File::Spec->catfile($restore->data_dir, $test_3_path),
+    File::Spec->catfile($restore->data_dir, 'backup_manifest'),
+    File::Spec->catfile($restore->data_dir, 'backup_label')
+);
+
+# Set of linked or non-linked files (these are the ones that may be with 1 or 2
+# hard links)
+my %linked_or_non_linked_files = map { $_ => 1 } (
+    File::Spec->catfile($restore->data_dir, $pg_attribute_path),
+    File::Spec->catfile($restore->data_dir, $pg_class_path),
+    File::Spec->catfile($restore->data_dir, $pg_statistic_path),
+);
+
+# Recursively traverse the directory
+find(sub {
+    my $file = $File::Find::name;
+    
+    # Skip directories
+    return if -d $file;
+    
+    # Get the file's stat information
+    my $stat = stat($file);
+    my $nlink_count = $stat->nlink;
+
+    # Check if the file's exact path matches any in the non_linked_files set
+    if (exists $non_linked_files{$file}) {
+        # Non-linked files should have 1 hard link
+        ok($nlink_count == 1, "File '$file' has 1 hard link");
+    } elsif (exists $linked_or_non_linked_files{$file}) {
+        # These files can have either 1 or 2 hard link
+        ok($nlink_count == 1 || $nlink_count == 2, "File '$file' has 1 or 2 hard link");
+    } else {
+        # Other files should have 2 hard links
+        ok($nlink_count == 2, "File '$file' has 2 hard links");
+    }
+}, $backup_dest);
+
+# OK, that's all.
+done_testing();
-- 
2.43.5

