From dcd2530630c2fee0ee5b3cbf9508430a7019d193 Mon Sep 17 00:00:00 2001
From: Israel Barth Rubio <barthisrael@gmail.com>
Date: Wed, 19 Feb 2025 03:26:30 +0000
Subject: [PATCH v5] pg_combinebackup: add support for hard links

Up to now, pg_combinebackup reconstructs incremental files, if needed,
otherwise copies them from any of the input backups to the output
directory. That copy mechanism can use different methods, depending on
the argument specified by the user.

This commit adds support for a new "copy method": hard links
(-k/--link). When using that mode, instead of copying unmodified files
from the input backups to the output directory, pg_combinebackup
creates the files as hard links from the output directory to the input
backups.

The new link method might speed up the reconstruction of the synthetic
backup (no file copy) and reduce the disk space taken by the synthetic
backup. The benefits depend on the modification pattern of files in
PGDATA between backups, imposed by the workload on Postgres.

This feature requires that the input backups and the output directory
are in the same file system. Also, caution is required from the user
when modifying or starting the cluster from a synthetic backup, as that
might invalidate one or more of the input backups.

Signed-off-by: Israel Barth Rubio <barthisrael@gmail.com>
---
 doc/src/sgml/ref/pg_combinebackup.sgml      |  32 ++-
 src/bin/pg_combinebackup/copy_file.c        |  33 ++-
 src/bin/pg_combinebackup/copy_file.h        |   1 +
 src/bin/pg_combinebackup/meson.build        |   1 +
 src/bin/pg_combinebackup/pg_combinebackup.c |  12 +-
 src/bin/pg_combinebackup/t/010_links.pl     | 228 ++++++++++++++++++++
 6 files changed, 304 insertions(+), 3 deletions(-)
 create mode 100644 src/bin/pg_combinebackup/t/010_links.pl

diff --git a/doc/src/sgml/ref/pg_combinebackup.sgml b/doc/src/sgml/ref/pg_combinebackup.sgml
index 091982f62a..55bc46849d 100644
--- a/doc/src/sgml/ref/pg_combinebackup.sgml
+++ b/doc/src/sgml/ref/pg_combinebackup.sgml
@@ -137,6 +137,35 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>-k</option></term>
+      <term><option>--link</option></term>
+      <listitem>
+       <para>
+        Use hard links instead of copying files to the synthetic backup.
+        Reconstruction of the synthetic backup might be faster (no file copying)
+        and use less disk space, but care must be taken when using the output
+        directory, because any modifications to that directory (for example,
+        starting the server) can also affect the input directories. Likewise,
+        changes to the input directories (for example, starting the server on
+        the full backup) could affect the output directory. Thus, this option
+        is best used when the input directories are only copies that will be
+        removed after <application>pg_combinebackup</application> has completed.
+       </para>
+
+       <para>
+        Requires that the input backups and the output directory are in the
+        same file system.
+       </para>
+
+       <para>
+        If a backup manifest is not available or does not contain checksums of
+        the right type, hard links will still be created, but the files will
+        also be read block-by-block for the checksum calculation.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--clone</option></term>
       <listitem>
@@ -167,7 +196,8 @@ PostgreSQL documentation
       <listitem>
        <para>
         Perform regular file copy.  This is the default.  (See also
-        <option>--copy-file-range</option> and <option>--clone</option>.)
+        <option>--copy-file-range</option>, <option>--clone</option>, and
+        <option>-k</option>/<option>--link</option>.)
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/bin/pg_combinebackup/copy_file.c b/src/bin/pg_combinebackup/copy_file.c
index 4e27814839..97ecda5a66 100644
--- a/src/bin/pg_combinebackup/copy_file.c
+++ b/src/bin/pg_combinebackup/copy_file.c
@@ -40,6 +40,9 @@ static void copy_file_copyfile(const char *src, const char *dst,
 							   pg_checksum_context *checksum_ctx);
 #endif
 
+static void copy_file_link(const char *src, const char *dest,
+						   pg_checksum_context *checksum_ctx);
+
 /*
  * Copy a regular file, optionally computing a checksum, and emitting
  * appropriate debug messages. But if we're in dry-run mode, then just emit
@@ -69,7 +72,13 @@ copy_file(const char *src, const char *dst,
 	}
 
 #ifdef WIN32
-	copy_method = COPY_METHOD_COPYFILE;
+	/*
+	 * We have no specific switch to enable CopyFile on Windows, because
+	 * it's supported (as far as we know) on all Windows machines. So,
+	 * automatically enable it unless some other strategy was selected.
+	 */
+	if (copy_method == COPY_METHOD_COPY)
+		copy_method = COPY_METHOD_COPYFILE;
 #endif
 
 	/* Determine the name of the copy strategy for use in log messages. */
@@ -93,6 +102,10 @@ copy_file(const char *src, const char *dst,
 			strategy_implementation = copy_file_copyfile;
 			break;
 #endif
+		case COPY_METHOD_LINK:
+			strategy_name = "link";
+			strategy_implementation = copy_file_link;
+			break;
 	}
 
 	if (dry_run)
@@ -304,3 +317,21 @@ copy_file_copyfile(const char *src, const char *dst,
 	checksum_file(src, checksum_ctx);
 }
 #endif							/* WIN32 */
+
+/*
+ * copy_file_link
+ * 		Hard-links a file from src to dest.
+ *
+ * If needed, also reads the file and calculates the checksum.
+ */
+static void
+copy_file_link(const char *src, const char *dest,
+			   pg_checksum_context *checksum_ctx)
+{
+	if (link(src, dest) < 0)
+		pg_fatal("error while linking file from \"%s\" to \"%s\": %m",
+				 src, dest);
+
+	/* if needed, calculate checksum of the file */
+	checksum_file(src, checksum_ctx);
+}
diff --git a/src/bin/pg_combinebackup/copy_file.h b/src/bin/pg_combinebackup/copy_file.h
index 92f104115b..5a8517629c 100644
--- a/src/bin/pg_combinebackup/copy_file.h
+++ b/src/bin/pg_combinebackup/copy_file.h
@@ -25,6 +25,7 @@ typedef enum CopyMethod
 #ifdef WIN32
 	COPY_METHOD_COPYFILE,
 #endif
+	COPY_METHOD_LINK,
 } CopyMethod;
 
 extern void copy_file(const char *src, const char *dst,
diff --git a/src/bin/pg_combinebackup/meson.build b/src/bin/pg_combinebackup/meson.build
index 0c4fd9e627..e19c309ad2 100644
--- a/src/bin/pg_combinebackup/meson.build
+++ b/src/bin/pg_combinebackup/meson.build
@@ -37,6 +37,7 @@ tests += {
       't/007_wal_level_minimal.pl',
       't/008_promote.pl',
       't/009_no_full_file.pl',
+      't/010_links.pl',
     ],
   }
 }
diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c
index 5864ec574f..d480dc7443 100644
--- a/src/bin/pg_combinebackup/pg_combinebackup.c
+++ b/src/bin/pg_combinebackup/pg_combinebackup.c
@@ -135,6 +135,7 @@ main(int argc, char *argv[])
 		{"no-sync", no_argument, NULL, 'N'},
 		{"output", required_argument, NULL, 'o'},
 		{"tablespace-mapping", required_argument, NULL, 'T'},
+		{"link", no_argument, NULL, 'k'},
 		{"manifest-checksums", required_argument, NULL, 1},
 		{"no-manifest", no_argument, NULL, 2},
 		{"sync-method", required_argument, NULL, 3},
@@ -172,7 +173,7 @@ main(int argc, char *argv[])
 	opt.copy_method = COPY_METHOD_COPY;
 
 	/* process command-line options */
-	while ((c = getopt_long(argc, argv, "dnNo:T:",
+	while ((c = getopt_long(argc, argv, "dknNo:T:",
 							long_options, &optindex)) != -1)
 	{
 		switch (c)
@@ -181,6 +182,9 @@ main(int argc, char *argv[])
 				opt.debug = true;
 				pg_logging_increase_verbosity();
 				break;
+			case 'k':
+				opt.copy_method = COPY_METHOD_LINK;
+				break;
 			case 'n':
 				opt.dry_run = true;
 				break;
@@ -424,6 +428,11 @@ main(int argc, char *argv[])
 		}
 	}
 
+	/* Warn about the possibility of compromising the backups in link mode */
+	if (opt.copy_method == COPY_METHOD_LINK)
+		pg_log_warning("--link mode was used; any modifications to the output "
+					   "directory may destructively modify input directories");
+
 	/* It's a success, so don't remove the output directories. */
 	reset_directory_cleanup_list();
 	exit(0);
@@ -761,6 +770,7 @@ help(const char *progname)
 	printf(_("  %s [OPTION]... DIRECTORY...\n"), progname);
 	printf(_("\nOptions:\n"));
 	printf(_("  -d, --debug               generate lots of debugging output\n"));
+	printf(_("  -k, --link                link files instead of copying\n"));
 	printf(_("  -n, --dry-run             do not actually do anything\n"));
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
 	printf(_("  -o, --output=DIRECTORY    output directory\n"));
diff --git a/src/bin/pg_combinebackup/t/010_links.pl b/src/bin/pg_combinebackup/t/010_links.pl
new file mode 100644
index 0000000000..6da02ac243
--- /dev/null
+++ b/src/bin/pg_combinebackup/t/010_links.pl
@@ -0,0 +1,228 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+#
+# This test aims to validate that hard links are created as expected in the
+# output directory, when running pg_combinebackup with --link mode.
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Basename;
+
+# Set up a new database instance.
+my $primary = PostgreSQL::Test::Cluster->new('primary');
+$primary->init(has_archiving => 1, allows_streaming => 1);
+$primary->append_conf('postgresql.conf', 'summarize_wal = on');
+$primary->append_conf('postgresql.conf', 'autovacuum = off');
+$primary->start;
+
+# Create some tables (~264KB each).
+my $query = <<'EOM';
+CREATE TABLE test_%s AS
+    SELECT x.id::bigint,
+           repeat('a', 1600) AS value
+    FROM generate_series(1, 100) AS x(id);
+EOM
+
+$primary->safe_psql('postgres', sprintf($query, '1'));
+$primary->safe_psql('postgres', sprintf($query, '2'));
+
+# Fetch information about the data files.
+$query = <<'EOM';
+SELECT pg_relation_filepath(oid)
+FROM pg_class
+WHERE relname = 'test_%s';
+EOM
+
+my $test_1_path = $primary->safe_psql('postgres', sprintf($query, '1'));
+note "test_1 path is $test_1_path";
+
+my $test_2_path = $primary->safe_psql('postgres', sprintf($query, '2'));
+note "test_2 path is $test_2_path";
+
+# Take a full backup.
+my $backup1path = $primary->backup_dir . '/backup1';
+$primary->command_ok(
+	[
+		'pg_basebackup',
+		'--pgdata' => $backup1path,
+		'--no-sync',
+		'--checkpoint' => 'fast',
+        '--wal-method' => 'none'
+	],
+	"full backup");
+
+# Perform an insert that touches pages of the last segment of the data file of
+# table test_2.
+$primary->safe_psql('postgres', <<EOM);
+INSERT INTO test_2 (id, value)
+    SELECT x.id::bigint,
+           repeat('a', 1600) AS value
+    FROM generate_series(101, 110) AS x(id);
+EOM
+
+# Take an incremental backup.
+my $backup2path = $primary->backup_dir . '/backup2';
+$primary->command_ok(
+	[
+		'pg_basebackup',
+		'--pgdata' => $backup2path,
+		'--no-sync',
+		'--checkpoint' => 'fast',
+        '--wal-method' => 'none',
+		'--incremental' => $backup1path . '/backup_manifest'
+	],
+	"incremental backup");
+
+# Restore the incremental backup and use it to create a new node.
+my $restore = PostgreSQL::Test::Cluster->new('restore');
+$restore->init_from_backup(
+	$primary, 'backup2',
+	combine_with_prior => ['backup1'],
+	combine_mode => '--link');
+
+# Work around differences between Windows and Linux test runners. The perl
+# functions to inspect files return paths with forward slashes, while the
+# restore->data_dir variable contains back slashes on Windows. This step is just
+# to normalize the paths, so we are able to match strings later.
+my $restore_dir_normalized = $restore->data_dir;
+$restore_dir_normalized =~ s/\\/\//g;
+
+# Ensure files have the expected count of hard links. We expect all data files
+# from test_1 to contain 2 hard links, because they were not touched between the
+# full and incremental backups, and the last data file of table test_2 to
+# contain a single hard link because of changes in its last pages.
+my $test_1_full_path = join('/', $restore_dir_normalized, $test_1_path);
+check_data_file($test_1_full_path, 2);
+
+my $test_2_full_path = join('/', $restore_dir_normalized, $test_2_path);
+check_data_file($test_2_full_path, 1);
+
+# OK, that's all.
+done_testing();
+
+
+# Given the path to the first segment of a data file, inspect its parent
+# directory to find all the segments of that data file, and make sure all
+# segments but the last contain 2 hard links. The last segment must have the
+# given number of hard links.
+#
+# Parameters:
+# * data_file: path to the first segment of a data file, as per the output of
+#              pg_relation_filepath.
+# * last_segment_nlinks: the number of hard links expected in the last segment
+#                        of the given data file.
+sub check_data_file
+{
+    my ($data_file, $last_segment_nlinks) = @_;
+
+    # By default Postgres uses 1GB segments for the data files, and our test
+    # tables are 264KB worth of data each. However, that segment size is
+    # configurable, so we have to handle all possibilities here. Cirrus CI e.g.
+    # is configured with 6 blocks per segment, and we need to cover that test
+    # case too. We want to scan all files under the directory which contains the
+    # data file, and get all the segments of that data file.
+    my @data_file_segments = ();
+    my $dir = dirname($data_file);
+
+    opendir(my $dh, $dir) or die "Cannot open directory $dir: $!";
+
+    while (my $file = readdir($dh))
+    {
+        my $full_path = "$dir/$file";
+
+        # Skip the '.' and '..' entries, and directories
+        next if $file eq '.' or $file eq '..' or !-f $full_path;
+
+        # The first segment of the data file contains no dots. From the second
+        # onwards, it follows the same name pattern of the first segment,
+        # followed by a dot and the sequence number. Here we want to normalize
+        # all the segments to the same name.
+        my $basename = (split /\./, $full_path)[0];
+
+        # If it is a segment of the given data file, add it to the list of
+        # segments.
+        if ($basename eq $data_file)
+        {
+            push @data_file_segments, $full_path;
+        }
+    }
+
+    closedir($dh);
+
+    # All segments of the given data file should contain 2 hard links, except
+    # for the last one, which should match the given number of links.
+    @data_file_segments = sort { natural_sort ($a, $b) } @data_file_segments;
+    my $last_segment = pop @data_file_segments;
+
+    for my $segment (@data_file_segments)
+    {
+        # Get the file's stat information of each segment
+        my $nlink_count = get_hard_link_count($segment);
+        ok($nlink_count == 2, "File '$segment' has 2 hard links");
+    }
+
+    # Get the file's stat information of the last segment
+    my $nlink_count = get_hard_link_count($last_segment);
+    ok($nlink_count == $last_segment_nlinks,
+       "File '$last_segment' has $last_segment_nlinks hard link(s)");
+}
+
+# Natural comparison subroutine for strings with numbers.
+# This is just a helper function for sorting strings with numbers. We want
+# "base/123.13" to come before "base/123.123", for example.
+sub natural_sort
+{
+    my ($a, $b) = @_;
+
+    # Split into non-numeric and numeric parts
+    my @parts_a = $a =~ /(\D+|\d+)/g;
+    my @parts_b = $b =~ /(\D+|\d+)/g;
+
+    for (my $i = 0; $i < scalar(@parts_a) && $i < scalar(@parts_b); $i++)
+    {
+        if ($parts_a[$i] =~ /^\d/ && $parts_b[$i] =~ /^\d/)
+        {
+            # Compare numerically if both parts are numbers
+            if ($parts_a[$i] < $parts_b[$i])
+            {
+                return -1;
+            }
+            elsif ($parts_a[$i] > $parts_b[$i])
+            {
+                return 1;
+            }
+        }
+        else
+        {
+            # Compare lexicographically if not numbers
+            if ($parts_a[$i] lt $parts_b[$i])
+            {
+                return -1;
+            }
+            elsif ($parts_a[$i] gt $parts_b[$i])
+            {
+                return 1;
+            }
+        }
+    }
+
+    # If all compared parts are the same, the shorter string comes first
+    return scalar(@parts_a) <=> scalar(@parts_b);
+}
+
+
+# Subroutine to get hard link count of a given file.
+# Receives the path to a file, and returns the number of hard links of
+# that file.
+sub get_hard_link_count
+{
+    my ($file) = @_;
+
+    # Get file stats
+    my @stats = stat($file);
+    my $nlink = $stats[3];  # Number of hard links
+
+    return $nlink;
+}
-- 
2.43.5

