On 26/09/2018 08:44, Michael Paquier wrote:
> On Sat, Sep 01, 2018 at 07:28:37AM +0200, Peter Eisentraut wrote:
>> rebased patch, no functionality changes
> 
> Could you rebase once again?  I am going through the patch and wanted to
> test pg_upgrade on Linux with XFS, but it does not apply anymore.

attached

-- 
Peter Eisentraut              http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 9a58fc2589e50d69b4b158ea5e8f3898483290d0 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pete...@gmx.net>
Date: Thu, 27 Sep 2018 22:42:33 +0200
Subject: [PATCH v5] pg_upgrade: Allow use of file cloning

For file copying in pg_upgrade, allow using special file cloning calls
if available.  This makes the copying faster and more space efficient.
This achieves speed similar to --link mode without the associated
drawbacks.

Add an option --reflink to select whether file cloning is turned on,
off, or automatic.  Automatic is the default.

On Linux, file cloning is supported on Btrfs and XFS (if formatted with
reflink support).  On macOS, file cloning is supported on APFS.
---
 configure                        |   2 +-
 configure.in                     |   2 +-
 doc/src/sgml/ref/pgupgrade.sgml  |  33 +++++++++
 src/bin/pg_upgrade/check.c       |   2 +
 src/bin/pg_upgrade/file.c        | 123 +++++++++++++++++++++++++++++++
 src/bin/pg_upgrade/option.c      |  14 ++++
 src/bin/pg_upgrade/pg_upgrade.h  |  15 ++++
 src/bin/pg_upgrade/relfilenode.c |  31 +++++++-
 src/include/pg_config.h.in       |   3 +
 9 files changed, 220 insertions(+), 5 deletions(-)

diff --git a/configure b/configure
index 6414ec1ea6..ae6f1a2e17 100755
--- a/configure
+++ b/configure
@@ -15100,7 +15100,7 @@ fi
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-for ac_func in cbrt clock_gettime fdatasync getifaddrs getpeerucred getrlimit 
mbstowcs_l memmove poll posix_fallocate ppoll pstat pthread_is_threaded_np 
readlink setproctitle setproctitle_fast setsid shm_open symlink sync_file_range 
utime utimes wcstombs_l
+for ac_func in cbrt clock_gettime copyfile fdatasync getifaddrs getpeerucred 
getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pstat 
pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open 
symlink sync_file_range utime utimes wcstombs_l
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
diff --git a/configure.in b/configure.in
index 158d5a1ac8..265faf1b99 100644
--- a/configure.in
+++ b/configure.in
@@ -1571,7 +1571,7 @@ PGAC_FUNC_WCSTOMBS_L
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-AC_CHECK_FUNCS([cbrt clock_gettime fdatasync getifaddrs getpeerucred getrlimit 
mbstowcs_l memmove poll posix_fallocate ppoll pstat pthread_is_threaded_np 
readlink setproctitle setproctitle_fast setsid shm_open symlink sync_file_range 
utime utimes wcstombs_l])
+AC_CHECK_FUNCS([cbrt clock_gettime copyfile fdatasync getifaddrs getpeerucred 
getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pstat 
pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open 
symlink sync_file_range utime utimes wcstombs_l])
 
 AC_REPLACE_FUNCS(fseeko)
 case $host_os in
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index d51146d641..d994218c44 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -182,6 +182,39 @@ <title>Options</title>
       <listitem><para>display version information, then exit</para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><literal><option>--reflink</option>={always|auto|never}</literal></term>
+      <listitem>
+       <para>
+        Determines whether <application>pg_upgrade</application>, when in copy
+        mode, should use efficient file cloning (also known as
+        <quote>reflinks</quote>) on some operating systems and file systems.
+        This can result in near-instantaneous copying of the data files,
+        giving the speed advantages of
+        <option>-k</option>/<option>--link</option> while leaving the old
+        cluster untouched.
+       </para>
+
+       <para>
+        The setting <literal>always</literal> requires the use of reflinks.  If
+        they are not supported, the <application>pg_upgrade</application> run
+        will abort.  Use this in production to limit the upgrade run time.
+        The setting <literal>auto</literal> uses reflinks when available,
+        otherwise it falls back to a normal copy.  This is the default.  The
+        setting <literal>never</literal> prevents use of reflinks and always
+        uses a normal copy.  This can be useful to ensure that the upgraded
+        cluster has its disk space fully allocated and not shared with the old
+        cluster.
+       </para>
+
+       <para>
+        At present, reflinks are supported on Linux (kernel 4.5 or later) with
+        Btrfs and XFS (on file systems created with reflink support, which is
+        not the default for XFS at this writing), and on macOS with APFS.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-?</option></term>
       <term><option>--help</option></term>
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 5a78d603dc..eb1f18180a 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -151,6 +151,8 @@ check_new_cluster(void)
 
        if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
                check_hard_link();
+       else if (user_opts.transfer_mode == TRANSFER_MODE_COPY && 
user_opts.reflink_mode != REFLINK_NEVER)
+               check_reflink();
 
        check_is_install_user(&new_cluster);
 
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index c27cc93dc2..2e864cd6bb 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -18,6 +18,13 @@
 
 #include <sys/stat.h>
 #include <fcntl.h>
+#ifdef HAVE_COPYFILE
+#include <copyfile.h>
+#endif
+#ifdef __linux__
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#endif
 
 
 #ifdef WIN32
@@ -93,6 +100,74 @@ copyFile(const char *src, const char *dst,
 #endif                                                 /* WIN32 */
 }
 
+/*
+ * cloneFile()
+ *
+ * Clones/reflinks a relation file from src to dst.
+ *
+ * schemaName/relName are relation's SQL name (used for error messages only).
+ *
+ * Returns true if the file was cloned.  If unsupported_ok is true and the
+ * cloning fails because the OS or file system does not support it, no error
+ * is raised and false is returned instead, so that the caller can fall back
+ * to copyFile(), for example.  Any other failure is fatal.
+ */
+bool
+cloneFile(const char *src, const char *dst,
+                 const char *schemaName, const char *relName,
+                 bool unsupported_ok)
+{
+#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
+       if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
+       {
+               if (unsupported_ok && errno == ENOTSUP)
+                       return false;
+               else
+                       pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
+                                        schemaName, relName, src, dst, strerror(errno));
+       }
+       return true;
+#elif defined(__linux__) && defined(FICLONE)
+       int                     src_fd;
+       int                     dest_fd;
+
+       if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
+               pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s\n",
+                                schemaName, relName, src, strerror(errno));
+
+       if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+                                               pg_file_create_mode)) < 0)
+               pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s\n",
+                                schemaName, relName, dst, strerror(errno));
+
+       if (ioctl(dest_fd, FICLONE, src_fd) < 0)
+       {
+               /* unlink() may overwrite errno, so remember why cloning failed */
+               int                     save_errno = errno;
+
+               unlink(dst);
+               if (unsupported_ok && save_errno == EOPNOTSUPP)
+               {
+                       close(src_fd);
+                       close(dest_fd);
+                       return false;
+               }
+               else
+                       pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
+                                        schemaName, relName, src, dst, strerror(save_errno));
+       }
+
+       close(src_fd);
+       close(dest_fd);
+       return true;
+#else
+       /* no cloning call on this platform; only report failure quietly if allowed */
+       if (!unsupported_ok)
+               pg_fatal("file cloning not supported on this platform\n");
+       return false;
+#endif
+}
+
 
 /*
  * linkFile()
@@ -270,6 +339,74 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
        close(src_fd);
 }
 
+/*
+ * check_reflink()
+ *
+ * Tests whether files can be cloned from the old to the new data directory
+ * by cloning the old cluster's PG_VERSION into a scratch file in the new
+ * data directory, which is removed again afterwards.
+ *
+ * If cloning does not work, this is fatal when reflink_mode is
+ * REFLINK_ALWAYS.  Otherwise the failure is only reported, and only if
+ * user_opts.check is set.
+ */
+void
+check_reflink(void)
+{
+       char            existing_file[MAXPGPATH];
+       char            new_link_file[MAXPGPATH];
+
+       snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
+       snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.reflinktest", new_cluster.pgdata);
+       unlink(new_link_file);          /* might fail */
+
+#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
+       if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
+       {
+               if (user_opts.reflink_mode == REFLINK_ALWAYS)
+                       pg_fatal("could not clone file between old and new data directories: %s\n",
+                                        strerror(errno));
+               else if (user_opts.check)
+                       pg_log(PG_REPORT, "could not clone file between old and new data directories: %s\n",
+                                  strerror(errno));
+       }
+#elif defined(__linux__) && defined(FICLONE)
+       {
+               int                     src_fd;
+               int                     dest_fd;
+
+               if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
+                       pg_fatal("could not open file \"%s\": %s\n",
+                                        existing_file, strerror(errno));
+
+               if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+                                                       pg_file_create_mode)) < 0)
+                       pg_fatal("could not create file \"%s\": %s\n",
+                                        new_link_file, strerror(errno));
+
+               if (ioctl(dest_fd, FICLONE, src_fd) < 0)
+               {
+                       if (user_opts.reflink_mode == REFLINK_ALWAYS)
+                               pg_fatal("could not clone file between old and new data directories: %s\n",
+                                                strerror(errno));
+                       else if (user_opts.check)
+                               pg_log(PG_REPORT, "could not clone file between old and new data directories: %s\n",
+                                          strerror(errno));
+               }
+
+               close(src_fd);
+               close(dest_fd);
+       }
+#else
+       if (user_opts.reflink_mode == REFLINK_ALWAYS)
+               pg_fatal("file cloning not supported on this platform\n");
+       else if (user_opts.check)
+               pg_log(PG_REPORT, "file cloning not supported on this platform\n");
+#endif
+
+       unlink(new_link_file);
+}
+
 void
 check_hard_link(void)
 {
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 9dbc9225a6..d52a1bcee3 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -53,6 +53,9 @@ parseCommandLine(int argc, char *argv[])
                {"retain", no_argument, NULL, 'r'},
                {"jobs", required_argument, NULL, 'j'},
                {"verbose", no_argument, NULL, 'v'},
+
+               {"reflink", required_argument, NULL, 1},
+
                {NULL, 0, NULL, 0}
        };
        int                     option;                 /* Command line option 
*/
@@ -203,6 +206,17 @@ parseCommandLine(int argc, char *argv[])
                                log_opts.verbose = true;
                                break;
 
+                       case 1:
+                               if (strcmp(optarg, "always") == 0)
+                                       user_opts.reflink_mode = REFLINK_ALWAYS;
+                               else if (strcmp(optarg, "auto") == 0)
+                                       user_opts.reflink_mode = REFLINK_AUTO;
+                               else if (strcmp(optarg, "never") == 0)
+                                       user_opts.reflink_mode = REFLINK_NEVER;
+                               else
+                                       pg_fatal("invalid reflink mode: %s\n", 
optarg);
+                               break;
+
                        default:
                                pg_fatal("Try \"%s --help\" for more 
information.\n",
                                                 os_info.progname);
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index f83a3eeb67..16eb34e14c 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -238,6 +238,16 @@ typedef enum
        TRANSFER_MODE_LINK
 } transferMode;
 
+/*
+ * Enumeration to denote reflink modes.  NOTE(review): REFLINK_NEVER is the zero value, but the docs call auto the default; confirm it is set explicitly.
+ */
+typedef enum
+{
+       REFLINK_NEVER,                  /* never clone, always do a normal copy */
+       REFLINK_AUTO,                   /* clone if supported, else normal copy */
+       REFLINK_ALWAYS                  /* cloning is required, fail otherwise */
+} reflinkMode;
+
 /*
  * Enumeration to denote pg_log modes
  */
@@ -297,6 +307,7 @@ typedef struct
        bool            check;                  /* true -> ask user for 
permission to make
                                                                 * changes */
        transferMode transfer_mode; /* copy files or link them? */
+       reflinkMode     reflink_mode;
        int                     jobs;
 } UserOpts;
 
@@ -374,10 +385,14 @@ bool              pid_lock_file_exists(const char 
*datadir);
 
 void copyFile(const char *src, const char *dst,
                 const char *schemaName, const char *relName);
+bool cloneFile(const char *src, const char *dst,
+                const char *schemaName, const char *relName,
+                bool unsupported_ok);
 void linkFile(const char *src, const char *dst,
                 const char *schemaName, const char *relName);
 void rewriteVisibilityMap(const char *fromfile, const char *tofile,
                                         const char *schemaName, const char 
*relName);
+void           check_reflink(void);
 void           check_hard_link(void);
 
 /* fopen_priv() is no longer different from fopen() */
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index ed604f26ca..fc00cfdfae 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -252,9 +252,34 @@ transfer_relfile(FileNameMap *map, const char 
*type_suffix, bool vm_must_add_fro
                }
                else if (user_opts.transfer_mode == TRANSFER_MODE_COPY)
                {
-                       pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n",
-                                  old_file, new_file);
-                       copyFile(old_file, new_file, map->nspname, 
map->relname);
+                       if (user_opts.reflink_mode == REFLINK_ALWAYS)
+                       {
+                               pg_log(PG_VERBOSE, "cloning \"%s\" to \"%s\"\n",
+                                          old_file, new_file);
+                               cloneFile(old_file, new_file, map->nspname, 
map->relname, false);
+                       }
+                       else if (user_opts.reflink_mode == REFLINK_AUTO)
+                       {
+                               static bool             cloning_ok = true;      /* cleared once cloning proves unsupported */
+
+                               pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n",
+                                          old_file, new_file);
+                               if (cloning_ok &&
+                                       !cloneFile(old_file, new_file, map->nspname, map->relname, true))
+                               {
+                                       pg_log(PG_VERBOSE, "cloning not supported, switching to copying\n");
+                                       cloning_ok = false;
+                               }
+                               /* a successfully cloned file is complete and must not be copied again */
+                               if (!cloning_ok)
+                                       copyFile(old_file, new_file, map->nspname, map->relname);
+                       }
+                       else
+                       {
+                               pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n",
+                                          old_file, new_file);
+                               copyFile(old_file, new_file, map->nspname, 
map->relname);
+                       }
                }
                else
                {
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 90dda8ea05..2c57e31dcd 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -114,6 +114,9 @@
 /* Define to 1 if your compiler handles computed gotos. */
 #undef HAVE_COMPUTED_GOTO
 
+/* Define to 1 if you have the `copyfile' function. */
+#undef HAVE_COPYFILE
+
 /* Define to 1 if you have the <crtdefs.h> header file. */
 #undef HAVE_CRTDEFS_H
 

base-commit: 27e082b0c6e564facfbf54b56090fdcc4bf44cca
-- 
2.19.0

Reply via email to