Recently a hacker proposed a patch to add pg_dynshmem to the list of
directories whose contents are excluded in pg_basebackup.  I wasn't able
to find the original email despite several attempts.

That patch got me thinking about what else could be excluded and after
some investigation I found the following: pg_notify, pg_serial,
pg_snapshots, pg_subtrans.  These directories are all cleaned, zeroed,
or rebuilt on server start.

The attached patch adds these directories to the pg_basebackup
exclusions and takes an array-based approach to excluding directories
and files during backup.

I also incorporated Ashutosh's patch to fix corruption when
pg_stat_tmp/pg_replslot are links [1].  This logic has been extended to
all excluded directories.

Perhaps these patches should be merged in the CF but first I'd like to
see if anyone can identify problems with the additional exclusions.

Thanks,
-- 
-David
da...@pgmasters.net

[1]
http://www.postgresql.org/message-id/flat/CAE9k0Pm7=x_o0w7e2b2s2cwczdcbgczgdrxttzxozgp8beb...@mail.gmail.com/
diff --git a/src/backend/replication/basebackup.c 
b/src/backend/replication/basebackup.c
index da9b7a6..209b2cb 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -30,6 +30,7 @@
 #include "replication/basebackup.h"
 #include "replication/walsender.h"
 #include "replication/walsender_private.h"
+#include "storage/dsm_impl.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "utils/builtins.h"
@@ -69,9 +70,6 @@ static void throttle(size_t increment);
 /* Was the backup currently in-progress initiated in recovery mode? */
 static bool backup_started_in_recovery = false;
 
-/* Relative path of temporary statistics directory */
-static char *statrelpath = NULL;
-
 /*
  * Size of each block sent into the tar stream for larger files.
  */
@@ -95,6 +93,68 @@ static int64 elapsed_min_unit;
 static int64 throttled_last;
 
 /*
+ * The contents of these directories are removed or recreated during server
+ * start so they will not be included in the backup.  The directory entry
+ * will be included to preserve permissions.
+ */
+#define EXCLUDE_DIR_MAX                8
+#define EXCLUDE_DIR_STAT_TMP   0
+
+const char *excludeDirContents[EXCLUDE_DIR_MAX] =
+{
+       /*
+        * Skip temporary statistics files. The first array position will be
+        * filled with the value of pgstat_stat_directory relative to PGDATA.
+        * PG_STAT_TMP_DIR must be skipped even when stats_temp_directory is set
+        * because PGSS_TEXT_FILE is always created there.
+        */
+       NULL,
+       PG_STAT_TMP_DIR,
+
+       /* Recreated on startup, see StartupReplicationSlots(). */
+       "pg_replslot",
+
+       /* Removed on startup, see dsm_cleanup_for_mmap(). */
+       PG_DYNSHMEM_DIR,
+
+       /* Recreated/zeroed on startup, see AsyncShmemInit(). */
+       "pg_notify",
+
+       /* Recreated on startup, see OldSerXidInit(). */
+       "pg_serial",
+
+       /* Removed on startup, see DeleteAllExportedSnapshotFiles(). */
+       "pg_snapshots",
+
+       /* Recreated/zeroed on startup, see StartupSUBTRANS(. */
+       "pg_subtrans"
+};
+
+/*
+ * Files that should not be included in the backup.
+ */
+#define EXCLUDE_FILE_MAX       5
+
+const char *excludeFile[EXCLUDE_FILE_MAX] =
+{
+       /* Skip auto conf temporary file. */
+       PG_AUTOCONF_FILENAME ".tmp",
+
+       /*
+        * If there's a backup_label or tablespace_map file, it belongs to a 
backup
+        * started by the user with pg_start_backup(). It is *not* correct for 
this
+        * backup, our backup_label/tablespace_map is injected into the tar
+        * separately.
+        */
+       BACKUP_LABEL_FILE,
+       TABLESPACE_MAP,
+
+       /* Skip postmaster.pid and postmaster.opts. */
+       "postmaster.pid",
+       "postmaster.opts"
+};
+
+/*
  * Called when ERROR or FATAL happens in perform_base_backup() after
  * we have started the backup - make sure we end it!
  */
@@ -154,11 +214,13 @@ perform_base_backup(basebackup_options *opt, DIR 
*tblspcdir)
                 */
                if (is_absolute_path(pgstat_stat_directory) &&
                        strncmp(pgstat_stat_directory, DataDir, datadirpathlen) 
== 0)
-                       statrelpath = psprintf("./%s", pgstat_stat_directory + 
datadirpathlen + 1);
+                       excludeDirContents[EXCLUDE_DIR_STAT_TMP] =
+                               pgstat_stat_directory + datadirpathlen + 1;
                else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
-                       statrelpath = psprintf("./%s", pgstat_stat_directory);
+                       excludeDirContents[EXCLUDE_DIR_STAT_TMP] = 
pgstat_stat_directory;
                else
-                       statrelpath = pgstat_stat_directory;
+                       excludeDirContents[EXCLUDE_DIR_STAT_TMP] =
+                               pgstat_stat_directory + 2;
 
                /* Add a node for the base directory at the end */
                ti = palloc0(sizeof(tablespaceinfo));
@@ -889,6 +951,8 @@ sendDir(char *path, int basepathlen, bool sizeonly, List 
*tablespaces,
        char            pathbuf[MAXPGPATH];
        struct stat statbuf;
        int64           size = 0;
+       int             excludeIdx;
+       bool            excludeFound;
 
        dir = AllocateDir(path);
        while ((de = ReadDir(dir, path)) != NULL)
@@ -903,22 +967,80 @@ sendDir(char *path, int basepathlen, bool sizeonly, List 
*tablespaces,
                                        strlen(PG_TEMP_FILE_PREFIX)) == 0)
                        continue;
 
-               /* skip auto conf temporary file */
-               if (strncmp(de->d_name,
-                                       PG_AUTOCONF_FILENAME ".tmp",
-                                       sizeof(PG_AUTOCONF_FILENAME) + 4) == 0)
+               /* Stat the file */
+               snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name);
+
+               if (lstat(pathbuf, &statbuf) != 0)
+               {
+                       if (errno != ENOENT)
+                               ereport(ERROR,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not stat file or 
directory \"%s\": %m",
+                                                               pathbuf)));
+
+                       /* If the file went away while scanning, it's not an 
error. */
                        continue;
+               }
 
                /*
-                * If there's a backup_label or tablespace_map file, it belongs 
to a
-                * backup started by the user with pg_start_backup(). It is 
*not*
-                * correct for this backup, our backup_label/tablespace_map is
-                * injected into the tar separately.
+                * Scan for files that should be excluded. See excludeFile[] 
for info
+                * on exclusions.
                 */
-               if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
-                       continue;
+               excludeFound = false;
+
+               for (excludeIdx = 0; excludeIdx < EXCLUDE_FILE_MAX; 
excludeIdx++)
+               {
+                       if (excludeFile[excludeIdx] != NULL &&
+                               strcmp(pathbuf + 2, excludeFile[excludeIdx]) == 
0)
+                       {
+                               ereport(DEBUG1,
+                                               (errmsg("file excluded from 
backup: %s", excludeFile[excludeIdx])));
+
+                               excludeFound = true;
+                               break;
+                       }
+               }
+
+               /*
+                * Scan for directories whose contents should be excluded.  See
+                * excludeDirContents[] for info on exclusions.
+                */
+               if (!excludeFound)
+               {
+                       for (excludeIdx = 0; excludeIdx < EXCLUDE_DIR_MAX; 
excludeIdx++)
+                       {
+                               if (excludeDirContents[excludeIdx] != NULL &&
+                                       strcmp(pathbuf + 2, 
excludeDirContents[excludeIdx]) == 0)
+                               {
+                                       ereport(DEBUG1,
+                                                       (errmsg("directory 
contents excluded from backup: %s",
+                                                                       
excludeDirContents[excludeIdx])));
+
+                                       /* Include the directory entry to 
preserve permissions. */
+                                       if (!sizeonly)
+                                       {
+                                               /* If symlink, write it as a 
directory anyway */
+#ifndef WIN32
+                                               if (S_ISLNK(statbuf.st_mode))
+#else
+                                               if 
(pgwin32_is_junction(pathbuf))
+#endif
+                                                       statbuf.st_mode = 
S_IFDIR | S_IRWXU;
 
-               if (strcmp(de->d_name, TABLESPACE_MAP) == 0)
+                                               _tarWriteHeader(pathbuf + 
basepathlen + 1,
+                                                                           
NULL, &statbuf);
+                                       }
+
+                                       size += 512;
+
+                                       excludeFound = true;
+                                       break;
+                               }
+                       }
+               }
+
+               /* If file matched exclusion, continue. */
+               if (excludeFound)
                        continue;
 
                /*
@@ -938,55 +1060,10 @@ sendDir(char *path, int basepathlen, bool sizeonly, List 
*tablespaces,
                                                 "and should not be used. "
                                                 "Try taking another online 
backup.")));
 
-               snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name);
-
-               /* Skip postmaster.pid and postmaster.opts in the data 
directory */
-               if (strcmp(pathbuf, "./postmaster.pid") == 0 ||
-                       strcmp(pathbuf, "./postmaster.opts") == 0)
-                       continue;
-
                /* Skip pg_control here to back up it last */
                if (strcmp(pathbuf, "./global/pg_control") == 0)
                        continue;
 
-               if (lstat(pathbuf, &statbuf) != 0)
-               {
-                       if (errno != ENOENT)
-                               ereport(ERROR,
-                                               (errcode_for_file_access(),
-                                                errmsg("could not stat file or 
directory \"%s\": %m",
-                                                               pathbuf)));
-
-                       /* If the file went away while scanning, it's no error. 
*/
-                       continue;
-               }
-
-               /*
-                * Skip temporary statistics files. PG_STAT_TMP_DIR must be 
skipped
-                * even when stats_temp_directory is set because PGSS_TEXT_FILE 
is
-                * always created there.
-                */
-               if ((statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0) 
||
-                 strncmp(de->d_name, PG_STAT_TMP_DIR, strlen(PG_STAT_TMP_DIR)) 
== 0)
-               {
-                       if (!sizeonly)
-                               _tarWriteHeader(pathbuf + basepathlen + 1, 
NULL, &statbuf);
-                       size += 512;
-                       continue;
-               }
-
-               /*
-                * Skip pg_replslot, not useful to copy. But include it as an 
empty
-                * directory anyway, so we get permissions right.
-                */
-               if (strcmp(de->d_name, "pg_replslot") == 0)
-               {
-                       if (!sizeonly)
-                               _tarWriteHeader(pathbuf + basepathlen + 1, 
NULL, &statbuf);
-                       size += 512;            /* Size of the header just 
added */
-                       continue;
-               }
-
                /*
                 * We can skip pg_xlog, the WAL segments need to be fetched 
from the
                 * WAL archive anyway. But include it as an empty directory 
anyway, so
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to