Recently a hacker proposed a patch to add pg_dynshmem to the list of directories whose contents are excluded in pg_basebackup. I wasn't able to find the original email despite several attempts.
That patch got me thinking about what else could be excluded and after some investigation I found the following: pg_notify, pg_serial, pg_snapshots, pg_subtrans. These directories are all cleaned, zeroed, or rebuilt on server start. The attached patch adds these directories to the pg_basebackup exclusions and takes an array-based approach to excluding directories and files during backup. I also incorporated Ashutosh's patch to fix corruption when pg_stat_tmp/pg_replslot are links [1]. This logic has been extended to all excluded directories. Perhaps these patches should be merged in the CF but first I'd like to see if anyone can identify problems with the additional exclusions. Thanks, -- -David da...@pgmasters.net [1] http://www.postgresql.org/message-id/flat/CAE9k0Pm7=x_o0w7e2b2s2cwczdcbgczgdrxttzxozgp8beb...@mail.gmail.com/
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index da9b7a6..209b2cb 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -30,6 +30,7 @@ #include "replication/basebackup.h" #include "replication/walsender.h" #include "replication/walsender_private.h" +#include "storage/dsm_impl.h" #include "storage/fd.h" #include "storage/ipc.h" #include "utils/builtins.h" @@ -69,9 +70,6 @@ static void throttle(size_t increment); /* Was the backup currently in-progress initiated in recovery mode? */ static bool backup_started_in_recovery = false; -/* Relative path of temporary statistics directory */ -static char *statrelpath = NULL; - /* * Size of each block sent into the tar stream for larger files. */ @@ -95,6 +93,68 @@ static int64 elapsed_min_unit; static int64 throttled_last; /* + * The contents of these directories are removed or recreated during server + * start so they will not be included in the backup. The directory entry + * will be included to preserve permissions. + */ +#define EXCLUDE_DIR_MAX 8 +#define EXCLUDE_DIR_STAT_TMP 0 + +const char *excludeDirContents[EXCLUDE_DIR_MAX] = +{ + /* + * Skip temporary statistics files. The first array position will be + * filled with the value of pgstat_stat_directory relative to PGDATA. + * PG_STAT_TMP_DIR must be skipped even when stats_temp_directory is set + * because PGSS_TEXT_FILE is always created there. + */ + NULL, + PG_STAT_TMP_DIR, + + /* Recreated on startup, see StartupReplicationSlots(). */ + "pg_replslot", + + /* Removed on startup, see dsm_cleanup_for_mmap(). */ + PG_DYNSHMEM_DIR, + + /* Recreated/zeroed on startup, see AsyncShmemInit(). */ + "pg_notify", + + /* Recreated on startup, see OldSerXidInit(). */ + "pg_serial", + + /* Removed on startup, see DeleteAllExportedSnapshotFiles(). */ + "pg_snapshots", + + /* Recreated/zeroed on startup, see StartupSUBTRANS(. */ + "pg_subtrans" +}; + +/* + * Files that should not be included in the backup. + */ +#define EXCLUDE_FILE_MAX 5 + +const char *excludeFile[EXCLUDE_FILE_MAX] = +{ + /* Skip auto conf temporary file. */ + PG_AUTOCONF_FILENAME ".tmp", + + /* + * If there's a backup_label or tablespace_map file, it belongs to a backup + * started by the user with pg_start_backup(). It is *not* correct for this + * backup, our backup_label/tablespace_map is injected into the tar + * separately. + */ + BACKUP_LABEL_FILE, + TABLESPACE_MAP, + + /* Skip postmaster.pid and postmaster.opts. */ + "postmaster.pid", + "postmaster.opts" +}; + +/* * Called when ERROR or FATAL happens in perform_base_backup() after * we have started the backup - make sure we end it! */ @@ -154,11 +214,13 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) */ if (is_absolute_path(pgstat_stat_directory) && strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0) - statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1); + excludeDirContents[EXCLUDE_DIR_STAT_TMP] = + pgstat_stat_directory + datadirpathlen + 1; else if (strncmp(pgstat_stat_directory, "./", 2) != 0) - statrelpath = psprintf("./%s", pgstat_stat_directory); + excludeDirContents[EXCLUDE_DIR_STAT_TMP] = pgstat_stat_directory; else - statrelpath = pgstat_stat_directory; + excludeDirContents[EXCLUDE_DIR_STAT_TMP] = + pgstat_stat_directory + 2; /* Add a node for the base directory at the end */ ti = palloc0(sizeof(tablespaceinfo)); @@ -889,6 +951,8 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, char pathbuf[MAXPGPATH]; struct stat statbuf; int64 size = 0; + int excludeIdx; + bool excludeFound; dir = AllocateDir(path); while ((de = ReadDir(dir, path)) != NULL) @@ -903,22 +967,80 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, strlen(PG_TEMP_FILE_PREFIX)) == 0) continue; - /* skip auto conf temporary file */ - if (strncmp(de->d_name, - PG_AUTOCONF_FILENAME ".tmp", - sizeof(PG_AUTOCONF_FILENAME) + 4) == 0) + /* Stat the file */ + snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name); + + if (lstat(pathbuf, &statbuf) != 0) + { + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file or directory \"%s\": %m", + pathbuf))); + + /* If the file went away while scanning, it's not an error. */ continue; + } /* - * If there's a backup_label or tablespace_map file, it belongs to a - * backup started by the user with pg_start_backup(). It is *not* - * correct for this backup, our backup_label/tablespace_map is - * injected into the tar separately. + * Scan for files that should be excluded. See excludeFile[] for info + * on exclusions. */ - if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0) - continue; + excludeFound = false; + + for (excludeIdx = 0; excludeIdx < EXCLUDE_FILE_MAX; excludeIdx++) + { + if (excludeFile[excludeIdx] != NULL && + strcmp(pathbuf + 2, excludeFile[excludeIdx]) == 0) + { + ereport(DEBUG1, + (errmsg("file excluded from backup: %s", excludeFile[excludeIdx]))); + + excludeFound = true; + break; + } + } + + /* + * Scan for directories whose contents should be excluded. See + * excludeDirContents[] for info on exclusions. + */ + if (!excludeFound) + { + for (excludeIdx = 0; excludeIdx < EXCLUDE_DIR_MAX; excludeIdx++) + { + if (excludeDirContents[excludeIdx] != NULL && + strcmp(pathbuf + 2, excludeDirContents[excludeIdx]) == 0) + { + ereport(DEBUG1, + (errmsg("directory contents excluded from backup: %s", + excludeDirContents[excludeIdx]))); + + /* Include the directory entry to preserve permissions. */ + if (!sizeonly) + { + /* If symlink, write it as a directory anyway */ +#ifndef WIN32 + if (S_ISLNK(statbuf.st_mode)) +#else + if (pgwin32_is_junction(pathbuf)) +#endif + statbuf.st_mode = S_IFDIR | S_IRWXU; - if (strcmp(de->d_name, TABLESPACE_MAP) == 0) + _tarWriteHeader(pathbuf + basepathlen + 1, + NULL, &statbuf); + } + + size += 512; + + excludeFound = true; + break; + } + } + } + + /* If file matched exclusion, continue. */ + if (excludeFound) continue; /* @@ -938,55 +1060,10 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, "and should not be used. " "Try taking another online backup."))); - snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name); - - /* Skip postmaster.pid and postmaster.opts in the data directory */ - if (strcmp(pathbuf, "./postmaster.pid") == 0 || - strcmp(pathbuf, "./postmaster.opts") == 0) - continue; - /* Skip pg_control here to back up it last */ if (strcmp(pathbuf, "./global/pg_control") == 0) continue; - if (lstat(pathbuf, &statbuf) != 0) - { - if (errno != ENOENT) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file or directory \"%s\": %m", - pathbuf))); - - /* If the file went away while scanning, it's no error. */ - continue; - } - - /* - * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped - * even when stats_temp_directory is set because PGSS_TEXT_FILE is - * always created there. - */ - if ((statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0) || - strncmp(de->d_name, PG_STAT_TMP_DIR, strlen(PG_STAT_TMP_DIR)) == 0) - { - if (!sizeonly) - _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf); - size += 512; - continue; - } - - /* - * Skip pg_replslot, not useful to copy. But include it as an empty - * directory anyway, so we get permissions right. - */ - if (strcmp(de->d_name, "pg_replslot") == 0) - { - if (!sizeonly) - _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf); - size += 512; /* Size of the header just added */ - continue; - } - /* * We can skip pg_xlog, the WAL segments need to be fetched from the * WAL archive anyway. But include it as an empty directory anyway, so
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers