At 2015-05-28 17:37:16 -0400, t...@sss.pgh.pa.us wrote:
>
> I've committed this after some mostly-cosmetic rearrangements.

Thank you.

> I have to leave shortly, so I'll look at the initdb cleanup tomorrow.

Here's a revision of that patch that's more along the lines of what you
committed.

It occurred to me that we should probably also silently skip EACCES on
opendir and stat/lstat in walkdir. That's what the attached initdb patch
does. If you think it's a good idea, there's also a small fixup attached
for the backend version too.

-- Abhijit
>From 26bb25e99a2954381587bbfe296a50b19a7f047c Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen <a...@2ndquadrant.com>
Date: Thu, 28 May 2015 22:02:29 +0530
Subject: Make initdb -S behave like xlog.c:fsync_pgdata()

In particular, it should silently skip unreadable/unexpected files in
the data directory and follow symlinks only for pg_xlog and under
pg_tblspc.
---
 src/bin/initdb/initdb.c | 305 ++++++++++++++++++++++++------------------------
 1 file changed, 151 insertions(+), 154 deletions(-)

diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index f1c4920..9d5478c 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -71,6 +71,13 @@
 /* Ideally this would be in a .h file, but it hardly seems worth the trouble */
 extern const char *select_default_timezone(const char *share_path);
 
+/* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
+#if defined(HAVE_SYNC_FILE_RANGE)
+#define PG_FLUSH_DATA_WORKS 1
+#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
+#define PG_FLUSH_DATA_WORKS 1
+#endif
+
 static const char *auth_methods_host[] = {"trust", "reject", "md5", "password", "ident", "radius",
 #ifdef ENABLE_GSS
 	"gss",
@@ -217,10 +224,13 @@ static char **filter_lines_with_token(char **lines, const char *token);
 #endif
 static char **readfile(const char *path);
 static void writefile(char *path, char **lines);
-static void walkdir(char *path, void (*action) (char *fname, bool isdir));
-static void walktblspc_links(char *path, void (*action) (char *fname, bool isdir));
-static void pre_sync_fname(char *fname, bool isdir);
-static void fsync_fname(char *fname, bool isdir);
+static void walkdir(const char *path,
+					void (*action) (const char *fname, bool isdir),
+					bool process_symlinks);
+#ifdef PG_FLUSH_DATA_WORKS
+static void pre_sync_fname(const char *fname, bool isdir);
+#endif
+static void fsync_fname_ext(const char *fname, bool isdir);
 static FILE *popen_check(const char *command, const char *mode);
 static void exit_nicely(void);
 static char *get_id(void);
@@ -248,7 +258,7 @@ static void load_plpgsql(void);
 static void vacuum_db(void);
 static void make_template0(void);
 static void make_postgres(void);
-static void perform_fsync(void);
+static void fsync_pgdata(void);
 static void trapsig(int signum);
 static void check_ok(void);
 static char *escape_quotes(const char *src);
@@ -521,56 +531,58 @@ writefile(char *path, char **lines)
  * Adapted from copydir() in copydir.c.
  */
 static void
-walkdir(char *path, void (*action) (char *fname, bool isdir))
+walkdir(const char *path,
+		void (*action) (const char *fname, bool isdir),
+		bool process_symlinks)
 {
 	DIR		   *dir;
-	struct dirent *direntry;
-	char		subpath[MAXPGPATH];
+	struct dirent *de;
 
 	dir = opendir(path);
 	if (dir == NULL)
 	{
-		fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
-				progname, path, strerror(errno));
-		exit_nicely();
+		if (errno != EACCES)
+			fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
+					progname, path, strerror(errno));
+		return;
 	}
 
-	while (errno = 0, (direntry = readdir(dir)) != NULL)
+	while (errno = 0, (de = readdir(dir)) != NULL)
 	{
+		char		subpath[MAXPGPATH];
 		struct stat fst;
+		int			sret;
 
-		if (strcmp(direntry->d_name, ".") == 0 ||
-			strcmp(direntry->d_name, "..") == 0)
+		if (strcmp(de->d_name, ".") == 0 ||
+			strcmp(de->d_name, "..") == 0)
 			continue;
 
-		snprintf(subpath, MAXPGPATH, "%s/%s", path, direntry->d_name);
+		snprintf(subpath, MAXPGPATH, "%s/%s", path, de->d_name);
+
+		if (process_symlinks)
+			sret = stat(subpath, &fst);
+		else
+			sret = lstat(subpath, &fst);
 
-		if (lstat(subpath, &fst) < 0)
+		if (sret < 0)
 		{
-			fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
-					progname, subpath, strerror(errno));
-			exit_nicely();
+			if (errno != EACCES)
+				fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
+						progname, subpath, strerror(errno));
+			continue;
 		}
 
-		if (S_ISDIR(fst.st_mode))
-			walkdir(subpath, action);
-		else if (S_ISREG(fst.st_mode))
+		if (S_ISREG(fst.st_mode))
 			(*action) (subpath, false);
+		else if (S_ISDIR(fst.st_mode))
+			walkdir(subpath, action, false);
 	}
 
 	if (errno)
-	{
 		fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
 				progname, path, strerror(errno));
-		exit_nicely();
-	}
 
-	if (closedir(dir))
-	{
-		fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
-				progname, path, strerror(errno));
-		exit_nicely();
-	}
+	(void) closedir(dir);
 
 	/*
 	 * It's important to fsync the destination directory itself as individual
@@ -582,149 +594,111 @@ walkdir(char *path, void (*action) (char *fname, bool isdir))
 }
 
 /*
- * walktblspc_links: call walkdir on each entry under the given
- * pg_tblspc directory, or do nothing if pg_tblspc doesn't exist.
- */
-static void
-walktblspc_links(char *path, void (*action) (char *fname, bool isdir))
-{
-	DIR		   *dir;
-	struct dirent *direntry;
-	char		subpath[MAXPGPATH];
-
-	dir = opendir(path);
-	if (dir == NULL)
-	{
-		if (errno == ENOENT)
-			return;
-		fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
-				progname, path, strerror(errno));
-		exit_nicely();
-	}
-
-	while (errno = 0, (direntry = readdir(dir)) != NULL)
-	{
-		if (strcmp(direntry->d_name, ".") == 0 ||
-			strcmp(direntry->d_name, "..") == 0)
-			continue;
-
-		/* fsync the version specific tablespace subdirectory */
-		snprintf(subpath, sizeof(subpath), "%s/%s/%s",
-				 path, direntry->d_name, TABLESPACE_VERSION_DIRECTORY);
-
-		walkdir(subpath, action);
-	}
-
-	if (errno)
-	{
-		fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
-				progname, path, strerror(errno));
-		exit_nicely();
-	}
-
-	if (closedir(dir))
-	{
-		fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
-				progname, path, strerror(errno));
-		exit_nicely();
-	}
-}
-
-/*
  * Hint to the OS that it should get ready to fsync() this file.
+ *
+ * Ignores errors trying to open unreadable files, and logs other errors
+ * at a caller-specified level.
  */
+#ifdef PG_FLUSH_DATA_WORKS
+
 static void
-pre_sync_fname(char *fname, bool isdir)
+pre_sync_fname(const char *fname, bool isdir)
 {
-#if defined(HAVE_SYNC_FILE_RANGE) || \
-	(defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED))
 	int			fd;
 
 	fd = open(fname, O_RDONLY | PG_BINARY);
 
-	/*
-	 * Some OSs don't allow us to open directories at all (Windows returns
-	 * EACCES)
-	 */
-	if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES))
-		return;
-
 	if (fd < 0)
 	{
+		if (errno == EACCES || (isdir && errno == EISDIR))
+			return;
+
+#ifdef ETXTBSY
+		if (errno == ETXTBSY)
+			return;
+#endif
+
 		fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
 				progname, fname, strerror(errno));
-		exit_nicely();
+		return;
 	}
 
 	/*
-	 * Prefer sync_file_range, else use posix_fadvise.  We ignore any error
-	 * here since this operation is only a hint anyway.
+	 * We do what pg_flush_fd would do in the backend: prefer to use
+	 * sync_file_range, but fall back to posix_fadvise, and ignore
+	 * errors because this is only a hint.
 	 */
 #if defined(HAVE_SYNC_FILE_RANGE)
-	sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
+	(void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
 #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
-	posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
+	(void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
+#else
+#error PG_FLUSH_DATA_WORKS should not have been defined
 #endif
 
-	close(fd);
-#endif
+	(void) close(fd);
 }
 
+#endif   /* PG_FLUSH_DATA_WORKS */
+
 /*
- * fsync a file or directory
+ * fsync_fname_ext -- Try to fsync a file or directory
  *
- * Try to fsync directories but ignore errors that indicate the OS
- * just doesn't allow/require fsyncing directories.
- *
- * Adapted from fsync_fname() in copydir.c.
+ * Ignores errors trying to open unreadable files, or trying to fsync
+ * directories on systems where that isn't allowed/required. Reports
+ * other errors non-fatally.
  */
 static void
-fsync_fname(char *fname, bool isdir)
+fsync_fname_ext(const char *fname, bool isdir)
 {
 	int			fd;
+	int			flags;
 	int			returncode;
 
 	/*
 	 * Some OSs require directories to be opened read-only whereas other
 	 * systems don't allow us to fsync files opened read-only; so we need both
-	 * cases here
+	 * cases here.  Using O_RDWR will cause us to fail to fsync files that are
+	 * not writable by our userid, but we assume that's OK.
 	 */
+	flags = PG_BINARY;
 	if (!isdir)
-		fd = open(fname, O_RDWR | PG_BINARY);
+		flags |= O_RDWR;
 	else
-		fd = open(fname, O_RDONLY | PG_BINARY);
+		flags |= O_RDONLY;
 
 	/*
-	 * Some OSs don't allow us to open directories at all (Windows returns
-	 * EACCES)
+	 * Open the file, silently ignoring errors about unreadable files (or
+	 * unsupported operations, e.g. opening a directory under Windows), and
+	 * logging others.
 	 */
-	if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES))
-		return;
-
-	else if (fd < 0)
+	fd = open(fname, flags);
+	if (fd < 0)
 	{
+		if (errno == EACCES || (isdir && errno == EISDIR))
+			return;
+
+#ifdef ETXTBSY
+		if (errno == ETXTBSY)
+			return;
+#endif
+
 		fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
 				progname, fname, strerror(errno));
-		exit_nicely();
+		return;
 	}
 
 	returncode = fsync(fd);
 
-	/* Some OSs don't allow us to fsync directories at all */
-	if (returncode != 0 && isdir && errno == EBADF)
-	{
-		close(fd);
-		return;
-	}
-
-	if (returncode != 0)
-	{
+	/*
+	 * Some OSes don't allow us to fsync directories at all, so we can ignore
+	 * those errors. Anything else needs to be reported.
+	 */
+	if (returncode != 0 && !(isdir && errno == EBADF))
 		fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
 				progname, fname, strerror(errno));
-		exit_nicely();
-	}
 
-	close(fd);
+	(void) close(fd);
 }
 
 /*
@@ -2419,50 +2393,73 @@ make_postgres(void)
 }
 
 /*
- * fsync everything down to disk
+ * Issue fsync recursively on PGDATA and all its contents.
+ *
+ * We fsync regular files and directories wherever they are, but we
+ * follow symlinks only for pg_xlog and immediately under pg_tblspc.
+ * Other symlinks are presumed to point at files we're not responsible
+ * for fsyncing, and might not have privileges to write at all.
+ *
+ * Errors are reported but not considered fatal.
  */
 static void
-perform_fsync(void)
+fsync_pgdata(void)
 {
-	char		pdir[MAXPGPATH];
+	bool		xlog_is_symlink;
+	char		pg_xlog[MAXPGPATH];
 	char		pg_tblspc[MAXPGPATH];
 
 	fputs(_("syncing data to disk ... "), stdout);
 	fflush(stdout);
 
-	/*
-	 * We need to name the parent of PGDATA.  get_parent_directory() isn't
-	 * enough here, because it can result in an empty string.
-	 */
-	snprintf(pdir, MAXPGPATH, "%s/..", pg_data);
-	canonicalize_path(pdir);
+	snprintf(pg_xlog, MAXPGPATH, "%s/pg_xlog", pg_data);
+	snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
 
 	/*
-	 * Hint to the OS so that we're going to fsync each of these files soon.
+	 * If pg_xlog is a symlink, we'll need to recurse into it separately,
+	 * because the first walkdir below will ignore it.
 	 */
+	xlog_is_symlink = false;
 
-	/* first the parent of the PGDATA directory */
-	pre_sync_fname(pdir, true);
-
-	/* then recursively through the data directory */
-	walkdir(pg_data, pre_sync_fname);
+#ifndef WIN32
+	{
+		struct stat st;
 
-	/* now do the same thing for everything under pg_tblspc */
-	snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
-	walktblspc_links(pg_tblspc, pre_sync_fname);
+		if (lstat(pg_xlog, &st) < 0)
+			fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
+					progname, pg_xlog, strerror(errno));
+		else if (S_ISLNK(st.st_mode))
+			xlog_is_symlink = true;
+	}
+#else
+	if (pgwin32_is_junction(pg_xlog))
+		xlog_is_symlink = true;
+#endif
 
 	/*
-	 * Now, do the fsync()s in the same order.
+	 * If possible, hint to the kernel that we're soon going to fsync the data
+	 * directory and its contents.
 	 */
+#ifdef PG_FLUSH_DATA_WORKS
+	walkdir(pg_data, pre_sync_fname, false);
+	if (xlog_is_symlink)
+		walkdir(pg_xlog, pre_sync_fname, false);
+	walkdir(pg_tblspc, pre_sync_fname, true);
+#endif
 
-	/* first the parent of the PGDATA directory */
-	fsync_fname(pdir, true);
-
-	/* then recursively through the data directory */
-	walkdir(pg_data, fsync_fname);
-
-	/* and now the same for all tablespaces */
-	walktblspc_links(pg_tblspc, fsync_fname);
+	/*
+	 * Now we do the fsync()s in the same order.
+	 *
+	 * The main call ignores symlinks, so in addition to specially processing
+	 * pg_xlog if it's a symlink, pg_tblspc has to be visited separately with
+	 * process_symlinks = true.  Note that if there are any plain directories
+	 * in pg_tblspc, they'll get fsync'd twice.  That's not an expected case
+	 * so we don't worry about optimizing it.
+	 */
+	walkdir(pg_data, fsync_fname_ext, false);
+	if (xlog_is_symlink)
+		walkdir(pg_xlog, fsync_fname_ext, false);
+	walkdir(pg_tblspc, fsync_fname_ext, true);
 
 	check_ok();
 }
@@ -3576,7 +3573,7 @@ main(int argc, char *argv[])
 	if (sync_only)
 	{
 		setup_pgdata();
-		perform_fsync();
+		fsync_pgdata();
 		return 0;
 	}
 
@@ -3629,7 +3626,7 @@ main(int argc, char *argv[])
 	initialize_data_directory();
 
 	if (do_sync)
-		perform_fsync();
+		fsync_pgdata();
 	else
 		printf(_("\nSync to disk skipped.\nThe data directory might become corrupt if the operating system crashes.\n"));
 
-- 
1.9.1

>From 1394a162e681af58a81c8ec420ff526a9a1b3a24 Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen <a...@2ndquadrant.com>
Date: Fri, 29 May 2015 08:45:04 +0530
Subject: Silently accept EACCES on opendir/stat

---
 src/backend/storage/file/fd.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index b4f6590..af39744 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -2579,9 +2579,10 @@ walkdir(const char *path,
 	dir = AllocateDir(path);
 	if (dir == NULL)
 	{
-		ereport(elevel,
-				(errcode_for_file_access(),
-				 errmsg("could not open directory \"%s\": %m", path)));
+		if (errno != EACCES)
+			ereport(elevel,
+					(errcode_for_file_access(),
+					 errmsg("could not open directory \"%s\": %m", path)));
 		return;
 	}
 
@@ -2606,9 +2607,10 @@ walkdir(const char *path,
 
 		if (sret < 0)
 		{
-			ereport(elevel,
-					(errcode_for_file_access(),
-					 errmsg("could not stat file \"%s\": %m", subpath)));
+			if (errno != EACCES)
+				ereport(elevel,
+						(errcode_for_file_access(),
+						 errmsg("could not stat file \"%s\": %m", subpath)));
 			continue;
 		}
 
-- 
1.9.1

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to