At 2015-05-27 11:46:39 +0530, [email protected] wrote:
>
> I'm trying a couple of approaches to that (e.g. using readdir directly
> instead of ReadDir), but other suggestions are welcome.
Here's what that looks like, but not yet fully tested.
-- Abhijit
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 087b6be..6956e88 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -11609,16 +11609,41 @@ SetWalWriterSleeping(bool sleeping)
static void
fsync_pgdata(char *datadir)
{
+ bool xlog_is_symlink;
+ char pg_xlog[MAXPGPATH];
+ char pg_tblspc[MAXPGPATH];
+ struct stat st;
+
if (!enableFsync)
return;
+ snprintf(pg_xlog, MAXPGPATH, "%s/pg_xlog", datadir);
+ snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", datadir);
+
+ if (lstat(pg_xlog, &st) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat directory \"pg_xlog\": %m")));
+
+ xlog_is_symlink = false;
+#ifndef WIN32
+ if (S_ISLNK(st.st_mode))
+ xlog_is_symlink = true;
+#else
+ if (pgwin32_is_junction(subpath))
+ xlog_is_symlink = true;
+#endif
+
/*
* If possible, hint to the kernel that we're soon going to fsync the data
* directory and its contents.
*/
#if defined(HAVE_SYNC_FILE_RANGE) || \
(defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED))
- walkdir(datadir, pre_sync_fname);
+ walkdir(DEBUG1, datadir, pre_sync_fname);
+ if (xlog_is_symlink)
+ walkdir(DEBUG1, pg_xlog, pre_sync_fname);
+ walktblspc_links(DEBUG1, pg_tblspc, pre_sync_fname);
#endif
/*
@@ -11628,5 +11653,8 @@ fsync_pgdata(char *datadir)
* file fsyncs don't guarantee that the directory entry for the file is
* synced.
*/
- walkdir(datadir, fsync_fname);
+ walkdir(WARNING, datadir, fsync_fname_ext);
+ if (xlog_is_symlink)
+ walkdir(DEBUG1, pg_xlog, fsync_fname_ext);
+ walktblspc_links(WARNING, pg_tblspc, fsync_fname_ext);
}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 68d43c6..711b4b7 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -2443,48 +2443,113 @@ looks_like_temp_rel_name(const char *name)
/*
* Hint to the OS that it should get ready to fsync() this file.
*
- * Adapted from pre_sync_fname in initdb.c
+ * Ignores errors trying to open unreadable files, and logs other errors at a
+ * caller-specified level.
*/
void
-pre_sync_fname(char *fname, bool isdir)
+pre_sync_fname(int elevel, char *fname, bool isdir)
{
int fd;
fd = BasicOpenFile(fname, O_RDONLY | PG_BINARY, 0);
+ if (fd < 0)
+ {
+ if (errno == EACCES || (isdir && errno == EISDIR))
+ return;
+
+#ifdef ETXTBSY
+ if (errno == ETXTBSY)
+ return;
+#endif
+
+ ereport(elevel,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m", fname)));
+ }
+
+ (void) pg_flush_data(fd, 0, 0);
+ (void) close(fd);
+}
+
+/*
+ * fsync_fname_ext -- Try to fsync a file or directory
+ *
+ * Ignores errors trying to open unreadable files, or trying to fsync
+ * directories on systems where that isn't allowed/required, and logs other
+ * errors at a caller-specified level.
+ */
+void
+fsync_fname_ext(int elevel, char *fname, bool isdir)
+{
+ int fd;
+ int returncode;
+
/*
- * Some OSs don't allow us to open directories at all (Windows returns
- * EACCES)
+ * Some OSs require directories to be opened read-only whereas other
+ * systems don't allow us to fsync files opened read-only; so we need both
+ * cases here
*/
- if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES))
- return;
+ if (!isdir)
+ fd = OpenTransientFile(fname,
+ O_RDWR | PG_BINARY,
+ S_IRUSR | S_IWUSR);
+ else
+ fd = OpenTransientFile(fname,
+ O_RDONLY | PG_BINARY,
+ S_IRUSR | S_IWUSR);
if (fd < 0)
- ereport(FATAL,
- (errmsg("could not open file \"%s\": %m",
- fname)));
+ {
+ if (errno == EACCES || (isdir && errno == EISDIR))
+ return;
- pg_flush_data(fd, 0, 0);
+#ifdef ETXTBSY
+ if (errno == ETXTBSY)
+ return;
+#endif
- close(fd);
+ ereport(elevel,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m", fname)));
+ }
+
+ returncode = pg_fsync(fd);
+
+ /*
+ * Some OSes don't allow us to fsync directories at all, so we can ignore
+ * those errors. Anything else needs to be logged.
+ */
+ if (returncode != 0 && !(isdir && errno == EBADF))
+ ereport(elevel,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m", fname)));
+
+ CloseTransientFile(fd);
}
/*
* walkdir: recursively walk a directory, applying the action to each
- * regular file and directory (including the named directory itself)
- * and following symbolic links.
+ * regular file and directory (including the named directory itself).
*
- * NB: There is another version of walkdir in initdb.c, but that version
- * behaves differently with respect to symbolic links. Caveat emptor!
+ * Adapted from walkdir in initdb.c
*/
void
-walkdir(char *path, void (*action) (char *fname, bool isdir))
+walkdir(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir))
{
DIR *dir;
struct dirent *de;
dir = AllocateDir(path);
- while ((de = ReadDir(dir, path)) != NULL)
+ if (dir == NULL)
+ {
+ ereport(elevel,
+ (errcode_for_file_access(),
+ errmsg("could not open directory \"%s\": %m", path)));
+ return;
+ }
+
+ while (errno = 0, (de = readdir(dir)) != NULL)
{
char subpath[MAXPGPATH];
struct stat fst;
@@ -2498,59 +2563,66 @@ walkdir(char *path, void (*action) (char *fname, bool isdir))
snprintf(subpath, MAXPGPATH, "%s/%s", path, de->d_name);
if (lstat(subpath, &fst) < 0)
- ereport(ERROR,
+ ereport(elevel,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", subpath)));
if (S_ISREG(fst.st_mode))
- (*action) (subpath, false);
+ (*action) (elevel, subpath, false);
else if (S_ISDIR(fst.st_mode))
- walkdir(subpath, action);
-#ifndef WIN32
- else if (S_ISLNK(fst.st_mode))
-#else
- else if (pgwin32_is_junction(subpath))
-#endif
- {
-#if defined(HAVE_READLINK) || defined(WIN32)
- char linkpath[MAXPGPATH];
- int len;
- struct stat lst;
+ walkdir(elevel, subpath, action);
+ }
- len = readlink(subpath, linkpath, sizeof(linkpath) - 1);
- if (len < 0)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not read symbolic link \"%s\": %m",
- subpath)));
+ if (errno)
+ ereport(elevel,
+ (errcode_for_file_access(),
+ errmsg("could not read directory \"%s\": %m", path)));
- if (len >= sizeof(linkpath) - 1)
- ereport(ERROR,
- (errmsg("symbolic link \"%s\" target is too long",
- subpath)));
+ FreeDir(dir);
- linkpath[len] = '\0';
+ (*action) (elevel, path, true);
+}
- if (lstat(linkpath, &lst) == 0)
- {
- if (S_ISREG(lst.st_mode))
- (*action) (linkpath, false);
- else if (S_ISDIR(lst.st_mode))
- walkdir(subpath, action);
- }
- else if (errno != ENOENT)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not stat file \"%s\": %m", linkpath)));
-#else
- ereport(WARNING,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("this platform does not support symbolic links; ignoring \"%s\"",
- subpath)));
-#endif
- }
+/*
+ * walktblspc_links: call walkdir on each entry under the given
+ * pg_tblspc directory, or do nothing if pg_tblspc doesn't exist.
+ *
+ * Adapted from walktblspc_links in initdb.c
+ */
+void
+walktblspc_links(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir))
+{
+ DIR *dir;
+ struct dirent *de;
+ char subpath[MAXPGPATH];
+
+ dir = AllocateDir(path);
+ if (dir == NULL)
+ {
+ if (errno != ENOENT)
+ ereport(elevel,
+ (errcode_for_file_access(),
+ errmsg("could not open directory \"%s\": %m", path)));
+ return;
}
- FreeDir(dir);
- (*action) (path, true);
+ while (errno = 0, (de = readdir(dir)) != NULL)
+ {
+ if (strcmp(de->d_name, ".") == 0 ||
+ strcmp(de->d_name, "..") == 0)
+ continue;
+
+ /* fsync the version specific tablespace subdirectory */
+ snprintf(subpath, sizeof(subpath), "%s/%s/%s",
+ path, de->d_name, TABLESPACE_VERSION_DIRECTORY);
+
+ walkdir(elevel, subpath, action);
+ }
+
+ if (errno)
+ ereport(elevel,
+ (errcode_for_file_access(),
+ errmsg("could not read directory \"%s\": %m", path)));
+
+ FreeDir(dir);
}
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 5b563a6..322fd40 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -114,8 +114,10 @@ extern int pg_fsync_writethrough(int fd);
extern int pg_fdatasync(int fd);
extern int pg_flush_data(int fd, off_t offset, off_t amount);
extern void fsync_fname(char *fname, bool isdir);
-extern void pre_sync_fname(char *fname, bool isdir);
-extern void walkdir(char *path, void (*action) (char *fname, bool isdir));
+extern void fsync_fname_ext(int elevel, char *fname, bool isdir);
+extern void pre_sync_fname(int elevel, char *fname, bool isdir);
+extern void walkdir(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir));
+extern void walktblspc_links(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir));
/* Filename components for OpenTemporaryFile */
#define PG_TEMP_FILES_DIR "pgsql_tmp"
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers