Hi Konstantin, On Thu, Aug 30, 2018 at 11:27:31AM -0700, Michael Paquier wrote: > It seems to me that you are right here, "path" points to > pg_replslot/$SLOTNAME/state which is a file so the fsync is incorrect. > I am not sure that we'd want to publish fsync_parent_path out of fd.c > though, so perhaps we could just save the slot path in a different > variable and use it?
I have spent more time on this bug, and the code path you have pointed at is the only one having such an issue. Attached is a patch to fix the problem. It includes the sanity checks I have used to check all code paths calling fsync_fname() for both the frontend and the backend code. The checks will not be included in the final fix, still they look useful so I am planning to start a new thread on the matter as perhaps other folks have more and/or better ideas. -- Michael
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 19978d9a9e..4f30904141 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1352,6 +1352,7 @@ RestoreSlotFromDisk(const char *name)
{
ReplicationSlotOnDisk cp;
int i;
+ char slotdir[MAXPGPATH];
char path[MAXPGPATH + 22];
int fd;
bool restored = false;
@@ -1361,13 +1362,14 @@ RestoreSlotFromDisk(const char *name)
/* no need to lock here, no concurrent access allowed yet */
/* delete temp file if it exists */
- sprintf(path, "pg_replslot/%s/state.tmp", name);
+ sprintf(slotdir, "pg_replslot/%s", name);
+ sprintf(path, "%s/state.tmp", slotdir);
if (unlink(path) < 0 && errno != ENOENT)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m", path)));
- sprintf(path, "pg_replslot/%s/state", name);
+ sprintf(path, "%s/state", slotdir);
elog(DEBUG1, "restoring replication slot from \"%s\"", path);
@@ -1402,7 +1404,7 @@ RestoreSlotFromDisk(const char *name)
/* Also sync the parent directory */
START_CRIT_SECTION();
- fsync_fname(path, true);
+ fsync_fname(slotdir, true);
END_CRIT_SECTION();
/* read part of statefile that's guaranteed to be version independent */
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 8dd51f1767..49a5640c61 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -574,6 +574,14 @@ pg_flush_data(int fd, off_t offset, off_t nbytes)
void
fsync_fname(const char *fname, bool isdir)
{
+#ifdef USE_ASSERT_CHECKING
+ struct stat statbuf;
+
+ stat(fname, &statbuf);
+ Assert((isdir && S_ISDIR(statbuf.st_mode)) ||
+ (!isdir && !S_ISDIR(statbuf.st_mode)));
+#endif
+
fsync_fname_ext(fname, isdir, false, ERROR);
}
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 48876061c3..36095b01af 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -266,6 +266,14 @@ fsync_fname(const char *fname, bool isdir, const char *progname)
int flags;
int returncode;
+#ifdef USE_ASSERT_CHECKING
+ struct stat statbuf;
+
+ stat(fname, &statbuf);
+ Assert((isdir && S_ISDIR(statbuf.st_mode)) ||
+ (!isdir && !S_ISDIR(statbuf.st_mode)));
+#endif
+
/*
* Some OSs require directories to be opened read-only whereas other
* systems don't allow us to fsync files opened read-only; so we need both
signature.asc
Description: PGP signature
