From f90ae9f970720b742a888d721d6a8efee6d881a8 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Sat, 29 May 2021 23:38:10 +1200
Subject: [PATCH v2] Support direct I/O on macOS.

macOS doesn't understand O_DIRECT, but it does know how to disable kernel
caching for a file descriptor if you make a separate fcntl(F_NOCACHE) call
after opening it.  Extend the file opening wrappers in fd.c to handle this.

For now, this affects only WAL data and even then only if you set:

  wal_level=minimal
  max_wal_senders=0

Later proposed patches will make greater use of direct I/O, and it will be
useful if people developing on Macs can test them too.
---
 src/backend/storage/file/fd.c         | 35 +++++++++++++++++++++++++++
 src/bin/pg_test_fsync/pg_test_fsync.c | 35 ++++++++++++++++++++++++---
 src/include/access/xlogdefs.h         | 15 ------------
 src/include/storage/fd.h              | 16 ++++++++++++
 4 files changed, 82 insertions(+), 19 deletions(-)

diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index e8cd7ef088..d8dca336a3 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -1057,10 +1057,45 @@ BasicOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
 	int			fd;
 
 tryAgain:
+#ifdef PG_O_DIRECT_USE_F_NOCACHE
+	/*
+	 * The value we defined to stand in for O_DIRECT when simulating it with
+	 * F_NOCACHE had better not collide with any of the standard flags.
+	 */
+	StaticAssertStmt((PG_O_DIRECT &
+					  (O_APPEND |
+					   O_CLOEXEC |
+					   O_CREAT |
+					   O_DSYNC |
+					   O_RDWR |
+					   O_RDONLY |
+					   O_SYNC |
+					   O_TRUNC |
+					   O_WRONLY)) == 0,
+					 "PG_O_DIRECT value collides with standard flag");
+	fd = open(fileName, fileFlags & ~PG_O_DIRECT, fileMode);
+#else
 	fd = open(fileName, fileFlags, fileMode);
+#endif
 
 	if (fd >= 0)
+	{
+#ifdef PG_O_DIRECT_USE_F_NOCACHE
+		if (fileFlags & PG_O_DIRECT)
+		{
+			if (fcntl(fd, F_NOCACHE, 1) < 0)
+			{
+				int			save_errno = errno;
+
+				close(fd);
+				errno = save_errno;
+				return -1;
+			}
+		}
+#endif
+
 		return fd;				/* success! */
+	}
 
 	if (errno == EMFILE || errno == ENFILE)
 	{
diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c
index 78dab5096c..fef31844fa 100644
--- a/src/bin/pg_test_fsync/pg_test_fsync.c
+++ b/src/bin/pg_test_fsync/pg_test_fsync.c
@@ -217,8 +217,10 @@ handle_args(int argc, char *argv[])
 					"%u seconds per test\n",
 					secs_per_test),
 		   secs_per_test);
-#if PG_O_DIRECT != 0
+#if defined(O_DIRECT)
 	printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
+#elif defined(F_NOCACHE)
+	printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
 #else
 	printf(_("Direct I/O is not supported on this platform.\n"));
 #endif
@@ -258,6 +260,31 @@ test_open(void)
 	close(tmpfile);
 }
 
+static int
+open_direct(const char *path, int flags, mode_t mode)
+{
+	int			fd;
+
+#ifdef O_DIRECT
+	flags |= O_DIRECT;			/* native flag exists: just pass it to open() */
+#endif
+
+	fd = open(path, flags, mode);
+
+#if !defined(O_DIRECT) && defined(F_NOCACHE)
+	if (fd >= 0 && fcntl(fd, F_NOCACHE, 1) < 0)
+	{
+		int			save_errno = errno; /* keep fcntl()'s errno across close() */
+
+		close(fd);
+		errno = save_errno;
+		return -1;				/* same failure convention as open() */
+	}
+#endif
+
+	return fd;					/* whatever open() returned, -1 on failure */
+}
+
 static void
 test_sync(int writes_per_op)
 {
@@ -279,7 +306,7 @@ test_sync(int writes_per_op)
 	fflush(stdout);
 
 #ifdef OPEN_DATASYNC_FLAG
-	if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT | PG_BINARY, 0)) == -1)
+	if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
 	{
 		printf(NA_FORMAT, _("n/a*"));
 		fs_warning = true;
@@ -386,7 +413,7 @@ test_sync(int writes_per_op)
 	fflush(stdout);
 
 #ifdef OPEN_SYNC_FLAG
-	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
+	if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
 	{
 		printf(NA_FORMAT, _("n/a*"));
 		fs_warning = true;
@@ -454,7 +481,7 @@ test_open_sync(const char *msg, int writes_size)
 	fflush(stdout);
 
 #ifdef OPEN_SYNC_FLAG
-	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
+	if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
 		printf(NA_FORMAT, _("n/a*"));
 	else
 	{
diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h
index 0940b64ca6..60348d1850 100644
--- a/src/include/access/xlogdefs.h
+++ b/src/include/access/xlogdefs.h
@@ -64,21 +64,6 @@ typedef uint32 TimeLineID;
  */
 typedef uint16 RepOriginId;
 
-/*
- *	Because O_DIRECT bypasses the kernel buffers, and because we never
- *	read those buffers except during crash recovery or if wal_level != minimal,
- *	it is a win to use it in all cases where we sync on each write().  We could
- *	allow O_DIRECT with fsync(), but it is unclear if fsync() could process
- *	writes not buffered in the kernel.  Also, O_DIRECT is never enough to force
- *	data to the drives, it merely tries to bypass the kernel cache, so we still
- *	need O_SYNC/O_DSYNC.
- */
-#ifdef O_DIRECT
-#define PG_O_DIRECT				O_DIRECT
-#else
-#define PG_O_DIRECT				0
-#endif
-
 /*
  * This chunk of hackery attempts to determine which file sync methods
  * are available on the current platform, and to choose an appropriate
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 5b3c280dd7..3584d59226 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -79,6 +79,22 @@ extern int	max_safe_fds;
 #define FILE_POSSIBLY_DELETED(err)	((err) == ENOENT || (err) == EACCES)
 #endif
 
+/*
+ * O_DIRECT is not standard, but almost every Unix has it and we simulate it
+ * for Windows in port.h/open.c.  Where only F_NOCACHE exists (macOS), fd.c
+ * strips PG_O_DIRECT before open() and applies fcntl(F_NOCACHE) afterwards.
+ * We use a private flag rather than defining our own arbitrary O_DIRECT on a
+ * Unix system, which might confuse other code that reaches open() directly.
+ */
+#if defined(O_DIRECT)
+#define		PG_O_DIRECT O_DIRECT
+#elif defined(F_NOCACHE)
+#define		PG_O_DIRECT 0x80000000	/* must not collide with real open() flags */
+#define		PG_O_DIRECT_USE_F_NOCACHE
+#else
+#define		PG_O_DIRECT 0			/* no direct I/O: OR-ing this in is a no-op */
+#endif
+
 /*
  * prototypes for functions in fd.c
  */
-- 
2.24.3 (Apple Git-128)

