On Sat, May 14, 2022 at 10:25 AM Thomas Munro <thomas.mu...@gmail.com> wrote:
> Japin, are you able to reproduce the problem reliably?  Did I guess
> right, that you're on illumos?  Does this help?  I used
> defined(__sun__) to select the option, but I don't remember if that's
> the right way to detect that OS family, could you confirm that, or
> adjust as required?

Better version.  Now you can independently set -DWAIT_USE_{POLL,EPOLL}
and -DWAIT_USE_{SELF_PIPE,SIGNALFD} for testing, and it picks a
sensible default.
From 4f3027b35d5f25da37836b68d81fbcfb077e41d5 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 14 May 2022 10:15:30 +1200
Subject: [PATCH v2] Don't trust signalfd() on illumos.

Allow the choice between signalfd vs self-pipe to be controlled
independently of the choice between epoll() and poll().  Make the
default choice the same as before (signalfd + epoll() for Linux systems,
self-pipe + poll() for other Unixes that don't have kqueue), except on
illumos where it's self-pipe + epoll().  We don't have a very good
understanding of why, yet, but its signalfd doesn't seem to behave the
same as Linux in some edge case that leads to lost wakeups.  This way,
illumos users get a working default that doesn't give up the benefits of
epoll() over poll().

Back-patch to 14, where use of signalfd() appeared.

Discussion: https://postgr.es/m/MEYP282MB1669C8D88F0997354C2313C1B6CA9%40MEYP282MB1669.AUSP282.PROD.OUTLOOK.COM
---
 src/backend/storage/ipc/latch.c | 58 +++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 18 deletions(-)

diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index 78c6a89271..c99dbb9f46 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -72,7 +72,7 @@
 #if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
 	defined(WAIT_USE_KQUEUE) || defined(WAIT_USE_WIN32)
 /* don't overwrite manual choice */
-#elif defined(HAVE_SYS_EPOLL_H) && defined(HAVE_SYS_SIGNALFD_H)
+#elif defined(HAVE_SYS_EPOLL_H)
 #define WAIT_USE_EPOLL
 #elif defined(HAVE_KQUEUE)
 #define WAIT_USE_KQUEUE
@@ -84,6 +84,22 @@
 #error "no wait set implementation available"
 #endif
 
+/*
+ * By default, we use a self-pipe with poll() and a signalfd with epoll(), if
+ * available.  We avoid signalfd on illumos because it doesn't seem to work
+ * reliably.  For testing the choice can also be manually specified.
+ */
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
+#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
+/* don't overwrite manual choice */
+#elif defined(WAIT_USE_EPOLL) && defined(HAVE_SYS_SIGNALFD_H) && \
+	!defined(__sun__)
+#define WAIT_USE_SIGNALFD
+#else
+#define WAIT_USE_SELF_PIPE
+#endif
+#endif
+
 /* typedef in latch.h */
 struct WaitEventSet
 {
@@ -146,12 +162,12 @@ static WaitEventSet *LatchWaitSet;
 static volatile sig_atomic_t waiting = false;
 #endif
 
-#ifdef WAIT_USE_EPOLL
+#ifdef WAIT_USE_SIGNALFD
 /* On Linux, we'll receive SIGURG via a signalfd file descriptor. */
 static int	signal_fd = -1;
 #endif
 
-#if defined(WAIT_USE_POLL)
+#ifdef WAIT_USE_SELF_PIPE
 /* Read and write ends of the self-pipe */
 static int	selfpipe_readfd = -1;
 static int	selfpipe_writefd = -1;
@@ -164,7 +180,7 @@ static void latch_sigurg_handler(SIGNAL_ARGS);
 static void sendSelfPipeByte(void);
 #endif
 
-#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
+#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
 static void drain(void);
 #endif
 
@@ -190,7 +206,7 @@ static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 void
 InitializeLatchSupport(void)
 {
-#if defined(WAIT_USE_POLL)
+#if defined(WAIT_USE_SELF_PIPE)
 	int			pipefd[2];
 
 	if (IsUnderPostmaster)
@@ -264,7 +280,7 @@ InitializeLatchSupport(void)
 	pqsignal(SIGURG, latch_sigurg_handler);
 #endif
 
-#ifdef WAIT_USE_EPOLL
+#ifdef WAIT_USE_SIGNALFD
 	sigset_t	signalfd_mask;
 
 	/* Block SIGURG, because we'll receive it through a signalfd. */
@@ -316,7 +332,7 @@ ShutdownLatchSupport(void)
 		LatchWaitSet = NULL;
 	}
 
-#if defined(WAIT_USE_POLL)
+#if defined(WAIT_USE_SELF_PIPE)
 	close(selfpipe_readfd);
 	close(selfpipe_writefd);
 	selfpipe_readfd = -1;
@@ -324,7 +340,7 @@ ShutdownLatchSupport(void)
 	selfpipe_owner_pid = InvalidPid;
 #endif
 
-#if defined(WAIT_USE_EPOLL)
+#if defined(WAIT_USE_SIGNALFD)
 	close(signal_fd);
 	signal_fd = -1;
 #endif
@@ -341,9 +357,12 @@ InitLatch(Latch *latch)
 	latch->owner_pid = MyProcPid;
 	latch->is_shared = false;
 
-#if defined(WAIT_USE_POLL)
+#if defined(WAIT_USE_SELF_PIPE)
 	/* Assert InitializeLatchSupport has been called in this process */
 	Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
+#elif defined(WAIT_USE_SIGNALFD)
+	/* Assert InitializeLatchSupport has been called in this process */
+	Assert(signal_fd >= 0);
 #elif defined(WAIT_USE_WIN32)
 	latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
 	if (latch->event == NULL)
@@ -405,9 +424,12 @@ OwnLatch(Latch *latch)
 	/* Sanity checks */
 	Assert(latch->is_shared);
 
-#if defined(WAIT_USE_POLL)
+#if defined(WAIT_USE_SELF_PIPE)
 	/* Assert InitializeLatchSupport has been called in this process */
 	Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
+#elif defined(WAIT_USE_SIGNALFD)
+	/* Assert InitializeLatchSupport has been called in this process */
+	Assert(signal_fd >= 0);
 #endif
 
 	if (latch->owner_pid != 0)
@@ -617,7 +639,7 @@ SetLatch(Latch *latch)
 		return;
 	else if (owner_pid == MyProcPid)
 	{
-#if defined(WAIT_USE_POLL)
+#if defined(WAIT_USE_SELF_PIPE)
 		if (waiting)
 			sendSelfPipeByte();
 #else
@@ -904,9 +926,9 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
 	{
 		set->latch = latch;
 		set->latch_pos = event->pos;
-#if defined(WAIT_USE_POLL)
+#if defined(WAIT_USE_SELF_PIPE)
 		event->fd = selfpipe_readfd;
-#elif defined(WAIT_USE_EPOLL)
+#elif defined(WAIT_USE_SIGNALFD)
 		event->fd = signal_fd;
 #else
 		event->fd = PGINVALID_SOCKET;
@@ -2083,7 +2105,7 @@ GetNumRegisteredWaitEvents(WaitEventSet *set)
 	return set->nevents;
 }
 
-#if defined(WAIT_USE_POLL)
+#if defined(WAIT_USE_SELF_PIPE)
 
 /*
  * SetLatch uses SIGURG to wake up the process waiting on the latch.
@@ -2134,7 +2156,7 @@ retry:
 
 #endif
 
-#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
+#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
 
 /*
  * Read all available data from self-pipe or signalfd.
@@ -2150,7 +2172,7 @@ drain(void)
 	int			rc;
 	int			fd;
 
-#ifdef WAIT_USE_POLL
+#ifdef WAIT_USE_SELF_PIPE
 	fd = selfpipe_readfd;
 #else
 	fd = signal_fd;
@@ -2168,7 +2190,7 @@ drain(void)
 			else
 			{
 				waiting = false;
-#ifdef WAIT_USE_POLL
+#ifdef WAIT_USE_SELF_PIPE
 				elog(ERROR, "read() on self-pipe failed: %m");
 #else
 				elog(ERROR, "read() on signalfd failed: %m");
@@ -2178,7 +2200,7 @@ drain(void)
 		else if (rc == 0)
 		{
 			waiting = false;
-#ifdef WAIT_USE_POLL
+#ifdef WAIT_USE_SELF_PIPE
 			elog(ERROR, "unexpected EOF on self-pipe");
 #else
 			elog(ERROR, "unexpected EOF on signalfd");
-- 
2.30.2

Reply via email to