On Fri, Jun 08, 2018 at 02:10:44PM +0200, William Dauchy wrote:
> On Thu, Jun 07, 2018 at 11:50:45AM +0200, William Lallemand wrote:
> > Sorry for the late reply, I manage to reproduce and fix what seams to be 
> > the bug.
> > The signal management was not handled correctly with threads.
> > Could you try those patches and see if it fixes the problem?
> 
> Thanks a lot for the proposed patches!
> I started a new test yesterday evening, and it seems ok so far, so I
> deployed on two new machines this morning.
> I hope I don't speak too fast but it looks promising because I was
> previously able to reproduce in a few hours maximum.
> 

That's great news!
 
Here's the new patches. It shouldn't change anything to the fix, it only
changes the sigprocmask to pthread_sigmask.

-- 
William Lallemand
>From f2942335e76d98df0da05235b9d8df95bc5636b0 Mon Sep 17 00:00:00 2001
From: William Lallemand <[email protected]>
Date: Thu, 7 Jun 2018 09:46:01 +0200
Subject: [PATCH 1/3] BUG/MEDIUM: threads: handle signal queue only in thread 0

Signals were handled in all threads which caused some signals to be lost
from time to time. To avoid complicated lock system (threads+signals),
we prefer handling the signals in one thread avoiding concurrent access.

The side effect of this bug was that some process were not leaving from
time to time during a reload.

This patch must be backported in 1.8.
---
 src/haproxy.c | 21 ++++++++++++++++++---
 src/signal.c  |  8 --------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/src/haproxy.c b/src/haproxy.c
index 766758f62..96b8abcf9 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -2402,8 +2402,10 @@ static void run_poll_loop()
 		/* Process a few tasks */
 		process_runnable_tasks();
 
-		/* check if we caught some signals and process them */
-		signal_process_queue();
+		/* check if we caught some signals and process them in the
+		 first thread */
+		if (tid == 0)
+			signal_process_queue();
 
 		/* Check if we can expire some tasks */
 		next = wake_expired_tasks();
@@ -2418,7 +2420,7 @@ static void run_poll_loop()
 			activity[tid].wake_cache++;
 		else if (active_tasks_mask & tid_bit)
 			activity[tid].wake_tasks++;
-		else if (signal_queue_len)
+		else if (signal_queue_len && tid == 0)
 			activity[tid].wake_signal++;
 		else
 			exp = next;
@@ -3008,6 +3010,7 @@ int main(int argc, char **argv)
 		unsigned int *tids    = calloc(global.nbthread, sizeof(unsigned int));
 		pthread_t    *threads = calloc(global.nbthread, sizeof(pthread_t));
 		int          i;
+		sigset_t     blocked_sig, old_sig;
 
 		THREAD_SYNC_INIT((1UL << global.nbthread) - 1);
 
@@ -3015,6 +3018,15 @@ int main(int argc, char **argv)
 		for (i = 0; i < global.nbthread; i++)
 			tids[i] = i;
 
+		/* ensure the signals will be blocked in every thread */
+		sigfillset(&blocked_sig);
+		sigdelset(&blocked_sig, SIGPROF);
+		sigdelset(&blocked_sig, SIGBUS);
+		sigdelset(&blocked_sig, SIGFPE);
+		sigdelset(&blocked_sig, SIGILL);
+		sigdelset(&blocked_sig, SIGSEGV);
+		pthread_sigmask(SIG_SETMASK, &blocked_sig, &old_sig);
+
 		/* Create nbthread-1 thread. The first thread is the current process */
 		threads[0] = pthread_self();
 		for (i = 1; i < global.nbthread; i++)
@@ -3048,6 +3060,9 @@ int main(int argc, char **argv)
 		}
 #endif /* !USE_CPU_AFFINITY */
 
+		/* when multithreading we need to let only the thread 0 handle the signals */
+		pthread_sigmask(SIG_SETMASK, &old_sig, NULL);
+
 		/* Finally, start the poll loop for the first thread */
 		run_thread_poll_loop(&tids[0]);
 
diff --git a/src/signal.c b/src/signal.c
index a0975910b..f1f682188 100644
--- a/src/signal.c
+++ b/src/signal.c
@@ -31,7 +31,6 @@ struct pool_head *pool_head_sig_handlers = NULL;
 sigset_t blocked_sig;
 int signal_pending = 0; /* non-zero if t least one signal remains unprocessed */
 
-__decl_hathreads(HA_SPINLOCK_T signals_lock);
 
 /* Common signal handler, used by all signals. Received signals are queued.
  * Signal number zero has a specific status, as it cannot be delivered by the
@@ -71,9 +70,6 @@ void __signal_process_queue()
 	struct signal_descriptor *desc;
 	sigset_t old_sig;
 
-	if (HA_SPIN_TRYLOCK(SIGNALS_LOCK, &signals_lock))
-		return;
-
 	/* block signal delivery during processing */
 	sigprocmask(SIG_SETMASK, &blocked_sig, &old_sig);
 
@@ -100,7 +96,6 @@ void __signal_process_queue()
 
 	/* restore signal delivery */
 	sigprocmask(SIG_SETMASK, &old_sig, NULL);
-	HA_SPIN_UNLOCK(SIGNALS_LOCK, &signals_lock);
 }
 
 /* perform minimal intializations, report 0 in case of error, 1 if OK. */
@@ -112,8 +107,6 @@ int signal_init()
 	memset(signal_queue, 0, sizeof(signal_queue));
 	memset(signal_state, 0, sizeof(signal_state));
 
-	HA_SPIN_INIT(&signals_lock);
-
 	/* Ensure signals are not blocked. Some shells or service managers may
 	 * accidently block all of our signals unfortunately, causing lots of
 	 * zombie processes to remain in the background during reloads.
@@ -148,7 +141,6 @@ void deinit_signals()
 			pool_free(pool_head_sig_handlers, sh);
 		}
 	}
-	HA_SPIN_DESTROY(&signals_lock);
 }
 
 /* Register a function and an integer argument on a signal. A pointer to the
-- 
2.16.1

>From ed34186a587934b86e448547fddf597a013e3e37 Mon Sep 17 00:00:00 2001
From: William Lallemand <[email protected]>
Date: Thu, 7 Jun 2018 09:49:04 +0200
Subject: [PATCH 2/3] BUG/MINOR: don't ignore SIG{BUS,FPE,ILL,SEGV} during
 signal processing

We don't have any reason of blocking those signals.

If SIGBUS, SIGFPE, SIGILL, or SIGSEGV are generated while they are blocked, the
result is undefined, unless the signal was generated by kill(2), sigqueue(3), or
raise(3).

This should be backported to 1.8.
---
 src/signal.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/signal.c b/src/signal.c
index f1f682188..0dadd762c 100644
--- a/src/signal.c
+++ b/src/signal.c
@@ -120,6 +120,14 @@ int signal_init()
 
 	sigfillset(&blocked_sig);
 	sigdelset(&blocked_sig, SIGPROF);
+	/* man sigprocmask: If SIGBUS, SIGFPE, SIGILL, or SIGSEGV are
+	   generated while they are blocked, the result is undefined, unless
+	   the signal was generated by kill(2),
+	   sigqueue(3), or raise(3) */
+	sigdelset(&blocked_sig, SIGBUS);
+	sigdelset(&blocked_sig, SIGFPE);
+	sigdelset(&blocked_sig, SIGILL);
+	sigdelset(&blocked_sig, SIGSEGV);
 	for (sig = 0; sig < MAX_SIGNAL; sig++)
 		LIST_INIT(&signal_state[sig].handlers);
 
-- 
2.16.1

>From 723985e4f4b5b67b9d843636151d4c148e392f21 Mon Sep 17 00:00:00 2001
From: William Lallemand <[email protected]>
Date: Thu, 7 Jun 2018 11:23:40 +0200
Subject: [PATCH 3/3] BUG/MINOR: signals: ha_sigmask macro for multithreading

The behavior of sigprocmask in an multithreaded environment is
undefined.

The new macro ha_sigmask() calls either pthreads_sigmask() or
sigprocmask() if haproxy was built with thread support or not.

This should be backported to 1.8.
---
 include/common/hathreads.h | 6 ++++++
 src/checks.c               | 4 ++--
 src/haproxy.c              | 4 ++--
 src/signal.c               | 8 +++++---
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/include/common/hathreads.h b/include/common/hathreads.h
index c25f69102..5c4ceca7c 100644
--- a/include/common/hathreads.h
+++ b/include/common/hathreads.h
@@ -108,6 +108,9 @@ extern THREAD_LOCAL unsigned long tid_bit; /* The bit corresponding to the threa
 #define HA_RWLOCK_TRYRDLOCK(lbl, l)   ({ 0; })
 #define HA_RWLOCK_RDUNLOCK(lbl, l) do { /* do nothing */ } while(0)
 
+#define ha_sigmask(how, set, oldset)  sigprocmask(how, set, oldset)
+
+
 static inline void __ha_barrier_load(void)
 {
 }
@@ -259,6 +262,9 @@ int  thread_need_sync(void);
 
 extern unsigned long all_threads_mask;
 
+#define ha_sigmask(how, set, oldset)  pthread_sigmask(how, set, oldset)
+
+
 #if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
 
 /* WARNING!!! if you update this enum, please also keep lock_label() up to date below */
diff --git a/src/checks.c b/src/checks.c
index 3f4c0e1b3..3cae94d51 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -1618,7 +1618,7 @@ void block_sigchld(void)
 	sigset_t set;
 	sigemptyset(&set);
 	sigaddset(&set, SIGCHLD);
-	assert(sigprocmask(SIG_BLOCK, &set, NULL) == 0);
+	assert(ha_sigmask(SIG_BLOCK, &set, NULL) == 0);
 }
 
 void unblock_sigchld(void)
@@ -1626,7 +1626,7 @@ void unblock_sigchld(void)
 	sigset_t set;
 	sigemptyset(&set);
 	sigaddset(&set, SIGCHLD);
-	assert(sigprocmask(SIG_UNBLOCK, &set, NULL) == 0);
+	assert(ha_sigmask(SIG_UNBLOCK, &set, NULL) == 0);
 }
 
 static struct pid_list *pid_list_add(pid_t pid, struct task *t)
diff --git a/src/haproxy.c b/src/haproxy.c
index 96b8abcf9..1c0455c56 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -508,7 +508,7 @@ static void mworker_block_signals()
 	sigaddset(&set, SIGHUP);
 	sigaddset(&set, SIGINT);
 	sigaddset(&set, SIGTERM);
-	sigprocmask(SIG_SETMASK, &set, NULL);
+	ha_sigmask(SIG_SETMASK, &set, NULL);
 }
 
 static void mworker_unblock_signals()
@@ -521,7 +521,7 @@ static void mworker_unblock_signals()
 	sigaddset(&set, SIGHUP);
 	sigaddset(&set, SIGINT);
 	sigaddset(&set, SIGTERM);
-	sigprocmask(SIG_UNBLOCK, &set, NULL);
+	ha_sigmask(SIG_UNBLOCK, &set, NULL);
 }
 
 static void mworker_unregister_signals()
diff --git a/src/signal.c b/src/signal.c
index 0dadd762c..4bee7d70e 100644
--- a/src/signal.c
+++ b/src/signal.c
@@ -13,6 +13,8 @@
 #include <signal.h>
 #include <string.h>
 
+#include <common/hathreads.h>
+
 #include <proto/signal.h>
 #include <proto/log.h>
 #include <proto/task.h>
@@ -71,7 +73,7 @@ void __signal_process_queue()
 	sigset_t old_sig;
 
 	/* block signal delivery during processing */
-	sigprocmask(SIG_SETMASK, &blocked_sig, &old_sig);
+	ha_sigmask(SIG_SETMASK, &blocked_sig, &old_sig);
 
 	/* It is important that we scan the queue forwards so that we can
 	 * catch any signal that would have been queued by another signal
@@ -95,7 +97,7 @@ void __signal_process_queue()
 	signal_queue_len = 0;
 
 	/* restore signal delivery */
-	sigprocmask(SIG_SETMASK, &old_sig, NULL);
+	ha_sigmask(SIG_SETMASK, &old_sig, NULL);
 }
 
 /* perform minimal intializations, report 0 in case of error, 1 if OK. */
@@ -116,7 +118,7 @@ int signal_init()
 	 * parsing We don't want the process to be killed by an unregistered
 	 * USR2 signal when the master-worker is reloading */
 	sigaddset(&blocked_sig, SIGUSR2);
-	sigprocmask(SIG_SETMASK, &blocked_sig, NULL);
+	ha_sigmask(SIG_SETMASK, &blocked_sig, NULL);
 
 	sigfillset(&blocked_sig);
 	sigdelset(&blocked_sig, SIGPROF);
-- 
2.16.1

Reply via email to