Hi John,

On Wed, May 07, 2014 at 10:54:33PM +0200, John-Paul Bader wrote:
> Hmm yeah I noticed from what you wrote in the mail and by reading 
> through the patch - but still it confirmed that the shared pthread thing 
> was not available on FreeBSD right?

Yes that's it. Old freebsd code did not return an error for this, and
haproxy did not check this error. Newer freebsd code now does return
an error, but haproxy still didn't check. Emeric's patch introduced
the test for the feature. Note that older freebsd versions will still
pretend to work well but be broken, hence the proposal to remove
pthread by default since there's no reliable way of detecting its
support at runtime.

> Would I also need to compile with USE_PRIVATE_CACHE=1 or would you patch 
> take care of that?

No you don't need it anymore.

> When it uses the private cache, I would also have to change the 
> configuration to allow ssl sessions over multiple http requests right?

No you don't need to change anymore, what Emeric's patch does is to
reimplement a hand-crafted spinlock mechanism. I just ran a few tests
here and at 4.5k conn/s spread over 4 processes I see that the lock is
already held about 1% of the time, which is very low and does not justify
using a syscall to sleep.

I'm appending the two patches for you to test. They're to be applied on
top of latest master, but I think it will be OK on yours (provided you
don't already have previous patches from Emeric).

You don't need to pass any specific options to the Makefile, it defaults
to using this implementation.

Once you confirm that these ones fix your issue, I'll merge them.

Thanks!
Willy

>From 13dea9e46ccb84655a5f945f076a0e03327515a5 Mon Sep 17 00:00:00 2001
From: Emeric Brun <eb...@exceliance.fr>
Date: Wed, 7 May 2014 16:10:18 +0200
Subject: BUG/MAJOR: ssl: Fallback to private session cache if current lock
 mode is not supported.

Process shared mutex seems not supported on some OSs (FreeBSD).

This patch checks errors on mutex lock init to fallback
on a private session cache (per process cache) in error cases.
---
 include/proto/shctx.h |  3 +++
 src/cfgparse.c        | 18 ++++++++++++++----
 src/shctx.c           | 29 +++++++++++++++++++++++------
 3 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/include/proto/shctx.h b/include/proto/shctx.h
index a84e4a6..e0c695d 100644
--- a/include/proto/shctx.h
+++ b/include/proto/shctx.h
@@ -28,6 +28,9 @@
 #define SHCTX_APPNAME "haproxy"
 #endif
 
+#define SHCTX_E_ALLOC_CACHE -1
+#define SHCTX_E_INIT_LOCK   -2
+
 /* Allocate shared memory context.
  * <size> is the number of allocated blocks into cache (default 128 bytes)
  * A block is large enough to contain a classic session (without client cert)
diff --git a/src/cfgparse.c b/src/cfgparse.c
index c4f092f..7176b59 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -6744,6 +6744,8 @@ out_uri_auth_compat:
                 * remains NULL so that listeners can later detach.
                 */
                list_for_each_entry(bind_conf, &curproxy->conf.bind, by_fe) {
+                       int alloc_ctx;
+
                        if (!bind_conf->is_ssl) {
                                if (bind_conf->default_ctx) {
                                        Warning("Proxy '%s': A certificate was 
specified but SSL was not enabled on bind '%s' at [%s:%d] (use 'ssl').\n",
@@ -6758,10 +6760,18 @@ out_uri_auth_compat:
                                continue;
                        }
 
-                       if (shared_context_init(global.tune.sslcachesize, 
(global.nbproc > 1) ? 1 : 0) < 0) {
-                               Alert("Unable to allocate SSL session 
cache.\n");
-                               cfgerr++;
-                               continue;
+                       alloc_ctx = 
shared_context_init(global.tune.sslcachesize, (global.nbproc > 1) ? 1 : 0);
+                       if (alloc_ctx < 0) {
+                               if (alloc_ctx == SHCTX_E_INIT_LOCK) {
+                                       Warning("Unable to init lock for the 
shared SSL session cache. Falling back to private cache.\n");
+                                       alloc_ctx = 
shared_context_init(global.tune.sslcachesize, 0);
+                               }
+
+                               if (alloc_ctx < 0) {
+                                       Alert("Unable to allocate SSL session 
cache.\n");
+                                       cfgerr++;
+                                       continue;
+                               }
                        }
 
                        /* initialize all certificate contexts */
diff --git a/src/shctx.c b/src/shctx.c
index f259b9c..86e6056 100644
--- a/src/shctx.c
+++ b/src/shctx.c
@@ -532,19 +532,36 @@ int shared_context_init(int size, int shared)
                                              PROT_READ | PROT_WRITE, maptype | 
MAP_ANON, -1, 0);
        if (!shctx || shctx == MAP_FAILED) {
                shctx = NULL;
-               return -1;
+               return SHCTX_E_ALLOC_CACHE;
        }
 
 #ifndef USE_PRIVATE_CACHE
+       if (maptype == MAP_SHARED) {
 #ifdef USE_SYSCALL_FUTEX
-       shctx->waiters = 0;
+               shctx->waiters = 0;
 #else
-       pthread_mutexattr_init(&attr);
-       pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
-       pthread_mutex_init(&shctx->mutex, &attr);
+               if (pthread_mutexattr_init(&attr)) {
+                       munmap(shctx, sizeof(struct 
shared_context)+(size*sizeof(struct shared_block)));
+                       shctx = NULL;
+                       return SHCTX_E_INIT_LOCK;
+               }
+
+               if (pthread_mutexattr_setpshared(&attr, 
PTHREAD_PROCESS_SHARED)) {
+                       pthread_mutexattr_destroy(&attr);
+                       munmap(shctx, sizeof(struct 
shared_context)+(size*sizeof(struct shared_block)));
+                       shctx = NULL;
+                       return SHCTX_E_INIT_LOCK;
+               }
+
+               if (pthread_mutex_init(&shctx->mutex, &attr)) {
+                       pthread_mutexattr_destroy(&attr);
+                       munmap(shctx, sizeof(struct 
shared_context)+(size*sizeof(struct shared_block)));
+                       shctx = NULL;
+                       return SHCTX_E_INIT_LOCK;
+               }
 #endif
-       if (maptype == MAP_SHARED)
                use_shared_mem = 1;
+       }
 #endif
 
        memset(&shctx->active.data.session.key, 0, sizeof(struct ebmb_node));
-- 
1.7.12.1

>From 7193e99a15733e74ad93283426135d68e7d990d9 Mon Sep 17 00:00:00 2001
From: Emeric Brun <eb...@exceliance.fr>
Date: Wed, 7 May 2014 23:11:42 +0200
Subject: MAJOR: ssl: Change default locks on ssl session cache.

Prevously pthread process shared lock were used by default,
if USE_SYSCALL_FUTEX is not specified.

This patch implements an OS independant kind of lock:
An active spinlock is usedf if USE_SYSCALL_FUTEX is not specified.

The old behavior is still available if USE_PTHREAD_PSHARED=1.
---
 Makefile    |  8 ++++--
 src/shctx.c | 90 ++++++++++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 72 insertions(+), 26 deletions(-)

diff --git a/Makefile b/Makefile
index f95ba03..f722e11 100644
--- a/Makefile
+++ b/Makefile
@@ -17,6 +17,7 @@
 #   USE_PCRE_JIT         : enable JIT for faster regex on libpcre >= 8.32
 #   USE_POLL             : enable poll(). Automatic.
 #   USE_PRIVATE_CACHE    : disable shared memory cache of ssl sessions.
+#   USE_PTHREAD_PSHARED  : enable pthread process shared mutex on sslcache.
 #   USE_REGPARM          : enable regparm optimization. Recommended on x86.
 #   USE_STATIC_PCRE      : enable static libpcre. Recommended.
 #   USE_TPROXY           : enable transparent proxy. Automatic.
@@ -536,10 +537,13 @@ OPTIONS_OBJS  += src/ssl_sock.o src/shctx.o
 ifneq ($(USE_PRIVATE_CACHE),)
 OPTIONS_CFLAGS  += -DUSE_PRIVATE_CACHE
 else
+ifneq ($(USE_PTHREAD_PSHARED),)
+OPTIONS_CFLAGS  += -DUSE_PTHREAD_PSHARED
+OPTIONS_LDFLAGS += -lpthread
+else
 ifneq ($(USE_FUTEX),)
 OPTIONS_CFLAGS  += -DUSE_SYSCALL_FUTEX
-else
-OPTIONS_LDFLAGS += -lpthread
+endif
 endif
 endif
 endif
diff --git a/src/shctx.c b/src/shctx.c
index 86e6056..f33b7ca 100644
--- a/src/shctx.c
+++ b/src/shctx.c
@@ -13,6 +13,9 @@
 
 #include <sys/mman.h>
 #ifndef USE_PRIVATE_CACHE
+#ifdef USE_PTHREAD_PSHARED
+#include <pthread.h>
+#else
 #ifdef USE_SYSCALL_FUTEX
 #include <unistd.h>
 #ifndef u32
@@ -20,9 +23,8 @@
 #endif
 #include <linux/futex.h>
 #include <sys/syscall.h>
-#else /* USE_SYSCALL_FUTEX */
-#include <pthread.h>
-#endif /* USE_SYSCALL_FUTEX */
+#endif
+#endif
 #endif
 #include <arpa/inet.h>
 #include "ebmbtree.h"
@@ -60,10 +62,10 @@ struct shared_block {
 
 struct shared_context {
 #ifndef USE_PRIVATE_CACHE
-#ifdef USE_SYSCALL_FUTEX
-       unsigned int waiters;
-#else /* USE_SYSCALL_FUTEX */
+#ifdef USE_PTHREAD_PSHARED
        pthread_mutex_t mutex;
+#else
+       unsigned int waiters;
 #endif
 #endif
        struct shsess_packet_hdr upd;
@@ -75,17 +77,63 @@ struct shared_context {
 
 /* Static shared context */
 static struct shared_context *shctx = NULL;
-#ifndef USE_PRIVATE_CACHE
-static int use_shared_mem = 0;
-#endif
 
 /* Lock functions */
-#ifdef USE_PRIVATE_CACHE
+
+#if defined (USE_PRIVATE_CACHE)
+
 #define shared_context_lock()
 #define shared_context_unlock()
 
+#elif defined (USE_PTHREAD_PSHARED)
+static int use_shared_mem = 0;
+
+#define shared_context_lock()   if (use_shared_mem) 
pthread_mutex_lock(&shctx->mutex)
+#define shared_context_unlock() if (use_shared_mem) 
pthread_mutex_unlock(&shctx->mutex)
+
 #else
+static int use_shared_mem = 0;
+
 #ifdef USE_SYSCALL_FUTEX
+static inline void _shared_context_wait4lock(unsigned int *count, unsigned int 
*uaddr, int value)
+{
+       syscall(SYS_futex, uaddr, FUTEX_WAIT, value, NULL, 0, 0);
+}
+
+static inline void _shared_context_awakelocker(unsigned int *uaddr)
+{
+       syscall(SYS_futex, uaddr, FUTEX_WAKE, 1, NULL, 0, 0);
+}
+
+#else /* internal spin lock */
+
+#if defined (__i486__) || defined (__i586__) || defined (__i686__) || defined 
(__x86_64__)
+static inline void relax()
+{
+       __asm volatile("rep;nop\n" ::: "memory");
+}
+#else /* if no x86_64 or i586 arch: use less optimized but generic asm */
+static inline void relax()
+{
+       __asm volatile("" ::: "memory");
+}
+#endif
+
+static inline void _shared_context_wait4lock(unsigned int *count, unsigned int 
*uaddr, int value)
+{
+        int i;
+
+        for (i = 0; i < *count; i++) {
+                relax();
+                relax();
+        }
+        *count = *count << 1;
+}
+
+#define _shared_context_awakelocker(a)
+
+#endif
+
 #if defined (__i486__) || defined (__i586__) || defined (__i686__) || defined 
(__x86_64__)
 static inline unsigned int xchg(unsigned int *ptr, unsigned int x)
 {
@@ -139,6 +187,7 @@ static inline unsigned char atomic_dec(unsigned int *ptr)
 static inline void _shared_context_lock(void)
 {
        unsigned int x;
+       unsigned int count = 4;
 
        x = cmpxchg(&shctx->waiters, 0, 1);
        if (x) {
@@ -146,7 +195,7 @@ static inline void _shared_context_lock(void)
                        x = xchg(&shctx->waiters, 2);
 
                while (x) {
-                       syscall(SYS_futex, &shctx->waiters, FUTEX_WAIT, 2, 
NULL, 0, 0);
+                       _shared_context_wait4lock(&count, &shctx->waiters, 2);
                        x = xchg(&shctx->waiters, 2);
                }
        }
@@ -156,7 +205,7 @@ static inline void _shared_context_unlock(void)
 {
        if (atomic_dec(&shctx->waiters)) {
                shctx->waiters = 0;
-               syscall(SYS_futex, &shctx->waiters, FUTEX_WAKE, 1, NULL, 0, 0);
+               _shared_context_awakelocker(&shctx->waiters);
        }
 }
 
@@ -164,13 +213,6 @@ static inline void _shared_context_unlock(void)
 
 #define shared_context_unlock() if (use_shared_mem) _shared_context_unlock()
 
-#else /* USE_SYSCALL_FUTEX */
-
-#define shared_context_lock()   if (use_shared_mem) 
pthread_mutex_lock(&shctx->mutex)
-
-#define shared_context_unlock() if (use_shared_mem) 
pthread_mutex_unlock(&shctx->mutex)
-
-#endif
 #endif
 
 /* List Macros */
@@ -508,9 +550,9 @@ int shared_context_init(int size, int shared)
 {
        int i;
 #ifndef USE_PRIVATE_CACHE
-#ifndef USE_SYSCALL_FUTEX
+#ifdef USE_PTHREAD_PSHARED
        pthread_mutexattr_t attr;
-#endif /* USE_SYSCALL_FUTEX */
+#endif
 #endif
        struct shared_block *prev,*cur;
        int maptype = MAP_PRIVATE;
@@ -537,9 +579,7 @@ int shared_context_init(int size, int shared)
 
 #ifndef USE_PRIVATE_CACHE
        if (maptype == MAP_SHARED) {
-#ifdef USE_SYSCALL_FUTEX
-               shctx->waiters = 0;
-#else
+#ifdef USE_PTHREAD_PSHARED
                if (pthread_mutexattr_init(&attr)) {
                        munmap(shctx, sizeof(struct 
shared_context)+(size*sizeof(struct shared_block)));
                        shctx = NULL;
@@ -559,6 +599,8 @@ int shared_context_init(int size, int shared)
                        shctx = NULL;
                        return SHCTX_E_INIT_LOCK;
                }
+#else
+               shctx->waiters = 0;
 #endif
                use_shared_mem = 1;
        }
-- 
1.7.12.1

Reply via email to