Hi all,

the patch about SO_REUSEPORT proposed by Yingqi Lu in [1] and discussed in
[2] uses listeners buckets to address a defect [3] in the current linux
implementation (his patch goes beyond SO_REUSEPORT though, and suggests a
new MPM even when the option is not available).

Should this defect be addressed by linux folks, the event/worker/prefork
MPMs could take full advantage of the option (linux-3.9+) with quite simple
modifications of the current code.
I'm proposing here the corresponding patch.

The idea is to re-create and re-bind/listen the parent's listeners sockets
for each child process, when starting, before dropping priviledges.

For this, the patch introduces a new ap_reopen_listeners() function (meant
to be called by each child) to do the job on the inherited listeners. It
does nothing unless HAVE_SO_REUSEPORT is defined.

The advantage of this approach is that no accept mutex is needed anymore
(each child has its own sockets), hence the SAFE_ACCEPT macros can do
nothing when HAVE_SO_REUSEPORT is defined.
The new (incoming) connections are evenly distributed accross the children
for all the listners (as assured by Linux when SO_REUSEPORT is used).

I'm proposing the patch here so that everyone can figure out whether
SO_REUSEPORT per se needs its own MPM or not (once/if the defect is fixed).
The option seems to be quite easily pluggable to existing MPMs (no ABI
change), and I don't see an advantage to not using it when available (and
working).

Also, FreeBSD has an implementation of SO_REUSEPORT,
however 
I couldn't find whether it has the same scheduling garantee
or not 
(at least I guess the accept mutex can be avoided too).

Regarding the linux kernel defect, is someone aware of a fix/work on that
in the latest versions?

Finally, about the accept mutex, mpm event seems to work well without any,
why prefork and worker would need one (both poll() all the listeners in a
single thread, while other children can do the same)?

The patch follows and is attached.
It can already be tested with a workaround against the defect: don't let
perform_idle_server_maintenance() create/stop children after startup (eg.
StartServers, ServerLimit, Min/MaxSpareServers using the same value).

Thoughts, feedbacks welcome.

Regards,
Yann.

[1] https://issues.apache.org/bugzilla/show_bug.cgi?id=55897#c7
[2]
http://mail-archives.apache.org/mod_mbox/httpd-bugs/201312.mbox/%3cbug-55897-7...@https.issues.apache.org/bugzilla/%3E
[3] http://lwn.net/Articles/542629/ and http://lwn.net/Articles/542738/

Index: server/mpm/event/event.c
===================================================================
--- server/mpm/event/event.c    (revision 1575322)
+++ server/mpm/event/event.c    (working copy)
@@ -2356,6 +2356,13 @@ static void child_main(int child_num_arg)
     /*stuff to do before we switch id's, so we have permissions. */
     ap_reopen_scoreboard(pchild, NULL, 0);

+    rv = ap_reopen_listeners(pchild, num_listensocks);
+    if (rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO()
+                     "Couldn't re-open listeners");
+        clean_child_exit(APEXIT_CHILDFATAL);
+    }
+
     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
         clean_child_exit(APEXIT_CHILDFATAL);
     }
Index: server/mpm/prefork/prefork.c
===================================================================
--- server/mpm/prefork/prefork.c    (revision 1575322)
+++ server/mpm/prefork/prefork.c    (working copy)
@@ -271,7 +271,9 @@ static void accept_mutex_off(void)
  * multiple Listen statements.  Define SINGLE_LISTEN_UNSERIALIZED_ACCEPT
  * when it's safe in the single Listen case.
  */
-#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
+#if HAVE_SO_REUSEPORT
+#define SAFE_ACCEPT(stmt)
+#elif defined(SINGLE_LISTEN_UNSERIALIZED_ACCEPT)
 #define SAFE_ACCEPT(stmt) do {if (ap_listeners->next) {stmt;}} while(0)
 #else
 #define SAFE_ACCEPT(stmt) do {stmt;} while(0)
@@ -536,6 +538,13 @@ static void child_main(int child_num_arg)
         clean_child_exit(APEXIT_CHILDFATAL);
     }

+    status = ap_reopen_listeners(pchild, num_listensocks);
+    if (status != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_EMERG, status, ap_server_conf,
APLOGNO()
+                     "Couldn't re-open listeners");
+        clean_child_exit(APEXIT_CHILDFATAL);
+    }
+
     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
         clean_child_exit(APEXIT_CHILDFATAL);
     }
Index: server/mpm/worker/worker.c
===================================================================
--- server/mpm/worker/worker.c    (revision 1575322)
+++ server/mpm/worker/worker.c    (working copy)
@@ -220,7 +220,9 @@ static apr_os_thread_t *listener_os_thread;
 /* Locks for accept serialization */
 static apr_proc_mutex_t *accept_mutex;

-#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
+#if HAVE_SO_REUSEPORT
+#define SAFE_ACCEPT(stmt)
+#elif defined(SINGLE_LISTEN_UNSERIALIZED_ACCEPT)
 #define SAFE_ACCEPT(stmt) (ap_listeners->next ? (stmt) : APR_SUCCESS)
 #else
 #define SAFE_ACCEPT(stmt) (stmt)
@@ -1228,6 +1230,13 @@ static void child_main(int child_num_arg)
     /*stuff to do before we switch id's, so we have permissions.*/
     ap_reopen_scoreboard(pchild, NULL, 0);

+    rv = ap_reopen_listeners(pchild, num_listensocks);
+    if (rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO()
+                     "Couldn't re-open listeners");
+        clean_child_exit(APEXIT_CHILDFATAL);
+    }
+
     rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&accept_mutex,

apr_proc_mutex_lockfile(accept_mutex),
                                                pchild));
Index: server/listen.c
===================================================================
--- server/listen.c    (revision 1575322)
+++ server/listen.c    (working copy)
@@ -60,6 +60,9 @@ static apr_status_t make_sock(apr_pool_t *p, ap_li
 #endif
     apr_status_t stat;

+    /* until further notice */
+    server->sd = NULL;
+
 #ifndef WIN32
     stat = apr_socket_opt_set(s, APR_SO_REUSEADDR, one);
     if (stat != APR_SUCCESS && stat != APR_ENOTIMPL) {
@@ -125,6 +128,23 @@ static apr_status_t make_sock(apr_pool_t *p, ap_li
 #endif

     if (do_bind_listen) {
+#if HAVE_SO_REUSEPORT
+        {
+            int yes = 1;
+            apr_os_sock_t sd = -1;
+            apr_os_sock_get(&sd, s);
+            if (setsockopt(sd, SOL_SOCKET, SO_REUSEPORT,
+                           (void *)&yes, sizeof(int)) == -1) {
+                stat = errno;
+                ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+                              "make_sock: for address %pI, setsockopt: "
+                              "(SO_REUSEPORT)", server->bind_addr);
+                apr_socket_close(s);
+                return stat;
+            }
+        }
+#endif
+
 #if APR_HAVE_IPV6
         if (server->bind_addr->family == APR_INET6) {
             stat = apr_socket_opt_set(s, APR_IPV6_V6ONLY, v6only_setting);
@@ -727,6 +747,34 @@ AP_DECLARE(int) ap_setup_listeners(server_rec *s)
     return num_listeners;
 }

+AP_DECLARE(apr_status_t) ap_reopen_listeners(apr_pool_t *p, int num)
+{
+#if HAVE_SO_REUSEPORT
+    ap_listen_rec *lr;
+
+    for (lr = ap_listeners; lr && num; lr = lr->next, --num) {
+        apr_socket_t *sd = lr->sd, *s;
+
+        rv = apr_socket_create(&s, lr->bind_addr->family, SOCK_STREAM, 0,
p);
+        if (rv != APR_SUCCESS) {
+            return rv;
+        }
+        lr->sd = s;
+
+        rv = make_sock(p, lr, 1);
+        if (rv != APR_SUCCESS) {
+            /* make_sock() has closed lr->sd,
+             * restore the original socket */
+            lr->sd = sd;
+            return rv;
+        }
+        /* not needed anymore */
+        apr_socket_close(sd);
+    }
+#endif
+    return APR_SUCCESS;
+}
+
 AP_DECLARE_NONSTD(void) ap_close_listeners(void)
 {
     ap_listen_rec *lr;
Index: include/ap_listen.h
===================================================================
--- include/ap_listen.h    (revision 1575322)
+++ include/ap_listen.h    (working copy)
@@ -92,6 +92,15 @@ AP_DECLARE(void) ap_listen_pre_config(void);
 AP_DECLARE(int) ap_setup_listeners(server_rec *s);

 /**
+ * Loop through the global ap_listen_rec list and reopen up to @a num
sockets
+ * when applicable (eg. HAVE_SO_REUSEPORT to reuse ports).
+ * @param p The pool reopen the sockets on
+ * @param num The number of sockets to reopen (all if negative)
+ * @return APR_SUCCESS or an APR error
+ */
+AP_DECLARE(apr_status_t) ap_reopen_listeners(apr_pool_t *p, int num);
+
+/**
  * Loop through the global ap_listen_rec list and close each of the
sockets.
  */
 AP_DECLARE_NONSTD(void) ap_close_listeners(void);
[EOS]
Index: server/mpm/event/event.c
===================================================================
--- server/mpm/event/event.c	(revision 1575322)
+++ server/mpm/event/event.c	(working copy)
@@ -2356,6 +2356,13 @@ static void child_main(int child_num_arg)
     /*stuff to do before we switch id's, so we have permissions. */
     ap_reopen_scoreboard(pchild, NULL, 0);
 
+    rv = ap_reopen_listeners(pchild, num_listensocks);
+    if (rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO()
+                     "Couldn't re-open listeners");
+        clean_child_exit(APEXIT_CHILDFATAL);
+    }
+
     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
         clean_child_exit(APEXIT_CHILDFATAL);
     }
Index: server/mpm/prefork/prefork.c
===================================================================
--- server/mpm/prefork/prefork.c	(revision 1575322)
+++ server/mpm/prefork/prefork.c	(working copy)
@@ -271,7 +271,9 @@ static void accept_mutex_off(void)
  * multiple Listen statements.  Define SINGLE_LISTEN_UNSERIALIZED_ACCEPT
  * when it's safe in the single Listen case.
  */
-#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
+#if HAVE_SO_REUSEPORT
+#define SAFE_ACCEPT(stmt)
+#elif defined(SINGLE_LISTEN_UNSERIALIZED_ACCEPT)
 #define SAFE_ACCEPT(stmt) do {if (ap_listeners->next) {stmt;}} while(0)
 #else
 #define SAFE_ACCEPT(stmt) do {stmt;} while(0)
@@ -536,6 +538,13 @@ static void child_main(int child_num_arg)
         clean_child_exit(APEXIT_CHILDFATAL);
     }
 
+    status = ap_reopen_listeners(pchild, num_listensocks);
+    if (status != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_EMERG, status, ap_server_conf, APLOGNO()
+                     "Couldn't re-open listeners");
+        clean_child_exit(APEXIT_CHILDFATAL);
+    }
+
     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
         clean_child_exit(APEXIT_CHILDFATAL);
     }
Index: server/mpm/worker/worker.c
===================================================================
--- server/mpm/worker/worker.c	(revision 1575322)
+++ server/mpm/worker/worker.c	(working copy)
@@ -220,7 +220,9 @@ static apr_os_thread_t *listener_os_thread;
 /* Locks for accept serialization */
 static apr_proc_mutex_t *accept_mutex;
 
-#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
+#if HAVE_SO_REUSEPORT
+#define SAFE_ACCEPT(stmt)
+#elif defined(SINGLE_LISTEN_UNSERIALIZED_ACCEPT)
 #define SAFE_ACCEPT(stmt) (ap_listeners->next ? (stmt) : APR_SUCCESS)
 #else
 #define SAFE_ACCEPT(stmt) (stmt)
@@ -1228,6 +1230,13 @@ static void child_main(int child_num_arg)
     /*stuff to do before we switch id's, so we have permissions.*/
     ap_reopen_scoreboard(pchild, NULL, 0);
 
+    rv = ap_reopen_listeners(pchild, num_listensocks);
+    if (rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO()
+                     "Couldn't re-open listeners");
+        clean_child_exit(APEXIT_CHILDFATAL);
+    }
+
     rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&accept_mutex,
                                                apr_proc_mutex_lockfile(accept_mutex),
                                                pchild));
Index: server/listen.c
===================================================================
--- server/listen.c	(revision 1575322)
+++ server/listen.c	(working copy)
@@ -60,6 +60,9 @@ static apr_status_t make_sock(apr_pool_t *p, ap_li
 #endif
     apr_status_t stat;
 
+    /* until further notice */
+    server->sd = NULL;
+
 #ifndef WIN32
     stat = apr_socket_opt_set(s, APR_SO_REUSEADDR, one);
     if (stat != APR_SUCCESS && stat != APR_ENOTIMPL) {
@@ -125,6 +128,23 @@ static apr_status_t make_sock(apr_pool_t *p, ap_li
 #endif
 
     if (do_bind_listen) {
+#if HAVE_SO_REUSEPORT
+        {
+            int yes = 1;
+            apr_os_sock_t sd = -1;
+            apr_os_sock_get(&sd, s);
+            if (setsockopt(sd, SOL_SOCKET, SO_REUSEPORT,
+                           (void *)&yes, sizeof(int)) == -1) {
+                stat = errno;
+                ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+                              "make_sock: for address %pI, setsockopt: "
+                              "(SO_REUSEPORT)", server->bind_addr);
+                apr_socket_close(s);
+                return stat;
+            }
+        }
+#endif
+
 #if APR_HAVE_IPV6
         if (server->bind_addr->family == APR_INET6) {
             stat = apr_socket_opt_set(s, APR_IPV6_V6ONLY, v6only_setting);
@@ -727,6 +747,34 @@ AP_DECLARE(int) ap_setup_listeners(server_rec *s)
     return num_listeners;
 }
 
+AP_DECLARE(apr_status_t) ap_reopen_listeners(apr_pool_t *p, int num)
+{
+#if HAVE_SO_REUSEPORT
+    ap_listen_rec *lr;
+
+    for (lr = ap_listeners; lr && num; lr = lr->next, --num) {
+        apr_socket_t *sd = lr->sd, *s;
+
+        rv = apr_socket_create(&s, lr->bind_addr->family, SOCK_STREAM, 0, p);
+        if (rv != APR_SUCCESS) {
+            return rv;
+        }
+        lr->sd = s;
+
+        rv = make_sock(p, lr, 1);
+        if (rv != APR_SUCCESS) {
+            /* make_sock() has closed lr->sd,
+             * restore the original socket */
+            lr->sd = sd;
+            return rv;
+        }
+        /* not needed anymore */
+        apr_socket_close(sd);
+    }
+#endif
+    return APR_SUCCESS;
+}
+
 AP_DECLARE_NONSTD(void) ap_close_listeners(void)
 {
     ap_listen_rec *lr;
Index: include/ap_listen.h
===================================================================
--- include/ap_listen.h	(revision 1575322)
+++ include/ap_listen.h	(working copy)
@@ -92,6 +92,15 @@ AP_DECLARE(void) ap_listen_pre_config(void);
 AP_DECLARE(int) ap_setup_listeners(server_rec *s);
 
 /**
+ * Loop through the global ap_listen_rec list and reopen up to @a num sockets
+ * when applicable (eg. HAVE_SO_REUSEPORT to reuse ports).
+ * @param p The pool reopen the sockets on
+ * @param num The number of sockets to reopen (all if negative)
+ * @return APR_SUCCESS or an APR error
+ */
+AP_DECLARE(apr_status_t) ap_reopen_listeners(apr_pool_t *p, int num);
+
+/**
  * Loop through the global ap_listen_rec list and close each of the sockets.
  */
 AP_DECLARE_NONSTD(void) ap_close_listeners(void);

Reply via email to