Late(r) stop of children processes on restart

Yann Ylavic Mon, 28 Jun 2021 07:25:37 -0700

When the MPM event/worker is restarting, it first signals the
children's processes to stop (via POD), then reload the configuration,
and finally start the new generation.


This may be problematic when the reload takes some time to complete
because incoming connections are no longer processed.
A module at day $job is loading quite some regexes and JSON schemas
for each vhost, and I have seen restarts take tens of seconds to
complete with a large number of vhosts. I suppose this can happen with
many RewriteRules too.

How about we wait for the reload to complete before stopping the old
generation, like in the attached patch (MPM event only for now,
changes in worker would be quite similar)?

This is achieved by creating the PODs and listeners buckets from a
generation pool (gen_pool), with a different lifetime than pconf.
gen_pool survives restarts and is created/cleared after the old
generation is stopped, entirely in the run_mpm hook, so the stop and
PODs and buckets handling is moved there (most changes are cut/paste).

WDYT?

Regards;
Yann.

Index: server/listen.c
===================================================================
--- server/listen.c	(revision 1890695)
+++ server/listen.c	(working copy)
@@ -853,7 +853,7 @@ AP_DECLARE(apr_status_t) ap_duplicate_listeners(ap
                 if (val > 1) {
                     *num_buckets = val;
                 }
-                ap_log_perror(APLOG_MARK, APLOG_INFO, 0, p, APLOGNO(02819)
+                ap_log_error(APLOG_MARK, APLOG_INFO, 0, s, APLOGNO(02819)
                               "Using %i listeners bucket(s) based on %i "
                               "online CPU cores and a ratio of %i",
                               *num_buckets, num_online_cores,
@@ -862,7 +862,7 @@ AP_DECLARE(apr_status_t) ap_duplicate_listeners(ap
             else
 #endif
             if (!warn_once) {
-                ap_log_perror(APLOG_MARK, APLOG_WARNING, 0, p, APLOGNO(02820)
+                ap_log_error(APLOG_MARK, APLOG_INFO, 0, s, APLOGNO(02820)
                               "ListenCoresBucketsRatio ignored without "
                               "SO_REUSEPORT and _SC_NPROCESSORS_ONLN "
                               "support: using a single listeners bucket");
Index: server/mpm/event/event.c
===================================================================
--- server/mpm/event/event.c	(revision 1890695)
+++ server/mpm/event/event.c	(working copy)
@@ -390,6 +390,11 @@ typedef struct socket_callback_baton
     unsigned int signaled :1;
 } socket_callback_baton_t;
 
+typedef struct event_child_bucket {
+    ap_pod_t *pod;
+    ap_listen_rec *listeners;
+} event_child_bucket;
+
 /* data retained by event across load/unload of the module
  * allocated on first call to pre-config hook; located on
  * subsequent calls to pre-config hook
@@ -397,6 +402,9 @@ typedef struct socket_callback_baton
 typedef struct event_retained_data {
     ap_unixd_mpm_retained_data *mpm;
 
+    apr_pool_t *gen_pool; /* generation pool (start->stop children lifetime) */
+    event_child_bucket *buckets; /* children buckets (reset per generation) */
+
     int first_server_limit;
     int first_thread_limit;
     int sick_child_detected;
@@ -431,12 +439,7 @@ typedef struct event_retained_data {
 } event_retained_data;
 static event_retained_data *retained;
 
-typedef struct event_child_bucket {
-    ap_pod_t *pod;
-    ap_listen_rec *listeners;
-} event_child_bucket;
-static event_child_bucket *all_buckets, /* All listeners buckets */
-                          *my_bucket;   /* Current child bucket */
+static event_child_bucket *my_bucket;  /* Current child bucket */
 
 struct event_srv_cfg_s {
     struct timeout_queue *wc_q,
@@ -2774,8 +2777,8 @@ static void child_main(int child_num_arg, int chil
     /* close unused listeners and pods */
     for (i = 0; i < retained->mpm->num_buckets; i++) {
         if (i != child_bucket) {
-            ap_close_listeners_ex(all_buckets[i].listeners);
-            ap_mpm_podx_close(all_buckets[i].pod);
+            ap_close_listeners_ex(retained->buckets[i].listeners);
+            ap_mpm_podx_close(retained->buckets[i].pod);
         }
     }
 
@@ -2800,6 +2803,9 @@ static void child_main(int child_num_arg, int chil
         clean_child_exit(APEXIT_CHILDFATAL);
     }
 
+    /* For rand() users (e.g. skiplist). */
+    srand((unsigned int)apr_time_now());
+
     ap_run_child_init(pchild, ap_server_conf);
 
     if (ap_max_requests_per_child) {
@@ -2944,7 +2950,7 @@ static int make_child(server_rec * s, int slot, in
     }
 
     if (one_process) {
-        my_bucket = &all_buckets[0];
+        my_bucket = &retained->buckets[0];
 
         event_note_child_started(slot, getpid());
         child_main(slot, 0);
@@ -2973,7 +2979,7 @@ static int make_child(server_rec * s, int slot, in
     }
 
     if (!pid) {
-        my_bucket = &all_buckets[bucket];
+        my_bucket = &retained->buckets[bucket];
 
 #ifdef HAVE_BINDPROCESSOR
         /* By default, AIX binds to a single processor.  This bit unbinds
@@ -3123,7 +3129,7 @@ static void perform_idle_server_maintenance(int ch
         if (retained->total_daemons <= active_daemons_limit &&
             retained->total_daemons < server_limit) {
             /* Kill off one child */
-            ap_mpm_podx_signal(all_buckets[child_bucket].pod,
+            ap_mpm_podx_signal(retained->buckets[child_bucket].pod,
                                AP_MPM_PODX_GRACEFUL);
             retained->idle_spawn_rate[child_bucket] = 1;
             active_daemons--;
@@ -3320,8 +3326,10 @@ static void server_main_loop(int remaining_childre
 
 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
 {
+    ap_listen_rec **dup_buckets = NULL;
     int num_buckets = retained->mpm->num_buckets;
     int remaining_children_to_start;
+    apr_status_t rv;
     int i;
 
     ap_log_pid(pconf, ap_pid_fname);
@@ -3339,6 +3347,107 @@ static int event_run(apr_pool_t * _pconf, apr_pool
 
     ap_unixd_mpm_set_signals(pconf, one_process);
 
+    /* On the first startup create gen_pool which satisfies the lifetime of
+     * the parent's PODs and listeners, on restart stop the children from the
+     * previous generation and clear gen_pool for the next one.
+     */
+    if (!retained->gen_pool) {
+        apr_pool_create(&retained->gen_pool, ap_pglobal);
+    }
+    else {
+        if (retained->mpm->was_graceful) {
+            /* wake up the children...time to die.  But we'll have more soon */
+            for (i = 0; i < num_buckets; i++) {
+                ap_mpm_podx_killpg(retained->buckets[i].pod,
+                                   active_daemons_limit, AP_MPM_PODX_GRACEFUL);
+            }
+        }
+        else {
+            /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
+             * and a SIGHUP, we may as well use the same signal, because some user
+             * pthreads are stealing signals from us left and right.
+             */
+            for (i = 0; i < num_buckets; i++) {
+                ap_mpm_podx_killpg(retained->buckets[i].pod,
+                                   active_daemons_limit, AP_MPM_PODX_RESTART);
+            }
+            ap_reclaim_child_processes(1,  /* Start with SIGTERM */
+                                       event_note_child_killed);
+        }
+        apr_pool_clear(retained->gen_pool);
+        retained->buckets = NULL;
+
+        /* advance to the next generation */
+        /* XXX: we really need to make sure this new generation number isn't in
+         * use by any of the previous children.
+         */
+        ++retained->mpm->my_generation;
+        ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
+    }
+
+    /* Preserve the number of buckets on graceful restarts, otherwise set
+     * num_buckets to zero and let ap_duplicate_listeners() determine that.
+     */
+    if (!retained->mpm->was_graceful) {
+        num_buckets = (one_process) ? 1 : 0; /* one_process <= one bucket */
+    }
+    if ((rv = ap_duplicate_listeners(retained->gen_pool, ap_server_conf,
+                                     &dup_buckets, &num_buckets))) {
+        ap_log_error(APLOG_MARK, APLOG_CRIT, rv,
+                     ap_server_conf, APLOGNO(03273)
+                     "could not duplicate listeners");
+        return !OK;
+    }
+
+    retained->buckets = apr_palloc(retained->gen_pool,
+                                   num_buckets * sizeof(event_child_bucket));
+    for (i = 0; i < num_buckets; i++) {
+        if (one_process) {
+            /* no POD in one_process mode */
+            retained->buckets[i].pod = NULL;
+        }
+        else if ((rv = ap_mpm_podx_open(retained->gen_pool,
+                                        &retained->buckets[i].pod))) {
+            ap_log_error(APLOG_MARK, APLOG_CRIT, rv,
+                         ap_server_conf, APLOGNO(03274)
+                         "could not open pipe-of-death");
+            return !OK;
+        }
+        retained->buckets[i].listeners = dup_buckets[i];
+    }
+
+    if (retained->mpm->max_buckets < num_buckets) {
+        int new_max, *new_ptr;
+        new_max = retained->mpm->max_buckets * 2;
+        if (new_max < num_buckets) {
+            new_max = num_buckets;
+        }
+        new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int));
+        if (retained->idle_spawn_rate) /* NULL at startup */
+            memcpy(new_ptr, retained->idle_spawn_rate,
+                   retained->mpm->num_buckets * sizeof(int));
+        retained->idle_spawn_rate = new_ptr;
+        retained->mpm->max_buckets = new_max;
+    }
+    if (retained->mpm->num_buckets < num_buckets) {
+        int rate_max = 1;
+        /* If new buckets are added, set their idle spawn rate to
+         * the highest so far, so that they get filled as quickly
+         * as the existing ones.
+         */
+        for (i = 0; i < retained->mpm->num_buckets; i++) {
+            if (rate_max < retained->idle_spawn_rate[i]) {
+                rate_max = retained->idle_spawn_rate[i];
+            }
+        }
+        for (/* up to date i */; i < num_buckets; i++) {
+            retained->idle_spawn_rate[i] = rate_max;
+        }
+    }
+    retained->mpm->num_buckets = num_buckets;
+
+    active_daemons = 0;
+
     /* Don't thrash since num_buckets depends on the
      * system and the number of online CPU cores...
      */
@@ -3398,8 +3507,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool
          * Kill child processes, tell them to call child_exit, etc...
          */
         for (i = 0; i < num_buckets; i++) {
-            ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
-                               AP_MPM_PODX_RESTART);
+            ap_mpm_podx_killpg(retained->buckets[i].pod,
+                               active_daemons_limit, AP_MPM_PODX_RESTART);
         }
         ap_reclaim_child_processes(1, /* Start with SIGTERM */
                                    event_note_child_killed);
@@ -3425,8 +3534,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool
         /* Close our listeners, and then ask our children to do same */
         ap_close_listeners();
         for (i = 0; i < num_buckets; i++) {
-            ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
-                               AP_MPM_PODX_GRACEFUL);
+            ap_mpm_podx_killpg(retained->buckets[i].pod,
+                               active_daemons_limit, AP_MPM_PODX_GRACEFUL);
         }
         ap_relieve_child_processes(event_note_child_killed);
 
@@ -3468,8 +3577,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool
          * really dead.
          */
         for (i = 0; i < num_buckets; i++) {
-            ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
-                               AP_MPM_PODX_RESTART);
+            ap_mpm_podx_killpg(retained->buckets[i].pod,
+                               active_daemons_limit, AP_MPM_PODX_RESTART);
         }
         ap_reclaim_child_processes(1, event_note_child_killed);
 
@@ -3482,46 +3591,15 @@ static int event_run(apr_pool_t * _pconf, apr_pool
         return DONE;
     }
 
-    /* advance to the next generation */
-    /* XXX: we really need to make sure this new generation number isn't in
-     * use by any of the children.
-     */
-    ++retained->mpm->my_generation;
-    ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
-
     if (!retained->mpm->is_ungraceful) {
         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00493)
-                     AP_SIG_GRACEFUL_STRING
-                     " received.  Doing graceful restart");
-        /* wake up the children...time to die.  But we'll have more soon */
-        for (i = 0; i < num_buckets; i++) {
-            ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
-                               AP_MPM_PODX_GRACEFUL);
-        }
-
-        /* This is mostly for debugging... so that we know what is still
-         * gracefully dealing with existing request.
-         */
-
+                     "%s received.  Doing graceful restart",
+                     AP_SIG_GRACEFUL_STRING);
     }
     else {
-        /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
-         * and a SIGHUP, we may as well use the same signal, because some user
-         * pthreads are stealing signals from us left and right.
-         */
-        for (i = 0; i < num_buckets; i++) {
-            ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
-                               AP_MPM_PODX_RESTART);
-        }
-
-        ap_reclaim_child_processes(1,  /* Start with SIGTERM */
-                                   event_note_child_killed);
         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00494)
                      "SIGHUP received.  Attempting to restart");
     }
-
-    active_daemons = 0;
-
     return OK;
 }
 
@@ -3582,10 +3660,6 @@ static int event_open_logs(apr_pool_t * p, apr_poo
 {
     int startup = 0;
     int level_flags = 0;
-    int num_buckets = 0;
-    ap_listen_rec **listen_buckets;
-    apr_status_t rv;
-    int i;
 
     pconf = p;
 
@@ -3602,65 +3676,6 @@ static int event_open_logs(apr_pool_t * p, apr_poo
         return !OK;
     }
 
-    if (one_process) {
-        num_buckets = 1;
-    }
-    else if (retained->mpm->was_graceful) {
-        /* Preserve the number of buckets on graceful restarts. */
-        num_buckets = retained->mpm->num_buckets;
-    }
-    if ((rv = ap_duplicate_listeners(pconf, ap_server_conf,
-                                     &listen_buckets, &num_buckets))) {
-        ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
-                     (startup ? NULL : s), APLOGNO(03273)
-                     "could not duplicate listeners");
-        return !OK;
-    }
-
-    all_buckets = apr_pcalloc(pconf, num_buckets * sizeof(*all_buckets));
-    for (i = 0; i < num_buckets; i++) {
-        if (!one_process && /* no POD in one_process mode */
-                (rv = ap_mpm_podx_open(pconf, &all_buckets[i].pod))) {
-            ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
-                         (startup ? NULL : s), APLOGNO(03274)
-                         "could not open pipe-of-death");
-            return !OK;
-        }
-        all_buckets[i].listeners = listen_buckets[i];
-    }
-
-    if (retained->mpm->max_buckets < num_buckets) {
-        int new_max, *new_ptr;
-        new_max = retained->mpm->max_buckets * 2;
-        if (new_max < num_buckets) {
-            new_max = num_buckets;
-        }
-        new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int));
-        if (retained->idle_spawn_rate) /* NULL at startup */
-            memcpy(new_ptr, retained->idle_spawn_rate,
-                   retained->mpm->num_buckets * sizeof(int));
-        retained->idle_spawn_rate = new_ptr;
-        retained->mpm->max_buckets = new_max;
-    }
-    if (retained->mpm->num_buckets < num_buckets) {
-        int rate_max = 1;
-        /* If new buckets are added, set their idle spawn rate to
-         * the highest so far, so that they get filled as quickly
-         * as the existing ones.
-         */
-        for (i = 0; i < retained->mpm->num_buckets; i++) {
-            if (rate_max < retained->idle_spawn_rate[i]) {
-                rate_max = retained->idle_spawn_rate[i];
-            }
-        }
-        for (/* up to date i */; i < num_buckets; i++) {
-            retained->idle_spawn_rate[i] = rate_max;
-        }
-    }
-    retained->mpm->num_buckets = num_buckets;
-
-    /* for skiplist */
-    srand((unsigned int)apr_time_now());
     return OK;
 }

Late(r) stop of children processes on restart

Reply via email to