On Fri, Jun 30, 2017 at 1:33 PM, Yann Ylavic <ylavic....@gmail.com> wrote:
> On Fri, Jun 30, 2017 at 1:20 PM, Ruediger Pluem <rpl...@apache.org> wrote:
>>
>> On 06/30/2017 12:18 PM, Yann Ylavic wrote:
>>>
>>> IMHO mod_ssl shouldn't (BIO_)flush unconditionally in
>>> modssl_smart_shutdown(), only in the "abortive" mode of
>>> ssl_filter_io_shutdown().
>>
>> I think the issue starts before that.
>> ap_prep_lingering_close calls the pre_close_connection hook, and modules
>> that are registered to this hook can perform all sorts of long-lasting
>> blocking operations there.
>> While it can be argued that this would be a bug in the module, I think the
>> only safe way is to have the whole start_lingering_close_nonblocking be
>> executed by a worker thread.
>
> Correct, that'd be much simpler/safer indeed.
> We need a new SHUTDOWN state then, right?

Actually it was less simple than expected, and it has some caveats obviously...

The attached patch does not introduce a new state but reuses the
existing CONN_STATE_LINGER since it was not really considered by the
listener thread (which uses CONN_STATE_LINGER_NORMAL and
CONN_STATE_LINGER_SHORT instead), but that's a detail.

Mainly, start_lingering_close_nonblocking() now simply schedules a
shutdown (i.e. pre_close_connection() followed by an immediate close)
that will be run by a worker thread.
A new shutdown_linger_q is created/handled (with the same timeout as
the short_linger_q, namely 2 seconds) to hold the connections to be
shut down.
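
For clarity, here is the gist of that listener-side change (condensed
from the patch below, nothing beyond what it already does):

    /* Listener context: no blocking work here anymore; just mark the
     * connection for shutdown, ask for writability and queue it on
     * shutdown_linger_q, a worker will then run the actual shutdown/close.
     */
    static int start_lingering_close_nonblocking(event_conn_state_t *cs)
    {
        cs->shutdown_linger = 1;
        cs->pub.state = CONN_STATE_LINGER;
        cs->pub.sense = CONN_SENSE_WANT_WRITE;
        return start_lingering_close_common(cs, shutdown_linger_q, 0);
    }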

So now when a connection times out in the write_completion or
keepalive queues, it needs an available worker (i.e. the listener may
have to wait for one) to process its shutdown/close.
This means we can *not* close kept-alive connections immediately like
before when running short of workers, which will favor active KA
connections over new ones in this case (I don't think it's that
serious, but the previous code was taking care of it; for me it's up
to the admin to size the workers appropriately...).
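
Concretely, handing such a connection to a worker now always reserves
one first (condensed from push_cs2worker() in the patch below):

    /* Listener context: may block until a worker thread is idle, then
     * push the connection's socket/state to the worker queue.
     */
    get_worker(1, all_busy);
    rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);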

Likewise, when a connection in the shutdown_linger_q itself times out,
the patch requires a worker immediately to do the job (see the
shutdown_lingering_close() callback).
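
That is (again condensed from the patch below), expired entries of the
shutdown_linger_q are simply handed over to a worker from the listener's
timeout processing:

    /* Called by the listener when the shutdown (lingering close) period
     * of a connection has expired; defers the close to a worker, possibly
     * blocking until one is idle.
     */
    static int shutdown_lingering_close(event_conn_state_t *cs)
    {
        int all_busy = 0;
        push_cs2worker(cs, &all_busy);
        return 0;
    }
    ...
    /* Step 5: (shutdown) lingering close completion timeouts */
    process_timeout_queue(shutdown_linger_q, timeout_time,
                          shutdown_lingering_close);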

So overall, this patch may introduce the need for more workers than
before; what was (wrongly) done by the listener thread has to be done
somewhere anyway...

Finally, I think there is room for improvement, like batching
shutdowns in the same worker, if there is no objection to the approach
so far.

WDYT?

Regards,
Yann.
Index: server/mpm/event/event.c
===================================================================
--- server/mpm/event/event.c	(revision 1801974)
+++ server/mpm/event/event.c	(working copy)
@@ -243,6 +243,8 @@ struct event_conn_state_t {
     apr_pollfd_t pfd;
     /** public parts of the connection state */
     conn_state_t pub;
+    /** shutdown quickly in LINGER state? */
+    int shutdown_linger;
 };
 APR_RING_HEAD(timeout_head_t, event_conn_state_t);
 
@@ -260,11 +262,13 @@ struct timeout_queue {
  *   keepalive_q        uses vhost's KeepAliveTimeOut
  *   linger_q           uses MAX_SECS_TO_LINGER
  *   short_linger_q     uses SECONDS_TO_LINGER
+ *   shutdown_linger_q  uses SECONDS_TO_LINGER
  */
 static struct timeout_queue *write_completion_q,
                             *keepalive_q,
                             *linger_q,
-                            *short_linger_q;
+                            *short_linger_q,
+                            *shutdown_linger_q;
 static volatile apr_time_t  queues_next_expiry;
 
 /* Prevent extra poll/wakeup calls for timeouts close in the future (queues
@@ -699,6 +703,7 @@ static apr_status_t decrement_connection_count(voi
 {
     event_conn_state_t *cs = cs_;
     switch (cs->pub.state) {
+        case CONN_STATE_LINGER:
         case CONN_STATE_LINGER_NORMAL:
         case CONN_STATE_LINGER_SHORT:
             apr_atomic_dec32(&lingering_count);
@@ -731,11 +736,16 @@ static void notify_resume(event_conn_state_t *cs,
     ap_run_resume_connection(cs->c, cs->r);
 }
 
-static int start_lingering_close_common(event_conn_state_t *cs, int in_worker)
+static int have_idle_worker = 0;
+static void push_cs2worker(event_conn_state_t *cs,
+                           int *all_busy);
+
+static int start_lingering_close_common(event_conn_state_t *cs,
+                                        struct timeout_queue *q,
+                                        int in_worker)
 {
     apr_status_t rv;
-    struct timeout_queue *q;
-    apr_socket_t *csd = cs->pfd.desc.s;
+    apr_socket_t *csd = ap_get_conn_socket(cs->c);
 #ifdef AP_DEBUG
     {
         rv = apr_socket_timeout_set(csd, 0);
@@ -745,19 +755,6 @@ static void notify_resume(event_conn_state_t *cs,
     apr_socket_timeout_set(csd, 0);
 #endif
     cs->queue_timestamp = apr_time_now();
-    /*
-     * If some module requested a shortened waiting period, only wait for
-     * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
-     * DoS attacks.
-     */
-    if (apr_table_get(cs->c->notes, "short-lingering-close")) {
-        q = short_linger_q;
-        cs->pub.state = CONN_STATE_LINGER_SHORT;
-    }
-    else {
-        q = linger_q;
-        cs->pub.state = CONN_STATE_LINGER_NORMAL;
-    }
     apr_atomic_inc32(&lingering_count);
     if (in_worker) { 
         notify_suspend(cs);
@@ -796,40 +793,56 @@ static void notify_resume(event_conn_state_t *cs,
  */
 static int start_lingering_close_blocking(event_conn_state_t *cs)
 {
-    if (ap_start_lingering_close(cs->c)) {
+    struct timeout_queue *q;
+    int closed = ap_start_lingering_close(cs->c);
+    if (closed || cs->shutdown_linger) {
         notify_suspend(cs);
+        if (!closed) {
+            apr_socket_t *csd = ap_get_conn_socket(cs->c);
+            apr_socket_close(csd);
+        }
         ap_push_pool(worker_queue_info, cs->p);
         return 0;
     }
-    return start_lingering_close_common(cs, 1);
+    /*
+     * If some module requested a shortened waiting period, only wait for
+     * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
+     * DoS attacks.
+     */
+    if (apr_table_get(cs->c->notes, "short-lingering-close")) {
+        q = short_linger_q;
+        cs->pub.state = CONN_STATE_LINGER_SHORT;
+    }
+    else {
+        q = linger_q;
+        cs->pub.state = CONN_STATE_LINGER_NORMAL;
+    }
+    return start_lingering_close_common(cs, q, 1);
 }
 
 /*
- * Close our side of the connection, NOT flushing data to the client.
- * This should only be called if there has been an error or if we know
- * that our send buffers are empty.
+ * Defer to a worker the flush and close of our side of the connection.
  * Pre-condition: cs is not in any timeout queue and not in the pollset,
  *                timeout_mutex is not locked
  * return: 0 if connection is fully closed,
  *         1 if connection is lingering
- * may be called by listener thread
+ * May be called by listener thread
+ *
+ * The caller wants the nonblocking guarantee and (still) to shut down the
+ * connection (i.e. run the pre_close hook, possibly send a TLS close notify
+ * alert, the FIN packet, ...), quite antithetical, hence the defer.
  */
 static int start_lingering_close_nonblocking(event_conn_state_t *cs)
 {
-    conn_rec *c = cs->c;
-    apr_socket_t *csd = cs->pfd.desc.s;
-
-    if (ap_prep_lingering_close(c)
-        || c->aborted
-        || ap_shutdown_conn(c, 0) != APR_SUCCESS || c->aborted
-        || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) {
-        apr_socket_close(csd);
-        ap_push_pool(worker_queue_info, cs->p);
-        if (dying)
-            ap_queue_interrupt_one(worker_queue);
-        return 0;
-    }
-    return start_lingering_close_common(cs, 0);
+    /* Wait/poll/ask for writability on the socket and then let some worker
+     * do the job. It's likely called(-back) when a timeout occurs (keepalive,
+     * write completion) so it should take a limited time to complete in any
+     * case, currently SECONDS_TO_LINGER.
+     */
+    cs->shutdown_linger = 1;
+    cs->pub.state = CONN_STATE_LINGER;
+    cs->pub.sense = CONN_SENSE_WANT_WRITE;
+    return start_lingering_close_common(cs, shutdown_linger_q, 0);
 }
 
 /*
@@ -856,6 +869,19 @@ static int stop_lingering_close(event_conn_state_t
 }
 
 /*
+ * hand a connection whose shutdown (lingering close) period has expired
+ * over to a worker, which will close it
+ * Pre-condition: cs is not in any timeout queue and not in the pollset
+ * return: irrelevant (need same prototype as start_lingering_close)
+ */
+static int shutdown_lingering_close(event_conn_state_t *cs)
+{
+    int all_busy = 0;
+    push_cs2worker(cs, &all_busy);
+    return 0;
+}
+
+/*
  * This runs before any non-MPM cleanup code on the connection;
  * if the connection is currently suspended as far as modules
  * know, provide notification of resumption.
@@ -1281,6 +1307,8 @@ static apr_status_t init_pollset(apr_pool_t *p)
     return APR_SUCCESS;
 }
 
+static void get_worker(int blocking, int *all_busy);
+
 static apr_status_t push_timer2worker(timer_event_t* te)
 {
     return ap_queue_push_timer(worker_queue, te);
@@ -1287,16 +1315,14 @@ static apr_status_t push_timer2worker(timer_event_
 }
 
 /*
- * Pre-condition: pfd->cs is neither in pollset nor timeout queue
+ * Pre-condition: cs is neither in pollset nor timeout queue
  * this function may only be called by the listener
  */
-static apr_status_t push2worker(const apr_pollfd_t * pfd,
-                                apr_pollset_t * pollset)
+static void push_cs2worker(event_conn_state_t *cs,
+                           int *all_busy)
 {
-    listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
-    event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
     apr_status_t rc;
-
+    get_worker(1, all_busy);
     rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
     if (rc != APR_SUCCESS) {
         /* trash the connection; we couldn't queue the connected
@@ -1304,28 +1330,33 @@ static apr_status_t push_timer2worker(timer_event_
          */
         apr_bucket_alloc_destroy(cs->bucket_alloc);
         apr_socket_close(cs->pfd.desc.s);
-        ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
-                     ap_server_conf, APLOGNO(00471) "push2worker: ap_queue_push failed");
+        ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf, APLOGNO(00471)
+                     "push_cs2worker: ap_queue_push failed");
         ap_push_pool(worker_queue_info, cs->p);
+
+        /* XXX: shouldn't we call this?
+        signal_threads(ST_GRACEFUL);
+        */
     }
-
-    return rc;
+    else {
+        have_idle_worker = 0;
+    }
 }
 
 /* get_worker:
- *     If *have_idle_worker_p == 0, reserve a worker thread, and set
- *     *have_idle_worker_p = 1.
- *     If *have_idle_worker_p is already 1, will do nothing.
+ *     If have_idle_worker == 0, reserve a worker thread, and set
+ *     have_idle_worker = 1.
+ *     If have_idle_worker is already 1, will do nothing.
  *     If blocking == 1, block if all workers are currently busy.
  *     If no worker was available immediately, will set *all_busy to 1.
  *     XXX: If there are no workers, we should not block immediately but
  *     XXX: close all keep-alive connections first.
  */
-static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
+static void get_worker(int blocking, int *all_busy)
 {
     apr_status_t rc;
 
-    if (*have_idle_worker_p) {
+    if (have_idle_worker) {
         /* already reserved a worker thread - must have hit a
          * transient error on a previous pass
          */
@@ -1338,7 +1369,7 @@ static apr_status_t push_timer2worker(timer_event_
         rc = ap_queue_info_try_get_idler(worker_queue_info);
 
     if (rc == APR_SUCCESS || APR_STATUS_IS_EOF(rc)) {
-        *have_idle_worker_p = 1;
+        have_idle_worker = 1;
     }
     else if (!blocking && rc == APR_EAGAIN) {
         *all_busy = 1;
@@ -1675,7 +1706,6 @@ static void * APR_THREAD_FUNC listener_thread(apr_
     struct process_score *ps = ap_get_scoreboard_process(process_slot);
     apr_pool_t *tpool = apr_thread_pool_get(thd);
     int closed = 0, listeners_disabled = 0;
-    int have_idle_worker = 0;
     apr_time_t last_log;
 
     last_log = apr_time_now();
@@ -1845,22 +1875,25 @@ static void * APR_THREAD_FUNC listener_thread(apr_
                 /* one of the sockets is readable */
                 event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
                 struct timeout_queue *remove_from_q = cs->sc->wc_q;
-                int blocking = 1;
 
                 switch (cs->pub.state) {
+                case CONN_STATE_LINGER:
+                    remove_from_q = shutdown_linger_q;
+                    goto pass_to_worker;
+
                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
                     cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
                     remove_from_q = cs->sc->ka_q;
-                    /* don't wait for a worker for a keepalive request */
-                    blocking = 0;
-                    /* FALL THROUGH */
+                    goto pass_to_worker;
+
                 case CONN_STATE_WRITE_COMPLETION:
-                    get_worker(&have_idle_worker, blocking,
-                               &workers_were_busy);
+pass_to_worker:
                     apr_thread_mutex_lock(timeout_mutex);
                     TO_QUEUE_REMOVE(remove_from_q, cs);
                     rc = apr_pollset_remove(event_pollset, &cs->pfd);
                     apr_thread_mutex_unlock(timeout_mutex);
+                    TO_QUEUE_ELEM_INIT(cs);
+
                     /*
                      * Some of the pollset backends, like KQueue or Epoll
                      * automagically remove the FD if the socket is closed,
@@ -1870,33 +1903,19 @@ static void * APR_THREAD_FUNC listener_thread(apr_
                     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
                         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
                                      APLOGNO(03094) "pollset remove failed");
-                        start_lingering_close_nonblocking(cs);
-                        break;
+                        apr_socket_timeout_set(cs->pfd.desc.s, 0);
+                        cs->pub.state = CONN_STATE_LINGER;
+                        cs->shutdown_linger = 1;
                     }
 
-                    TO_QUEUE_ELEM_INIT(cs);
-                    /* If we didn't get a worker immediately for a keep-alive
-                     * request, we close the connection, so that the client can
-                     * re-connect to a different process.
-                     */
-                    if (!have_idle_worker) {
-                        start_lingering_close_nonblocking(cs);
-                        break;
-                    }
-                    rc = push2worker(out_pfd, event_pollset);
-                    if (rc != APR_SUCCESS) {
-                        ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
-                                     ap_server_conf, APLOGNO(03095)
-                                     "push2worker failed");
-                    }
-                    else {
-                        have_idle_worker = 0;
-                    }
+                    push_cs2worker(cs, &workers_were_busy);
                     break;
+
                 case CONN_STATE_LINGER_NORMAL:
                 case CONN_STATE_LINGER_SHORT:
                     process_lingering_close(cs, out_pfd);
                     break;
+
                 default:
                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
                                  ap_server_conf, APLOGNO(03096)
@@ -1964,7 +1983,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_
                     }
                     apr_pool_tag(ptrans, "transaction");
 
-                    get_worker(&have_idle_worker, 1, &workers_were_busy);
+                    get_worker(1, &workers_were_busy);
                     rc = lr->accept_func(&csd, lr, ptrans);
 
                     /* later we trash rv and rely on csd to indicate
@@ -2073,6 +2092,9 @@ static void * APR_THREAD_FUNC listener_thread(apr_
             /* Step 4: (short) lingering close completion timeouts */
             process_timeout_queue(short_linger_q, timeout_time,
                                   stop_lingering_close);
+            /* Step 5: (shutdown) lingering close completion timeouts */
+            process_timeout_queue(shutdown_linger_q, timeout_time,
+                                  shutdown_lingering_close);
 
             apr_thread_mutex_unlock(timeout_mutex);
 
@@ -3520,10 +3542,15 @@ static int event_post_config(apr_pool_t *pconf, ap
     wc.hash = apr_hash_make(ptemp);
     ka.hash = apr_hash_make(ptemp);
 
-    linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(MAX_SECS_TO_LINGER),
+    linger_q = TO_QUEUE_MAKE(pconf,
+                             apr_time_from_sec(MAX_SECS_TO_LINGER),
                              NULL);
-    short_linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(SECONDS_TO_LINGER),
+    short_linger_q = TO_QUEUE_MAKE(pconf,
+                                   apr_time_from_sec(SECONDS_TO_LINGER),
                                    NULL);
+    shutdown_linger_q = TO_QUEUE_MAKE(pconf,
+                                      apr_time_from_sec(SECONDS_TO_LINGER),
+                                      NULL);
 
     for (; s; s = s->next) {
         event_srv_cfg *sc = apr_pcalloc(pconf, sizeof *sc);
