Having hundreds of HTTP SSL health checks leads to CPU saturation.
This patch allows SSL HTTP health checks without any "http-check expect"
directives to keep the connection open for subsequent health checks. It
does not affect any TCP check code.
---

Notes:
    We have a situation where we need to do basic SSL+HTTP health checks on a
    large number of nodes.
    Without persistent connections, the CPU usage is much too high.
    
    This patch is my attempt at resolving this (we have tested it ourselves).
    Since this is my first time mucking around in the HAProxy code, I'm sure
    some discussion is necessary.
    
    I also wasn't sure whether this should be a configuration option, or
    whether it could be expanded to include HTTP checks with
    "http-check expect" directives. Since there's an explicit
    "Connection: close" added, I excluded them to be on the safe side.
    
    Thanks!
    Steven Davidovitz

 include/types/checks.h |  1 +
 src/checks.c           | 96 +++++++++++++++++++++++++++++---------------------
 2 files changed, 57 insertions(+), 40 deletions(-)

diff --git a/include/types/checks.h b/include/types/checks.h
index 283ff3db..0e86a741 100644
--- a/include/types/checks.h
+++ b/include/types/checks.h
@@ -166,6 +166,7 @@ struct check {
        short status, code;                     /* check result, check code */
        char desc[HCHK_DESC_LEN];               /* health check description */
        int use_ssl;                            /* use SSL for health checks */
+       int use_ssl_persistent;                 /* use persistent connections 
for SSL health checks */
        int send_proxy;                         /* send a PROXY protocol header 
with checks */
        struct list *tcpcheck_rules;            /* tcp-check send / expect 
rules */
        struct tcpcheck_rule *current_step;     /* current step when using 
tcpcheck */
diff --git a/src/checks.c b/src/checks.c
index 49bd886b..c03b7abd 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -778,6 +778,7 @@ static void event_srv_chk_w(struct connection *conn)
                t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check);
                task_queue(t);
        }
+       __conn_sock_want_recv(conn);
        goto out_nowake;
 
  out_wakeup:
@@ -1349,14 +1350,16 @@ static void event_srv_chk_r(struct connection *conn)
        *check->bi->data = '\0';
        check->bi->i = 0;
 
-       /* Close the connection... We absolutely want to perform a hard close
-        * and reset the connection if some data are pending, otherwise we end
-        * up with many TIME_WAITs and eat all the source port range quickly.
-        * To avoid sending RSTs all the time, we first try to drain pending
-        * data.
-        */
-       __conn_data_stop_both(conn);
-       conn_data_shutw_hard(conn);
+       if (conn->flags & CO_FL_ERROR || !check->use_ssl_persistent) {
+               /* Close the connection... We absolutely want to perform a hard 
close
+                * and reset the connection if some data are pending, otherwise 
we end
+                * up with many TIME_WAITs and eat all the source port range 
quickly.
+                * To avoid sending RSTs all the time, we first try to drain 
pending
+                * data.
+                */
+               __conn_data_stop_both(conn);
+               conn_data_shutw_hard(conn);
+       }
 
        /* OK, let's not stay here forever */
        if (check->result == CHK_RES_FAILED)
@@ -1398,13 +1401,14 @@ static int wake_srv_chk(struct connection *conn)
                task_wakeup(check->task, TASK_WOKEN_IO);
        }
 
-       if (check->result != CHK_RES_UNKNOWN) {
+       if (check->result != CHK_RES_UNKNOWN && ((conn->flags & CO_FL_ERROR) || 
!check->use_ssl_persistent)) {
                /* We're here because nobody wants to handle the error, so we
                 * sure want to abort the hard way.
                 */
                conn_sock_drain(conn);
                conn_force_close(conn);
        }
+
        return 0;
 }
 
@@ -1465,7 +1469,6 @@ static int connect_conn_chk(struct task *t)
        struct check *check = t->context;
        struct server *s = check->server;
        struct connection *conn = check->conn;
-       struct protocol *proto;
        int ret;
        int quickack;
 
@@ -1505,33 +1508,34 @@ static int connect_conn_chk(struct task *t)
                bo_putblk(check->bo, check->send_string, 
check->send_string_len);
        }
 
-       /* prepare a new connection */
-       conn_init(conn);
+       if (!conn_xprt_ready(conn)) {
+               /* prepare a new connection */
+               conn_init(conn);
 
-       if (is_addr(&check->addr)) {
-               /* we'll connect to the check addr specified on the server */
-               conn->addr.to = check->addr;
-       }
-       else {
-               /* we'll connect to the addr on the server */
-               conn->addr.to = s->addr;
-       }
+               if (is_addr(&check->addr)) {
+                       /* we'll connect to the check addr specified on the 
server */
+                       conn->addr.to = check->addr;
+               }
+               else {
+                       /* we'll connect to the addr on the server */
+                       conn->addr.to = s->addr;
+               }
 
-       if ((conn->addr.to.ss_family == AF_INET) || (conn->addr.to.ss_family == 
AF_INET6)) {
-               int i = 0;
+               if ((conn->addr.to.ss_family == AF_INET) || 
(conn->addr.to.ss_family == AF_INET6)) {
+                       int i = 0;
 
-               i = srv_check_healthcheck_port(check);
-               if (i == 0) {
-                       conn->owner = check;
-                       return SF_ERR_CHK_PORT;
+                       i = srv_check_healthcheck_port(check);
+                       if (i == 0) {
+                               conn->owner = check;
+                               return SF_ERR_CHK_PORT;
+                       }
+
+                       set_host_port(&conn->addr.to, i);
                }
 
-               set_host_port(&conn->addr.to, i);
+               conn_prepare(conn, protocol_by_family(conn->addr.to.ss_family), 
check->xprt);
        }
 
-       proto = protocol_by_family(conn->addr.to.ss_family);
-
-       conn_prepare(conn, proto, check->xprt);
        conn_attach(conn, check, &check_conn_cb);
        conn->target = &s->obj_type;
 
@@ -1555,15 +1559,21 @@ static int connect_conn_chk(struct task *t)
                        quickack = 0;
        }
 
-       ret = SF_ERR_INTERNAL;
-       if (proto->connect)
-               ret = proto->connect(conn, check->type, quickack ? 2 : 0);
-       conn->flags |= CO_FL_WAKE_DATA;
-       if (s->check.send_proxy) {
-               conn->send_proxy_ofs = 1;
-               conn->flags |= CO_FL_SEND_PROXY;
+       ret = SF_ERR_NONE;
+
+       if (!conn_ctrl_ready(conn) || !conn_xprt_ready(conn)) {
+               ret = conn->ctrl->connect(conn, check->type, quickack ? 2 : 0);
+
+               /* we need to be notified about connection establishment */
+               conn->flags |= CO_FL_WAKE_DATA;
+
+               if (s->check.send_proxy) {
+                       conn->send_proxy_ofs = 1;
+                       conn->flags |= CO_FL_SEND_PROXY;
+               }
        }
 
+
        return ret;
 }
 
@@ -2100,8 +2110,13 @@ static struct task *process_chk_conn(struct task *t)
                                t->expire = tick_first(t->expire, t_con);
                        }
 
-                       if (check->type)
-                               conn_data_want_recv(conn);   /* prepare for 
reading a possible reply */
+                       if (check->type) {
+                               if (conn->flags & CO_FL_CONNECTED)
+                                       conn_data_want_send(conn);
+                               else
+                                       conn_data_want_recv(conn);   /* prepare 
for reading a possible reply */
+                       }
+
 
                        goto reschedule;
 
@@ -2161,7 +2176,7 @@ static struct task *process_chk_conn(struct task *t)
                }
 
                /* check complete or aborted */
-               if (conn->xprt) {
+               if (conn->xprt && ((conn->flags & CO_FL_ERROR) || 
!check->use_ssl_persistent)) {
                        /* The check was aborted and the connection was not yet 
closed.
                         * This can happen upon timeout, or when an external 
event such
                         * as a failed response coupled with "observe layer7" 
caused the
@@ -3425,6 +3440,7 @@ int srv_check_healthcheck_port(struct check *chk)
         */
        if (!chk->port && !is_addr(&chk->addr)) {
                chk->use_ssl |= (srv->use_ssl || (srv->proxy->options & 
PR_O_TCPCHK_SSL));
+               chk->use_ssl_persistent |= (chk->use_ssl && 
!(srv->proxy->options2 & PR_O2_EXP_TYPE));
                chk->send_proxy |= (srv->pp_opts);
        }
 
-- 
2.11.1


Reply via email to