Hi, On Sat, Jan 12, 2019 at 01:11:29PM +0100, Willy Tarreau wrote: > Hi Pieter, > > On Sat, Jan 12, 2019 at 01:00:33AM +0100, PiBa-NL wrote: > > Thanks for this 'change in behavior' ;). Indeed the mailbomb is fixed, and > > it seems the expected mails get generated and delivered, but a segfault also > > happens on occasion. Not with the regtest as it was, but with a few minor > > modifications (adding a unreachable mailserver, and giving it a little more > > time seems to be the most reliable reproduction a.t.m.) it will crash > > consistently after 11 seconds.. So i guess the patch needs a bit more > > tweaking. > > > > Regards, > > PiBa-NL (Pieter) > > > > Core was generated by `haproxy -d -f /tmp/vtc.37274.4b8a1a3a/h1/cfg'. > > Program terminated with signal SIGSEGV, Segmentation fault. > > #0 0x0000000000500955 in chk_report_conn_err (check=0x802616a10, > > errno_bck=0, expired=1) at src/checks.c:689 > > 689 dns_trigger_resolution(check->server->dns_requester); > > (gdb) bt full > > #0 0x0000000000500955 in chk_report_conn_err (check=0x802616a10, > > errno_bck=0, expired=1) at src/checks.c:689 > > Indeed, this function should not have any special effect in this case, > it is needed to prepend this at the beginning of chk_report_conn_err() : > > if (!check->server) > return; > > We need to make sure that check->server is properly tested everywhere. > With a bit of luck this one was the only remnant. >
I'd rather just avoid calling dns_trigger_resolution() if there's no server, it seems it is the only use of check->server in chk_report_conn_err(), so that set_server_check_status() is call, and the check's status and result may be updated. Not sure it is really needed, but I'd rather not offend the Check Gods. The attached patches are updated to od just that. Regards, Olivier
>From 7e7b41cac480029c5fd93338dd86a875fee0b5a7 Mon Sep 17 00:00:00 2001 From: Olivier Houchard <ohouch...@haproxy.com> Date: Fri, 11 Jan 2019 18:17:17 +0100 Subject: [PATCH 1/2] MINOR: checks: Store the proxy in checks. Instead of assuming we have a server, store the proxy directly in struct check, and use it instead of s->server. This should be a no-op for now, but will be useful later when we change mail checks to avoid having a server. This should be backported to 1.9. --- include/types/checks.h | 1 + src/checks.c | 32 +++++++++++++++++--------------- src/server.c | 1 + 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/types/checks.h b/include/types/checks.h index 6346fe33..f89abcba 100644 --- a/include/types/checks.h +++ b/include/types/checks.h @@ -180,6 +180,7 @@ struct check { int send_string_len; /* length of agent command string */ char *send_string; /* optionally send a string when connecting to the agent */ struct server *server; /* back-pointer to server */ + struct proxy *proxy; /* proxy to be used */ char **argv; /* the arguments to use if running a process-based check */ char **envp; /* the environment to use if running a process-based check */ struct pid_list *curpid; /* entry in pid_list used for current process-based test, or -1 if not in test */ diff --git a/src/checks.c b/src/checks.c index 4baaf9fc..edb61b4f 100644 --- a/src/checks.c +++ b/src/checks.c @@ -2124,7 +2124,7 @@ static struct task *process_chk_proc(struct task *t, void *context, unsigned sho static struct task *process_chk_conn(struct task *t, void *context, unsigned short state) { struct check *check = context; - struct server *s = check->server; + struct proxy *proxy = check->proxy; struct conn_stream *cs = check->cs; struct connection *conn = cs_conn(cs); int rv; @@ -2142,7 +2142,7 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho * is disabled. */ if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) || - s->proxy->state == PR_STSTOPPED) + proxy->state == PR_STSTOPPED) goto reschedule; /* we'll initiate a new check */ @@ -2167,8 +2167,8 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho */ t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter)); - if (s->proxy->timeout.check && s->proxy->timeout.connect) { - int t_con = tick_add(now_ms, s->proxy->timeout.connect); + if (proxy->timeout.check && proxy->timeout.connect) { + int t_con = tick_add(now_ms, proxy->timeout.connect); t->expire = tick_first(t->expire, t_con); } @@ -2213,10 +2213,10 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho while (tick_is_expired(t->expire, now_ms)) { int t_con; - t_con = tick_add(t->expire, s->proxy->timeout.connect); + t_con = tick_add(t->expire, proxy->timeout.connect); t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter)); - if (s->proxy->timeout.check) + if (proxy->timeout.check) t->expire = tick_first(t->expire, t_con); } } @@ -2609,6 +2609,7 @@ static int tcpcheck_main(struct check *check) struct conn_stream *cs = check->cs; struct connection *conn = cs_conn(cs); struct server *s = check->server; + struct proxy *proxy = check->proxy; struct task *t = check->task; struct list *head = check->tcpcheck_rules; int retcode = 0; @@ -2647,10 +2648,10 @@ static int tcpcheck_main(struct check *check) while (tick_is_expired(t->expire, now_ms)) { int t_con; - t_con = tick_add(t->expire, s->proxy->timeout.connect); + t_con = tick_add(t->expire, proxy->timeout.connect); t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter)); - if (s->proxy->timeout.check) + if (proxy->timeout.check) t->expire = tick_first(t->expire, t_con); } goto out; @@ -2669,8 +2670,8 @@ static int tcpcheck_main(struct check *check) b_reset(&check->bi); check->current_step = next; t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter)); - if (s->proxy->timeout.check) - t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check); + if (proxy->timeout.check) + t->expire = tick_add_ifset(now_ms, proxy->timeout.check); } while (1) { @@ -2790,7 +2791,7 @@ static int tcpcheck_main(struct check *check) } conn_prepare(conn, proto, xprt); - conn_install_mux(conn, &mux_pt_ops, cs, s->proxy, NULL); + conn_install_mux(conn, &mux_pt_ops, cs, proxy, NULL); cs_attach(cs, check, &check_conn_cb); ret = SF_ERR_INTERNAL; @@ -2822,8 +2823,8 @@ static int tcpcheck_main(struct check *check) */ t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter)); - if (s->proxy->timeout.check && s->proxy->timeout.connect) { - int t_con = tick_add(now_ms, s->proxy->timeout.connect); + if (proxy->timeout.check && proxy->timeout.connect) { + int t_con = tick_add(now_ms, proxy->timeout.connect); t->expire = tick_first(t->expire, t_con); } break; @@ -3062,10 +3063,10 @@ static int tcpcheck_main(struct check *check) while (tick_is_expired(t->expire, now_ms)) { int t_con; - t_con = tick_add(t->expire, s->proxy->timeout.connect); + t_con = tick_add(t->expire, proxy->timeout.connect); t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter)); - if (s->proxy->timeout.check) + if (proxy->timeout.check) t->expire = tick_first(t->expire, t_con); } goto out; @@ -3219,6 +3220,7 @@ int init_email_alert(struct mailers *mls, struct proxy *p, char **err) HA_SPIN_INIT(&q->lock); check->inter = mls->timeout.mail; check->rise = DEF_AGENT_RISETIME; + check->proxy = p; check->fall = DEF_AGENT_FALLTIME; if ((err_str = init_check(check, PR_O2_TCPCHK_CHK))) { memprintf(err, "%s", err_str); diff --git a/src/server.c b/src/server.c index bc9e8052..d4c48fb6 100644 --- a/src/server.c +++ b/src/server.c @@ -1721,6 +1721,7 @@ struct server *new_server(struct proxy *proxy) srv->check.status = HCHK_STATUS_INI; srv->check.server = srv; + srv->check.proxy = proxy; srv->check.tcpcheck_rules = &proxy->tcpcheck_rules; srv->agent.status = HCHK_STATUS_INI; -- 2.14.4
>From ff6ac4b4ddc06394b780dd2dd5e77771cee5bd5b Mon Sep 17 00:00:00 2001 From: Olivier Houchard <ohouch...@haproxy.com> Date: Fri, 11 Jan 2019 18:43:04 +0100 Subject: [PATCH 2/2] BUG/MEDIUM: checks: Avoid having an associated server for email checks. When using a check to send email, avoid having an associated server, so that we don't modify the server state if we fail to send an email. Also revert back to initialize the check status to HCHK_STATUS_INI, now that set_server_check_status() stops early if there's no server, we shouldn't get in a mail loop anymore. This should be backported to 1.9. --- src/checks.c | 82 ++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/src/checks.c b/src/checks.c index edb61b4f..8593510e 100644 --- a/src/checks.c +++ b/src/checks.c @@ -236,6 +236,11 @@ static void set_server_check_status(struct check *check, short status, const cha if (check->result == CHK_RES_NEUTRAL) return; + /* If the check was really just sending a mail, it won't have an + * associated server, so we're done now. + */ + if (!s) + return; report = 0; switch (check->result) { @@ -681,7 +686,8 @@ static void chk_report_conn_err(struct check *check, int errno_bck, int expired) * might be due to a server IP change. * Let's trigger a DNS resolution if none are currently running. */ - dns_trigger_resolution(check->server->dns_requester); + if (check->server) + dns_trigger_resolution(check->server->dns_requester); } else if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L6_CONN)) == CO_FL_WAIT_L6_CONN) { @@ -711,13 +717,22 @@ static struct task *event_srv_chk_io(struct task *t, void *ctx, unsigned short s { struct check *check = ctx; struct conn_stream *cs = check->cs; + struct email_alertq *q; if (!(check->wait_list.events & SUB_RETRY_SEND)) wake_srv_chk(cs); if (!(check->wait_list.events & SUB_RETRY_RECV)) { - HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock); + if (check->server) + HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock); + else { + q = container_of(check, typeof(*q), check); + HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock); + } __event_srv_chk_r(cs); - HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock); + if (check->server) + HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock); + else + HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock); } return NULL; } @@ -1394,9 +1409,15 @@ static int wake_srv_chk(struct conn_stream *cs) { struct connection *conn = cs->conn; struct check *check = cs->data; + struct email_alertq *q; int ret = 0; - HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock); + if (check->server) + HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock); + else { + q = container_of(check, typeof(*q), check); + HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock); + } /* we may have to make progress on the TCP checks */ if (check->type == PR_O2_TCPCHK_CHK) { @@ -1433,7 +1454,10 @@ static int wake_srv_chk(struct conn_stream *cs) ret = -1; } - HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock); + if (check->server) + HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock); + else + HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock); /* if a connection got replaced, we must absolutely prevent the connection * handler from touching its fd, and perform the FD polling updates ourselves @@ -2131,7 +2155,8 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho int ret; int expired = tick_is_expired(t->expire, now_ms); - HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock); + if (check->server) + HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock); if (!(check->state & CHK_ST_INPROGRESS)) { /* no check currently running */ if (!expired) /* woke up too early */ @@ -2257,34 +2282,39 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho conn = NULL; } - if (check->result == CHK_RES_FAILED) { - /* a failure or timeout detected */ - check_notify_failure(check); - } - else if (check->result == CHK_RES_CONDPASS) { - /* check is OK but asks for stopping mode */ - check_notify_stopping(check); - } - else if (check->result == CHK_RES_PASSED) { - /* a success was detected */ - check_notify_success(check); + if (check->server) { + if (check->result == CHK_RES_FAILED) { + /* a failure or timeout detected */ + check_notify_failure(check); + } + else if (check->result == CHK_RES_CONDPASS) { + /* check is OK but asks for stopping mode */ + check_notify_stopping(check); + } + else if (check->result == CHK_RES_PASSED) { + /* a success was detected */ + check_notify_success(check); + } } task_set_affinity(t, MAX_THREADS_MASK); check->state &= ~CHK_ST_INPROGRESS; - rv = 0; - if (global.spread_checks > 0) { - rv = srv_getinter(check) * global.spread_checks / 100; - rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0))); + if (check->server) { + rv = 0; + if (global.spread_checks > 0) { + rv = srv_getinter(check) * global.spread_checks / 100; + rv -= (int) (2 * rv * (rand() / (RAND_MAX + 1.0))); + } + t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv)); } - t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv)); } reschedule: while (tick_is_expired(t->expire, now_ms)) t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter)); out_unlock: - HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock); + if (check->server) + HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock); return t; } @@ -2760,7 +2790,7 @@ static int tcpcheck_main(struct check *check) conn = cs->conn; /* Maybe there were an older connection we were waiting on */ check->wait_list.events = 0; - conn->target = &s->obj_type; + conn->target = s ? &s->obj_type : &proxy->obj_type; /* no client address */ clear_addr(&conn->addr.from); @@ -3173,9 +3203,8 @@ static struct task *process_email_alert(struct task *t, void *context, unsigned alert = LIST_NEXT(&q->email_alerts, typeof(alert), list); LIST_DEL(&alert->list); t->expire = now_ms; - check->server = alert->srv; check->tcpcheck_rules = &alert->tcpcheck_rules; - check->status = HCHK_STATUS_UNKNOWN; // the UNKNOWN status is used to exit set_server_check_status(.) early + check->status = HCHK_STATUS_INI; check->state |= CHK_ST_ENABLED; } @@ -3230,7 +3259,6 @@ int init_email_alert(struct mailers *mls, struct proxy *p, char **err) check->xprt = mailer->xprt; check->addr = mailer->addr; check->port = get_host_port(&mailer->addr); - //check->server = s; if ((t = task_new(MAX_THREADS_MASK)) == NULL) { memprintf(err, "out of memory while allocating mailer alerts task"); -- 2.14.4