--- src/haproxy.c | 117 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 92 insertions(+), 25 deletions(-)
diff --git a/src/haproxy.c b/src/haproxy.c index 0187f47..94adcf9 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -138,6 +138,7 @@ struct global global_default = { int stopping; /* non zero means stopping in progress */ int restarting; /* non zero means restart in progress */ +int replacing_workers = 0; /* non zero means replacing workers in progress */ int is_master = 0; /* non zero means that master/worker mode * has been activated and the current process * is the master */ @@ -322,6 +323,16 @@ void sig_listen(struct sig_handler *sh) /* * upon SIGCHLD reap child */ + +static struct task *replace_workers_task = NULL; + +struct task *task_replace_workers(struct task *t) +{ + replacing_workers = 1; + t->expire = TICK_ETERNITY; + return t; +} + void sig_reaper(struct sig_handler *sh) { int status, p; @@ -331,13 +342,24 @@ void sig_reaper(struct sig_handler *sh) pid = waitpid(-1, &status, WNOHANG); if (pid <= 0) break; + status = 0; for (p = 0; p < nb_allocated_oldpids; p++) if (oldpids[p] == pid) { oldpids[p] = 0; nb_oldpids--; + status++; break; } } + + /* Delay replacing workers for 3s to mitigate the possibility + * of a restart storm. */ + if (status) { + tv_update_date(0,1); /* else, the old time before select + * will be used */ + replace_workers_task->expire = tick_add_ifset(now_ms, 3000); + task_queue(replace_workers_task); + } } /* @@ -765,6 +787,13 @@ void deinit(void) int i; deinit_signals(); + + if (replace_workers_task) { + task_delete(replace_workers_task); + task_free(replace_workers_task); + replace_workers_task = NULL; + } + while (p) { free(p->id); free(p->check_req); @@ -989,6 +1018,11 @@ static int tell_old_pids(int sig) return ret; } +int worker_missing() +{ + return nb_allocated_oldpids != nb_oldpids; +} + /* * Runs the polling loop * @@ -1028,9 +1062,11 @@ void run_poll_loop() break; } - if (is_master) { - sleep(1); - continue; + if (replacing_workers) { + send_log(NULL, LOG_INFO, + "Replacing %d workers.\n", + nb_allocated_oldpids - nb_oldpids); + break; } /* The poller will ensure it returns around <next> */ @@ -1145,7 +1181,6 @@ static FILE *prepare(int argc, char **argv) signal_register_fct(SIGUSR2, sig_restart, SIGUSR2); signal_register_fct(SIGHUP, sig_dump_state, SIGHUP); signal_register_fct(SIGTERM, sig_term, SIGTERM); - signal_register_fct(SIGCHLD, sig_reaper, SIGCHLD); /* Always catch SIGPIPE even on platforms which define MSG_NOSIGNAL. * Some recent FreeBSD setups report broken pipes, and MSG_NOSIGNAL @@ -1372,20 +1407,26 @@ static void create_processes(int argc, char **argv, FILE *pidfile) send_log(NULL, LOG_INFO, "Master started\n"); } - /* Store PIDs of worker processes in oldpid so - * they can be signaled later */ - nb_oldpids = global.nbproc; - free(oldpids); - oldpids = malloc(nb_oldpids * sizeof(*oldpids)); - if (!oldpids) { - send_log(NULL, LOG_ERR, "Cannot allocate memory " - "for oldpids.\n"); - protocol_unbind_all(); - exit(1); /* there has been an error */ + if (!replacing_workers) { + /* Store PIDs of worker processes in oldpid so + * they can be signaled later */ + nb_oldpids = 0; + nb_allocated_oldpids = global.nbproc; + free(oldpids); + oldpids = calloc(nb_allocated_oldpids, sizeof(*oldpids)); + if (!oldpids) { + send_log(NULL, LOG_ERR, "Cannot allocate memory " + "for oldpids.\n"); + protocol_unbind_all(); + exit(1); /* there has been an error */ + } } /* the father launches the required number of processes */ for (proc = 0; proc < global.nbproc; proc++) { + /* In the case of replacing_workers a worker may still exist */ + if (oldpids[proc]) + continue; ret = fork(); if (ret < 0) { send_log(NULL, LOG_ERR, "Cannot fork.\n"); @@ -1404,6 +1445,7 @@ static void create_processes(int argc, char **argv, FILE *pidfile) break; } oldpids[proc] = ret; + nb_oldpids++; relative_pid++; /* each child will get a different one */ } @@ -1411,8 +1453,22 @@ static void create_processes(int argc, char **argv, FILE *pidfile) if (!(global.mode & MODE_MASTER_WORKER)) /* The parent process is no longer needed */ exit(0); - else - is_master = 1; + + is_master = 1; + if (!replace_workers_task) { + replace_workers_task = task_new(); + if (unlikely(replace_workers_task == NULL)) { + send_log(NULL, LOG_ERR, + "Cannot create reaper task.\n"); + protocol_unbind_all(); + exit(1); /* there has been an error */ + } + + replace_workers_task->process = task_replace_workers; + replace_workers_task->expire = TICK_ETERNITY; + + signal_register_fct(SIGCHLD, sig_reaper, SIGCHLD); + } } /* we might have to unbind some proxies from some processes */ @@ -1453,14 +1509,23 @@ static void create_processes(int argc, char **argv, FILE *pidfile) int main(int argc, char **argv) { FILE *pidfile = NULL; + int mode = 0; while (1) { - FILE *newpidfile = prepare(argc, argv); - if (!is_master) - pidfile = newpidfile; - + if (!replacing_workers) { + FILE *newpidfile = prepare(argc, argv); + if (!is_master) + pidfile = newpidfile; + mode = global.mode & (MODE_QUIET|MODE_VERBOSE); + } else + /* Restore value of mode before it was + * mangled by create_processes() */ + global.mode = (global.mode & + ~(MODE_QUIET|MODE_VERBOSE)) | mode; create_processes(argc, argv, pidfile); + replacing_workers = 0; + if (!is_master) drop_capabilities(); @@ -1471,12 +1536,14 @@ int main(int argc, char **argv) */ run_poll_loop(); - /* Free all Hash Keys and all Hash elements */ - appsession_cleanup(); - /* Do some cleanup */ - deinit(); + if (!replacing_workers) { + /* Free all Hash Keys and all Hash elements */ + appsession_cleanup(); + /* Do some cleanup */ + deinit(); + } - if (!restarting) + if (!restarting && !replacing_workers) break; } -- 1.7.2.3