On Tue, Oct 26, 2010 at 12:26 PM, Jeff Trawick <[email protected]> wrote:
> The biggest problem here is potentially waiting a long time for a
> specific child process before moving on to the next. That's
> potentially 30 seconds * huge-number. Also, I think it is worthwhile
> setting up the code so that it is easy to tweak the sleeps and
> actions, like the MPMs have.
>
> Look for ap_reclaim_child_processes() at
> http://svn.apache.org/viewvc/httpd/httpd/trunk/server/mpm_unix.c?view=markup
a lightly tested fix is attached
Index: modules/fcgid/fcgid_pm_main.c
===================================================================
--- modules/fcgid/fcgid_pm_main.c (revision 1027924)
+++ modules/fcgid/fcgid_pm_main.c (working copy)
@@ -360,49 +360,108 @@
}
}
-static void kill_all_subprocess(server_rec * main_server)
+typedef enum action_t {DO_NOTHING, KILL_GRACEFULLY, KILL_FORCEFULLY,
+ GIVE_UP} action_t;
+
+static int reclaim_one_pid(server_rec *main_server, fcgid_procnode *proc,
+ action_t action)
{
- size_t i;
int exitcode;
apr_exit_why_e exitwhy;
- fcgid_procnode *proc_table = proctable_get_table_array();
- /* Kill gracefully */
- for (i = 0; i < proctable_get_table_size(); i++) {
- if (proc_table[i].proc_pool)
- proc_kill_gracefully(&proc_table[i], main_server);
+ if (apr_proc_wait(&proc->proc_id, &exitcode, &exitwhy,
+ APR_NOWAIT) != APR_CHILD_NOTDONE) {
+ proc->diewhy = FCGID_DIE_SHUTDOWN;
+ proc_print_exit_info(proc, exitcode, exitwhy,
+ main_server);
+ apr_pool_destroy(proc->proc_pool);
+ proc->proc_pool = NULL;
+ return 1;
}
- apr_sleep(apr_time_from_sec(1));
- /* Kill with SIGKILL if it doesn't work */
- for (i = 0; i < proctable_get_table_size(); i++) {
- if (proc_table[i].proc_pool) {
- if (apr_proc_wait(&(proc_table[i].proc_id), &exitcode, &exitwhy,
- APR_NOWAIT) != APR_CHILD_NOTDONE) {
- proc_table[i].diewhy = FCGID_DIE_SHUTDOWN;
- proc_print_exit_info(&proc_table[i], exitcode, exitwhy,
- main_server);
- apr_pool_destroy(proc_table[i].proc_pool);
- proc_table[i].proc_pool = NULL;
- }
- else
- proc_kill_force(&proc_table[i], main_server);
- }
+ switch(action) {
+ case DO_NOTHING:
+ break;
+
+ case KILL_GRACEFULLY:
+ proc_kill_gracefully(proc, main_server);
+ break;
+
+ case KILL_FORCEFULLY:
+ ap_log_error(APLOG_MARK, APLOG_ERR, 0, main_server,
+ "FastCGI process %" APR_PID_T_FMT
+ " still did not exit, "
+ "terminating forcefully",
+ proc->proc_id.pid);
+ proc_kill_force(proc, main_server);
+ break;
+
+ case GIVE_UP:
+ ap_log_error(APLOG_MARK, APLOG_ERR, 0, main_server,
+ "could not make FastCGI process %" APR_PID_T_FMT
+ " exit",
+ proc->proc_id.pid);
+ break;
}
- /* Wait again */
- for (i = 0; i < proctable_get_table_size(); i++) {
- if (proc_table[i].proc_pool) {
- if (apr_proc_wait(&(proc_table[i].proc_id), &exitcode, &exitwhy,
- APR_WAIT) != APR_CHILD_NOTDONE) {
- proc_table[i].diewhy = FCGID_DIE_SHUTDOWN;
- proc_print_exit_info(&proc_table[i], exitcode, exitwhy,
- main_server);
- apr_pool_destroy(proc_table[i].proc_pool);
- proc_table[i].proc_pool = NULL;
+ return 0;
+}
+
+static void kill_all_subprocess(server_rec *main_server)
+{
+ apr_time_t waittime = 1024 * 16;
+ int i;
+ int not_dead_yet;
+ int cur_action, next_action;
+ apr_time_t starttime = apr_time_now();
+ struct {
+ action_t action;
+ apr_time_t action_time;
+ } action_table[] = {
+ {DO_NOTHING, 0}, /* index 0 must be DO_NOTHING */
+ {KILL_GRACEFULLY, apr_time_from_sec(0)},
+ {KILL_GRACEFULLY, apr_time_from_sec(1)},
+ {KILL_FORCEFULLY, apr_time_from_sec(8)},
+ {GIVE_UP, apr_time_from_sec(10)}
+ };
+ fcgid_procnode *proc_table = proctable_get_table_array();
+
+ next_action = 1;
+ do {
+ apr_sleep(waittime);
+ /* don't let waittime get longer than 1 second; otherwise, we don't
+ * react quickly to the last child exiting, and taking action can
+ * be delayed
+ */
+ waittime = waittime * 4;
+ if (waittime > apr_time_from_sec(1)) {
+ waittime = apr_time_from_sec(1);
+ }
+
+ /* see what action to take, if any */
+ if (action_table[next_action].action_time <= apr_time_now() -
starttime) {
+ cur_action = next_action;
+ ++next_action;
+ }
+ else {
+ cur_action = 0; /* index of DO_NOTHING entry */
+ }
+
+ /* now see who is done */
+ not_dead_yet = 0;
+ for (i = 0; i < proctable_get_table_size(); i++) {
+ if (!proc_table[i].proc_pool) {
+ continue; /* unused */
}
+
+ if (!reclaim_one_pid(main_server, &proc_table[i],
+ action_table[cur_action].action)) {
+ ++not_dead_yet;
+ }
}
- }
+
+ } while (not_dead_yet &&
+ action_table[cur_action].action != GIVE_UP);
}
/* This should be proposed as a stand-alone improvement to the httpd module,